# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory
import os
# Show which files are present in the input directory.
print(os.listdir("../input"))
# Any results you write to the current directory are saved as output.
# Seed NumPy's global random generator. It is seeded again with 1000 further
# down, before the models are built, so this value only governs the random
# calls made in between.
from numpy.random import seed
seed(1)
# Seed TensorFlow's random generator as well.
# NOTE(review): a top-level `set_random_seed` looks like the TensorFlow 1.x
# API -- confirm against the installed TensorFlow version.
from tensorflow import set_random_seed
set_random_seed(2)
from sklearn.model_selection import train_test_split

# Load the transactions table from the Kaggle input directory.
df = pd.read_csv('../input/creditcard.csv')

# Notebook-style inspection expressions; they display nothing when the file
# is run as a plain script.
df.head()
df.describe()
df.isnull().sum()

# Discard the 'Time' column, then separate the feature matrix from the
# 'Class' target column.
df = df.drop('Time', axis='columns')
X = df.drop('Class', axis='columns').values
y = df.Class.values
X.shape

# Per-column min-max scaling, performed in place on X: shift every column so
# its minimum is 0, then divide by the resulting column maximum.
np.subtract(X, X.min(axis=0), out=X)
np.divide(X, X.max(axis=0), out=X)
X.mean()
X.shape

# Keep 10% of the rows aside as the evaluation split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
import os
os.environ["KERAS_BACKEND"] = "tensorflow"
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from keras.layers import Input, Embedding, multiply, BatchNormalization
from keras.models import Model, Sequential
from keras.layers.core import Reshape, Dense, Dropout, Flatten
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import Conv2D, UpSampling2D
from keras.datasets import mnist
from keras.optimizers import Adam
from keras import backend as K
from keras import initializers
from keras.utils import to_categorical
# Select the 'th' image dimension ordering in the Keras backend.
# NOTE(review): the models built in this file use Dense layers only, so this
# setting presumably has no effect here -- confirm before removing it.
K.set_image_dim_ordering('th')
# Deterministic output.
# Tired of seeing the same results every time? Remove the line below.
np.random.seed(1000)
# The results are a little better when the dimensionality of the random vector is only 10.
# The dimensionality has been left at 100 for consistency with other GAN implementations.
# NOTE(review): `randomDim` is not referenced anywhere in the visible code.
# The generator is instantiated with latent_dim=10 and the noise Input uses
# shape=(10,), so the value 100 below does not describe the model that is
# actually built.
randomDim = 100
def build_generator(latent_dim, data_dim):
    """Build the generator network that maps noise vectors to synthetic samples.

    The network is a small fully connected stack: two hidden blocks
    (Dense -> LeakyReLU -> BatchNormalization) of 16 and 32 units, followed
    by a tanh output layer. The layer summary is printed as a side effect.

    Args:
        latent_dim: Length of the input noise vector.
        data_dim: Number of features in each generated sample.

    Returns:
        A Keras ``Model`` whose input has shape ``(latent_dim,)`` and whose
        output has ``data_dim`` units.
    """
    model = Sequential()

    # Only the first layer declares the input size.
    model.add(Dense(16, input_dim=latent_dim))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))

    # This layer consumes the 16-unit output of the previous block, so it
    # must not claim an input of size latent_dim.
    model.add(Dense(32))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))

    # NOTE(review): tanh produces values in [-1, 1]; confirm this matches the
    # range the real samples are scaled to before they reach the discriminator.
    model.add(Dense(data_dim, activation='tanh'))
    model.summary()

    # Wrap the Sequential stack in a functional Model with an explicit input.
    noise = Input(shape=(latent_dim,))
    img = model(noise)
    return Model(noise, img)
# Instantiate the generator: noise vectors of length 10 in, 29 features out.
generator = build_generator(data_dim=29, latent_dim=10)
def build_discriminator(data_dim, num_classes):
    """Build the two-headed discriminator.

    A shared fully connected feature extractor feeds two output heads:

    * ``valid``: one sigmoid unit.
    * ``label``: a softmax over ``num_classes + 1`` units.

    The layer summary of the shared extractor is printed as a side effect.

    Args:
        data_dim: Number of features in each input sample.
        num_classes: Number of real classes; the label head gets one
            additional output unit on top of these.

    Returns:
        A Keras ``Model`` mapping a ``(data_dim,)`` input to the list
        ``[valid, label]``.
    """
    model = Sequential()

    # Only the first layer declares the input size.
    model.add(Dense(31, input_dim=data_dim))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dropout(0.25))

    # This layer consumes the 31-unit output above, so it must not claim an
    # input of size data_dim.
    model.add(Dense(16))
    model.add(LeakyReLU(alpha=0.2))
    model.summary()

    img = Input(shape=(data_dim,))
    features = model(img)

    # Both heads read the same shared features.
    valid = Dense(1, activation="sigmoid")(features)
    label = Dense(num_classes + 1, activation="softmax")(features)
    return Model(img, [valid, label])
# Discriminator for 29 input features and 2 real classes.
discriminator = build_discriminator(num_classes=2, data_dim=29)

# A single Adam instance is used for both compiled models.
optimizer = Adam(0.0002, 0.5)

# The discriminator has two outputs, so it gets one loss per output, each
# weighted 0.5.
discriminator.compile(
    optimizer=optimizer,
    metrics=['accuracy'],
    loss=['binary_crossentropy', 'categorical_crossentropy'],
    loss_weights=[0.5, 0.5],
)

# Stacked model: noise -> generator -> discriminator validity output.
noise = Input(shape=(10,))
img = generator(noise)

# The trainable flag is switched off after the discriminator's own compile
# and before the stacked model is compiled; keep this ordering.
discriminator.trainable = False

# Only the validity head is wired into the stacked model; the label head's
# output is discarded.
valid, _ = discriminator(img)
combined = Model(noise, valid)
combined.compile(
    optimizer=optimizer,
    loss=['binary_crossentropy'],
)
X_train.shape
from imblearn.under_sampling import RandomUnderSampler
rus = RandomUnderSampler(random_state=42)
# Undersample the training split only. Fitting the sampler on the full X, y
# would pull held-out test rows into the training set and inflate the F1
# score measured on X_test.
X_res, y_res = rus.fit_sample(X_train, y_train)
X_res.shape
# Rescale the training data with its own global minimum and maximum, then
# apply exactly the same shift and scale to the test data so that both
# splits live on one common scale.
res_min = X_res.min()
X_res -= res_min
res_max = X_res.max()
X_res /= res_max
X_test -= res_min
X_test /= res_max
# Balanced version of the (already rescaled) test split.
X_test_res, y_test_res = rus.fit_sample(X_test, y_test)
from sklearn.metrics import accuracy_score, f1_score
y_res.shape
def train(X_train, y_train,
          X_test, y_test,
          generator, discriminator,
          combined,
          num_classes,
          epochs,
          batch_size=128):
    """Alternate discriminator and generator updates and track test-set F1.

    Each iteration trains the discriminator on half a batch of real samples
    and half a batch of generated samples, then trains the generator through
    ``combined`` on a full batch of noise. Every 10th iteration the
    discriminator's label head is evaluated on the test data.

    Args:
        X_train: Training feature matrix; rows are sampled with replacement.
        y_train: Integer class labels aligned with ``X_train``.
        X_test: Feature matrix used for the periodic F1 evaluation.
        y_test: Labels aligned with ``X_test``.
        generator: Model turning noise into samples; its input width gives
            the noise dimension.
        discriminator: Compiled two-output model ``[valid, label]``.
        combined: Compiled model mapping noise to the discriminator's
            validity output.
        num_classes: Number of real classes. The label head is expected to
            have ``num_classes + 1`` outputs, the last one meaning "fake".
            The F1 computation uses ``f1_score`` with its default settings,
            so it presumably expects binary labels (``num_classes == 2``).
        epochs: Number of training iterations (one batch each).
        batch_size: Batch size for the generator step; the discriminator
            sees half of it as real and half as generated samples.

    Returns:
        List of F1 scores, one per evaluation (every 10 iterations).
    """
    f1_progress = []
    half_batch = int(batch_size / 2)

    # Take the noise size from the generator instead of hard-coding it, so
    # the loop stays valid for generators built with another latent_dim.
    latent_dim = generator.input_shape[-1]

    # Class weights:
    # cw1 weights the real/fake validity output uniformly.
    # cw2 weights the label output: each real class gets
    # num_classes / half_batch, the extra "fake" class gets 1 / half_batch.
    cw1 = {0: 1, 1: 1}
    cw2 = {i: num_classes / half_batch for i in range(num_classes)}
    cw2[num_classes] = 1 / half_batch

    # These targets are identical in every iteration, so build them once.
    valid = np.ones((half_batch, 1))
    fake = np.zeros((half_batch, 1))
    fake_labels = to_categorical(np.full((half_batch, 1), num_classes),
                                 num_classes=num_classes + 1)
    validity = np.ones((batch_size, 1))

    for epoch in range(epochs):
        # ---------------------
        #  Train Discriminator
        # ---------------------
        # Select a random half batch of real samples.
        idx = np.random.randint(0, X_train.shape[0], half_batch)
        imgs = X_train[idx]
        labels = to_categorical(y_train[idx], num_classes=num_classes + 1)

        # Sample noise and generate a half batch of synthetic samples.
        noise = np.random.normal(0, 1, (half_batch, latent_dim))
        gen_imgs = generator.predict(noise)

        d_loss_real = discriminator.train_on_batch(
            imgs, [valid, labels], class_weight=[cw1, cw2])
        d_loss_fake = discriminator.train_on_batch(
            gen_imgs, [fake, fake_labels], class_weight=[cw1, cw2])
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # ---------------------
        #  Train Generator
        # ---------------------
        noise = np.random.normal(0, 1, (batch_size, latent_dim))

        # `combined` has a single output, so the two-element class_weight
        # list meant for the two-headed discriminator does not apply to it
        # and is no longer passed.
        g_loss = combined.train_on_batch(noise, validity)

        # Report progress. Entries 3 and 4 of d_loss are printed as the
        # accuracies of the two discriminator outputs.
        print ("%d [D loss: %f, acc: %.2f%%, op_acc: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100*d_loss[3], 100*d_loss[4], g_loss))

        if epoch % 10 == 0:
            _, y_pred = discriminator.predict(X_test, batch_size=batch_size)
            # Ignore the last ("fake") column and pick the most likely of
            # the remaining classes.
            y_pred = np.argmax(y_pred[:, :-1], axis=1)
            f1 = f1_score(y_test, y_pred)
            print('Epoch: {}, F1: {:.5f}, F1P: {}'.format(epoch,f1,len(f1_progress)))
            f1_progress.append(f1)

    return f1_progress
# Run 5000 training iterations on the undersampled data and collect the F1
# scores measured on the test split.
f1_p = train(
    X_res, y_res,
    X_test, y_test,
    generator, discriminator,
    combined,
    batch_size=128,
    epochs=5000,
    num_classes=2,
)

# Plot the F1 history; train() records one value every 10 iterations, hence
# the x-axis label.
fig = plt.figure(figsize=(10, 7))
plt.plot(f1_p)
plt.ylabel('F1 Score Validation')
plt.xlabel('10 Epochs')