#!/usr/bin/env python
# coding: utf-8

# # Simple Example of Incremental Learning Agent w/Python + Keras
#
# (EXPERIMENT)
# This notebook walks through an experiment with building a machine learning
# model that will learn incrementally.
#
# Why attempt to learn incrementally?
#
# There are two primary reasons for attempting to build a model that learns
# incrementally:
#
# 1. Limited Data/Cold start problem
# (Cold Start Problem~ Concerns the issue that the system cannot draw any
# inferences for users or items about which it has not yet gathered
# sufficient information). As we know, building a machine learning model
# requires a lot of data. This often makes it hard to get started ("Guess we
# can add ML once we have more data..."). But what if the model could start
# providing limited (admittedly low accuracy) predictions today and build up
# over time?
#
# 2. Data Privacy
# The transfer of large files (training) to the cloud creates data privacy
# and security issues. Using incremental learning, we can build a model
# without the need to store sensitive files in the cloud. With this method,
# any private data can be encrypted in transit and only the model itself is
# stored on disk.

# In[2]:

import hashlib

import numpy  # noqa: F401  (kept from the original notebook cell)
import numpy as np
import pandas as pd
from keras import optimizers
from keras.layers import Dense
from keras.models import Sequential
from keras.models import load_model  # noqa: F401  (kept from the notebook)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

np.random.seed(4)


# In[3]:

# This dataset is originally from the National Institute of Diabetes and
# Digestive and Kidney Diseases. The objective is to predict based on
# diagnostic measurements whether a patient has diabetes.
# https://data.world/data-society/pima-indians-diabetes-database
df = pd.read_csv("../datasets/diabetes.csv")


# In[4]:

# Features & Target: the first eight columns are the inputs, the ninth is
# the label.
X = df.iloc[:, :8].values
y = df.iloc[:, 8].values


# In[5]:

# Need to rescale the features
scaler = MinMaxScaler(feature_range=(0, 1))
rescaledX = scaler.fit_transform(X)

# Split for training and evaluation
X_train, X_test, y_train, y_test = train_test_split(rescaledX, y, test_size=0.33)


# In[7]:

class IncrementalAgent:
    """Small binary classifier that is trained one sample at a time.

    The agent remembers the class it predicted for every sample it has seen
    (keyed by a SHA-256 digest of the sample's bytes) so that a later call to
    ``evaluate`` can compare that prediction with the true label and retrain
    on the sample when the prediction was wrong.
    """

    def __init__(self, name):
        """Build and compile the network.

        Args:
            name: Identifier of the agent; also used as the stem of the
                ``<name>.h5`` file written by ``train``.
        """
        # Identify agent
        self.name = name

        # Need to keep track of predictions for evaluation of the model:
        # maps sample digest -> predicted class (0 or 1).
        self.prediction_records = {}

        # Create model: 8 inputs -> 12 -> 8 -> 1 sigmoid output.
        self.model = Sequential()
        self.model.add(Dense(12, input_dim=8, activation='relu'))
        self.model.add(Dense(8, activation='relu'))
        self.model.add(Dense(1, activation='sigmoid'))

        # Compile model. The learning rate is passed positionally because the
        # keyword was renamed from ``lr`` to ``learning_rate`` across Keras
        # releases; it is the first parameter under either name.
        sgd = optimizers.SGD(0.01)
        self.model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
        print("Model Initialized.")

    @staticmethod
    def _fingerprint(feature_array):
        """Return the SHA-256 hex digest identifying a feature array.

        ``tobytes()`` is used instead of handing the array to hashlib
        directly, so non-contiguous arrays (e.g. strided views) are accepted
        too. For contiguous arrays the digest is unchanged.
        """
        return hashlib.sha256(np.asarray(feature_array).tobytes()).hexdigest()

    def train(self, feature_array, target, epoch_count, batch_count):
        """Fit the model on one labelled sample and save it to disk.

        Args:
            feature_array: Features passed to ``model.fit`` (the demo below
                uses arrays of shape ``(1, 8)``).
            target: Scalar class label for the sample.
            epoch_count: Value passed as ``epochs`` to ``model.fit``.
            batch_count: Value passed as ``batch_size`` to ``model.fit``.
        """
        # Wrap the scalar label so it has one row and one column.
        target_to_array = np.array([[target]])
        self.model.fit(feature_array, target_to_array, epochs=epoch_count, batch_size=batch_count)
        self.model.save('{}.h5'.format(self.name))
        print("Model Trained.")

    def evaluate(self, feature_array, target):
        """Compare the recorded prediction for a sample with its true class.

        If the sample has been predicted before and the prediction was
        wrong, the model is retrained on the sample and the prediction is
        refreshed. Samples that were never predicted are only reported.

        Args:
            feature_array: The same features previously given to ``predict``.
            target: The true class label.
        """
        # Check if the features have previously been seen
        prediction_ref = self._fingerprint(feature_array)
        if prediction_ref not in self.prediction_records:
            print("First time seeing this sample.")
            return

        previous_prediction = self.prediction_records[prediction_ref]
        print("Previous Predicted Class: {}".format(previous_prediction))
        print("Actual Class: {}".format(target))

        if previous_prediction == target:
            print("Prediction Correct!")
            return

        print("Prediction Incorrect. Relearn...")
        self.train(feature_array, target, 5, 1)
        print("Retry Predict & Update Previous Prediction")
        self.predict(feature_array)

    def predict(self, feature_array):
        """Predict the class of a sample and record the result.

        Args:
            feature_array: Features passed to ``model.predict``; only the
                first output value is used.

        Returns:
            The predicted class: 1 if the probability is above 0.5, else 0.
        """
        # Create ID for prediction
        prediction_ref = self._fingerprint(feature_array)

        # Actual prediction
        probability_prediction = self.model.predict(feature_array)[0][0]
        print("Raw Probability: {}".format(probability_prediction))

        # Get Class Prediction
        prediction = 1 if probability_prediction > 0.5 else 0

        # Add Prediction to Records
        self.prediction_records[prediction_ref] = prediction
        print("Predicted Class: {}".format(prediction))
        return prediction


# In[8]:

agent1 = IncrementalAgent('Agent1')


# In[9]:

# Each sample is wrapped in an outer array so it forms a batch of one.
sample = np.array([X_train[2]])
target = y_train[2]


# In[10]:

agent1.train(sample, target, 5, 1)


# In[11]:

new_sample = np.array([X_train[3]])


# In[12]:

agent1.predict(new_sample)


# In[13]:

new_target = y_train[3]


# In[14]:

agent1.evaluate(new_sample, new_target)


# In[15]:

next_sample = np.array([X_train[10]])


# In[16]:

agent1.predict(next_sample)


# In[17]:

next_target = y_train[10]


# In[18]:

agent1.evaluate(next_sample, next_target)


# ### Caveats:
# - This example does not result in a greatly accurate model.
# - There are better algorithm choices other than NNs for this dataset.