#!/usr/bin/env python
# coding: utf-8

# # Encoders
#
# * Scalar
# * Date/time
# * Category
# * Multi

# In[1]:

import numpy

# In[2]:

from nupic.encoders import ScalarEncoder
get_ipython().run_line_magic('pinfo', 'ScalarEncoder')

# In[3]:

# 22 bits with 3 active representing values 0 to 100
# clipInput=True makes values >100 encode the same as 100 (instead of
# throwing a ValueError)
# forced=True allows small values for `n` and `w`
enc = ScalarEncoder(n=22, w=3, minval=2.5, maxval=97.5, clipInput=True, forced=True)
print "3 =", enc.encode(3)
print "4 =", enc.encode(4)
print "5 =", enc.encode(5)

# In[4]:

# Encode maxval
print "100 =", enc.encode(100)
# See that any larger number gets the same encoding
print "1000 =", enc.encode(1000)

# In[5]:

from nupic.encoders.random_distributed_scalar import RandomDistributedScalarEncoder
get_ipython().run_line_magic('pinfo', 'RandomDistributedScalarEncoder')

# In[6]:

# 21 bits with 3 active with buckets of size 5
rdse = RandomDistributedScalarEncoder(n=21, w=3, resolution=5, offset=2.5)

print "3 = ", rdse.encode(3)
print "4 = ", rdse.encode(4)
print "5 = ", rdse.encode(5)
print
print "100 = ", rdse.encode(100)
print "1000 =", rdse.encode(1000)

# In[7]:

import datetime
from nupic.encoders.date import DateEncoder
get_ipython().run_line_magic('pinfo', 'DateEncoder')

# In[8]:

de = DateEncoder(season=5)

now = datetime.datetime.strptime("2014-05-02 13:08:58", "%Y-%m-%d %H:%M:%S")
print "now = ", de.encode(now)
nextMonth = datetime.datetime.strptime("2014-06-02 13:08:58", "%Y-%m-%d %H:%M:%S")
print "next month =", de.encode(nextMonth)
xmas = datetime.datetime.strptime("2014-12-25 13:08:58", "%Y-%m-%d %H:%M:%S")
print "xmas = ", de.encode(xmas)

# In[9]:

from nupic.encoders.category import CategoryEncoder

categories = ("cat", "dog", "monkey", "slow loris")
encoder = CategoryEncoder(w=3, categoryList=categories, forced=True)
cat = encoder.encode("cat")
dog = encoder.encode("dog")
monkey = encoder.encode("monkey")
loris = encoder.encode("slow loris")
print "cat = ", cat
print "dog = ", dog
print "monkey = ", monkey
print "slow loris =", loris

# In[10]:

print encoder.encode(None)

# In[11]:

print encoder.encode("unknown")

# In[12]:

print encoder.decode(cat)

# In[13]:

catdog = numpy.array([0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0])
print encoder.decode(catdog)
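# In[ ]:

# (Added sketch, not part of the original notebook.) Semantic similarity
# between SDRs shows up as overlapping active bits. A quick way to compare two
# encodings is to count the positions where both are 1. `overlap` is a local
# helper defined here, not a NuPIC API.

def overlap(a, b):
    """Return the number of active bits shared by two binary vectors."""
    return numpy.sum(numpy.logical_and(a, b))

# Nearby scalar values share bits; distant values share none.
print "overlap(3, 4)   =", overlap(enc.encode(3), enc.encode(4))
print "overlap(3, 100) =", overlap(enc.encode(3), enc.encode(100))

# The RDSE behaves the same way, even though its buckets are placed randomly.
print "overlap(3, 4) rdse =", overlap(rdse.encode(3), rdse.encode(4))

# Distinct categories are encoded with disjoint bits, so their overlap is 0.
print "overlap(cat, dog) =", overlap(cat, dog)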
# # Spatial Pooler

# In[14]:

from nupic.research.spatial_pooler import SpatialPooler
get_ipython().run_line_magic('pinfo', 'SpatialPooler')

# In[15]:

print SpatialPooler

# In[23]:

print len(cat)
print cat

# In[24]:

sp = SpatialPooler(inputDimensions=(15,),
                   columnDimensions=(4,),
                   potentialRadius=15,
                   numActiveColumnsPerInhArea=1,
                   globalInhibition=True,
                   synPermActiveInc=0.03,
                   potentialPct=1.0)

for column in xrange(4):
    connected = numpy.zeros((15,), dtype="int")
    sp.getConnectedSynapses(column, connected)
    print connected

# In[25]:

output = numpy.zeros((4,), dtype="int")
sp.compute(cat, learn=True, activeArray=output)
print output

# In[26]:

for _ in xrange(20):
    sp.compute(cat, learn=True, activeArray=output)

# In[27]:

for column in xrange(4):
    connected = numpy.zeros((15,), dtype="int")
    sp.getConnectedSynapses(column, connected)
    print connected

# In[28]:

for _ in xrange(200):
    sp.compute(cat, learn=True, activeArray=output)
    sp.compute(dog, learn=True, activeArray=output)
    sp.compute(monkey, learn=True, activeArray=output)
    sp.compute(loris, learn=True, activeArray=output)

# In[29]:

for column in xrange(4):
    connected = numpy.zeros((15,), dtype="int")
    sp.getConnectedSynapses(column, connected)
    print connected

# In[30]:

noisyCat = numpy.zeros((15,), dtype="uint32")
noisyCat[3] = 1
noisyCat[4] = 1  # This is part of dog!
noisyCat[6] = 1
print noisyCat

# In[31]:

sp.compute(noisyCat, learn=False, activeArray=output)
print output  # matches cat!
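# In[ ]:

# (Added sketch, not part of the original notebook.) To quantify the trained
# SP's noise robustness, flip one input bit at a time and check whether the
# active column stays the same as for the clean "cat" input. All calls below
# are the same SpatialPooler methods used above.

cleanOutput = numpy.zeros((4,), dtype="int")
sp.compute(cat, learn=False, activeArray=cleanOutput)

for bit in xrange(15):
    noisy = numpy.array(cat, dtype="uint32")
    noisy[bit] = 1 - noisy[bit]  # flip a single bit
    noisyOutput = numpy.zeros((4,), dtype="int")
    sp.compute(noisy, learn=False, activeArray=noisyOutput)
    print "flip bit %2d -> same active column: %s" % (
        bit, numpy.array_equal(noisyOutput, cleanOutput))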
# # Temporal Memory (a.k.a. Sequence Memory, Temporal Pooler)
#
# From: `examples/tm/hello_tm.py`

# In[32]:

from nupic.research.BacktrackingTM import BacktrackingTM
get_ipython().run_line_magic('pinfo', 'BacktrackingTM')

# In[33]:

# Step 1: create Temporal Pooler instance with appropriate parameters
tm = BacktrackingTM(numberOfCols=50, cellsPerColumn=2,
                    initialPerm=0.5, connectedPerm=0.5,
                    minThreshold=10, newSynapseCount=10,
                    permanenceInc=0.1, permanenceDec=0.0,
                    activationThreshold=8,
                    globalDecay=0, burnIn=1,
                    checkSynapseConsistency=False,
                    pamLength=10)

# In[34]:

# Step 2: create input vectors to feed to the temporal memory. Each input vector
# must be numberOfCols wide. Here we create a simple sequence of 5 vectors
# representing the sequence A -> B -> C -> D -> E
x = numpy.zeros((5, tm.numberOfCols), dtype="uint32")
x[0, 0:10] = 1    # Input SDR representing "A", corresponding to columns 0-9
x[1, 10:20] = 1   # Input SDR representing "B", corresponding to columns 10-19
x[2, 20:30] = 1   # Input SDR representing "C", corresponding to columns 20-29
x[3, 30:40] = 1   # Input SDR representing "D", corresponding to columns 30-39
x[4, 40:50] = 1   # Input SDR representing "E", corresponding to columns 40-49

# In[35]:

# Step 3: send this simple sequence to the temporal memory for learning.
# We repeat the sequence 10 times.
for i in range(10):

    # Send each letter in the sequence in order
    for j in range(5):

        # The compute method performs one step of learning and/or inference.
        # Note: here we just perform learning, but you can perform
        # prediction/inference and learning in the same step if you want
        # (online learning).
        tm.compute(x[j], enableLearn=True, enableInference=False)

        # This function prints the segments associated with every cell.
        # If you really want to understand the TM, uncomment this line. By
        # following every step you can get an excellent understanding of
        # exactly how the TM learns.
        # tm.printCells()

    # The reset command tells the TM that a sequence just ended and
    # essentially zeros out all the states. It is not strictly necessary, but
    # the TM learns more quickly with resets and the state is messier without
    # them.
    tm.reset()

# In[36]:

# Step 4: send the same sequence of vectors and look at predictions made by
# the temporal memory

# Utility routine for printing the input vector
def formatRow(x):
    s = ''
    for c in range(len(x)):
        if c > 0 and c % 10 == 0:
            s += ' '
        s += str(x[c])
    s += ' '
    return s

for j in range(5):
    print "\n\n--------", "ABCDE"[j], "-----------"
    print "Raw input vector\n", formatRow(x[j])

    # Send each vector to the TM, with learning turned off
    tm.compute(x[j], enableLearn=False, enableInference=True)

    # This method prints out the active state of each cell followed by the
    # predicted state of each cell. For convenience the cells are grouped
    # 10 at a time. When there are multiple cells per column the printout
    # is arranged so the cells in a column are stacked together.
    #
    # What you should notice is that the columns where active state is 1
    # represent the SDR for the current input pattern and the columns where
    # predicted state is 1 represent the SDR for the next expected pattern.
    print "\nAll the active and predicted cells:"
    tm.printStates(printPrevious=False, printLearnState=False)

    # tm.getPredictedState() gets the predicted cells.
    # predictedCells[c][i] represents the state of the i'th cell in the c'th
    # column. To see if a column is predicted, we can simply take the OR
    # across all the cells in that column. In numpy we can do this by taking
    # the max along axis 1.
    print "\n\nThe following columns are predicted by the temporal memory. This"
    print "should correspond to columns in the *next* item in the sequence."
    predictedCells = tm.getPredictedState()
    print formatRow(predictedCells.max(axis=1).nonzero())
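# In[ ]:

# (Added sketch, not part of the original notebook.) Instead of eyeballing the
# printout, we can check the predictions programmatically: collapse predicted
# cells to columns (max along axis 1, as above) and compare them with the
# columns of the next pattern in the sequence. After this much training the
# sets usually match exactly, though the TM gives no hard guarantee.

tm.reset()
for j in range(4):
    tm.compute(x[j], enableLearn=False, enableInference=True)
    predictedColumns = set(tm.getPredictedState().max(axis=1).nonzero()[0])
    nextColumns = set(x[j + 1].nonzero()[0])
    print "after %s: predicted columns == columns of %s: %s" % (
        "ABCDE"[j], "ABCDE"[j + 1], predictedColumns == nextColumns)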
# # Networks and Regions
#
# See slides.

# # Online Prediction Framework
#
# * CLAModel
# * OPF Client
# * Swarming

# # CLAModel
#
# From `examples/opf/clients/hotgym/simple/hotgym.py`

# # Model Parameters
#
# `MODEL_PARAMS` holds all of the parameters for the CLA model and its subcomponents.

# In[37]:

# Model Params!
MODEL_PARAMS = {
    # Type of model that the rest of these parameters apply to.
    'model': "HTMPrediction",

    # Version that specifies the format of the config.
    'version': 1,

    # Intermediate variables used to compute fields in modelParams and also
    # referenced from the control section.
    'aggregationInfo': {'days': 0,
                        'fields': [('consumption', 'sum')],
                        'hours': 1,
                        'microseconds': 0,
                        'milliseconds': 0,
                        'minutes': 0,
                        'months': 0,
                        'seconds': 0,
                        'weeks': 0,
                        'years': 0},

    'predictAheadTime': None,

    # Model parameter dictionary.
    'modelParams': {
        # The type of inference that this model will perform
        'inferenceType': 'TemporalMultiStep',

        'sensorParams': {
            # Sensor diagnostic output verbosity control;
            # if > 0: sensor region will print out on screen what it's sensing
            # at each step; 0: silent; >=1: some info; >=2: more info;
            # >=3: even more info (see compute() in py/regions/RecordSensor.py)
            'verbosity': 0,

            # Include the encoders we use
            'encoders': {
                u'timestamp_timeOfDay': {
                    'fieldname': u'timestamp',
                    'name': u'timestamp_timeOfDay',
                    'timeOfDay': (21, 0.5),
                    'type': 'DateEncoder'
                },
                u'timestamp_dayOfWeek': None,
                u'timestamp_weekend': None,
                u'consumption': {
                    'clipInput': True,
                    'fieldname': u'consumption',
                    'maxval': 100.0,
                    'minval': 0.0,
                    'n': 50,
                    'name': u'c1',
                    'type': 'ScalarEncoder',
                    'w': 21
                },
            },

            # A dictionary specifying the period for automatically-generated
            # resets from a RecordSensor;
            #
            # None = disable automatically-generated resets (also disabled if
            # all of the specified values evaluate to 0).
            # Valid keys are any desired combination of the following:
            # days, hours, minutes, seconds, milliseconds, microseconds, weeks
            #
            # Example for 1.5 days: sensorAutoReset = dict(days=1, hours=12)
            #
            # (value generated from SENSOR_AUTO_RESET)
            'sensorAutoReset': None,
        },

        'spEnable': True,

        'spParams': {
            # SP diagnostic output verbosity control;
            # 0: silent; >=1: some info; >=2: more info;
            'spVerbosity': 0,

            # Spatial Pooler implementation selector, see getSPClass
            # in py/regions/SPRegion.py for details
            # 'py' (default), 'cpp' (speed optimized, new)
            'spatialImp': 'cpp',

            'globalInhibition': 1,

            # Number of cell columns in the cortical region (same number for
            # SP and TM)
            # (see also tpNCellsPerCol)
            'columnCount': 2048,

            'inputWidth': 0,

            # SP inhibition control (absolute value);
            # Maximum number of active columns in the SP region's output (when
            # there are more, the weaker ones are suppressed)
            'numActiveColumnsPerInhArea': 40,

            'seed': 1956,

            # potentialPct
            # What percent of the column's receptive field is available
            # for potential synapses. At initialization time, we will
            # choose potentialPct * (2*potentialRadius+1)^2
            'potentialPct': 0.5,

            # The default connected threshold. Any synapse whose
            # permanence value is above the connected threshold is
            # a "connected synapse", meaning it can contribute to the
            # cell's firing. Typical value is 0.10. Cells whose activity
            # level before inhibition falls below minDutyCycleBeforeInh
            # will have their own internal synPermConnectedCell
            # threshold set below this default value.
            # (This concept applies to both SP and TM and so 'cells'
            # is correct here as opposed to 'columns')
            'synPermConnected': 0.1,

            'synPermActiveInc': 0.1,
            'synPermInactiveDec': 0.005,
        },

        # Controls whether TM is enabled or disabled;
        # TM is necessary for making temporal predictions, such as predicting
        # the next inputs. Without TM, the model is only capable of
        # reconstructing missing sensor inputs (via SP).
        'tmEnable': True,

        'tmParams': {
            # TM diagnostic output verbosity control;
            # 0: silent; [1..6]: increasing levels of verbosity
            # (see verbosity in nupic/trunk/py/nupic/research/TP.py and
            # BacktrackingTMCPP.py)
            'verbosity': 0,

            # Number of cell columns in the cortical region (same number for
            # SP and TM)
            # (see also tpNCellsPerCol)
            'columnCount': 2048,

            # The number of cells (i.e., states), allocated per column.
            'cellsPerColumn': 32,

            'inputWidth': 2048,

            'seed': 1960,

            # Temporal Pooler implementation selector (see _getTPClass in
            # CLARegion.py).
            'temporalImp': 'cpp',

            # New Synapse formation count
            # NOTE: If None, use spNumActivePerInhArea
            #
            # TODO: need better explanation
            'newSynapseCount': 20,

            # Maximum number of synapses per segment
            #  > 0 for fixed-size CLA
            # -1 for non-fixed-size CLA
            #
            # TODO: for Ron: once the appropriate value is placed in TP
            # constructor, see if we should eliminate this parameter from
            # description.py.
            'maxSynapsesPerSegment': 32,

            # Maximum number of segments per cell
            #  > 0 for fixed-size CLA
            # -1 for non-fixed-size CLA
            #
            # TODO: for Ron: once the appropriate value is placed in TP
            # constructor, see if we should eliminate this parameter from
            # description.py.
            'maxSegmentsPerCell': 128,

            # Initial Permanence
            # TODO: need better explanation
            'initialPerm': 0.21,

            # Permanence Increment
            'permanenceInc': 0.1,

            # Permanence Decrement
            # If set to None, will automatically default to tpPermanenceInc
            # value.
            'permanenceDec': 0.1,

            'globalDecay': 0.0,

            'maxAge': 0,

            # Minimum number of active synapses for a segment to be considered
            # during search for the best-matching segments.
            # None=use default
            # Replaces: tpMinThreshold
            'minThreshold': 9,

            # Segment activation threshold.
            # A segment is active if it has >= tpSegmentActivationThreshold
            # connected synapses that are active due to infActiveState
            # None=use default
            # Replaces: tpActivationThreshold
            'activationThreshold': 12,

            'outputType': 'normal',

            # "Pay Attention Mode" length. This tells the TM how many new
            # elements to append to the end of a learned sequence at a time.
            # Smaller values are better for datasets with short sequences,
            # higher values are better for datasets with long sequences.
            'pamLength': 1,
        },

        'clParams': {
            'regionName': 'SDRClassifierRegion',

            # Classifier diagnostic output verbosity control;
            # 0: silent; [1..6]: increasing levels of verbosity
            'verbosity': 0,

            # This controls how fast the classifier learns/forgets. Higher
            # values make it adapt faster and forget older patterns faster.
            'alpha': 0.005,

            # This is set after the call to updateConfigFromSubConfig and is
            # computed from the aggregationInfo and predictAheadTime.
            'steps': '1,5',

            'implementation': 'cpp',
        },

        'trainSPNetOnlyIfRequested': False,
    },
}
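# In[ ]:

# (Added sketch, not part of the original notebook.) MODEL_PARAMS is a plain
# Python dict, so variants are easy to derive without editing the original.
# For example, doubling the consumption encoder's expected maximum; the 200.0
# value is arbitrary, chosen only for illustration.

import copy

tweakedParams = copy.deepcopy(MODEL_PARAMS)
encoderParams = tweakedParams['modelParams']['sensorParams']['encoders']['consumption']
encoderParams['maxval'] = 200.0
print encoderParams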
# # Dataset Helpers

# In[38]:

from pkg_resources import resource_filename

datasetPath = resource_filename("nupic.datafiles", "extra/hotgym/hotgym.csv")
print datasetPath

with open(datasetPath) as inputFile:
    print
    for _ in xrange(8):
        print inputFile.next().strip()

# # Loading Data
#
# `FileRecordStream` - file reader for the NuPIC file format (CSV with three
# header rows; understands datetimes)

# In[39]:

from nupic.data.file_record_stream import FileRecordStream

def getData():
    return FileRecordStream(datasetPath)

data = getData()
for _ in xrange(5):
    print data.next()

# In[40]:

from nupic.frameworks.opf.model_factory import ModelFactory

model = ModelFactory.create(MODEL_PARAMS)
model.enableInference({'predictedField': 'consumption'})

# In[41]:

data = getData()
for _ in xrange(100):
    record = dict(zip(data.getFieldNames(), data.next()))
    print "input: ", record["consumption"]
    result = model.run(record)
    print "prediction: ", result.inferences["multiStepBestPredictions"][1]

# In[42]:

print "5-step prediction: ", result.inferences["multiStepBestPredictions"][5]
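# In[ ]:

# (Added sketch, not part of the original notebook.) For plotting or error
# analysis you would typically collect actuals and predictions side by side
# instead of printing them. A minimal version using the same model and data
# stream as above (note the model keeps learning as it runs):

pairs = []
data = getData()
for _ in xrange(10):
    record = dict(zip(data.getFieldNames(), data.next()))
    result = model.run(record)
    pairs.append((record["consumption"],
                  result.inferences["multiStepBestPredictions"][1]))

for actual, predicted in pairs:
    print "actual: %7.2f    1-step prediction: %s" % (actual, predicted)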
# # Anomaly Score

# In[43]:

# Model Params!
MODEL_PARAMS = {
    # Type of model that the rest of these parameters apply to.
    'model': "HTMPrediction",

    # Version that specifies the format of the config.
    'version': 1,

    # Intermediate variables used to compute fields in modelParams and also
    # referenced from the control section.
    'aggregationInfo': {'days': 0,
                        'fields': [('consumption', 'sum')],
                        'hours': 1,
                        'microseconds': 0,
                        'milliseconds': 0,
                        'minutes': 0,
                        'months': 0,
                        'seconds': 0,
                        'weeks': 0,
                        'years': 0},

    'predictAheadTime': None,

    # Model parameter dictionary.
    'modelParams': {
        # The type of inference that this model will perform
        'inferenceType': 'TemporalAnomaly',

        'sensorParams': {
            # Sensor diagnostic output verbosity control;
            # if > 0: sensor region will print out on screen what it's sensing
            # at each step; 0: silent; >=1: some info; >=2: more info;
            # >=3: even more info (see compute() in py/regions/RecordSensor.py)
            'verbosity': 0,

            # Include the encoders we use
            'encoders': {
                u'timestamp_timeOfDay': {
                    'fieldname': u'timestamp',
                    'name': u'timestamp_timeOfDay',
                    'timeOfDay': (21, 0.5),
                    'type': 'DateEncoder'
                },
                u'timestamp_dayOfWeek': None,
                u'timestamp_weekend': None,
                u'consumption': {
                    'clipInput': True,
                    'fieldname': u'consumption',
                    'maxval': 100.0,
                    'minval': 0.0,
                    'n': 50,
                    'name': u'c1',
                    'type': 'ScalarEncoder',
                    'w': 21
                },
            },

            # A dictionary specifying the period for automatically-generated
            # resets from a RecordSensor;
            #
            # None = disable automatically-generated resets (also disabled if
            # all of the specified values evaluate to 0).
            # Valid keys are any desired combination of the following:
            # days, hours, minutes, seconds, milliseconds, microseconds, weeks
            #
            # Example for 1.5 days: sensorAutoReset = dict(days=1, hours=12)
            #
            # (value generated from SENSOR_AUTO_RESET)
            'sensorAutoReset': None,
        },

        'spEnable': True,

        'spParams': {
            # SP diagnostic output verbosity control;
            # 0: silent; >=1: some info; >=2: more info;
            'spVerbosity': 0,

            # Spatial Pooler implementation selector, see getSPClass
            # in py/regions/SPRegion.py for details
            # 'py' (default), 'cpp' (speed optimized, new)
            'spatialImp': 'cpp',

            'globalInhibition': 1,

            # Number of cell columns in the cortical region (same number for
            # SP and TM)
            # (see also tpNCellsPerCol)
            'columnCount': 2048,

            'inputWidth': 0,

            # SP inhibition control (absolute value);
            # Maximum number of active columns in the SP region's output (when
            # there are more, the weaker ones are suppressed)
            'numActiveColumnsPerInhArea': 40,

            'seed': 1956,

            # potentialPct
            # What percent of the column's receptive field is available
            # for potential synapses. At initialization time, we will
            # choose potentialPct * (2*potentialRadius+1)^2
            'potentialPct': 0.5,

            # The default connected threshold. Any synapse whose
            # permanence value is above the connected threshold is
            # a "connected synapse", meaning it can contribute to the
            # cell's firing. Typical value is 0.10. Cells whose activity
            # level before inhibition falls below minDutyCycleBeforeInh
            # will have their own internal synPermConnectedCell
            # threshold set below this default value.
            # (This concept applies to both SP and TM and so 'cells'
            # is correct here as opposed to 'columns')
            'synPermConnected': 0.1,

            'synPermActiveInc': 0.1,
            'synPermInactiveDec': 0.005,
        },

        # Controls whether TM is enabled or disabled;
        # TM is necessary for making temporal predictions, such as predicting
        # the next inputs. Without TM, the model is only capable of
        # reconstructing missing sensor inputs (via SP).
        'tmEnable': True,

        'tmParams': {
            # TM diagnostic output verbosity control;
            # 0: silent; [1..6]: increasing levels of verbosity
            # (see verbosity in nupic/trunk/py/nupic/research/TP.py and
            # BacktrackingTMCPP.py)
            'verbosity': 0,

            # Number of cell columns in the cortical region (same number for
            # SP and TM)
            # (see also tpNCellsPerCol)
            'columnCount': 2048,

            # The number of cells (i.e., states), allocated per column.
            'cellsPerColumn': 32,

            'inputWidth': 2048,

            'seed': 1960,

            # Temporal Pooler implementation selector (see _getTPClass in
            # CLARegion.py).
            'temporalImp': 'cpp',

            # New Synapse formation count
            # NOTE: If None, use spNumActivePerInhArea
            #
            # TODO: need better explanation
            'newSynapseCount': 20,

            # Maximum number of synapses per segment
            #  > 0 for fixed-size CLA
            # -1 for non-fixed-size CLA
            #
            # TODO: for Ron: once the appropriate value is placed in TP
            # constructor, see if we should eliminate this parameter from
            # description.py.
            'maxSynapsesPerSegment': 32,

            # Maximum number of segments per cell
            #  > 0 for fixed-size CLA
            # -1 for non-fixed-size CLA
            #
            # TODO: for Ron: once the appropriate value is placed in TP
            # constructor, see if we should eliminate this parameter from
            # description.py.
            'maxSegmentsPerCell': 128,

            # Initial Permanence
            # TODO: need better explanation
            'initialPerm': 0.21,

            # Permanence Increment
            'permanenceInc': 0.1,

            # Permanence Decrement
            # If set to None, will automatically default to tpPermanenceInc
            # value.
            'permanenceDec': 0.1,

            'globalDecay': 0.0,

            'maxAge': 0,

            # Minimum number of active synapses for a segment to be considered
            # during search for the best-matching segments.
            # None=use default
            # Replaces: tpMinThreshold
            'minThreshold': 9,

            # Segment activation threshold.
            # A segment is active if it has >= tpSegmentActivationThreshold
            # connected synapses that are active due to infActiveState
            # None=use default
            # Replaces: tpActivationThreshold
            'activationThreshold': 12,

            'outputType': 'normal',

            # "Pay Attention Mode" length. This tells the TM how many new
            # elements to append to the end of a learned sequence at a time.
            # Smaller values are better for datasets with short sequences,
            # higher values are better for datasets with long sequences.
            'pamLength': 1,
        },

        'clParams': {
            'regionName': 'SDRClassifierRegion',

            # Classifier diagnostic output verbosity control;
            # 0: silent; [1..6]: increasing levels of verbosity
            'verbosity': 0,

            # This controls how fast the classifier learns/forgets. Higher
            # values make it adapt faster and forget older patterns faster.
            'alpha': 0.005,

            # This is set after the call to updateConfigFromSubConfig and is
            # computed from the aggregationInfo and predictAheadTime.
            'steps': '1',

            'implementation': 'cpp',
        },

        'anomalyParams': {
            u'anomalyCacheRecords': None,
            u'autoDetectThreshold': None,
            u'autoDetectWaitRecords': 2184
        },

        'trainSPNetOnlyIfRequested': False,
    },
}

# In[44]:

from nupic.frameworks.opf.model_factory import ModelFactory

model = ModelFactory.create(MODEL_PARAMS)
model.enableInference({'predictedField': 'consumption'})

# In[45]:

data = getData()
for _ in xrange(5):
    record = dict(zip(data.getFieldNames(), data.next()))
    print "input: ", record["consumption"]
    result = model.run(record)
    print "prediction: ", result.inferences["multiStepBestPredictions"][1]

# In[46]:

print result

# In[47]:

print "anomaly score: ", result.inferences["anomalyScore"]

# __See Subutai's talk for more info on anomaly detection!__

# # Built-in OPF Clients
#
# `python examples/opf/bin/OpfRunExperiment.py examples/opf/experiments/multistep/hotgym/`
#
# Outputs `examples/opf/experiments/multistep/hotgym/inference/DefaultTask.TemporalMultiStep.predictionLog.csv`
#
# `python bin/run_swarm.py examples/opf/experiments/multistep/hotgym/permutations.py`
#
# Outputs `examples/opf/experiments/multistep/hotgym/model_0/description.py`

# In[ ]:
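# (Added sketch, not part of the original notebook.) One simple way to act on
# the anomaly score is a fixed cutoff; production code usually smooths raw
# scores first (see NuPIC's anomaly likelihood utilities). The 0.9 threshold
# below is arbitrary, chosen only for illustration.

THRESHOLD = 0.9

data = getData()
for i in xrange(10):
    record = dict(zip(data.getFieldNames(), data.next()))
    result = model.run(record)
    score = result.inferences["anomalyScore"]
    flag = "ANOMALY!" if score > THRESHOLD else ""
    print "row %2d  consumption: %7.2f  anomaly score: %.3f  %s" % (
        i, record["consumption"], score, flag)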