import numpy
from nupic.encoders import ScalarEncoder

ScalarEncoder?

# 22 bits with 3 active, representing values 0 to 100
# clipInput=True makes values >100 encode the same as 100 (instead of throwing a ValueError)
# forced=True allows small values for `n` and `w`
enc = ScalarEncoder(n=22, w=3, minval=2.5, maxval=97.5, clipInput=True, forced=True)
print "3 =", enc.encode(3)
print "4 =", enc.encode(4)
print "5 =", enc.encode(5)

# Encode maxval
print "100 =", enc.encode(100)
# See that any larger number gets the same encoding
print "1000 =", enc.encode(1000)

from nupic.encoders.random_distributed_scalar import RandomDistributedScalarEncoder

RandomDistributedScalarEncoder?

# 21 bits with 3 active, with buckets of size 5
rdse = RandomDistributedScalarEncoder(n=21, w=3, resolution=5, offset=2.5)
print "3 =   ", rdse.encode(3)
print "4 =   ", rdse.encode(4)
print "5 =   ", rdse.encode(5)
print
print "100 = ", rdse.encode(100)
print "1000 =", rdse.encode(1000)

import datetime
from nupic.encoders.date import DateEncoder

DateEncoder?

de = DateEncoder(season=5)
now = datetime.datetime.strptime("2014-05-02 13:08:58", "%Y-%m-%d %H:%M:%S")
print "now =       ", de.encode(now)
nextMonth = datetime.datetime.strptime("2014-06-02 13:08:58", "%Y-%m-%d %H:%M:%S")
print "next month =", de.encode(nextMonth)
xmas = datetime.datetime.strptime("2014-12-25 13:08:58", "%Y-%m-%d %H:%M:%S")
print "xmas =      ", de.encode(xmas)

from nupic.encoders.category import CategoryEncoder

categories = ("cat", "dog", "monkey", "slow loris")
encoder = CategoryEncoder(w=3, categoryList=categories, forced=True)
cat = encoder.encode("cat")
dog = encoder.encode("dog")
monkey = encoder.encode("monkey")
loris = encoder.encode("slow loris")
print "cat =       ", cat
print "dog =       ", dog
print "monkey =    ", monkey
print "slow loris =", loris

print encoder.encode(None)
print encoder.encode("unknown")
print encoder.decode(cat)

catdog = numpy.array([0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0])
print encoder.decode(catdog)

from nupic.research.spatial_pooler import SpatialPooler

SpatialPooler?
print SpatialPooler

print len(cat)
print cat

sp = SpatialPooler(inputDimensions=(15,),
                   columnDimensions=(4,),
                   potentialRadius=15,
                   numActiveColumnsPerInhArea=1,
                   globalInhibition=True,
                   synPermActiveInc=0.03,
                   potentialPct=1.0)

for column in xrange(4):
    connected = numpy.zeros((15,), dtype="int")
    sp.getConnectedSynapses(column, connected)
    print connected

output = numpy.zeros((4,), dtype="int")
sp.compute(cat, learn=True, activeArray=output)
print output

for _ in xrange(20):
    sp.compute(cat, learn=True, activeArray=output)

for column in xrange(4):
    connected = numpy.zeros((15,), dtype="int")
    sp.getConnectedSynapses(column, connected)
    print connected

for _ in xrange(200):
    sp.compute(cat, learn=True, activeArray=output)
    sp.compute(dog, learn=True, activeArray=output)
    sp.compute(monkey, learn=True, activeArray=output)
    sp.compute(loris, learn=True, activeArray=output)

for column in xrange(4):
    connected = numpy.zeros((15,), dtype="int")
    sp.getConnectedSynapses(column, connected)
    print connected

noisyCat = numpy.zeros((15,), dtype="uint32")
noisyCat[3] = 1
noisyCat[4] = 1  # This is part of dog!
noisyCat[6] = 1
print noisyCat

sp.compute(noisyCat, learn=False, activeArray=output)
print output  # matches cat!

from nupic.research.TP import TP

TP?
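# (Added aside, not part of the original walkthrough.) Before diving into the
# TP, a minimal sketch of how to compare the SDRs produced above: count the
# bits two encodings share. Semantically close inputs should overlap;
# unrelated ones should not. The `overlap` helper is defined here for
# illustration; it is not a NuPIC API.
def overlap(a, b):
    """Return how many bits are active in both SDRs."""
    return numpy.logical_and(a, b).sum()

# Nearby scalars land in the same or adjacent buckets, so they share bits...
print "overlap(3, 5)     =", overlap(enc.encode(3), enc.encode(5))
# ...while distant scalars share none.
print "overlap(3, 100)   =", overlap(enc.encode(3), enc.encode(100))
# Distinct categories get disjoint encodings by construction.
print "overlap(cat, dog) =", overlap(cat, dog)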
# Step 1: create Temporal Pooler instance with appropriate parameters
tp = TP(numberOfCols=50, cellsPerColumn=2,
        initialPerm=0.5, connectedPerm=0.5,
        minThreshold=10, newSynapseCount=10,
        permanenceInc=0.1, permanenceDec=0.0,
        activationThreshold=8,
        globalDecay=0, burnIn=1,
        checkSynapseConsistency=False,
        pamLength=10)

# Step 2: create input vectors to feed to the temporal pooler. Each input vector
# must be numberOfCols wide. Here we create a simple sequence of 5 vectors
# representing the sequence A -> B -> C -> D -> E
x = numpy.zeros((5, tp.numberOfCols), dtype="uint32")
x[0, 0:10] = 1   # Input SDR representing "A", corresponding to columns 0-9
x[1, 10:20] = 1  # Input SDR representing "B", corresponding to columns 10-19
x[2, 20:30] = 1  # Input SDR representing "C", corresponding to columns 20-29
x[3, 30:40] = 1  # Input SDR representing "D", corresponding to columns 30-39
x[4, 40:50] = 1  # Input SDR representing "E", corresponding to columns 40-49

# Step 3: send this simple sequence to the temporal pooler for learning.
# We repeat the sequence 10 times.
for i in range(10):

    # Send each letter in the sequence in order
    for j in range(5):

        # The compute method performs one step of learning and/or inference. Note:
        # here we just perform learning, but you can perform prediction/inference
        # and learning in the same step if you want (online learning).
        tp.compute(x[j], enableLearn=True, computeInfOutput=False)

        # This function prints the segments associated with every cell.
        # If you really want to understand the TP, uncomment this line. By
        # following every step you can get an excellent understanding for
        # exactly how the TP learns.
        # tp.printCells()

    # The reset command tells the TP that a sequence just ended and essentially
    # zeros out all the states. It is not strictly necessary, but it's a bit
    # messier without resets, and the TP learns quicker with resets.
    tp.reset()

# Step 4: send the same sequence of vectors and look at predictions made by
# the temporal pooler.

# Utility routine for printing the input vector
def formatRow(x):
    s = ''
    for c in range(len(x)):
        if c > 0 and c % 10 == 0:
            s += ' '
        s += str(x[c])
    s += ' '
    return s

for j in range(5):
    print "\n\n--------", "ABCDE"[j], "-----------"
    print "Raw input vector\n", formatRow(x[j])

    # Send each vector to the TP, with learning turned off
    tp.compute(x[j], enableLearn=False, computeInfOutput=True)

    # This method prints out the active state of each cell followed by the
    # predicted state of each cell. For convenience the cells are grouped
    # 10 at a time. When there are multiple cells per column the printout
    # is arranged so the cells in a column are stacked together.
    #
    # What you should notice is that the columns where active state is 1
    # represent the SDR for the current input pattern, and the columns where
    # predicted state is 1 represent the SDR for the next expected pattern.
    print "\nAll the active and predicted cells:"
    tp.printStates(printPrevious=False, printLearnState=False)

    # tp.getPredictedState() gets the predicted cells.
    # predictedCells[c][i] represents the state of the i'th cell in the c'th
    # column. To see if a column is predicted, we can simply take the OR
    # across all the cells in that column. In numpy we can do this by taking
    # the max along axis 1.
    print "\n\nThe following columns are predicted by the temporal pooler. This"
    print "should correspond to columns in the *next* item in the sequence."
    predictedCells = tp.getPredictedState()
    print formatRow(predictedCells.max(axis=1).astype("int"))
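# (Added aside, not part of the original walkthrough.) A small hedged helper
# that maps the TP's predicted columns back to letters, assuming the A-E
# column layout defined in x above (10 columns per letter). The
# `decodePredictions` name is ours for illustration, not a NuPIC API.
def decodePredictions(predictedCells):
    predictedColumns = predictedCells.max(axis=1).nonzero()[0]
    return sorted(set("ABCDE"[col // 10] for col in predictedColumns))

tp.reset()
tp.compute(x[0], enableLearn=False, computeInfOutput=True)  # show it "A"
print "after A, the TP predicts:", decodePredictions(tp.getPredictedState())  # expect ['B']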
# Model Params!
MODEL_PARAMS = {
    # Type of model that the rest of these parameters apply to.
    'model': "CLA",

    # Version that specifies the format of the config.
    'version': 1,

    # Intermediate variables used to compute fields in modelParams and also
    # referenced from the control section.
    'aggregationInfo': {'days': 0,
                        'fields': [('consumption', 'sum')],
                        'hours': 1,
                        'microseconds': 0,
                        'milliseconds': 0,
                        'minutes': 0,
                        'months': 0,
                        'seconds': 0,
                        'weeks': 0,
                        'years': 0},

    'predictAheadTime': None,

    # Model parameter dictionary.
    'modelParams': {
        # The type of inference that this model will perform
        'inferenceType': 'TemporalMultiStep',

        'sensorParams': {
            # Sensor diagnostic output verbosity control;
            # if > 0: sensor region will print out on screen what it's sensing
            # at each step; 0: silent; >=1: some info; >=2: more info;
            # >=3: even more info (see compute() in py/regions/RecordSensor.py)
            'verbosity': 0,

            # Include the encoders we use
            'encoders': {
                u'timestamp_timeOfDay': {
                    'fieldname': u'timestamp',
                    'name': u'timestamp_timeOfDay',
                    'timeOfDay': (21, 0.5),
                    'type': 'DateEncoder'},
                u'timestamp_dayOfWeek': None,
                u'timestamp_weekend': None,
                u'consumption': {
                    'clipInput': True,
                    'fieldname': u'consumption',
                    'maxval': 100.0,
                    'minval': 0.0,
                    'n': 50,
                    'name': u'c1',
                    'type': 'ScalarEncoder',
                    'w': 21},
            },

            # A dictionary specifying the period for automatically-generated
            # resets from a RecordSensor;
            #
            # None = disable automatically-generated resets (also disabled if
            # all of the specified values evaluate to 0).
            # Valid keys are any desired combination of the following:
            # days, hours, minutes, seconds, milliseconds, microseconds, weeks
            #
            # Example for 1.5 days: sensorAutoReset = dict(days=1, hours=12)
            #
            # (value generated from SENSOR_AUTO_RESET)
            'sensorAutoReset': None,
        },

        'spEnable': True,

        'spParams': {
            # SP diagnostic output verbosity control;
            # 0: silent; >=1: some info; >=2: more info;
            'spVerbosity': 0,

            # Spatial Pooler implementation selector, see getSPClass
            # in py/regions/SPRegion.py for details
            # 'py' (default), 'cpp' (speed optimized, new)
            'spatialImp': 'cpp',

            'globalInhibition': 1,

            # Number of cell columns in the cortical region (same number for
            # SP and TP)
            # (see also tpNCellsPerCol)
            'columnCount': 2048,

            'inputWidth': 0,

            # SP inhibition control (absolute value);
            # Maximum number of active columns in the SP region's output (when
            # there are more, the weaker ones are suppressed)
            'numActiveColumnsPerInhArea': 40,

            'seed': 1956,

            # potentialPct
            # What percent of the column's receptive field is available
            # for potential synapses. At initialization time, we will
            # choose potentialPct * (2*potentialRadius+1)^2
            'potentialPct': 0.5,

            # The default connected threshold. Any synapse whose
            # permanence value is above the connected threshold is
            # a "connected synapse", meaning it can contribute to the
            # cell's firing. Typical value is 0.10. Cells whose activity
            # level before inhibition falls below minDutyCycleBeforeInh
            # will have their own internal synPermConnectedCell
            # threshold set below this default value.
            # (This concept applies to both SP and TP and so 'cells'
            # is correct here as opposed to 'columns')
            'synPermConnected': 0.1,

            'synPermActiveInc': 0.1,
            'synPermInactiveDec': 0.005,
        },

        # Controls whether TP is enabled or disabled;
        # TP is necessary for making temporal predictions, such as predicting
        # the next inputs. Without TP, the model is only capable of
        # reconstructing missing sensor inputs (via SP).
        'tpEnable': True,

        'tpParams': {
            # TP diagnostic output verbosity control;
            # 0: silent; [1..6]: increasing levels of verbosity
            # (see verbosity in nupic/trunk/py/nupic/research/TP.py and TP10X*.py)
            'verbosity': 0,

            # Number of cell columns in the cortical region (same number for
            # SP and TP)
            # (see also tpNCellsPerCol)
            'columnCount': 2048,

            # The number of cells (i.e., states), allocated per column.
            'cellsPerColumn': 32,

            'inputWidth': 2048,

            'seed': 1960,

            # Temporal Pooler implementation selector (see _getTPClass in
            # CLARegion.py).
            'temporalImp': 'cpp',

            # New synapse formation count
            # NOTE: If None, use spNumActivePerInhArea
            #
            # TODO: need better explanation
            'newSynapseCount': 20,

            # Maximum number of synapses per segment
            #  > 0 for fixed-size CLA
            # -1 for non-fixed-size CLA
            #
            # TODO: for Ron: once the appropriate value is placed in TP
            # constructor, see if we should eliminate this parameter from
            # description.py.
            'maxSynapsesPerSegment': 32,

            # Maximum number of segments per cell
            #  > 0 for fixed-size CLA
            # -1 for non-fixed-size CLA
            #
            # TODO: for Ron: once the appropriate value is placed in TP
            # constructor, see if we should eliminate this parameter from
            # description.py.
            'maxSegmentsPerCell': 128,

            # Initial permanence
            # TODO: need better explanation
            'initialPerm': 0.21,

            # Permanence increment
            'permanenceInc': 0.1,

            # Permanence decrement
            # If set to None, will automatically default to tpPermanenceInc
            # value.
            'permanenceDec': 0.1,

            'globalDecay': 0.0,

            'maxAge': 0,

            # Minimum number of active synapses for a segment to be considered
            # during search for the best-matching segments.
            # None=use default
            # Replaces: tpMinThreshold
            'minThreshold': 9,

            # Segment activation threshold.
            # A segment is active if it has >= tpSegmentActivationThreshold
            # connected synapses that are active due to infActiveState
            # None=use default
            # Replaces: tpActivationThreshold
            'activationThreshold': 12,

            'outputType': 'normal',

            # "Pay Attention Mode" length. This tells the TP how many new
            # elements to append to the end of a learned sequence at a time.
            # Smaller values are better for datasets with short sequences,
            # higher values are better for datasets with long sequences.
            'pamLength': 1,
        },

        'clParams': {
            'regionName': 'CLAClassifierRegion',

            # Classifier diagnostic output verbosity control;
            # 0: silent; [1..6]: increasing levels of verbosity
            'clVerbosity': 0,

            # This controls how fast the classifier learns/forgets. Higher values
            # make it adapt faster and forget older patterns faster.
            'alpha': 0.005,

            # This is set after the call to updateConfigFromSubConfig and is
            # computed from the aggregationInfo and predictAheadTime.
            'steps': '1,5',

            'implementation': 'cpp',
        },

        'trainSPNetOnlyIfRequested': False,
    },
}

from pkg_resources import resource_filename

datasetPath = resource_filename("nupic.datafiles", "extra/hotgym/hotgym.csv")
print datasetPath

with open(datasetPath) as inputFile:
    print
    for _ in xrange(8):
        print inputFile.next().strip()

from nupic.data.file_record_stream import FileRecordStream

def getData():
    return FileRecordStream(datasetPath)

data = getData()
for _ in xrange(5):
    print data.next()

from nupic.frameworks.opf.modelfactory import ModelFactory

model = ModelFactory.create(MODEL_PARAMS)
model.enableInference({'predictedField': 'consumption'})

data = getData()
for _ in xrange(100):
    record = dict(zip(data.getFieldNames(), data.next()))
    print "input: ", record["consumption"]
    result = model.run(record)
    print "prediction: ", result.inferences["multiStepBestPredictions"][1]
    print "5-step prediction: ", result.inferences["multiStepBestPredictions"][5]
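# (Added aside, not part of the original walkthrough.) A rough sketch of how
# you might score those predictions: replay the stream and compare each 1-step
# prediction against the value that actually arrives next. The model keeps
# learning as it runs, so this is an online error estimate, not a clean
# train/test evaluation.
data = getData()
lastPrediction = None
errors = []
for _ in xrange(100):
    record = dict(zip(data.getFieldNames(), data.next()))
    if lastPrediction is not None:
        errors.append(abs(record["consumption"] - lastPrediction))
    result = model.run(record)
    lastPrediction = result.inferences["multiStepBestPredictions"][1]

if errors:
    print "mean absolute 1-step error:", sum(errors) / len(errors)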
# Model Params for anomaly detection!
#
# These are identical to MODEL_PARAMS above except for three changes, so
# rather than repeating the whole dictionary we copy it and apply the deltas:
# the inference type switches to 'TemporalAnomaly', the classifier only
# predicts 1 step ahead, and an 'anomalyParams' section is added.
import copy

ANOMALY_MODEL_PARAMS = copy.deepcopy(MODEL_PARAMS)
ANOMALY_MODEL_PARAMS['modelParams']['inferenceType'] = 'TemporalAnomaly'
ANOMALY_MODEL_PARAMS['modelParams']['clParams']['steps'] = '1'
ANOMALY_MODEL_PARAMS['modelParams']['anomalyParams'] = {
    u'anomalyCacheRecords': None,
    u'autoDetectThreshold': None,
    u'autoDetectWaitRecords': 2184,
}
model = ModelFactory.create(ANOMALY_MODEL_PARAMS)
model.enableInference({'predictedField': 'consumption'})

data = getData()
for _ in xrange(5):
    record = dict(zip(data.getFieldNames(), data.next()))
    print "input: ", record["consumption"]
    result = model.run(record)
    print "prediction: ", result.inferences["multiStepBestPredictions"][1]
    print result
    print "anomaly score: ", result.inferences["anomalyScore"]
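# (Added aside, not part of the original walkthrough.) One way to act on the
# anomaly score: flag records that cross a threshold. The 0.9 cutoff is an
# illustrative assumption, not a NuPIC recommendation; production anomaly
# detection usually smooths the raw score (e.g., with a moving average)
# before thresholding.
ANOMALY_THRESHOLD = 0.9

data = getData()
for i in xrange(100):
    record = dict(zip(data.getFieldNames(), data.next()))
    result = model.run(record)
    score = result.inferences["anomalyScore"]
    if score >= ANOMALY_THRESHOLD:
        print "record %d looks anomalous (score %.2f, consumption %s)" % (
            i, score, record["consumption"])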