import numpy
from nupic.encoders import ScalarEncoder
ScalarEncoder?
# 22 bits with 3 active representing values 0 to 100
# clipInput=True makes values >100 encode the same as 100 (instead of throwing a ValueError)
# forced=True allows small values for `n` and `w`
enc = ScalarEncoder(n=22, w=3, minval=2.5, maxval=97.5, clipInput=True, forced=True)
print "3 =", enc.encode(3)
print "4 =", enc.encode(4)
print "5 =", enc.encode(5)
3 = [1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
4 = [1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
5 = [0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
# Encode maxval
print "100 =", enc.encode(100)
# See that any larger number gets the same encoding
print "1000 =", enc.encode(1000)
100 = [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1]
1000 = [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1]
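Nearby values share active bits, which is what makes the encoding semantic. A quick way to check this (a hedged aside, not part of the original walkthrough) is to count overlapping bits between two encodings:
# Count shared active bits between encodings of nearby and distant values.
# From the outputs above: 3 and 4 share all 3 bits, while 3 and 100 share none.
print "overlap(3, 4)   =", numpy.sum(numpy.logical_and(enc.encode(3), enc.encode(4)))
print "overlap(3, 100) =", numpy.sum(numpy.logical_and(enc.encode(3), enc.encode(100)))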
from nupic.encoders.random_distributed_scalar import RandomDistributedScalarEncoder
RandomDistributedScalarEncoder?
# 21 bits with 3 active, using buckets (resolution) of size 5
rdse = RandomDistributedScalarEncoder(n=21, w=3, resolution=5, offset=2.5)
print "3 = ", rdse.encode(3)
print "4 = ", rdse.encode(4)
print "5 = ", rdse.encode(5)
print
print "100 = ", rdse.encode(100)
print "1000 =", rdse.encode(1000)
3 =  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1]
4 =  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1]
5 =  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1]

100 =  [0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
1000 = [0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0]
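Because the RDSE hashes bucket indices into bit patterns, it needs no minval/maxval and can encode values far outside the range used so far. A small hedged check (not in the original walkthrough):
# Buckets are created on demand, so an out-of-range value still gets an encoding.
print "-10 =", rdse.encode(-10)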
import datetime
from nupic.encoders.date import DateEncoder
DateEncoder?
de = DateEncoder(season=5)
now = datetime.datetime.strptime("2014-05-02 13:08:58", "%Y-%m-%d %H:%M:%S")
print "now = ", de.encode(now)
nextMonth = datetime.datetime.strptime("2014-06-02 13:08:58", "%Y-%m-%d %H:%M:%S")
print "next month =", de.encode(nextMonth)
xmas = datetime.datetime.strptime("2014-12-25 13:08:58", "%Y-%m-%d %H:%M:%S")
print "xmas = ", de.encode(xmas)
now =  [0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0]
next month = [0 0 0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0]
xmas =  [1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1]
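The DateEncoder can also combine several date attributes into one concatenated encoding. The sketch below is a hedged aside (not from the original walkthrough); the widths chosen for each attribute are arbitrary illustration values:
# Each keyword sets the width (active bits) of that attribute's sub-encoder; the
# output concatenates the season, day-of-week, weekend and time-of-day encodings.
multiDe = DateEncoder(season=5, dayOfWeek=3, weekend=3, timeOfDay=5)
print "xmas (multi-attribute) =", multiDe.encode(xmas)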
from nupic.encoders.category import CategoryEncoder
categories = ("cat", "dog", "monkey", "slow loris")
encoder = CategoryEncoder(w=3, categoryList=categories, forced=True)
cat = encoder.encode("cat")
dog = encoder.encode("dog")
monkey = encoder.encode("monkey")
loris = encoder.encode("slow loris")
print "cat = ", cat
print "dog = ", dog
print "monkey = ", monkey
print "slow loris =", loris
cat = [0 0 0 1 1 1 0 0 0 0 0 0 0 0 0]
dog = [0 0 0 0 0 0 1 1 1 0 0 0 0 0 0]
monkey = [0 0 0 0 0 0 0 0 0 1 1 1 0 0 0]
slow loris = [0 0 0 0 0 0 0 0 0 0 0 0 1 1 1]
print encoder.encode(None)
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
print encoder.encode("unknown")
[1 1 1 0 0 0 0 0 0 0 0 0 0 0 0]
print encoder.decode(cat)
({'category': ([(1, 1)], 'cat')}, ['category'])
catdog = numpy.array([0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0])
print encoder.decode(catdog)
({'category': ([(1, 2)], 'cat, dog')}, ['category'])
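Unlike scalar encodings, distinct categories share no active bits, so the encoder implies no similarity between them. A quick hedged check (not in the original walkthrough):
# Overlap between any two distinct category encodings is zero.
print "cat/dog overlap =", numpy.sum(numpy.logical_and(cat, dog))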
from nupic.research.spatial_pooler import SpatialPooler
SpatialPooler?
print SpatialPooler
<class 'nupic.research.spatial_pooler.SpatialPooler'>
print len(cat)
print cat
15
[0 0 0 1 1 1 0 0 0 0 0 0 0 0 0]
sp = SpatialPooler(inputDimensions=(15,),
columnDimensions=(4,),
potentialRadius=15,
numActiveColumnsPerInhArea=1,
globalInhibition=True,
synPermActiveInc=0.03,
potentialPct=1.0)
import numpy
for column in xrange(4):
connected = numpy.zeros((15,), dtype="int")
sp.getConnectedSynapses(column, connected)
print connected
[0 0 1 1 1 1 0 0 0 0 1 1 1 1 0]
[1 0 0 0 1 1 1 1 0 1 0 0 0 1 1]
[1 1 0 0 0 0 0 1 1 1 1 1 1 0 0]
[1 1 0 1 1 0 0 1 1 0 1 0 0 1 1]
output = numpy.zeros((4,), dtype="int")
sp.compute(cat, learn=True, activeArray=output)
print output
[1 0 0 0]
for _ in xrange(20):
sp.compute(cat, learn=True, activeArray=output)
for column in xrange(4):
connected = numpy.zeros((15,), dtype="int")
sp.getConnectedSynapses(column, connected)
print connected
[0 0 1 1 1 1 0 0 0 0 1 1 1 1 0]
[1 0 0 0 1 1 1 1 0 1 0 0 0 1 1]
[1 1 0 0 0 0 0 1 1 1 1 1 1 0 0]
[1 1 0 1 1 0 0 1 1 0 1 0 0 1 1]
for _ in xrange(200):
sp.compute(cat, learn=True, activeArray=output)
sp.compute(dog, learn=True, activeArray=output)
sp.compute(monkey, learn=True, activeArray=output)
sp.compute(loris, learn=True, activeArray=output)
for column in xrange(4):
connected = numpy.zeros((15,), dtype="int")
sp.getConnectedSynapses(column, connected)
print connected
[0 0 0 1 1 1 0 0 0 0 0 0 0 0 0]
[1 0 0 0 1 1 1 1 0 1 0 0 0 1 1]
[0 0 0 0 0 0 0 0 0 1 1 1 0 0 0]
[0 0 0 0 0 0 1 1 1 0 0 0 1 1 1]
noisyCat = numpy.zeros((15,), dtype="uint32")
noisyCat[3] = 1
noisyCat[4] = 1
# This is part of dog!
noisyCat[6] = 1
print noisyCat
[0 0 0 1 1 0 1 0 0 0 0 0 0 0 0]
sp.compute(noisyCat, learn=False, activeArray=output)
print output # matches cat!
[0 1 0 0]
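To see why the pooler can still recognize the noisy input, a hedged illustration (not in the original walkthrough) compares the raw bit overlap of noisyCat with each clean category encoding:
# noisyCat shares 2 bits with cat, 1 with dog, and none with the others.
for name, vec in [("cat", cat), ("dog", dog), ("monkey", monkey), ("slow loris", loris)]:
    print name, "overlap =", numpy.sum(numpy.logical_and(noisyCat, vec))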
From: examples/tm/hello_tm.py
from nupic.research.BacktrackingTM import BacktrackingTM
BacktrackingTM?
# Step 1: create Temporal Pooler instance with appropriate parameters
tm = BacktrackingTM(numberOfCols=50, cellsPerColumn=2,
initialPerm=0.5, connectedPerm=0.5,
minThreshold=10, newSynapseCount=10,
permanenceInc=0.1, permanenceDec=0.0,
activationThreshold=8,
globalDecay=0, burnIn=1,
checkSynapseConsistency=False,
pamLength=10)
# Step 2: create input vectors to feed to the temporal memory. Each input vector
# must be numberOfCols wide. Here we create a simple sequence of 5 vectors
# representing the sequence A -> B -> C -> D -> E
x = numpy.zeros((5, tm.numberOfCols), dtype="uint32")
x[0,0:10] = 1 # Input SDR representing "A", corresponding to columns 0-9
x[1,10:20] = 1 # Input SDR representing "B", corresponding to columns 10-19
x[2,20:30] = 1 # Input SDR representing "C", corresponding to columns 20-29
x[3,30:40] = 1 # Input SDR representing "D", corresponding to columns 30-39
x[4,40:50] = 1 # Input SDR representing "E", corresponding to columns 40-49
# Step 3: send this simple sequence to the temporal memory for learning
# We repeat the sequence 10 times
for i in range(10):
# Send each letter in the sequence in order
for j in range(5):
# The compute method performs one step of learning and/or inference. Note:
# here we just perform learning but you can perform prediction/inference and
# learning in the same step if you want (online learning).
tm.compute(x[j], enableLearn = True, enableInference = False)
# This function prints the segments associated with every cell.
# If you really want to understand the TP, uncomment this line. By following
# every step you can get an excellent understanding for exactly how the TP
# learns.
#tm.printCells()
# The reset command tells the TM that a sequence just ended and essentially
# zeros out all the states. It is not strictly necessary but it's a bit
# messier without resets, and the TM learns quicker with resets.
tm.reset()
# Step 4: send the same sequence of vectors and look at predictions made by
# temporal memory
# Utility routine for printing the input vector
def formatRow(x):
s = ''
for c in range(len(x)):
if c > 0 and c % 10 == 0:
s += ' '
s += str(x[c])
s += ' '
return s
for j in range(5):
print "\n\n--------","ABCDE"[j],"-----------"
print "Raw input vector\n",formatRow(x[j])
# Send each vector to the TP, with learning turned off
tm.compute(x[j], enableLearn=False, enableInference=True)
# This method prints out the active state of each cell followed by the
# predicted state of each cell. For convenience the cells are grouped
# 10 at a time. When there are multiple cells per column the printout
# is arranged so the cells in a column are stacked together
#
# What you should notice is that the columns where active state is 1
# represent the SDR for the current input pattern and the columns where
# predicted state is 1 represent the SDR for the next expected pattern
print "\nAll the active and predicted cells:"
tm.printStates(printPrevious=False, printLearnState=False)
# tm.getPredictedState() gets the predicted cells.
# predictedCells[c][i] represents the state of the i'th cell in the c'th
# column. To see if a column is predicted, we can simply take the OR
# across all the cells in that column. In numpy we can do this by taking
# the max along axis 1.
print "\n\nThe following columns are predicted by the temporal memory. This"
print "should correspond to columns in the *next* item in the sequence."
predictedCells = tm.getPredictedState()
print formatRow(predictedCells.max(axis=1).nonzero())
-------- A -----------
Raw input vector
1111111111 0000000000 0000000000 0000000000 0000000000

All the active and predicted cells:
Inference Active state
1111111111 0000000000 0000000000 0000000000 0000000000
0000000000 0000000000 0000000000 0000000000 0000000000
Inference Predicted state
0000000000 0000000000 0000000000 0000000000 0000000000
0000000000 1111111111 0000000000 0000000000 0000000000

The following columns are predicted by the temporal memory. This
should correspond to columns in the *next* item in the sequence.
[10 11 12 13 14 15 16 17 18 19]

-------- B -----------
Raw input vector
0000000000 1111111111 0000000000 0000000000 0000000000

All the active and predicted cells:
Inference Active state
0000000000 0000000000 0000000000 0000000000 0000000000
0000000000 1111111111 0000000000 0000000000 0000000000
Inference Predicted state
0000000000 0000000000 0000000000 0000000000 0000000000
0000000000 0000000000 1111111111 0000000000 0000000000

The following columns are predicted by the temporal memory. This
should correspond to columns in the *next* item in the sequence.
[20 21 22 23 24 25 26 27 28 29]

-------- C -----------
Raw input vector
0000000000 0000000000 1111111111 0000000000 0000000000

All the active and predicted cells:
Inference Active state
0000000000 0000000000 0000000000 0000000000 0000000000
0000000000 0000000000 1111111111 0000000000 0000000000
Inference Predicted state
0000000000 0000000000 0000000000 0000000000 0000000000
0000000000 0000000000 0000000000 1111111111 0000000000

The following columns are predicted by the temporal memory. This
should correspond to columns in the *next* item in the sequence.
[30 31 32 33 34 35 36 37 38 39]

-------- D -----------
Raw input vector
0000000000 0000000000 0000000000 1111111111 0000000000

All the active and predicted cells:
Inference Active state
0000000000 0000000000 0000000000 0000000000 0000000000
0000000000 0000000000 0000000000 1111111111 0000000000
Inference Predicted state
0000000000 0000000000 0000000000 0000000000 0000000000
0000000000 0000000000 0000000000 0000000000 1111111111

The following columns are predicted by the temporal memory. This
should correspond to columns in the *next* item in the sequence.
[40 41 42 43 44 45 46 47 48 49]

-------- E -----------
Raw input vector
0000000000 0000000000 0000000000 0000000000 1111111111

All the active and predicted cells:
Inference Active state
0000000000 0000000000 0000000000 0000000000 0000000000
0000000000 0000000000 0000000000 0000000000 1111111111
Inference Predicted state
0000000000 0000000000 0000000000 0000000000 0000000000
0000000000 0000000000 0000000000 0000000000 0000000000

The following columns are predicted by the temporal memory. This
should correspond to columns in the *next* item in the sequence.
[]
See slides.
MODEL_PARAMS holds all of the parameters for the CLA model and its subcomponents.
# Model Params!
MODEL_PARAMS = {
# Type of model that the rest of these parameters apply to.
'model': "HTMPrediction",
# Version that specifies the format of the config.
'version': 1,
# Intermediate variables used to compute fields in modelParams and also
# referenced from the control section.
'aggregationInfo': { 'days': 0,
'fields': [('consumption', 'sum')],
'hours': 1,
'microseconds': 0,
'milliseconds': 0,
'minutes': 0,
'months': 0,
'seconds': 0,
'weeks': 0,
'years': 0},
'predictAheadTime': None,
# Model parameter dictionary.
'modelParams': {
# The type of inference that this model will perform
'inferenceType': 'TemporalMultiStep',
'sensorParams': {
# Sensor diagnostic output verbosity control;
# if > 0: sensor region will print out on screen what it's sensing
# at each step 0: silent; >=1: some info; >=2: more info;
# >=3: even more info (see compute() in py/regions/RecordSensor.py)
'verbosity' : 0,
# Include the encoders we use
'encoders': {
u'timestamp_timeOfDay': {
'fieldname': u'timestamp',
'name': u'timestamp_timeOfDay',
'timeOfDay': (21, 0.5),
'type': 'DateEncoder'
},
u'timestamp_dayOfWeek': None,
u'timestamp_weekend': None,
u'consumption': {
'clipInput': True,
'fieldname': u'consumption',
'maxval': 100.0,
'minval': 0.0,
'n': 50,
'name': u'c1',
'type': 'ScalarEncoder',
'w': 21
},
},
# A dictionary specifying the period for automatically-generated
# resets from a RecordSensor;
#
# None = disable automatically-generated resets (also disabled if
# all of the specified values evaluate to 0).
# Valid keys are any desired combination of the following:
# days, hours, minutes, seconds, milliseconds, microseconds, weeks
#
# Example for 1.5 days: sensorAutoReset = dict(days=1,hours=12),
#
# (value generated from SENSOR_AUTO_RESET)
'sensorAutoReset' : None,
},
'spEnable': True,
'spParams': {
# SP diagnostic output verbosity control;
# 0: silent; >=1: some info; >=2: more info;
'spVerbosity' : 0,
# Spatial Pooler implementation selector, see getSPClass
# in py/regions/SPRegion.py for details
# 'py' (default), 'cpp' (speed optimized, new)
'spatialImp' : 'cpp',
'globalInhibition': 1,
# Number of cell columns in the cortical region (same number for
# SP and TM)
# (see also tpNCellsPerCol)
'columnCount': 2048,
'inputWidth': 0,
# SP inhibition control (absolute value);
# Maximum number of active columns in the SP region's output (when
# there are more, the weaker ones are suppressed)
'numActiveColumnsPerInhArea': 40,
'seed': 1956,
# potentialPct
# What percent of the column's receptive field is available
# for potential synapses. At initialization time, we will
# choose potentialPct * (2*potentialRadius+1)^2
'potentialPct': 0.5,
# The default connected threshold. Any synapse whose
# permanence value is above the connected threshold is
# a "connected synapse", meaning it can contribute to the
# cell's firing. Typical value is 0.10. Cells whose activity
# level before inhibition falls below minDutyCycleBeforeInh
# will have their own internal synPermConnectedCell
# threshold set below this default value.
# (This concept applies to both SP and TM and so 'cells'
# is correct here as opposed to 'columns')
'synPermConnected': 0.1,
'synPermActiveInc': 0.1,
'synPermInactiveDec': 0.005,
},
# Controls whether TM is enabled or disabled;
# TM is necessary for making temporal predictions, such as predicting
# the next inputs. Without TP, the model is only capable of
# reconstructing missing sensor inputs (via SP).
'tmEnable' : True,
'tmParams': {
# TM diagnostic output verbosity control;
# 0: silent; [1..6]: increasing levels of verbosity
# (see verbosity in nupic/trunk/py/nupic/research/TP.py and BacktrackingTMCPP.py)
'verbosity': 0,
# Number of cell columns in the cortical region (same number for
# SP and TM)
# (see also tpNCellsPerCol)
'columnCount': 2048,
# The number of cells (i.e., states), allocated per column.
'cellsPerColumn': 32,
'inputWidth': 2048,
'seed': 1960,
# Temporal Pooler implementation selector (see _getTPClass in
# CLARegion.py).
'temporalImp': 'cpp',
# New Synapse formation count
# NOTE: If None, use spNumActivePerInhArea
#
# TODO: need better explanation
'newSynapseCount': 20,
# Maximum number of synapses per segment
# > 0 for fixed-size CLA
# -1 for non-fixed-size CLA
#
# TODO: for Ron: once the appropriate value is placed in TP
# constructor, see if we should eliminate this parameter from
# description.py.
'maxSynapsesPerSegment': 32,
# Maximum number of segments per cell
# > 0 for fixed-size CLA
# -1 for non-fixed-size CLA
#
# TODO: for Ron: once the appropriate value is placed in TP
# constructor, see if we should eliminate this parameter from
# description.py.
'maxSegmentsPerCell': 128,
# Initial Permanence
# TODO: need better explanation
'initialPerm': 0.21,
# Permanence Increment
'permanenceInc': 0.1,
# Permanence Decrement
# If set to None, will automatically default to tpPermanenceInc
# value.
'permanenceDec' : 0.1,
'globalDecay': 0.0,
'maxAge': 0,
# Minimum number of active synapses for a segment to be considered
# during search for the best-matching segments.
# None=use default
# Replaces: tpMinThreshold
'minThreshold': 9,
# Segment activation threshold.
# A segment is active if it has >= tpSegmentActivationThreshold
# connected synapses that are active due to infActiveState
# None=use default
# Replaces: tpActivationThreshold
'activationThreshold': 12,
'outputType': 'normal',
# "Pay Attention Mode" length. This tells the TM how many new
# elements to append to the end of a learned sequence at a time.
# Smaller values are better for datasets with short sequences,
# higher values are better for datasets with long sequences.
'pamLength': 1,
},
'clParams': {
'regionName' : 'SDRClassifierRegion',
# Classifier diagnostic output verbosity control;
# 0: silent; [1..6]: increasing levels of verbosity
'verbosity' : 0,
# This controls how fast the classifier learns/forgets. Higher values
# make it adapt faster and forget older patterns faster.
'alpha': 0.005,
# This is set after the call to updateConfigFromSubConfig and is
# computed from the aggregationInfo and predictAheadTime.
'steps': '1,5',
'implementation': 'cpp',
},
'trainSPNetOnlyIfRequested': False,
},
}
from pkg_resources import resource_filename
datasetPath = resource_filename("nupic.datafiles", "extra/hotgym/hotgym.csv")
print datasetPath
with open(datasetPath) as inputFile:
print
for _ in xrange(8):
print inputFile.next().strip()
/Users/mleborgne/_git/nupic/src/nupic/datafiles/extra/hotgym/hotgym.csv

gym,address,timestamp,consumption
string,string,datetime,float
S,,T,
Balgowlah Platinum,Shop 67 197-215 Condamine Street Balgowlah 2093,2010-07-02 00:00:00.0,5.3
Balgowlah Platinum,Shop 67 197-215 Condamine Street Balgowlah 2093,2010-07-02 00:15:00.0,5.5
Balgowlah Platinum,Shop 67 197-215 Condamine Street Balgowlah 2093,2010-07-02 00:30:00.0,5.1
Balgowlah Platinum,Shop 67 197-215 Condamine Street Balgowlah 2093,2010-07-02 00:45:00.0,5.3
Balgowlah Platinum,Shop 67 197-215 Condamine Street Balgowlah 2093,2010-07-02 01:00:00.0,5.2
FileRecordStream
- file reader for the NuPIC file format (CSV with three header rows, understands datetimes)
from nupic.data.file_record_stream import FileRecordStream
def getData():
return FileRecordStream(datasetPath)
data = getData()
for _ in xrange(5):
print data.next()
['Balgowlah Platinum', 'Shop 67 197-215 Condamine Street Balgowlah 2093', datetime.datetime(2010, 7, 2, 0, 0), 5.3]
['Balgowlah Platinum', 'Shop 67 197-215 Condamine Street Balgowlah 2093', datetime.datetime(2010, 7, 2, 0, 15), 5.5]
['Balgowlah Platinum', 'Shop 67 197-215 Condamine Street Balgowlah 2093', datetime.datetime(2010, 7, 2, 0, 30), 5.1]
['Balgowlah Platinum', 'Shop 67 197-215 Condamine Street Balgowlah 2093', datetime.datetime(2010, 7, 2, 0, 45), 5.3]
['Balgowlah Platinum', 'Shop 67 197-215 Condamine Street Balgowlah 2093', datetime.datetime(2010, 7, 2, 1, 0), 5.2]
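The record stream also exposes the field names from the CSV header, which is how the dict records are built below. A small hedged aside (not in the original walkthrough):
# For hotgym.csv the header row gives ['gym', 'address', 'timestamp', 'consumption'].
print data.getFieldNames()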
from nupic.frameworks.opf.model_factory import ModelFactory
model = ModelFactory.create(MODEL_PARAMS)
model.enableInference({'predictedField': 'consumption'})
data = getData()
for _ in xrange(100):
record = dict(zip(data.getFieldNames(), data.next()))
print "input: ", record["consumption"]
result = model.run(record)
print "prediction: ", result.inferences["multiStepBestPredictions"][1]
input: 5.3 prediction: 5.3 input: 5.5 prediction: 5.5 input: 5.1 prediction: 5.36 input: 5.3 prediction: 5.1 input: 5.2 prediction: 5.342 input: 5.5 prediction: 5.2994 input: 4.5 prediction: 5.35958 input: 1.2 prediction: 4.92 input: 1.1 prediction: 1.2 input: 1.2 prediction: 1.17 input: 1.2 prediction: 1.179 input: 1.2 prediction: 1.1853 input: 1.2 prediction: 1.18971 input: 1.2 prediction: 1.192797 input: 1.1 prediction: 1.1949579 input: 1.2 prediction: 1.16647053 input: 1.1 prediction: 1.176529371 input: 1.2 prediction: 1.1535705597 input: 1.2 prediction: 1.16749939179 input: 1.1 prediction: 1.17724957425 input: 1.2 prediction: 1.15407470198 input: 6.0 prediction: 1.16785229138 input: 7.9 prediction: 5.551706 input: 8.4 prediction: 6.2561942 input: 10.6 prediction: 6.89933594 input: 12.4 prediction: 10.6 input: 12.1 prediction: 12.4 input: 12.4 prediction: 12.31 input: 11.4 prediction: 12.337 input: 11.2 prediction: 10.84 input: 10.8 prediction: 10.948 input: 12.0 prediction: 10.9036 input: 11.8 prediction: 11.23252 input: 11.9 prediction: 11.402764 input: 11.4 prediction: 11.5519348 input: 11.0 prediction: 11.50635436 input: 9.8 prediction: 11.354448052 input: 9.8 prediction: 10.8881136364 input: 10.8 prediction: 10.5616795455 input: 11.1 prediction: 10.6331756818 input: 11.1 prediction: 10.7732229773 input: 11.0 prediction: 10.8712560841 input: 10.7 prediction: 10.9098792589 input: 10.6 prediction: 10.8469154812 input: 10.3 prediction: 10.7728408368 input: 10.1 prediction: 10.6309885858 input: 12.9 prediction: 10.4716920101 input: 10.5 prediction: 10.4716920101 input: 9.7 prediction: 10.480184407 input: 9.7 prediction: 10.2461290849 input: 9.2 prediction: 10.0822903594 input: 9.2 prediction: 9.81760325161 input: 9.2 prediction: 9.63232227613 input: 9.3 prediction: 9.50262559329 input: 9.1 prediction: 9.4418379153 input: 9.0 prediction: 9.33928654071 input: 8.9 prediction: 9.2375005785 input: 9.0 prediction: 9.13625040495 input: 8.9 prediction: 9.09537528346 input: 8.9 prediction: 9.03676269843 input: 9.0 prediction: 8.9957338889 input: 9.2 prediction: 8.99701372223 input: 10.0 prediction: 9.05790960556 input: 10.7 prediction: 9.34053672389 input: 8.9 prediction: 9.74837570672 input: 9.0 prediction: 9.49386299471 input: 9.0 prediction: 9.34570409629 input: 9.3 prediction: 9.24199286741 input: 9.3 prediction: 9.25939500718 input: 9.1 prediction: 9.27157650503 input: 9.1 prediction: 9.22010355352 input: 9.1 prediction: 9.18407248746 input: 9.2 prediction: 9.15885074122 input: 9.4 prediction: 9.17119551886 input: 9.3 prediction: 9.2398368632 input: 9.3 prediction: 9.25788580424 input: 9.1 prediction: 9.27052006297 input: 9.1 prediction: 9.21936404408 input: 11.0 prediction: 9.18355483085 input: 9.0 prediction: 9.7284883816 input: 8.6 prediction: 9.50994186712 input: 3.0 prediction: 9.50994186712 input: 1.3 prediction: 4.344 input: 1.2 prediction: 1.20749660397 input: 1.3 prediction: 1.20524762278 input: 1.3 prediction: 1.23367333594 input: 1.3 prediction: 1.25357133516 input: 1.2 prediction: 1.26749993461 input: 1.3 prediction: 1.24724995423 input: 1.2 prediction: 1.26307496796 input: 1.3 prediction: 1.24415247757 input: 1.2 prediction: 1.2609067343 input: 1.3 prediction: 1.24263471401 input: 1.2 prediction: 1.25984429981 input: 1.1 prediction: 1.24189100987 input: 2.3 prediction: 1.19932370691 input: 5.5 prediction: 3.7308 input: 5.5 prediction: 6.8366746106 input: 5.8 prediction: 6.43567222742 input: 5.7 prediction: 6.24497055919
print "5-step prediction: ", result.inferences["multiStepBestPredictions"][5]
5-step prediction: 1.19932370691
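Besides the single best prediction, each result also carries the full probability distribution over predicted buckets (a hedged aside, using the same result object as above):
# Maps each candidate predicted value to its likelihood for the 1-step horizon.
print result.inferences["multiStepPredictions"][1]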
# Model Params!
MODEL_PARAMS = {
# Type of model that the rest of these parameters apply to.
'model': "HTMPrediction",
# Version that specifies the format of the config.
'version': 1,
# Intermediate variables used to compute fields in modelParams and also
# referenced from the control section.
'aggregationInfo': { 'days': 0,
'fields': [('consumption', 'sum')],
'hours': 1,
'microseconds': 0,
'milliseconds': 0,
'minutes': 0,
'months': 0,
'seconds': 0,
'weeks': 0,
'years': 0},
'predictAheadTime': None,
# Model parameter dictionary.
'modelParams': {
# The type of inference that this model will perform
'inferenceType': 'TemporalAnomaly',
'sensorParams': {
# Sensor diagnostic output verbosity control;
# if > 0: sensor region will print out on screen what it's sensing
# at each step 0: silent; >=1: some info; >=2: more info;
# >=3: even more info (see compute() in py/regions/RecordSensor.py)
'verbosity' : 0,
# Include the encoders we use
'encoders': {
u'timestamp_timeOfDay': {
'fieldname': u'timestamp',
'name': u'timestamp_timeOfDay',
'timeOfDay': (21, 0.5),
'type': 'DateEncoder'},
u'timestamp_dayOfWeek': None,
u'timestamp_weekend': None,
u'consumption': {
'clipInput': True,
'fieldname': u'consumption',
'maxval': 100.0,
'minval': 0.0,
'n': 50,
'name': u'c1',
'type': 'ScalarEncoder',
'w': 21},},
# A dictionary specifying the period for automatically-generated
# resets from a RecordSensor;
#
# None = disable automatically-generated resets (also disabled if
# all of the specified values evaluate to 0).
# Valid keys are any desired combination of the following:
# days, hours, minutes, seconds, milliseconds, microseconds, weeks
#
# Example for 1.5 days: sensorAutoReset = dict(days=1,hours=12),
#
# (value generated from SENSOR_AUTO_RESET)
'sensorAutoReset' : None,
},
'spEnable': True,
'spParams': {
# SP diagnostic output verbosity control;
# 0: silent; >=1: some info; >=2: more info;
'spVerbosity' : 0,
# Spatial Pooler implementation selector, see getSPClass
# in py/regions/SPRegion.py for details
# 'py' (default), 'cpp' (speed optimized, new)
'spatialImp' : 'cpp',
'globalInhibition': 1,
# Number of cell columns in the cortical region (same number for
# SP and TM)
# (see also tpNCellsPerCol)
'columnCount': 2048,
'inputWidth': 0,
# SP inhibition control (absolute value);
# Maximum number of active columns in the SP region's output (when
# there are more, the weaker ones are suppressed)
'numActiveColumnsPerInhArea': 40,
'seed': 1956,
# potentialPct
# What percent of the column's receptive field is available
# for potential synapses. At initialization time, we will
# choose potentialPct * (2*potentialRadius+1)^2
'potentialPct': 0.5,
# The default connected threshold. Any synapse whose
# permanence value is above the connected threshold is
# a "connected synapse", meaning it can contribute to the
# cell's firing. Typical value is 0.10. Cells whose activity
# level before inhibition falls below minDutyCycleBeforeInh
# will have their own internal synPermConnectedCell
# threshold set below this default value.
# (This concept applies to both SP and TM and so 'cells'
# is correct here as opposed to 'columns')
'synPermConnected': 0.1,
'synPermActiveInc': 0.1,
'synPermInactiveDec': 0.005,
},
# Controls whether TM is enabled or disabled;
# TM is necessary for making temporal predictions, such as predicting
# the next inputs. Without TP, the model is only capable of
# reconstructing missing sensor inputs (via SP).
'tmEnable' : True,
'tmParams': {
# TM diagnostic output verbosity control;
# 0: silent; [1..6]: increasing levels of verbosity
# (see verbosity in nupic/trunk/py/nupic/research/TP.py and BacktrackingTMCPP.py)
'verbosity': 0,
# Number of cell columns in the cortical region (same number for
# SP and TM)
# (see also tpNCellsPerCol)
'columnCount': 2048,
# The number of cells (i.e., states), allocated per column.
'cellsPerColumn': 32,
'inputWidth': 2048,
'seed': 1960,
# Temporal Pooler implementation selector (see _getTPClass in
# CLARegion.py).
'temporalImp': 'cpp',
# New Synapse formation count
# NOTE: If None, use spNumActivePerInhArea
#
# TODO: need better explanation
'newSynapseCount': 20,
# Maximum number of synapses per segment
# > 0 for fixed-size CLA
# -1 for non-fixed-size CLA
#
# TODO: for Ron: once the appropriate value is placed in TP
# constructor, see if we should eliminate this parameter from
# description.py.
'maxSynapsesPerSegment': 32,
# Maximum number of segments per cell
# > 0 for fixed-size CLA
# -1 for non-fixed-size CLA
#
# TODO: for Ron: once the appropriate value is placed in TP
# constructor, see if we should eliminate this parameter from
# description.py.
'maxSegmentsPerCell': 128,
# Initial Permanence
# TODO: need better explanation
'initialPerm': 0.21,
# Permanence Increment
'permanenceInc': 0.1,
# Permanence Decrement
# If set to None, will automatically default to tpPermanenceInc
# value.
'permanenceDec' : 0.1,
'globalDecay': 0.0,
'maxAge': 0,
# Minimum number of active synapses for a segment to be considered
# during search for the best-matching segments.
# None=use default
# Replaces: tpMinThreshold
'minThreshold': 9,
# Segment activation threshold.
# A segment is active if it has >= tpSegmentActivationThreshold
# connected synapses that are active due to infActiveState
# None=use default
# Replaces: tpActivationThreshold
'activationThreshold': 12,
'outputType': 'normal',
# "Pay Attention Mode" length. This tells the TM how many new
# elements to append to the end of a learned sequence at a time.
# Smaller values are better for datasets with short sequences,
# higher values are better for datasets with long sequences.
'pamLength': 1,
},
'clParams': {
'regionName' : 'SDRClassifierRegion',
# Classifier diagnostic output verbosity control;
# 0: silent; [1..6]: increasing levels of verbosity
'verbosity' : 0,
# This controls how fast the classifier learns/forgets. Higher values
# make it adapt faster and forget older patterns faster.
'alpha': 0.005,
# This is set after the call to updateConfigFromSubConfig and is
# computed from the aggregationInfo and predictAheadTime.
'steps': '1',
'implementation': 'cpp',
},
'anomalyParams': {
u'anomalyCacheRecords': None,
u'autoDetectThreshold': None,
u'autoDetectWaitRecords': 2184
},
'trainSPNetOnlyIfRequested': False,
},
}
from nupic.frameworks.opf.model_factory import ModelFactory
model = ModelFactory.create(MODEL_PARAMS)
model.enableInference({'predictedField': 'consumption'})
data = getData()
for _ in xrange(5):
record = dict(zip(data.getFieldNames(), data.next()))
print "input: ", record["consumption"]
result = model.run(record)
print "prediction: ", result.inferences["multiStepBestPredictions"][1]
input: 5.3 prediction: 5.3 input: 5.5 prediction: 5.5 input: 5.1 prediction: 5.36 input: 5.3 prediction: 5.1 input: 5.2 prediction: 5.342
print result
ModelResult( predictionNumber=4 rawInput={'timestamp': datetime.datetime(2010, 7, 2, 1, 0), 'gym': 'Balgowlah Platinum', 'consumption': 5.2, 'address': 'Shop 67 197-215 Condamine Street Balgowlah 2093'} sensorInput=SensorInput( dataRow=(5.2, 1.0) dataDict={'timestamp': datetime.datetime(2010, 7, 2, 1, 0), 'gym': 'Balgowlah Platinum', 'consumption': 5.2, 'address': 'Shop 67 197-215 Condamine Street Balgowlah 2093'} dataEncodings=[array([ 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32), array([ 0., 0., 0., ..., 0., 0., 0.], dtype=float32)] sequenceReset=0.0 category=-1 ) inferences={'multiStepPredictions': {1: {5.1: 0.0088801263517415546, 5.2: 0.010775254623541418, 5.341999999999999: 0.98034461902471692}}, 'multiStepBucketLikelihoods': {1: {1: 0.0088801263517415546, 2: 0.98034461902471692}}, 'multiStepBestPredictions': {1: 5.341999999999999}, 'anomalyLabel': '[]', 'anomalyScore': 0.40000001} metrics=None predictedFieldIdx=0 predictedFieldName=consumption classifierInput=ClassifierInput( dataRow=5.2 bucketIndex=2 ) )
print "anomaly score: ", result.inferences["anomalyScore"]
anomaly score: 0.4
See Subutai's talk for more info on anomaly detection!
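A common follow-up, sketched here as a hedged aside (it assumes the nupic.algorithms.anomaly_likelihood module and is not part of this walkthrough), is to convert raw anomaly scores into anomaly likelihoods that account for how noisy the metric normally is:
from nupic.algorithms.anomaly_likelihood import AnomalyLikelihood
anomalyLikelihood = AnomalyLikelihood()
# Feed the latest value, its raw anomaly score and its timestamp; the helper
# keeps a running distribution and returns ~0.5 until it has enough history.
likelihood = anomalyLikelihood.anomalyProbability(
    record["consumption"], result.inferences["anomalyScore"], record["timestamp"])
print "anomaly likelihood: ", likelihood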
python examples/opf/bin/OpfRunExperiment.py examples/opf/experiments/multistep/hotgym/
Outputs examples/opf/experiments/multistep/hotgym/inference/DefaultTask.TemporalMultiStep.predictionLog.csv
python bin/run_swarm.py examples/opf/experiments/multistep/hotgym/permutations.py
Outputs examples/opf/experiments/multistep/hotgym/model_0/description.py