Notebook

Encoders¶

Scalar
Date/time
Category
Multi

In [1]:

import numpy

In [2]:

from nupic.encoders import ScalarEncoder

ScalarEncoder?

In [3]:

# 22 bits with 3 active representing values 0 to 100
# clipInput=True makes values >100 encode the same as 100 (instead of throwing a ValueError)
# forced=True allows small values for `n` and `w`
enc = ScalarEncoder(n=22, w=3, minval=2.5, maxval=97.5, clipInput=True, forced=True)
print "3 =", enc.encode(3)
print "4 =", enc.encode(4)
print "5 =", enc.encode(5)

3 = [1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
4 = [1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
5 = [0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

In [4]:

# Encode 100, the top of the nominal range (it is above maxval=97.5, so with
# clipInput=True it lands in the last bucket)
print "100  =", enc.encode(100)
# See that any larger number gets the same encoding
print "1000 =", enc.encode(1000)

100  = [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1]
1000 = [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1]

In [5]:

from nupic.encoders.random_distributed_scalar import RandomDistributedScalarEncoder

RandomDistributedScalarEncoder?

In [6]:

# Random distributed scalar encoder: 21 bits, 3 active, buckets of size 5.
rdse = RandomDistributedScalarEncoder(n=21, w=3, resolution=5, offset=2.5)

# Labels are padded to a common width so the encodings line up; a blank line
# separates the nearby values (3, 4, 5) from the far-away ones (100, 1000).
for value in (3, 4, 5, 100, 1000):
    if value == 100:
        print
    print ("%d =" % value).ljust(6), rdse.encode(value)

3 =    [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1]
4 =    [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1]
5 =    [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1]

100 =  [0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
1000 = [0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0]

In [7]:

import datetime
from nupic.encoders.date import DateEncoder

DateEncoder?

In [8]:

de = DateEncoder(season=5)

now = datetime.datetime.strptime("2014-05-02 13:08:58", "%Y-%m-%d %H:%M:%S")
print "now =       ", de.encode(now)
nextMonth = datetime.datetime.strptime("2014-06-02 13:08:58", "%Y-%m-%d %H:%M:%S")
print "next month =", de.encode(nextMonth)
xmas = datetime.datetime.strptime("2014-12-25 13:08:58", "%Y-%m-%d %H:%M:%S")
print "xmas =      ", de.encode(xmas)

now =        [0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0]
next month = [0 0 0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0]
xmas =       [1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1]

In [9]:

from nupic.encoders.category import CategoryEncoder

categories = ("cat", "dog", "monkey", "slow loris")
encoder = CategoryEncoder(w=3, categoryList=categories, forced=True)

# Keep each encoding in its own variable; later cells reuse them.
cat, dog, monkey, loris = [encoder.encode(name) for name in categories]

# Pad every label to the width of the longest one ("slow loris =") so the
# encodings line up.
for name, encoding in zip(categories, (cat, dog, monkey, loris)):
    print ("%s =" % name).ljust(12), encoding

cat =        [0 0 0 1 1 1 0 0 0 0 0 0 0 0 0]
dog =        [0 0 0 0 0 0 1 1 1 0 0 0 0 0 0]
monkey =     [0 0 0 0 0 0 0 0 0 1 1 1 0 0 0]
slow loris = [0 0 0 0 0 0 0 0 0 0 0 0 1 1 1]

In [10]:

# A missing value (None) is encoded with no active bits
print encoder.encode(None)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

In [11]:

# A category that is not in categoryList is encoded with the first three bits,
# which none of the listed categories use
print encoder.encode("unknown")

[1 1 1 0 0 0 0 0 0 0 0 0 0 0 0]

In [12]:

# decode() maps an encoding back to the category it represents
print encoder.decode(cat)

({'category': ([(1, 1)], 'cat')}, ['category'])

In [13]:

# Mix bits from two buckets: 3 and 5 belong to "cat", 7 and 8 belong to "dog"
catdog = numpy.array([0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0])
# The decoder reports both categories for the mixed pattern
print encoder.decode(catdog)

({'category': ([(1, 2)], 'cat, dog')}, ['category'])

Spatial Pooler¶

In [14]:

from nupic.research.spatial_pooler import SpatialPooler

print SpatialPooler?

In [ ]:

print SpatialPooler

In [15]:

print SpatialPooler

<class 'nupic.research.spatial_pooler.SpatialPooler'>

In [16]:

print SpatialPooler

<class 'nupic.research.spatial_pooler.SpatialPooler'>

In [17]:

print SpatialPooler

<class 'nupic.research.spatial_pooler.SpatialPooler'>

In [18]:

print SpatialPooler

<class 'nupic.research.spatial_pooler.SpatialPooler'>

In [19]:

print SpatialPooler

<class 'nupic.research.spatial_pooler.SpatialPooler'>

In [20]:

print SpatialPooler

<class 'nupic.research.spatial_pooler.SpatialPooler'>

In [21]:

print SpatialPooler

<class 'nupic.research.spatial_pooler.SpatialPooler'>

In [22]:

print SpatialPooler

<class 'nupic.research.spatial_pooler.SpatialPooler'>

In [23]:

# The encoded category is the spatial pooler's input below: check its length
# and contents
print len(cat)
print cat

15
[0 0 0 1 1 1 0 0 0 0 0 0 0 0 0]

In [24]:

sp = SpatialPooler(inputDimensions=(15,),
                   columnDimensions=(4,),
                   potentialRadius=15,
                   numActiveColumnsPerInhArea=1,
                   globalInhibition=True,
                   synPermActiveInc=0.03,
                   potentialPct=1.0)
import numpy
for column in xrange(4):
    connected = numpy.zeros((15,), dtype="int")
    sp.getConnectedSynapses(column, connected)
    print connected

[0 0 1 1 1 1 0 0 0 0 1 1 1 1 0]
[1 0 0 0 1 1 1 1 0 1 0 0 0 1 1]
[1 1 0 0 0 0 0 1 1 1 1 1 1 0 0]
[1 1 0 1 1 0 0 1 1 0 1 0 0 1 1]

In [25]:

# compute() writes the active columns into `output` (one entry per column)
output = numpy.zeros((4,), dtype="int")
sp.compute(cat, learn=True, activeArray=output)
print output

[1 0 0 0]

In [26]:

# Present the same input 20 more times with learning enabled
for _ in xrange(20):
    sp.compute(cat, learn=True, activeArray=output)

In [27]:

# Print every column's connected synapses again, to compare with the state
# printed right after the pooler was created
for column in xrange(4):
    connected = numpy.zeros((15,), dtype="int")
    sp.getConnectedSynapses(column, connected)
    print connected

[0 0 1 1 1 1 0 0 0 0 1 1 1 1 0]
[1 0 0 0 1 1 1 1 0 1 0 0 0 1 1]
[1 1 0 0 0 0 0 1 1 1 1 1 1 0 0]
[1 1 0 1 1 0 0 1 1 0 1 0 0 1 1]

In [28]:

# Train on all four categories, cycling through them in a fixed order
# (cat, dog, monkey, slow loris) 200 times.
trainingInputs = (cat, dog, monkey, loris)
for _ in xrange(200):
    for encoding in trainingInputs:
        sp.compute(encoding, learn=True, activeArray=output)

In [29]:

# Print every column's connected synapses once more, now that the pooler has
# been trained on all four categories
for column in xrange(4):
    connected = numpy.zeros((15,), dtype="int")
    sp.getConnectedSynapses(column, connected)
    print connected

[0 0 0 1 1 1 0 0 0 0 0 0 0 0 0]
[1 0 0 0 1 1 1 1 0 1 0 0 0 1 1]
[0 0 0 0 0 0 0 0 0 1 1 1 0 0 0]
[0 0 0 0 0 0 1 1 1 0 0 0 1 1 1]

In [30]:

noisyCat = numpy.zeros((15,), dtype="uint32")
noisyCat[3] = 1
noisyCat[4] = 1
# This is part of dog!
noisyCat[6] = 1
print noisyCat

[0 0 0 1 1 0 1 0 0 0 0 0 0 0 0]

In [31]:

# Inference only (learn=False): present the noisy pattern and see which
# column wins.
# NOTE(review): the saved output below is [0 1 0 0], but the column whose
# connected synapses converged on cat's bits is column 0 -- re-run to confirm
# the claim in the trailing comment.
sp.compute(noisyCat, learn=False, activeArray=output)
print output  # matches cat!

[0 1 0 0]

Temporal Memory (a.k.a. Sequence Memory, Temporal Pooler)¶

From: examples/tm/hello_tm.py

In [32]:

from nupic.research.BacktrackingTM import BacktrackingTM

BacktrackingTM?

In [33]:

# Step 1: create a Temporal Memory instance with appropriate parameters.
# numberOfCols=50 matches the input vectors built in the next step (five
# patterns of 10 columns each).
tm = BacktrackingTM(numberOfCols=50, cellsPerColumn=2,
                    initialPerm=0.5, connectedPerm=0.5,
                    minThreshold=10, newSynapseCount=10,
                    permanenceInc=0.1, permanenceDec=0.0,
                    activationThreshold=8,
                    globalDecay=0, burnIn=1,
                    checkSynapseConsistency=False,
                    pamLength=10)

In [34]:

# Step 2: build the input vectors for the temporal memory. Every vector is
# numberOfCols wide. The sequence A -> B -> C -> D -> E uses five vectors, and
# each letter's SDR is its own block of 10 consecutive columns:
#   A: 0-9, B: 10-19, C: 20-29, D: 30-39, E: 40-49
sdrWidth = 10
x = numpy.zeros((5, tm.numberOfCols), dtype="uint32")
for row in range(5):
    x[row, row * sdrWidth:(row + 1) * sdrWidth] = 1

In [35]:

# Step 3: send this simple sequence to the temporal memory for learning
# We repeat the sequence 10 times
for i in range(10):

    # Send each letter in the sequence in order
    for j in range(5):

        # The compute method performs one step of learning and/or inference. Note:
        # here we just perform learning but you can perform prediction/inference and
        # learning in the same step if you want (online learning).
        tm.compute(x[j], enableLearn = True, enableInference = False)

        # This function prints the segments associated with every cell.
        # If you really want to understand the TM, uncomment this line. By following
        # every step you can get an excellent understanding for exactly how the TM
        # learns.
        #tm.printCells()

    # The reset command tells the TM that a sequence just ended and essentially
    # zeros out all the states. It is not strictly necessary but it's a bit
    # messier without resets, and the TM learns quicker with resets.
    tm.reset()

In [36]:

# Step 4: send the same sequence of vectors and look at predictions made by
# temporal memory

# Utility routine for printing the input vector
def formatRow(x):
    """Render a sequence as text in space-separated groups of ten elements.

    Each element is converted with str(); the elements of a group are joined
    without a separator, groups are separated by one space, and the result
    always ends with a single trailing space (an empty input yields ' ').
    """
    cells = [str(value) for value in x]
    groups = [''.join(cells[start:start + 10])
              for start in range(0, len(cells), 10)]
    return ' '.join(groups) + ' '

for j in range(5):
    print "\n\n--------","ABCDE"[j],"-----------"
    print "Raw input vector\n",formatRow(x[j])

    # Send each vector to the TM, with learning turned off
    tm.compute(x[j], enableLearn=False, enableInference=True)

    # This method prints out the active state of each cell followed by the
    # predicted state of each cell. For convenience the cells are grouped
    # 10 at a time. When there are multiple cells per column the printout
    # is arranged so the cells in a column are stacked together
    #
    # What you should notice is that the columns where active state is 1
    # represent the SDR for the current input pattern and the columns where
    # predicted state is 1 represent the SDR for the next expected pattern
    print "\nAll the active and predicted cells:"
    tm.printStates(printPrevious=False, printLearnState=False)

    # tm.getPredictedState() gets the predicted cells.
    # predictedCells[c][i] represents the state of the i'th cell in the c'th
    # column. To see if a column is predicted, we can simply take the OR
    # across all the cells in that column. In numpy we can do this by taking
    # the max along axis 1.
    print "\n\nThe following columns are predicted by the temporal memory. This"
    print "should correspond to columns in the *next* item in the sequence."
    predictedCells = tm.getPredictedState()
    # nonzero() returns a tuple holding one index array, so formatRow prints
    # that array's text form: a bracketed list of predicted column indices
    print formatRow(predictedCells.max(axis=1).nonzero())


-------- A -----------
Raw input vector
1111111111 0000000000 0000000000 0000000000 0000000000 

All the active and predicted cells:

Inference Active state
1111111111 0000000000 0000000000 0000000000 0000000000 
0000000000 0000000000 0000000000 0000000000 0000000000 
Inference Predicted state
0000000000 0000000000 0000000000 0000000000 0000000000 
0000000000 1111111111 0000000000 0000000000 0000000000 


The following columns are predicted by the temporal memory. This
should correspond to columns in the *next* item in the sequence.
[10 11 12 13 14 15 16 17 18 19] 


-------- B -----------
Raw input vector
0000000000 1111111111 0000000000 0000000000 0000000000 

All the active and predicted cells:

Inference Active state
0000000000 0000000000 0000000000 0000000000 0000000000 
0000000000 1111111111 0000000000 0000000000 0000000000 
Inference Predicted state
0000000000 0000000000 0000000000 0000000000 0000000000 
0000000000 0000000000 1111111111 0000000000 0000000000 


The following columns are predicted by the temporal memory. This
should correspond to columns in the *next* item in the sequence.
[20 21 22 23 24 25 26 27 28 29] 


-------- C -----------
Raw input vector
0000000000 0000000000 1111111111 0000000000 0000000000 

All the active and predicted cells:

Inference Active state
0000000000 0000000000 0000000000 0000000000 0000000000 
0000000000 0000000000 1111111111 0000000000 0000000000 
Inference Predicted state
0000000000 0000000000 0000000000 0000000000 0000000000 
0000000000 0000000000 0000000000 1111111111 0000000000 


The following columns are predicted by the temporal memory. This
should correspond to columns in the *next* item in the sequence.
[30 31 32 33 34 35 36 37 38 39] 


-------- D -----------
Raw input vector
0000000000 0000000000 0000000000 1111111111 0000000000 

All the active and predicted cells:

Inference Active state
0000000000 0000000000 0000000000 0000000000 0000000000 
0000000000 0000000000 0000000000 1111111111 0000000000 
Inference Predicted state
0000000000 0000000000 0000000000 0000000000 0000000000 
0000000000 0000000000 0000000000 0000000000 1111111111 


The following columns are predicted by the temporal memory. This
should correspond to columns in the *next* item in the sequence.
[40 41 42 43 44 45 46 47 48 49] 


-------- E -----------
Raw input vector
0000000000 0000000000 0000000000 0000000000 1111111111 

All the active and predicted cells:

Inference Active state
0000000000 0000000000 0000000000 0000000000 0000000000 
0000000000 0000000000 0000000000 0000000000 1111111111 
Inference Predicted state
0000000000 0000000000 0000000000 0000000000 0000000000 
0000000000 0000000000 0000000000 0000000000 0000000000 


The following columns are predicted by the temporal memory. This
should correspond to columns in the *next* item in the sequence.
[]

Networks and Regions¶

See slides.

Online Prediction Framework¶

CLAModel
OPF Client
Swarming

CLAModel¶

From examples/opf/clients/hotgym/simple/hotgym.py

Model Parameters¶

MODEL_PARAMS holds all of the parameters for the CLA model and its subcomponents

In [37]:

# Model parameters for the multi-step prediction model
MODEL_PARAMS = {
    # Type of model that the rest of these parameters apply to.
    'model': "HTMPrediction",

    # Version that specifies the format of the config.
    'version': 1,

    # Intermediate variables used to compute fields in modelParams and also
    # referenced from the control section.
    'aggregationInfo': {   'days': 0,
        'fields': [('consumption', 'sum')],
        'hours': 1,
        'microseconds': 0,
        'milliseconds': 0,
        'minutes': 0,
        'months': 0,
        'seconds': 0,
        'weeks': 0,
        'years': 0},

    'predictAheadTime': None,

    # Model parameter dictionary.
    'modelParams': {
        # The type of inference that this model will perform
        'inferenceType': 'TemporalMultiStep',

        'sensorParams': {
            # Sensor diagnostic output verbosity control;
            # if > 0: sensor region will print out on screen what it's sensing
            # at each step 0: silent; >=1: some info; >=2: more info;
            # >=3: even more info (see compute() in py/regions/RecordSensor.py)
            'verbosity' : 0,

            # Include the encoders we use
            'encoders': {
                u'timestamp_timeOfDay': {
                    'fieldname': u'timestamp',
                    'name': u'timestamp_timeOfDay',
                    'timeOfDay': (21, 0.5),
                    'type': 'DateEncoder'
                },
                u'timestamp_dayOfWeek': None,
                u'timestamp_weekend': None,
                u'consumption': {
                    'clipInput': True,
                    'fieldname': u'consumption',
                    'maxval': 100.0,
                    'minval': 0.0,
                    'n': 50,
                    'name': u'c1',
                    'type': 'ScalarEncoder',
                    'w': 21
                },
            },

            # A dictionary specifying the period for automatically-generated
            # resets from a RecordSensor;
            #
            # None = disable automatically-generated resets (also disabled if
            # all of the specified values evaluate to 0).
            # Valid keys are any combination of the following:
            #   days, hours, minutes, seconds, milliseconds, microseconds, weeks
            #
            # Example for 1.5 days: sensorAutoReset = dict(days=1,hours=12),
            #
            # (value generated from SENSOR_AUTO_RESET)
            'sensorAutoReset' : None,
        },

        'spEnable': True,

        'spParams': {
            # SP diagnostic output verbosity control;
            # 0: silent; >=1: some info; >=2: more info;
            'spVerbosity' : 0,

            # Spatial Pooler implementation selector, see getSPClass
            # in py/regions/SPRegion.py for details
            # 'py' (default), 'cpp' (speed optimized, new)
            'spatialImp' : 'cpp',

            'globalInhibition': 1,

            # Number of cell columns in the cortical region (same number for
            # SP and TM)
            # (see also tpNCellsPerCol)
            'columnCount': 2048,

            'inputWidth': 0,

            # SP inhibition control (absolute value);
            # Maximum number of active columns in the SP region's output (when
            # there are more, the weaker ones are suppressed)
            'numActiveColumnsPerInhArea': 40,

            'seed': 1956,

            # potentialPct
            # What percent of the column's receptive field is available
            # for potential synapses. At initialization time, we will
            # choose potentialPct * (2*potentialRadius+1)^2
            'potentialPct': 0.5,

            # The default connected threshold. Any synapse whose
            # permanence value is above the connected threshold is
            # a "connected synapse", meaning it can contribute to the
            # cell's firing. Typical value is 0.10. Cells whose activity
            # level before inhibition falls below minDutyCycleBeforeInh
            # will have their own internal synPermConnectedCell
            # threshold set below this default value.
            # (This concept applies to both SP and TM and so 'cells'
            # is correct here as opposed to 'columns')
            'synPermConnected': 0.1,

            'synPermActiveInc': 0.1,

            'synPermInactiveDec': 0.005,
        },

        # Controls whether TM is enabled or disabled;
        # TM is necessary for making temporal predictions, such as predicting
        # the next inputs.  Without TM, the model is only capable of
        # reconstructing missing sensor inputs (via SP).
        'tmEnable' : True,

        'tmParams': {
            # TM diagnostic output verbosity control;
            # 0: silent; [1..6]: increasing levels of verbosity
            # (see verbosity in nupic/trunk/py/nupic/research/TP.py and BacktrackingTMCPP.py)
            'verbosity': 0,

            # Number of cell columns in the cortical region (same number for
            # SP and TM)
            # (see also tpNCellsPerCol)
            'columnCount': 2048,

            # The number of cells (i.e., states), allocated per column.
            'cellsPerColumn': 32,

            'inputWidth': 2048,

            'seed': 1960,

            # Temporal Memory implementation selector (see _getTPClass in
            # CLARegion.py).
            'temporalImp': 'cpp',

            # New Synapse formation count
            # NOTE: If None, use spNumActivePerInhArea
            #
            # TODO: need better explanation
            'newSynapseCount': 20,

            # Maximum number of synapses per segment
            #  > 0 for fixed-size CLA
            # -1 for non-fixed-size CLA
            #
            # TODO: for Ron: once the appropriate value is placed in TP
            # constructor, see if we should eliminate this parameter from
            # description.py.
            'maxSynapsesPerSegment': 32,

            # Maximum number of segments per cell
            #  > 0 for fixed-size CLA
            # -1 for non-fixed-size CLA
            #
            # TODO: for Ron: once the appropriate value is placed in TP
            # constructor, see if we should eliminate this parameter from
            # description.py.
            'maxSegmentsPerCell': 128,

            # Initial Permanence
            # TODO: need better explanation
            'initialPerm': 0.21,

            # Permanence Increment
            'permanenceInc': 0.1,

            # Permanence Decrement
            # If set to None, will automatically default to tpPermanenceInc
            # value.
            'permanenceDec' : 0.1,

            'globalDecay': 0.0,

            'maxAge': 0,

            # Minimum number of active synapses for a segment to be considered
            # during search for the best-matching segments.
            # None=use default
            # Replaces: tpMinThreshold
            'minThreshold': 9,

            # Segment activation threshold.
            # A segment is active if it has >= tpSegmentActivationThreshold
            # connected synapses that are active due to infActiveState
            # None=use default
            # Replaces: tpActivationThreshold
            'activationThreshold': 12,

            'outputType': 'normal',

            # "Pay Attention Mode" length. This tells the TM how many new
            # elements to append to the end of a learned sequence at a time.
            # Smaller values are better for datasets with short sequences,
            # higher values are better for datasets with long sequences.
            'pamLength': 1,
        },

        'clParams': {
            'regionName' : 'SDRClassifierRegion',

            # Classifier diagnostic output verbosity control;
            # 0: silent; [1..6]: increasing levels of verbosity
            'verbosity' : 0,

            # This controls how fast the classifier learns/forgets. Higher values
            # make it adapt faster and forget older patterns faster.
            'alpha': 0.005,

            # This is set after the call to updateConfigFromSubConfig and is
            # computed from the aggregationInfo and predictAheadTime.
            # In this notebook it is given directly: the cells below read the
            # predictions for 1 and 5 steps ahead.
            'steps': '1,5',

            'implementation': 'cpp',
        },

        'trainSPNetOnlyIfRequested': False,
    },
}

Dataset Helpers¶

In [38]:

from pkg_resources import resource_filename

# Locate the hot gym sample CSV inside the nupic.datafiles package
datasetPath = resource_filename("nupic.datafiles", "extra/hotgym/hotgym.csv")
print datasetPath

# Peek at the raw file: three header rows (field names, field types, and a
# flags row) followed by the first five data rows
with open(datasetPath) as inputFile:
    print
    for _ in xrange(8):
        print inputFile.next().strip()

/Users/mleborgne/_git/nupic/src/nupic/datafiles/extra/hotgym/hotgym.csv

gym,address,timestamp,consumption
string,string,datetime,float
S,,T,
Balgowlah Platinum,Shop 67 197-215 Condamine Street Balgowlah 2093,2010-07-02 00:00:00.0,5.3
Balgowlah Platinum,Shop 67 197-215 Condamine Street Balgowlah 2093,2010-07-02 00:15:00.0,5.5
Balgowlah Platinum,Shop 67 197-215 Condamine Street Balgowlah 2093,2010-07-02 00:30:00.0,5.1
Balgowlah Platinum,Shop 67 197-215 Condamine Street Balgowlah 2093,2010-07-02 00:45:00.0,5.3
Balgowlah Platinum,Shop 67 197-215 Condamine Street Balgowlah 2093,2010-07-02 01:00:00.0,5.2

Loading Data¶

FileRecordStream - file reader for the NuPIC file format (CSV with three header rows, understands datetimes)

In [39]:

from nupic.data.file_record_stream import FileRecordStream

def getData():
    """Return a new FileRecordStream reading the dataset at datasetPath."""
    return FileRecordStream(datasetPath)

# Read the first five records; each comes back as a list of parsed values
# (the timestamp is already a datetime object)
data = getData()
for _ in xrange(5):
    print data.next()

['Balgowlah Platinum', 'Shop 67 197-215 Condamine Street Balgowlah 2093', datetime.datetime(2010, 7, 2, 0, 0), 5.3]
['Balgowlah Platinum', 'Shop 67 197-215 Condamine Street Balgowlah 2093', datetime.datetime(2010, 7, 2, 0, 15), 5.5]
['Balgowlah Platinum', 'Shop 67 197-215 Condamine Street Balgowlah 2093', datetime.datetime(2010, 7, 2, 0, 30), 5.1]
['Balgowlah Platinum', 'Shop 67 197-215 Condamine Street Balgowlah 2093', datetime.datetime(2010, 7, 2, 0, 45), 5.3]
['Balgowlah Platinum', 'Shop 67 197-215 Condamine Street Balgowlah 2093', datetime.datetime(2010, 7, 2, 1, 0), 5.2]

In [40]:

from nupic.frameworks.opf.model_factory import ModelFactory
# Build an OPF model from the parameter dictionary, then tell it which field
# it should predict
model = ModelFactory.create(MODEL_PARAMS)
model.enableInference({'predictedField': 'consumption'})

In [41]:

data = getData()
for _ in xrange(100):
    record = dict(zip(data.getFieldNames(), data.next()))
    print "input: ", record["consumption"]
    result = model.run(record)
    print "prediction: ", result.inferences["multiStepBestPredictions"][1]

input:  5.3
prediction:  5.3
input:  5.5
prediction:  5.5
input:  5.1
prediction:  5.36
input:  5.3
prediction:  5.1
input:  5.2
prediction:  5.342
input:  5.5
prediction:  5.2994
input:  4.5
prediction:  5.35958
input:  1.2
prediction:  4.92
input:  1.1
prediction:  1.2
input:  1.2
prediction:  1.17
input:  1.2
prediction:  1.179
input:  1.2
prediction:  1.1853
input:  1.2
prediction:  1.18971
input:  1.2
prediction:  1.192797
input:  1.1
prediction:  1.1949579
input:  1.2
prediction:  1.16647053
input:  1.1
prediction:  1.176529371
input:  1.2
prediction:  1.1535705597
input:  1.2
prediction:  1.16749939179
input:  1.1
prediction:  1.17724957425
input:  1.2
prediction:  1.15407470198
input:  6.0
prediction:  1.16785229138
input:  7.9
prediction:  5.551706
input:  8.4
prediction:  6.2561942
input:  10.6
prediction:  6.89933594
input:  12.4
prediction:  10.6
input:  12.1
prediction:  12.4
input:  12.4
prediction:  12.31
input:  11.4
prediction:  12.337
input:  11.2
prediction:  10.84
input:  10.8
prediction:  10.948
input:  12.0
prediction:  10.9036
input:  11.8
prediction:  11.23252
input:  11.9
prediction:  11.402764
input:  11.4
prediction:  11.5519348
input:  11.0
prediction:  11.50635436
input:  9.8
prediction:  11.354448052
input:  9.8
prediction:  10.8881136364
input:  10.8
prediction:  10.5616795455
input:  11.1
prediction:  10.6331756818
input:  11.1
prediction:  10.7732229773
input:  11.0
prediction:  10.8712560841
input:  10.7
prediction:  10.9098792589
input:  10.6
prediction:  10.8469154812
input:  10.3
prediction:  10.7728408368
input:  10.1
prediction:  10.6309885858
input:  12.9
prediction:  10.4716920101
input:  10.5
prediction:  10.4716920101
input:  9.7
prediction:  10.480184407
input:  9.7
prediction:  10.2461290849
input:  9.2
prediction:  10.0822903594
input:  9.2
prediction:  9.81760325161
input:  9.2
prediction:  9.63232227613
input:  9.3
prediction:  9.50262559329
input:  9.1
prediction:  9.4418379153
input:  9.0
prediction:  9.33928654071
input:  8.9
prediction:  9.2375005785
input:  9.0
prediction:  9.13625040495
input:  8.9
prediction:  9.09537528346
input:  8.9
prediction:  9.03676269843
input:  9.0
prediction:  8.9957338889
input:  9.2
prediction:  8.99701372223
input:  10.0
prediction:  9.05790960556
input:  10.7
prediction:  9.34053672389
input:  8.9
prediction:  9.74837570672
input:  9.0
prediction:  9.49386299471
input:  9.0
prediction:  9.34570409629
input:  9.3
prediction:  9.24199286741
input:  9.3
prediction:  9.25939500718
input:  9.1
prediction:  9.27157650503
input:  9.1
prediction:  9.22010355352
input:  9.1
prediction:  9.18407248746
input:  9.2
prediction:  9.15885074122
input:  9.4
prediction:  9.17119551886
input:  9.3
prediction:  9.2398368632
input:  9.3
prediction:  9.25788580424
input:  9.1
prediction:  9.27052006297
input:  9.1
prediction:  9.21936404408
input:  11.0
prediction:  9.18355483085
input:  9.0
prediction:  9.7284883816
input:  8.6
prediction:  9.50994186712
input:  3.0
prediction:  9.50994186712
input:  1.3
prediction:  4.344
input:  1.2
prediction:  1.20749660397
input:  1.3
prediction:  1.20524762278
input:  1.3
prediction:  1.23367333594
input:  1.3
prediction:  1.25357133516
input:  1.2
prediction:  1.26749993461
input:  1.3
prediction:  1.24724995423
input:  1.2
prediction:  1.26307496796
input:  1.3
prediction:  1.24415247757
input:  1.2
prediction:  1.2609067343
input:  1.3
prediction:  1.24263471401
input:  1.2
prediction:  1.25984429981
input:  1.1
prediction:  1.24189100987
input:  2.3
prediction:  1.19932370691
input:  5.5
prediction:  3.7308
input:  5.5
prediction:  6.8366746106
input:  5.8
prediction:  6.43567222742
input:  5.7
prediction:  6.24497055919

In [42]:

# `result` still holds the output for the last record run above; this reads
# its best prediction for five steps ahead
print "5-step prediction: ", result.inferences["multiStepBestPredictions"][5]

5-step prediction:  1.19932370691

Anomaly Score¶

In [43]:

# Model Params!
MODEL_PARAMS = {
    # Type of model that the rest of these parameters apply to.
    'model': "HTMPrediction",

    # Version that specifies the format of the config.
    'version': 1,

    # Intermediate variables used to compute fields in modelParams and also
    # referenced from the control section.
    'aggregationInfo': {   'days': 0,
        'fields': [('consumption', 'sum')],
        'hours': 1,
        'microseconds': 0,
        'milliseconds': 0,
        'minutes': 0,
        'months': 0,
        'seconds': 0,
        'weeks': 0,
        'years': 0},

    'predictAheadTime': None,

    # Model parameter dictionary.
    'modelParams': {
        # The type of inference that this model will perform
        'inferenceType': 'TemporalAnomaly',

        'sensorParams': {
            # Sensor diagnostic output verbosity control;
            # if > 0: sensor region will print out on screen what it's sensing
            # at each step 0: silent; >=1: some info; >=2: more info;
            # >=3: even more info (see compute() in py/regions/RecordSensor.py)
            'verbosity' : 0,

            # Include the encoders we use
            'encoders': {
                u'timestamp_timeOfDay': {
                    'fieldname': u'timestamp',
                    'name': u'timestamp_timeOfDay',
                    'timeOfDay': (21, 0.5),
                    'type': 'DateEncoder'},
                u'timestamp_dayOfWeek': None,
                u'timestamp_weekend': None,
                u'consumption': {
                    'clipInput': True,
                    'fieldname': u'consumption',
                    'maxval': 100.0,
                    'minval': 0.0,
                    'n': 50,
                    'name': u'c1',
                    'type': 'ScalarEncoder',
                    'w': 21},},

            # A dictionary specifying the period for automatically-generated
            # resets from a RecordSensor;
            #
            # None = disable automatically-generated resets (also disabled if
            # all of the specified values evaluate to 0).
            # Valid keys is the desired combination of the following:
            #   days, hours, minutes, seconds, milliseconds, microseconds, weeks
            #
            # Example for 1.5 days: sensorAutoReset = dict(days=1,hours=12),
            #
            # (value generated from SENSOR_AUTO_RESET)
            'sensorAutoReset' : None,
        },

        'spEnable': True,

        'spParams': {
            # SP diagnostic output verbosity control;
            # 0: silent; >=1: some info; >=2: more info;
            'spVerbosity' : 0,

            # Spatial Pooler implementation selector, see getSPClass
            # in py/regions/SPRegion.py for details
            # 'py' (default), 'cpp' (speed optimized, new)
            'spatialImp' : 'cpp',

            'globalInhibition': 1,

            # Number of cell columns in the cortical region (same number for
            # SP and TM)
            # (see also tpNCellsPerCol)
            'columnCount': 2048,

            'inputWidth': 0,

            # SP inhibition control (absolute value);
            # Maximum number of active columns in the SP region's output (when
            # there are more, the weaker ones are suppressed)
            'numActiveColumnsPerInhArea': 40,

            'seed': 1956,

            # potentialPct
            # What percent of the columns's receptive field is available
            # for potential synapses. At initialization time, we will
            # choose potentialPct * (2*potentialRadius+1)^2
            'potentialPct': 0.5,

            # The default connected threshold. Any synapse whose
            # permanence value is above the connected threshold is
            # a "connected synapse", meaning it can contribute to the
            # cell's firing. Typical value is 0.10. Cells whose activity
            # level before inhibition falls below minDutyCycleBeforeInh
            # will have their own internal synPermConnectedCell
            # threshold set below this default value.
            # (This concept applies to both SP and TM and so 'cells'
            # is correct here as opposed to 'columns')
            'synPermConnected': 0.1,

            'synPermActiveInc': 0.1,

            'synPermInactiveDec': 0.005,
        },

        # Controls whether TM is enabled or disabled;
        # TM is necessary for making temporal predictions, such as predicting
        # the next inputs.  Without TM, the model is only capable of
        # reconstructing missing sensor inputs (via SP).
        'tmEnable' : True,

        'tmParams': {
            # TM diagnostic output verbosity control;
            # 0: silent; [1..6]: increasing levels of verbosity
            # (see verbosity in nupic/trunk/py/nupic/research/TP.py and BacktrackingTMCPP.py)
            'verbosity': 0,

            # Number of cell columns in the cortical region (same number for
            # SP and TM)
            # (see also tpNCellsPerCol)
            'columnCount': 2048,

            # The number of cells (i.e., states), allocated per column.
            'cellsPerColumn': 32,

            'inputWidth': 2048,

            'seed': 1960,

            # Temporal Pooler implementation selector (see _getTPClass in
            # CLARegion.py).
            'temporalImp': 'cpp',

            # New Synapse formation count
            # NOTE: If None, use spNumActivePerInhArea
            #
            # TODO: need better explanation
            'newSynapseCount': 20,

            # Maximum number of synapses per segment
            #  > 0 for fixed-size CLA
            # -1 for non-fixed-size CLA
            #
            # TODO: for Ron: once the appropriate value is placed in TP
            # constructor, see if we should eliminate this parameter from
            # description.py.
            'maxSynapsesPerSegment': 32,

            # Maximum number of segments per cell
            #  > 0 for fixed-size CLA
            # -1 for non-fixed-size CLA
            #
            # TODO: for Ron: once the appropriate value is placed in TP
            # constructor, see if we should eliminate this parameter from
            # description.py.
            'maxSegmentsPerCell': 128,

            # Initial Permanence
            # TODO: need better explanation
            'initialPerm': 0.21,

            # Permanence Increment
            'permanenceInc': 0.1,

            # Permanence Decrement
            # If set to None, will automatically default to tpPermanenceInc
            # value.
            'permanenceDec' : 0.1,

            'globalDecay': 0.0,

            'maxAge': 0,

            # Minimum number of active synapses for a segment to be considered
            # during search for the best-matching segments.
            # None=use default
            # Replaces: tpMinThreshold
            'minThreshold': 9,

            # Segment activation threshold.
            # A segment is active if it has >= tpSegmentActivationThreshold
            # connected synapses that are active due to infActiveState
            # None=use default
            # Replaces: tpActivationThreshold
            'activationThreshold': 12,

            'outputType': 'normal',

            # "Pay Attention Mode" length. This tells the TM how many new
            # elements to append to the end of a learned sequence at a time.
            # Smaller values are better for datasets with short sequences,
            # higher values are better for datasets with long sequences.
            'pamLength': 1,
        },

        'clParams': {
            'regionName' : 'SDRClassifierRegion',

            # Classifier diagnostic output verbosity control;
            # 0: silent; [1..6]: increasing levels of verbosity
            'verbosity' : 0,

            # This controls how fast the classifier learns/forgets. Higher values
            # make it adapt faster and forget older patterns faster.
            'alpha': 0.005,

            # This is set after the call to updateConfigFromSubConfig and is
            # computed from the aggregationInfo and predictAheadTime.
            'steps': '1',

            'implementation': 'cpp',
        },

        'anomalyParams': {
            u'anomalyCacheRecords': None,
            u'autoDetectThreshold': None,
            u'autoDetectWaitRecords': 2184
        },

        'trainSPNetOnlyIfRequested': False,
    },
}

In [44]:

from nupic.frameworks.opf.model_factory import ModelFactory

# Build an OPF model from the MODEL_PARAMS dictionary, then enable inference
# with 'consumption' named as the predicted field.
model = ModelFactory.create(MODEL_PARAMS)
inferenceArgs = {'predictedField': 'consumption'}
model.enableInference(inferenceArgs)

In [45]:

data = getData()
for _ in xrange(5):
    record = dict(zip(data.getFieldNames(), data.next()))
    print "input: ", record["consumption"]
    result = model.run(record)
    print "prediction: ", result.inferences["multiStepBestPredictions"][1]

input:  5.3
prediction:  5.3
input:  5.5
prediction:  5.5
input:  5.1
prediction:  5.36
input:  5.3
prediction:  5.1
input:  5.2
prediction:  5.342

In [46]:

# Show the full result object returned by the final model.run() call of the
# loop above (raw input, sensor input, inferences, classifier input, ...).
print result

ModelResult(	predictionNumber=4
	rawInput={'timestamp': datetime.datetime(2010, 7, 2, 1, 0), 'gym': 'Balgowlah Platinum', 'consumption': 5.2, 'address': 'Shop 67 197-215 Condamine Street Balgowlah 2093'}
	sensorInput=SensorInput(	dataRow=(5.2, 1.0)
	dataDict={'timestamp': datetime.datetime(2010, 7, 2, 1, 0), 'gym': 'Balgowlah Platinum', 'consumption': 5.2, 'address': 'Shop 67 197-215 Condamine Street Balgowlah 2093'}
	dataEncodings=[array([ 0.,  0.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.], dtype=float32), array([ 0.,  0.,  0., ...,  0.,  0.,  0.], dtype=float32)]
	sequenceReset=0.0
	category=-1
)
	inferences={'multiStepPredictions': {1: {5.1: 0.0088801263517415546, 5.2: 0.010775254623541418, 5.341999999999999: 0.98034461902471692}}, 'multiStepBucketLikelihoods': {1: {1: 0.0088801263517415546, 2: 0.98034461902471692}}, 'multiStepBestPredictions': {1: 5.341999999999999}, 'anomalyLabel': '[]', 'anomalyScore': 0.40000001}
	metrics=None
	predictedFieldIdx=0
	predictedFieldName=consumption
	classifierInput=ClassifierInput(	dataRow=5.2
	bucketIndex=2
)
)

In [47]:

print "anomaly score: ", result.inferences["anomalyScore"]

anomaly score:  0.4

See Subutai's talk for more info on anomaly detection!

Built-in OPF Clients¶

python examples/opf/bin/OpfRunExperiment.py examples/opf/experiments/multistep/hotgym/

This command writes its output to examples/opf/experiments/multistep/hotgym/inference/DefaultTask.TemporalMultiStep.predictionLog.csv

python bin/run_swarm.py examples/opf/experiments/multistep/hotgym/permutations.py

This command writes its output to examples/opf/experiments/multistep/hotgym/model_0/description.py

In [ ]: