Encoders

  • Scalar
  • Date/time
  • Category
  • Multi
In [1]:
import numpy
In [2]:
from nupic.encoders import ScalarEncoder

ScalarEncoder?
In [3]:
# 22 bits with 3 active, representing values from roughly 0 to 100
# (the encoder itself is configured with minval=2.5 and maxval=97.5).
# clipInput=True makes values above maxval encode the same as maxval (instead of throwing a ValueError)
# forced=True allows small values for `n` and `w`
enc = ScalarEncoder(n=22, w=3, minval=2.5, maxval=97.5, clipInput=True, forced=True)
# 3 and 4 produce the same encoding; 5 shifts the active bits by one position (see the output below).
print "3 =", enc.encode(3)
print "4 =", enc.encode(4)
print "5 =", enc.encode(5)
3 = [1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
4 = [1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
5 = [0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
In [4]:
# Encode 100, which is above maxval (97.5); with clipInput=True it lands on the top of the range
print "100  =", enc.encode(100)
# See that any larger number gets the same encoding
print "1000 =", enc.encode(1000)
100  = [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1]
1000 = [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1]
In [5]:
from nupic.encoders.random_distributed_scalar import RandomDistributedScalarEncoder

RandomDistributedScalarEncoder?
In [6]:
# 21 bits with 3 active with buckets of size 5
rdse = RandomDistributedScalarEncoder(n=21, w=3, resolution=5, offset=2.5)

print "3 =   ", rdse.encode(3)
print "4 =   ", rdse.encode(4)
print "5 =   ", rdse.encode(5)
print
print "100 = ", rdse.encode(100)
print "1000 =", rdse.encode(1000)
3 =    [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1]
4 =    [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1]
5 =    [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1]

100 =  [0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
1000 = [0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0]
In [7]:
import datetime
from nupic.encoders.date import DateEncoder

DateEncoder?
In [8]:
# NOTE(review): presumably season=5 sets the number of active bits of the season
# field -- the encodings printed below are 20 bits wide with 5 bits active. Confirm
# against the DateEncoder docstring.
de = DateEncoder(season=5)

# A fixed reference timestamp (despite the name, this is not the current time).
now = datetime.datetime.strptime("2014-05-02 13:08:58", "%Y-%m-%d %H:%M:%S")
print "now =       ", de.encode(now)
# Exactly one month later: the output below shares three active bits with `now`.
nextMonth = datetime.datetime.strptime("2014-06-02 13:08:58", "%Y-%m-%d %H:%M:%S")
print "next month =", de.encode(nextMonth)
# Late December: in the output below the active bits wrap from the end of the
# encoding around to the start.
xmas = datetime.datetime.strptime("2014-12-25 13:08:58", "%Y-%m-%d %H:%M:%S")
print "xmas =      ", de.encode(xmas)
now =        [0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0]
next month = [0 0 0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0]
xmas =       [1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1]
In [9]:
from nupic.encoders.category import CategoryEncoder

categories = ("cat", "dog", "monkey", "slow loris")
encoder = CategoryEncoder(w=3, categoryList=categories, forced=True)
cat = encoder.encode("cat")
dog = encoder.encode("dog")
monkey = encoder.encode("monkey")
loris = encoder.encode("slow loris")
print "cat =       ", cat
print "dog =       ", dog
print "monkey =    ", monkey
print "slow loris =", loris
cat =        [0 0 0 1 1 1 0 0 0 0 0 0 0 0 0]
dog =        [0 0 0 0 0 0 1 1 1 0 0 0 0 0 0]
monkey =     [0 0 0 0 0 0 0 0 0 1 1 1 0 0 0]
slow loris = [0 0 0 0 0 0 0 0 0 0 0 0 1 1 1]
In [10]:
print encoder.encode(None)
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
In [11]:
print encoder.encode("unknown")
[1 1 1 0 0 0 0 0 0 0 0 0 0 0 0]
In [12]:
print encoder.decode(cat)
({'category': ([(1, 1)], 'cat')}, ['category'])
In [13]:
catdog = numpy.array([0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0])
print encoder.decode(catdog)
({'category': ([(1, 2)], 'cat, dog')}, ['category'])

Spatial Pooler

In [14]:
from nupic.research.spatial_pooler import SpatialPooler

print SpatialPooler?
In [ ]:
print SpatialPooler
In [15]:
print SpatialPooler
<class 'nupic.research.spatial_pooler.SpatialPooler'>
In [16]:
print SpatialPooler
<class 'nupic.research.spatial_pooler.SpatialPooler'>
In [17]:
print SpatialPooler
<class 'nupic.research.spatial_pooler.SpatialPooler'>
In [18]:
print SpatialPooler
<class 'nupic.research.spatial_pooler.SpatialPooler'>
In [19]:
print SpatialPooler
<class 'nupic.research.spatial_pooler.SpatialPooler'>
In [20]:
print SpatialPooler
<class 'nupic.research.spatial_pooler.SpatialPooler'>
In [21]:
print SpatialPooler
<class 'nupic.research.spatial_pooler.SpatialPooler'>
In [22]:
print len(cat)
print cat
15
[0 0 0 1 1 1 0 0 0 0 0 0 0 0 0]
In [23]:
# Build a tiny spatial pooler: 15 input bits (the length of the category
# encodings printed above) feeding 4 columns, with one active column per
# inhibition area. No seed is passed here, so the connections printed below
# may differ between runs -- NOTE(review): confirm the default seeding behaviour.
sp = SpatialPooler(inputDimensions=(15,),
                   columnDimensions=(4,),
                   potentialRadius=15,
                   numActiveColumnsPerInhArea=1,
                   globalInhibition=True,
                   synPermActiveInc=0.03,
                   potentialPct=1.0)
import numpy
# Print, for each of the 4 columns, which of the 15 input bits it is connected to.
for column in xrange(4):
    # getConnectedSynapses writes its result into the array passed in.
    connected = numpy.zeros((15,), dtype="int")
    sp.getConnectedSynapses(column, connected)
    print connected
[0 1 1 0 0 1 0 0 0 1 1 1 1 0 0]
[1 1 1 1 1 0 1 0 1 1 1 0 0 1 0]
[0 1 0 1 1 0 1 0 1 1 0 0 1 0 1]
[0 1 1 1 0 1 0 0 0 1 1 1 1 1 0]
In [24]:
output = numpy.zeros((4,), dtype="int")
sp.compute(cat, learn=True, activeArray=output)
print output
[0 0 0 1]
In [25]:
for _ in xrange(20):
    sp.compute(cat, learn=True, activeArray=output)
In [26]:
for column in xrange(4):
    connected = numpy.zeros((15,), dtype="int")
    sp.getConnectedSynapses(column, connected)
    print connected
[0 1 1 0 0 1 0 0 0 1 1 1 1 0 0]
[1 1 1 1 1 0 1 0 1 1 1 0 0 1 0]
[0 1 0 1 1 0 1 0 1 1 0 0 1 0 1]
[0 0 1 1 1 1 0 0 0 1 1 1 1 0 0]
In [27]:
# 200 learning rounds; each round presents the four category encodings in the
# fixed order cat, dog, monkey, loris.
for _ in xrange(200):
    for sdr in (cat, dog, monkey, loris):
        sp.compute(sdr, learn=True, activeArray=output)
In [28]:
for column in xrange(4):
    connected = numpy.zeros((15,), dtype="int")
    sp.getConnectedSynapses(column, connected)
    print connected
[0 1 1 0 0 1 0 0 0 1 1 1 1 0 0]
[1 1 1 1 1 0 1 0 1 1 1 0 1 1 0]
[0 0 0 0 0 0 1 1 1 0 0 0 1 1 1]
[0 0 0 1 1 1 0 0 0 1 1 1 0 0 0]
In [29]:
noisyCat = numpy.zeros((15,), dtype="uint32")
noisyCat[3] = 1
noisyCat[4] = 1
# This is part of dog!
noisyCat[6] = 1
print noisyCat
[0 0 0 1 1 0 1 0 0 0 0 0 0 0 0]
In [30]:
# Run inference only (learn=False) on the noisy input; the intent is to show it
# activating the same column as the clean cat encoding.
# NOTE(review): the output recorded below is [0 1 0 0], while the cat output
# recorded earlier in this notebook is [0 0 0 1], and no seed is passed to
# SpatialPooler. Re-check this claim for each run, e.g. by also computing `cat`
# with learn=False and comparing the two results.
sp.compute(noisyCat, learn=False, activeArray=output)
print output  # expected to match cat -- verify against cat's output for this run
[0 1 0 0]

Temporal Memory (a.k.a. Sequence Memory, Temporal Pooler)

From: examples/tp/hello_tm.py

In [31]:
from nupic.research.TP import TP

TP?
In [32]:
# Step 1: instantiate the Temporal Pooler. One keyword argument per line so
# each setting is easy to find and tweak.
tp = TP(
    numberOfCols=50,
    cellsPerColumn=2,
    initialPerm=0.5,
    connectedPerm=0.5,
    minThreshold=10,
    newSynapseCount=10,
    permanenceInc=0.1,
    permanenceDec=0.0,
    activationThreshold=8,
    globalDecay=0,
    burnIn=1,
    checkSynapseConsistency=False,
    pamLength=10,
)
In [33]:
# Step 2: build the input vectors for the temporal pooler. Every vector is
# numberOfCols wide, and the five rows form the sequence A -> B -> C -> D -> E.
# Row k (letter "ABCDE"[k]) has columns 10*k through 10*k+9 switched on:
#   A: 0-9, B: 10-19, C: 20-29, D: 30-39, E: 40-49
x = numpy.zeros((5, tp.numberOfCols), dtype="uint32")
for row in range(5):
    x[row, 10 * row:10 * (row + 1)] = 1
In [34]:
# Step 3: send this simple sequence to the temporal pooler for learning
# We repeat the sequence 10 times
for i in range(10):

    # Send each letter in the sequence in order
    for j in range(5):

        # The compute method performs one step of learning and/or inference. Note:
        # here we just perform learning but you can perform prediction/inference and
        # learning in the same step if you want (online learning).
        tp.compute(x[j], enableLearn = True, computeInfOutput = False)

        # This function prints the segments associated with every cell.
        # If you really want to understand the TP, uncomment this line. By following
        # every step you can get an excellent understanding for exactly how the TP
        # learns.
        #tp.printCells()

    # The reset command tells the TP that a sequence just ended and essentially
    # zeros out all the states. It is not strictly necessary but it's a bit
    # messier without resets, and the TP learns quicker with resets.
    tp.reset()
In [35]:
# Step 4: send the same sequence of vectors and look at predictions made by
# temporal pooler

# Utility routine for printing the input vector
def formatRow(x):
    """Render a sequence as text, with a space after every group of ten items.

    Each element is converted with str(). Groups of ten consecutive elements
    are separated by a single space, and the returned string always ends with
    one trailing space (so an empty sequence yields ' ').
    """
    chunks = []
    for start in range(0, len(x), 10):
        stop = min(start + 10, len(x))
        chunks.append(''.join([str(x[idx]) for idx in range(start, stop)]))
    return ' '.join(chunks) + ' '

for j in range(5):
    print "\n\n--------","ABCDE"[j],"-----------"
    print "Raw input vector\n",formatRow(x[j])

    # Send each vector to the TP, with learning turned off
    tp.compute(x[j], enableLearn=False, computeInfOutput=True)

    # This method prints out the active state of each cell followed by the
    # predicted state of each cell. For convenience the cells are grouped
    # 10 at a time. When there are multiple cells per column the printout
    # is arranged so the cells in a column are stacked together
    #
    # What you should notice is that the columns where active state is 1
    # represent the SDR for the current input pattern and the columns where
    # predicted state is 1 represent the SDR for the next expected pattern
    print "\nAll the active and predicted cells:"
    tp.printStates(printPrevious=False, printLearnState=False)

    # tp.getPredictedState() gets the predicted cells.
    # predictedCells[c][i] represents the state of the i'th cell in the c'th
    # column. To see if a column is predicted, we can simply take the OR
    # across all the cells in that column. In numpy we can do this by taking
    # the max along axis 1.
    print "\n\nThe following columns are predicted by the temporal pooler. This"
    print "should correspond to columns in the *next* item in the sequence."
    predictedCells = tp.getPredictedState()
    # nonzero() returns a tuple holding one array of column indices, so formatRow
    # receives a one-element sequence and prints that array's string form
    # (e.g. "[10 11 ... 19] ") rather than a row of 0/1 digits.
    print formatRow(predictedCells.max(axis=1).nonzero())

-------- A -----------
Raw input vector
1111111111 0000000000 0000000000 0000000000 0000000000 

All the active and predicted cells:

Inference Active state
1111111111 0000000000 0000000000 0000000000 0000000000 
0000000000 0000000000 0000000000 0000000000 0000000000 
Inference Predicted state
0000000000 0000000000 0000000000 0000000000 0000000000 
0000000000 1111111111 0000000000 0000000000 0000000000 


The following columns are predicted by the temporal pooler. This
should correspond to columns in the *next* item in the sequence.
[10 11 12 13 14 15 16 17 18 19] 


-------- B -----------
Raw input vector
0000000000 1111111111 0000000000 0000000000 0000000000 

All the active and predicted cells:

Inference Active state
0000000000 0000000000 0000000000 0000000000 0000000000 
0000000000 1111111111 0000000000 0000000000 0000000000 
Inference Predicted state
0000000000 0000000000 0000000000 0000000000 0000000000 
0000000000 0000000000 1111111111 0000000000 0000000000 


The following columns are predicted by the temporal pooler. This
should correspond to columns in the *next* item in the sequence.
[20 21 22 23 24 25 26 27 28 29] 


-------- C -----------
Raw input vector
0000000000 0000000000 1111111111 0000000000 0000000000 

All the active and predicted cells:

Inference Active state
0000000000 0000000000 0000000000 0000000000 0000000000 
0000000000 0000000000 1111111111 0000000000 0000000000 
Inference Predicted state
0000000000 0000000000 0000000000 0000000000 0000000000 
0000000000 0000000000 0000000000 1111111111 0000000000 


The following columns are predicted by the temporal pooler. This
should correspond to columns in the *next* item in the sequence.
[30 31 32 33 34 35 36 37 38 39] 


-------- D -----------
Raw input vector
0000000000 0000000000 0000000000 1111111111 0000000000 

All the active and predicted cells:

Inference Active state
0000000000 0000000000 0000000000 0000000000 0000000000 
0000000000 0000000000 0000000000 1111111111 0000000000 
Inference Predicted state
0000000000 0000000000 0000000000 0000000000 0000000000 
0000000000 0000000000 0000000000 0000000000 1111111111 


The following columns are predicted by the temporal pooler. This
should correspond to columns in the *next* item in the sequence.
[40 41 42 43 44 45 46 47 48 49] 


-------- E -----------
Raw input vector
0000000000 0000000000 0000000000 0000000000 1111111111 

All the active and predicted cells:

Inference Active state
0000000000 0000000000 0000000000 0000000000 0000000000 
0000000000 0000000000 0000000000 0000000000 1111111111 
Inference Predicted state
0000000000 0000000000 0000000000 0000000000 0000000000 
0000000000 0000000000 0000000000 0000000000 0000000000 


The following columns are predicted by the temporal pooler. This
should correspond to columns in the *next* item in the sequence.
[] 

Networks and Regions

See slides.

Online Prediction Framework

  • CLAModel
  • OPF Client
  • Swarming

CLAModel

From examples/opf/clients/hotgym/simple/hotgym.py

Model Parameters

MODEL_PARAMS holds all of the parameters for the CLA model and its subcomponents.

In [36]:
# Model Params!
MODEL_PARAMS = {
    # Type of model that the rest of these parameters apply to.
    'model': "CLA",

    # Version that specifies the format of the config.
    'version': 1,

    # Intermediate variables used to compute fields in modelParams and also
    # referenced from the control section.
    'aggregationInfo': {   'days': 0,
        'fields': [('consumption', 'sum')],
        'hours': 1,
        'microseconds': 0,
        'milliseconds': 0,
        'minutes': 0,
        'months': 0,
        'seconds': 0,
        'weeks': 0,
        'years': 0},

    'predictAheadTime': None,

    # Model parameter dictionary.
    'modelParams': {
        # The type of inference that this model will perform
        'inferenceType': 'TemporalMultiStep',

        'sensorParams': {
            # Sensor diagnostic output verbosity control;
            # if > 0: sensor region will print out on screen what it's sensing
            # at each step. 0: silent; >=1: some info; >=2: more info;
            # >=3: even more info (see compute() in py/regions/RecordSensor.py)
            'verbosity' : 0,

            # Include the encoders we use
            'encoders': {
                u'timestamp_timeOfDay': {
                    'fieldname': u'timestamp',
                    'name': u'timestamp_timeOfDay',
                    'timeOfDay': (21, 0.5),
                    'type': 'DateEncoder'
                },
                u'timestamp_dayOfWeek': None,
                u'timestamp_weekend': None,
                u'consumption': {
                    'clipInput': True,
                    'fieldname': u'consumption',
                    'maxval': 100.0,
                    'minval': 0.0,
                    'n': 50,
                    'name': u'c1',
                    'type': 'ScalarEncoder',
                    'w': 21
                },
            },

            # A dictionary specifying the period for automatically-generated
            # resets from a RecordSensor;
            #
            # None = disable automatically-generated resets (also disabled if
            # all of the specified values evaluate to 0).
            # Valid keys are any combination of the following:
            #   days, hours, minutes, seconds, milliseconds, microseconds, weeks
            #
            # Example for 1.5 days: sensorAutoReset = dict(days=1,hours=12),
            #
            # (value generated from SENSOR_AUTO_RESET)
            'sensorAutoReset' : None,
        },

        'spEnable': True,

        'spParams': {
            # SP diagnostic output verbosity control;
            # 0: silent; >=1: some info; >=2: more info;
            'spVerbosity' : 0,

            # Spatial Pooler implementation selector, see getSPClass
            # in py/regions/SPRegion.py for details
            # 'py' (default), 'cpp' (speed optimized, new)
            'spatialImp' : 'cpp',

            'globalInhibition': 1,

            # Number of cell columns in the cortical region (same number for
            # SP and TP)
            # (see also tpNCellsPerCol)
            'columnCount': 2048,

            'inputWidth': 0,

            # SP inhibition control (absolute value);
            # Maximum number of active columns in the SP region's output (when
            # there are more, the weaker ones are suppressed)
            'numActiveColumnsPerInhArea': 40,

            'seed': 1956,

            # potentialPct
            # What percent of the column's receptive field is available
            # for potential synapses. At initialization time, we will
            # choose potentialPct * (2*potentialRadius+1)^2
            'potentialPct': 0.5,

            # The default connected threshold. Any synapse whose
            # permanence value is above the connected threshold is
            # a "connected synapse", meaning it can contribute to the
            # cell's firing. Typical value is 0.10. Cells whose activity
            # level before inhibition falls below minDutyCycleBeforeInh
            # will have their own internal synPermConnectedCell
            # threshold set below this default value.
            # (This concept applies to both SP and TP and so 'cells'
            # is correct here as opposed to 'columns')
            'synPermConnected': 0.1,

            'synPermActiveInc': 0.1,

            'synPermInactiveDec': 0.005,
        },

        # Controls whether TP is enabled or disabled;
        # TP is necessary for making temporal predictions, such as predicting
        # the next inputs.  Without TP, the model is only capable of
        # reconstructing missing sensor inputs (via SP).
        'tpEnable' : True,

        'tpParams': {
            # TP diagnostic output verbosity control;
            # 0: silent; [1..6]: increasing levels of verbosity
            # (see verbosity in nupic/trunk/py/nupic/research/TP.py and TP10X*.py)
            'verbosity': 0,

            # Number of cell columns in the cortical region (same number for
            # SP and TP)
            # (see also tpNCellsPerCol)
            'columnCount': 2048,

            # The number of cells (i.e., states), allocated per column.
            'cellsPerColumn': 32,

            'inputWidth': 2048,

            'seed': 1960,

            # Temporal Pooler implementation selector (see _getTPClass in
            # CLARegion.py).
            'temporalImp': 'cpp',

            # New Synapse formation count
            # NOTE: If None, use spNumActivePerInhArea
            #
            # TODO: need better explanation
            'newSynapseCount': 20,

            # Maximum number of synapses per segment
            #  > 0 for fixed-size CLA
            # -1 for non-fixed-size CLA
            #
            # TODO: for Ron: once the appropriate value is placed in TP
            # constructor, see if we should eliminate this parameter from
            # description.py.
            'maxSynapsesPerSegment': 32,

            # Maximum number of segments per cell
            #  > 0 for fixed-size CLA
            # -1 for non-fixed-size CLA
            #
            # TODO: for Ron: once the appropriate value is placed in TP
            # constructor, see if we should eliminate this parameter from
            # description.py.
            'maxSegmentsPerCell': 128,

            # Initial Permanence
            # TODO: need better explanation
            'initialPerm': 0.21,

            # Permanence Increment
            'permanenceInc': 0.1,

            # Permanence Decrement
            # If set to None, will automatically default to tpPermanenceInc
            # value.
            'permanenceDec' : 0.1,

            'globalDecay': 0.0,

            'maxAge': 0,

            # Minimum number of active synapses for a segment to be considered
            # during search for the best-matching segments.
            # None=use default
            # Replaces: tpMinThreshold
            'minThreshold': 9,

            # Segment activation threshold.
            # A segment is active if it has >= tpSegmentActivationThreshold
            # connected synapses that are active due to infActiveState
            # None=use default
            # Replaces: tpActivationThreshold
            'activationThreshold': 12,

            'outputType': 'normal',

            # "Pay Attention Mode" length. This tells the TP how many new
            # elements to append to the end of a learned sequence at a time.
            # Smaller values are better for datasets with short sequences,
            # higher values are better for datasets with long sequences.
            'pamLength': 1,
        },

        'clParams': {
            'regionName' : 'SDRClassifierRegion',

            # Classifier diagnostic output verbosity control;
            # 0: silent; [1..6]: increasing levels of verbosity
            'verbosity' : 0,

            # This controls how fast the classifier learns/forgets. Higher values
            # make it adapt faster and forget older patterns faster.
            'alpha': 0.005,

            # This is set after the call to updateConfigFromSubConfig and is
            # computed from the aggregationInfo and predictAheadTime.
            'steps': '1,5',

            'implementation': 'cpp',
        },

        'trainSPNetOnlyIfRequested': False,
    },
}

Dataset Helpers

In [37]:
from pkg_resources import resource_filename

# Resolve the on-disk location of the hotgym CSV inside the nupic.datafiles package.
datasetPath = resource_filename("nupic.datafiles", "extra/hotgym/hotgym.csv")
print datasetPath

# Peek at the first 8 raw lines of the file: as the output below shows, these
# are three header rows followed by the first data rows.
with open(datasetPath) as inputFile:
    print
    for _ in xrange(8):
        print inputFile.next().strip()
/Users/nromano/workspace/nupic/src/nupic/datafiles/extra/hotgym/hotgym.csv

gym,address,timestamp,consumption
string,string,datetime,float
S,,T,
Balgowlah Platinum,Shop 67 197-215 Condamine Street Balgowlah 2093,2010-07-02 00:00:00.0,5.3
Balgowlah Platinum,Shop 67 197-215 Condamine Street Balgowlah 2093,2010-07-02 00:15:00.0,5.5
Balgowlah Platinum,Shop 67 197-215 Condamine Street Balgowlah 2093,2010-07-02 00:30:00.0,5.1
Balgowlah Platinum,Shop 67 197-215 Condamine Street Balgowlah 2093,2010-07-02 00:45:00.0,5.3
Balgowlah Platinum,Shop 67 197-215 Condamine Street Balgowlah 2093,2010-07-02 01:00:00.0,5.2

Loading Data

FileRecordStream - file reader for the NuPIC file format (CSV with three header rows, understands datetimes)

In [38]:
from nupic.data.file_record_stream import FileRecordStream

def getData():
    """Return a new FileRecordStream that reads the file at datasetPath."""
    stream = FileRecordStream(datasetPath)
    return stream

data = getData()
for _ in xrange(5):
    print data.next()
['Balgowlah Platinum', 'Shop 67 197-215 Condamine Street Balgowlah 2093', datetime.datetime(2010, 7, 2, 0, 0), 5.3]
['Balgowlah Platinum', 'Shop 67 197-215 Condamine Street Balgowlah 2093', datetime.datetime(2010, 7, 2, 0, 15), 5.5]
['Balgowlah Platinum', 'Shop 67 197-215 Condamine Street Balgowlah 2093', datetime.datetime(2010, 7, 2, 0, 30), 5.1]
['Balgowlah Platinum', 'Shop 67 197-215 Condamine Street Balgowlah 2093', datetime.datetime(2010, 7, 2, 0, 45), 5.3]
['Balgowlah Platinum', 'Shop 67 197-215 Condamine Street Balgowlah 2093', datetime.datetime(2010, 7, 2, 1, 0), 5.2]
In [39]:
from nupic.frameworks.opf.modelfactory import ModelFactory
model = ModelFactory.create(MODEL_PARAMS)
model.enableInference({'predictedField': 'consumption'})
In [40]:
# Open a fresh stream so the loop starts from the first record again.
data = getData()
for _ in xrange(100):
    # Pair the field names with the row values so the model receives a dict
    # keyed by field name.
    record = dict(zip(data.getFieldNames(), data.next()))
    print "input: ", record["consumption"]
    result = model.run(record)
    # multiStepBestPredictions is keyed by the number of steps ahead; [1] is the
    # 1-step-ahead prediction (the 5-step entry is read in the next cell).
    print "prediction: ", result.inferences["multiStepBestPredictions"][1]
input:  5.3
prediction:  5.3
input:  5.5
prediction:  5.5
input:  5.1
prediction:  5.36
input:  5.3
prediction:  5.1
input:  5.2
prediction:  5.342
input:  5.5
prediction:  5.2994
input:  4.5
prediction:  5.35958
input:  1.2
prediction:  5.35958
input:  1.1
prediction:  5.35958
input:  1.2
prediction:  1.17
input:  1.2
prediction:  1.179
input:  1.2
prediction:  1.1853
input:  1.2
prediction:  1.18971
input:  1.2
prediction:  1.192797
input:  1.1
prediction:  1.1949579
input:  1.2
prediction:  1.16647053
input:  1.1
prediction:  1.176529371
input:  1.2
prediction:  1.1535705597
input:  1.2
prediction:  1.16749939179
input:  1.1
prediction:  1.17724957425
input:  1.2
prediction:  1.15407470198
input:  6.0
prediction:  1.16785229138
input:  7.9
prediction:  1.16785229138
input:  8.4
prediction:  1.16785229138
input:  10.6
prediction:  1.16785229138
input:  12.4
prediction:  1.16785229138
input:  12.1
prediction:  1.16785229138
input:  12.4
prediction:  1.16785229138
input:  11.4
prediction:  1.16785229138
input:  11.2
prediction:  1.16785229138
input:  10.8
prediction:  1.16785229138
input:  12.0
prediction:  1.16785229138
input:  11.8
prediction:  11.23252
input:  11.9
prediction:  11.402764
input:  11.4
prediction:  11.5519348
input:  11.0
prediction:  1.16785229138
input:  9.8
prediction:  1.16785229138
input:  9.8
prediction:  10.8881136364
input:  10.8
prediction:  10.5616795455
input:  11.1
prediction:  10.6331756818
input:  11.1
prediction:  10.7732229773
input:  11.0
prediction:  10.8712560841
input:  10.7
prediction:  10.9098792589
input:  10.6
prediction:  10.8469154812
input:  10.3
prediction:  10.7728408368
input:  10.1
prediction:  10.6309885858
input:  12.9
prediction:  10.4716920101
input:  10.5
prediction:  10.4716920101
input:  9.7
prediction:  10.480184407
input:  9.7
prediction:  10.2461290849
input:  9.2
prediction:  10.0822903594
input:  9.2
prediction:  1.16785229138
input:  9.2
prediction:  1.16785229138
input:  9.3
prediction:  1.16785229138
input:  9.1
prediction:  1.16785229138
input:  9.0
prediction:  1.16785229138
input:  8.9
prediction:  1.16785229138
input:  9.0
prediction:  1.16785229138
input:  8.9
prediction:  1.16785229138
input:  8.9
prediction:  1.16785229138
input:  9.0
prediction:  1.16785229138
input:  9.2
prediction:  1.16785229138
input:  10.0
prediction:  1.16785229138
input:  10.7
prediction:  1.16785229138
input:  8.9
prediction:  1.16785229138
input:  9.0
prediction:  1.16785229138
input:  9.0
prediction:  1.16785229138
input:  9.3
prediction:  1.16785229138
input:  9.3
prediction:  1.16785229138
input:  9.1
prediction:  1.16785229138
input:  9.1
prediction:  1.16785229138
input:  9.1
prediction:  1.16785229138
input:  9.2
prediction:  1.16785229138
input:  9.4
prediction:  1.16785229138
input:  9.3
prediction:  1.16785229138
input:  9.3
prediction:  1.16785229138
input:  9.1
prediction:  1.16785229138
input:  9.1
prediction:  1.16785229138
input:  11.0
prediction:  1.16785229138
input:  9.0
prediction:  1.16785229138
input:  8.6
prediction:  1.16785229138
input:  3.0
prediction:  1.16785229138
input:  1.3
prediction:  1.16785229138
input:  1.2
prediction:  1.20749660397
input:  1.3
prediction:  1.20524762278
input:  1.3
prediction:  1.23367333594
input:  1.3
prediction:  1.25357133516
input:  1.2
prediction:  1.26749993461
input:  1.3
prediction:  1.24724995423
input:  1.2
prediction:  1.26307496796
input:  1.3
prediction:  1.24415247757
input:  1.2
prediction:  1.2609067343
input:  1.3
prediction:  1.24263471401
input:  1.2
prediction:  1.25984429981
input:  1.1
prediction:  1.24189100987
input:  2.3
prediction:  1.19932370691
input:  5.5
prediction:  1.19932370691
input:  5.5
prediction:  1.19932370691
input:  5.8
prediction:  1.19932370691
input:  5.7
prediction:  9.50994186712
In [41]:
print "5-step prediction: ", result.inferences["multiStepBestPredictions"][5]
5-step prediction:  9.50994186712

Anomaly Score

In [42]:
# Model Params!
MODEL_PARAMS = {
    # Type of model that the rest of these parameters apply to.
    'model': "CLA",

    # Version that specifies the format of the config.
    'version': 1,

    # Intermediate variables used to compute fields in modelParams and also
    # referenced from the control section.
    'aggregationInfo': {   'days': 0,
        'fields': [('consumption', 'sum')],
        'hours': 1,
        'microseconds': 0,
        'milliseconds': 0,
        'minutes': 0,
        'months': 0,
        'seconds': 0,
        'weeks': 0,
        'years': 0},

    'predictAheadTime': None,

    # Model parameter dictionary.
    'modelParams': {
        # The type of inference that this model will perform
        'inferenceType': 'TemporalAnomaly',

        'sensorParams': {
            # Sensor diagnostic output verbosity control;
            # if > 0: sensor region will print out on screen what it's sensing
            # at each step. 0: silent; >=1: some info; >=2: more info;
            # >=3: even more info (see compute() in py/regions/RecordSensor.py)
            'verbosity' : 0,

            # Include the encoders we use
            'encoders': {
                u'timestamp_timeOfDay': {
                    'fieldname': u'timestamp',
                    'name': u'timestamp_timeOfDay',
                    'timeOfDay': (21, 0.5),
                    'type': 'DateEncoder'},
                u'timestamp_dayOfWeek': None,
                u'timestamp_weekend': None,
                u'consumption': {
                    'clipInput': True,
                    'fieldname': u'consumption',
                    'maxval': 100.0,
                    'minval': 0.0,
                    'n': 50,
                    'name': u'c1',
                    'type': 'ScalarEncoder',
                    'w': 21},},

            # A dictionary specifying the period for automatically-generated
            # resets from a RecordSensor;
            #
            # None = disable automatically-generated resets (also disabled if
            # all of the specified values evaluate to 0).
            # Valid keys are any combination of the following:
            #   days, hours, minutes, seconds, milliseconds, microseconds, weeks
            #
            # Example for 1.5 days: sensorAutoReset = dict(days=1,hours=12),
            #
            # (value generated from SENSOR_AUTO_RESET)
            'sensorAutoReset' : None,
        },

        'spEnable': True,

        'spParams': {
            # SP diagnostic output verbosity control;
            # 0: silent; >=1: some info; >=2: more info;
            'spVerbosity' : 0,

            # Spatial Pooler implementation selector, see getSPClass
            # in py/regions/SPRegion.py for details
            # 'py' (default), 'cpp' (speed optimized, new)
            'spatialImp' : 'cpp',

            'globalInhibition': 1,

            # Number of cell columns in the cortical region (same number for
            # SP and TP)
            # (see also tpNCellsPerCol)
            'columnCount': 2048,

            'inputWidth': 0,

            # SP inhibition control (absolute value);
            # Maximum number of active columns in the SP region's output (when
            # there are more, the weaker ones are suppressed)
            'numActiveColumnsPerInhArea': 40,

            'seed': 1956,

            # potentialPct
            # What percent of the column's receptive field is available
            # for potential synapses. At initialization time, we will
            # choose potentialPct * (2*potentialRadius+1)^2
            'potentialPct': 0.5,

            # The default connected threshold. Any synapse whose
            # permanence value is above the connected threshold is
            # a "connected synapse", meaning it can contribute to the
            # cell's firing. Typical value is 0.10. Cells whose activity
            # level before inhibition falls below minDutyCycleBeforeInh
            # will have their own internal synPermConnectedCell
            # threshold set below this default value.
            # (This concept applies to both SP and TP and so 'cells'
            # is correct here as opposed to 'columns')
            'synPermConnected': 0.1,

            'synPermActiveInc': 0.1,

            'synPermInactiveDec': 0.005,
        },

        # Controls whether TP is enabled or disabled;
        # TP is necessary for making temporal predictions, such as predicting
        # the next inputs.  Without TP, the model is only capable of
        # reconstructing missing sensor inputs (via SP).
        'tpEnable' : True,

        'tpParams': {
            # TP diagnostic output verbosity control;
            # 0: silent; [1..6]: increasing levels of verbosity
            # (see verbosity in nupic/trunk/py/nupic/research/TP.py and TP10X*.py)
            'verbosity': 0,

            # Number of cell columns in the cortical region (same number for
            # SP and TP)
            # (see also tpNCellsPerCol)
            'columnCount': 2048,

            # The number of cells (i.e., states), allocated per column.
            'cellsPerColumn': 32,

            'inputWidth': 2048,

            'seed': 1960,

            # Temporal Pooler implementation selector (see _getTPClass in
            # CLARegion.py).
            'temporalImp': 'cpp',

            # New Synapse formation count
            # NOTE: If None, use spNumActivePerInhArea
            #
            # TODO: need better explanation
            'newSynapseCount': 20,

            # Maximum number of synapses per segment
            #  > 0 for fixed-size CLA
            # -1 for non-fixed-size CLA
            #
            # TODO: for Ron: once the appropriate value is placed in TP
            # constructor, see if we should eliminate this parameter from
            # description.py.
            'maxSynapsesPerSegment': 32,

            # Maximum number of segments per cell
            #  > 0 for fixed-size CLA
            # -1 for non-fixed-size CLA
            #
            # TODO: for Ron: once the appropriate value is placed in TP
            # constructor, see if we should eliminate this parameter from
            # description.py.
            'maxSegmentsPerCell': 128,

            # Initial Permanence
            # TODO: need better explanation
            'initialPerm': 0.21,

            # Permanence Increment
            'permanenceInc': 0.1,

            # Permanence Decrement
            # If set to None, will automatically default to tpPermanenceInc
            # value.
            'permanenceDec' : 0.1,

            'globalDecay': 0.0,

            'maxAge': 0,

            # Minimum number of active synapses for a segment to be considered
            # during search for the best-matching segments.
            # None=use default
            # Replaces: tpMinThreshold
            'minThreshold': 9,

            # Segment activation threshold.
            # A segment is active if it has >= tpSegmentActivationThreshold
            # connected synapses that are active due to infActiveState
            # None=use default
            # Replaces: tpActivationThreshold
            'activationThreshold': 12,

            'outputType': 'normal',

            # "Pay Attention Mode" length. This tells the TP how many new
            # elements to append to the end of a learned sequence at a time.
            # Smaller values are better for datasets with short sequences,
            # higher values are better for datasets with long sequences.
            'pamLength': 1,
        },

        'clParams': {
            'regionName' : 'SDRClassifierRegion',

            # Classifier diagnostic output verbosity control;
            # 0: silent; [1..6]: increasing levels of verbosity
            'verbosity' : 0,

            # This controls how fast the classifier learns/forgets. Higher values
            # make it adapt faster and forget older patterns faster.
            'alpha': 0.005,

            # This is set after the call to updateConfigFromSubConfig and is
            # computed from the aggregationInfo and predictAheadTime.
            'steps': '1',

            'implementation': 'cpp',
        },

        'anomalyParams': {
            u'anomalyCacheRecords': None,
            u'autoDetectThreshold': None,
            u'autoDetectWaitRecords': 2184
        },

        'trainSPNetOnlyIfRequested': False,
    },
}
In [43]:
# Build an OPF model from the MODEL_PARAMS dictionary and enable inference
# with 'consumption' as the predicted field.
from nupic.frameworks.opf.modelfactory import ModelFactory

inferenceArgs = {'predictedField': 'consumption'}
model = ModelFactory.create(MODEL_PARAMS)
model.enableInference(inferenceArgs)
In [44]:
data = getData()
for _ in xrange(5):
    record = dict(zip(data.getFieldNames(), data.next()))
    print "input: ", record["consumption"]
    result = model.run(record)
    print "prediction: ", result.inferences["multiStepBestPredictions"][1]
input:  5.3
prediction:  5.3
input:  5.5
prediction:  5.5
input:  5.1
prediction:  5.36
input:  5.3
prediction:  5.1
input:  5.2
prediction:  5.342
In [45]:
# Show the complete ModelResult for the last record that was run: raw input,
# sensor encodings, inferences and classifier input.
print result
ModelResult(	predictionNumber=4
	rawInput={'timestamp': datetime.datetime(2010, 7, 2, 1, 0), 'gym': 'Balgowlah Platinum', 'consumption': 5.2, 'address': 'Shop 67 197-215 Condamine Street Balgowlah 2093'}
	sensorInput=SensorInput(	dataRow=(5.2, 1.0)
	dataDict={'timestamp': datetime.datetime(2010, 7, 2, 1, 0), 'gym': 'Balgowlah Platinum', 'consumption': 5.2, 'address': 'Shop 67 197-215 Condamine Street Balgowlah 2093'}
	dataEncodings=[array([ 0.,  0.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.], dtype=float32), array([ 0.,  0.,  0., ...,  0.,  0.,  0.], dtype=float32)]
	sequenceReset=0.0
	category=-1
)
	inferences={'multiStepPredictions': {1: {5.1: 0.29314434936959172, 5.341999999999999: 0.70685565063040834}}, 'multiStepBucketLikelihoods': {1: {1: 0.29314434936959172, 2: 0.70685565063040834}}, 'multiStepBestPredictions': {1: 5.341999999999999}, 'anomalyLabel': '[]', 'anomalyScore': 0.29999999999999999}
	metrics=None
	predictedFieldIdx=0
	predictedFieldName=consumption
	classifierInput=ClassifierInput(	dataRow=5.2
	bucketIndex=2
)
)
In [46]:
print "anomaly score: ", result.inferences["anomalyScore"]
anomaly score:  0.3

See Subutai's talk for more info on anomaly detection!

Built-in OPF Clients

python examples/opf/bin/OpfRunExperiment.py examples/opf/experiments/multistep/hotgym/

This writes its output to examples/opf/experiments/multistep/hotgym/inference/DefaultTask.TemporalMultiStep.predictionLog.csv

python bin/run_swarm.py examples/opf/experiments/multistep/hotgym/permutations.py

This writes its output to examples/opf/experiments/multistep/hotgym/model_0/description.py

In [ ]: