#!/usr/bin/env python
# coding: utf-8

# In[1]:

# `%pylab inline` pulls the numpy/matplotlib names used below (array, randn,
# rand, roll, plot, cm, figsize, ...) into the interactive namespace.
get_ipython().run_line_magic('pylab', 'inline')
figsize(10, 5)

# In[2]:

import clstm

# Network creation and initialization is very similar to C++:
#
# - networks are created using the `make_net(name)` factory function
# - the `net.set(key,value)` method is used to set up parameters
# - the `.setLearningRate(lr,mom)` method is used to set learning rate and momentum
# - `.initialize()` is called to create the network
#
# As in C++, the combination of `make_net` and `set` does not allow arbitrary
# network architectures to be constructed. For anything more complicated, you
# have to construct the network structure directly (see the note on
# navigating `net.sub` below).

# In[3]:

# NOTE(review): `make_net_init` is called with the network name plus a
# colon-separated "key=value" parameter string; presumably it combines the
# make_net / set / initialize steps listed above -- confirm against clstm.
net = clstm.make_net_init("lstm1", "ninput=1:nhidden=4:noutput=2")
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(net)

# In[6]:

# Arguments are (learning rate, momentum), per the method list above.
net.setLearningRate(1e-4, 0.9)
print(clstm.network_info_as_string(net))

# You can navigate the network structure as you would in C++. You can use
# similar methods to create more complex network architectures than possible
# with `make_net`.

# In[7]:

print(net.sub.size())
print(net.sub[0])
print(net.sub[0].kind)

# This cell generally illustrates how to invoke the CLSTM library from Python:
#
# - `net.inputs`, `net.outputs`, `net.d_inputs`, and `net.d_outputs` are `Sequence` types
# - `Sequence` objects can be converted to rank 3 arrays using the `.array()` method
# - The values in a `Sequence` can be set with the `.aset(array)` method

# In[8]:

N = 20
# Random binary input of shape (N, 1, 1), stored as float32 ('f').
xs = array(randn(N, 1, 1) < 0.2, 'f')
net.inputs.aset(xs)
net.forward()

# Here is a training loop that generates a delayed-by-one target from a random
# input sequence and trains the network to learn this task.
# In[11]:

# Train the network on the delayed-by-one task and plot how its prediction
# for a fixed test sequence evolves during training.
N = 20

# Fixed random binary test sequence, drawn as a dashed black reference curve.
test = array(rand(N) < 0.3, 'f')
plot(test, '--', c="black")

ntrain = 30000
for i in range(ntrain):
    # Fresh random binary input for this step.
    xs = array(rand(N) < 0.3, 'f')

    # Target: the input shifted right by one position, first element zeroed.
    ys = roll(xs, 1)
    ys[0] = 0
    # Two columns per time step, (1 - ys, ys), giving an (N, 2) float32 array.
    ys = array([1 - ys, ys], 'f').T.copy()

    net.inputs.aset(xs.reshape(N, 1, 1))
    net.forward()
    # Hand (target - output) to the network before the backward pass.
    # NOTE(review): `dset` presumably fills the output deltas -- confirm
    # against the clstm bindings.
    net.outputs.dset(ys.reshape(N, 2, 1) - net.outputs.array())
    net.backward()
    clstm.sgd_update(net)

    # Only every 1000th iteration produces a plot.
    if i % 1000 != 0:
        continue

    # Run the fixed test sequence and plot output channel 1, coloured by the
    # fraction of training completed.
    net.inputs.aset(test.reshape(N, 1, 1))
    net.forward()
    plot(net.outputs.array()[:, 1, 0], c=cm.jet(float(i) / ntrain))

# In[ ]: