#!/usr/bin/env python
# coding: utf-8

# In[1]:


# Load the IPython "pylab" environment with inline figure rendering. Later cells
# use names such as `array`, `rand`, `roll`, `plot` and `cm` without importing
# them, so they depend on this magic having populated the namespace.
get_ipython().run_line_magic('pylab', 'inline')
# Request a 10 by 5 figure size. `figsize` is not imported anywhere in this
# file, so it is presumably supplied by the pylab magic above — TODO confirm.
figsize(10,5)


# In[2]:


import clstm


# Network creation and initialization is very similar to C++:
# 
#  - networks are created using the `make_net(name)` factory function
#  - the `net.set(key,value)` method is used to set up parameters
#  - the `.setLearningRate(lr,mom)` method is used to set learning rate and momentum
#  - `.initialize()` is called to create the network
# 
# As in C++, the combination of `make_net` and `set` does not allow arbitrary network architectures to be constructed. For anything complicated, you need to build the network structure directly, using the navigation methods shown further below.

# In[3]:


# Create the network through the clstm factory: kind "lstm1", configured by a
# colon-separated key=value string (ninput=1, nhidden=4, noutput=2).
# NOTE(review): judging by its name, make_net_init also initializes the network,
# which would explain why no separate initialize() call appears here — confirm
# against the clstm bindings.
net = clstm.make_net_init("lstm1","ninput=1:nhidden=4:noutput=2")
# Call-style print with a single argument behaves the same on Python 2 and is
# also valid Python 3, where the bare `print net` statement is a SyntaxError.
print(net)


# In[6]:


# Configure the optimizer: learning rate 1e-4, momentum 0.9.
net.setLearningRate(1e-4,0.9)
# Show the textual network summary produced by clstm. The single-argument
# print(...) form works on both Python 2 and Python 3.
print(clstm.network_info_as_string(net))


# You can navigate the network structure as you would in C++. You can use similar methods to create more complex network architectures than possible with `make_net`.

# In[7]:


# Walk the network structure: number of sub-networks, the first sub-network
# object, and that sub-network's `kind` attribute. The single-argument
# print(...) form works on both Python 2 and Python 3.
print(net.sub.size())
print(net.sub[0])
print(net.sub[0].kind)


# This cell generally illustrates how to invoke the CLSTM library from Python:
# 
#  - `net.inputs`, `net.outputs`, `net.d_inputs`, and `net.d_outputs` are `Sequence` types
#  - `Sequence` objects can be converted to rank-3 arrays using the `.array()` method
#  - The values in a `Sequence` can be set with the `.aset(array)` method

# In[8]:


# Push one random binary sequence through the network to demonstrate the
# aset()/forward() calling convention.
N = 20
# Threshold random draws into 0/1 values stored as float32, shaped (N, 1, 1).
# NOTE(review): this cell draws from randn (standard normal) while the training
# cell below uses rand (uniform) — confirm the difference is intentional.
xs = (randn(N, 1, 1) < 0.2).astype('f')
net.inputs.aset(xs)
net.forward()


# Here is a training loop that generates a delayed-by-one copy of a random input sequence and trains the network to reproduce it.

# In[11]:


# Delayed-by-one task: the target at step t is the input at step t-1 (0 at
# t=0), encoded as the two output channels [1-y, y].
N = 20
# Fixed probe sequence, drawn once and re-evaluated periodically during training.
test = (rand(N) < 0.3).astype('f')
plot(test, '--', c="black")
ntrain = 30000
for i in range(ntrain):
    # Fresh random binary input for this iteration.
    xs = (rand(N) < 0.3).astype('f')

    # Shift by one position and clear the element that wrapped around.
    delayed = roll(xs, 1)
    delayed[0] = 0
    # Stack the complement and the delayed signal into an (N, 2) float32 array.
    ys = array([1 - delayed, delayed], 'f').T.copy()

    net.inputs.aset(xs.reshape(N, 1, 1))
    net.forward()

    # Hand (target - output) to the network via outputs.dset(), then
    # back-propagate and apply one SGD update.
    deltas = ys.reshape(N, 2, 1) - net.outputs.array()
    net.outputs.dset(deltas)
    net.backward()
    clstm.sgd_update(net)

    if i % 1000 != 0:
        continue

    # Every 1000 iterations: run the probe sequence and plot output channel 1,
    # coloured by training progress through the jet colormap.
    net.inputs.aset(test.reshape(N, 1, 1))
    net.forward()
    progress = float(i) / ntrain
    plot(net.outputs.array()[:, 1, 0], c=cm.jet(progress))


# In[ ]: