First we start with some magic ...

In [9]:
%erp5_url http://10.0.180.166:2222/erp5/Base_executeJupyter
Your erp5_url is http://10.0.180.166:2222/erp5/Base_executeJupyter. 
Please proceed
In [10]:
%notebook_set_reference NB-OTHERZ
Your notebook_set_reference is NB-OTHERZ. 
Please proceed
In [11]:
%erp5_user zope
Your erp5_user is zope. 
Please proceed
In [12]:
%erp5_password insecure
Your erp5_password is insecure. 
Please proceed

"context" gives you the connection to Wendelin

In [13]:
# Get the ERP5 object.
# "context" is not defined anywhere in this notebook; presumably it is
# injected by the ERP5 kernel (TODO confirm). Its repr below shows that it
# is the ERP5 site itself.
context
<ERP5Site at /erp5>

Get a Data Stream by its ID

In [14]:
# Fetch the Data Stream the wav file was uploaded to, straight from its
# module. DATA_STREAM_ID is the ID of that document in ERP5.
DATA_STREAM_ID = "209"
context.data_stream_module[DATA_STREAM_ID]
<Data Stream at /erp5/data_stream_module/209>

Let's search for it through the Catalog instead

In [15]:
# Ask the catalog for the Data Stream whose reference is "wavdemo".
catalog = context.portal_catalog
result = catalog.getResultValue(reference='wavdemo', portal_type="Data Stream")

# The query result is not used directly as the document: the document
# itself is fetched from it here.
data_stream = result.getObject()

data_stream
<Data Stream at /erp5/data_stream_module/209>

"On core" vs "Out-of-Core"

In [16]:
# BAD BAD BAD BAD!!
# This is NOT out-of-core: getData() hands back the whole content as one
# single string, so you should avoid doing it this way.
datastream_as_string = data_stream.getData()

total_length = len(datastream_as_string)
print("Total is %s" % total_length)
print(type(datastream_as_string))
Total is 10339336
<type 'str'>
In [17]:
# Now the out-of-core way: walk the stream chunk by chunk instead of
# asking for all of the data at once.
stream = data_stream.data

l = sum(len(chunk) for chunk in stream.iterate())  # out of core
print("Total is %s" % l)
print(type(stream))
Total is 10339336
<class 'Products.ERP5Type.BTreeData.BTreeData'>

A few needed imports

In [18]:
# Import needed libraries to use later
import matplotlib.pyplot as plt
from scipy.fftpack import fft

# Import also scipy to read the audio file
import scipy
import scipy.io
from scipy.io.wavfile import read

Calculate a simple FFT

In [26]:
#############################################################
# Create a class that wraps the file API,
# so we can pass out-of-core objects that behave like an ordinary file.
# Note that this does not guarantee that "scipy.io.wavfile.read" itself
# will keep things out of core.
#############################################################

class BigFileReader(object):
    """Minimal read-only, file-like wrapper around an out-of-core object.

    The wrapped object only needs an ``iterate(offset, size)`` method that
    yields the stored data as string chunks, and that walks all of the data
    when called without arguments. Only ``tell``, ``seek`` and ``read`` are
    provided, which is the part of the file API a sequential parser needs.
    """

    def __init__(self, bigfile):
        """Wrap ``bigfile`` and start reading at offset 0."""
        self.bigfile = bigfile
        self.pos = 0

    def tell(self):
        """Return the current read offset."""
        return self.pos

    def seek(self, pos, whence=0):
        """Move the read offset, following the usual file ``seek`` contract.

        ``whence`` is 0 (absolute, the default), 1 (relative to the current
        offset) or 2 (relative to the end of the data).

        Raises ValueError for an unknown ``whence`` or when the resulting
        offset would be negative. Seeking past the end is allowed, as with
        regular files.
        """
        if whence == 0:
            new_pos = pos
        elif whence == 1:
            new_pos = self.pos + pos
        elif whence == 2:
            new_pos = self._size() + pos
        else:
            raise ValueError(
                "invalid whence (%r, should be 0, 1 or 2)" % (whence,))
        if new_pos < 0:
            raise ValueError("negative seek position %r" % (new_pos,))
        self.pos = new_pos

    def read(self, n=-1):
        """Return up to ``n`` characters from the current offset.

        A negative ``n`` or ``None`` reads everything up to the end of the
        data. The read offset advances by the amount actually returned.
        """
        if n == 0:
            return ''
        if n is None or n < 0:
            # No size given: let iterate() run from the offset to the end.
            chunks = self.bigfile.iterate(self.pos)
        else:
            chunks = self.bigfile.iterate(self.pos, n)
        data = ''.join(chunks)
        self.pos += len(data)
        return data

    def _size(self):
        """Return the total data length, measured chunk by chunk."""
        # Summing chunk lengths avoids loading all of the data at once.
        return sum(len(chunk) for chunk in self.bigfile.iterate())


# Parse the wav data through the file-like wrapper.
# scipy.io.wavfile.read returns the sample rate and the samples.
fs, data = read(BigFileReader(data_stream.data)) 

# Keep only the first channel. A mono file yields a 1-D array, for which
# "data.T[0]" would be one single sample instead of a channel, so such
# data is used as is.
array = data.T[0] if data.ndim > 1 else data # not out of core


###############################################################
# Create the array in Wendelin and save it there to make it out-of-core.
# With this persistent API you can save an array and append to it little
# by little, so that it can grow continuously.

out_of_core_array = context.data_array_module.newContent(
    array=array, 
    portal_type="Data Array", 
    title="pydata-wav2")

# FFT of the saved signal.
cmplx = fft(out_of_core_array.getArray()) 
# Magnitude of the first half of the FFT output. "//" keeps the slice
# bound an integer whatever division semantics are in effect; a float
# bound would be rejected by the slice.
spectrum = abs(cmplx[:(len(cmplx) // 2) - 1]) # not out of core

##### Also save the spectrum array.
# Saving it makes the spectrum out-of-core as well.
out_of_core_spectrum_array = context.data_array_module.newContent(
    array=spectrum, 
    portal_type="Data Array", 
    title="pydata-spectrum2")

# Plot both arrays the usual matplotlib way, one subplot per array.
figure = plt.figure()

# First subplot: the saved signal.
ax1 = figure.add_subplot(211)
ax1.plot(out_of_core_array.getArray())

# Second subplot: the saved spectrum.
ax2 = figure.add_subplot(212)
ax2.plot(out_of_core_spectrum_array.getArray())

# figure.show() does not render inline,
# so we use this instead:
context.Base_renderAsHtml(plt)

You can also save the output image for later.

In [27]:
# Quick and simple way to store the figure in image_module:
# write it to a temporary PNG file, then upload the content of that file.
image_path = "/tmp/somenamec.png"
figure.savefig(image_path)

# PNG is binary data, so read it in binary mode; "with" makes sure the
# file is closed once its content has been read.
with open(image_path, "rb") as image_file:
    image_data = image_file.read()

context.image_module.newContent(title="plot", portal_type="Image", data=image_data)
<Image at /erp5/image_module/3>

Replot using the array

In [30]:
# We can fetch an array that was saved earlier and plot the chart again.
# Indexing in the catalog is asynchronous: we must wait until the object
# has been catalogued, so it can take a while before this cell works.
array_result = context.portal_catalog.getResultValue(
      title="pydata-wav2",
      portal_type="Data Array")

# Fail with an explicit message when nothing was found (assumes
# getResultValue gives None in that case - TODO confirm) instead of an
# opaque AttributeError on None.
if array_result is None:
    raise LookupError(
        'No Data Array titled "pydata-wav2" found in the catalog yet; '
        'wait for it to be catalogued and run this cell again.')
array_to_replot = array_result.getObject()

figure = plt.figure() 

ax1 = figure.add_subplot(211)

ax1.plot(array_to_replot.getArray()) 

# figure.show() does not render inline,
# so we use this instead:
context.Base_renderAsHtml(plt)