In [1]:
%erp5_url http://10.0.2.15:20001/erp5/Base_executeJupyter
Your erp5_url is http://10.0.2.15:20001/erp5/Base_executeJupyter. 
Please enter reference in next cell. 
In [2]:
%notebook_set_reference NB-OTHER
Your notebook_set_reference is NB-OTHER. 
Please enter user in next cell. 
In [3]:
%erp5_user zope
Your erp5_user is zope. 
Please enter password in next cell. 
In [4]:
%erp5_password insecure
Your erp5_password is insecure. 
Please proceed
In [5]:
# Display the ERP5 site object. `context` is not defined anywhere in this
# notebook; it is supplied by the ERP5 kernel session configured above.
context
<ERP5Site at /erp5>
In [6]:
# Fetch the Data Stream document into which the WAV file was uploaded.
# "1" is the ID of that document inside data_stream_module on the ERP5 site.
context.data_stream_module["1"]
<Data Stream at /erp5/data_stream_module/1>
In [7]:
# Import needed libraries to use later
import matplotlib.pyplot as plt
from scipy.fftpack import fft

# Import also scipy to read the audio file
import scipy
import scipy.io
from scipy.io.wavfile import read

In [8]:
# Not out of core, as you return all data as string (So you should avoid do like this)
datastream_as_string = context.data_stream_module["1"].getData()

print "Total is %s" % len(datastream_as_string)
print type(datastream_as_string)
Total is 3833900
<type 'str'>
In [9]:
# Now we try with out-of-core!!
stream = context.data_stream_module["1"].data 

l = 0
for chunk in stream.iterate(): # out of core
    l += len(chunk)
print "Total is %s" % l
print type(stream)
Total is 3833900
<class 'Products.ERP5Type.BTreeData.BTreeData'>
In [11]:
# Import StringIO because wavfile.read needs a file-like object to read from
from StringIO import StringIO

# Load the whole file from ERP5 as one string. This is deliberately the naive
# version: getData() returns every byte at once, so it is not out-of-core.
# A later cell improves on this with a chunked, file-like reader.
fs, data = read(StringIO(context.data_stream_module["1"].getData()))

# Keep only the first channel. scipy returns a 1-D array for a mono file and
# an (Nsamples, Nchannels) array otherwise, so only index the channel axis
# when there is one (data.T[0] on mono data would yield a single sample).
array = data if data.ndim == 1 else data[:, 0]  # not out of core

# Save the array to erp5 to make it out-of-core
out_of_core_array = context.data_array_module.newContent(
    array=array,
    portal_type="Data Array",
    title="pydata-wav2")

cmplx = fft(out_of_core_array.getArray())
# Keep the first half of the FFT output, minus one bin. Floor division keeps
# the slice bound an integer on Python 3 as well as on Python 2.
spectrum = abs(cmplx[:(len(cmplx) // 2) - 1])  # not out of core

# Save the spectrum array as well, so that it is out-of-core too
out_of_core_spectrum_array = context.data_array_module.newContent(
    array=spectrum,
    portal_type="Data Array",
    title="pydata-spectrum2")

# Two stacked panels: waveform on top, magnitude spectrum below.
figure = plt.figure()

ax1 = figure.add_subplot(211)
ax2 = figure.add_subplot(212)

ax1.plot(out_of_core_array.getArray())
ax2.plot(out_of_core_spectrum_array.getArray())

# figure.show() does not render inline in this kernel,
# so we use this instead:
context.Base_renderAsHtml(plt)

In [13]:
# Quick-and-dirty way to store the plot in image_module: write the figure to
# a temporary PNG file, then upload that file's bytes as a new Image document.
PLOT_PATH = "/tmp/somenamec.png"
figure.savefig(PLOT_PATH)

# PNG is binary data, so read it in binary mode; the `with` block guarantees
# the file handle is closed even if the read fails.
with open(PLOT_PATH, "rb") as plot_file:
    plot_png = plot_file.read()

context.image_module.newContent(title="plot", portal_type="Image", data=plot_png)
<Image at /erp5/image_module/5>
In [14]:
# Create a class that wraps out-of-core data in the file API,
# so that we can pass out-of-core objects that behave like
# an ordinary file.

class BigFileReader(object):
    """Minimal read-only, file-like wrapper around an out-of-core big file.

    The wrapped object only has to provide ``iterate(offset, size)``, which
    yields the requested byte range in chunks. The wrapper adds the
    ``tell`` / ``seek`` / ``read`` methods that consumers of file objects
    expect, so the data can be handed to code written for regular files.
    """

    def __init__(self, bigfile):
        """Wrap ``bigfile`` and start reading at offset 0."""
        self.bigfile = bigfile
        self.pos = 0

    def tell(self):
        """Return the current absolute read position."""
        return self.pos

    def seek(self, pos, whence=0):
        """Move the read position.

        ``whence`` follows the standard file convention: 0 (default) means
        ``pos`` is absolute, 1 means relative to the current position and
        2 means relative to the end of the data.

        Raises ValueError for an unknown ``whence`` or when the resulting
        position would be negative. Seeking past the end is allowed, as with
        regular files; a later read then simply returns no data.
        """
        if whence == 0:
            target = pos
        elif whence == 1:
            target = self.pos + pos
        elif whence == 2:
            # assumes the wrapped object supports len() -- TODO confirm for
            # every big-file type passed in
            target = len(self.bigfile) + pos
        else:
            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" % (whence,))
        if target < 0:
            raise ValueError("negative seek position %r" % (target,))
        self.pos = target

    def read(self, n=None):
        """Read up to ``n`` bytes from the current position and advance it.

        With ``n`` omitted, None or negative, read everything up to the end
        of the data, as regular file objects do.
        """
        if n is None or n < 0:
            chunks = self.bigfile.iterate(self.pos)
        else:
            chunks = self.bigfile.iterate(self.pos, n)
        # b'' is the same object type as '' on Python 2 and keeps the join
        # valid for byte chunks on Python 3.
        data = b''.join(chunks)
        # Advance by what was actually returned; it can be less than n at EOF.
        self.pos += len(data)
        return data



# Read the WAV through the file-like wrapper around the stream's `data`
# object, instead of first pulling the whole stream into a string.
fs, data = read(BigFileReader(context.data_stream_module["1"].data))

# Keep only the first channel. scipy returns a 1-D array for a mono file and
# an (Nsamples, Nchannels) array otherwise, so only index the channel axis
# when there is one (data.T[0] on mono data would yield a single sample).
array = data if data.ndim == 1 else data[:, 0]  # not out of core

# Save the array to erp5 to make it out-of-core
out_of_core_array = context.data_array_module.newContent(
    array=array,
    portal_type="Data Array",
    title="pydata-wav2")

cmplx = fft(out_of_core_array.getArray())
# Keep the first half of the FFT output, minus one bin. Floor division keeps
# the slice bound an integer on Python 3 as well as on Python 2.
spectrum = abs(cmplx[:(len(cmplx) // 2) - 1])  # not out of core

# Save the spectrum array as well, so that it is out-of-core too
out_of_core_spectrum_array = context.data_array_module.newContent(
    array=spectrum,
    portal_type="Data Array",
    title="pydata-spectrum2")

# Two stacked panels: waveform on top, magnitude spectrum below.
figure = plt.figure()

ax1 = figure.add_subplot(211)
ax2 = figure.add_subplot(212)

ax1.plot(out_of_core_array.getArray())
ax2.plot(out_of_core_spectrum_array.getArray())

# figure.show() does not render inline in this kernel,
# so we use this instead:
context.Base_renderAsHtml(plt)

In [30]:
class BigFileReader(object):
    """Minimal read-only, file-like wrapper around an out-of-core big file.

    The wrapped object only has to provide ``iterate(offset, size)``, which
    yields the requested byte range in chunks. The wrapper adds the
    ``tell`` / ``seek`` / ``read`` methods that consumers of file objects
    expect, so the data can be handed to code written for regular files.
    """

    def __init__(self, bigfile):
        """Wrap ``bigfile`` and start reading at offset 0."""
        self.bigfile = bigfile
        self.pos = 0

    def tell(self):
        """Return the current absolute read position."""
        return self.pos

    def seek(self, pos, whence=0):
        """Move the read position.

        ``whence`` follows the standard file convention: 0 (default) means
        ``pos`` is absolute, 1 means relative to the current position and
        2 means relative to the end of the data.

        Raises ValueError for an unknown ``whence`` or when the resulting
        position would be negative. Seeking past the end is allowed, as with
        regular files; a later read then simply returns no data.
        """
        if whence == 0:
            target = pos
        elif whence == 1:
            target = self.pos + pos
        elif whence == 2:
            # assumes the wrapped object supports len() -- TODO confirm for
            # every big-file type passed in
            target = len(self.bigfile) + pos
        else:
            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" % (whence,))
        if target < 0:
            raise ValueError("negative seek position %r" % (target,))
        self.pos = target

    def read(self, n=None):
        """Read up to ``n`` bytes from the current position and advance it.

        With ``n`` omitted, None or negative, read everything up to the end
        of the data, as regular file objects do.
        """
        if n is None or n < 0:
            chunks = self.bigfile.iterate(self.pos)
        else:
            chunks = self.bigfile.iterate(self.pos, n)
        # b'' is the same object type as '' on Python 2 and keeps the join
        # valid for byte chunks on Python 3.
        data = b''.join(chunks)
        # Advance by what was actually returned; it can be less than n at EOF.
        self.pos += len(data)
        return data


# Read the WAV through the file-like wrapper around the stream's `data`
# object, instead of first pulling the whole stream into a string.
fs, data = read(BigFileReader(context.data_stream_module["1"].data))

# Keep only the first channel. scipy returns a 1-D array for a mono file and
# an (Nsamples, Nchannels) array otherwise, so only index the channel axis
# when there is one (data.T[0] on mono data would yield a single sample).
array = data if data.ndim == 1 else data[:, 0]  # not out of core

# Save the array to erp5 to make it out-of-core
out_of_core_array = context.data_array_module.newContent(
    array=array,
    portal_type="Data Array",
    title="pydata-wav2")

cmplx = fft(out_of_core_array.getArray())
# Keep the first half of the FFT output, minus one bin. Floor division keeps
# the slice bound an integer on Python 3 as well as on Python 2.
spectrum = abs(cmplx[:(len(cmplx) // 2) - 1])  # not out of core

# Save the spectrum array as well, so that it is out-of-core too. The
# distinct title lets it be looked up again through the catalog afterwards.
out_of_core_spectrum_array = context.data_array_module.newContent(
    array=spectrum,
    portal_type="Data Array",
    title="pydata-spectrum2-to-recover")

# Two stacked panels: waveform on top, magnitude spectrum below.
figure = plt.figure()

ax1 = figure.add_subplot(211)
ax2 = figure.add_subplot(212)

ax1.plot(out_of_core_array.getArray())
ax2.plot(out_of_core_spectrum_array.getArray())

# figure.show() does not render inline in this kernel,
# so we use this instead:
context.Base_renderAsHtml(plt)

In [31]:
# We can recover a previously saved array from ERP5 and re-plot the chart.
# The catalog is updated asynchronously: the Data Array only becomes
# searchable once it has been catalogued, so it can take a while before
# this cell works.
spectrum = context.portal_catalog.getResultValue(
    title="pydata-spectrum2-to-recover",
    portal_type="Data Array")

# Fail with an explicit message rather than an opaque AttributeError below.
# NOTE(review): assumes getResultValue returns None when nothing matches --
# confirm against the ERP5 catalog API.
if spectrum is None:
    raise LookupError(
        "Data Array 'pydata-spectrum2-to-recover' was not found in the "
        "catalog; wait for indexing to finish and re-run this cell")

figure = plt.figure()

ax1 = figure.add_subplot(211)

ax1.plot(spectrum.getArray())

# figure.show() does not render inline in this kernel,
# so we use this instead:
context.Base_renderAsHtml(plt)