Data from: http://data.worldbank.org/topic/agriculture-and-rural-development

http://data.worldbank.org/about/world-development-indicators-data/agriculture-and-rural-development

In [1]:

# Open the World Bank XML export and print its first line (the XML declaration).
# NOTE(review): `f` is left open on purpose; the following cells keep reading from it.
f = open("1_Topic_en_xml_v2.xml")
print f.readline()

<?xml version="1.0" encoding="utf-8"?>

In [2]:

# Next line of the file: the opening <Root> tag with its namespace declaration.
print f.readline()

<Root xmlns:wb="http://www.worldbank.org">

XML namespaces: http://www.w3schools.com/xml/xml_namespaces.asp

XML schema and validation:

In [3]:

# Print the next 10 lines. The trailing comma stops print from adding its own
# newline, since every line returned by readline() already ends with one.
for i in range(10):
    print f.readline(),

  <data>
    <record>
      <field name="Country or Area" key="ABW">Aruba</field>
      <field name="Item" key="AG.AGR.TRAC.NO">Agricultural machinery, tractors</field>
      <field name="Year">1960</field>
      <field name="Value" />
    </record>
    <record>
      <field name="Country or Area" key="ABW">Aruba</field>
      <field name="Item" key="AG.AGR.TRAC.NO">Agricultural machinery, tractors</field>

In [4]:

# Print 40 more lines to see how the <record> structure repeats year after year.
for i in range(40):
    print f.readline(),

      <field name="Year">1961</field>
      <field name="Value" />
    </record>
    <record>
      <field name="Country or Area" key="ABW">Aruba</field>
      <field name="Item" key="AG.AGR.TRAC.NO">Agricultural machinery, tractors</field>
      <field name="Year">1962</field>
      <field name="Value" />
    </record>
    <record>
      <field name="Country or Area" key="ABW">Aruba</field>
      <field name="Item" key="AG.AGR.TRAC.NO">Agricultural machinery, tractors</field>
      <field name="Year">1963</field>
      <field name="Value" />
    </record>
    <record>
      <field name="Country or Area" key="ABW">Aruba</field>
      <field name="Item" key="AG.AGR.TRAC.NO">Agricultural machinery, tractors</field>
      <field name="Year">1964</field>
      <field name="Value" />
    </record>
    <record>
      <field name="Country or Area" key="ABW">Aruba</field>
      <field name="Item" key="AG.AGR.TRAC.NO">Agricultural machinery, tractors</field>
      <field name="Year">1965</field>
      <field name="Value" />
    </record>
    <record>
      <field name="Country or Area" key="ABW">Aruba</field>
      <field name="Item" key="AG.AGR.TRAC.NO">Agricultural machinery, tractors</field>
      <field name="Year">1966</field>
      <field name="Value" />
    </record>
    <record>
      <field name="Country or Area" key="ABW">Aruba</field>
      <field name="Item" key="AG.AGR.TRAC.NO">Agricultural machinery, tractors</field>
      <field name="Year">1967</field>
      <field name="Value" />
    </record>
    <record>

In [5]:

# Parse the whole file into an in-memory element tree and grab its root element.
import xml.etree.ElementTree as ET
tree = ET.parse('1_Topic_en_xml_v2.xml')
root = tree.getroot()

Good tutorial on ElementTree: http://getpython3.com/diveintopython3/xml.html#xml-parse

Other options:

In [6]:

# Display the root element returned by the parser.
root

Out[6]:

<Element 'Root' at 0x2fb4b90>

In [7]:

# List the root's direct children: tag name and attribute dictionary of each.
for child in root:
   print child.tag,  ":",  child.attrib

data : {}

In [8]:

# Elements can be indexed like lists; this is the root's first child.
root[0]

Out[8]:

<Element 'data' at 0x2fb4bd0>

In [9]:

# Collect every child element of <data> (the <record> entries) into a list.
# list(element) replaces Element.getchildren(), which is deprecated and was
# removed in Python 3.9; iterating an element yields the same direct children.
data = root[0]
records = list(data)

In [10]:

# Total number of child elements found under <data>.
len(records)

Out[10]:

In [11]:

# Peek at the first record element.
records[0]

Out[11]:

<Element 'record' at 0x2fb4c10>

In [12]:

# Keep a handle on the first record for the inspection that follows.
r = records[0]

In [13]:

# The record's child <field> elements. list(r) is the supported replacement
# for r.getchildren(), which is deprecated and was removed in Python 3.9.
list(r)

Out[13]:

[<Element 'field' at 0x2fb4c90>,
 <Element 'field' at 0x2fb4cd0>,
 <Element 'field' at 0x2fb4c50>,
 <Element 'field' at 0x2fb4d10>]

In [14]:

# Indexing chains: first field of the first record.
records[0][0]

Out[14]:

<Element 'field' at 0x2fb4c90>

In [15]:

# .text holds the character content enclosed by the element's tags.
records[0][0].text

Out[15]:

'Aruba'

In [16]:

# Print the attribute dictionary of each field in the first record; the
# 'name' attribute is what tells the four fields apart.
r = records[0]
for field in r:
    print field.attrib

{'name': 'Country or Area', 'key': 'ABW'}
{'name': 'Item', 'key': 'AG.AGR.TRAC.NO'}
{'name': 'Year'}
{'name': 'Value'}

In [17]:

# Flatten every <record> into a plain dict with short keys.
# FIELD_KEYS maps a field's "name" attribute to the key its text is stored
# under; one lookup replaces four separate comparisons per field.
FIELD_KEYS = {
    'Country or Area': 'country',
    'Item': 'item',
    'Year': 'year',
    'Value': 'value',
}

records_parsed = []

for record in records:
    r = {}
    for field in record:
        # .get() tolerates a field without a "name" attribute (skipped)
        # instead of raising KeyError; unrecognised names are skipped as before.
        key = FIELD_KEYS.get(field.get('name'))
        if key is not None:
            # An empty element such as <field name="Value" /> has text None.
            r[key] = field.text
    records_parsed.append(r)

In [18]:

# First flattened record; 'value' is None because its <field> element is empty.
records_parsed[0]

Out[18]:

{'country': 'Aruba',
 'item': 'Agricultural machinery, tractors',
 'value': None,
 'year': '1960'}

In [19]:

# Distinct indicator names (the "Item" column) that occur in the dataset.
items = set(rec['item'] for rec in records_parsed)

items

Out[19]:

set(['Arable land (hectares)',
     'Agricultural machinery, tractors',
     'Crop production index (2004-2006 = 100)',
     'Food production index (2004-2006 = 100)',
     'Arable land (% of land area)',
     'Permanent cropland (% of land area)',
     'Poverty headcount ratio at rural poverty line (% of rural population)',
     'Cereal yield (kg per hectare)',
     'Land area (sq. km)',
     'Rural population growth (annual %)',
     'Improved water source, rural (% of rural population with access)',
     'Arable land (hectares per person)',
     'Agricultural raw materials imports (% of merchandise imports)',
     'Average precipitation in depth (mm per year)',
     'Agricultural irrigated land (% of total agricultural land)',
     'Fertilizer consumption (% of fertilizer production)',
     'Forest area (% of land area)',
     'Livestock production index (2004-2006 = 100)',
     'Surface area (sq. km)',
     'Agricultural machinery, tractors per 100 sq. km of arable land',
     'Employment in agriculture (% of total employment)',
     'Poverty gap at rural poverty line (%)',
     'Rural population (% of total population)',
     'Agriculture value added per worker (constant 2005 US$)',
     'Agricultural raw materials exports (% of merchandise exports)',
     'Fertilizer consumption (kilograms per hectare of arable land)',
     'Forest area (sq. km)',
     'Agricultural land (sq. km)',
     'Agricultural land (% of land area)',
     'Land under cereal production (hectares)',
     'Agriculture, value added (% of GDP)',
     'Rural population'])

In [20]:

# Distinct values of the "Country or Area" column (countries and aggregate
# regions alike). The name `items` is reused here and now holds area names.
items = set(rec['country'] for rec in records_parsed)

items

Out[20]:

set(['Canada',
     'Sao Tome and Principe',
     'Turkmenistan',
     'Lao PDR',
     'Arab World',
     'Latin America & Caribbean (all income levels)',
     'Cambodia',
     'Ethiopia',
     'Aruba',
     'Swaziland',
     'South Asia',
     'Argentina',
     'Bolivia',
     'Bahamas, The',
     'Burkina Faso',
     'OECD members',
     'Ghana',
     'Saudi Arabia',
     'Sub-Saharan Africa (IFC classification)',
     'Thailand',
     'Japan',
     'Channel Islands',
     'American Samoa',
     'Northern Mariana Islands',
     'Slovenia',
     'Guatemala',
     'Bosnia and Herzegovina',
     'Kuwait',
     'Russian Federation',
     'Jordan',
     'St. Lucia',
     'Congo, Rep.',
     'Dominica',
     'Liberia',
     'Maldives',
     'East Asia & Pacific (all income levels)',
     'Virgin Islands (U.S.)',
     'Lithuania',
     'Tanzania',
     'Vietnam',
     'Cabo Verde',
     'Greenland',
     'Gabon',
     'Monaco',
     'New Zealand',
     'European Union',
     'Jamaica',
     'Albania',
     'Samoa',
     'Slovak Republic',
     'United Arab Emirates',
     'Guam',
     'Uruguay',
     'India',
     'Azerbaijan',
     'Lesotho',
     'Kenya',
     'Latin America and the Caribbean (IFC classification)',
     'Upper middle income',
     'Tajikistan',
     'Pacific island small states',
     'Turkey',
     'Afghanistan',
     'Venezuela, RB',
     'Bangladesh',
     'Mauritania',
     'Solomon Islands',
     'Korea, Rep.',
     'San Marino',
     'Mongolia',
     'France',
     'Syrian Arab Republic',
     'Bermuda',
     'Namibia',
     'Somalia',
     'Peru',
     'Vanuatu',
     'Nigeria',
     'South Asia (IFC classification)',
     'Norway',
     "Cote d'Ivoire",
     'Europe & Central Asia (developing only)',
     'Benin',
     'Other small states',
     'Cuba',
     'Cameroon',
     'Montenegro',
     'Low & middle income',
     'Togo',
     'China',
     'Sub-Saharan Africa (developing only)',
     'Armenia',
     'Small states',
     'Timor-Leste',
     'Dominican Republic',
     'Low income',
     'Ukraine',
     'Bahrain',
     'Tonga',
     'Finland',
     'Latin America & Caribbean (developing only)',
     'High income',
     'Libya',
     'Cayman Islands',
     'Central African Republic',
     'Europe & Central Asia (all income levels)',
     'Mauritius',
     'Liechtenstein',
     'Belarus',
     'Mali',
     'Micronesia, Fed. Sts.',
     'Korea, Dem. Rep.',
     'Bulgaria',
     'North America',
     'Romania',
     'Angola',
     'Egypt, Arab Rep.',
     'Trinidad and Tobago',
     'St. Vincent and the Grenadines',
     'Cyprus',
     'Caribbean small states',
     'Brunei Darussalam',
     'Qatar',
     'Middle income',
     'Austria',
     'High income: OECD',
     'Mozambique',
     'Uganda',
     'Kyrgyz Republic',
     'Hungary',
     'Niger',
     'United States',
     'Brazil',
     'World',
     'Middle East & North Africa (all income levels)',
     'Guinea',
     'Panama',
     'Costa Rica',
     'Luxembourg',
     'Andorra',
     'Chad',
     'Euro area',
     'Ireland',
     'Pakistan',
     'Palau',
     'Faeroe Islands',
     'Lower middle income',
     'Ecuador',
     'Czech Republic',
     'Australia',
     'Algeria',
     'East Asia and the Pacific (IFC classification)',
     'El Salvador',
     'Tuvalu',
     'St. Kitts and Nevis',
     'Marshall Islands',
     'Chile',
     'Puerto Rico',
     'Belgium',
     'Europe and Central Asia (IFC classification)',
     'Haiti',
     'Belize',
     'Sierra Leone',
     'Georgia',
     'East Asia & Pacific (developing only)',
     'Denmark',
     'Philippines',
     'Moldova',
     'Macedonia, FYR',
     'Morocco',
     'Croatia',
     'French Polynesia',
     'Guinea-Bissau',
     'Kiribati',
     'Switzerland',
     'Grenada',
     'Middle East and North Africa (IFC classification)',
     'Yemen, Rep.',
     'Isle of Man',
     'Portugal',
     'Estonia',
     'Kosovo',
     'Sweden',
     'Mexico',
     'Hong Kong SAR, China',
     'South Africa',
     'Uzbekistan',
     'West Bank and Gaza',
     'Djibouti',
     'Rwanda',
     'Antigua and Barbuda',
     'Spain',
     'Colombia',
     'Burundi',
     'Least developed countries: UN classification',
     'Fiji',
     'Barbados',
     'Seychelles',
     'Madagascar',
     'Congo, Dem. Rep.',
     'Italy',
     'Curacao',
     'Bhutan',
     'Sudan',
     'Nepal',
     'Malta',
     'Netherlands',
     'Macao SAR, China',
     'Suriname',
     'Middle East & North Africa (developing only)',
     'Turks and Caicos Islands',
     'St. Martin (French part)',
     'Iran, Islamic Rep.',
     'Israel',
     'Indonesia',
     'Malaysia',
     'Iceland',
     'Zambia',
     'Sub-Saharan Africa (all income levels)',
     'Senegal',
     'Papua New Guinea',
     'Malawi',
     'Zimbabwe',
     'Germany',
     'Oman',
     'Kazakhstan',
     'Poland',
     'Sint Maarten (Dutch part)',
     'Eritrea',
     'Iraq',
     'New Caledonia',
     'Paraguay',
     'Not classified',
     'Latvia',
     'South Sudan',
     'Guyana',
     'Honduras',
     'Myanmar',
     'Equatorial Guinea',
     'Tunisia',
     'Nicaragua',
     'Singapore',
     'Serbia',
     'Botswana',
     'United Kingdom',
     'Gambia, The',
     'High income: nonOECD',
     'Greece',
     'Sri Lanka',
     'Lebanon',
     'Comoros',
     'Heavily indebted poor countries (HIPC)'])

In [21]:

# Keep only the records that belong to the forest-cover indicator.
forest_areas = [
    rec for rec in records_parsed
    if rec['item'] == 'Forest area (% of land area)'
]

forest_areas[:5]

Out[21]:

[{'country': 'Aruba',
  'item': 'Forest area (% of land area)',
  'value': None,
  'year': '1960'},
 {'country': 'Aruba',
  'item': 'Forest area (% of land area)',
  'value': None,
  'year': '1961'},
 {'country': 'Aruba',
  'item': 'Forest area (% of land area)',
  'value': None,
  'year': '1962'},
 {'country': 'Aruba',
  'item': 'Forest area (% of land area)',
  'value': None,
  'year': '1963'},
 {'country': 'Aruba',
  'item': 'Forest area (% of land area)',
  'value': None,
  'year': '1964'}]

In [22]:

# Group the forest-cover samples by country as [years, values]: two parallel
# lists that can be unpacked straight into plot(). Samples whose value is
# missing (None or empty text) are skipped.
forest_areas_parsed = {}
for sample in forest_areas:
    if sample['value']:
        # setdefault() creates the [years, values] pair on first sight of a
        # country; it replaces dict.has_key(), which only exists on Python 2.
        years, values = forest_areas_parsed.setdefault(sample['country'], [[], []])
        years.append(int(sample['year']))
        values.append(float(sample['value']))

In [24]:

# Aruba's entry: a list of years followed by the matching list of values.
forest_areas_parsed['Aruba']

Out[24]:

[[1990,
  1991,
  1992,
  1993,
  1994,
  1995,
  1996,
  1997,
  1998,
  1999,
  2000,
  2001,
  2002,
  2003,
  2004,
  2005,
  2006,
  2007,
  2008,
  2009,
  2010,
  2011],
 [2.22222222222222,
  2.22222222222222,
  2.22222222222222,
  2.22222222222222,
  2.22222222222222,
  2.22222222222222,
  2.22222222222222,
  2.22222222222222,
  2.22222222222222,
  2.22222222222222,
  2.22222222222222,
  2.22222222222222,
  2.22222222222222,
  2.22222222222222,
  2.22222222222222,
  2.22222222222222,
  2.22222222222222,
  2.22222222222222,
  2.22222222222222,
  2.22222222222222,
  2.33333333333333,
  2.33333333333333]]

In [25]:

# Plot Aruba's forest cover: years on the x axis, values on the y axis.
# NOTE(review): `plot` is not imported anywhere in this notebook; presumably it
# comes from a pylab session (%pylab) — confirm before running on a fresh kernel.
aruba_data = forest_areas_parsed['Aruba']
plot(aruba_data[0], aruba_data[1])

Out[25]:

[<matplotlib.lines.Line2D at 0x7e072c50>]

In [26]:

# Same plot as above: * unpacks the [years, values] pair into two positional arguments.
plot(*forest_areas_parsed['Aruba'])

Out[26]:

[<matplotlib.lines.Line2D at 0x7e228850>]

In [27]:

# Draw one line per country; each line is labelled with the country name.
for country,areas in forest_areas_parsed.items():
    plot(*areas, label=country)

In [41]:

def foo(*arg):
    """Print the tuple of positional arguments that was received."""
    print arg

# Unpacking a string passes every character as a separate argument ...
foo(*'hello')
# ... while passing it directly yields a one-element tuple.
foo('hello')

('h', 'e', 'l', 'l', 'o')
('hello',)

In [28]:

# Number of countries/areas that have at least one forest-cover value.
len(forest_areas_parsed)

Out[28]:

In [42]:

import csnd6

http://nbviewer.ipython.org/gist/jacobjoaquin/5535792

In [43]:

# Create a Csound instance and a performance thread wrapping it, then compile
# the orchestra header: sample rate (sr), ksmps, two channels (nchnls) and a
# 0dbfs reference of 1.0.
# NOTE(review): '-odac' presumably selects real-time audio output — confirm
# against the Csound manual.
cs = csnd6.Csound()
csPerf = csnd6.CsoundPerformanceThread(cs)
cs.SetOption('-odac')
cs.CompileOrc(
'''sr = 44100
ksmps = 128
nchnls = 2
0dbfs = 1.0
''')

Out[43]:

In [44]:

# Start Csound and begin playback on the performance thread created above.
cs.Start()
csPerf.Play()

In [92]:

# Instrument 1: reads duration, amplitude and frequency from p3, p4 and p5.
# The oscillator output a1 is multiplied by k1 (a `line` from 1 to 0 over the
# note's duration) and the same signal is sent to both output channels.
cs.CompileOrc(
'''instr 1
idur = p3
iamp = p4
ifreq = p5

k1 line 1, idur, 0
a1 oscils iamp, ifreq, 1
out a1 * k1, a1 * k1
endin
''')

Out[92]:

In [48]:

# One score line for instrument 1; per the instrument definition the last three
# fields are duration (p3), amplitude (p4) and frequency (p5).
cs.ReadScore('i 1 0 1 0.5 440')

Out[48]:

In [49]:

def event(*args):
    """Send a single score event to Csound.

    Every argument is converted with str() and the pieces are joined with
    single spaces; the resulting score line is passed to the global ``cs``
    object through ReadScore(). Nothing is returned.
    """
    score_line = ' '.join(str(arg) for arg in args)
    cs.ReadScore(score_line)

In [50]:

# Same kind of event as the raw score string above, built through the helper.
event('i', 1, 0, 0.25, 0.707, 262)

In [55]:

# Eight one-second notes on instrument 1, one starting every second, each at a
# random frequency between 440 and 880 Hz.
# `random` is not imported anywhere in the visible notebook (it only resolves
# when a pylab namespace is active), so import the standard-library module here
# to keep the cell working on a fresh kernel.
import random

for t in range(8):
    event('i', 1, t, 1, 0.707, 440 + 440 * random.random())

In [115]:

# Instrument 2: a wgpluck2 voice whose amplitude comes from p4 and whose pitch
# comes from p5, multiplied by a `linen` envelope (0.01 rise and decay over p3)
# and sent identically to both output channels.
# NOTE(review): wgpluck2 is presumably Csound's plucked-string model — confirm.
cs.CompileOrc(
'''instr 2
  iplk = 0.75
  kamp = p4
  icps = p5
  kpick = 0.75
  krefl = 0.5

  aenv linen 1, 0.01, p3, 0.01
  apluck wgpluck2 iplk, kamp, icps, kpick, krefl

  out apluck * aenv, apluck * aenv
endin
''')

Out[115]:

In [62]:

# Eight overlapping one-second notes on instrument 2, spaced 0.3 s apart, each
# at a random frequency between 440 and 880 Hz.
# `random` is not imported anywhere in the visible notebook (it only resolves
# when a pylab namespace is active), so import the standard-library module here
# to keep the cell working on a fresh kernel.
import random

for t in range(8):
    event('i', 2, t*0.3, 1, 0.4, 440 + 440 * random.random())

In [63]:

# Colombia's [years, values] pair. The cell used to start with a bare
# forest_areas_parsed['Colombia'][1] expression; only a cell's last expression
# is displayed, so that statement had no effect and has been removed.
forest_areas_parsed['Colombia']

Out[63]:

[[1990,
  1991,
  1992,
  1993,
  1994,
  1995,
  1996,
  1997,
  1998,
  1999,
  2000,
  2001,
  2002,
  2003,
  2004,
  2005,
  2006,
  2007,
  2008,
  2009,
  2010,
  2011],
 [56.3488057683641,
  56.2577737719694,
  56.1667417755746,
  56.0757097791798,
  55.984677782785,
  55.8936457863903,
  55.8026137899955,
  55.7115817936007,
  55.620549797206,
  55.5295178008112,
  55.4384858044164,
  55.3474538080216,
  55.2564218116269,
  55.1653898152321,
  55.0743578188373,
  54.9833258224425,
  54.8922938260478,
  54.801261829653,
  54.7102298332582,
  54.6191978368635,
  54.5281658404687,
  54.4371338440739]]

In [64]:

# Largest forest-cover value in Colombia's series.
max(forest_areas_parsed['Colombia'][1])

Out[64]:

56.3488057683641

In [65]:

# Smallest forest-cover value in Colombia's series.
min(forest_areas_parsed['Colombia'][1])

Out[65]:

54.4371338440739

In [71]:

# Sonify Colombia's forest cover: one note on instrument 2 per year, with the
# value mapped linearly from [colombia_min, colombia_max] onto
# [base_hz, ceil_hz]. Each note lasts `dur` seconds and notes follow each other
# without gaps, starting from `initial_year`.
base_hz = 220
ceil_hz = 440

initial_year = 1990
dur = 0.25
amp = 0.1

colombia_years, colombia_values = forest_areas_parsed['Colombia']
colombia_max = max(colombia_values)
colombia_min = min(colombia_values)
colombia_range = colombia_max - colombia_min

score_lines = []
# The builtin zip() replaces itertools.izip (Python 2 only); the series is
# short, so materialising the pairs costs nothing.
for year, value in zip(colombia_years, colombia_values):
    start_time = (year - initial_year) * dur
    if colombia_range > 0:
        freq = base_hz + (ceil_hz - base_hz) * (value - colombia_min) / colombia_range
    else:
        # A constant series has no range to scale by: hold the base pitch
        # instead of dividing by zero.
        freq = base_hz
    score_lines.append("i 2 %f %f %f %f\n" % (start_time, dur, amp, freq))

# Join once at the end rather than growing a string inside the loop.
score = ''.join(score_lines)

# Parenthesised single argument: prints the same text on Python 2 and 3.
print(score)

i 2 0.000000 0.250000 0.100000 440.000000
i 2 0.250000 0.250000 0.100000 429.523810
i 2 0.500000 0.250000 0.100000 419.047619
i 2 0.750000 0.250000 0.100000 408.571429
i 2 1.000000 0.250000 0.100000 398.095238
i 2 1.250000 0.250000 0.100000 387.619048
i 2 1.500000 0.250000 0.100000 377.142857
i 2 1.750000 0.250000 0.100000 366.666667
i 2 2.000000 0.250000 0.100000 356.190476
i 2 2.250000 0.250000 0.100000 345.714286
i 2 2.500000 0.250000 0.100000 335.238095
i 2 2.750000 0.250000 0.100000 324.761905
i 2 3.000000 0.250000 0.100000 314.285714
i 2 3.250000 0.250000 0.100000 303.809524
i 2 3.500000 0.250000 0.100000 293.333333
i 2 3.750000 0.250000 0.100000 282.857143
i 2 4.000000 0.250000 0.100000 272.380952
i 2 4.250000 0.250000 0.100000 261.904762
i 2 4.500000 0.250000 0.100000 251.428571
i 2 4.750000 0.250000 0.100000 240.952381
i 2 5.000000 0.250000 0.100000 230.476190
i 2 5.250000 0.250000 0.100000 220.000000

In [72]:

# Hand the generated Colombia score to Csound.
cs.ReadScore(score)

Out[72]:

In [73]:

# Plot the same Colombia series that was just turned into a score.
plot(*forest_areas_parsed['Colombia'])

Out[73]:

[<matplotlib.lines.Line2D at 0x7e50b6d0>]

In [111]:

# Sonify `num` countries at once on instrument 2. Each country's series is
# normalised to its OWN minimum and maximum, so every voice spans the full
# base_hz..ceil_hz range. Which countries are picked depends on the dict's
# iteration order.
base_hz = 220
ceil_hz = 440

initial_year = 1990
dur = 0.25

num = 20

score_lines = []
# list() keeps the slice valid where items() returns a view (Python 3).
for country, areas in list(forest_areas_parsed.items())[:num]:
    max_areas = max(areas[1])
    min_areas = min(areas[1])
    # Constant for a given country, so computed once here instead of once per sample.
    area_range = max_areas - min_areas
    for year, area in zip(*areas):
        if area_range > 0:
            freq = base_hz + ((area - min_areas) / area_range) * (ceil_hz - base_hz)
            amp = 0.2
        else:
            # Flat series: emit a very quiet low note rather than divide by zero.
            freq = 110.0
            amp = 0.0001
        # The amplitude is divided by the number of voices sounding together.
        score_lines.append(
            "i 2 %f %f %f %f\n" % ((year - initial_year) * dur, dur, amp / num, freq))

# Join once at the end rather than growing a string inside the loops.
score = ''.join(score_lines)

In [116]:

# Hand the multi-country score to Csound.
cs.ReadScore(score)

Out[116]:

In [82]:

# Plot the first 20 entries of the dictionary, one line each.
# NOTE(review): slicing items() only works where it returns a list (Python 2).
for country,areas in forest_areas_parsed.items()[:20]:
    plot(*areas)

In [151]:

# Sonify `num` countries on instrument 2 using ONE shared scale: values are
# mapped from the minimum/maximum across all selected countries onto
# base_hz..ceil_hz, so pitch is comparable between countries. Each country's
# notes are shifted by a small extra time offset (`counter`).
base_hz = 220
ceil_hz = 880

initial_year = 1990
dur = 0.25

num = 20

# Take the subset once so the global range and the loop below are guaranteed to
# use the same countries; list() keeps the slice valid where items() is a view.
selected = list(forest_areas_parsed.items())[:num]

max_areas = max(max(areas[1]) for country, areas in selected)
min_areas = min(min(areas[1]) for country, areas in selected)

score_lines = []
counter = 0
for country, areas in selected:
    # Per-country extent, constant for all of the country's samples, so it is
    # computed once here instead of once per sample.
    local_max_areas = max(areas[1])
    local_min_areas = min(areas[1])
    area_range = local_max_areas - local_min_areas
    for year, area in zip(*areas):
        if area_range > 0:
            # A varying series implies max_areas > min_areas, so this division is safe.
            freq = base_hz + ((area - min_areas) / (max_areas - min_areas)) * (ceil_hz - base_hz)
            amp = 0.2
        else:
            # Flat series: a quiet fixed low note instead of a scaled pitch.
            freq = 110.0
            amp = 0.01
        score_lines.append(
            "i 2 %f %f %f %f\n" % ((year - initial_year) * dur + counter, dur, amp / num, freq))
    counter += 0.5 * dur / num

# Join once at the end rather than growing a string inside the loops.
score = ''.join(score_lines)

In [152]:

# Hand the globally normalised score to Csound.
cs.ReadScore(score)

Out[152]:

By: Andrés Cabrera mantaraya36@gmail.com

For Course MAT 240F at UCSB

This IPython notebook is licensed under the CC BY-NC-SA 4.0 license: http://creativecommons.org/licenses/by-nc-sa/4.0/