# Open the XML data file and echo its first line to see what the header looks like.
# The handle is left open: later statements keep reading from `f`.
f = open("1_Topic_en_xml_v2.xml")
print f.readline()
<?xml version="1.0" encoding="utf-8"?>
print f.readline()
<Root xmlns:wb="http://www.worldbank.org">
# Echo the next 10 lines of the file. The trailing comma stops the Python 2
# print statement from adding its own newline after each line that readline()
# returns (readline() keeps the line's own newline).
for i in range(10):
print f.readline(),
<data> <record> <field name="Country or Area" key="ABW">Aruba</field> <field name="Item" key="AG.AGR.TRAC.NO">Agricultural machinery, tractors</field> <field name="Year">1960</field> <field name="Value" /> </record> <record> <field name="Country or Area" key="ABW">Aruba</field> <field name="Item" key="AG.AGR.TRAC.NO">Agricultural machinery, tractors</field>
# Echo a further 40 lines to see several complete <record> elements.
# As above, the trailing comma suppresses the extra newline from print.
for i in range(40):
print f.readline(),
<field name="Year">1961</field> <field name="Value" /> </record> <record> <field name="Country or Area" key="ABW">Aruba</field> <field name="Item" key="AG.AGR.TRAC.NO">Agricultural machinery, tractors</field> <field name="Year">1962</field> <field name="Value" /> </record> <record> <field name="Country or Area" key="ABW">Aruba</field> <field name="Item" key="AG.AGR.TRAC.NO">Agricultural machinery, tractors</field> <field name="Year">1963</field> <field name="Value" /> </record> <record> <field name="Country or Area" key="ABW">Aruba</field> <field name="Item" key="AG.AGR.TRAC.NO">Agricultural machinery, tractors</field> <field name="Year">1964</field> <field name="Value" /> </record> <record> <field name="Country or Area" key="ABW">Aruba</field> <field name="Item" key="AG.AGR.TRAC.NO">Agricultural machinery, tractors</field> <field name="Year">1965</field> <field name="Value" /> </record> <record> <field name="Country or Area" key="ABW">Aruba</field> <field name="Item" key="AG.AGR.TRAC.NO">Agricultural machinery, tractors</field> <field name="Year">1966</field> <field name="Value" /> </record> <record> <field name="Country or Area" key="ABW">Aruba</field> <field name="Item" key="AG.AGR.TRAC.NO">Agricultural machinery, tractors</field> <field name="Year">1967</field> <field name="Value" /> </record> <record>
import xml.etree.ElementTree as ET
# Parse the XML file into an element tree and grab its root element.
tree = ET.parse('1_Topic_en_xml_v2.xml')
root = tree.getroot()
Good tutorial on ElementTree: http://getpython3.com/diveintopython3/xml.html#xml-parse
Other options:
root
<Element 'Root' at 0x2fb4b90>
# List the root's direct children as "tag : attributes".
for node in root:
    print("%s : %s" % (node.tag, node.attrib))
data : {}
root[0]
<Element 'data' at 0x2fb4bd0>
# The first child of the root is the element that wraps every record.
data = root[0]
# list(elem) is the documented replacement for the deprecated
# Element.getchildren() (removed in Python 3.9); it yields the same list of
# direct children.
records = list(data)
len(records)
435456
records[0]
<Element 'record' at 0x2fb4c10>
# Inspect the child elements of the first record.
r = records[0]
# list(elem) replaces the deprecated Element.getchildren().
list(r)
[<Element 'field' at 0x2fb4c90>, <Element 'field' at 0x2fb4cd0>, <Element 'field' at 0x2fb4c50>, <Element 'field' at 0x2fb4d10>]
records[0][0]
<Element 'field' at 0x2fb4c90>
records[0][0].text
'Aruba'
# Show the attribute dictionary of every field in the first record.
r = records[0]
for field in r:
    print(field.attrib)
{'name': 'Country or Area', 'key': 'ABW'} {'name': 'Item', 'key': 'AG.AGR.TRAC.NO'} {'name': 'Year'} {'name': 'Value'}
# Flatten every record element into a plain dict with short keys.
# The table maps a field's XML "name" attribute to the key used in that dict;
# fields with any other name are ignored.
_key_for_field = {
    'Country or Area': 'country',
    'Item': 'item',
    'Year': 'year',
    'Value': 'value',
}

records_parsed = []
for record in records:
    parsed = {}
    for field in record:
        short_key = _key_for_field.get(field.attrib['name'])
        if short_key is not None:
            # field.text is None for an empty element such as <field name="Value" />.
            parsed[short_key] = field.text
    records_parsed.append(parsed)
records_parsed[0]
{'country': 'Aruba', 'item': 'Agricultural machinery, tractors', 'value': None, 'year': '1960'}
# Collect the distinct indicator names ("item") that occur in the parsed records.
items = set(rec['item'] for rec in records_parsed)
items
set(['Arable land (hectares)', 'Agricultural machinery, tractors', 'Crop production index (2004-2006 = 100)', 'Food production index (2004-2006 = 100)', 'Arable land (% of land area)', 'Permanent cropland (% of land area)', 'Poverty headcount ratio at rural poverty line (% of rural population)', 'Cereal yield (kg per hectare)', 'Land area (sq. km)', 'Rural population growth (annual %)', 'Improved water source, rural (% of rural population with access)', 'Arable land (hectares per person)', 'Agricultural raw materials imports (% of merchandise imports)', 'Average precipitation in depth (mm per year)', 'Agricultural irrigated land (% of total agricultural land)', 'Fertilizer consumption (% of fertilizer production)', 'Forest area (% of land area)', 'Livestock production index (2004-2006 = 100)', 'Surface area (sq. km)', 'Agricultural machinery, tractors per 100 sq. km of arable land', 'Employment in agriculture (% of total employment)', 'Poverty gap at rural poverty line (%)', 'Rural population (% of total population)', 'Agriculture value added per worker (constant 2005 US$)', 'Agricultural raw materials exports (% of merchandise exports)', 'Fertilizer consumption (kilograms per hectare of arable land)', 'Forest area (sq. km)', 'Agricultural land (sq. km)', 'Agricultural land (% of land area)', 'Land under cereal production (hectares)', 'Agriculture, value added (% of GDP)', 'Rural population'])
# Collect the distinct country/area names. This rebinds `items`, so the name
# no longer refers to a set of indicator names afterwards.
items = set(rec['country'] for rec in records_parsed)
items
set(['Canada', 'Sao Tome and Principe', 'Turkmenistan', 'Lao PDR', 'Arab World', 'Latin America & Caribbean (all income levels)', 'Cambodia', 'Ethiopia', 'Aruba', 'Swaziland', 'South Asia', 'Argentina', 'Bolivia', 'Bahamas, The', 'Burkina Faso', 'OECD members', 'Ghana', 'Saudi Arabia', 'Sub-Saharan Africa (IFC classification)', 'Thailand', 'Japan', 'Channel Islands', 'American Samoa', 'Northern Mariana Islands', 'Slovenia', 'Guatemala', 'Bosnia and Herzegovina', 'Kuwait', 'Russian Federation', 'Jordan', 'St. Lucia', 'Congo, Rep.', 'Dominica', 'Liberia', 'Maldives', 'East Asia & Pacific (all income levels)', 'Virgin Islands (U.S.)', 'Lithuania', 'Tanzania', 'Vietnam', 'Cabo Verde', 'Greenland', 'Gabon', 'Monaco', 'New Zealand', 'European Union', 'Jamaica', 'Albania', 'Samoa', 'Slovak Republic', 'United Arab Emirates', 'Guam', 'Uruguay', 'India', 'Azerbaijan', 'Lesotho', 'Kenya', 'Latin America and the Caribbean (IFC classification)', 'Upper middle income', 'Tajikistan', 'Pacific island small states', 'Turkey', 'Afghanistan', 'Venezuela, RB', 'Bangladesh', 'Mauritania', 'Solomon Islands', 'Korea, Rep.', 'San Marino', 'Mongolia', 'France', 'Syrian Arab Republic', 'Bermuda', 'Namibia', 'Somalia', 'Peru', 'Vanuatu', 'Nigeria', 'South Asia (IFC classification)', 'Norway', "Cote d'Ivoire", 'Europe & Central Asia (developing only)', 'Benin', 'Other small states', 'Cuba', 'Cameroon', 'Montenegro', 'Low & middle income', 'Togo', 'China', 'Sub-Saharan Africa (developing only)', 'Armenia', 'Small states', 'Timor-Leste', 'Dominican Republic', 'Low income', 'Ukraine', 'Bahrain', 'Tonga', 'Finland', 'Latin America & Caribbean (developing only)', 'High income', 'Libya', 'Cayman Islands', 'Central African Republic', 'Europe & Central Asia (all income levels)', 'Mauritius', 'Liechtenstein', 'Belarus', 'Mali', 'Micronesia, Fed. Sts.', 'Korea, Dem. Rep.', 'Bulgaria', 'North America', 'Romania', 'Angola', 'Egypt, Arab Rep.', 'Trinidad and Tobago', 'St. 
Vincent and the Grenadines', 'Cyprus', 'Caribbean small states', 'Brunei Darussalam', 'Qatar', 'Middle income', 'Austria', 'High income: OECD', 'Mozambique', 'Uganda', 'Kyrgyz Republic', 'Hungary', 'Niger', 'United States', 'Brazil', 'World', 'Middle East & North Africa (all income levels)', 'Guinea', 'Panama', 'Costa Rica', 'Luxembourg', 'Andorra', 'Chad', 'Euro area', 'Ireland', 'Pakistan', 'Palau', 'Faeroe Islands', 'Lower middle income', 'Ecuador', 'Czech Republic', 'Australia', 'Algeria', 'East Asia and the Pacific (IFC classification)', 'El Salvador', 'Tuvalu', 'St. Kitts and Nevis', 'Marshall Islands', 'Chile', 'Puerto Rico', 'Belgium', 'Europe and Central Asia (IFC classification)', 'Haiti', 'Belize', 'Sierra Leone', 'Georgia', 'East Asia & Pacific (developing only)', 'Denmark', 'Philippines', 'Moldova', 'Macedonia, FYR', 'Morocco', 'Croatia', 'French Polynesia', 'Guinea-Bissau', 'Kiribati', 'Switzerland', 'Grenada', 'Middle East and North Africa (IFC classification)', 'Yemen, Rep.', 'Isle of Man', 'Portugal', 'Estonia', 'Kosovo', 'Sweden', 'Mexico', 'Hong Kong SAR, China', 'South Africa', 'Uzbekistan', 'West Bank and Gaza', 'Djibouti', 'Rwanda', 'Antigua and Barbuda', 'Spain', 'Colombia', 'Burundi', 'Least developed countries: UN classification', 'Fiji', 'Barbados', 'Seychelles', 'Madagascar', 'Congo, Dem. Rep.', 'Italy', 'Curacao', 'Bhutan', 'Sudan', 'Nepal', 'Malta', 'Netherlands', 'Macao SAR, China', 'Suriname', 'Middle East & North Africa (developing only)', 'Turks and Caicos Islands', 'St. 
Martin (French part)', 'Iran, Islamic Rep.', 'Israel', 'Indonesia', 'Malaysia', 'Iceland', 'Zambia', 'Sub-Saharan Africa (all income levels)', 'Senegal', 'Papua New Guinea', 'Malawi', 'Zimbabwe', 'Germany', 'Oman', 'Kazakhstan', 'Poland', 'Sint Maarten (Dutch part)', 'Eritrea', 'Iraq', 'New Caledonia', 'Paraguay', 'Not classified', 'Latvia', 'South Sudan', 'Guyana', 'Honduras', 'Myanmar', 'Equatorial Guinea', 'Tunisia', 'Nicaragua', 'Singapore', 'Serbia', 'Botswana', 'United Kingdom', 'Gambia, The', 'High income: nonOECD', 'Greece', 'Sri Lanka', 'Lebanon', 'Comoros', 'Heavily indebted poor countries (HIPC)'])
# Keep only the records of the forest-cover indicator and peek at the first five.
forest_areas = [rec for rec in records_parsed
                if rec['item'] == 'Forest area (% of land area)']
forest_areas[:5]
[{'country': 'Aruba', 'item': 'Forest area (% of land area)', 'value': None, 'year': '1960'}, {'country': 'Aruba', 'item': 'Forest area (% of land area)', 'value': None, 'year': '1961'}, {'country': 'Aruba', 'item': 'Forest area (% of land area)', 'value': None, 'year': '1962'}, {'country': 'Aruba', 'item': 'Forest area (% of land area)', 'value': None, 'year': '1963'}, {'country': 'Aruba', 'item': 'Forest area (% of land area)', 'value': None, 'year': '1964'}]
# Group the forest-area samples by country into two parallel lists:
#   forest_areas_parsed[country] == [[year, ...], [value, ...]]
forest_areas_parsed = {}
for sample in forest_areas:
    # Samples without a measurement carry no value text; skip them so that
    # only real data points end up in the series.
    if not sample['value']:
        continue
    # setdefault replaces the deprecated, Python-2-only dict.has_key()
    # check-then-insert and returns the country's [years, values] pair.
    years, values = forest_areas_parsed.setdefault(sample['country'], [[], []])
    years.append(int(sample['year']))
    values.append(float(sample['value']))
forest_areas_parsed['Aruba']
[[1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011], [2.22222222222222, 2.22222222222222, 2.22222222222222, 2.22222222222222, 2.22222222222222, 2.22222222222222, 2.22222222222222, 2.22222222222222, 2.22222222222222, 2.22222222222222, 2.22222222222222, 2.22222222222222, 2.22222222222222, 2.22222222222222, 2.22222222222222, 2.22222222222222, 2.22222222222222, 2.22222222222222, 2.22222222222222, 2.22222222222222, 2.33333333333333, 2.33333333333333]]
# Plot Aruba's forest-area values (second list) against the years (first list).
# NOTE(review): `plot` is not imported in the visible code; presumably it comes
# from a pylab/matplotlib namespace set up by the notebook — confirm.
aruba_data = forest_areas_parsed['Aruba']
plot(aruba_data[0], aruba_data[1])
[<matplotlib.lines.Line2D at 0x7e072c50>]
plot(*forest_areas_parsed['Aruba'])
[<matplotlib.lines.Line2D at 0x7e228850>]
# Draw one line per country, labelled with the country name.
for country, areas in forest_areas_parsed.items():
    years, values = areas
    plot(years, values, label=country)
def foo(*arg):
    """Print the tuple of positional arguments exactly as it was received.

    Demonstrates argument unpacking: ``foo(*'hello')`` receives five
    one-character arguments, while ``foo('hello')`` receives a single string.
    """
    # With a single argument the call form prints the same text on
    # Python 2 and Python 3.
    print(arg)
# Unpacking a string passes each character as a separate positional argument...
foo(*'hello')
# ...whereas passing the string directly yields a one-element argument tuple.
foo('hello')
('h', 'e', 'l', 'l', 'o') ('hello',)
len(forest_areas_parsed)
238
import csnd6
# Create the Csound engine and a performance thread bound to it.
cs = csnd6.Csound()
csPerf = csnd6.CsoundPerformanceThread(cs)
# NOTE(review): '-odac' presumably routes output to the real-time audio
# device — confirm against the Csound command-line flags.
cs.SetOption('-odac')
# Compile the orchestra header that sets sr, ksmps, nchnls and 0dbfs.
cs.CompileOrc(
'''sr = 44100
ksmps = 128
nchnls = 2
0dbfs = 1.0
''')
0
# Start the Csound instance and the performance thread.
cs.Start()
csPerf.Play()
# Define instrument 1. It takes duration, amplitude and frequency from score
# fields p3, p4 and p5, generates a signal with `oscils`, and multiplies it by
# a `line` ramp going from 1 to 0 over the note's duration before sending it
# to both outputs.
cs.CompileOrc(
'''instr 1
idur = p3
iamp = p4
ifreq = p5
k1 line 1, idur, 0
a1 oscils iamp, ifreq, 1
out a1 * k1, a1 * k1
endin
''')
0
cs.ReadScore('i 1 0 1 0.5 440')
0
def event(*args):
    """Send one score event to the global Csound instance ``cs``.

    Every positional argument is converted with ``str`` and the pieces are
    joined with single spaces, so ``event('i', 1, 0, 0.25)`` submits the
    score line ``"i 1 0 0.25"``.
    """
    score_line = ' '.join(str(field) for field in args)
    cs.ReadScore(score_line)
# Play one short note on instrument 1, then eight notes starting at score
# times 0..7, each with a random frequency value between 440 and 880.
# NOTE(review): `random` is not imported in the visible code; presumably it is
# provided by the notebook's pylab namespace — confirm.
event('i', 1, 0, 0.25, 0.707, 262)
for beat in range(8):
    event('i', 1, beat, 1, 0.707, 440 + 440 * random.random())
# Define instrument 2: a `wgpluck2` voice multiplied by a `linen` envelope and
# sent to both outputs. Amplitude and pitch come from score fields p4 and p5;
# the pluck, pick-up and reflection settings are fixed inside the instrument.
cs.CompileOrc(
'''instr 2
iplk = 0.75
kamp = p4
icps = p5
kpick = 0.75
krefl = 0.5
aenv linen 1, 0.01, p3, 0.01
apluck wgpluck2 iplk, kamp, icps, kpick, krefl
out apluck * aenv, apluck * aenv
endin
''')
0
# Eight notes on instrument 2, spaced 0.3 score-time units apart, each with a
# random frequency value between 440 and 880.
for step in range(8):
    event('i', 2, step*0.3, 1, 0.4, 440 + 440 * random.random())
forest_areas_parsed['Colombia'][1]
forest_areas_parsed['Colombia']
[[1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011], [56.3488057683641, 56.2577737719694, 56.1667417755746, 56.0757097791798, 55.984677782785, 55.8936457863903, 55.8026137899955, 55.7115817936007, 55.620549797206, 55.5295178008112, 55.4384858044164, 55.3474538080216, 55.2564218116269, 55.1653898152321, 55.0743578188373, 54.9833258224425, 54.8922938260478, 54.801261829653, 54.7102298332582, 54.6191978368635, 54.5281658404687, 54.4371338440739]]
max(forest_areas_parsed['Colombia'][1])
56.3488057683641
min(forest_areas_parsed['Colombia'][1])
54.4371338440739
# Sonify Colombia's forest-area series: one note per year on instrument 2,
# with the value mapped linearly onto the range base_hz..ceil_hz.
base_hz = 220        # frequency assigned to the series minimum
ceil_hz = 440        # frequency assigned to the series maximum
initial_year = 1990  # year that maps to score time 0
dur = 0.25           # note length, and spacing between consecutive years
amp = 0.1

import itertools

colombia_years, colombia_values = forest_areas_parsed['Colombia']
colombia_max = max(colombia_values)
colombia_min = min(colombia_values)

# Collect the score lines and join them once instead of growing a string
# inside the loop. The built-in zip replaces the Python-2-only
# itertools.izip and pairs the same (year, value) tuples.
# NOTE: a constant series (max == min) would divide by zero below.
score_lines = []
for year, value in zip(colombia_years, colombia_values):
    start_time = (year - initial_year) * dur
    freq = base_hz + (ceil_hz - base_hz) * (value - colombia_min) / (colombia_max - colombia_min)
    score_lines.append("i 2 %f %f %f %f\n" % (start_time, dur, amp, freq))
score = ''.join(score_lines)
print(score)
i 2 0.000000 0.250000 0.100000 440.000000 i 2 0.250000 0.250000 0.100000 429.523810 i 2 0.500000 0.250000 0.100000 419.047619 i 2 0.750000 0.250000 0.100000 408.571429 i 2 1.000000 0.250000 0.100000 398.095238 i 2 1.250000 0.250000 0.100000 387.619048 i 2 1.500000 0.250000 0.100000 377.142857 i 2 1.750000 0.250000 0.100000 366.666667 i 2 2.000000 0.250000 0.100000 356.190476 i 2 2.250000 0.250000 0.100000 345.714286 i 2 2.500000 0.250000 0.100000 335.238095 i 2 2.750000 0.250000 0.100000 324.761905 i 2 3.000000 0.250000 0.100000 314.285714 i 2 3.250000 0.250000 0.100000 303.809524 i 2 3.500000 0.250000 0.100000 293.333333 i 2 3.750000 0.250000 0.100000 282.857143 i 2 4.000000 0.250000 0.100000 272.380952 i 2 4.250000 0.250000 0.100000 261.904762 i 2 4.500000 0.250000 0.100000 251.428571 i 2 4.750000 0.250000 0.100000 240.952381 i 2 5.000000 0.250000 0.100000 230.476190 i 2 5.250000 0.250000 0.100000 220.000000
cs.ReadScore(score)
0
plot(*forest_areas_parsed['Colombia'])
[<matplotlib.lines.Line2D at 0x7e50b6d0>]
# Sonify the first `num` countries at once. Each country's series is
# normalised to its own minimum/maximum and mapped onto base_hz..ceil_hz; all
# countries use the same start times, so their notes overlap.
base_hz = 220
ceil_hz = 440
initial_year = 1990
dur = 0.25
num = 20

score_lines = []
# list(...) before slicing works on both Python 2 (where items() already
# returns a list) and Python 3 (where dict views cannot be sliced).
for country, areas in list(forest_areas_parsed.items())[:num]:
    years, values = areas
    max_areas = max(values)
    min_areas = min(values)
    # The range is constant for a country, so compute it once per country
    # instead of once per sample.
    area_range = max_areas - min_areas
    for year, area in zip(years, values):
        if area_range > 0:
            freq = base_hz + ((area - min_areas) / area_range) * (ceil_hz - base_hz)
            amp = 0.2
        else:
            # Flat series: there is no range to normalise against, so emit a
            # fixed low note with a very small amplitude.
            freq = 110.0
            amp = 0.0001
        # amp is scaled by 1/num (presumably to leave headroom when the
        # countries play together — confirm).
        score_lines.append("i 2 %f %f %f %f\n" % ((year - initial_year) * dur, dur, amp / num, freq))

# Join once instead of repeatedly extending a string inside the loops.
# NOTE(review): an earlier, disabled ReadScore call here was marked "too big!";
# limiting the score to `num` countries presumably addresses that — confirm.
score = ''.join(score_lines)
cs.ReadScore(score)
0
# Plot the series of the first 20 countries (in dict iteration order).
for country, areas in list(forest_areas_parsed.items())[:20]:
    plot(areas[0], areas[1])
# Third sonification of the first `num` countries. Pitch is now normalised
# against the minimum and maximum taken over all selected countries
# (min_areas / max_areas) and mapped onto base_hz..ceil_hz = 220..880.
# NOTE(review): indentation was lost in this export, so the nesting of the
# statements below has to be inferred.
score = ''
base_hz = 220
ceil_hz = 880
initial_year = 1990
dur = 0.25
num = 20
# Extremes over the selected countries, used for the frequency mapping.
max_areas = max(max(areas[1]) for country,areas in forest_areas_parsed.items()[:num])
min_areas = min(min(areas[1]) for country,areas in forest_areas_parsed.items()[:num])
# Time offset that is added to every start time written below.
counter = 0
for country,areas in forest_areas_parsed.items()[:num]:
#print country
for year, area in zip(*areas):
# The country's own range only decides whether the series is flat; the
# frequency itself is computed from the shared min_areas / max_areas.
local_max_areas = max(areas[1])
local_min_areas = min(areas[1])
area_range = local_max_areas - local_min_areas
#print area, area_range
if area_range > 0:
freq = base_hz + ((area - min_areas)/(max_areas - min_areas)) * (ceil_hz - base_hz)
amp = 0.2
else:
# Flat series: fixed low note with a small amplitude.
freq = 110.0
amp = 0.01
score += "i 2 %f %f %f %f\n"%((year - initial_year) * dur + counter, dur, amp/num, freq)
# NOTE(review): presumably this runs once per country (after the inner loop),
# so that the countries are staggered by a fraction of `dur` — confirm the
# intended nesting.
counter += 0.5*dur/num
cs.ReadScore(score)
0
By: Andrés Cabrera mantaraya36@gmail.com
For Course MAT 240F at UCSB
This ipython notebook is licensed under the CC-BY-NC-SA license: http://creativecommons.org/licenses/by-nc-sa/4.0/