#!/usr/bin/env python
# -*- coding: utf-8 -*-
import xml.etree.ElementTree as ET
# Path of the OpenStreetMap XML file that is read as input.
# Adjacent string literals inside parentheses are joined at compile time, so
# the long path can span several lines; a trailing ``+`` at the end of a line
# without parentheses is a syntax error.
OSM_FILE = (
    "/media/vagner/Seagate Expansion Drive/500Gb/"
    "DiskExternoVagner/Cursos/CientistaDados/Modulo_4/"
    "DW-OSM-RMSP/sao-paulo_brazil.osm"
)

# Path of the sample file that is written as output.
SAMPLE_FILE = (
    "/media/vagner/Seagate Expansion Drive/500Gb/"
    "DiskExternoVagner/Cursos/CientistaDados/Modulo_4/"
    "DW-OSM-RMSP/sample.osm"
)

k = 10  # Parameter: take every k-th top level element
def get_element(osm_file, tags=('node', 'way', 'relation')):
    """Yield each fully parsed element whose tag is one of ``tags``.

    The file is parsed incrementally with ``ET.iterparse`` instead of being
    loaded as a whole tree.

    Args:
        osm_file: File name or file object handed to ``ET.iterparse``.
        tags: Container of tag names to yield; elements with any other tag
            are skipped.

    Yields:
        The ``Element`` for every ``'end'`` event whose tag is in ``tags``,
        in document order.

    Reference:
    http://stackoverflow.com/questions/3095434/inserting-newlines-in-xml-file-generated-via-xml-etree-elementtree-in-python
    """
    context = iter(ET.iterparse(osm_file, events=('start', 'end')))
    # The first event is the 'start' of the document root; keep a handle on
    # the root so the children collected under it can be dropped later.
    _, root = next(context)
    for event, elem in context:
        # Only 'end' events guarantee that the element is completely parsed.
        if event == 'end' and elem.tag in tags:
            yield elem
            # Once the consumer resumes the generator, detach everything
            # gathered under the root so far instead of retaining it.
            root.clear()
# Build SAMPLE_FILE from every k-th element yielded by get_element(OSM_FILE).
# The file is opened in binary mode and ET.tostring(..., encoding='utf-8')
# returns bytes on Python 3, so the literal fragments are bytes literals too;
# writing text literals to a binary file raises TypeError on Python 3.
with open(SAMPLE_FILE, 'wb') as output:
    output.write(b'<?xml version="1.0" encoding="UTF-8"?>\n')
    output.write(b'<osm>\n ')

    # Write every kth top level element
    for i, element in enumerate(get_element(OSM_FILE)):
        if i % k == 0:
            output.write(ET.tostring(element, encoding='utf-8'))

    # Close the wrapper element opened above.
    output.write(b'</osm>')