# Exploratory script: parse small documents with LibExpat and turn the
# label/value cells of a flight-info table into a dictionary.
#
# Targets Julia 1.x (`String` element types, `replace` with a `Pair`,
# keyword `limit` for `split`).

using LibExpat

names(LibExpat)

# --- Warm-up: parse a tiny document and look at its children ---------------
# NOTE(review): as it appears here this literal contains no markup, while it
# is handed to xp_parse -- confirm the intended document text.
sm = """hi hey yo """
esm = xp_parse(sm)
dump(esm)
esm.name, esm.attr
esm.elements
esm.elements[1] isa AbstractString

# Print every non-blank child with its type; string children are additionally
# reported as payload.
for e in esm.elements
    stre = strip(string(e))
    if !isempty(stre)
        println(stre, " ", typeof(e))
        if e isa AbstractString
            println("Payload: ", stre)
        end
    end
end

# --- Flight-info document ---------------------------------------------------
# `s` must be assigned before it is parsed.
# NOTE(review): as it appears here this literal has no div/table/td markup,
# although the query below selects `td` elements by path -- confirm the
# document text.
s = """
Flight Info - NXXXXXX(Rogers Bleeblah #)
Date Origin Dest Depart Arrive Hobbs Flight Time Ground Time Flight Distance Taxi Distance Fuel Fuel/hr Fuel/nm Altitude Gnd Speed
Mon, May xx, 2010 KMYF XXXX 10:44 12:43 1.92 hrs 1.8 hrs (1:48) 0.12 hrs (0:07) 177.27 nm 1.32 nm 16.69 gal 8.68 gal/hr 0.09 gal/nm 9511 msl 95.21 kts
"""
et = xp_parse(s)

tds = LibExpat.find(et, "/div/table//table//td")
el = tds[1]
typeof(el)
string(el)
el.attr["class"]
get(el.attr, "class", "")

"""
    parse_header(hdr)

Split a header of the form `"<title> - <aircraft id> (<aircraft type> #)"`
into the tuple `(acid, actype)`.

Only the first `-` and the first `(` act as separators, so hyphens or
parentheses inside the id or type are kept. Every `#)` is removed from the
type, and both results are stripped of surrounding whitespace.

# Throws
- `ArgumentError` if `hdr` has no `-`, or no `(` after it.

# Examples
    parse_header("Flight Info - NXXXXXX (Rogers Bleeblah #) ")
    # -> ("NXXXXXX", "Rogers Bleeblah")
"""
function parse_header(hdr)
    titled = split(hdr, '-'; limit=2)
    length(titled) == 2 ||
        throw(ArgumentError("header has no '-' separator: $(repr(hdr))"))
    fields = split(strip(titled[2]), '('; limit=2)
    length(fields) == 2 ||
        throw(ArgumentError("header has no '(' after the id: $(repr(hdr))"))
    acid = String(strip(fields[1]))
    actype = String(strip(replace(fields[2], "#)" => "")))
    return (acid, actype)
end

parse_header("Flight Info - NXXXXXX (Rogers Bleeblah #) ")

# Stripped text of a cell, taken from its first child. This assumes the first
# child exists and is a string; anything else raises an error.
cell_text(td) = String(strip(td.elements[1]))

"""
    collect_cells(tds)

Walk the cells in `tds` and sort them by their `class` attribute:

- `"table_header"`: its text becomes `hdr` and is parsed with `parse_header`;
- `"table_td"`: its text is appended to the values;
- `"table_row_header"`: its text is appended to the labels.

Cells with any other (or no) class are ignored. Returns
`(hdr, acid, actype, labels, values)`; the first three are empty strings when
no header cell is present.
"""
function collect_cells(tds)
    hdr = ""
    acid = ""
    actype = ""
    labels = String[]
    vals = String[]
    for td in tds
        class = get(td.attr, "class", "")
        if class == "table_header"
            hdr = cell_text(td)
            acid, actype = parse_header(hdr)
        elseif class == "table_td"
            push!(vals, cell_text(td))
        elseif class == "table_row_header"
            push!(labels, cell_text(td))
        end
    end
    return (hdr, acid, actype, labels, vals)
end

"""
    value_map(labels, vals)

Pair `labels[i]` with `vals[i]` in a `Dict{String,String}`.

A value whose last character is a digit is stored unchanged; any other
non-empty value is cut down to the text before its first space (which drops a
trailing unit such as `"hrs"`). An empty value is stored as `""`.

Throws a `BoundsError` if `vals` is shorter than `labels`.
"""
function value_map(labels, vals)
    dmap = Dict{String,String}()
    for (i, label) in enumerate(labels)
        v = vals[i]
        dmap[label] = if !isempty(v) && isdigit(last(v))
            v
        else
            first(split(v, ' '))
        end
    end
    return dmap
end

# Doing the collection inside a function keeps `acid`/`actype` from being
# loop-local; the results are bound to the script's global names here.
# `values` intentionally keeps the script's name and shadows `Base.values`.
hdr, acid, actype, labels, values = collect_cells(tds)
acid, actype

dmap = value_map(labels, values)
dump(dmap)