# use regular expression to parse log entry
import calendar
import re    # http://docs.python.org/2/library/re.html
import time  # http://docs.python.org/2/library/time.html
from contextlib import closing

# awkward historical log format:
log_entry = 'proxy4.utsa.edu.au 151.217.6.9 - -|- [11/Apr/2013:23:57:14 -0400] [Mozilla/5.0 (Windows NT 6.1; rv:19.0) Gecko/20100101 Firefox/19.0|-=151.217.60.103|0|http://arxiv.org/|proxy4.utas.edu.au.1364191674910933] "GET /find/all/1/all:+2013arXiv13011419D/0/1/0/all/0/1 HTTP/1.0" 200 10737'

# re.match pulls out the objects in ()
mm = re.match(r"(\S+) (\S+) (\S+) (\S+?)\|(\S+) \[(.*?)\] \[(.*)\|(.*?)=(.*?)\|(\d+)\|(.*)\|(.*?)\] \"(.*)\" (\d+) (\S+)", log_entry)
if mm is None:
    # Fail with a clear message instead of an AttributeError on mm.groups().
    raise ValueError("log entry does not match the expected format")
keys = ['host', 'ip', 'logname', 'tapiruid', 'tapirsid', 'datetime', 'ua',
        'xfrom', 'xfor', 'delay', 'referer', 'cookie', 'request', 'status',
        'bytes']
# mm.groups() is the list of matching objects
entry = dict(zip(keys, mm.groups()))

# next need to parse the datetime
_TIME_FORMAT = '%d/%b/%Y:%H:%M:%S'
# "<timestamp> <sign><HH><MM>", e.g. "11/Apr/2013:23:57:14 -0400"
_UTC_OFFSET_RE = re.compile(r'^(.+?)\s+([+-])(\d\d)(\d\d)$')


def timestr_utc(time_string):
    """Convert a log timestamp to integer seconds since the Unix epoch.

    Args:
        time_string: a '%d/%b/%Y:%H:%M:%S' timestamp followed by whitespace
            and either a numeric UTC offset such as '-0400', or a timezone
            name that time.strptime's %Z accepts on this machine (what
            time.strftime('... %Z') produces locally).

    Returns:
        The epoch time as an int, or None (after printing "bad time ...")
        when the string cannot be parsed.
    """
    offset_match = _UTC_OFFSET_RE.match(time_string)
    try:
        if offset_match:
            # A numeric offset pins the instant exactly, so do the arithmetic
            # in UTC rather than depending on the machine's local timezone.
            stamp, sign, hours, minutes = offset_match.groups()
            offset = int(hours) * 3600 + int(minutes) * 60
            if sign == '-':
                offset = -offset
            # wall-clock time = UTC + offset, hence UTC = wall-clock - offset
            return calendar.timegm(time.strptime(stamp, _TIME_FORMAT)) - offset
        # Timezone given by name: interpret it in the local timezone.
        return int(time.mktime(time.strptime(time_string, _TIME_FORMAT + ' %Z')))
    except ValueError:
        print('bad time %s' % (time_string,))
        return None


# Demo output and the network fetch only run when executed as a script, so
# importing this module does not print or touch the network.
if __name__ == "__main__":
    for k in keys:
        print('%s: %s' % (k, entry[k]))

    # current time
    print('current time is %s' % time.time())
    time_string = time.strftime('%d/%b/%Y:%H:%M:%S %Z')
    print('current time string is %s' % time_string)
    utc_time = timestr_utc(time_string)
    print('and converts back to %s seconds' % utc_time)
    print("in years that's roughly %s years" % (utc_time / (60 * 60 * 24 * 365.25)))

    # recall that on 31 Dec 1969 at 7pm eastern time the ball at Times Square descended
    # with great fanfare and announced to the world "0 Unix time"
    for ts in ["31/Dec/1969:18:59:59 -0500",
               "31/Dec/1969:19:00:00 -0500",
               "31/Dec/1969:19:00:01 -0500"]:
        print('%s converts to %s' % (ts, timestr_utc(ts)))

    # http://imgs.xkcd.com/comics/bug.png
    try:
        from urllib2 import urlopen  # Python 2
    except ImportError:
        from urllib.request import urlopen  # Python 3
    # closing() releases the connection even if read() fails.
    with closing(urlopen('http://imgs.xkcd.com/comics/bug.png')) as response:
        bug = response.read()
    from IPython.display import Image, display
    # display() is needed because a bare Image(bug) only renders when it is
    # the last expression of a notebook cell.
    display(Image(bug))

# now check what happens when the clocks were turned
# back at 2a.m., and 1:30a.m. occurred twice:
# (uses `time` and `timestr_utc`, which are defined earlier in this file)
time_string1 = "04/Nov/2012:01:30:13 -0400"
time_string2 = "04/Nov/2012:01:30:13 -0500"
utc1 = timestr_utc(time_string1)
utc2 = timestr_utc(time_string2)
print('%s converts to %s' % (time_string1, utc1))
print('%s converts to %s' % (time_string2, utc2))
# The label lists the operands in the order they are actually subtracted
# (it previously read "utc1 - utc2" while showing utc2 - utc1).
print('%s - %s = %s seconds difference, and convert back to:' % (utc2, utc1, utc2 - utc1))
# then see how they're translated back
print(time.strftime('%d/%b/%Y:%H:%M:%S %Z', time.localtime(utc1)))
print(time.strftime('%d/%b/%Y:%H:%M:%S %Z', time.localtime(utc2)))

# some examples from http://networkx.github.io/documentation/latest/examples/
import networkx as nx

G = nx.Graph()
G.add_node("spam")
G.add_edge(1, 2)
print('nodes: %s' % (G.nodes(),))
print('edges: %s' % (G.edges(),))

# http://networkx.github.io/documentation/latest/examples/drawing/house_with_colors.html
G = nx.house_graph()
# explicitly set positions
pos = {0: (0, 0), 1: (1, 0), 2: (0, 1), 3: (1, 1), 4: (0.5, 2.0)}
nx.draw_networkx_nodes(G, pos, node_size=2000, nodelist=[4])
nx.draw_networkx_nodes(G, pos, node_size=3000, nodelist=[0, 1, 2, 3], node_color='b')
nx.draw_networkx_edges(G, pos, alpha=0.5, width=6)
# NOTE(review): `axis` (and `figure` below) are not defined in the visible
# source; presumably they come from a pylab-style namespace -- confirm.
axis('off')

# http://networkx.github.io/documentation/latest/examples/drawing/ego_graph.html
### just draw friends network of highest degree node in preferential attachment network
# Create a BA model graph
n = 1000
m = 2
from operator import itemgetter
G = nx.generators.barabasi_albert_graph(n, m)
# find node with largest degree
# dict(...) accepts both a plain dict and a degree view of (node, degree) pairs
node_and_degree = dict(G.degree())
(largest_hub, degree) = sorted(node_and_degree.items(), key=itemgetter(1))[-1]
# Create ego graph of main hub
hub_ego = nx.ego_graph(G, largest_hub)
# Draw graph
figure(figsize=(4, 4))
pos = nx.spring_layout(hub_ego)
nx.draw(hub_ego, pos, node_color='b', node_size=50, with_labels=False)
# Draw ego as large and red
nx.draw_networkx_nodes(hub_ego, pos, nodelist=[largest_hub], node_size=300, node_color='r')

# http://networkx.github.io/documentation/latest/examples/drawing/random_geometric_graph.html
# random geometric graph
#
G = nx.random_geometric_graph(200, 0.125)
# position
# is stored as node attribute data for random_geometric_graph
pos = nx.get_node_attributes(G, 'pos')
# find node near center (0.5,0.5)
dmin = 1
ncenter = 0
for n in pos:
    x, y = pos[n]
    d = (x - 0.5) ** 2 + (y - 0.5) ** 2
    # NOTE(review): the source is truncated from "if d" up to "1:" (text
    # between a "<" and a later ">" appears to have been stripped). This
    # branch is the presumed original "remember the closest node so far"
    # update -- confirm against the original notebook.
    if d < dmin:
        ncenter = n
        dmin = d

# NOTE(review): the call below is all that survives of the code after the
# truncation. `Gi` and the loop/condition that enclosed it are not in the
# visible source, so it is kept here disabled rather than guessed at.
# nx.draw_networkx_edges(Gi, pos, with_labels=False, edge_color='r', alpha=0.3, width=5.0)

# http://networkx.github.io/documentation/latest/examples/graph/napoleon_russian_campaign.html
# Minard's data from Napoleon's 1812-1813 Russian Campaign.
# http://www.math.yorku.ca/SCS/Gallery/minard/minard.txt
import string


def minard_graph():
    """Build one path graph per embedded data table, plus a city lookup.

    Each data record is "x,y,p,r,n". Record number i (0-based, per table)
    becomes node i, joined by an edge to node i - 1; the edge carries one
    attribute whose key is the r field and whose value is int(n).
    (r is 'A' or 'R' in the data -- presumably advance/retreat; confirm.)

    Returns:
        (g, c) where
        g -- list of three networkx graphs, one per table, each with two
             extra attributes: ``pos`` maps node -> (float x, float y) and
             ``pop`` maps node -> int(p) (the "size").
        c -- dict mapping city name -> (float x, float y).
    """
    data1 = """\
24.0,54.9,340000,A,1
24.5,55.0,340000,A,1
25.5,54.5,340000,A,1
26.0,54.7,320000,A,1
27.0,54.8,300000,A,1
28.0,54.9,280000,A,1
28.5,55.0,240000,A,1
29.0,55.1,210000,A,1
30.0,55.2,180000,A,1
30.3,55.3,175000,A,1
32.0,54.8,145000,A,1
33.2,54.9,140000,A,1
34.4,55.5,127100,A,1
35.5,55.4,100000,A,1
36.0,55.5,100000,A,1
37.6,55.8,100000,A,1
37.7,55.7,100000,R,1
37.5,55.7,98000,R,1
37.0,55.0,97000,R,1
36.8,55.0,96000,R,1
35.4,55.3,87000,R,1
34.3,55.2,55000,R,1
33.3,54.8,37000,R,1
32.0,54.6,24000,R,1
30.4,54.4,20000,R,1
29.2,54.3,20000,R,1
28.5,54.2,20000,R,1
28.3,54.3,20000,R,1
27.5,54.5,20000,R,1
26.8,54.3,12000,R,1
26.4,54.4,14000,R,1
25.0,54.4,8000,R,1
24.4,54.4,4000,R,1
24.2,54.4,4000,R,1
24.1,54.4,4000,R,1"""
    data2 = """\
24.0,55.1,60000,A,2
24.5,55.2,60000,A,2
25.5,54.7,60000,A,2
26.6,55.7,40000,A,2
27.4,55.6,33000,A,2
28.7,55.5,33000,R,2
29.2,54.2,30000,R,2
28.5,54.1,30000,R,2
28.3,54.2,28000,R,2"""
    data3 = """\
24.0,55.2,22000,A,3
24.5,55.3,22000,A,3
24.6,55.8,6000,A,3
24.6,55.8,6000,R,3
24.2,54.4,6000,R,3
24.1,54.4,6000,R,3"""
    cities = """\
24.0,55.0,Kowno
25.3,54.7,Wilna
26.4,54.4,Smorgoni
26.8,54.3,Moiodexno
27.7,55.2,Gloubokoe
27.6,53.9,Minsk
28.5,54.3,Studienska
28.7,55.5,Polotzk
29.2,54.4,Bobr
30.2,55.3,Witebsk
30.4,54.5,Orscha
30.4,53.9,Mohilow
32.0,54.8,Smolensk
33.2,54.9,Dorogobouge
34.3,55.2,Wixma
34.4,55.5,Chjat
36.0,55.5,Mojaisk
37.6,55.8,Moscou
36.6,55.3,Tarantino
36.5,55.0,Malo-Jarosewii"""

    c = {}
    for line in cities.split('\n'):
        x, y, name = line.split(',')
        c[name] = (float(x), float(y))

    g = []
    for data in [data1, data2, data3]:
        G = nx.Graph()
        G.pos = {}  # location
        G.pop = {}  # size
        for i, line in enumerate(data.split('\n')):
            x, y, p, r, n = line.split(',')
            G.pos[i] = (float(x), float(y))
            G.pop[i] = int(p)
            if i > 0:
                # Keyword form of the edge attribute: accepted by add_edge
                # whether or not it also takes a positional attribute dict.
                G.add_edge(i, i - 1, **{r: int(n)})
        g.append(G)

    return g, c


(g, city) = minard_graph()

# NOTE(review): `figure` and `text` are not defined in the visible source;
# presumably they come from a pylab-style namespace -- confirm.
figure(1, figsize=(11, 5))
colors = ['b', 'g', 'r']
# one colour per graph, in order
for G, c in zip(g, colors):
    node_size = [int(G.pop[n] / 300.0) for n in G]
    nx.draw_networkx_edges(G, G.pos, edge_color=c, width=4, alpha=0.5)
    nx.draw_networkx_nodes(G, G.pos, node_size=node_size, node_color=c, alpha=0.5)
    nx.draw_networkx_nodes(G, G.pos, node_size=5, node_color='k')

# label each city slightly above its position
for c in city:
    x, y = city[c]
    text(x, y + 0.1, c)