#!/usr/bin/env python
# coding: utf-8

# This is a text!

# NOTE: the `# In[n]:` markers and the get_ipython() calls indicate that this
# script is a notebook export. get_ipython() is only defined inside an IPython
# session. Bare expressions such as `3 + 4` are kept for their interactive
# value display; they have no visible effect when run as a plain script.
# print is always called with a single parenthesised argument so the same
# line behaves identically on Python 2 and Python 3.

# In[1]:

print("Hello World!")

# In[2]:

get_ipython().run_line_magic('pylab', 'inline')

# In[3]:

get_ipython().run_line_magic('quickref', '')

# In[ ]:

help("numpy")

# In[5]:

x = 24
help(x)

# In[6]:

get_ipython().run_cell_magic('bash', '', 'echo "Hello World!"\n')

# In[7]:

get_ipython().run_cell_magic('bash', '', "awk '{print;}' data.txt\n")

# In[8]:

get_ipython().run_cell_magic('bash', '', "awk '{print $1;}' data.txt\n")

# # Variables
# * Python variables are dynamically typed.
# * No variable declarations
# * No type safety

# In[9]:

# The same name is rebound to an int, then a float, then a str.
x = 24
x = 3.140372
x = "Hello World!"

# In[10]:

print(x)
type(x)

# In[11]:

3 + 4

# In[12]:

3 - 4

# In[13]:

4 / 4

# In[14]:

4 * 4

# In[15]:

x = 3
x += 3
print(x)

# In[16]:

x / 3

# ## Casting is possible

# In[17]:

# int / int: truncating division on Python 2, true division on Python 3.
x = 1
y = 3
res = x / y

# In[18]:

print(res)
type(res)

# In[19]:

# Converting one operand to float forces floating-point division.
x / float(y)

# In[20]:

x = 1.
y = float(3)
res = x / y

# In[21]:

print(res)
type(res)

# In[25]:

a = "13"
float(a)

# ## String functions

# In[26]:

text = "Hello world!"
len(text)

# In[27]:

text[0:3]

# In[28]:

text[0]

# In[29]:

text.upper()

# In[30]:

text.lower()

# In[31]:

text.find('l')

# In[32]:

text.split(' ')

# ### Strings are immutable!
# In[33]:

# Intentional demonstration: str does not support item assignment, so this
# line raises TypeError.
text[0] = 'A'

# In[34]:

# Build a new string instead of modifying the old one.
'A' + text[1:]

# ## Booleans

# In[35]:

True

# In[36]:

False

# In[37]:

x = None
print(x)

# # Data structures
# ## Lists

# In[38]:

x = [3, 4, 5]
print(x)

# In[39]:

x[0]

# In[40]:

x.append(7)
print(x)

# In[41]:

x[0:2]

# In[42]:

x[-1]

# In[43]:

x[-3:]

# In[44]:

a = x + [3,1]

# In[45]:

a

# In[46]:

len(a)

# In[47]:

a.append("string")

# In[48]:

a

# In[49]:

# The list mixes ints and a str: this sorts on Python 2 but raises TypeError
# on Python 3, where ints and strs are not orderable against each other.
a.sort()
print(a)

# In[50]:

for i in a:
    print(i)

# In[51]:

x = [1,2,3]
a = [i*2 for i in x]

# In[52]:

print(a)

# In[53]:

for x in a:
    print(x)

# In[55]:

for i,x in enumerate(a):
    print("%s %s %s %s" % (i, x, i, x))

# In[56]:

range(4)

# ## Sets
#
# * unordered collection, no duplicates
# * support set-theoretic operations like union or intersection

# In[57]:

a = set()
b = set()
a.add(1)
a.add(2)
b.add(1)
b.add(4)
print("%s %s" % (a, b))

# In[59]:

# Adding an element that is already present leaves the set unchanged.
a.add(1)
print(a)

# In[60]:

# Intentional demonstration: sets do not support indexing, so this raises
# TypeError.
a[0] = 0

# In[61]:

a & b #intersection

# In[62]:

a | b #union

# In[63]:

a - b #difference

# In[64]:

a ^ b #symmetric difference

# In[67]:

# Round-trip through a set to drop duplicates from a list.
a = [1,1,3,5]
b = set(a)
b
a = list(b)
print(a)

# In[68]:

for x in b:
    print(x)

# ## N-Tuples
#
# * Immutable

# In[69]:

a = (1,2)
a

# In[70]:

b = (1,2,3)
b

# In[71]:

# Intentional demonstration: tuples do not support item assignment, so this
# raises TypeError.
a[0] = 0

# In[72]:

a[0]

# In[73]:

# Intentional demonstration: `a` has two elements, so index 2 raises
# IndexError.
a[2]

# In[53]:

zip(a,b)

# In[74]:

a = (3,1,2)
b = (1,1,1)
zip(a,b)

# In[75]:

a = [3,1,2]
b = [1,1,1]
zip(a,b)

# ## Dictionaries
#
# * Key: Value

# In[76]:

a = dict()

# In[77]:

a[0] = "string"
print(a)

# In[78]:

print(a[0])

# In[79]:

# Intentional demonstration: key 1 was never inserted, so this raises
# KeyError.
print(a[1])

# In[80]:

a["string"] = 0
print(a)

# In[81]:

a.keys()

# In[82]:

a.values()

# In[83]:

# items() exists on both Python 2 and 3; iteritems() is Python 2 only.
for key, value in a.items():
    print("%s %s" % (key, value))

# In[84]:

for key in a.keys():
    print("%s %s" % (key, a[key]))

# ## Further useful data structures

# In[ ]:

# In[85]:

from collections import Counter
a = ["c", "a", "b", "a", "c", "c"]
Counter(a)

# In[86]:

from collections import OrderedDict
a = OrderedDict()
a[1] = 3
a[2] = 5
a["string"] = 6
print(a)
a[1]

# In[88]:

from collections import defaultdict
#initialized with a function
a = defaultdict(int)
a[1] += 1
print(a[1])
a[1] += 1
print(a[1])

# In[90]:

# Reading a missing key on a defaultdict(int) inserts it with the value 0.
a = defaultdict(int)
if a[1] == 0:
    print("wow")

# In[91]:

print(a)

# In[107]:

print(a.keys())

# In[89]:

a = dict()
a[1] = 0
a[1] += 1

# In[108]:

# Intentional demonstration: a plain dict raises KeyError for a missing key.
print(a[2])

# In[109]:

a.keys()

# In[92]:

a = defaultdict(lambda: defaultdict(int))
a[1][2] = 3
print(a)
print(a[1])
print(a[1][2])

# ## Performance
# https://wiki.python.org/moin/TimeComplexity

# ## Referencing
# ### Be careful with aliasing when working with mutable objects as Python works with pointers to objects

# In[93]:

# `b` is a second name for the same list object, so the change shows up in `a`.
a = [3,4,5]
b = a
b[0] = 10
print(a)

# # Control flow

# In[94]:

import collections
a = collections.defaultdict()

# In[95]:

from collections import defaultdict
a = defaultdict()

# In[96]:

from collections import *
a = Counter()

# In[100]:

x = True
print("Hello World!")
if x == True:
    print("wohooo")

# In[101]:

if x:
    print("wohooo")

# In[102]:

x = 3
if x == 2:
    print("2")
elif x > 2:
    print("larger than 2")
else:
    print("smaller than 2")

# In[103]:

x = 0
while x < 5:
    print(x)
    x += 1

# In[104]:

x = 3
x >= 3

# In[105]:

3 in [3,4,5]

# In[106]:

# Membership tests on a dict check its keys.
3 in {3:4, 4:4}

# In[107]:

5 in {3:4, 4:4}

# In[108]:

x = 0
while True:
    x += 1
    print(x)
    if x == 5:
        break

# In[109]:

x = 0
while True:
    x += 1
    print(x)
    if x == 5:
        pass
    if x == 6:
        break

# In[110]:

# Intentional demonstration: index 99 is out of range, so this raises
# IndexError.
x = [1,2,3]
x[99]

# In[111]:

x = [1,2,3]
try:
    x[99]
except IndexError:
    # Catch only the error being demonstrated instead of every exception.
    print("cought an exception")
else:
    print("all fine")

# In[112]:

a = [1,2,3]
if 4 in a:
    print("something")

# # File handling

# In[113]:

# `with` closes the file even if the loop body raises.
with open("data.txt") as f:
    for line in f:
        print(line)

# In[116]:

data1 = []
data2 = []
dict_data = {}
with open("data.txt") as f:
    for line in f:
        # Each line is expected to hold at least two space-separated columns.
        columns = line.strip().split(" ")
        data1.append(columns[0])
        data2.append(columns[1])
        dict_data[columns[0]] = columns[1]
print(data1)
print(data2)
print(dict_data)

# In[117]:

# Closing the file here guarantees the appended rows are flushed to disk
# before the file is read again below.
with open("data.txt", 'a') as f:
    f.write("row3col3 row2col3\n")
    f.write("row4col4 row4col4\n")

# In[118]:

with open("data.txt") as f:
    for line in f:
        print(line)

# ## Pandas --> Next tutorial

# # Functions

# In[120]:


def name(para):
    """Return ``para`` incremented by one."""
    para += 1
    return para


name(1)

# In[121]:


def factorial(n):
    """Return n! computed recursively; any ``n <= 0`` yields 1."""
    if n<=0:
        return 1
    return n*factorial(n-1)


factorial(10)

# In[122]:
def func(a):
    """Set the first element of ``a`` to 3 in place.

    Nothing is returned; the change is visible through the caller's own
    reference to the same object.
    """
    a[0] = 3


x = [4,5,6]
func(x)
print(x)

# # Plotting with Matplotlib

# In[123]:

get_ipython().run_line_magic('pylab', 'inline')

# In[10]:

#pip install matplotlib
import matplotlib.pyplot as plt

# In[130]:

x = range(10)
y = range(10)
fig = plt.figure()
plt.plot(x,y, label="linear")

# In[134]:

x = range(10)
y = range(10)
# Pass the size through the `figsize` keyword. The bare `figsize(...)` helper
# used previously is not defined in this file (presumably injected by %pylab),
# and its return value was being passed as the positional figure number.
fig = plt.figure(figsize=(12, 10))
ax1 = fig.add_subplot(2,1,1)
ax2 = fig.add_subplot(2,1,2)
ax1.plot(x,y, label="linear")
y = [factorial(i) for i in x]
ax2.plot(x,y,label="factorial")

# In[135]:

x = range(10)
y = range(10)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(x,y, label="linear")
y = [factorial(i) for i in x]
ax.plot(x,y,label="factorial")

# In[136]:

ax.set_yscale('log')
fig

# # Numpy

# In[43]:

import numpy as np

# In[137]:

a = np.array([1,2,3])

# In[138]:

print(type(a))
print(a)

# In[139]:

a[0]

# In[140]:

a.shape

# In[141]:

a = np.array([[1,2,3],[4,5,6]])

# In[142]:

print(a)

# In[143]:

a[1,2]

# In[144]:

a[0,:]

# In[145]:

a.shape

# In[146]:

np.zeros((3,3))

# In[147]:

np.ones((3,3))

# In[148]:

a

# In[149]:

a * 3

# In[150]:

# `*` multiplies element by element; see a.dot(...) below for the matrix product.
a * a

# In[151]:

a

# In[152]:

a.dot(a.T)

# In[153]:

a = np.random.random((3,3))
a

# In[154]:

from numpy.linalg import eig

# In[155]:

eig(a)

# In[156]:

a = np.array([1,2,3], dtype=np.float32)

# In[157]:

a

# In[158]:

a = np.array([1,2,3], dtype=np.int32)

# In[159]:

a

# # Scipy

# In[161]:

from scipy.sparse import csr_matrix

# In[162]:

a = csr_matrix(np.zeros((3,3)))

# In[163]:

print(a)

# In[164]:

a.toarray()

# In[165]:

a[1,1] = 4.

# In[166]:

print(a)

# # NetworkX
#
# ## Module for the creation, manipulation and study of the structure, dynamics and functions of complex networks.
# In[167]:

import networkx as nx

# In[168]:

G = nx.Graph()

# In[169]:

G.add_node(1)

# In[170]:

G.nodes()

# In[171]:

# Node 2 has not been added explicitly; the G.nodes() call below shows the
# effect of adding an edge to it.
G.add_edge(1,2)

# In[172]:

G.nodes()

# In[173]:

G.edges()

# In[174]:

G.add_edge(2,3)
G.add_edge(1,3)
G.add_edge(3,4)

# In[175]:

plt.figure()
nx.draw(G, with_labels=True)

# In[176]:

nx.draw_circular(G)

# In[177]:

nx.draw_spectral(G)

# In[178]:

G.neighbors(1)

# In[179]:

nx.degree(G)

# In[180]:

nx.clustering(G)

# In[181]:

G.degree(3)

# #Scikit learn
#
# Machine learning library
# http://scikit-learn.org/

# The diabetes dataset consists of 10 physiological variables (age, sex, weight, blood pressure) measured on 442 patients, and an indication of disease progression after one year

# In[10]:

#http://scikit-learn.org/stable/auto_examples/linear_model/plot_ols.html
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model

# Load the diabetes dataset
diabetes = datasets.load_diabetes()

# Use only one feature
# np.newaxis inserts an extra axis so that selecting column 2 below still
# yields a 2-D (samples x 1) array.
diabetes_X = diabetes.data[:, np.newaxis]
diabetes_X_temp = diabetes_X[:, :, 2]

# Split the data into training/testing sets
# The last 20 samples are held out for testing.
diabetes_X_train = diabetes_X_temp[:-20]
diabetes_X_test = diabetes_X_temp[-20:]

# Split the targets into training/testing sets
diabetes_y_train = diabetes.target[:-20]
diabetes_y_test = diabetes.target[-20:]

# Create linear regression object
regr = linear_model.LinearRegression()

# Train the model using the training sets
regr.fit(diabetes_X_train, diabetes_y_train)

# The coefficients
print('Coefficients: \n', regr.coef_)
# The mean squared error
# The value is the mean of the squared residuals, so it is labelled as a
# mean squared error rather than a residual sum of squares.
print("Mean squared error: %.2f"
      % np.mean((regr.predict(diabetes_X_test) - diabetes_y_test) ** 2))
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % regr.score(diabetes_X_test, diabetes_y_test))

# Plot outputs
plt.scatter(diabetes_X_test, diabetes_y_test, color='black')
plt.plot(diabetes_X_test, regr.predict(diabetes_X_test), color='blue',
         linewidth=3)

plt.xticks(())
plt.yticks(())

plt.show()

# In[17]:

diabetes.target

# In[ ]: