#!/usr/bin/env python # coding: utf-8 # # Python 101 # ### Importing Packages # In[ ]: from numpy import * from pandas import * #using import * brings ALL the packages classes and functions into the name space, #for large packages you can bring in only some parts by doing from [name] import [class/object] #to avoid name space conflicts you can also rename what you import #import pandas and rename it import pandas as pd #import the Series and dataframe classes from pandas import Series, DataFrame #common naming conventions. numpy is np, pandas pd, and matplotlib is plt import numpy as np import pandas as pd import matplotlib.pyplot as plt # ### Declaring Variables # In[ ]: #no data types need to be associated with variables #all variables are "Objects" in python. num = 5 alpha = 'hello world' mixed = str(num) + ' ' + alpha print num print alpha print mixed # ## Lists # ### Creating # In[ ]: #this is a list a=[0,1,2,3,4,5,6,7,8,9] a # ### Slice and Dice # In[ ]: #use [x] to access the item at location x in the list #all lists start at 0 print 'first item', a[0] #you can also index from back by using -1 for last, -2 for "second from last" etc print 'last item', a[-1] #you can "slice" a list using : and :: print 'first three items', a[:3] print 'last three items', a[-3:] print 'start at the 4th item', a[3:] print 'the odd items', a[::2] # In[ ]: #all lists have a length, use len(list_name) to get the number of items in it #mathematical functions can also, in general, be applied to lists if they contain numbers print 'length of list', len(a) print 'largets number in list', max(a) print 'smallest number', min(a) print 'average', mean(a) #we can find the index of the max and min using argmax() and argmin() print 'the largest number in the list is', max(a), 'and is found at index:', argmax(a) # ### Add Items # In[ ]: #items can be added to a list by using list_name.append(item) #add 3 to the list a.append(3) #add 4 a.append(4) a # In[ ]: #finally, we can de dupe a list by using the unique function #we can also sort a list using sort(list_name) print 'sorted list', sort(a) print 'select distinct values', unique(a) # ### Remove Items # In[ ]: #items can be removed from the list using list_name.remove(item) #remove 3 from the list a.remove(3) #remove 4 a.remove(4) a # ### Itterators # In[ ]: #we can itterate over n items using a for loop #a shortcut for making the list [0,...,n-1] is the function range(n) #print the numbers 0 -4 for i in range(5): print i # In[ ]: #print the numbers 0 -4 for i in range(0,5): print i # In[ ]: #print the a list for i in a: print i # In[ ]: #itterating over something and appending is a common way of building lists #create array output=[] #build the list holding the first 4 squares by using a for loop for i in range(5): output.append(i**2) #**2 operator means squared output # In[ ]: #this works but is slow, a faster way to do this is to use list comprehension output2=[i**2 for i in range(5)] output2 # In[ ]: #we can also put conditions in the list comprehension #build the first 10 squares for all the even numbers output3=[i**2 for i in range(10) if i%2==0] # % is means modulus (remainder) output3 # In[ ]: #the zip command lines up two lists together L1=[1,2,3] L2=['x','y','z'] #the output is a list of tuples print zip(L1,L2) # In[ ]: #if they are of different size, it gets chopped off L1=[1,2,3,4] L2=['x','y','z'] #the output is a list of tuples print zip(L1,L2) # In[ ]: #it is very common to itterate over lists using zip for list1,list2 in zip(L1,L2): print list1,list2 # In[ ]: #this can also be done with list comprehension print [(x,y) for x,y in zip(L1,L2)] #we can also make more complex lists output=[(x,y,str(x)+y) for x,y in zip(L1,L2)] #itterate over our output for a nicer looking print statement for z in output: print z #we can also do this differently for a1,a2,a3 in output: print a1,a2,a3 # ### IF ELSE Statements # In[ ]: if 1==1: print 'one equals one' # In[ ]: if 1<0: print 'one is less than zero' else: print '1 does not equal to 0' # In[ ]: if 1<>1: #you can also use the != operator print 'one does not equal to one ' elif 1==0: print '1 is equal to zero' else: print '1 does not equal to 0' # In[ ]: if (1==1 and 0<1): print 'and operator' if (1==1 or 0==1): print 'or operator' # ### WHILE Statements # In[ ]: #counter i = 0 #loop while i < 5 while i < 5: print i #increment counter i = i + 1 # ## Functions # In[ ]: #define functions def SimpleAdd(Number): return Number def SimpleAdd2(N1, N2): return N1 + N2 def SimpleAdd3(N1=2, N2=2): return N1 + N2 #return a 10 print SimpleAdd(10) #return 2 + 5 print SimpleAdd2(2,5) #return 1 + 3 print SimpleAdd3(1,3) #use default parameters print SimpleAdd3() # ## Excel # In[ ]: from xlwt import * # ### Add an image # In[ ]: # Create a workbook wb = Workbook() # Add a sheet/tab ws0 = wb.add_sheet('Picture_Test') # Add picture at location (2,1) # Note: Only accepts bmp files # i.e. ws0.insert_bitmap('C:\Users\username\Desktop/test.bmp', 2, 1) ws0.insert_bitmap('DataFiles\\testpic.bmp', 2, 1) # Write excel file # Note: This will overwrite any other files with the same name wb.save('DataFiles/AddImage.xls') # ### Convert image to BMP # In[ ]: #convert an image file to a .bmp image file using PIL from PIL import Image # Convert to bmp def ConvertImg(file_in,file_out): img = Image.open(file_in) #print len(img.split()) # test if len(img.split()) == 4: # prevent IOError: cannot write mode RGBA as BMP r, g, b, a = img.split() img = Image.merge("RGB", (r, g, b)) img.save(file_out) else: img.save(file_out) ConvertImg('DataFiles/pngInput.png', 'DataFiles/bmpOutput.bmp') # ## Delete Files # In[ ]: import os # Delete file os.remove('DataFiles/bmpOutput.bmp') # ## Dates # In[ ]: from datetime import datetime, date, time # Get today's date CurrentTime = datetime.now() CurrentTime = CurrentTime.strftime("%Y-%m-%d_%H-%M-%S") CurrentTime # ### IO # #### Dataframe to SQL # ##### Import sqlalchemy # In[ ]: from sqlalchemy import * # In[ ]: # DB Parameters ServerName = "devdb4\sql4" Database = "BizIntel" # To create a temp table just add a "#" to the table name # To create a global table just add a "##" to the table name TableName = "#TableCheckTest" # pyobdc must be installed engine = create_engine('mssql+pyodbc://' + ServerName + '/' + Database) conn = engine.connect() # In[ ]: # Required for querying tables metadata = MetaData(conn) ## Create table tbl = Table(TableName, metadata, Column('DateAdded', DateTime), Column('Revenue', Integer) ) # This actually creates a table in the sql database # checkfirst=True >> create if table does not exist tbl.create(checkfirst=True) # In[ ]: # Create data to insert into table # Create a dataframe with dates as your index data = [1,2,3,4,5,6,7,8,9,10] idx = date_range('1/1/2012', periods=10, freq='MS') df = DataFrame(data, index=idx, columns=['Revenue']) # Remove the index if you want to include it in the insert df = df.reset_index() #print df # In[ ]: # Iterate through each of the columns and insert into table for x in df.iterrows(): #print list(x[1]) sql = tbl.insert(list(x[1])) conn.execute(sql) # select all form table sql = tbl.select() result = conn.execute(sql) for row in result: print 'Write to SQL', row # #### SQL to Dataframe # In[ ]: # Select all sql = tbl.select() # run sql code result = conn.execute(sql) # In[ ]: # Insert to a dataframe df2 = DataFrame(data=list(result), columns=result.keys()) # Convert data types df2.DateAdded = df2.DateAdded.astype('datetime64') df2.Revenue = df2.Revenue.astype('int') print ' ' print 'Data Types' print df2.dtypes # Set index to dataframe df2 = df2.set_index('DateAdded') print ' ' print 'Read from SQL', df2 #print df2.head().to_string() # Close connection conn.close() # #### Clipboard to Dataframe # In[ ]: # Copy paste data into a dataframe # Make sure you first copy data before running the code # This example assumes you are copying data with the first column composed of dates # index_col=0, means the date column is in the first column # parse_dates=True, makes sure dates are converted to datetime datatypes df = read_clipboard(index_col=0,parse_dates=True) df # This has worked for me in SQL, Excel, and LibreOffice # #### Dataframe to Excel # In[ ]: # Export dataframe to Excel # you can export to both .xls and .xlsx df.to_excel('dfToExcel.xls','Sheet Name') # #### Dataframe to Excel (alternate) # In[ ]: # Import packages from xlwt import * from pandas import * import numpy as np # In[ ]: # Create a dataframe with dates as your index data = [1,2,3,4,5,6,7,8,9,10] idx = date_range('1/1/2012', periods=10, freq='MS') df = DataFrame(data, index=idx, columns=['Revenue']) df # In[ ]: # Create a workbook wb = Workbook() # Add a sheet/tab ws0 = wb.add_sheet('DataFrame_Test') # In[ ]: # Write dataframe # formatting values in excel date_xf = easyxf(num_format_str='DD/MM/YYYY') # sets date format in Excel num_xf = easyxf(num_format_str='#0.000000') # sets date format in Excel # Iterate through each of the columns and insert into sheet for i, (date, row) in enumerate(df.T.iteritems()): #print i, date, row[0],type(row[0]).__name__ # Skip first line if i > 0: if type(date).__name__ == 'Timestamp': ws0.write(i,0,date,date_xf) elif type(date).__name__ == 'str': ws0.write(i,0,date) else: ws0.write(i,0,date.astype(np.float),num_xf) if type(row[0]).__name__ == 'Timestamp': ws0.write(i,1,row[0].astype(np.float),date_xf) elif type(row[0]).__name__ == 'str': ws0.write(i,1,row[0].astype(np.float)) else: ws0.write(i,1,row[0].astype(np.float),num_xf) # In[ ]: # Remove the index if you want to include it in the insert df = df.reset_index() # Rename columns df.columns = ['DateAdded', 'Revenue'] # Add column Headers for i, cols in enumerate(df.columns): #print i, cols ws0.write(0,i,cols) # In[ ]: # Write excel file # Note: This will overwrite any other files with the same name wb.save('DFtoExcel.xls') # ## Matplotlib # In[ ]: #import statement (automatic in pylab) import matplotlib.pyplot as plt # In[ ]: #make some data x=linspace(1,50,100) # linspace(start,end,num_points) y=np.sin(x) #a line graph: plot(x,y) # In[ ]: #a scatter plot scatter(x,y) # In[ ]: #another way to make a scatter plot plot(x,y,'x') # In[ ]: #by default all the plots go into the same figure. we can make a new figure by calling figure() #we can also get a refrence to the figure #WITHOUT calling figure() #a line graph: plot(x,y) #a scatter plot scatter(x,y) # In[ ]: #calling figure() #a line graph: plot(x,y) fig=figure() #a scatter plot scatter(x,y) # In[ ]: #plot the data, show data points as x's, connect them with a line, make it red and kinda see through #name the data plt.plot(x,y, 'x-', color='red', alpha=.5, label='the data') #add a title plt.title('The Title') #name the axis xlabel('x axis label') ylabel('y axis label') #the legend plt.legend(loc='best') #loc = 'best' tries to make the legend not overlap the data #turn a grid on plt.grid() #save the figure as an image #this will create a ".png" file in the file location you run the code in plt.savefig('test_fig.png') # In[ ]: #subplots and multiple lines t=linspace(1,100,100) x1=np.sin(20./(2*pi) * t) x2=np.sin(40./(2*pi)*t) x3=x1+x2 figure() subplot(2,1,1) plot(t,x1,label='x1') plot(t,x2,label='x2') plot(t,x3,label='x3') legend(loc='best') subplot(2,1,2) #plot a histogram #we save the histogram data but it is not needed to do so for plotting purposes x1hist=hist(x1,normed='true', bins=25, alpha=.25,label='x1') legend(loc='best') # In[ ]: #more histograms x1=np.random.randn(1000) x2=2.0*np.random.randn(1000)+2 x1h=hist(x1,bins=50,alpha=.25) x2h=hist(x2,bins=50,alpha=.25) # ## BaseMap # In[ ]: from mpl_toolkits.basemap import Basemap import matplotlib.pyplot as plt # In[ ]: #setup the projection m = Basemap(projection='merc',llcrnrlat=20,urcrnrlat=50,\ llcrnrlon=-130,urcrnrlon=-60,lat_ts=20,resolution='i') # In[ ]: #create some points to plot #provided is a csv file with the lat/long of US states import pandas as pd from pandas import DataFrame data=pd.read_csv('DataFiles\\usa_lat_long.csv') #it has some duplicates data.head() # In[ ]: #I just want the lat/long in a list points=data[['longitude','latitude']].values # In[ ]: #transform the points into map coordinates transform_points=[m(lng, lat) for lng, lat in points] # In[ ]: #do all the drawing: fig=figure(figsize=(10,10)) #make a larger than default image fig.add_subplot(1,1,1) #not strictly required m.drawmapboundary(fill_color='white') m.fillcontinents(color='white',lake_color='white') m.drawcoastlines(color='black', linewidth=.3) m.drawcountries(color='black', linewidth=.3) m.drawstates(color='black', linewidth=.3) #plot the points on the map. These are just regular calls to matplotlib with x,y data #you could also do this in one shot by using plot(xlist, ylist...) #or using scatter(). for x,y in transform_points: plot(x,y,'o',color='red',ms=10*rand()) #plot them at random sizes #we can plot some labels text(transform_points[7][0],transform_points[7][1],'California',fontsize=15) text(transform_points[12][0],transform_points[12][1],'Florida',fontsize=15) #draw some great circles lng1,lat1 =points[12] lng2,lat2 =points[7] m.drawgreatcircle(lng1,lat1,lng2,lat2,linewidth=3,color='blue',alpha=.5) # ## Scipy Solver # In[ ]: # Objective Function #Max C = 2x + 3y # Constraints #2x+y<=15 #x+3y<=20 #x>=0 #y>=0 # Solution #(5,5) # In[ ]: from scipy.optimize import minimize # In[ ]: # Since the objective is to maximize C, we # multiple the objective function by -1 # x[0] equals x # x[1] equals y ## Objective Function fun = lambda x: -1*(2*x[0] + 3*x[1]) # In[ ]: # Equality constraint are defined as follows: # This is not in the correct format: x = -2 # Equation has to always be equal to zero: x + 2 = 0 # i.e. {'type': 'eq', 'fun': lambda x: x[0] + 2} #------------------------------------------------------------- # Inequality contraints are defined as follows: # This is not in the correct format: 2x+y<=15 # Equation has to always be greater than or equal to zero: 0 <= 15 + -2x + -y ## Contraints cons = ({'type': 'ineq', 'fun': lambda x: 15 + -2*x[0] + -1*x[1]}, #15-2x-y>=0 {'type': 'ineq', 'fun': lambda x: 20 + -1*x[0] + -3*x[1]}, #20-x-3y {'type': 'ineq', 'fun': lambda x: x[0]}, #x>=0 {'type': 'ineq', 'fun': lambda x: x[1]}) #y>=0 # In[ ]: # STILL NOT SURE WHAT THIS MEANS ## Bounds bnds = ((None, None), (None, None)) # In[ ]: ## Solver # = minimize(Objective Function, Initial Guess, Solver to use, Bounds, Constraints) res = minimize(fun, (0, 0), method='SLSQP', bounds=bnds, constraints=cons) res # solution is in res.x # ## SQLAlchemy # In[ ]: from sqlalchemy import * # ### Connecting # In[ ]: # Parameters ServerName = "Type Server Name Here" Database = "Type database name here" TableName = "Type table name here" # Note: pyobdc must be installed engine = create_engine('mssql+pyodbc://' + ServerName + '/' + Database) conn = engine.connect() # ### SELECT Statements # In[ ]: # Required for querying tables metadata = MetaData(conn) # Table to query tbl = Table(TableName, metadata, autoload=True, schema="dbo") # select all where sql = select(tbl.c.Column1 == 'Type value to match') # select specific columns sql = select([tbl.c.Column1, tbl.c.Column2, tbl.c.Column3]) # select top N sql = select([tbl.c.Column1, tbl.c.Column2, tbl.c.Column3],limit=10) # select specific column and a where clause sql = select([tbl.c.Column1], tbl.c.Column1 == 'Type value to match') # and, or, not, in sql = select([tbl], and_(tbl.c.Column1 < 4, tbl.c.Column1 != 1)) sql = select([tbl], or_(tbl.c.Column1 < 4, tbl.c.Column1 != 1)) sql = select([tbl], not_(tbl.c.Column1 > 4)) sql = select([tbl], tbl.c.Column1.in_( (1,4) )) # like, between sql = select([tbl], tbl.c.Column1.startswith('M')) sql = select([tbl], tbl.c.Column1.like('%a%')) sql = select([tbl], tbl.c.Column1.endswith('n')) sql = select([tbl], tbl.c.Column1.between(30,39),limit=10) result = conn.execute(sql) for row in result: print row # Close connection conn.close() # ## IPython NoteBook # In[ ]: # Import related packages from IPython.display import YouTubeVideo,HTML,Image # In[ ]: # Embed Videos YouTubeVideo('BHEr1dBc0yM'); # In[ ]: # Embed WebPages HTML(''); # In[ ]: # Embed Images Image(filename='DataFiles\infiniteenergy.jpg'); # In[ ]: