#!/usr/bin/env python
# coding: utf-8

# # Python Basic Concepts

# In[1]:


# Build the notebook table of contents (project-local helper).
import addutils.toc
addutils.toc.js(ipy_notebook=True)


# In[2]:


# Apply the course style sheet to the notebook (project-local helper).
from addutils import css_notebook
css_notebook()


# ## 1 Why Python?

# As far as scientific computing is concerned, it's hard to find a good alternative to Python. Python is the do-it-all language. If you want to perform a statistical analysis, then model some data, and then come up with a GUI and web platform to share with other users, you can perfectly do this with Python.
#
# Nevertheless, Python tutorials for Data Analysis in Engineering, Finance and Scientific applications are difficult to find.
# For this reason we made a complete set of courses and tutorials to address the scientist's and engineer's needs:
#
# * **Get data** (simulation, experiment control)
# * **Manipulate and process data.**
# * **Visualize results**... to understand what we are doing!
# * **Communicate results:** produce figures for reports or publications, write presentations.
#
# We use Python because it's:
#
# * **OPEN**: The Python implementation is under an open source license that makes it freely usable and distributable, even for commercial use.
# * **BATTERIES INCLUDED**: Rich collection of advanced scientific computing libraries and general libraries: we don’t want to re-program the plotting of a curve, a Fourier transform or a fitting algorithm. Don’t reinvent the wheel!
# * **FRIENDLY and EASY TO LEARN**: Python allows you to do almost anything possible
# with a compiled language (C/C++/Fortran) without requiring all the complexity. It is
# extensible in C or C++. Clear syntax enhances readability: “Executable Pseudo Code”
# * **RUNS EVERYWHERE**: It runs on many Unix variants, on the Mac, and on PCs under MSDOS,
# Windows, Windows NT, and OS/2, Android and many other platforms.
#
# ### Alternatives to Python…
#
# * **Compiled languages: C, C++, Fortran, etc.**
#     * *Advantages:* Fast, Optimized compilers, Very optimized scientific libraries. Example: BLAS (vector/matrix operations)
#     * *Drawbacks:* Painful usage: These are difficult languages for non computer scientists.
#
# * **Scripting languages: Matlab**
#     * *Advantages:* Very rich collection of libraries, Fast execution, Good development environment.
#     * *Drawbacks:* Base language quite poor, can become restrictive for advanced users, Expensive.
#
# * **Other scripting languages: Scilab, Octave, Igor, R, IDL, etc.**
#     * *Advantages:* Open-source, free, or at least cheaper than Matlab, Some advanced features (statistics in R, figures in Igor, etc.)
#     * *Drawbacks:* Fewer available algorithms than in Matlab; some of these tools are very powerful, but they are restricted to a single type of usage.

# ## 2 Python101

# ### 2.1 Two important language features:

# 1. *Python is interpreted*
#     1. The code doesn't require compilation
#     2. In IPython Notebook, code in cells is executed immediately
# 2. ***The indentation is part of the syntax***

# ### 2.2 The bricks of Python are:

# * Built-in operators and math functions: +, -, log, sqrt (the last two live in the `math` module), and so on.
# * Built-in high level data types: strings, lists, dictionaries, etc.
# * Control structures: if, if-else, if-elif-else, while, plus a powerful collection iterator (for).
# * Multiple levels of organizational structure: functions, classes, scripts, modules, and packages. These assist in organizing code. An excellent and large example is the [Python standard library](https://docs.python.org/3/library/).
#
# The operators are much like those in Matlab.
# We will see and play later with the different data types and control structures, which are very handy and useful.
#
# We defer functions and classes discussion to a more advanced tutorial. However, functions are much like Matlab functions, and classes are the basic concept of object-oriented programming, very useful for more structured, large projects. Put simply, an object is an instance of a class, as the Colosseum is an instance of the buildings class.
# ### 2.3 Variables, as everything else, in Python are objects

# Objects have many properties. For example every object has a unique **id**. In the following example three variables are assigned on the same line of code, then `d = a` defines `d` to be the `a` object. In other words, `d` and `a` are two different names for the same object: this is confirmed by the same object id.

# In[3]:


a, b, c = 5, 6, 7
d = a                       # 'd' is a second name for the object bound to 'a'
print(a, d, id(a), id(d))


# Unlike "real" pointers like those in C and C++, things change when we assign a new value to `d`. In this case `d` is rebound to a different object (with a different id), while `a` is left untouched. Basically, Python creates real copies only if it has to, i.e. if the user, the programmer, explicitly demands it:

# In[4]:


d = 8                       # Rebinding 'd' does not affect 'a'
print(a, d, id(a), id(d))


# Pay attention to this behaviour (the assignment creates an alias, not a copy) when working with mutable objects (check the next chapters). In this case we reassign an element of list_02: the list object itself is still the same one, so list_01 still has the same id() as list_02 and consequently shows the update:

# In[5]:


list_01 = [1, 2, 3]
list_02 = list_01           # Same list object, two names
list_02[1] = 5
print(list_01)              # Printed explicitly: a bare expression shows nothing outside a notebook


# **'isinstance'** checks if the passed value is an instance of one of the listed types: in this case 'a' is an int:

# In[6]:


print(isinstance(a, (int, float, bool)))


# ### 2.4 Mutable / Immutable Objects

# * **Mutable Objects** can be modified after being created (elements can be changed).
# * **Immutable objects** can be read but not modified (rewritten) after being created. For example, a string is immutable, so you cannot add characters to a string without reassigning the string itself.
#
# Some Examples:
#
# * **Strings** are IMMUTABLE
# * **Lists** are MUTABLE
# * **Tuples** are IMMUTABLE
# * **Sets** are MUTABLE
# * **Dictionaries** are MUTABLE

# ### 2.5 Scripts, modules and namespaces

# Some words on the organizational structure of Python code:
#
#
# * A **script is the operational unit of programming**: it is a collection of many constructs, built using operators, datatypes, control structures, functions and classes, logically connected into a single body and saved as a single file with the .py or .pyw extension, that accomplishes a complete programming task.
# * You can run a script from the Python interpreter or from IPython.
# * You can import the functions of a script into another script thanks to the import statement: then you treat it as a module.
# * **Packages are collections of modules**, stored into a single folder that can have multiple folders, each corresponding to a subpackage. Each folder contains a special file, named `__init__.py`, that can be empty, that signals that the folder is a (sub)package. [Numpy](http://www.scipy.org/NumPy_for_Matlab_Users) and [Matplotlib](http://matplotlib.org/gallery.html) are examples of packages.
#
#
# Python is expanded by modules. To use a module it must first be imported. There are three ways to import modules:
#
# * `import modulename` - will preserve the full package name in the namespace. To use a module keyword in the code you will use `modulename.keyword`
# * `import modulename as name` - will replace the full package name with a suitable alias. To use a module keyword in the code you will use `name.keyword`
# * `from modulename import *` - *THIS IS NOT ADVISABLE IN MOST CASES*: will include the package keywords in the base namespace, this means that some keywords could be overwritten. To use a module keyword in the code you will use `keyword`
#
# Some examples:
#
#     import math            # Then math. must be used before using any command
#     import numpy as np     # Then the alias np. must be used before any command
#     from pandas import *   # Import EVERYTHING in the current namespace

# In[7]:


import math                  # Then 'math.' must be used before using any command
print(math.sin(3))           # Printed explicitly: a bare expression shows nothing outside a notebook


# ## 3 Strings

# Strings can be defined with both double or single quotes. Escape codes like `\t [tab]`, `\n [newline]` or `\xHH [special character]` can be used. A string can be repeated k times by using `*k`

# In[8]:


a, b, c = 'hello', "HELLO", "Hello, how's going?"
print(a, b, c, sep="-"*5)


# The **in** operator can be used to find substrings:

# In[9]:


a = '\t abcdefβγδ♻_gh \n '
print('δ♻' in a)


# `strip` is one of the most used functions while working with strings in Python. **Discover by yourself what it is** by using `?` !

# In[10]:


print(a.strip())


# ***Try by yourself*** the power of `split` by running the following code (`strip`, `split` and many other functions can be put in the same statement by using the **'.'** operator):
#
#     a.strip().split('_')
#     b = '236 23 32 23 55'
#     b.split()

# In[11]:


b = '236 23 32 23 55'
print(b.split())


# ***Try by yourself*** the following commands:
#
#     c = a.strip()
#     print(c)                                      # 'abcdefβγδ♻_gh'
#     print(c.upper())                              # 'ABCDEFΒΓΔ♻_GH'
#     print(c.title())                              # 'Abcdefβγδ♻_Gh'
#     print(c.center(30,'='))                       # '========abcdefβγδ♻_gh========='
#     print(c.find('c'))                            # 2 (index starts from zero)
#     print(c.split('_'))                           # ['abcdefβγδ♻', 'gh']
#     print(c.replace('_',''))                      # 'abcdefβγδ♻gh'
#     print(' *** '.join(['one', 'two', 'three']))  # one *** two *** three
#

# In[ ]:


# **Exercise:** format the following string and remove trailing and leading whitespace characters and internal separation characters, format the name to have the first letter capitalized and the others lowercase (output must be: **Johnn Richard Thompson**). Everything can be done in just one line!
# In[12]:


name = ' JOHNN - Richard-Thompson '
# strip -> drop outer blanks; replace -> dashes become blanks; title -> capitalise
# each word; split/join -> collapse any run of blanks into a single space.
print(' '.join(name.strip().replace('-',' ').title().split()))


# More examples for `split`:
# ```python
# # Split
# s1 = '236 23 32 23 55'
# s1.split()               # ['236', '23', '32', '23', '55'] - Multiple separators
#
# s3 = '236 32 || 23 ||32--44||2|5||6'
# s3.split('||')           # ['236 32 ', ' 23 ', '32--44', '2|5', '6']
#
# s3.split('||', 1)        # ['236 32 ', ' 23 ||32--44||2|5||6']
#
# # Dealing with multiple separators using 'split'
# s4 = 'a;b,c;d'
# s4.replace(';',',').upper().split(',')
#
# # Alternative solution: 'regexp'
# import re
# phrase = "Hey, '32' you - what are you doing here???"
# print(' '.join(re.findall(r'\w+', phrase)))
# ```

# ## 4 String formatting

# This shows how the string method `.format()` works for positional parameters:

# In[13]:


print('First Argument: {} --- second one: {}'.format(47.99,11.55))
print('First Argument: {0} --- second one: {1}'.format(47.99,11.55))
print('First Argument: {1} --- second one: {0}'.format(47.99,11.55))


# This shows how to use keyword parameters:

# In[14]:


print('First Argument: {a} --- second one: {b}'.format(b=47.99, a=11.55))


# Of course the parameters can be formatted individually:

# In[15]:


print('First Argument: {a:08.0f} --- second one: {b:08.3f}'.format(b=47.99, a=11.55))


# Conversion Table (these codes come from printf-style `%` formatting; `str.format()` accepts most of them, but not `i`, `u` or `r`: use `d` for integers and the `!r` conversion for repr()):
#
# * d Signed integer decimal.
# * i Signed integer decimal.
# * o Unsigned octal.
# * u Unsigned decimal.
# * x Unsigned hexadecimal (lowercase).
# * X Unsigned hexadecimal (uppercase).
# * e Floating point exponential format (lowercase).
# * E Floating point exponential format (uppercase).
# * f Floating point decimal format.
# * F Floating point decimal format.
# * g Same as "e" if exponent is less than -4 or not less than precision, "f" otherwise.
# * G Same as "E" if exponent is less than -4 or not less than precision, "F" otherwise.
# * c Single character (accepts integer or single character string).
# * r String (converts any python object using repr()).
# * s String (converts any python object using str()).
# * % No argument is converted, results in a "%" character in the result.
#
# Flags:
#
# * "#" Used with o, x or X specifiers the value is preceded with 0o, 0x or 0X respectively.
# * "0" The conversion result will be zero padded for numeric values.
# * "-" The converted value is left adjusted
# * " " If no sign (minus sign e.g.) is going to be written, a blank space is inserted before the value.
# * "+" A sign character ("+" or "-") will precede the conversion (overrides a "space" flag).
#
#
#

# In[16]:


x = 12222
a = 'βγδ♻'
print('Signed Integer Decimal: {0:12d}'.format(x))
print('Signed Integer Decimal with thousands separator: {0:12,d}'.format(x))
print('Signed Integer Decimal padded with zeroes: {0:012d}'.format(x))
print('Signed Integer Decimal padded with zeroes signed: {0:+012d}'.format(x))
print('Signed Integer Decimal leading space if positive: {0: 012d}'.format(x))
print('Signed Integer Decimal aligned to the left: {0:<12d}'.format(x))
print('Signed Integer Decimal centered: {0:^12d}'.format(x))
print('Floating point decimal format: {0:12.2F}'.format(x))
print('Unsigned hexadecimal (uppercase): {0:12X}'.format(x))
print('Unsigned hexadecimal (uppercase): {0:#012X}'.format(x))   # '#' adds the 0X prefix
print('Unsigned octal: {0:12o}'.format(x))
print('Floating point exponential format (lowercase): {0:12e}'.format(x))
print('String using repr(): {0!r}'.format(a))
print('String using str(): {0!s}'.format(a))
print('-'*21, '\n')  # '-'*21 is the same as '-'+'-'+'-'+... 21 times
print('{0:>22s}: {1:012d}'.format('Description', x))
print('{0:>22s}: {1:012d}'.format('Description x2', x*2))
print('{0:>22s}: {1:012d}'.format('Longer Description x4', x*4))


# ## 5 Lists

# Lists are ordered, non-homogeneous containers. The main properties of Python lists are the following:
#
# * They are **MUTABLE**, i.e. the elements can be changed without redefining the list object
# * They contain arbitrary objects
# * They are ordered
# * Elements can be accessed by an index
# * They are arbitrarily nestable, i.e. they can contain other lists as sublists
# * Variable size
# * The index starts from 0, not from 1!

# In[17]:


ls = [2, 3, 4, 5, 'six', 9]
ls[-1] = 8  # Redefine the last element
print(ls)


# ***Try by yourself*** the following commands:
# ```python
# ls = [1, 2, 3]
# ls.append([11, 12, 'one'])
# ls.extend([33,44])
# ls.insert(2,[55,66])
# ls[1:1] = [77, 88, 99]          # See 'slicing' next
# ls = ls + ['aa', 'bb']
# ```

# In[18]:


ls[1:1] = [77, 88, 99]  # An empty slice as target inserts without replacing anything
print(ls)


# ***Try by yourself*** more commands:
# ```python
# ls = [5, 6, 3, 7, 3, 9, 7]
# ls.sort()
# ls.reverse()
# ls.pop()
# ls.count(7)
# len(ls)          # Length
# range(10)        # Generate a sequence of integers (use list(range(10)) to get a list)
# range(4,20,3)    # range(start, stop, step)
# ```

# In[19]:


ls = [5, 6, 3, 7, 3, 9, 7]
print(ls.count(7))  # Printed explicitly: a bare expression shows nothing outside a notebook


# `sort` can be used with a secondary sort key (a function to generate the key): in this case the sort key is the length of the strings

# In[20]:


ls = ['Zr', 'wax', 'grid', 'I', 'Sir', 'zirconium']
ls.sort(key=len)
print(ls)


# `sort` modifies the list (sort in place). If you don't want to modify the list use the 'sorted' function

# In[21]:


print(sorted(ls))


# `in` checks if one element is in the list

# In[22]:


print('wax' in ls)


# `index` finds the position of a given element in a list

# In[23]:


print(ls, ls.index('grid'))


# Lists can be iterated with `for`. In Python the index is not required but you can have one if you need it for your purposes. Check the following two examples

# In[24]:


for string in ls:
    print(string.rjust(10))


# In[25]:


for index, string in enumerate(ls):
    print(index, string.rjust(10))


# ***List comprehension*** is one of the most important constructs in Python. The general syntax is:
#
#     [expression(argument) for argument in list if boolean_expression]
#
# Expression can contain control structures such as `if ... else`.
# Let's see one example. Imagine starting from a list of numbers and building a second list containing just the string representation of the numbers that can be divided by three (in Python `x%y` is the remainder of the division x/y):

# In[26]:


numbers = range(4, 20)
# 'not number%3' is True exactly when the remainder is 0
strings = [str(number) for number in numbers if not number%3]
print(strings)


# ## 6 Slicing

# Slicing can be done on any sequential object (like strings and lists) and is used to extract (slice) a part of the object, or to delete or add elements to the object.
#
# It works like this:
#
#     s[begin: end: step]
#
# The resulting sequence consists of the following elements (for a positive step):
#
#     s[begin], s[begin + 1*step], ... s[begin + i*step] for all (begin + i*step) < end
#
# ***Try by yourself*** some slicing on a string:
# ```python
# s = 'abcdefghi' + '123'        # s is a string
# s[:4]
# s[5:]
# s[::2]
# ls = list('abcdefghi')         # ls is a list
# ls[-1:-1] = ['i', 1, 2, 3]
# ls[0:3] = ['A', 'B', 'C']
# s1 = ''.join(str(s) for s in ls)
# ```

# In[27]:


s = 'abcdefghi' + '123'        # s is a string
print(s[:4])                   # Printed explicitly: a bare expression shows nothing outside a notebook
ls = list('abcdefghi')         # ls is a list
ls[-1:-1] = ['i', 1, 2, 3]     # Empty slice just before the last element: the new items are inserted there
print(ls)


# ## 7 Sets

# Sets are unordered collections of **UNIQUE** elements. The main properties of Python sets are the following:
#
# * Sets are **MUTABLE** but CANNOT contain mutable objects
# * To have an **IMMUTABLE** set use `frozenset`
# * Sets cannot be indexed: to index, sets must be transformed into a list
# * Variable size
# * Once transformed into a list, the index starts from 0, not from 1!
#

# In[28]:


set1 = {1, 2, 'a', 7, 7, 9}    # The duplicated 7 is stored only once
set1.add('xyz')
set2 = set('54677788')         # A string is split into its unique characters
print(set1, set2)


# ***Try by yourself*** the following commands:
# ```python
# set1 & set2          # AND
# set1 | set2          # OR
# set1 ^ set2          # XOR
# set1 - set2
# ls = list(set1)      # To index a set, first transform it to a list
# ```

# In[29]:


import ipywidgets
from IPython.display import display

# NOTE(review): width / border_color / color / background_color are passed as
# plain widget keyword arguments here; newer ipywidgets releases may expect
# them through `layout=` / `style=` instead - confirm against the installed version.
text01 = ipywidgets.Text(value='123ab')
text02 = ipywidgets.Text(value='345bc')
text03 = ipywidgets.Text(description='JOIN:', value='---', width=450,
                         border_color='black', color='white', background_color='green')
button01 = ipywidgets.Button(description='JOIN SETS', tooltip='Join', value=False,
                             border_color='black', color='white', background_color='red')
box01 = ipywidgets.HBox(children=[text01, text02])
box02 = ipywidgets.HBox(children=[button01, text03])
display(box01, box02)


def click(b):
    """Write the union of the characters typed in the two input boxes to ``text03``.

    Registered below as the ``on_click`` handler of ``button01``.

    Parameters
    ----------
    b
        Argument supplied by the widget callback; it is not used.
    """
    set01 = set(str(text01.value))
    set02 = set(str(text02.value))
    text03.value = str(set01 | set02)


button01.on_click(click)


# ## 8 Tuples

# A tuple is an **IMMUTABLE** list, i.e. a tuple cannot be changed in any way once it has been created. A tuple is defined analogously to lists, except that the set of elements is enclosed in parentheses instead of square brackets. The rules for indices are the same as for lists. Once a tuple has been created, you can't add elements to a tuple or remove elements from a tuple.
#
# So, what is the reason to use tuples? Mainly three:
#
# * Tuples are faster than lists.
# * If you know that some data doesn't have to be changed, you should use tuples instead of lists, because this protects your data against accidental changes.
# * Tuples can be used as keys in dictionaries, while lists can't (see next chapter).
# In[30]:


def myfunction(pack):
    """Unpack a 4-item sequence and print ``a + b + c + d[0] - d[1]``.

    Parameters
    ----------
    pack
        Sequence of exactly four items; the fourth must itself be indexable
        with at least two elements.

    Raises
    ------
    ValueError
        If ``pack`` does not contain exactly four items (raised by the unpacking).
    """
    a, b, c, d = pack
    print(a+b+c+d[0]-d[1])


t = 1, 2, 3, (8, 9)       # Pack arguments to pass them to a function
myfunction(t)


# In[31]:


seq = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
for a, b, c in seq:
    print(a, b, c)        # Common use for tuple unpacking


# **'zip'** can be used to reorganize (transpose) columns of data:

# In[32]:


names = [('Chloe', 'Emily', 'Sophia'), ('Stuart', 'Winsor', 'Davidson')]
for firstname, lastname in zip(*names):
    print(firstname, lastname)


# ## 9 Dictionaries

# Dictionaries are **'Associative Arrays'**: values are indexed by generic keys. In other words the indexing key can be an integer number but can also be a string, a tuple or any other immutable object.
# Here we make a dictionary using tuples as keys and telephone numbers as values. Then we access a dictionary item by providing a key (tuple)

# In[33]:


data = [(('Chloe', 'Stuart'), '(831) 758-7214'),
        (('Emily', 'Winsor'), '(877) 359-8474'),
        (('Sophia', 'Davidson'), '(800) 445-2854')]
d = dict(data)
print(d[('Chloe', 'Stuart')])


# ***Try by yourself*** the following commands:
# ```python
# d.keys()
# d.values()
# d.items()
# ```

# In[34]:


print(d.keys())


# When reading the dictionary you must check if the dictionary contains the key by using `in`. If you ask for an unknown key, Python raises an exception (`KeyError`). Alternatively you can use `get` with a default value to be used if the key is not found

# In[35]:


print(('Chloe', 'Winsor') in d)   # Printed explicitly: a bare expression shows nothing outside a notebook


# In[36]:


print(d.get(('Chloe', 'Winsor'),'Number not available'))


# When iterating a dictionary, the items come out in insertion order (guaranteed since Python 3.7; in older versions the order was not guaranteed):

# In[37]:


for key, value in d.items():
    print('KEY: ', key, '\t\t VALUE: ', value)


# In[38]:


# Iterators work on keys by default
for key in d:
    print(key, '\t\t', d[key])


# Some more examples:

# In[39]:


# How to create a dictionary with a for loop (from a list of tuples)
d1 = dict([(n, str(n)) for n in range(5)])
print(d1)  # {0: '0', 1: '1', 2: '2', 3: '3', 4: '4'}


# In[40]:


d1.pop(2)              # Remove key 2 (its value is returned and discarded here)
d2 = {'10': 'ten', '11': 'eleven'}
d1.update(d2)          # {0: '0', 1: '1', 3: '3', 4: '4', '10': 'ten', '11': 'eleven'}
print(d1)


# In[41]:


# Creating dict from sequences
d3 = {}
for key, value in zip(list('abcd'), list('1234')):
    d3[key] = value
print(d3)


# ## 10 Counters

# Counters are a very special type of dictionary: they give you a simple and effective way to count items.
# In[42]:


from collections import Counter

colorlist = ['red', 'blue', 'red', 'green', 'blue', 'blue', 'green', 'blue', 'cyan']
cnt = Counter(colorlist)
print('Total of all counts: ', sum(cnt.values()))
print('Most common elements: ')
for item, number in cnt.most_common(3):
    print('\t'*2, item, number)
print('Least common elements: ')
# most_common() is sorted by decreasing count: a reversed slice yields the last three
for item, number in cnt.most_common()[:-4:-1]:
    print('\t'*2, item, number)


# ## 11 IF - FOR - WHILE

# This is a brief overview of the flow-control instructions in Python

# ### 11.1 IF

# In[43]:


a = 34
if a != 7:
    print("'a' is not 7")
if a > 15:
    print("'a' is greater than 15")
elif a == 15:
    print("'a' is exactly 15!")
else:
    print("'a' is less than 15")


# ### 11.2 FOR - ELSE

# In Python you can iterate Lists, Dictionaries, Lines in a file and all the 'ITERABLE' Objects

# In[44]:


l = ['a', 'b', 'c', 'd', 'e', 'f']
for v in l:
    if v == 'e':
        break        # Leave the loop immediately (an 'else' clause on the loop would be skipped)
    elif v == 'b':
        continue     # Skip the rest of this iteration
    print(v)
print('Done !')      # Executed once the for loop is over


# In[45]:


for element in [3,4,5]:                 # Elements in LISTS
    print(element)
for element in (7,8,9):                 # Elements in TUPLES
    print(element)
for char in 'abc':                      # Elements in STRINGS
    print(char)

import os.path
path = os.path.join(os.path.curdir, "example_data", "my_input.txt")
try:
    # 'with' closes the file even if printing fails half-way
    with open(path) as infile:
        for line in infile:             # Elements in FILES
            print(line, end='')
except FileNotFoundError:
    # The example data is optional: report it instead of aborting the tutorial
    print('Example file not found:', path)


# In[46]:


# Enumerate returns an enumerate object:
for i, season in enumerate(['Spring', 'Summer', 'Fall', 'Winter']):
    print(i, season)


# ### 11.3 WHILE

# In[47]:


a = 0
while a < 10:
    a += 1
    print(a, end='')


# In[48]:


a = 2**10
while a>0.5:
    print('{0:.1f}'.format(a), end=' - ')
    a = a/2


# ---
#
# Visit [www.add-for.com](http://www.add-for.com) for more tutorials and updates.
#
# This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.