# --- Basics: printing, types, identity and mutability -----------------------
# Big data examiner is a one stop place to learn data science.
print("Big data examiner")

a = 'Big data'
print(type(a))
b = 'Examiner'
print(type(b))
c = 4.5

# isinstance() checks an object against one type or a tuple of types.
print(isinstance(a, str))
print(isinstance(a, int))
print(isinstance(c, (int, float)))

a = 'Bill gates'
# In IPython/Jupyter, type ``a.`` and press Tab to list the string methods.
# (A bare ``a.`` is a syntax error in a script, so it is kept as a comment.)

import numpy as np  # numpy under its conventional alias

data_new = [6, 7.5, 8, 0, 1]
# Convert the list to a NumPy array.
data = np.array(data_new)
print(data)

x = [1, 2, 3, 4]
y = x        # a second name bound to the same list object
z = list(x)  # a new list object with equal contents
print(x is y)
print(x is not z)

# Other operators you can use:
#   x // y          floor division: drops the fractional remainder
#   x ** y          raises x to the power y
#   x <= y, x < y   True if x is less than or equal to (less than) y;
#                   >= and > work the same way
#   &, |, ^, ==, != are available as well

# Lists, dicts and arrays are mutable.
programming = ['Python', 'R', 'Java', 'Php']
programming[2] = 'c++'
print(programming)

# Strings and tuples are immutable.
z_tuple = (9, 10, 11, 23)
try:
    z_tuple[1] = 'twenty two'  # you can't mutate a tuple
except TypeError as err:
    # Caught and reported so the rest of the tutorial keeps running.
    print('TypeError: %s' % err)

# Multiline strings are written with triple quotes, ''' or """.
multiline = """
Hi! learn Python it is fun
Data science and machine learning are amazing
"""
print(multiline)

# As said before, Python strings are immutable.
# --- Strings, formatting, booleans, casts and a first datetime --------------
x = ' This is big data examiner'
try:
    x[10] = 'f'  # strings do not support item assignment
except TypeError as err:
    # Caught and reported so the rest of the tutorial keeps running.
    print('TypeError: %s' % err)

# replace() returns a new string; the original is left untouched.
x = 'Java is a powerful programming language'
y = x.replace('Java', 'Python')
print(y)

# Many Python objects can be converted to a string with str().
x = 56664
y = str(x)
print(y)
print(type(y))

# Strings act like other sequences, such as lists and tuples.
a = 'Python'
print(list(a))
print(a[:3])  # a string can be sliced
print(a[3:])

# String concatenation is very important.
p = "P is the best programming language"
q = ", I have ever seen"
z = p + q
print(z)

# %-formatting: %.3f keeps three decimals, %f uses the default six,
# %d truncates the float to an integer and %s inserts str(value).
print("Hii space left is just %.3f gb, and the data base is %s" % (0.987, 'mysql'))
print("Hii space left is just %f gb, and the data base is %s" % (0.987, 'mysql'))
print("Hii space left is just %d gb, and the data base is %s" % (0.987, 'mysql'))

# Boolean values in Python are written as True and False.
print(True and True)
print(True or False)
print(True and False)

# Empty iterables (list, dict, string, tuple, ...) are treated as False when
# used in control flow (if, while, ...).
print('%s %s' % (bool([]), bool([1, 2, 3])))
print('%s %s' % (bool('Hello Python!'), bool('')))
print('%s %s' % (bool(0), bool(1)))

# Casting between types.
x = '1729'
y = float(x)
print(type(y))
print(int(y))
print(bool(y))

# The datetime module provides the datetime, date and time types.
from datetime import datetime, date, time

# Write the month as 6, not 06: a leading zero makes an octal literal in
# Python 2 (08 and 09 are invalid tokens) and is a syntax error in Python 3.
td = datetime(1989, 6, 9, 5, 1, 30)
# --- datetime arithmetic, exception handling, ternary expressions -----------
from datetime import datetime, date, time

# The datetime used throughout this section, set once so that the section
# can be run on its own.
td = datetime(1989, 6, 9, 5, 1, 30)

print(td.day)
print(td.minute)
print(td.date())
print(td.time())

# strftime() formats a datetime as a string.
print(td.strftime('%m/%d/%y %H:%M:%S'))

# strptime() parses a string into a datetime object.
print(datetime.strptime('1989911', '%Y%m%d'))

# replace() returns a new datetime with the given fields changed; ``td``
# itself is not modified.
print(td.replace(hour=0, minute=0, second=30))

# Subtracting two datetimes gives the difference between them.
td1 = datetime(1988, 8, 31, 11, 2, 23)
new_time = td1 - td
print(new_time)
print(type(new_time))  # the result is a timedelta, not a datetime
print(td + new_time)   # adding the difference back to td yields td1

print(float('7.968'))
try:
    float('Big data')  # not a number, so float() raises ValueError
except ValueError as err:
    # Caught and reported so the rest of the tutorial keeps running.
    print('ValueError: %s' % err)


# Suppose we want our float function to return the input value instead of
# failing; we can do this using the following code.
def return_float(x):
    """Return ``float(x)``, or ``x`` unchanged if it is not a valid number.

    Only ValueError is handled here (e.g. a non-numeric string); a bare
    ``except:`` would also hide unrelated errors such as KeyboardInterrupt.
    """
    try:
        return float(x)
    except ValueError:
        return x


print(return_float('4.55'))
print(return_float('big data'))  # this time no ValueError is raised

# float((9, 8)) raises a TypeError rather than a ValueError, so list both
# exception types to handle either case.


def return_float(x):
    """Return ``float(x)``, or ``x`` unchanged if it cannot be converted.

    Handles both a wrong type (TypeError) and a malformed value (ValueError).
    """
    try:
        return float(x)
    except (TypeError, ValueError):
        return x


print(return_float((9, 8)))  # now the tuple (9, 8) is returned as is

# These are called ternary (conditional) expressions.
x = 'Life is short use python'
quote = 'This is my favourite quote' if x == 'Life is short use python' else 'I hate R'
print(quote)

# Tuples are one-dimensional, fixed-length, immutable sequences of Python
# objects.
# --- Tuples ------------------------------------------------------------------
machine_learning = 77, 45, 67
print(machine_learning)

pythonista = (87, 56, 98), (78, 45, 33)  # nested tuples
print(pythonista)

# Any sequence can be converted to a tuple with tuple().
print(tuple([4, 0, 2]))
pythonista = tuple('Python')
print(pythonista)
print(pythonista[0])  # elements are accessed by index

x = tuple(['Manu', [99, 88], 'Jeevan'])
# x[2] = 'Prakash'  # raises TypeError: a tuple's slots cannot be reassigned
x[1].append(77)     # but a mutable object stored in a tuple can still change
print(x)

# Tuples are concatenated with '+'.
y = ('Mean', 'Median', 'Mode') + ('Chisquare', 'Annova') + ('statistical significance',)
print(y)

# Multiplying a tuple by a number repeats its contents.
print(('Mean', 'Median') * 4)

# A tuple can be unpacked into separate names.
deep_learning = ('Theano', 'Open cv', 'Torch')
x, y, z = deep_learning
print(x)
print(y)
print(z)

# Nested tuples unpack with a matching nested pattern.
countries = 'Usa', 'India', ('Afghanistan', ' Pakistan'),
a, b, (c, d) = countries
print(a)
print(b)
print(c)
print(d)

# count() tells how many times a value occurs in a tuple.
countries = 'Usa', 'India', ('Afghanistan', ' Pakistan'), 'Usa', 'Usa'
print(countries.count('Usa'))

# --- Lists -------------------------------------------------------------------
# extend() and sort() work in place and return None, so their results are
# not assigned to anything.
countries = ['Usa', 'India', 'Afghanistan', ' Pakistan']
countries.extend(['Britian', 'Canada', 'Uzbekistan', 'Turkey'])
countries.sort(key=len)  # sorted by number of characters
print(countries)
# extend() can be a handy feature when your lists are large.

import bisect

# bisect.bisect() finds the position where an element should be inserted to
# keep a sorted list sorted.
b = [9, 9, 9, 9, 5, 6, 3, 5, 3, 2, 1, 4, 7, 8]
b.sort()
x = bisect.bisect(b, 2)
y = bisect.bisect(b, 5)
print(x)
print(y)

# --- enumerate, sorted, zip, reversed ---------------------------------------
# enumerate() keeps track of the index of the current element while iterating.
languages = ['Bigdata', 'Hadoop', 'mapreduce', 'Nosql']
for i, val in enumerate(languages):
    print('%s %s' % (i, val))

# sorted() returns a new sorted list from any sequence.
print(sorted([89, 99, 45, 63, 25, 53, 34, 56]))
print(sorted('Big data examiner'))

hot_job = ['Big_data', 'data science', 'data scientist', 'data base developer']
languages = ['c', 'c++', 'java', 'python']
statistics = ['Mean', 'Median', 'Mode', 'Chi square']
# list() makes the pairs visible on Python 3, where zip() is lazy.
print(list(zip(hot_job, languages, statistics)))

# zip and enumerate can be used together.
for i, (x, y) in enumerate(zip(hot_job, languages)):
    print('%d: %s, %s' % (i, x, y))

# A zipped sequence can be unzipped with zip(*...).
rockers = [('Jame', 'Manu'), ('Govind', 'Dheepan'), ('Partha', 'Reddy')]
first_names, last_names = zip(*rockers)
print(first_names)
print(last_names)

# reversed() iterates over a sequence in reverse order.
print(list(reversed(range(20))))

# --- Dictionaries ------------------------------------------------------------
# Two dictionaries can be combined with the update() method.
d1 = {'a': 'octave', 'b': 'Java'}
d1.update({'c': 'foo', 'd': 12})
print(d1)

d2 = {'a': 'octave', 'b': 'Java'}
d2.update({'b': 'foo', 'c': 12})  # the argument's 'b' overrides the one in d2
print(d2)

# dict() accepts an iterable of (key, value) pairs, such as the output of zip.
data_science = dict(zip(range(10), reversed(range(10))))
print(data_science)

# Dictionary keys must be hashable, which the immutable built-ins (int,
# string, float, tuple of hashables) are.
print(hash('string'))
print(hash((1, 2, 3)))
try:
    print(hash([1, 2, 4]))  # fails: lists are mutable and therefore unhashable
except TypeError as err:
    # Caught and reported so the rest of the tutorial keeps running.
    print('TypeError: %s' % err)

# An easy way to use a list as a key is to convert it to a tuple.
fg = {}
fg[tuple([3, 4, 5])] = 45
print(fg)

# A set is an unordered collection of unique elements.
# --- Sets --------------------------------------------------------------------
# Building a set from a list drops the duplicates.
print(set([3, 3, 4, 4, 4, 6, 7, 7, 7, 8]))

# Sets support mathematical set operations like union, intersection,
# difference and symmetric difference.
a = {1, 2, 3, 4, 5}
b = {3, 4, 5, 6, 7, 8}
print(a | b)  # union
print(a & b)  # intersection: the elements common to both sets
print(a - b)  # difference: elements of a that are not in b
print(a ^ b)  # symmetric difference: elements in exactly one of the sets
print({1, 2, 3} == {3, 2, 1})  # sets with the same elements are equal

# --- Grouping values by key --------------------------------------------------
football_clubs = ['Manchester', 'Liverpool', 'Arsenal', 'Chelsea', 'Mancity',
                  'Tottenham', 'Barcelona', 'Dortmund']
football = {}
for clubs in football_clubs:
    club = clubs[0]  # first character of the club name is the group key
    # Membership must be tested against the dictionary being built; testing
    # against the list of names would restart the group on every iteration.
    if club not in football:
        football[club] = [clubs]
    else:
        football[club].append(clubs)
print(football)

# Usually a Python dictionary raises a KeyError if you try to get an item
# with a key that is not currently in the dictionary. A defaultdict instead
# creates the missing item on access by calling the callable passed to its
# constructor (any callable works, including functions and types).
# The same grouping can be done using defaultdict.
from collections import defaultdict

soccer = defaultdict(list)
for clubs in football_clubs:
    soccer[clubs[0]].append(clubs)
print(soccer)


# A function can return multiple values.
# NOTE: this rebinds the name ``b``, which held a set above.
def b():
    """Return three values at once, packed into a tuple."""
    x = 34
    y = 45
    z = 89
    return x, y, z


# Example of a closure. The returned function gives True if an element has
# already been seen, i.e. is repeated in the list.
def dict_funct():
    """Return a function that reports whether its argument was seen before.

    The returned function keeps its own record of the values passed to it:
    it returns False the first time a value is given and True on every
    later call with the same value.
    """
    new_dict = {}  # values seen so far, private to the returned function

    def modifier(z):
        if z in new_dict:
            return True
        new_dict[z] = True
        return False

    return modifier


x = dict_funct()
list_func = [5, 4, 6, 5, 3, 4, 6, 2, 1, 5]
y = [x(i) for i in list_func]
print(y)

# If we are doing some data cleaning, we will be having a messy data set
# like this.
# --- Functions as objects, lambdas and generators ---------------------------
import re

try:
    xrange
except NameError:
    # Python 3 has no xrange; its range is already lazy.
    xrange = range

states = [' Kerala', 'Gujarat!', 'Delhi', 'Telengana', 'TriPUra', 'Tamil Nadu##', 'West Bengal?']


def remove_functions(strp):
    """Return ``strp`` with every '!', '#' and '?' character removed."""
    return re.sub(r'[!#?]', '', strp)


# Functions are objects, so they can be stored in a list.
ooops = [str.strip, remove_functions, str.title]


def clean_data(oops, funky):
    """Apply every function in ``funky``, in order, to each item of ``oops``.

    Args:
        oops: iterable of values to clean.
        funky: iterable of one-argument callables; each receives the result
            of the previous one.

    Returns:
        A new list with the cleaned values, in the original order.
    """
    result = []
    for data in oops:
        for fun in funky:
            data = fun(data)  # feed each result into the next function
        result.append(data)
    return result


x = clean_data(states, ooops)
print(x)


# Lambda is a short form of writing a function.
def f(x):
    """Return the square of ``x``."""
    return x ** 2


print(f(8))

# The same function written as a lambda.
y = lambda x: x ** 2
print(y(9))


def new_objjj():
    """Yield the squares of 0..99 one at a time."""
    for x in xrange(100):
        yield x ** 2  # generator functions use yield instead of return


some_variable = new_objjj()

# The generator function above can also be written as a generator expression.
new_obj = (x ** 2 for x in range(100))

# Generator expressions can be passed to any function that accepts an
# iterable.
y = sum(x ** 2 for x in xrange(100))
print(y)

# On Python 2, xrange is lazy and therefore cheaper than range, which
# builds a full list.
print(dict((i, i ** 2) for i in xrange(5)))

# zip() pairs keys with values; dict(zip(rkeys, rvals)) would do the same.
rkeys = [1, 2, 3]
rvals = ['South', 'Sardinia', 'North']
rmap = {key: val for key, val in zip(rkeys, rvals)}
print(rmap)