# 1.2
# Raising a number to the power 0.5 gives its square root.
sqroot144 = pow(144, 0.5)
print(sqroot144)  # prints 12.0
# 1.3 with "in" statement
myName = 'Timo'
# The membership test evaluates to a boolean, which is stored and then printed.
contains_i = 'i' in myName
print(contains_i)  # prints True
# 1.3 with for loop
myName = 'Timo'
# Walk through the name one character at a time.
for letter in myName:
    if letter == 'i':
        print('yes')
        # Stop at the first match so 'yes' is printed at most once.
        break
# 1.3 with while loop
myName = 'Timo'
has_i = False
idx = 0
# Keep scanning until an 'i' is found OR the end of the name is reached.
# The length check matters: without it, a name that contains no 'i' would be
# indexed past its last character and raise an IndexError.
while not has_i and idx < len(myName):
    if myName[idx] == 'i':
        has_i = True
        print('yes')
    # Move on to the next character (also executed on the matching step).
    idx += 1
# 1.4
var1 = 21
var2 = 14
var3 = 80
sumOfThree = var1 + var2 + var3
# Chained comparison: true only if the sum lies strictly between 100 and 120.
if 100 < sumOfThree < 120:
    print('yes')
else:
    print('no')
# 1.4 (variant: the same check written with `and` and a conditional expression)
var1 = 21
var2 = 14
var3 = 80
sumOfThree = sum((var1, var2, var3))
# True only if the sum lies strictly between 100 and 120.
in_range = sumOfThree > 100 and sumOfThree < 120
print('yes' if in_range else 'no')
# Print the type of some of the variables defined in the exercises above.
for value in (myName, var1):
    print(type(value))
# and so on..
Write your own loops.
Given:
modules = ["cognitive", "developmental", "neuro", "clinical", "social", "computational"]
comparisonVariable = "introductory"
Determine whether any of the modules have names that are longer than the comparison variable.
modules = ["cognitive", "developmental", "neuro", "clinical", "social", "computational"]
comparisonVariable = "introductory"
# The reference length does not change, so compute it once before the loop.
comparisonLength = len(comparisonVariable)
anyNameLonger = False
for mod in modules:
    if len(mod) > comparisonLength:
        anyNameLonger = True
        # One longer name is enough to answer the question; stop searching.
        break
print(anyNameLonger)
Write a function that takes a list of numbers as input and returns the mean of these numbers.
Remember, given a vector of numbers
$\textbf{x} = [x_1, x_2, x_3, x_4, \dots, x_N]$
the mean is defined as:
$mean(\textbf{x})=\frac{1}{N}\sum_{i=1}^{N} x_i = \frac{1}{N}(x_1 + x_2 + x_3 + \dots + x_N)$
Don't forget to add docstrings to your function!
numbers = [2, 5, 8, 7, 1, 4, 4, 9]


def compute_mean(x):
    '''
    computes the arithmetic mean of a list of numbers
    INPUT: x (list of numbers, must not be empty)
    OUTPUT: mu (float), the sum of the entries divided by their count
    RAISES: ZeroDivisionError if x is empty
    '''
    return sum(x) / len(x)


print(compute_mean(numbers))
# Show the docstring written above.
help(compute_mean)
# ... or:
def compute_mean(x):
    '''
    computes the arithmetic mean of a list of numbers with an explicit loop
    INPUT: x (list of numbers, must not be empty)
    OUTPUT: mean (float), the sum of the entries divided by their count
    RAISES: ZeroDivisionError if x is empty
    '''
    # Accumulate the total one element at a time.
    sum_of_numbers = 0
    for value in x:
        sum_of_numbers += value
    mean = sum_of_numbers / len(x)
    return mean
# Apply the function to the example list, then show its docstring.
mean_of_numbers = compute_mean(numbers)
print(mean_of_numbers)
help(compute_mean)
You're given the following string:
sentence = 'Michaelmas term starts in October.'
Write a function that converts all vowels to upper case and all consonants to lower case. Test your function on the variable 'sentence'.
Create a new variable where the word 'Michaelmas' will be replaced with the word 'Trinity' and 'October' with 'April'.
sentence = 'Michaelmas term starts in October.'


# 4.1
def UpperVowelsLowerConsonants(string):
    '''
    returns a copy of a string with every vowel in upper case and every
    other character in lower case
    INPUT: string (str)
    OUTPUT: newString (str)
    '''
    # Both cases are listed so that vowels are recognised however they are
    # written in the input.
    vowels = set('aeiouAEIOU')
    newString = ''.join(
        letter.upper() if letter in vowels else letter.lower()
        for letter in string
    )
    return newString


# Print the result so it is visible; a bare call would discard the return value.
print(UpperVowelsLowerConsonants(sentence))

# 4.2
# str.replace returns a new string, so the two replacements can be chained;
# the original `sentence` is left untouched.
sentenceTT = sentence.replace('Michaelmas', 'Trinity').replace('October', 'April')
print(sentenceTT)
Try to plot lengths against widths of the petals for all flower types. The kind of plot you'll want to use is most likely a scatterplot, so see if you can google your way to the syntax that will take petal length and width as inputs for each datapoint.
from sklearn import datasets
import seaborn as sns
# Load the iris example dataset
iris = sns.load_dataset("iris")
# Petal width on the x-axis against petal length on the y-axis
sns.scatterplot(data=iris, x='petal_width', y='petal_length')
Now try to change your scatterplot code from the cell above to color different species of flowers differently.
# Same scatterplot, with the `species` column selecting the color of each point
sns.scatterplot(data=iris, x='petal_width', y='petal_length', hue='species')
Try to find a method that will tell you how many responses there are in each of the 'low', 'medium' and 'high' conditions.
Try to calculate a grand mean rating score.
import numpy as np
import pandas as pd
import warnings; warnings.simplefilter('ignore')
# Load the data (sheet 'ANOVA' of the task-1 workbook)
socialDecisionMakingData_ANOVA = pd.read_excel(
    'data/data_BN_task1.xlsx',
    sheet_name='ANOVA',
)
# Before you start any analysis, explore the ANOVA dataset
print(socialDecisionMakingData_ANOVA.shape)
# 6.1
# value_counts() tallies how often each influence level occurs
influence_counts = socialDecisionMakingData_ANOVA['influence_level'].value_counts()
print(influence_counts)
# 6.2
# Overall mean rating score
grand_mean = np.mean(socialDecisionMakingData_ANOVA['rating_score'])
print(grand_mean)
Now try to make a so-called violin plot using the same variables we used above (influence_level, rating_score).
import seaborn as sns
# Violin plot of the rating scores, one violin per influence level
sns.catplot(
    data=socialDecisionMakingData_ANOVA,
    x="influence_level",
    y="rating_score",
    kind="violin",
)
As pointed out above, we shouldn't have carried out an ANOVA as some of the assumptions were violated.
We could have transformed the data, checked for normality again and carried out an ANOVA on the transformed data (provided it would be normally distributed).
Alternatively, we could use non-parametric tests instead, which don't make these distributional assumptions (but are usually less powerful, i.e. worse at detecting an effect).
If you've followed our tutorial carefully, you should have learned enough to do this yourself!
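Please carry out a Friedman test, followed by pairwise Wilcoxon signed-rank tests between the three influence levels.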
Hint: The signed-rank test from the scipy.stats package doesn't return the z-statistic, which you would usually need to report together with the p-value. You can obtain an unsigned approximation with the following formula: $$z = ISF(\frac{p}{2})$$ where ISF is the inverse survival function of the standard normal distribution, i.e. the inverse of the survival function $SF(x) = 1 - CDF(x)$: $$ISF(q) = SF^{-1}(q) = CDF^{-1}(1 - q)$$
import numpy as np
import pandas as pd
import warnings; warnings.simplefilter('ignore')
from scipy.stats import zscore
# Load the data
df = pd.read_excel('data/data_BN_task1.xlsx', sheet_name='ANOVA')
# Convert from long format to wide format
# NOTE(review): the subject ids are generated, not read from the file; this
# assumes 65 subjects whose rows appear in the same order within each of the
# 3 influence levels -- confirm against the spreadsheet.
df['subject'] = np.tile(np.arange(1, 66), 3)
df = df.pivot(index='subject', columns='influence_level', values='rating_score')
# Remove outliers
# z-score every influence level separately (one new column per level)
df['zs_high'] = zscore(df['high'])
df['zs_medium'] = zscore(df['medium'])
df['zs_low'] = zscore(df['low'])
# Apply the |z| > 2 criterion to the z-score columns ONLY. Comparing the raw
# rating columns against 2 as well would flag a subject as an outlier
# whenever any of their ratings exceeds 2.
zscore_columns = ['zs_high', 'zs_medium', 'zs_low']
outliers = np.abs(df[zscore_columns]) > 2
# A subject is dropped if they are an outlier in at least one level
outlier_mask = np.any(outliers, axis=1)
df = df[~outlier_mask]
# Convert back from wide to long
subject_ids = np.asarray(df.index)
# The first three columns are the rating columns created by the pivot; the
# z-score helper columns that follow them are left out of the long table.
df = df[df.columns[:3]].melt(value_name='rating_score')
# melt stacks the three levels one after another, so repeat the ids 3 times
df['subject'] = np.tile(subject_ids, 3)
df
from pingouin import friedman
# Friedman test on the rating scores with influence level as the
# within-subject factor
results = friedman(
    data=df,
    dv='rating_score',
    within='influence_level',
    subject='subject',
)
print(results)
from scipy.stats import wilcoxon, norm
from itertools import combinations

scores = ['low', 'medium', 'high']
# Let's loop through all possible combinations
# combinations(scores, 2) yields every unordered pair exactly once, in the
# order low/medium, low/high, medium/high.
for first, second in combinations(scores, 2):
    lvl1 = df.loc[df['influence_level'] == first, 'rating_score']
    lvl2 = df.loc[df['influence_level'] == second, 'rating_score']
    s, p = wilcoxon(lvl1, lvl2)
    # s is sum of ranks, so we need to convert it to a z score
    # we use the inverse survival function (1-cdf) to recover z from p (except for the sign)
    # p is halved before the lookup -- presumably because wilcoxon reports a
    # two-sided p-value by default; confirm against the scipy documentation.
    z = norm.isf(p / 2)
    print(f'{first} vs {second}: z={np.round(z, 2)!s}, p={np.round(p, 5)!s}')