#!/usr/bin/env python
# coding: utf-8
"""Python for CogSci Research: a notebook-style tour of the scientific stack.

This file is a Jupyter/IPython notebook exported as a script.  Each
``# In[n]:`` marker is one notebook cell; bare expressions (e.g. ``3 + 5``)
are displayed by the notebook but have no visible effect in a plain script.
The ``get_ipython()`` calls require an IPython session, and the analysis
cells expect an ``experiment.csv`` file in the working directory.
"""

# # Python for CogSci Research

# ## Getting Python: Anaconda
#
# https://store.continuum.io/cshop/anaconda/

# ## IPython and IPython Notebook
#
# http://ipython.org/notebook.html

# In[1]:


3 + 5


# In[2]:


import os


# In[3]:


x = 5
y = 3
x + y


# In[4]:


import json


# In[5]:


json.dumps({"x": x, "y": y})


# ## Project structure
#
# ```
# config.json
# analysis/
#     analyses/
#     results/
#     figures/
# bin/
# data/
#     human/
#     model
# experiment/
# lib/
# stimuli/
# ```
#
# * `config.json`: global config file that specifies where resources are, model parameters, etc.
# * `analysis`: all scripts and results for analyzing the data
# * `bin`: helper scripts for running the experiment, running simulations, viewing stimuli, etc.
# * `data`: collected or simulated data
# * `experiment`: psiTurk experiment files
# * `lib`: importable python files that are used in analysis and model simulations
# * `stimuli`: one file per stimulus, usually JSON

# ## Running experiments
#
# ### psiTurk
#
# https://psiturk.org/
#
# ### PsychoPy
#
# http://www.psychopy.org/screenshots.html

# ## Running model simulations
#
# ### NumPy

# In[6]:


import numpy as np


# In[7]:


np.array([7, 3, 8, 2])


# In[8]:


[7, 3, 8, 2]


# In[9]:


np.arange(10)


# In[10]:


# NumPy arrays broadcast scalars element-wise.
np.arange(10) + 1


# In[11]:


# Plain Python sequences do NOT broadcast: adding an int to a range/list
# raises TypeError.  The error is caught and printed so the demonstration
# does not abort the rest of the script when run outside the notebook.
try:
    range(10) + 1
except TypeError as err:
    print("TypeError: {}".format(err))


# In[12]:


# The pure-Python equivalent of the NumPy expression in In[10].
[x + 1 for x in range(10)]


# In[13]:


np.arange(10) + np.arange(1, 11)


# ### SciPy

# In[14]:


import scipy.stats


# In[15]:


# Frozen standard normal distribution (mean 0, standard deviation 1).
rv = scipy.stats.norm(0, 1)
rv


# In[16]:


samples = rv.rvs(10000)
samples


# In[17]:


# NOTE: rebinds x and y from the integers of In[3] to arrays.
x = np.linspace(-3, 3)
y = rv.pdf(x)
y


# ### Matplotlib

# In[18]:


get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt


# In[19]:


# `density=True` replaces the `normed=True` keyword, which has been removed
# from Matplotlib; it normalizes the histogram so it is comparable to the pdf.
plt.hist(samples, density=True, color='k', bins=100)
plt.plot(x, y, 'r-', lw=3)


# ## Analyzing data
#
# ### Pandas

# In[20]:


import pandas as pd


# In[21]:


data = pd.read_csv("experiment.csv")
data.head()


# In[22]:


# `sort_index()` replaces `DataFrame.sortlevel()`, which has been removed
# from pandas.
data.set_index(["pid", "trial"]).sort_index().head()


# In[23]:


# Median response time for every (HoleClass, HoleWidth) combination.
rt = data.groupby(["HoleClass", "HoleWidth"])["responseTime"].median()
rt


# ### Matplotlib

# In[24]:


plt.bar(np.arange(len(rt)), rt, align='center')
plt.xticks(np.arange(len(rt)), rt.index)
plt.xlim(-0.75, len(rt) - 0.25)
fig = plt.gcf()
fig.set_figwidth(16)
fig.set_figheight(6)


# ### Seaborn

# In[25]:


import seaborn as sns


# In[26]:


# Same plotting code as In[24], repeated after importing seaborn.
plt.bar(np.arange(len(rt)), rt, align='center')
plt.xticks(np.arange(len(rt)), rt.index)
plt.xlim(-0.75, len(rt) - 0.25)
fig = plt.gcf()
fig.set_figwidth(16)
fig.set_figheight(6)


# In[27]:


# `catplot(kind="point")` is the replacement for the renamed `factorplot`,
# whose default plot kind was a point plot.  Keyword arguments are used
# because newer seaborn releases no longer accept these positionally.
sns.catplot(
    x="HoleClass",
    y="responseTime",
    hue="HoleWidth",
    data=data,
    kind="point",
    estimator=np.median,
)
fig = plt.gcf()
fig.set_figwidth(8)
fig.set_figheight(6)


# ## Demo: responses vs. response times

# In[28]:


# Keep only the rows whose `isControl` value is False.
exp = data.groupby('isControl').get_group(False)
exp.head()


# In[29]:


rt_per_stim = exp.groupby(["stim", "HoleClass", "HoleWidth"])["responseTime"].median()
rt_per_stim


# In[30]:


resp_per_stim = exp.groupby(["stim", "HoleClass", "HoleWidth"])["response"].mean()
resp_per_stim


# In[31]:


plt.plot(resp_per_stim, rt_per_stim, 'o')
plt.xlabel("Mean response to 'will it go in?'")
plt.ylabel("Median response time")
plt.title("Responses vs. response time")


# ## Demo: R magic

# In[32]:


import rpy2


# In[33]:


get_ipython().run_line_magic('load_ext', 'rpy2.ipython')


# In[34]:


# `-i exp` passes the `exp` DataFrame into the R session under the same name.
get_ipython().run_cell_magic('R', '-i exp', '\nlibrary(lme4)\n\n# check for an interaction between the hole class and hole width\nfull.lmer = lmer(responseTime ~ HoleClass*HoleWidth + (1|stim), exp)\nnoint.lmer = lmer(responseTime ~ HoleClass + HoleWidth + (1|stim), exp)\nanova(noint.lmer, full.lmer)\n')


# ## Demo: Interactive Widgets

# In[35]:


# NOTE(review): in IPython >= 4 the widgets live in the separate `ipywidgets`
# package (`from ipywidgets import interact`); the original import is kept
# here so that no new dependency is introduced.
from IPython.html.widgets import interact


# In[36]:


@interact
def plot_histogram(num_samples=(0, 10000), bins=(10, 100), normed=True):
    """Plot a histogram of standard-normal samples over the normal pdf.

    The defaults are ``interact`` widget abbreviations: a ``(min, max)``
    tuple yields a slider over that range and a boolean yields a checkbox,
    so inside the function each argument is a single concrete value.

    Parameters
    ----------
    num_samples : int
        Number of samples to draw from the standard normal distribution.
    bins : int
        Number of histogram bins.
    normed : bool
        Whether to normalize the histogram to a probability density.
    """
    rv = scipy.stats.norm(0, 1)
    samples = rv.rvs(num_samples)
    x = np.linspace(-3, 3)
    y = rv.pdf(x)
    # The public parameter keeps its original name; Matplotlib's keyword
    # for the same option is now `density`.
    plt.hist(samples, density=normed, color='k', bins=bins)
    plt.plot(x, y, 'r-', lw=3)


# ## Other tools

# * scikit-learn
# * sympy
# * statsmodels