from __future__ import print_function
import numpy as np
import pandas as pd
from collections import OrderedDict #sorting participant df dict before pd.concat()
import matplotlib.pylab as plt
%matplotlib inline
pd.options.display.mpl_style = 'default'
# Participants left out of every performance analysis: those not fluent in
# English, the left-handed, and the pilot subjects.
non_english_fluent = ['023', '031', '045', '050', '070', '106']
left_handed = ['042', '088']
excluded_all_tasks = (
    non_english_fluent
    + left_handed
    + ['010', '011', '012', '013', '014']  # pilot subjects
)
def isolate_isip_task(taskname, db=None):
    """Return the 'tap_r' rows of one task, without the constant columns.

    Parameters
    ----------
    taskname : str
        Key selected on index level 0 of ``db`` (called with 'ISIP_5' and
        'ISIP_8' in this notebook).
    db : pandas.DataFrame, optional
        Frame to slice. When omitted, the notebook-level ``dbase`` variable
        is used, which was the only behaviour before this parameter existed.
        The code selects 'tap_r' on index level 2 and ``taskname`` on index
        level 0, and expects 'channel' and 'pitch' columns.

    Returns
    -------
    pandas.DataFrame
        The selected rows with both selected index levels dropped and the
        'channel' and 'pitch' columns removed. ``db`` itself is not modified.
    """
    if db is None:
        # Historical behaviour: fall back on the external dbase variable.
        db = dbase
    # Two swaps bring index level 2 to the front (followed by level 0, then
    # level 1) so that both selections below can use the leading level.
    taps = db.swaplevel(1, 2).swaplevel(0, 1).xs('tap_r', drop_level=True)
    # channel==1 and pitch==48 verified for all tap_r data points
    taps = taps.drop(['channel', 'pitch'], axis=1)
    return taps.xs(taskname, drop_level=True)
def _rolling_mean(series, window):
    """Trailing moving average that works on both old and new pandas.

    ``pd.rolling_mean`` is gone from current pandas; ``Series.rolling`` is
    its replacement. Both put each result at the right edge of the window
    and give NaN until ``window`` non-null values are available.
    """
    if hasattr(series, 'rolling'):
        return series.rolling(window=window).mean()
    return pd.rolling_mean(series, window=window)


def run_isip_grouping_filter(df,
                             minimum_interval,
                             #filter_stdev_radius,
                             maximum_interval,
                             start_recording):
    """Filter tap intervals per participant and add local-deviation columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Tap rows carrying a 'pid' index level and the columns 'task_ms'
        and 'int_raw'. New columns are aligned on the index, so the index
        presumably has to be unique within each participant.
    minimum_interval : number
        Rows whose 'int_raw' is below this value are dropped before the
        intervals are recomputed (rows with a null 'int_raw' are kept).
    maximum_interval : number
        Recomputed intervals above this value are blanked out and their
        neighbours are excluded from the lagged measures.
    start_recording : number
        Rows with 'task_ms' below this value are ignored.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with the same rows and index, plus these columns
        in order: 'ints_filtered', 'lag2dev', 'lag2devsq', then
        'movingmean_prevN' / 'lagdev_avgprevN' / 'lagdev_avgprevNsq' for
        N = 2, 3, 4, the six 'LD_AP4_MM*' summary columns, the three
        '*_12' columns, and finally 'int_filt1', 'int_max_exceeded',
        'ints'. Rows removed by the filters hold NaN in the new columns.
        The input frame is left untouched.
    """
    def filter_intervals(group):
        # Work on a copy so the caller's frame is never modified in place.
        out = group.copy()

        # remove timestamps prior to the end of stimuli
        filt1 = out.loc[out.task_ms >= start_recording]
        # remove timestamps that end an undersized interval
        filt1 = filt1.loc[(filt1.int_raw >= minimum_interval)
                          | (filt1.int_raw.isnull())]

        # special case (originally noted for p049 on the 500ms task): when
        # a participant's median raw interval is below 375, raw intervals
        # above 625 are dropped as well. Both thresholds are hard-coded and
        # do not depend on the minimum/maximum arguments.
        # (An earlier special case for p048 @800ms was removed: p048 was
        # left-handed and is excluded.)
        if filt1.int_raw.median() < 375:
            print(filt1.int_raw.count())
            print("applied")
            filt1 = filt1.loc[(filt1.int_raw <= 625)
                              | (filt1.int_raw.isnull())]
            print(filt1.int_raw.count())
        # end special cases

        # interval recalculation now skips the too-soon taps.
        int_filt1 = filt1.task_ms - filt1.task_ms.shift(1)
        int_max_exceeded = int_filt1 > maximum_interval

        # int_filt1 is the nearly-final interval sequence, but it still has
        # a leading NaN and the overlong intervals. Intervals merged because
        # a tap came too soon are legitimate and stay in the lagged
        # comparisons; an overlong interval disrupts them, so no lagged
        # deviation is calculated across a skipped, overlong interval.
        has_interval = int_filt1.notnull()
        maxex_sequence = int_max_exceeded[has_interval]
        intsequence = int_filt1[has_interval].where(~maxex_sequence)
        out['ints_filtered'] = intsequence

        def skip_missing_lagvalues(series, lag_values):
            # Drop every position lying n steps after an overlong interval.
            # "!= True" is deliberate: shift() introduces NaN, which must
            # count as "not exceeded".
            for n in lag_values:
                series = series[maxex_sequence.shift(n) != True]
            return series

        lag2dev = intsequence - intsequence.shift(2)
        lag2dev = skip_missing_lagvalues(lag2dev, [0, 1, 2])
        out['lag2dev'] = lag2dev
        out['lag2devsq'] = lag2dev ** 2

        # The rolling mean is labelled at the right edge of its window;
        # shifting by one first means each interval is compared with the
        # mean of the N intervals preceding it.
        previous = intsequence.shift(1)

        movingmean_prev2 = _rolling_mean(previous, 2)
        movingmean_prev2 = skip_missing_lagvalues(movingmean_prev2, [1, 2])
        out['movingmean_prev2'] = movingmean_prev2
        lagdev_avgprev2 = intsequence - _rolling_mean(previous, 2)
        lagdev_avgprev2 = skip_missing_lagvalues(lagdev_avgprev2, [0, 1, 2])
        out['lagdev_avgprev2'] = lagdev_avgprev2
        out['lagdev_avgprev2sq'] = lagdev_avgprev2 ** 2

        movingmean_prev3 = _rolling_mean(previous, 3)
        movingmean_prev3 = skip_missing_lagvalues(movingmean_prev3, [1, 2, 3])
        out['movingmean_prev3'] = movingmean_prev3
        # NOTE(review): unlike windows 2 and 4, the window-3 deviation is
        # not passed through skip_missing_lagvalues (that call was already
        # commented out) -- confirm whether this is intended.
        lagdev_avgprev3 = intsequence - _rolling_mean(previous, 3)
        out['lagdev_avgprev3'] = lagdev_avgprev3
        out['lagdev_avgprev3sq'] = lagdev_avgprev3 ** 2

        movingmean_prev4 = _rolling_mean(previous, 4)
        movingmean_prev4 = skip_missing_lagvalues(movingmean_prev4,
                                                  [1, 2, 3, 4])
        out['movingmean_prev4'] = movingmean_prev4
        lagdev_avgprev4 = intsequence - _rolling_mean(previous, 4)
        lagdev_avgprev4 = skip_missing_lagvalues(lagdev_avgprev4,
                                                 [0, 1, 2, 3, 4])
        out['lagdev_avgprev4'] = lagdev_avgprev4
        out['lagdev_avgprev4sq'] = lagdev_avgprev4 ** 2

        # Per-participant summaries of the window-4 moving mean, with and
        # without the positions skipped after overlong intervals. Each is a
        # scalar broadcast to every row of the participant.
        out['LD_AP4_MMskipmis_var'] = movingmean_prev4.var(ddof=1)
        out['LD_AP4_MMskipmis_std'] = movingmean_prev4.std(ddof=1)
        out['LD_AP4_MMskipmis_len'] = movingmean_prev4.count()
        temp_noskip_4 = _rolling_mean(previous, 4)
        out['LD_AP4_MMkeepmis_var'] = temp_noskip_4.var(ddof=1)
        out['LD_AP4_MMkeepmis_std'] = temp_noskip_4.std(ddof=1)
        out['LD_AP4_MMkeepmis_len'] = temp_noskip_4.count()

        # Window 12: no skipping is applied to either series.
        movingmean_prev_12 = _rolling_mean(previous, 12)
        out['movingmean_prev_12'] = movingmean_prev_12
        lagdev_avgprev_12 = intsequence - _rolling_mean(previous, 12)
        out['lagdev_avgprev_12'] = lagdev_avgprev_12
        out['lagdev_avgprev_12sq'] = lagdev_avgprev_12 ** 2

        out['int_filt1'] = int_filt1
        out['int_max_exceeded'] = int_max_exceeded
        out['ints'] = int_filt1.loc[~int_max_exceeded]
        return out

    # group_keys=False keeps the original index. Without it, recent pandas
    # prepends the group key and the result gets a second 'pid' level.
    grouped = df.groupby(level='pid', group_keys=False)
    return grouped.apply(filter_intervals)
(Image from a related article. "34" is inapplicable for the current dataset.)
def isip_outcomes_taskdf(isip_db,
                         squared_local_dev_measure='lag2devsq'):
    """Summarise each participant's intervals into one outcome row.

    Parameters
    ----------
    isip_db : pandas.DataFrame
        Frame with a 'pid' index level, an 'ints' column and the column
        named by ``squared_local_dev_measure``.
    squared_local_dev_measure : str
        Name of the squared local-deviation column to summarise. It is
        also used as the prefix of the six measure-specific output columns.

    Returns
    -------
    pandas.DataFrame
        Indexed by sorted pid, with the columns 'ints_count', 'ints_mean',
        'ints_variance', 'ints_stdev', 'ints_lag2corr', followed by
        '<measure>_sum', '_count', '_mean', '_local_sq_abs', '_local' and
        '_drift', where

            local_sq_abs = sum / (2 * count)
            local = 100 * sqrt(local_sq_abs) / ints_mean
            drift = 100 * sqrt(ints_variance - local_sq_abs) / ints_mean

        'drift' is NaN whenever local_sq_abs exceeds the total variance.
        Later cells find those participants with ``isnull()``, so the NaN
        is left in place on purpose.
    """
    pid_list = sorted(isip_db.index.get_level_values('pid').unique())
    df = pd.DataFrame(index=pid_list)
    df.index.names = ['pid']

    ints = isip_db.ints.groupby(level='pid')
    ints_count = ints.count()  # count() ignores nulls
    ints_mean = ints.mean()
    ints_variance = ints.var(ddof=1)
    ints_stdev = ints.std(ddof=1)
    # Lag-2 autocorrelation, computed inside each participant's own series.
    ints_lag2corr = ints.apply(lambda s: s.corr(s.shift(2)))

    lagdevsq = isip_db[squared_local_dev_measure].groupby(level='pid')
    lagdevsq_count = lagdevsq.count()  # (N - 2)
    lagdevsq_mean = lagdevsq.mean()
    # Sum: ((X sub i + 2) - (x sub i)) ^ 2
    lagdevsq_sum = lagdevsq.sum()

    # because of the problem below, the count here might need to become the
    # overall count (ints_count) minus one.
    local_sq_abs = lagdevsq_sum / (2. * lagdevsq_count)
    local = 100 * (1. / ints_mean) * np.sqrt(local_sq_abs)

    # PROBLEM: total variance uses all the intervals. local_sq_abs removes
    # data points when they aren't sequential, so it can exceed the total
    # variance and make the square root below NaN.
    drift = 100 * ((1. / ints_mean) * np.sqrt(ints_variance - local_sq_abs))

    df['ints_count'] = ints_count
    df['ints_mean'] = ints_mean
    df['ints_variance'] = ints_variance
    df['ints_stdev'] = ints_stdev
    df['ints_lag2corr'] = ints_lag2corr

    prefix = squared_local_dev_measure
    df[prefix + '_sum'] = lagdevsq_sum
    df[prefix + '_count'] = lagdevsq_count
    df[prefix + '_mean'] = lagdevsq_mean
    df[prefix + '_local_sq_abs'] = local_sq_abs
    df[prefix + '_local'] = local
    df[prefix + '_drift'] = drift
    return df
# Interval recording starts five inter-stimulus intervals after the stimuli
# end: 2.5s for the 500ms task, 4.0s for the 800ms task.
ISIP_5_ENDSTIMS_MS = 19500
ISIP_5_WAIT_AFTER_STIMS_MS = 5 * 500
ISIP_5_MINIMUM_INT, ISIP_5_MAXIMUM_INT = 375, 650
isip_5_start_recording = ISIP_5_ENDSTIMS_MS + ISIP_5_WAIT_AFTER_STIMS_MS
#ISIP_500_DISQUALIFIED = ['045', #did not complete task correctly
#                         '042', '048' #left-handed
#                        ]

ISIP_8_ENDSTIMS_MS = 23200
ISIP_8_WAIT_AFTER_STIMS_MS = 5 * 800
ISIP_8_MINIMUM_INT, ISIP_8_MAXIMUM_INT = 600, 1000
isip_8_start_recording = ISIP_8_ENDSTIMS_MS + ISIP_8_WAIT_AFTER_STIMS_MS
#ISIP_800_DISQUALIFIED = ['045', #did not complete task correctly
#                         '042', '048' #left-handed
#                        ]
# Load the pickled tap database, drop the participants excluded from all
# tasks, split out the two ISIP tasks and run the interval filter on each.
# NOTE(review): unpickling can execute arbitrary code -- only load pickle
# files from a trusted location.
pickled_dbase = "c:/db_pickles/pickle - dbase - 2014-10-03b.pickle"
dbase = pd.read_pickle(pickled_dbase)
# Exclusions are applied once here, so every later step inherits them.
dbase = dbase.drop(excluded_all_tasks, level='pid')
# isolate_isip_task reads the module-level `dbase` assigned just above.
db_isip5_allsubs = isolate_isip_task('ISIP_5')
db_isip8_allsubs = isolate_isip_task('ISIP_8')
# No task-specific disqualification is applied any more (see the
# commented-out drops below), so the working frames are the *_allsubs ones.
db_isip5 = db_isip5_allsubs
db_isip8 = db_isip8_allsubs
#db_isip5 = db_isip5_allsubs.drop(ISIP_500_DISQUALIFIED)
#db_isip8 = db_isip8_allsubs.drop(ISIP_800_DISQUALIFIED)
#delete_columns_for_filter_debug_rerun(db_isip5)
# The labels below mark where any special-case messages printed by the
# filter belong in the cell output.
print('isip5:')
db_isip5 = run_isip_grouping_filter(db_isip5,
minimum_interval = ISIP_5_MINIMUM_INT,
#filter_stdev_radius = ISIP_5_FILTER_STDEV_RADIUS,
maximum_interval = ISIP_5_MAXIMUM_INT,
start_recording = isip_5_start_recording)
print('isip8:')
#delete_columns_for_filter_debug_rerun(db_isip8)
db_isip8 = run_isip_grouping_filter(db_isip8,
minimum_interval = ISIP_8_MINIMUM_INT,
#filter_stdev_radius = ISIP_8_FILTER_STDEV_RADIUS,
maximum_interval = ISIP_8_MAXIMUM_INT,
start_recording = isip_8_start_recording)
isip5: isip8:
def _check_filtered_intervals(label, db, pid_values,
                              minimum_interval, maximum_interval):
    """Print 'pid,count' per participant and assert the filter bounds hold.

    The bounds are the same limits that were handed to
    run_isip_grouping_filter, so the check cannot drift away from the
    configuration. As before, a participant with no filtered intervals at
    all fails the assertion (the comparison with NaN is False).
    """
    print('\n\n' + label)
    for pid in pid_values:
        print(pid, end=",")
        filtered = db.ints_filtered.xs(pid)
        assert filtered.max() <= maximum_interval
        assert filtered.min() >= minimum_interval
        print(filtered.count())


pid_list_800 = sorted(db_isip8.index.get_level_values('pid').unique())
pid_list_500 = sorted(db_isip5.index.get_level_values('pid').unique())

_check_filtered_intervals('800', db_isip8, pid_list_800,
                          ISIP_8_MINIMUM_INT, ISIP_8_MAXIMUM_INT)
_check_filtered_intervals('500', db_isip5, pid_list_500,
                          ISIP_5_MINIMUM_INT, ISIP_5_MAXIMUM_INT)
800 015,134 016,112 017,109 018,114 019,116 020,111 021,121 022,109 024,124 025,105 026,117 027,112 028,122 029,115 030,108 032,117 033,116 034,114 035,116 036,123 037,119 038,112 039,120 040,118 041,119 043,132 044,111 046,124 047,110 048,13 049,109 051,126 052,111 053,105 054,113 055,125 056,125 057,119 058,113 059,117 060,111 061,114 062,113 063,114 064,120 065,94 066,110 067,122 068,112 069,115 071,125 072,112 073,80 074,113 075,119 076,114 077,117 078,115 079,112 080,122 081,114 082,118 083,112 084,112 085,114 086,111 087,89 089,120 090,119 091,131 092,118 093,117 094,81 095,115 096,105 097,115 098,118 099,124 100,118 101,126 102,114 103,116 104,113 105,121 107,113 108,118 109,122 110,114 111,103 112,119 113,114 114,117 115,119 116,113 117,78 118,109 119,111 120,121 121,105 500 015,118 016,107 017,113 018,117 019,116 020,108 021,117 022,118 024,115 025,106 026,110 027,116 028,113 029,119 030,110 032,114 033,114 034,115 035,114 036,113 037,116 038,116 039,121 040,111 041,113 043,118 044,114 046,114 047,118 048,108 049,32 051,112 052,115 053,113 054,111 055,116 056,115 057,114 058,109 059,117 060,122 061,111 062,115 063,115 064,113 065,110 066,118 067,127 068,119 069,122 071,121 072,119 073,127 074,116 075,113 076,115 077,114 078,117 079,115 080,118 081,119 082,111 083,109 084,117 085,114 086,106 087,112 089,117 090,120 091,119 092,112 093,123 094,109 095,113 096,110 097,119 098,115 099,112 100,114 101,118 102,115 103,112 104,108 105,115 107,114 108,103 109,109 110,115 111,114 112,113 113,115 114,117 115,111 116,113 117,108 118,113 119,108 120,120 121,115
# Visual spot check: histogram of participant 015's filtered intervals
# from the ISIP_8 frame.
db_isip8.ints_filtered.xs('015').hist()
<matplotlib.axes.AxesSubplot at 0x12078748>
# Task-specific exclusions: the number of qualifying intervals is far below
# normal for participant 049 on the 500ms task and for participant 048 on
# the 800ms task. Print both counts for the record, then drop each
# participant from the affected task only.
print(db_isip5.ints_filtered.xs('049').count())
print(db_isip8.ints_filtered.xs('048').count())
db_isip5 = db_isip5.drop('049', level='pid')
db_isip8 = db_isip8.drop('048', level='pid')
32 13
# Build the per-participant outcome tables. Only two local-deviation
# measures are in use: the lag-2 difference and the deviation from the mean
# of the previous four intervals. The other variants are kept commented out.
#outcome_dfs_isip5 = {}
outcomesdf_isip5_lag2 = isip_outcomes_taskdf(db_isip5, 'lag2devsq')
#outcomesdf_isip5_avgprev2 = isip_outcomes_taskdf(db_isip5, 'lagdev_avgprev2sq')
#outcomesdf_isip5_avgprev3 = isip_outcomes_taskdf(db_isip5, 'lagdev_avgprev3sq')
outcomesdf_isip5_avgprev4 = isip_outcomes_taskdf(db_isip5, 'lagdev_avgprev4sq')
#outcomesdf_isip5_avgprev_12 = isip_outcomes_taskdf(db_isip5, 'lagdev_avgprev_12sq')
outcomesdf_isip8_lag2 = isip_outcomes_taskdf(db_isip8, 'lag2devsq')
#outcomesdf_isip8_avgprev2 = isip_outcomes_taskdf(db_isip8, 'lagdev_avgprev2sq')
#outcomesdf_isip8_avgprev3 = isip_outcomes_taskdf(db_isip8, 'lagdev_avgprev3sq')
outcomesdf_isip8_avgprev4 = isip_outcomes_taskdf(db_isip8, 'lagdev_avgprev4sq')
#outcomesdf_isip8_avgprev_12 = isip_outcomes_taskdf(db_isip8, 'lagdev_avgprev_12sq')
# Export step. `updated` is the version tag embedded in every output file
# name; change it before re-exporting to avoid overwriting earlier results.
updated = "2014-10-12b"
# Row-level interval tables, written as CSV to the current working directory.
db_isip5.to_csv('isip5_intervals - ' + updated + '.csv')
db_isip8.to_csv('isip8_intervals - ' + updated + '.csv')
# Per-participant outcome tables as CSV, same measures as computed above.
outcomesdf_isip5_lag2.to_csv('dfo-isip5_lag2 - ' + updated + '.csv')
#outcomesdf_isip5_avgprev2.to_csv('dfo-isip5_avgprev2 - ' + updated + '.csv')
#outcomesdf_isip5_avgprev3.to_csv('dfo-isip5_avgprev3 - ' + updated + '.csv')
outcomesdf_isip5_avgprev4.to_csv('dfo-isip5_avgprev4 - ' + updated + '.csv')
#outcomesdf_isip5_avgprev_12.to_csv('dfo-isip5_avgprev_12 - ' + updated + '.csv')
outcomesdf_isip8_lag2.to_csv('dfo-isip8_lag2 - ' + updated + '.csv')
#outcomesdf_isip8_avgprev2.to_csv('dfo-isip8_avgprev2 - ' + updated + '.csv')
#outcomesdf_isip8_avgprev3.to_csv('dfo-isip8_avgprev3 - ' + updated + '.csv')
outcomesdf_isip8_avgprev4.to_csv('dfo-isip8_avgprev4 - ' + updated + '.csv')
#outcomesdf_isip8_avgprev_12.to_csv('dfo-isip8_avgprev_12 - ' + updated + '.csv')
# The same outcome tables as pickles, written next to the source database
# pickle (absolute path prefix).
prefix = "c:/db_pickles/pickle - "
outcomesdf_isip5_lag2.to_pickle(prefix + 'dfo-isip5_lag2 - ' + updated + '.pickle')
#outcomesdf_isip5_avgprev2.to_pickle(prefix + 'dfo-isip5_avgprev2 - ' + updated + '.pickle')
#outcomesdf_isip5_avgprev3.to_pickle(prefix + 'dfo-isip5_avgprev3 - ' + updated + '.pickle')
outcomesdf_isip5_avgprev4.to_pickle(prefix + 'dfo-isip5_avgprev4 - ' + updated + '.pickle')
#outcomesdf_isip5_avgprev_12.to_pickle(prefix + 'dfo-isip5_avgprev_12 - ' + updated + '.pickle')
outcomesdf_isip8_lag2.to_pickle(prefix + 'dfo-isip8_lag2 - ' + updated + '.pickle')
#outcomesdf_isip8_avgprev2.to_pickle(prefix + 'dfo-isip8_avgprev2 - ' + updated + '.pickle')
#outcomesdf_isip8_avgprev3.to_pickle(prefix + 'dfo-isip8_avgprev3 - ' + updated + '.pickle')
outcomesdf_isip8_avgprev4.to_pickle(prefix + 'dfo-isip8_avgprev4 - ' + updated + '.pickle')
#outcomesdf_isip8_avgprev_12.to_pickle(prefix + 'dfo-isip8_avgprev_12 - ' + updated + '.pickle')
#Confirming that the errors in drift calculations were due to lag-2 autocorrelations in the data.
def negative_autocorrelations_where_drift_calc_errors_occurred(
        outcomesdf, drift_column='lag2devsq_drift'):
    """Rank participants by lag-2 autocorrelation and report the drift failures.

    Parameters
    ----------
    outcomesdf : pandas.DataFrame
        Outcome table indexed by 'pid' with an 'ints_lag2corr' column and
        the column named by ``drift_column``.
    drift_column : str
        Column whose null entries mark a failed drift calculation.
        Defaults to 'lag2devsq_drift', the only column checked before this
        parameter was added.

    Returns
    -------
    dict
        ``{pid: (rank, ints_lag2corr)}`` for every participant whose drift
        value is null. Rank 1 is the most negative autocorrelation. The
        rank is a float because it is read from a mixed-dtype row.
    """
    corr = outcomesdf.ints_lag2corr
    if hasattr(corr, 'sort_values'):
        bycorr = corr.sort_values(ascending=True)
    else:
        # pandas releases that predate Series.sort_values
        bycorr = corr.sort(inplace=False, ascending=True)

    # The second reset_index() adds the 0-based position as an 'index' column.
    ranks = bycorr.reset_index().reset_index()
    ranks['ranknum'] = ranks['index'] + 1
    ranks = ranks.set_index('pid')

    drifterrors = list(outcomesdf[outcomesdf[drift_column].isnull()].index.values)
    dranks = {}
    for d in drifterrors:
        row = ranks.loc[d]
        dranks[d] = (row.ranknum, row.ints_lag2corr)
    return dranks
# Display, for each task, the participants whose lag-2 drift came out null,
# together with their autocorrelation rank and value.
[ 'isip500',
negative_autocorrelations_where_drift_calc_errors_occurred(outcomesdf_isip5_lag2),
'isip800',
negative_autocorrelations_where_drift_calc_errors_occurred(outcomesdf_isip8_lag2)
]
['isip500', {'016': (7.0, -0.18011009562769489), '022': (19.0, -0.077318401758675104), '033': (2.0, -0.36398911213809126), '036': (23.0, -0.066149949529849969), '044': (13.0, -0.11614305890152636), '052': (15.0, -0.10259161718889551), '053': (27.0, -0.044324588330401017), '056': (24.0, -0.061102016273541497), '061': (18.0, -0.083054167548957683), '077': (6.0, -0.21300595349211318), '078': (12.0, -0.12239673626254305), '084': (22.0, -0.068490437870905738), '085': (3.0, -0.31383563731802194), '089': (17.0, -0.085148815829851968), '092': (9.0, -0.15372357912950563), '096': (21.0, -0.076982421201613069), '097': (26.0, -0.044972277832436111), '101': (14.0, -0.1129626278118966), '105': (31.0, 0.012985936150983755), '107': (5.0, -0.24330962478469564), '109': (20.0, -0.07703639077522681), '110': (11.0, -0.14000336455025097), '111': (10.0, -0.15060305422862588), '115': (29.0, -0.023845574052905719), '116': (8.0, -0.17025624797714051)}, 'isip800', {'018': (4.0, -0.11811734350973402), '057': (2.0, -0.17087419981036001), '059': (5.0, -0.025420144525136891), '064': (3.0, -0.13645588774203291), '066': (7.0, -0.0092054999827699338), '116': (1.0, -0.22704639231499918)}]
def sideplots(title, serieslist, namelist, **kwargs):
    """Show one row per series: a wide histogram beside a narrow line plot.

    Parameters
    ----------
    title : str
        Figure-level title.
    serieslist : list
        Objects offering ``.hist(ax=..., bins=...)`` and
        ``.plot(ax=..., linewidth=...)`` -- pandas Series at every call
        site in this notebook.
    namelist : list of str
        Subplot titles, one per entry of ``serieslist``.
    **kwargs
        Forwarded to ``plt.subplots`` (for example ``figsize``).

    Raises
    ------
    AssertionError
        If ``serieslist`` and ``namelist`` differ in length.
    """
    assert len(serieslist) == len(namelist)
    count = len(serieslist)
    from matplotlib import pyplot as plt

    fig, _ = plt.subplots(nrows=count, ncols=3, **kwargs)
    # plt.subplots is used only to create the figure with the caller's
    # options; its grid of placeholder axes is replaced by the subplot2grid
    # layout below. Older matplotlib removed overlapped axes automatically,
    # newer releases do not, so remove them explicitly.
    for placeholder in list(fig.axes):
        fig.delaxes(placeholder)

    for i, (n, s) in enumerate(zip(namelist, serieslist)):
        # The histogram spans the first two grid columns, the line plot the third.
        ax_hist = plt.subplot2grid((count, 3), (i, 0), colspan=2)
        ax_hist.set_title(n, fontsize=16)
        ax_line = plt.subplot2grid((count, 3), (i, 2), colspan=1)
        ax_line.set_title(n, fontsize=16)
        s.plot(ax=ax_line, linewidth=3)
        s.hist(ax=ax_hist, bins=20)

    fig.suptitle(title, fontsize=22)
    plt.show()
# Sorted participant ids of the 500ms task, and each participant's rows.
pids5 = sorted(set(db_isip5.index.get_level_values('pid')))
isip5 = dict((pid, db_isip5.xs(pid)) for pid in pids5)


def isip5_hist(p):
    """Look up participant *p*'s raw and filtered intervals.

    The plotting calls that used these two series are disabled, so for now
    the function only performs the lookups and returns None.
    """
    participant = isip5[p]
    ints = participant.int_raw
    ints_filt = participant.ints_filtered
    #sideplots(ints, ints_filt,
    #          plotname_top="pre-filter",
    #          plotname_bottom="post-filter"):
    #ints.hist()
    #plt.show()


# Only the first participant is processed: the interactive "continue?"
# prompt (raw_input) that used to guard the early exit is disabled.
for p in pids5[:1]:
    print(p)
    isip5_hist(p)
#isip5['015']
015
pids8 = sorted(set(db_isip8.index.get_level_values('pid')))
isip8 = {p: db_isip8.xs(p) for p in pids8}
# Shared generator: every call to next_twenty_800() continues where the
# previous one stopped.
pids8_gen = (p for p in pids8)


def next_twenty_800(batch_size=9):
    """Plot raw vs. cut-off interval distributions for the next participants.

    Takes the next ``batch_size`` pids from the shared ``pids8_gen``
    generator. For each one it prints the raw interval count and shows the
    raw intervals next to two filtered variants: the absolute cut-off
    (ISIP_8_MINIMUM_INT to ISIP_8_MAXIMUM_INT) and a cut-off of +/- 2.97
    standard deviations around the mean.

    The previous loop pulled a tenth pid from the generator before
    breaking, so every tenth participant was skipped without being
    plotted. ``islice`` takes exactly ``batch_size`` items. The default of
    9 keeps the number of figures per call unchanged.
    """
    from itertools import islice

    for p in islice(pids8_gen, batch_size):
        i_raw = isip8[p].int_raw
        c = i_raw.count()
        print(c)
        m = i_raw.mean()
        s = i_raw.std()
        i_abs_cutoff = i_raw[(i_raw >= ISIP_8_MINIMUM_INT)
                             & (i_raw <= ISIP_8_MAXIMUM_INT)]
        i_sd_cutoff = i_raw[(i_raw <= m + 2.97*s) & (i_raw >= m - 2.97*s)]
        sideplots(title='P. %s' % p,
                  serieslist=[i_raw, i_abs_cutoff, i_sd_cutoff],
                  namelist=['raw', 'abs cutoff', 'sd cutoff'])


next_twenty_800()
152
152
145
127
142
143
152
139
# Filtered intervals of participant 055 (500ms task) in row order, one
# marker per row.
isip5['055'].ints_filtered.plot(marker="o")
<matplotlib.axes.AxesSubplot at 0x1faeca58>
# Shared iterator over the 500ms-task participants.
pids = iter(pids5)


def next_ten():
    """Debug variant: plot participant '055' once per call.

    One pid is still consumed from the shared ``pids`` iterator on each
    call, and nothing happens once it is exhausted. Whichever pid was
    drawn, it is overridden by the hard-coded '055' and the function stops
    after that single figure.
    """
    exhausted = object()
    if next(pids, exhausted) is exhausted:
        return
    p = '055'
    i_raw = isip5[p].int_raw
    print(i_raw.count())
    m = i_raw.mean()
    s = i_raw.std()
    # between() is inclusive at both ends, like the >= / <= pair it replaces.
    i_abs_cutoff = i_raw[i_raw.between(375, 650)]
    i_sd_cutoff = i_raw[i_raw.between(m - 2.97*s, m + 2.97*s)]
    sideplots(title='P. %s' % p,
              serieslist=[i_raw, i_abs_cutoff, i_sd_cutoff],
              namelist=['raw', 'abs cutoff', 'sd cutoff'],
              figsize=(15,8))


next_ten()
283
def _frames_by_pid(db):
    """Return (sorted pid list, {pid: that participant's rows}) for *db*."""
    pid_values = sorted(set(db.index.get_level_values('pid')))
    return pid_values, dict((pid, db.xs(pid)) for pid in pid_values)


# Rebuild the per-participant lookups from the current (post-exclusion) frames.
pids5, isip5 = _frames_by_pid(db_isip5)
pids8, isip8 = _frames_by_pid(db_isip8)

# One figure per 800ms-task participant: the filtered intervals only.
for p in pids8:
    sideplots('P. %s' % p,
              [isip8[p].ints_filtered],
              ['ints_filtered'],
              figsize=(19,5))
# Manual re-computation of the drift formula
#   df['drift'] = 100 * (1. / df.ints_mean) * np.sqrt(df.ints_variance - df.local_sq_abs)
# for participant 017, using literal values for the mean, the variance and
# local_sq_abs:
100 * (1. / 844.185541) * np.sqrt(1740.213950 - 1040.648841)
# Problem: for some participants local_sq_abs is greater than the total
# ints_variance. That should not happen, and it makes the square root
# undefined.
3.1331137388384755
# Dump the post-stimulus rows and the outcome table to CSV so the
# calculations can be checked by hand outside the notebook.
# NOTE(review): `outcomesdf_isip5` is not assigned anywhere in the visible
# code (only the *_lag2 / *_avgprev4 variants are), and pid '010' is on the
# pilot exclusion list -- this cell appears to come from an earlier
# notebook state; confirm before re-running.
db_isip5[db_isip5.task_ms >= isip_5_start_recording].to_csv('check_isip5_calcs_individual_pids_V4.csv')
outcomesdf_isip5.to_csv('check_isip5_calcs_outcomes_V4.csv')
db_isip5.xs('010', level='pid').ints.count()
117
#db_isip8
# Compare an experimental local_sq_abs with the regular one; a maximum
# difference of 0 means no participant has a larger experimental value.
# NOTE(review): neither `outcomesdf_isip5_exp` nor `outcomesdf_isip5` is
# defined in the visible code -- presumably left over from an earlier
# notebook state.
test = outcomesdf_isip5_exp['exp_local_sq_abs'] - outcomesdf_isip5['local_sq_abs']
test.max()
0.0
# Show the first five rows transposed, so each column name reads as a row.
db_isip5.head().T
pid | 010 | ||||
---|---|---|---|---|---|
csv_line | 8562 | 8564 | 8567 | 8569 | 8571 |
run_count | 17 | 17 | 17 | 17 | 17 |
task_id | 5 | 5 | 5 | 5 | 5 |
i | 1 | 3 | 6 | 8 | 10 |
velocity | 43 | 51 | 54 | 60 | 53 |
micros | 2516643704 | 2517208392 | 2517742648 | 2518263648 | 2518755488 |
task_ms | 409.128 | 973.816 | 1508.072 | 2029.072 | 2520.912 |
int_raw | NaN | 564.688 | 534.256 | 521 | 491.84 |
lag2dev | NaN | NaN | NaN | NaN | NaN |
lag2devsq | NaN | NaN | NaN | NaN | NaN |
movingmean_prev2 | NaN | NaN | NaN | NaN | NaN |
lagdev_avgprev2 | NaN | NaN | NaN | NaN | NaN |
lagdev_avgprev2sq | NaN | NaN | NaN | NaN | NaN |
movingmean_prev3 | NaN | NaN | NaN | NaN | NaN |
lagdev_avgprev3 | NaN | NaN | NaN | NaN | NaN |
lagdev_avgprev3sq | NaN | NaN | NaN | NaN | NaN |
movingmean_prev4 | NaN | NaN | NaN | NaN | NaN |
lagdev_avgprev4 | NaN | NaN | NaN | NaN | NaN |
lagdev_avgprev4sq | NaN | NaN | NaN | NaN | NaN |
int_filt1 | NaN | NaN | NaN | NaN | NaN |
int_max_exceeded | NaN | NaN | NaN | NaN | NaN |
ints | NaN | NaN | NaN | NaN | NaN |
# Scatter of the window-3 moving mean (y) against task time (x), all
# participants pooled in one figure. The per-participant window-4 variant
# is kept below, disabled.
#db_isip5[['task_ms', 'movingmean_prev4']].groupby(level='pid').plot(kind='scatter', x=0, y=1, figsize=(18,18))
db_isip5[['task_ms', 'movingmean_prev3']].plot(kind='scatter', x=0, y=1, figsize=(18,18))
<matplotlib.axes.AxesSubplot at 0x55a17c50>
# Show the 800ms-task outcomes ordered from the most to the least variable
# participant. DataFrame.sort() no longer exists in current pandas, so use
# sort_values() when it is available and fall back to sort() otherwise.
if hasattr(outcomesdf_isip8_avgprev4, 'sort_values'):
    ranked_by_variance = outcomesdf_isip8_avgprev4.sort_values(
        'ints_variance', ascending=False)
else:
    ranked_by_variance = outcomesdf_isip8_avgprev4.sort(
        'ints_variance', ascending=False)
ranked_by_variance
#outcomesdf_isip5[30:]
ints_count | ints_mean | ints_variance | ints_stdev | lagdev_avgprev4sq_sum | lagdev_avgprev4sq_count | lagdev_avgprev4sq_mean | lagdev_avgprev4sq_local_sq_abs | lagdev_avgprev4sq_local | lagdev_avgprev4sq_drift | |
---|---|---|---|---|---|---|---|---|---|---|
pid | ||||||||||
048 | 15 | 916.353067 | 20867.669247 | 144.456461 | 1413.420112 | 2 | 706.710056 | 353.355028 | 2.051364 | 15.630241 |
049 | 131 | 677.489649 | 6168.530649 | 78.539994 | 640079.383384 | 119 | 5378.818348 | 2689.409174 | 7.654658 | 8.706270 |
073 | 142 | 651.723690 | 5567.003086 | 74.612352 | 240273.168960 | 138 | 1741.109920 | 870.554960 | 4.527251 | 10.515290 |
071 | 129 | 716.267969 | 4861.394718 | 69.723703 | 257113.257986 | 125 | 2056.906064 | 1028.453032 | 4.477305 | 8.643519 |
065 | 94 | 888.713532 | 3097.764909 | 55.657568 | 138198.965205 | 60 | 2303.316087 | 1151.658043 | 3.818563 | 4.963883 |
036 | 129 | 703.468155 | 2774.862029 | 52.676959 | 319606.785295 | 121 | 2641.378391 | 1320.689195 | 5.166017 | 5.420802 |
062 | 115 | 793.888557 | 2763.623445 | 52.570176 | 247692.904354 | 107 | 2314.886957 | 1157.443478 | 4.285390 | 5.048212 |
050 | 113 | 815.714973 | 2581.123395 | 50.804758 | 249713.540472 | 109 | 2290.949913 | 1145.474956 | 4.149104 | 4.644999 |
079 | 112 | 799.880643 | 2463.848699 | 49.637171 | 312241.442559 | 100 | 3122.414426 | 1561.207213 | 4.939751 | 3.756059 |
112 | 125 | 730.919936 | 2441.423644 | 49.410764 | 201279.851170 | 121 | 1663.469844 | 831.734922 | 3.945687 | 5.489100 |
015 | 137 | 702.533956 | 2430.437702 | 49.299470 | 273840.045376 | 125 | 2190.720363 | 1095.360182 | 4.710979 | 5.200989 |
069 | 115 | 798.398504 | 2392.578919 | 48.913995 | 294280.806528 | 111 | 2651.178437 | 1325.589219 | 4.560209 | 4.091291 |
089 | 122 | 752.853475 | 2259.368340 | 47.532813 | 82409.339912 | 118 | 698.384237 | 349.192118 | 2.482114 | 5.805321 |
025 | 105 | 873.708838 | 2234.780431 | 47.273464 | 84482.066848 | 101 | 836.456107 | 418.228054 | 2.340668 | 4.878173 |
080 | 130 | 694.809015 | 2221.811703 | 47.136098 | 97408.345467 | 122 | 798.429061 | 399.214531 | 2.875661 | 6.144406 |
020 | 111 | 819.063459 | 2114.370996 | 45.982290 | 198047.988630 | 103 | 1922.796006 | 961.398003 | 3.785594 | 4.145644 |
099 | 124 | 741.676419 | 2017.520730 | 44.916820 | 203068.116562 | 120 | 1692.234305 | 846.117152 | 3.921938 | 4.614650 |
043 | 132 | 701.174636 | 1999.840545 | 44.719577 | 99567.367423 | 128 | 777.870058 | 388.935029 | 2.812628 | 5.724121 |
086 | 111 | 823.767604 | 1966.517909 | 44.345438 | 168360.224507 | 107 | 1573.460042 | 786.730021 | 3.404930 | 4.169627 |
114 | 117 | 760.652444 | 1941.435481 | 44.061724 | 257821.200139 | 105 | 2455.440001 | 1227.720001 | 4.606419 | 3.512176 |
077 | 117 | 781.664718 | 1937.097322 | 44.012468 | 225173.202000 | 113 | 1992.683204 | 996.341602 | 4.038161 | 3.923900 |
023 | 114 | 810.928772 | 1901.405562 | 43.605109 | 221269.804635 | 110 | 2011.543678 | 1005.771839 | 3.910813 | 3.690477 |
087 | 89 | 922.643146 | 1882.635147 | 43.389344 | 66521.666558 | 63 | 1055.899469 | 527.949735 | 2.490362 | 3.989197 |
058 | 113 | 814.722372 | 1881.234906 | 43.373205 | 206449.352136 | 109 | 1894.030754 | 947.015377 | 3.777190 | 3.751585 |
105 | 123 | 744.315415 | 1875.171194 | 43.303247 | 172491.447506 | 119 | 1449.507962 | 724.753981 | 3.616915 | 4.556912 |
029 | 115 | 804.279235 | 1867.368872 | 43.213064 | 248772.631647 | 111 | 2241.194880 | 1120.597440 | 4.162152 | 3.397715 |
017 | 109 | 844.185541 | 1740.213950 | 41.715872 | 143206.684566 | 105 | 1363.873186 | 681.936593 | 3.093386 | 3.853557 |
121 | 105 | 870.896114 | 1675.054895 | 40.927435 | 155709.983251 | 101 | 1541.683002 | 770.841501 | 3.187984 | 3.452782 |
068 | 112 | 812.597286 | 1536.414100 | 39.197119 | 181396.409978 | 104 | 1744.196250 | 872.098125 | 3.634187 | 3.171845 |
072 | 112 | 825.148071 | 1482.495409 | 38.503187 | 169901.322776 | 108 | 1573.160396 | 786.580198 | 3.398910 | 3.197027 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
076 | 114 | 812.193018 | 727.804233 | 26.977847 | 94891.601549 | 110 | 862.650923 | 431.325462 | 2.557074 | 2.120008 |
081 | 114 | 779.827544 | 687.355127 | 26.217458 | 67012.581453 | 102 | 656.986093 | 328.493046 | 2.324152 | 2.429211 |
033 | 116 | 795.213966 | 677.040070 | 26.019994 | 87007.273412 | 112 | 776.850655 | 388.425328 | 2.478391 | 2.136364 |
063 | 114 | 800.623333 | 676.930632 | 26.017891 | 89612.764257 | 110 | 814.661493 | 407.330747 | 2.520840 | 2.050839 |
019 | 116 | 794.750414 | 676.364586 | 26.007010 | 80265.162034 | 112 | 716.653232 | 358.326616 | 2.381819 | 2.243927 |
028 | 122 | 753.348820 | 667.002049 | 25.826383 | 80715.285312 | 118 | 684.027842 | 342.013921 | 2.454855 | 2.392972 |
120 | 121 | 758.141686 | 666.742129 | 25.821350 | 84332.383746 | 117 | 720.789605 | 360.394802 | 2.504026 | 2.308642 |
042 | 119 | 756.064739 | 665.248659 | 25.792415 | 68057.120333 | 115 | 591.801046 | 295.900523 | 2.275170 | 2.541903 |
066 | 110 | 811.625927 | 644.061508 | 25.378367 | 68121.385873 | 98 | 695.116182 | 347.558091 | 2.296983 | 2.121578 |
082 | 118 | 777.074814 | 613.571344 | 24.770372 | 86968.594176 | 114 | 762.882405 | 381.441203 | 2.513338 | 1.960663 |
041 | 119 | 770.669849 | 579.816556 | 24.079380 | 76222.351133 | 115 | 662.803053 | 331.401527 | 2.362158 | 2.045128 |
098 | 118 | 777.291593 | 567.512488 | 23.822521 | 79510.471323 | 114 | 697.460275 | 348.730137 | 2.402485 | 1.902927 |
119 | 111 | 832.191856 | 550.818229 | 23.469517 | 65808.892054 | 107 | 615.036374 | 307.518187 | 2.107230 | 1.874337 |
075 | 119 | 777.781613 | 532.321319 | 23.072090 | 50763.319894 | 115 | 441.420173 | 220.710086 | 1.910088 | 2.269598 |
102 | 114 | 804.779053 | 531.380717 | 23.051697 | 47076.259142 | 110 | 427.965992 | 213.982996 | 1.817661 | 2.213733 |
110 | 114 | 807.640421 | 530.966500 | 23.042710 | 74662.088049 | 110 | 678.746255 | 339.373127 | 2.280976 | 1.713848 |
057 | 119 | 773.640000 | 523.642247 | 22.883231 | 66580.947648 | 115 | 578.964762 | 289.482381 | 2.199238 | 1.977959 |
027 | 112 | 822.994571 | 522.639055 | 22.861300 | 53110.808332 | 108 | 491.766744 | 245.883372 | 1.905319 | 2.021396 |
078 | 115 | 796.646783 | 516.727957 | 22.731651 | 65273.319254 | 111 | 588.047921 | 294.023961 | 2.152413 | 1.873261 |
034 | 114 | 812.306596 | 513.101841 | 22.651751 | 65086.298002 | 110 | 591.693618 | 295.846809 | 2.117451 | 1.814533 |
022 | 109 | 823.375670 | 511.808289 | 22.623180 | 32921.279468 | 101 | 325.953262 | 162.976631 | 1.550475 | 2.268349 |
031 | 109 | 829.454862 | 500.124746 | 22.363469 | 58005.872594 | 105 | 552.436882 | 276.218441 | 2.003704 | 1.804016 |
115 | 119 | 777.876975 | 492.878399 | 22.200865 | 66013.674230 | 115 | 574.031950 | 287.015975 | 2.177921 | 1.844496 |
047 | 110 | 841.013018 | 468.186673 | 21.637622 | 45479.634149 | 106 | 429.053152 | 214.526576 | 1.741558 | 1.893753 |
116 | 113 | 794.877381 | 431.205669 | 20.765492 | 57209.350172 | 105 | 544.850954 | 272.425477 | 2.076461 | 1.585251 |
070 | 119 | 771.457983 | 413.095668 | 20.324755 | 46867.943760 | 115 | 407.547337 | 203.773669 | 1.850384 | 1.875405 |
013 | 114 | 834.087930 | 338.276056 | 18.392283 | 36457.388143 | 98 | 372.014165 | 186.007082 | 1.635132 | 1.479428 |
026 | 117 | 787.017641 | 326.880811 | 18.079845 | 41266.683642 | 113 | 365.191891 | 182.595945 | 1.716964 | 1.526251 |
032 | 117 | 792.519453 | 280.617415 | 16.751639 | 32422.220689 | 113 | 286.922307 | 143.461153 | 1.511323 | 1.477740 |
045 | 0 | NaN | NaN | NaN | NaN | 0 | NaN | NaN | NaN | NaN |
112 rows × 10 columns
#don't need this: we'll get the full ID list when we merge them together later.
# set an index with full ID list to ensure that the index is consistent
# across all of the scales/index values/etc. that we'll be concatenating
# together once all processing steps are done.
#pid_list = ['010', '011', '012', '013', '014', '015', '016', '017',
# '018', '019', '020', '021', '022', '023', '024', '025',
# '026', '027', '028', '029', '030', '031', '032', '033',
# '034', '035', '036', '037', '038', '039', '040', '041',
# '042', '043', '044', '045', '046', '047', '048', '049',
# '050', '051', '052', '053', '054', '055', '056', '057',
# '058', '059', '060', '061', '062', '063', '064', '065',
# '066', '067', '068', '069', '070', '071', '072', '073',
# '074', '075', '076', '077', '078', '079', '080', '081',
# '082', '083', '084', '085', '086', '087', '088', '089',
# '090', '091', '092', '093', '094', '095', '096', '097',
# '098', '099', '100', '101', '102', '103', '104', '105',
#'106', '107', '108', '109', '110', '111', '112', '113',
#'114', '115', '116', '117', '118', '119', '120', '121']
#full_index = pd.DataFrame(index = pid_list)
#not using this anymore: it's easier to have all of the multiindex values
#(scales, isip5, isip8, etc) set up in the final assembly, rather than
#stick the ISIP5 / ISIP8 together in a multiindex already here. So we'll
#save the tasks under separate pickle files.
# Side-by-side (axis=1) outer join of the scales table and both tasks'
# outcome tables. `keys` adds a top column level, named 'task', with the
# values 'scales' / 'isip5' / 'isip8'.
# NOTE(review): `scales` is not defined in the visible code, and the
# comment preceding this cell says the approach is no longer used -- the
# tasks are saved to separate pickle files instead.
isip_out = pd.concat([scales,
isip_outcomes_taskdf(db_isip5),
isip_outcomes_taskdf(db_isip8),
],
axis=1, #defaults
join='outer',
#join_axes=None,
#ignore_index=False,
keys=['scales','isip5','isip8'],
#levels=None,
names=['task'],
#verify_integrity=False
)
# Peek at a slice that includes participants with no data (all-NaN rows).
isip_out[31:37]
task | scales | ... | isip8 | ||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
session_day | date | time | session_sex | exclusion_language | exclusion_overall | exclusion_rhythmmethodsgeneral | exclusion_rhythmadminerror | age | wasivocab_rawscore | ... | ints_count | ints_mean | ints_variance | ints_std | lag2devsq_count | lag2devsq_sum | lag2devsq_mean | local_sq_abs | local | drift | |
041 | 20140326 | 3/26/2014 | 1:00 PM | f | 0 | 0 | 0 | 0 | 23 | 40 | ... | 119 | 770.669849 | 579.816556 | 24.079380 | 117 | 102163.465424 | 873.192012 | 436.596006 | 2.711262 | 1.552867 |
042 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
043 | 20140328 | 3/28/2014 | 11:00 AM | f | 0 | 0 | 0 | 0 | 19 | 29 | ... | 132 | 701.174636 | 1999.840545 | 44.719577 | 130 | 188007.440288 | 1446.211079 | 723.105540 | 3.835082 | 5.095939 |
044 | 20140331 | 3/31/2014 | 10:30 AM | f | 0 | 0 | 0 | 0 | 18 | 40 | ... | 111 | 827.880505 | 1191.182247 | 34.513508 | 109 | 164717.853136 | 1511.172965 | 755.586482 | 3.320278 | 2.521008 |
045 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
046 | 20140331 | 3/31/2014 | 1:30 PM | m | 0 | 0 | 0 | 0 | 18 | 38 | ... | 124 | 740.406000 | 1015.423245 | 31.865706 | 122 | 137458.143136 | 1126.706091 | 563.353046 | 3.205682 | 2.871660 |
6 rows × 42 columns
def _interval_counts(pid):
    """Format one participant's non-null 'ints' counts for both tasks."""
    n500 = db_isip5.xs(pid, level='pid').ints.count()
    n800 = db_isip8.xs(pid, level='pid').ints.count()
    return str(n500) + " @500, " + str(n800) + " @800"


# NOTE(review): `pid_list` is not assigned at notebook level in the visible
# code (its literal definition is commented out) -- confirm where it comes from.
counts = {pid: _interval_counts(pid) for pid in pid_list}
counts
{'010': '117 @500, 129 @800', '011': '118 @500, 128 @800', '012': '109 @500, 120 @800', '013': '117 @500, 114 @800', '014': '116 @500, 119 @800', '015': '118 @500, 137 @800', '016': '107 @500, 112 @800', '017': '113 @500, 109 @800', '018': '117 @500, 114 @800', '019': '116 @500, 116 @800', '020': '108 @500, 111 @800', '021': '117 @500, 121 @800', '022': '118 @500, 109 @800', '023': '118 @500, 114 @800', '024': '115 @500, 124 @800', '025': '106 @500, 105 @800', '026': '110 @500, 117 @800', '027': '116 @500, 112 @800', '028': '113 @500, 122 @800', '029': '119 @500, 115 @800', '030': '110 @500, 108 @800', '031': '115 @500, 109 @800', '032': '114 @500, 117 @800', '033': '114 @500, 116 @800', '034': '115 @500, 114 @800', '035': '114 @500, 116 @800', '036': '113 @500, 129 @800', '037': '116 @500, 119 @800', '038': '116 @500, 112 @800', '039': '121 @500, 120 @800', '040': '111 @500, 118 @800', '041': '113 @500, 119 @800', '043': '118 @500, 132 @800', '044': '114 @500, 111 @800', '046': '114 @500, 124 @800', '047': '118 @500, 110 @800', '049': '156 @500, 131 @800', '050': '120 @500, 113 @800', '051': '112 @500, 126 @800', '052': '115 @500, 111 @800', '053': '113 @500, 105 @800', '054': '111 @500, 113 @800', '055': '116 @500, 125 @800', '056': '115 @500, 125 @800', '057': '114 @500, 119 @800', '058': '109 @500, 113 @800', '059': '117 @500, 117 @800', '060': '122 @500, 111 @800', '061': '111 @500, 114 @800', '062': '115 @500, 115 @800', '063': '115 @500, 114 @800', '064': '113 @500, 120 @800', '065': '110 @500, 94 @800', '066': '118 @500, 110 @800', '067': '127 @500, 122 @800', '068': '119 @500, 112 @800', '069': '122 @500, 115 @800', '070': '116 @500, 119 @800', '071': '121 @500, 129 @800', '072': '119 @500, 112 @800', '073': '127 @500, 142 @800', '074': '116 @500, 113 @800', '075': '113 @500, 119 @800', '076': '115 @500, 114 @800', '077': '114 @500, 117 @800', '078': '117 @500, 115 @800', '079': '115 @500, 112 @800', '080': '118 @500, 130 @800', '081': '119 @500, 114 
@800', '082': '111 @500, 118 @800', '083': '109 @500, 112 @800', '084': '117 @500, 112 @800', '085': '114 @500, 114 @800', '086': '106 @500, 111 @800', '087': '112 @500, 89 @800', '088': '118 @500, 119 @800', '089': '117 @500, 122 @800', '090': '120 @500, 119 @800', '091': '119 @500, 131 @800', '092': '112 @500, 120 @800', '093': '123 @500, 117 @800', '094': '113 @500, 81 @800', '095': '113 @500, 115 @800', '096': '110 @500, 105 @800', '097': '119 @500, 115 @800', '098': '115 @500, 118 @800', '099': '112 @500, 124 @800', '100': '114 @500, 118 @800', '101': '118 @500, 126 @800', '102': '115 @500, 114 @800', '103': '112 @500, 116 @800', '104': '108 @500, 113 @800', '105': '115 @500, 123 @800', '106': '115 @500, 107 @800', '107': '114 @500, 113 @800', '108': '103 @500, 118 @800', '109': '109 @500, 122 @800', '110': '115 @500, 114 @800', '111': '114 @500, 103 @800', '112': '113 @500, 125 @800', '113': '115 @500, 114 @800', '114': '117 @500, 117 @800', '115': '111 @500, 119 @800', '116': '113 @500, 113 @800', '117': '108 @500, 78 @800', '118': '113 @500, 109 @800', '119': '108 @500, 111 @800', '120': '120 @500, 121 @800', '121': '115 @500, 105 @800'}
# One histogram per participant of the `ints` column of db_isip8, for
# eyeballing outlier intervals.
for pid in pid_list:
    fig = plt.figure(figsize=(14,3))
    try:
        db_isip8.loc[pid].ints.hist(bins=40)
    except Exception as err:
        # Still best-effort (a participant that cannot be plotted is skipped),
        # but report it, drop the empty figure, and let KeyboardInterrupt
        # through -- a bare `except: pass` did none of these.
        plt.close(fig)
        print("skipped {pid}: {err!r}".format(pid=pid, err=err))
        continue
    print(pid)
    plt.show()
#outliers: 054, one point >1000
# 036, one point > 1000
# 056, two points > 900
# 062, one point > 1100
# 048, two points < 650 (almost whole set is >1000)
010
011
012
013
014
015
016
017
018
019
020
021
022
023
024
025
026
027
028
029
030
031
032
033
034
035
036
037
038
039
040
041
043
044
046
047
049
050
051
052
053
054
055
056
057
058
059
060
061
062
063
064
065
066
067
068
069
070
071
072
073
074
075
076
077
078
079
080
081
082
083
084
085
086
087
088
089
090
091
092
093
094
095
096
097
098
099
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# One histogram per participant of the `ints` column of db_isip5, for
# eyeballing outlier intervals.
for pid in pid_list:
    fig = plt.figure(figsize=(14,3))
    try:
        db_isip5.loc[pid].ints.hist(bins=40)
    except Exception as err:
        # Still best-effort (a participant that cannot be plotted is skipped),
        # but report it, drop the empty figure, and let KeyboardInterrupt
        # through -- a bare `except: pass` did none of these.
        plt.close(fig)
        print("skipped {pid}: {err!r}".format(pid=pid, err=err))
        continue
    print(pid)
    plt.show()
# NOTE(review): the outlier notes below are word-for-word the same as the ones
# under the db_isip8 histogram cell -- confirm they really describe db_isip5.
#outliers: 054, one point >1000
# 036, one point > 1000
# 056, two points > 900
# 062, one point > 1100
# 048, two points < 650 (almost whole set is >1000)
010
011
012
013
014
015
016
017
018
019
020
021
022
023
024
025
026
027
028
029
030
031
032
033
034
035
036
037
038
039
040
041
043
044
046
047
049
050
051
052
053
054
055
056
057
058
059
060
061
062
063
064
065
066
067
068
069
070
071
072
073
074
075
076
077
078
079
080
081
082
083
084
085
086
087
088
089
090
091
092
093
094
095
096
097
098
099
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
dfo.to_csv('isip_data_output.csv')
!isip_data_output.csv
#for viewing data that will be kept...
data = dfo.loc[dfo.scales.exclusion_language==0]
#pd.set_option('display.multi_sparse', False)
#dfo['isip5']['ints_mean']
#dfo['isip5','ints_mean']
#dfo.loc(axis=1)['isip5','ints_mean']
#dfo.loc(axis=1)[('isip5','ints_mean')]
dfo['isip5'].drift[dfo['isip5'].drift > 6]
049 9.978668 055 8.000231 Name: drift, dtype: float64
dfo.head()
task | scales | scales | scales | scales | scales | scales | scales | scales | scales | scales | ... | isip8 | isip8 | isip8 | isip8 | isip8 | isip8 | isip8 | isip8 | isip8 | isip8 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
session_day | date | time | session_sex | exclusion_language | exclusion_overall | exclusion_rhythmmethodsgeneral | exclusion_rhythmadminerror | age | wasivocab_rawscore | ... | ints_count | ints_mean | ints_variance | ints_std | lag2devsq_count | lag2devsq_sum | lag2devsq_mean | local_sq_abs | local | drift | |
pid | |||||||||||||||||||||
010 | 20140226 | 2/26/2014 | 1:20 PM | m | 1 | 0 | 0 | 0 | 18 | 29 | ... | 129 | 774.104930 | 1138.331372 | 33.739167 | 127 | 153047.935504 | 1205.101854 | 602.550927 | 3.171006 | 2.990154 |
011 | 20140226 | 2/26/2014 | 2:40 PM | f | 0 | 0 | 0 | 0 | 19 | 40 | ... | 128 | 776.994000 | 883.209799 | 29.718846 | 126 | 208781.623552 | 1656.997012 | 828.498506 | 3.704488 | 0.951965 |
012 | 20140226 | 2/26/2014 | 4:00 PM | f | 0 | 0 | 0 | 0 | 57 | 33 | ... | 120 | 831.791733 | 986.297632 | 31.405376 | 118 | 176171.441216 | 1492.978315 | 746.489158 | 3.284711 | 1.861734 |
013 | 20140227 | 2/27/2014 | 8:00 AM | m | 0 | 0 | 0 | 1 | 21 | 42 | ... | 114 | 834.087930 | 338.276056 | 18.392283 | 106 | 60632.582384 | 572.005494 | 286.002747 | 2.027558 | 0.866818 |
014 | 20140228 | 2/28/2014 | 9:10 AM | f | 1 | 0 | 0 | 0 | 25 | 13 | ... | 119 | 845.244504 | 1018.963019 | 31.921200 | 117 | 176923.391440 | 1512.165739 | 756.082869 | 3.253137 | 1.918212 |
5 rows × 42 columns
# Write one JSON file per top-level 'task' column group of dfo.
for task in ('scales', 'isip5', 'isip8'):
    task_frame = dfo.xs(task, level='task', axis=1)
    task_frame.to_json('isip_data_output_' + task + '.json')
outcomesdf_isip5
ints_count | ints_mean | ints_variance | ints_stdev | lag2devsq_sum | lag2devsq_count | lag2devsq_mean | lag2devsq_local_sq_abs | lag2devsq_local | lag2devsq_drift | |
---|---|---|---|---|---|---|---|---|---|---|
pid | ||||||||||
010 | 117 | 484.570188 | 431.516467 | 20.772974 | 61689.291664 | 113 | 545.922935 | 272.961468 | 3.409526 | 2.598563 |
011 | 118 | 485.356814 | 231.312109 | 15.208948 | 59610.206416 | 116 | 513.881090 | 256.940545 | 3.302594 | NaN |
012 | 109 | 528.153248 | 619.335497 | 24.886452 | 134842.083888 | 107 | 1260.206391 | 630.103196 | 4.752760 | NaN |
013 | 117 | 492.018496 | 191.325997 | 13.832064 | 45051.603072 | 115 | 391.753070 | 195.876535 | 2.844525 | NaN |
014 | 116 | 495.567621 | 426.417128 | 20.649870 | 100343.064752 | 114 | 880.202322 | 440.101161 | 4.233244 | NaN |
015 | 118 | 482.560712 | 503.989402 | 22.449708 | 71439.642368 | 114 | 626.663530 | 313.331765 | 3.668177 | 2.861377 |
016 | 107 | 534.489682 | 337.322524 | 18.366342 | 81375.980656 | 105 | 775.009340 | 387.504670 | 3.682978 | NaN |
017 | 113 | 507.232283 | 372.501872 | 19.300308 | 76301.810240 | 111 | 687.403696 | 343.701848 | 3.654972 | 1.058009 |
018 | 117 | 490.434838 | 169.564390 | 13.021689 | 33365.922272 | 115 | 290.138455 | 145.069227 | 2.455875 | 1.009157 |
019 | 116 | 496.082310 | 87.338616 | 9.345513 | 19395.184096 | 114 | 170.133194 | 85.066597 | 1.859199 | 0.303845 |
020 | 108 | 527.804963 | 589.663762 | 24.282993 | 95309.265760 | 106 | 899.144017 | 449.572008 | 4.017225 | 2.242502 |
021 | 117 | 493.456479 | 491.635470 | 22.172854 | 103278.063904 | 115 | 898.070121 | 449.035060 | 4.294289 | 1.322690 |
022 | 118 | 491.002881 | 266.638426 | 16.329067 | 66905.256720 | 116 | 576.769454 | 288.384727 | 3.458614 | NaN |
023 | 118 | 487.401525 | 209.743705 | 14.482531 | 47419.766096 | 116 | 408.791087 | 204.395544 | 2.933248 | 0.474477 |
024 | 115 | 497.052209 | 346.236721 | 18.607437 | 73709.621120 | 113 | 652.297532 | 326.148766 | 3.633339 | 0.901708 |
025 | 106 | 537.173396 | 495.696599 | 22.264245 | 59565.535456 | 104 | 572.745533 | 286.372767 | 3.150296 | 2.693363 |
026 | 110 | 522.279564 | 484.524188 | 22.011910 | 99789.086832 | 108 | 923.973026 | 461.986513 | 4.115396 | 0.908974 |
027 | 116 | 492.205552 | 227.230850 | 15.074178 | 47722.113664 | 114 | 418.615032 | 209.307516 | 2.939313 | 0.860128 |
028 | 113 | 499.047115 | 340.013136 | 18.439445 | 73322.526480 | 109 | 672.683729 | 336.341865 | 3.674929 | 0.383943 |
029 | 119 | 480.923597 | 383.615111 | 19.586095 | 71476.612096 | 117 | 610.911214 | 305.455607 | 3.634109 | 1.838293 |
030 | 110 | 526.321818 | 152.433529 | 12.346397 | 26781.463872 | 108 | 247.976517 | 123.988259 | 2.115626 | 1.013336 |
031 | 115 | 503.174574 | 222.846559 | 14.928046 | 67330.682048 | 113 | 595.846744 | 297.923372 | 3.430312 | NaN |
032 | 114 | 504.738105 | 123.587386 | 11.116986 | 23833.411232 | 112 | 212.798315 | 106.399157 | 2.043633 | 0.821390 |
033 | 114 | 501.366632 | 280.591619 | 16.750869 | 85712.136320 | 112 | 765.286931 | 382.643466 | 3.901591 | NaN |
034 | 115 | 501.191200 | 225.441641 | 15.014714 | 42139.552128 | 113 | 372.916391 | 186.458195 | 2.724503 | 1.245767 |
035 | 114 | 504.927754 | 396.148055 | 19.903468 | 82621.804784 | 112 | 737.694686 | 368.847343 | 3.803593 | 1.034803 |
036 | 113 | 506.245097 | 865.523721 | 29.419785 | 209699.127152 | 111 | 1889.181326 | 944.590663 | 6.071011 | NaN |
037 | 116 | 497.986897 | 609.504061 | 24.688136 | 100630.167984 | 114 | 882.720772 | 441.360386 | 4.218701 | 2.603888 |
038 | 116 | 496.498345 | 194.346045 | 13.940805 | 36632.072896 | 114 | 321.333973 | 160.666986 | 2.552969 | 1.168859 |
039 | 121 | 475.983207 | 383.342106 | 19.579124 | 64152.844432 | 119 | 539.099533 | 269.549767 | 3.449275 | 2.241119 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
092 | 112 | 508.889214 | 422.469593 | 20.554065 | 107997.824320 | 110 | 981.798403 | 490.899201 | 4.353845 | NaN |
093 | 123 | 465.726732 | 412.370742 | 20.306914 | 73061.555584 | 121 | 603.814509 | 301.907254 | 3.730831 | 2.256723 |
094 | 113 | 509.198549 | 705.218200 | 26.555945 | 144728.472992 | 111 | 1303.860117 | 651.930059 | 5.014335 | 1.433600 |
095 | 113 | 496.906018 | 337.276204 | 18.365081 | 71364.353344 | 109 | 654.718838 | 327.359419 | 3.641147 | 0.633740 |
096 | 110 | 522.939891 | 369.646599 | 19.226196 | 85088.745520 | 108 | 787.858755 | 393.929377 | 3.795399 | NaN |
097 | 119 | 486.892672 | 450.793265 | 21.231893 | 110019.624912 | 117 | 940.338674 | 470.169337 | 4.453423 | NaN |
098 | 115 | 499.021948 | 232.085405 | 15.234350 | 50146.397600 | 113 | 443.773430 | 221.886715 | 2.985011 | 0.639960 |
099 | 112 | 514.290429 | 291.855066 | 17.083766 | 57254.421216 | 110 | 520.494738 | 260.247369 | 3.136785 | 1.093171 |
100 | 114 | 504.409088 | 344.916823 | 18.571936 | 69286.214752 | 112 | 618.626917 | 309.313459 | 3.486715 | 1.182940 |
101 | 118 | 483.521424 | 276.944489 | 16.641649 | 70894.575456 | 116 | 611.160133 | 305.580067 | 3.615320 | NaN |
102 | 115 | 503.016904 | 406.461517 | 20.160891 | 85176.583648 | 113 | 753.775077 | 376.887538 | 3.859431 | 1.081116 |
103 | 112 | 514.388250 | 478.128078 | 21.866140 | 90473.666432 | 110 | 822.487877 | 411.243938 | 3.942382 | 1.589903 |
104 | 108 | 531.871444 | 481.559774 | 21.944470 | 69251.269280 | 106 | 653.313861 | 326.656931 | 3.398124 | 2.340039 |
105 | 115 | 503.235965 | 373.097980 | 19.315744 | 84927.682624 | 113 | 751.572413 | 375.786206 | 3.852111 | NaN |
106 | 115 | 497.394748 | 259.228145 | 16.100564 | 29725.450832 | 113 | 263.057087 | 131.528544 | 2.305732 | 2.271923 |
107 | 114 | 507.268667 | 240.647244 | 15.512809 | 64212.007584 | 112 | 573.321496 | 286.660748 | 3.337691 | NaN |
108 | 103 | 559.819107 | 651.107208 | 25.516802 | 106842.165168 | 101 | 1057.843219 | 528.921610 | 4.108166 | 1.974523 |
109 | 109 | 531.315853 | 407.849711 | 20.195289 | 92851.086032 | 107 | 867.767159 | 433.883580 | 3.920431 | NaN |
110 | 115 | 495.863270 | 151.669191 | 12.315405 | 38776.196176 | 113 | 343.152179 | 171.576089 | 2.641596 | NaN |
111 | 114 | 507.458877 | 375.364702 | 19.374331 | 95714.069888 | 112 | 854.589910 | 427.294955 | 4.073456 | NaN |
112 | 113 | 512.244920 | 667.787840 | 25.841591 | 53438.306144 | 111 | 481.426181 | 240.713091 | 3.028811 | 4.034357 |
113 | 115 | 497.162678 | 139.779273 | 11.822828 | 30864.131664 | 113 | 273.133909 | 136.566954 | 2.350576 | 0.360505 |
114 | 117 | 482.347009 | 905.847583 | 30.097302 | 199473.923248 | 113 | 1765.255958 | 882.627979 | 6.159270 | 0.999005 |
115 | 111 | 518.681153 | 386.652012 | 19.663469 | 84921.336288 | 109 | 779.094828 | 389.547414 | 3.805219 | NaN |
116 | 113 | 508.455540 | 214.243404 | 14.637056 | 55277.700768 | 111 | 497.997304 | 248.998652 | 3.103455 | NaN |
117 | 108 | 528.354889 | 484.106776 | 22.002427 | 66093.075616 | 106 | 623.519581 | 311.759791 | 3.341830 | 2.484712 |
118 | 113 | 505.189274 | 518.832753 | 22.777901 | 87404.963440 | 111 | 787.432103 | 393.716052 | 3.927692 | 2.214132 |
119 | 108 | 533.036630 | 144.675042 | 12.028094 | 21337.248112 | 106 | 201.294794 | 100.647397 | 1.882107 | 1.244817 |
120 | 120 | 476.299600 | 261.461009 | 16.169756 | 55264.225152 | 118 | 468.340891 | 234.170446 | 3.212816 | 1.096796 |
121 | 115 | 499.836139 | 226.263531 | 15.042059 | 43008.093712 | 113 | 380.602599 | 190.301300 | 2.759899 | 1.199764 |
109 rows × 10 columns
# Scatter the square roots of two per-participant mean-squared-deviation
# measures against each other (outer join on the participant index).
x = outcomesdf_isip5['lag2devsq_mean']
y = outcomesdf_isip5_avgprev3['lagdev_avgprev3sq_mean']

# Vectorised square roots; the Series names are carried over unchanged.
sx = np.sqrt(x)
sy = np.sqrt(y)

column_keys = ['x sqrt of ' + x.name,
               'y sqrt of ' + y.name]
dfo = pd.concat([sx, sy], axis=1, join='outer', keys=column_keys)
dfo.plot(x=0, y=1, kind='scatter', figsize=(12,12))
<matplotlib.axes.AxesSubplot at 0xc828f98>
# Scatter isip8 drift against isip5 drift for the rows in `data`.
plt.figure(figsize=(8,5))
plt.scatter(data.isip8.drift, data.isip5.drift)
plt.show()
# List the participants whose drift exceeds 6 in either task.
print(data.isip8.drift[data.isip8.drift > 6])
print(data.isip5.drift[data.isip5.drift > 6])
# Drop those four participants and plot again.
# NOTE(review): this second plot starts from `dfo`, not from `data`, so any
# row filtering that was applied to build `data` is not applied here --
# confirm whether `data.drop(...)` was intended.
data_rem_drift_ol = dfo.drop(['049', '055', '071', '073'])
plt.figure(figsize=(8,5))
plt.scatter(data_rem_drift_ol.isip8.drift, data_rem_drift_ol.isip5.drift)
plt.show()
pid 071 8.124596 073 10.119067 Name: drift, dtype: float64 pid 049 9.978668 055 8.000231 Name: drift, dtype: float64
data.scales.fsiq2
data.isip5.local
pid 011 3.302594 012 4.752760 013 2.844525 015 3.668177 016 3.682978 017 3.654972 018 2.455875 019 1.859199 020 4.017225 021 4.294289 022 3.458614 024 3.633339 025 3.150296 026 4.115396 027 2.939313 ... 107 3.337691 108 4.108166 109 3.920431 110 2.641596 111 4.073456 112 3.028811 113 2.350576 114 6.159270 115 3.805219 116 3.103455 117 3.341830 118 3.927692 119 1.882107 120 3.212816 121 2.759899 Name: local, Length: 102, dtype: float64
# Scatter isip8 `local` against isip5 `local` for the rows in `data`.
plt.figure(figsize=(8,5))
plt.scatter(data.isip8.local, data.isip5.local)
plt.show()
# NOTE(review): the rows are selected on `local > 9` but the values printed
# are `drift` -- confirm whether `local` was meant to be shown instead.
print(data.isip8.drift[data.isip8.local > 9])
print(data.isip5.drift[data.isip5.local > 9])
# Drop participant 049 and plot again.
# NOTE(review): as above, this starts from `dfo` rather than `data`.
data_rem_local_ol = dfo.drop(['049'])
plt.figure(figsize=(8,5))
plt.scatter(data_rem_local_ol.isip8.local, data_rem_local_ol.isip5.local)
plt.show()
pid 049 5.272842 Name: drift, dtype: float64 Series([], name: drift, dtype: float64)
plt.figure(figsize=(8,5))
plt.scatter(data.scales.wasivocab_rawscore, data.scales.wasimatrix_rawscore)
plt.show()
plt.figure(figsize=(8,5))
plt.scatter(data.scales.wasivocab_tscore, data.isip8.local)
plt.show()
# Pearson correlation between scales.wasivocab_tscore and isip8.local.
from scipy import stats

compare_df = pd.concat([data.scales.wasivocab_tscore,
                        data.isip8.local],
                       axis=1)
# Keep rows that have a vocabulary T-score. The explicit .copy() makes
# compare_df an independent frame, so the assignment below is unambiguous.
compare_df = compare_df[compare_df.wasivocab_tscore.notnull()].copy()

# Overwrite participant 049's `local` value with 7 (presumably to rein in an
# outlier -- confirm). A single .loc[row, col] assignment is used because the
# chained form `compare_df.loc['049'].local = 7` can write to a temporary and
# raises SettingWithCopyWarning.
if '049' in compare_df.index:
    compare_df.loc['049', 'local'] = 7

plt.figure(figsize=(8,5))
plt.scatter(compare_df.wasivocab_tscore, compare_df.local)
plt.show()

# Row 0 = wasivocab_tscore, row 1 = local.
compare_arr = np.array(compare_df.T)
r, p = stats.pearsonr(compare_arr[0], compare_arr[1])
print("r: {r}, p: {p}".format(r=r, p=p))
C:\Applications\_Data analysis\Anaconda\lib\site-packages\pandas\core\generic.py:1858: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_index,col_indexer] = value instead self[name] = value
r: -0.150423484883, p: 0.131278387372
# Pearson correlation between scales.wasimatrix_tscore and isip8.local.
from scipy import stats

compare_df = pd.concat([data.scales.wasimatrix_tscore,
                        data.isip8.local],
                       axis=1)
# Keep rows that have a matrix T-score. The explicit .copy() makes
# compare_df an independent frame, so the assignment below is unambiguous.
compare_df = compare_df[compare_df.wasimatrix_tscore.notnull()].copy()

# Overwrite participant 049's `local` value with 7 (presumably to rein in an
# outlier -- confirm). A single .loc[row, col] assignment is used because the
# chained form `compare_df.loc['049'].local = 7` can write to a temporary and
# raises SettingWithCopyWarning.
if '049' in compare_df.index:
    compare_df.loc['049', 'local'] = 7

plt.figure(figsize=(8,5))
plt.scatter(compare_df.wasimatrix_tscore, compare_df.local)
plt.show()

# Row 0 = wasimatrix_tscore, row 1 = local.
compare_arr = np.array(compare_df.T)
r, p = stats.pearsonr(compare_arr[0], compare_arr[1])
print("r: {r}, p: {p}".format(r=r, p=p))
r: -0.152668983147, p: 0.127469736219
# Pearson correlation between scales.fsiq2 and isip8.local.
from scipy import stats

compare_df = pd.concat([data.scales.fsiq2,
                        data.isip8.local],
                       axis=1)
# Keep rows that have an fsiq2 score. The explicit .copy() makes compare_df
# an independent frame, so the assignment below is unambiguous.
compare_df = compare_df[compare_df.fsiq2.notnull()].copy()

# Overwrite participant 049's `local` value with 7 (presumably to rein in an
# outlier -- confirm). A single .loc[row, col] assignment is used because the
# chained form `compare_df.loc['049'].local = 7` can write to a temporary and
# raises SettingWithCopyWarning.
if '049' in compare_df.index:
    compare_df.loc['049', 'local'] = 7

plt.figure(figsize=(8,5))
plt.scatter(compare_df.fsiq2, compare_df.local)
plt.show()

# Row 0 = fsiq2, row 1 = local.
compare_arr = np.array(compare_df.T)
r, p = stats.pearsonr(compare_arr[0], compare_arr[1])
print("r: {r}, p: {p}".format(r=r, p=p))
r: -0.204788026516, p: 0.0399464145313
def rtest(df_columns):
    """Print the names of the first two columns of `df_columns` and scatter them.

    df_columns: DataFrame with at least two columns; column 0 is drawn on
    the x axis and column 1 on the y axis.

    The plot is built from the argument itself. The previous version always
    plotted the global `compare_df.fsiq2` / `compare_df.local`, whatever was
    passed in. The unused `scipy.stats` import has been removed.
    """
    import matplotlib.pylab as plt
    print(df_columns.columns[0])
    print(df_columns.columns[1])
    plt.figure(figsize=(3,3))
    # Positional selection so duplicate column names cannot confuse the lookup.
    plt.scatter(df_columns.iloc[:, 0], df_columns.iloc[:, 1])
    plt.show()
rtest(compare_df)
fsiq2 local
# Pearson correlation between scales.fsiq2 and isip5.local.
from scipy import stats

paired = pd.concat([data.scales.fsiq2, data.isip5.local], axis=1)
# Discard rows without an fsiq2 score, then double-check none are left.
compare_df = paired.dropna(subset=['fsiq2'])
assert not compare_df.fsiq2.isnull().any()

# Row 0 = fsiq2, row 1 = local.
compare_arr = np.array(compare_df.T)
stats.pearsonr(compare_arr[0], compare_arr[1])
(-0.012201312518071359, 0.90361193985416333)
def scatter_tooltips(df, x_col, y_col,
                     size_col=None,
                     color_col=None,
                     show_all_cols=False,
                     fig_size=(8, 5)):
    """Scatter df[x_col] against df[y_col] with an HTML tooltip per point.

    Each tooltip is the HTML table of that point's full row of `df`.
    `size_col`, `color_col` and `show_all_cols` are accepted but are not
    used by the body. Relies on the module-level `plt`. Returns the result
    of `mpld3.display()`.
    """
    import pandas as pd
    import mpld3
    from mpld3 import plugins

    # CSS applied to the tooltip tables.
    css = """
table { border-collapse: collapse; }
th { color: #ffffff; background-color: #000000; }
td { background-color: #cccccc; }
table, th, td { font-family:Arial, Helvetica, sans-serif;
border: 1px solid black; text-align: right; }
"""

    fig, ax = plt.subplots()
    fig.set_size_inches(fig_size)
    ax.grid(True, alpha=0.3)

    # One single-column HTML table per row, in row order.
    labels = [str(pd.DataFrame(row_values).to_html())
              for _, row_values in df.iterrows()]

    marker_style = dict(color='b',
                        markeredgecolor='k',
                        markersize=8,
                        markeredgewidth=1,
                        alpha=.6)
    drawn = ax.plot(df[x_col], df[y_col], 'o', **marker_style)

    ax.set_xlabel(x_col)
    ax.set_ylabel(y_col)
    ax.set_title(x_col + ' . ' + y_col, size=16)

    hover = plugins.PointHTMLTooltip(drawn[0], labels,
                                     voffset=10, hoffset=10, css=css)
    plugins.connect(fig, hover)
    return mpld3.display()
scatter_tooltips(data, 'isip8_sq2dev_mean_sqrt', 'isip5_sq2dev_mean_sqrt',
fig_size=(12, 7.5))
def d3plot(x, y, size=(10,6)):
    """Scatter `x` against `y` with a tooltip per point showing x's index label.

    x, y: equal-length sequences; `x` must have an `.index` (e.g. a Series),
          whose entries are used as the tooltip text.
    size: figure size in inches, passed to `fig.set_size_inches`. It was
          previously ignored in favour of a hard-coded (8, 5), so callers
          relying on the default now get a 10x6 figure.

    Relies on the module-level `plt`. Returns the result of `mpld3.display()`.
    """
    import mpld3
    fig, ax = plt.subplots(subplot_kw=dict(axisbg='#EEEEEE'))
    fig.set_size_inches(size)
    scatter = ax.scatter(x, y,
                         s=40,  # marker size
                         alpha=0.5,
                         cmap=plt.cm.jet)
    ax.grid(color='white', linestyle='solid')
    ax.set_title("Scatter Plot (with tooltips!)", size=10)
    labels = ['{0}'.format(pid) for pid in x.index]
    tooltip = mpld3.plugins.PointLabelTooltip(scatter, labels=labels)
    mpld3.plugins.connect(fig, tooltip)
    return mpld3.display()
d3plot(data['isip5_sq2devsum'], data['isip8_sq2devsum'])
sset = db_isip8.xs(['056', 'tap_r'], level=['pid', 'stamp_type'])
print(sset.ints.mean())
print(sset.ints.std())
728.982048 37.2019991333
# For each participant, histogram the 'tap_r' `ints` values of db_isip5 and
# caption every value lying more than 150 from that participant's median.
for pid in pid_list:
    try:
        sset = db_isip5.xs([pid, 'tap_r'], level=['pid', 'stamp_type'])
        data = sset.ints
        # Only participants with values at or below 400 AND at or above 600
        # are plotted.
        # NOTE(review): with `or`, a participant with outliers on one side
        # only is skipped -- confirm that is intended.
        if data.min() > 400 or data.max() < 600: continue
        print(pid)
        plt.figure(figsize=(13,6))
        data.hist(bins=100)
        #annotating non-midpoint values
        caption_y_increment = 0.2
        median = data.median()
        prev_ypos = 0
        for idx, value in enumerate(data):
            if np.abs(value - median) > 150:
                caption = str(data.index[idx]) + ": " + str(value.round(1))
                # stack the captions upwards so they do not overlap
                plt.annotate(caption, (value, prev_ypos + caption_y_increment))
                prev_ypos += caption_y_increment
        plt.show()
    except Exception as err:
        # Keep going, but say which participant failed and why. A bare
        # `except:` also swallowed KeyboardInterrupt.
        print("error.... {pid}: {err!r}".format(pid=pid, err=err))
#High outliers:
#p049 - will need to remove manually
049
055
# Histogram of the per-participant mean of the 'tap_r' `ints` in db_isip5.
taps = db_isip5.xs('tap_r', level='stamp_type').ints
pmeans = taps.groupby(level='pid').mean()
data = pmeans
##############################
plt.figure(figsize=(13,6))
data.hist(bins=25)

# Caption every participant whose mean is more than 25 away from the median,
# stacking the captions upwards so they do not overlap.
caption_y_increment = 1
median = data.median()
prev_ypos = 0
for label, value in zip(data.index, data):
    distance = np.abs(value - median)
    if distance > 25:
        caption = ": ".join([str(label), str(value.round(1))])
        prev_ypos += caption_y_increment
        plt.annotate(caption, (value, prev_ypos))
plt.show()
#why is pid 049's mean so low?
sset = db_isip8.xs(['048', 'tap_r'], level=['pid', 'stamp_type'])
data = sset.ints.mean()
data - data*0.5
524.87915909090907
sset[60:]
run_count | task_id | i | channel | pitch | velocity | micros | task_ms | int_raw | int_filt1 | int_max_exceeded | int_lag2dev | ints | ||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
task_name | csv_line | |||||||||||||
ISIP_8 | 9628 | 21 | 3 | 117 | 1 | 48 | 92 | 2373733160 | 54572.428 | 1076.716 | 1076.716 | False | 47.040 | 1076.716 |
9629 | 21 | 3 | 118 | 1 | 48 | 117 | 2374789772 | 55629.040 | 1056.612 | 1056.612 | False | 64.404 | 1056.612 | |
9630 | 21 | 3 | 119 | 1 | 48 | 100 | 2375801208 | 56640.476 | 1011.436 | 1011.436 | False | -65.280 | 1011.436 | |
9631 | 21 | 3 | 120 | 1 | 48 | 111 | 2376926120 | 57765.388 | 1124.912 | 1124.912 | False | 68.300 | 1124.912 | |
9632 | 21 | 3 | 121 | 1 | 48 | 111 | 2378075728 | 58914.996 | 1149.608 | 1149.608 | False | 138.172 | 1149.608 | |
9633 | 21 | 3 | 122 | 1 | 48 | 121 | 2379113188 | 59952.456 | 1037.460 | 1037.460 | False | -87.452 | 1037.460 | |
9634 | 21 | 3 | 123 | 1 | 48 | 97 | 2380251568 | 61090.836 | 1138.380 | 1138.380 | False | -11.228 | 1138.380 | |
9635 | 21 | 3 | 124 | 1 | 48 | 127 | 2381400428 | 62239.696 | 1148.860 | 1148.860 | False | 111.400 | 1148.860 | |
9636 | 21 | 3 | 125 | 1 | 48 | 79 | 2382490936 | 63330.204 | 1090.508 | 1090.508 | False | -47.872 | 1090.508 | |
9637 | 21 | 3 | 126 | 1 | 48 | 105 | 2383587036 | 64426.304 | 1096.100 | 1096.100 | False | -52.760 | 1096.100 | |
9638 | 21 | 3 | 127 | 1 | 48 | 127 | 2384698024 | 65537.292 | 1110.988 | 1110.988 | False | 20.480 | 1110.988 | |
9639 | 21 | 3 | 128 | 1 | 48 | 111 | 2385825580 | 66664.848 | 1127.556 | 1127.556 | False | 31.456 | 1127.556 | |
9640 | 21 | 3 | 129 | 1 | 48 | 112 | 2386941280 | 67780.548 | 1115.700 | 1115.700 | False | 4.712 | 1115.700 | |
9641 | 21 | 3 | 130 | 1 | 48 | 127 | 2388091424 | 68930.692 | 1150.144 | 1150.144 | False | 22.588 | 1150.144 | |
9642 | 21 | 3 | 131 | 1 | 48 | 108 | 2389179152 | 70018.420 | 1087.728 | 1087.728 | False | -27.972 | 1087.728 | |
9644 | 21 | 3 | 133 | 1 | 48 | 127 | 2390218064 | 71057.332 | 1038.912 | 1038.912 | False | -111.232 | 1038.912 | |
9645 | 21 | 3 | 134 | 1 | 48 | 121 | 2391315900 | 72155.168 | 1097.836 | 1097.836 | False | 10.108 | 1097.836 | |
9646 | 21 | 3 | 135 | 1 | 48 | 103 | 2392416444 | 73255.712 | 1100.544 | 1100.544 | False | 61.632 | 1100.544 | |
9647 | 21 | 3 | 136 | 1 | 48 | 110 | 2393596776 | 74436.044 | 1180.332 | 1180.332 | False | 82.496 | 1180.332 | |
9648 | 21 | 3 | 137 | 1 | 48 | 117 | 2394694828 | 75534.096 | 1098.052 | 1098.052 | False | -2.492 | 1098.052 | |
9649 | 21 | 3 | 138 | 1 | 48 | 79 | 2395813540 | 76652.808 | 1118.712 | 1118.712 | False | -61.620 | 1118.712 | |
9651 | 21 | 3 | 140 | 1 | 48 | 19 | 2396327468 | 77166.736 | 513.928 | 513.928 | False | -584.124 | 513.928 | |
9652 | 21 | 3 | 141 | 1 | 48 | 97 | 2396959476 | 77798.744 | 632.008 | 632.008 | False | -486.704 | 632.008 | |
9653 | 21 | 3 | 142 | 1 | 48 | 127 | 2398009728 | 78848.996 | 1050.252 | 1050.252 | False | 536.324 | 1050.252 | |
9654 | 21 | 3 | 143 | 1 | 48 | 117 | 2399073920 | 79913.188 | 1064.192 | 1064.192 | False | 432.184 | 1064.192 | |
9656 | 21 | 3 | 145 | 1 | 48 | 127 | 2400149340 | 80988.608 | 1075.420 | 1075.420 | False | 25.168 | 1075.420 | |
9657 | 21 | 3 | 146 | 1 | 48 | 123 | 2401271772 | 82111.040 | 1122.432 | 1122.432 | False | 58.240 | 1122.432 | |
9658 | 21 | 3 | 147 | 1 | 48 | 100 | 2402320156 | 83159.424 | 1048.384 | 1048.384 | False | -27.036 | 1048.384 | |
9659 | 21 | 3 | 148 | 1 | 48 | 113 | 2403437916 | 84277.184 | 1117.760 | 1117.760 | False | -4.672 | 1117.760 | |
9660 | 21 | 3 | 149 | 1 | 48 | 124 | 2404513744 | 85353.012 | 1075.828 | 1075.828 | False | 27.444 | 1075.828 | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | |
9663 | 21 | 3 | 152 | 1 | 48 | 106 | 2407753000 | 88592.268 | 1097.376 | 1097.376 | False | 33.196 | 1097.376 | |
9664 | 21 | 3 | 153 | 1 | 48 | 127 | 2408894168 | 89733.436 | 1141.168 | 1141.168 | False | 63.468 | 1141.168 | |
9665 | 21 | 3 | 154 | 1 | 48 | 127 | 2409955116 | 90794.384 | 1060.948 | 1060.948 | False | -36.428 | 1060.948 | |
9666 | 21 | 3 | 155 | 1 | 48 | 98 | 2411092752 | 91932.020 | 1137.636 | 1137.636 | False | -3.532 | 1137.636 | |
9667 | 21 | 3 | 156 | 1 | 48 | 103 | 2412197028 | 93036.296 | 1104.276 | 1104.276 | False | 43.328 | 1104.276 | |
9668 | 21 | 3 | 157 | 1 | 48 | 95 | 2413307044 | 94146.312 | 1110.016 | 1110.016 | False | -27.620 | 1110.016 | |
9669 | 21 | 3 | 158 | 1 | 48 | 127 | 2414405856 | 95245.124 | 1098.812 | 1098.812 | False | -5.464 | 1098.812 | |
9670 | 21 | 3 | 159 | 1 | 48 | 107 | 2415495400 | 96334.668 | 1089.544 | 1089.544 | False | -20.472 | 1089.544 | |
9671 | 21 | 3 | 160 | 1 | 48 | 127 | 2416539816 | 97379.084 | 1044.416 | 1044.416 | False | -54.396 | 1044.416 | |
9672 | 21 | 3 | 161 | 1 | 48 | 107 | 2417615288 | 98454.556 | 1075.472 | 1075.472 | False | -14.072 | 1075.472 | |
9673 | 21 | 3 | 162 | 1 | 48 | 127 | 2418704512 | 99543.780 | 1089.224 | 1089.224 | False | 44.808 | 1089.224 | |
9674 | 21 | 3 | 163 | 1 | 48 | 119 | 2419783108 | 100622.376 | 1078.596 | 1078.596 | False | 3.124 | 1078.596 | |
9675 | 21 | 3 | 164 | 1 | 48 | 118 | 2420889492 | 101728.760 | 1106.384 | 1106.384 | False | 17.160 | 1106.384 | |
9677 | 21 | 3 | 166 | 1 | 48 | 93 | 2421971488 | 102810.756 | 1081.996 | 1081.996 | False | 3.400 | 1081.996 | |
9678 | 21 | 3 | 167 | 1 | 48 | 92 | 2423056596 | 103895.864 | 1085.108 | 1085.108 | False | -21.276 | 1085.108 | |
9679 | 21 | 3 | 168 | 1 | 48 | 127 | 2424098628 | 104937.896 | 1042.032 | 1042.032 | False | -39.964 | 1042.032 | |
9680 | 21 | 3 | 169 | 1 | 48 | 127 | 2425119560 | 105958.828 | 1020.932 | 1020.932 | False | -64.176 | 1020.932 | |
9681 | 21 | 3 | 170 | 1 | 48 | 127 | 2426156576 | 106995.844 | 1037.016 | 1037.016 | False | -5.016 | 1037.016 | |
9682 | 21 | 3 | 171 | 1 | 48 | 127 | 2427229152 | 108068.420 | 1072.576 | 1072.576 | False | 51.644 | 1072.576 | |
9683 | 21 | 3 | 172 | 1 | 48 | 127 | 2428292348 | 109131.616 | 1063.196 | 1063.196 | False | 26.180 | 1063.196 | |
9684 | 21 | 3 | 173 | 1 | 48 | 114 | 2429364964 | 110204.232 | 1072.616 | 1072.616 | False | 0.040 | 1072.616 | |
9685 | 21 | 3 | 174 | 1 | 48 | 90 | 2430461228 | 111300.496 | 1096.264 | 1096.264 | False | 33.068 | 1096.264 | |
9686 | 21 | 3 | 175 | 1 | 48 | 127 | 2431537956 | 112377.224 | 1076.728 | 1076.728 | False | 4.112 | 1076.728 | |
9687 | 21 | 3 | 176 | 1 | 48 | 127 | 2432566892 | 113406.160 | 1028.936 | 1028.936 | False | -67.328 | 1028.936 | |
9688 | 21 | 3 | 177 | 1 | 48 | 92 | 2433586864 | 114426.132 | 1019.972 | 1019.972 | False | -56.756 | 1019.972 | |
9689 | 21 | 3 | 178 | 1 | 48 | 123 | 2434660704 | 115499.972 | 1073.840 | 1073.840 | False | 44.904 | 1073.840 | |
9690 | 21 | 3 | 179 | 1 | 48 | 119 | 2435683356 | 116522.624 | 1022.652 | 1022.652 | False | 2.680 | 1022.652 | |
9692 | 21 | 3 | 181 | 1 | 48 | 127 | 2436797604 | 117636.872 | 1114.248 | 1114.248 | False | 40.408 | 1114.248 | |
9693 | 21 | 3 | 182 | 1 | 48 | 121 | 2437885104 | 118724.372 | 1087.500 | 1087.500 | False | 64.848 | 1087.500 | |
9694 | 21 | 3 | 183 | 1 | 48 | 100 | 2439016980 | 119856.248 | 1131.876 | 1131.876 | False | 17.628 | 1131.876 |
62 rows × 13 columns
sset = db_isip5.xs(['055', 'tap_r'], level=['pid', 'stamp_type'])
sset_ints = sset.ints
sset_ints.hist(bins=100, figsize=(14,5))
<matplotlib.axes.AxesSubplot at 0x27494d68>
# Histogram of the per-participant mean of the 'tap_r' `ints` in db_isip8.
taps = db_isip8.xs('tap_r', level='stamp_type').ints
pmeans = taps.groupby(level='pid').mean()
data = pmeans
##############################
plt.figure(figsize=(13,6))
data.hist(bins=25)

# Caption every participant whose mean is more than 60 away from the median,
# stacking the captions upwards so they do not overlap.
caption_y_increment = 0.8
median = data.median()
prev_ypos = 0
for label, value in zip(data.index, data):
    distance = np.abs(value - median)
    if distance > 60:
        caption = ": ".join([str(label), str(value.round(1))])
        prev_ypos += caption_y_increment
        plt.annotate(caption, (value, prev_ypos))
plt.show()
#Good distribution-- 048 is a bit high
ISIP5_DATASTART = 22000
sset = db_isip5.xs(['055', 'tap_r'], level=['pid', 'stamp_type'])
sset[sset.task_ms > ISIP5_DATASTART]
# a double-interval has snuck through here.
run_count | task_id | i | channel | pitch | velocity | micros | task_ms | int_raw | ||
---|---|---|---|---|---|---|---|---|---|---|
task_name | csv_line | |||||||||
ISIP_5 | 9471 | 17 | 5 | 113 | 1 | 48 | 65 | 1732756792 | 22613.980 | 2091.876 |
9472 | 17 | 5 | 114 | 1 | 48 | 51 | 1732790128 | 22647.316 | 33.336 | |
9473 | 17 | 5 | 115 | 1 | 48 | 88 | 1733174484 | 23031.672 | 384.356 | |
9474 | 17 | 5 | 116 | 1 | 48 | 60 | 1733222708 | 23079.896 | 48.224 | |
9475 | 17 | 5 | 117 | 1 | 48 | 87 | 1733628264 | 23485.452 | 405.556 | |
9476 | 17 | 5 | 118 | 1 | 48 | 53 | 1733682880 | 23540.068 | 54.616 | |
9477 | 17 | 5 | 119 | 1 | 48 | 82 | 1734103016 | 23960.204 | 420.136 | |
9478 | 17 | 5 | 120 | 1 | 48 | 52 | 1734154328 | 24011.516 | 51.312 | |
9479 | 17 | 5 | 121 | 1 | 48 | 76 | 1734594460 | 24451.648 | 440.132 | |
9480 | 17 | 5 | 122 | 1 | 48 | 61 | 1734631988 | 24489.176 | 37.528 | |
9481 | 17 | 5 | 123 | 1 | 48 | 70 | 1735060468 | 24917.656 | 428.480 | |
9482 | 17 | 5 | 124 | 1 | 48 | 50 | 1735104280 | 24961.468 | 43.812 | |
9483 | 17 | 5 | 125 | 1 | 48 | 64 | 1735558120 | 25415.308 | 453.840 | |
9484 | 17 | 5 | 126 | 1 | 48 | 52 | 1735593328 | 25450.516 | 35.208 | |
9485 | 17 | 5 | 127 | 1 | 48 | 54 | 1736072748 | 25929.936 | 479.420 | |
9486 | 17 | 5 | 128 | 1 | 48 | 46 | 1736106608 | 25963.796 | 33.860 | |
9487 | 17 | 5 | 129 | 1 | 48 | 74 | 1736565836 | 26423.024 | 459.228 | |
9488 | 17 | 5 | 130 | 1 | 48 | 54 | 1736604256 | 26461.444 | 38.420 | |
9489 | 17 | 5 | 131 | 1 | 48 | 75 | 1737178084 | 27035.272 | 573.828 | |
9490 | 17 | 5 | 132 | 1 | 48 | 64 | 1737204848 | 27062.036 | 26.764 | |
9491 | 17 | 5 | 133 | 1 | 48 | 76 | 1738085532 | 27942.720 | 880.684 | |
9492 | 17 | 5 | 134 | 1 | 48 | 66 | 1738110116 | 27967.304 | 24.584 | |
9493 | 17 | 5 | 135 | 1 | 48 | 72 | 1738631120 | 28488.308 | 521.004 | |
9494 | 17 | 5 | 136 | 1 | 48 | 58 | 1738656776 | 28513.964 | 25.656 | |
9495 | 17 | 5 | 137 | 1 | 48 | 71 | 1739140532 | 28997.720 | 483.756 | |
9496 | 17 | 5 | 138 | 1 | 48 | 57 | 1739166840 | 29024.028 | 26.308 | |
9497 | 17 | 5 | 139 | 1 | 48 | 73 | 1739618944 | 29476.132 | 452.104 | |
9498 | 17 | 5 | 140 | 1 | 48 | 54 | 1739642048 | 29499.236 | 23.104 | |
9499 | 17 | 5 | 141 | 1 | 48 | 75 | 1740106948 | 29964.136 | 464.900 | |
9500 | 17 | 5 | 142 | 1 | 48 | 56 | 1740130408 | 29987.596 | 23.460 | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | |
9652 | 17 | 5 | 294 | 1 | 48 | 75 | 1782736108 | 72593.296 | 500.512 | |
9653 | 17 | 5 | 295 | 1 | 48 | 62 | 1782763460 | 72620.648 | 27.352 | |
9654 | 17 | 5 | 296 | 1 | 48 | 77 | 1783236524 | 73093.712 | 473.064 | |
9655 | 17 | 5 | 297 | 1 | 48 | 64 | 1783262904 | 73120.092 | 26.380 | |
9656 | 17 | 5 | 298 | 1 | 48 | 72 | 1783750856 | 73608.044 | 487.952 | |
9657 | 17 | 5 | 299 | 1 | 48 | 55 | 1783777084 | 73634.272 | 26.228 | |
9658 | 17 | 5 | 300 | 1 | 48 | 70 | 1784250972 | 74108.160 | 473.888 | |
9659 | 17 | 5 | 301 | 1 | 48 | 58 | 1784270844 | 74128.032 | 19.872 | |
9660 | 17 | 5 | 302 | 1 | 48 | 65 | 1784766872 | 74624.060 | 496.028 | |
9661 | 17 | 5 | 303 | 1 | 48 | 54 | 1784792432 | 74649.620 | 25.560 | |
9662 | 17 | 5 | 304 | 1 | 48 | 69 | 1785263992 | 75121.180 | 471.560 | |
9663 | 17 | 5 | 305 | 1 | 48 | 57 | 1785291876 | 75149.064 | 27.884 | |
9664 | 17 | 5 | 306 | 1 | 48 | 72 | 1785784980 | 75642.168 | 493.104 | |
9665 | 17 | 5 | 307 | 1 | 48 | 59 | 1785810392 | 75667.580 | 25.412 | |
9666 | 17 | 5 | 308 | 1 | 48 | 69 | 1786281656 | 76138.844 | 471.264 | |
9667 | 17 | 5 | 309 | 1 | 48 | 53 | 1786307440 | 76164.628 | 25.784 | |
9668 | 17 | 5 | 310 | 1 | 48 | 65 | 1786797260 | 76654.448 | 489.820 | |
9669 | 17 | 5 | 311 | 1 | 48 | 49 | 1786825580 | 76682.768 | 28.320 | |
9670 | 17 | 5 | 312 | 1 | 48 | 70 | 1787330664 | 77187.852 | 505.084 | |
9671 | 17 | 5 | 313 | 1 | 48 | 54 | 1787357344 | 77214.532 | 26.680 | |
9672 | 17 | 5 | 314 | 1 | 48 | 69 | 1787852992 | 77710.180 | 495.648 | |
9673 | 17 | 5 | 315 | 1 | 48 | 56 | 1787883188 | 77740.376 | 30.196 | |
9674 | 17 | 5 | 316 | 1 | 48 | 66 | 1788365152 | 78222.340 | 481.964 | |
9675 | 17 | 5 | 317 | 1 | 48 | 57 | 1788394148 | 78251.336 | 28.996 | |
9676 | 17 | 5 | 318 | 1 | 48 | 70 | 1788869604 | 78726.792 | 475.456 | |
9677 | 17 | 5 | 319 | 1 | 48 | 58 | 1788900252 | 78757.440 | 30.648 | |
9678 | 17 | 5 | 320 | 1 | 48 | 72 | 1789369428 | 79226.616 | 469.176 | |
9679 | 17 | 5 | 321 | 1 | 48 | 59 | 1789397380 | 79254.568 | 27.952 | |
9680 | 17 | 5 | 322 | 1 | 48 | 70 | 1789879792 | 79736.980 | 482.412 | |
9681 | 17 | 5 | 323 | 1 | 48 | 58 | 1789906620 | 79763.808 | 26.828 |
211 rows × 9 columns
# Histogram of the per-participant standard deviation of the 'tap_r' `ints`
# in db_isip5.
taps = db_isip5.xs('tap_r', level='stamp_type').ints
pstdev = taps.groupby(level='pid').std()
data = pstdev
plt.figure(figsize=(13,6))
data.hist(bins=25)

# Caption every participant whose value is more than 8 away from the median,
# stacking the captions upwards so they do not overlap.
caption_y_increment = 1
median = data.median()
prev_ypos = 0
for label, value in zip(data.index, data):
    distance = np.abs(value - median)
    if distance > 8:
        caption = ": ".join([str(label), str(value.round(1))])
        prev_ypos += caption_y_increment
        plt.annotate(caption, (value, prev_ypos))
plt.show()
# Scratch check: which elements of `lt` do not appear in `r`?
lt = [1, 2, 3, 4, 5, 7]
r = [2, 3, 4]
set(lt) - set(r)
Ordered
{1, 5, 7}
import matplotlib.pylab as plt
%matplotlib inline
pd.options.display.mpl_style = 'default'
#EXPECTED_RTAP_MINS = {}

# One-column table: for each participant, a space-separated list of the
# tasks that have zero non-null `micros` values.
dfo = pd.DataFrame(index=pid_list)
dfo['missing_tasks'] = ''

for t in TASK_NAMES_USING:
    # Rows for this task only; remaining index: pid, stamp_type, csv_line
    df = dbase.xs(t, drop_level=True)
    for p in pid_list:
        dfp = df.xs(p, drop_level=True)
        if dfp.micros.count() == 0:
            dfo.loc[p, 'missing_tasks'] += t + ' '

# Display only the participants that are missing at least one task.
dfo.loc[dfo.missing_tasks != '']
missing_tasks | |
---|---|
010 | Improv_Melody |
011 | Ticks_Linear_8 Improv_Melody |
012 | Jits_Linear_5 Jits_Linear_8 Improv_Melody |
013 | T1_SMS_5 T1_SMS_8 Ticks_ISO_T2_5 Ticks_ISO_T2_... |
014 | Improv_Melody |
015 | Improv_Melody |
018 | Jits_Linear_5 |
031 | T1_SMS_5 T1_SMS_8 Ticks_ISO_T2_5 Ticks_ISO_T2_... |
# Number of loopback midi signals per participant per task
# Slight variations for improv tasks: possibly due to
#   overflows (checkable in original CSV files)?
# T1_SMS_5 and T1_SMS_8 tasks were slightly longer for
#   participants 010 through 015.
#   (sms5: 150 vs 130; sms8: 140 vs 120)
#   Should discard the additional intervals for these p's.
loopct = pd.DataFrame(index=pid_list)

for t in TASK_NAMES_USING:
    col = 'lbcount_' + t
    # Seed the column with NaN so that every participant has a cell.
    loopct[col] = np.nan
    dft = dbase.xs(t, drop_level=True)  # index: pid, stamp_type, csv_line
    for p in pid_list:
        dftp = dft.xs(p, drop_level=True)
        task_lbcount = dftp.xs('loopback').micros.count()
        # Assign through one .loc call: the chained form
        # loopct[col].loc[p] = ... may write to a temporary copy and
        # leave loopct unchanged.
        loopct.loc[p, col] = task_lbcount

# Preview every 25th participant.
loopct[::25]
#loopct.to_csv(....)
lbcount_T1_SMS_5 | lbcount_T1_SMS_8 | lbcount_Ticks_ISO_T2_5 | lbcount_Ticks_ISO_T2_8 | lbcount_Ticks_Linear_5 | lbcount_Ticks_Linear_8 | lbcount_Ticks_Phase_5 | lbcount_Ticks_Phase_8 | lbcount_Jits_ISO_5 | lbcount_Jits_ISO_8 | lbcount_Jits_Phase_5 | lbcount_Jits_Phase_8 | lbcount_Jits_Linear_5 | lbcount_Jits_Linear_8 | lbcount_ISIP_5 | lbcount_ISIP_8 | lbcount_Improv_Metronome | lbcount_Improv_Melody | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
010 | 150 | 140 | 130 | 120 | 170 | 170 | 170 | 170 | 360 | 360 | 510 | 510 | 510 | 510 | 40 | 30 | 140 | 0 |
035 | 130 | 120 | 130 | 120 | 170 | 170 | 170 | 170 | 360 | 360 | 510 | 510 | 510 | 510 | 40 | 30 | 140 | 162 |
060 | 130 | 120 | 130 | 120 | 170 | 170 | 170 | 170 | 360 | 360 | 510 | 510 | 510 | 510 | 40 | 30 | 140 | 162 |
085 | 130 | 120 | 130 | 120 | 170 | 170 | 170 | 170 | 360 | 360 | 510 | 510 | 510 | 510 | 40 | 30 | 140 | 162 |
110 | 130 | 120 | 130 | 120 | 170 | 170 | 170 | 170 | 360 | 360 | 510 | 510 | 510 | 510 | 40 | 30 | 140 | 162 |
#lbdata = dfo.drop('missing_tasks', axis=1)

# For every non-ISIP column of loopct, either report that all participants
# share one loopback count or plot a histogram with the off-median
# participants labelled.
data = loopct

for c in data.columns:
    print(c)
    if 'ISIP' in c:
        print('skipping ISIP data\n')
        continue

    data_range = data[c].max() - data[c].min()
    if data_range == 0:
        print("all data points = {0} \n\n".format(data[c].max()))
    else:
        plt.figure(figsize=(13, 3))
        # The columns hold floats (they are seeded with NaN), so the range
        # is a float too; the bin count has to be an integer.
        data[c].hist(bins=int(data_range))

        # annotating non-midpoint values
        median = data[c].median()
        prev_ypos = 0
        for pid, value in zip(data[c].index, data[c]):
            if value != median:
                caption = str(pid) + ": " + str(value)
                # Stack captions upward so that they do not overlap.
                plt.annotate(caption, (value, prev_ypos + 10))
                prev_ypos += 10
        #for i, txt in enumerate(n):
        #    ax.annotate(txt, (x[i],y[i]))
        plt.show()
lbcount_T1_SMS_5
lbcount_T1_SMS_8
lbcount_Ticks_ISO_T2_5
lbcount_Ticks_ISO_T2_8
lbcount_Ticks_Linear_5
lbcount_Ticks_Linear_8
lbcount_Ticks_Phase_5
lbcount_Ticks_Phase_8
lbcount_Jits_ISO_5
lbcount_Jits_ISO_8
lbcount_Jits_Phase_5
lbcount_Jits_Phase_8
lbcount_Jits_Linear_5
lbcount_Jits_Linear_8
lbcount_ISIP_5 skipping ISIP data lbcount_ISIP_8 skipping ISIP data lbcount_Improv_Metronome
lbcount_Improv_Melody
# Number of non-null 'tap_r' `micros` values per participant (rows) and
# task (columns).
tapct = pd.DataFrame(index=pid_list)

for t in TASK_NAMES_USING:
    # Seed the column with NaN so that every participant has a cell.
    tapct[t] = np.nan
    dft = dbase.xs(t, drop_level=True)  # index: pid, stamp_type, csv_line
    for p in pid_list:
        dftp = dft.xs(p, drop_level=True)
        task_tapcount = dftp.xs('tap_r').micros.count()
        # Assign through one .loc call: the chained form
        # tapct[t].loc[p] = ... may write to a temporary copy and leave
        # tapct unchanged.
        tapct.loc[p, t] = task_tapcount
# Tap-count distribution per task: print the task name, skip ISIP columns,
# report constant columns, otherwise draw a 40-bin histogram and label the
# participants whose count is more than 40 away from the median.
data = tapct

for c in data.columns:
    print(c)
    if 'ISIP' in c:
        print('skipping ISIP data\n')
        continue

    column = data[c]
    data_range = column.max() - column.min()
    if data_range == 0:
        print("all data points = {0} \n\n".format(column.max()))
        continue

    plt.figure(figsize=(13, 3))
    column.hist(bins=40)

    # annotating non-midpoint values
    caption_y_increment = 5
    median = column.median()
    prev_ypos = 0
    for pid, value in zip(column.index, column):
        if not np.abs(value - median) > 40:
            continue
        caption = str(pid) + ": " + str(value)
        next_ypos = prev_ypos + caption_y_increment
        plt.annotate(caption, (value, next_ypos))
        prev_ypos = next_ypos
    #for i, txt in enumerate(n):
    #    ax.annotate(txt, (x[i],y[i]))
    plt.show()

#isip: Why such a large number of timestamps for some p's?
#ISIP_5: 094: 281, 055: 284, 041: 279, ...
#ISIP_8: 041: 288, 055: 280, 017: 217, ...
#...Right: these aren't filtered yet here!
T1_SMS_5
T1_SMS_8
Ticks_ISO_T2_5
Ticks_ISO_T2_8
Ticks_Linear_5
Ticks_Linear_8
Ticks_Phase_5
Ticks_Phase_8
Jits_ISO_5
Jits_ISO_8
Jits_Phase_5
Jits_Phase_8
Jits_Linear_5
Jits_Linear_8
ISIP_5 skipping ISIP data ISIP_8 skipping ISIP data Improv_Metronome
Improv_Melody
def custom_histogram(data_series):
    """Plot a 35-bin histogram of interval lengths on a millisecond axis.

    NaN entries are dropped and the remaining values are divided by 1000
    before plotting, so the input is presumably in microseconds
    (TODO confirm against callers).

    Args:
        data_series: pandas Series of interval lengths (must support
            ``.dropna()`` and ``.divide()``); may contain NaN.

    Returns:
        None. The current figure is resized to 7x7 inches and shown.
    """
    cleanintervals = data_series.dropna()
    millis = cleanintervals.divide(1000)
    millis_array = np.array(millis)

    plt.title("Interval lengths")
    plt.xlabel("Milliseconds")
    plt.ylabel("Frequency")
    # Only `bins` differs from matplotlib's defaults. The explicit default
    # keywords were dropped because two of them (`normed`, `hold`) no
    # longer exist in current matplotlib and raise an error when passed.
    plt.hist(millis_array, bins=35)
    plt.gcf().set_size_inches(7, 7)
    plt.show()
# Blank out implausibly long intervals (> 750000, in the same units as
# taps.interval) and compare length/maximum before and after.
print("pre-filter length, maximum:")
print(len(taps.interval))  # before filter
# Series.max() skips NaN; the builtin max() returns nan whenever a NaN
# happens to come first in the series.
print(taps.interval.max())
print()

tapsfilt = taps.copy(deep=True)
tapsfilt.loc[
    tapsfilt['interval'] > 750000,  # boolean selector on axis 0
    'interval'                      # index value selector on axis 1
] = np.nan

print("post-filter length, maximum:")
print(len(tapsfilt.interval))  # after (includes NaN)
print(tapsfilt.interval.max())

custom_histogram(tapsfilt.interval)
pre-filter length, maximum: 161 892052.0 post-filter length, maximum: 161 531780.0