%load_ext autoreload
%autoreload 2
import sys
sys.path.append("..")
from optimus import Optimus
C:\Users\argenisleon\Anaconda3\lib\site-packages\dask\config.py:161: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.
  data = yaml.load(f.read()) or {}
C:\Users\argenisleon\Anaconda3\lib\site-packages\statsmodels\compat\pandas.py:49: FutureWarning: The Panel class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version
  data_klasses = (pandas.Series, pandas.DataFrame, pandas.Panel)
op = Optimus("dask_cudf", comm=True)
df = op.load.csv("https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv", sep=",", header=True, infer_schema='true', charset="UTF-8").ext.cache()
df.ext.display()
df = df.ext.send(output="json", infer=False, advanced_stats=False)
df.cols.count_uniques("*")
{'count_uniques': {'price': 6.000274674963478}}
df.cols.count_na("*")
1
from optimus.profiler.profiler import Profiler
p = Profiler()
p.run(df)
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-11-e792527df240> in <module> 1 from optimus.profiler.profiler import Profiler 2 p = Profiler() ----> 3 p.run(df) ~\Documents\Optimus\optimus\helpers\decorators.py in timed(*args, **kw) 8 def timed(*args, **kw): 9 start_time = timeit.default_timer() ---> 10 f = method(*args, **kw) 11 _time = round(timeit.default_timer() - start_time, 2) 12 logger.print("{name}() executed in {time} sec".format(name=method.__name__, time=_time)) ~\Documents\Optimus\optimus\profiler\profiler.py in run(self, df, columns, buckets, infer, relative_error, approx_count, mismatch, advanced_stats) 72 columns = parse_columns(df, columns) 73 output = self.dataset(df, columns, buckets, infer, relative_error, approx_count, format="dict", ---> 74 mismatch=mismatch, advanced_stats=advanced_stats) 75 76 # Load jinja ~\Documents\Optimus\optimus\profiler\profiler.py in dataset(self, df, columns, buckets, infer, relative_error, approx_count, sample, stats, format, mismatch, advanced_stats) 325 self.cols_count = cols_count = len(df.columns) 326 updated_columns = self.columns_stats(df, cols_to_profile, buckets, infer, relative_error, approx_count, --> 327 mismatch, advanced_stats) 328 329 output_columns = update_dict(output_columns, updated_columns) ~\Documents\Optimus\optimus\profiler\profiler.py in columns_stats(self, df, columns, buckets, infer, relative_error, approx_count, mismatch, advanced_stats) 436 437 # Aggregation --> 438 stats = self.columns_agg(df, columns, buckets, relative_error, approx_count, advanced_stats) 439 440 # Calculate Frequency ~\Documents\Optimus\optimus\profiler\profiler.py in columns_agg(self, df, columns, buckets, relative_error, approx_count, advanced_stats) 493 funcs = [df.functions.stddev, df.functions.kurtosis, df.functions.mean, df.functions.skewness, 494 df.functions.sum, df.functions.variance, df.functions.zeros_agg] --> 495 exprs.extend(df.cols.create_exprs(cols, funcs)) 496 497 # TODO: None in basic calculation ~\Documents\Optimus\optimus\engines\base\dask\columns.py in create_exprs(self, columns, funcs, *args) 537 exprs[col_name].update(func(col_name, args)(df)) 538 else: --> 539 exprs[col_name] = func(col_name, args)(df) 540 541 result = {} ~\Documents\Optimus\optimus\engines\dask\functions.py in _kurtoris(serie) 132 def kurtosis(col_name, args): 133 def _kurtoris(serie): --> 134 result = {"kurtosis": float(stats.kurtosis(serie[col_name]))} 135 return result 136 ~\Anaconda3\lib\site-packages\dask\array\stats.py in kurtosis(a, axis, fisher, bias, nan_policy) 227 olderr = np.seterr(all='ignore') 228 try: --> 229 vals = da.where(zero, 0, m4 / m2**2.0) 230 finally: 231 np.seterr(**olderr) TypeError: unsupported operand type(s) for ** or pow(): 'Array' and 'float'
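# Hedged workaround sketch for the kurtosis failure above: this dask
# version's `m4 / m2**2.0` path breaks on Array ** float, so pull the
# column down to pandas (fine for data this small) and let scipy compute
# the statistic directly. Assumes scipy is installed and df still wraps
# a dask DataFrame with a numeric "price" column.
from scipy import stats
kurt = float(stats.kurtosis(df["price"].compute().dropna()))
print({"price": {"kurtosis": kurt}})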
op= Optimus("dask", comm=True)
# url = "https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/crime.csv"
url = "data/crime.csv"
import pandas as pd
pd.read_csv(url, encoding='latin1')
INCIDENT_NUMBER | OFFENSE_CODE | OFFENSE_CODE_GROUP | OFFENSE_DESCRIPTION | DISTRICT | REPORTING_AREA | SHOOTING | OCCURRED_ON_DATE | YEAR | MONTH | DAY_OF_WEEK | HOUR | UCR_PART | STREET | Lat | Long | Location | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | I182070945 | 619 | Larceny | LARCENY ALL OTHERS | D14 | 808 | NaN | 2018-09-02 13:00:00 | 2018 | 9 | Sunday | 13 | Part One | LINCOLN ST | 42.357791 | -71.139371 | (42.35779134, -71.13937053) |
1 | I182070943 | 1402 | Vandalism | VANDALISM | C11 | 347 | NaN | 2018-08-21 00:00:00 | 2018 | 8 | Tuesday | 0 | Part Two | HECLA ST | 42.306821 | -71.060300 | (42.30682138, -71.06030035) |
2 | I182070941 | 3410 | Towed | TOWED MOTOR VEHICLE | D4 | 151 | NaN | 2018-09-03 19:27:00 | 2018 | 9 | Monday | 19 | Part Three | CAZENOVE ST | 42.346589 | -71.072429 | (42.34658879, -71.07242943) |
3 | I182070940 | 3114 | Investigate Property | INVESTIGATE PROPERTY | D4 | 272 | NaN | 2018-09-03 21:16:00 | 2018 | 9 | Monday | 21 | Part Three | NEWCOMB ST | 42.334182 | -71.078664 | (42.33418175, -71.07866441) |
4 | I182070938 | 3114 | Investigate Property | INVESTIGATE PROPERTY | B3 | 421 | NaN | 2018-09-03 21:05:00 | 2018 | 9 | Monday | 21 | Part Three | DELHI ST | 42.275365 | -71.090361 | (42.27536542, -71.09036101) |
5 | I182070936 | 3820 | Motor Vehicle Accident Response | M/V ACCIDENT INVOLVING PEDESTRIAN - INJURY | C11 | 398 | NaN | 2018-09-03 21:09:00 | 2018 | 9 | Monday | 21 | Part Three | TALBOT AVE | 42.290196 | -71.071590 | (42.29019621, -71.07159012) |
6 | I182070933 | 724 | Auto Theft | AUTO THEFT | B2 | 330 | NaN | 2018-09-03 21:25:00 | 2018 | 9 | Monday | 21 | Part One | NORMANDY ST | 42.306072 | -71.082733 | (42.30607218, -71.08273260) |
7 | I182070932 | 3301 | Verbal Disputes | VERBAL DISPUTE | B2 | 584 | NaN | 2018-09-03 20:39:37 | 2018 | 9 | Monday | 20 | Part Three | LAWN ST | 42.327016 | -71.105551 | (42.32701648, -71.10555088) |
8 | I182070931 | 301 | Robbery | ROBBERY - STREET | C6 | 177 | NaN | 2018-09-03 20:48:00 | 2018 | 9 | Monday | 20 | Part One | MASSACHUSETTS AVE | 42.331521 | -71.070853 | (42.33152148, -71.07085307) |
9 | I182070929 | 3301 | Verbal Disputes | VERBAL DISPUTE | C11 | 364 | NaN | 2018-09-03 20:38:00 | 2018 | 9 | Monday | 20 | Part Three | LESLIE ST | 42.295147 | -71.058608 | (42.29514664, -71.05860832) |
10 | I182070928 | 3301 | Verbal Disputes | VERBAL DISPUTE | C6 | 913 | NaN | 2018-09-03 19:55:00 | 2018 | 9 | Monday | 19 | Part Three | OCEAN VIEW DR | 42.319579 | -71.040328 | (42.31957856, -71.04032766) |
11 | I182070927 | 3114 | Investigate Property | INVESTIGATE PROPERTY | C6 | 936 | NaN | 2018-09-03 20:19:00 | 2018 | 9 | Monday | 20 | Part Three | DALESSIO CT | 42.340115 | -71.053390 | (42.34011469, -71.05339029) |
12 | I182070923 | 3108 | Fire Related Reports | FIRE REPORT - HOUSE, BUILDING, ETC. | D4 | 139 | NaN | 2018-09-03 19:58:00 | 2018 | 9 | Monday | 19 | Part Three | MARLBOROUGH ST | 42.350388 | -71.087853 | (42.35038760, -71.08785290) |
13 | I182070922 | 2647 | Other | THREATS TO DO BODILY HARM | B3 | 429 | NaN | 2018-09-03 20:39:00 | 2018 | 9 | Monday | 20 | Part Two | WOODROW AVE | 42.286470 | -71.087147 | (42.28647012, -71.08714661) |
14 | I182070921 | 3201 | Property Lost | PROPERTY - LOST | B3 | 469 | NaN | 2018-09-02 14:00:00 | 2018 | 9 | Sunday | 14 | Part Three | MULVEY ST | 42.279241 | -71.096674 | (42.27924052, -71.09667382) |
15 | I182070920 | 3006 | Medical Assistance | SICK/INJURED/MEDICAL - PERSON | NaN | NaN | 2018-09-03 19:43:00 | 2018 | 9 | Monday | 19 | Part Three | NaN | 42.352875 | -71.073830 | (42.35287456, -71.07382970) | |
16 | I182070919 | 3301 | Verbal Disputes | VERBAL DISPUTE | C11 | 341 | NaN | 2018-09-03 18:52:00 | 2018 | 9 | Monday | 18 | Part Three | STONEHURST ST | 42.305264 | -71.066838 | (42.30526428, -71.06683755) |
17 | I182070918 | 3305 | Assembly or Gathering Violations | DEMONSTRATIONS/RIOT | D4 | 130 | NaN | 2018-09-03 17:00:00 | 2018 | 9 | Monday | 17 | Part Three | HUNTINGTON AVE | 42.348577 | -71.077720 | (42.34857652, -71.07772012) |
18 | I182070917 | 2647 | Other | THREATS TO DO BODILY HARM | B2 | 901 | NaN | 2018-09-03 19:52:00 | 2018 | 9 | Monday | 19 | Part Two | HORADAN WAY | 42.333717 | -71.096658 | (42.33371742, -71.09665806) |
19 | I182070915 | 614 | Larceny From Motor Vehicle | LARCENY THEFT FROM MV - NON-ACCESSORY | B2 | 181 | NaN | 2018-09-02 18:00:00 | 2018 | 9 | Sunday | 18 | Part One | SHIRLEY ST | 42.325695 | -71.068168 | (42.32569490, -71.06816778) |
20 | I182070913 | 3006 | Medical Assistance | SICK/INJURED/MEDICAL - PERSON | NaN | NaN | 2018-09-03 18:46:00 | 2018 | 9 | Monday | 18 | Part Three | WOLCOTT | -1.000000 | -1.000000 | (-1.00000000, -1.00000000) | |
21 | I182070911 | 3801 | Motor Vehicle Accident Response | M/V ACCIDENT - OTHER | A1 | 69 | NaN | 2018-09-03 18:30:00 | 2018 | 9 | Monday | 18 | Part Three | BEACON ST | 42.355644 | -71.071681 | (42.35564426, -71.07168077) |
22 | I182070910 | 3006 | Medical Assistance | SICK/INJURED/MEDICAL - PERSON | B3 | 434 | NaN | 2018-09-03 18:42:00 | 2018 | 9 | Monday | 18 | Part Three | CAPEN ST | 42.283402 | -71.080797 | (42.28340243, -71.08079740) |
23 | I182070909 | 3803 | Motor Vehicle Accident Response | M/V ACCIDENT - PERSONAL INJURY | E5 | 550 | NaN | 2018-09-03 18:33:00 | 2018 | 9 | Monday | 18 | Part Three | WASHINGTON ST | 42.275818 | -71.139913 | (42.27581799, -71.13991259) |
24 | I182070908 | 522 | Residential Burglary | BURGLARY - RESIDENTIAL - NO FORCE | B2 | 911 | NaN | 2018-09-03 18:38:00 | 2018 | 9 | Monday | 18 | Part One | ANNUNCIATION RD | 42.335062 | -71.093168 | (42.33506218, -71.09316781) |
25 | I182070906 | 3831 | Motor Vehicle Accident Response | M/V - LEAVING SCENE - PROPERTY DAMAGE | NaN | NaN | 2018-09-03 18:20:00 | 2018 | 9 | Monday | 18 | Part Three | NaN | 42.283593 | -71.055657 | (42.28359328, -71.05565683) | |
26 | I182070905 | 3006 | Medical Assistance | SICK/INJURED/MEDICAL - PERSON | D4 | 172 | NaN | 2018-09-03 18:50:00 | 2018 | 9 | Monday | 18 | Part Three | MASSACHUSETTS AVE | 42.333112 | -71.072764 | (42.33311189, -71.07276370) |
27 | I182070904 | 802 | Simple Assault | ASSAULT SIMPLE - BATTERY | C11 | 242 | NaN | 2018-09-03 18:34:00 | 2018 | 9 | Monday | 18 | Part Two | ANNAPOLIS ST | 42.317319 | -71.061509 | (42.31731905, -71.06150882) |
28 | I182070904 | 2007 | Restraining Order Violations | VIOL. OF RESTRAINING ORDER W NO ARREST | C11 | 242 | NaN | 2018-09-03 18:34:00 | 2018 | 9 | Monday | 18 | Part Two | ANNAPOLIS ST | 42.317319 | -71.061509 | (42.31731905, -71.06150882) |
29 | I182070903 | 2900 | Other | VAL - VIOLATION OF AUTO LAW - OTHER | B3 | 463 | NaN | 2018-09-03 18:55:00 | 2018 | 9 | Monday | 18 | Part Two | BLUE HILL AVE | 42.295904 | -71.087733 | (42.29590385, -71.08773294) |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
319043 | I110551302-00 | 3125 | Warrant Arrests | WARRANT ARREST | D4 | 171 | NaN | 2015-07-22 22:00:00 | 2015 | 7 | Wednesday | 22 | Part Three | HARRISON AVE | 42.335560 | -71.074364 | (42.33555954, -71.07436364) |
319044 | I110551302-00 | 623 | Larceny | LARCENY SHOPLIFTING $50 TO $199 | D4 | 171 | NaN | 2015-07-22 22:00:00 | 2015 | 7 | Wednesday | 22 | Part One | HARRISON AVE | 42.335560 | -71.074364 | (42.33555954, -71.07436364) |
319045 | I110372326-00 | 403 | Aggravated Assault | ASSAULT & BATTERY D/W - OTHER | A1 | 97 | NaN | 2016-06-14 09:40:00 | 2016 | 6 | Tuesday | 9 | Part One | SCHOOL ST | 42.357428 | -71.058326 | (42.35742837, -71.05832551) |
319046 | I110372326-00 | 3125 | Warrant Arrests | WARRANT ARREST | A1 | 97 | NaN | 2016-06-14 09:40:00 | 2016 | 6 | Tuesday | 9 | Part Three | SCHOOL ST | 42.357428 | -71.058326 | (42.35742837, -71.05832551) |
319047 | I110261417-00 | 3125 | Warrant Arrests | WARRANT ARREST | B2 | 324 | NaN | 2016-07-29 00:00:00 | 2016 | 7 | Friday | 0 | Part Three | BOWDOIN ST | 42.307038 | -71.066153 | (42.30703835, -71.06615319) |
319048 | I110261417-00 | 619 | Larceny | LARCENY OTHER $200 & OVER | B2 | 324 | NaN | 2016-07-29 00:00:00 | 2016 | 7 | Friday | 0 | Part One | BOWDOIN ST | 42.307038 | -71.066153 | (42.30703835, -71.06615319) |
319049 | I110177502-00 | 3125 | Warrant Arrests | WARRANT ARREST | B2 | 318 | NaN | 2015-10-02 21:00:00 | 2015 | 10 | Friday | 21 | Part Three | HOMESTEAD ST | 42.311277 | -71.089093 | (42.31127726, -71.08909334) |
319050 | I110177502-00 | 802 | Simple Assault | ASSAULT & BATTERY | B2 | 318 | NaN | 2015-10-02 21:00:00 | 2015 | 10 | Friday | 21 | Part Two | HOMESTEAD ST | 42.311277 | -71.089093 | (42.31127726, -71.08909334) |
319051 | I110177502-00 | 3125 | Warrant Arrests | WARRANT ARREST | B2 | 318 | NaN | 2015-10-02 21:00:00 | 2015 | 10 | Friday | 21 | Part Three | HOMESTEAD ST | 42.311277 | -71.089093 | (42.31127726, -71.08909334) |
319052 | I100636670-00 | 629 | Larceny | LARCENY OTHER $50 TO $199 | D4 | 285 | NaN | 2016-06-05 17:23:00 | 2016 | 6 | Sunday | 17 | Part One | COVENTRY ST | 42.336951 | -71.085748 | (42.33695098, -71.08574813) |
319053 | I100636670-00 | 3125 | Warrant Arrests | WARRANT ARREST | D4 | 285 | NaN | 2016-06-05 17:23:00 | 2016 | 6 | Sunday | 17 | Part Three | COVENTRY ST | 42.336951 | -71.085748 | (42.33695098, -71.08574813) |
319054 | I100340225-00 | 3125 | Warrant Arrests | WARRANT ARREST | A1 | 77 | NaN | 2015-07-27 10:47:00 | 2015 | 7 | Monday | 10 | Part Three | BOWDOIN SQ | 42.361645 | -71.062299 | (42.36164502, -71.06229949) |
319055 | I100340225-00 | 339 | Robbery | ROBBERY - UNARMED - STREET | A1 | 77 | NaN | 2015-07-27 10:47:00 | 2015 | 7 | Monday | 10 | Part One | BOWDOIN SQ | 42.361645 | -71.062299 | (42.36164502, -71.06229949) |
319056 | I100222105-02 | 3125 | Warrant Arrests | WARRANT ARREST | E13 | 572 | NaN | 2015-08-03 16:22:00 | 2015 | 8 | Monday | 16 | Part Three | COLUMBUS AVE | 42.313628 | -71.095603 | (42.31362799, -71.09560307) |
319057 | I100033064-00 | 2907 | Violations | VAL - OPERATING AFTER REV/SUSP. | B2 | 304 | NaN | 2016-07-29 18:20:00 | 2016 | 7 | Friday | 18 | Part Two | SLAYTON WAY | 42.321770 | -71.097798 | (42.32177032, -71.09779774) |
319058 | I100033064-00 | 2910 | Violations | VAL - OPERATING AFTER REV/SUSP. | B2 | 304 | NaN | 2016-07-29 18:20:00 | 2016 | 7 | Friday | 18 | Part Two | SLAYTON WAY | 42.321770 | -71.097798 | (42.32177032, -71.09779774) |
319059 | I090321958-00 | 3125 | Warrant Arrests | WARRANT ARREST | C11 | 355 | NaN | 2016-02-01 01:43:00 | 2016 | 2 | Monday | 1 | Part Three | GENEVA AVE | NaN | NaN | (0.00000000, 0.00000000) |
319060 | I090321958-00 | 3125 | Warrant Arrests | WARRANT ARREST | C11 | 355 | NaN | 2016-02-01 01:43:00 | 2016 | 2 | Monday | 1 | Part Three | GENEVA AVE | NaN | NaN | (0.00000000, 0.00000000) |
319061 | I090317057-00 | 403 | Aggravated Assault | ASSAULT & BATTERY D/W - OTHER | B3 | 458 | NaN | 2015-11-20 11:15:00 | 2015 | 11 | Friday | 11 | Part One | BLUE HILL AVE | 42.301897 | -71.085549 | (42.30189690, -71.08554944) |
319062 | I090317057-00 | 3125 | Warrant Arrests | WARRANT ARREST | B3 | 458 | NaN | 2015-11-20 11:15:00 | 2015 | 11 | Friday | 11 | Part Three | BLUE HILL AVE | 42.301897 | -71.085549 | (42.30189690, -71.08554944) |
319063 | I080542626-00 | 3125 | Warrant Arrests | WARRANT ARREST | A1 | 111 | NaN | 2015-08-12 12:00:00 | 2015 | 8 | Wednesday | 12 | Part Three | BOYLSTON ST | 42.352312 | -71.063705 | (42.35231190, -71.06370510) |
319064 | I080542626-00 | 1848 | Drug Violation | DRUGS - POSS CLASS B - INTENT TO MFR DIST DISP | A1 | 111 | NaN | 2015-08-12 12:00:00 | 2015 | 8 | Wednesday | 12 | Part Two | BOYLSTON ST | 42.352312 | -71.063705 | (42.35231190, -71.06370510) |
319065 | I080542626-00 | 1849 | Drug Violation | DRUGS - POSS CLASS B - COCAINE, ETC. | A1 | 111 | NaN | 2015-08-12 12:00:00 | 2015 | 8 | Wednesday | 12 | Part Two | BOYLSTON ST | 42.352312 | -71.063705 | (42.35231190, -71.06370510) |
319066 | I060168073-00 | 1864 | Drug Violation | DRUGS - POSS CLASS D - INTENT MFR DIST DISP | E13 | 912 | NaN | 2018-01-27 14:01:00 | 2018 | 1 | Saturday | 14 | Part Two | CENTRE ST | 42.322838 | -71.100967 | (42.32283759, -71.10096723) |
319067 | I060168073-00 | 3125 | Warrant Arrests | WARRANT ARREST | E13 | 912 | NaN | 2018-01-27 14:01:00 | 2018 | 1 | Saturday | 14 | Part Three | CENTRE ST | 42.322838 | -71.100967 | (42.32283759, -71.10096723) |
319068 | I050310906-00 | 3125 | Warrant Arrests | WARRANT ARREST | D4 | 285 | NaN | 2016-06-05 17:25:00 | 2016 | 6 | Sunday | 17 | Part Three | COVENTRY ST | 42.336951 | -71.085748 | (42.33695098, -71.08574813) |
319069 | I030217815-08 | 111 | Homicide | MURDER, NON-NEGLIGIENT MANSLAUGHTER | E18 | 520 | NaN | 2015-07-09 13:38:00 | 2015 | 7 | Thursday | 13 | Part One | RIVER ST | 42.255926 | -71.123172 | (42.25592648, -71.12317207) |
319070 | I030217815-08 | 3125 | Warrant Arrests | WARRANT ARREST | E18 | 520 | NaN | 2015-07-09 13:38:00 | 2015 | 7 | Thursday | 13 | Part Three | RIVER ST | 42.255926 | -71.123172 | (42.25592648, -71.12317207) |
319071 | I010370257-00 | 3125 | Warrant Arrests | WARRANT ARREST | E13 | 569 | NaN | 2016-05-31 19:35:00 | 2016 | 5 | Tuesday | 19 | Part Three | NEW WASHINGTON ST | 42.302333 | -71.111565 | (42.30233307, -71.11156487) |
319072 | 142052550 | 3125 | Warrant Arrests | WARRANT ARREST | D4 | 903 | NaN | 2015-06-22 00:12:00 | 2015 | 6 | Monday | 0 | Part Three | WASHINGTON ST | 42.333839 | -71.080290 | (42.33383935, -71.08029038) |
319073 rows × 17 columns
from dask import dataframe as dd
df = dd.read_csv(url, encoding='latin1').reset_index()
df.rows.limit(5).ext.display()
index (int64) | num (int64) | idk (int64)
---|---|---
0 | 1 | 2
1 | 2 | 3
2 | 3 | 4
3 | 4 | 5
4 | 5 | 6
df.cols.min("num")
{'num': {'min': 1}}
df.cols.min("*")
{'num': {'min': 1}, 'idk': {'min': 2}}
df.cols.percentile("num")
{'num': {'percentile': {'0.5': 6.0}}}
df.cols.percentile("*")
{'num': {'percentile': {'0.5': 6.0}}, 'idk': {'percentile': {'0.5': 4.0}}}
a = {0.25: 3.5, 0.5: 6.0, 0.75: 8.5}
print(a)
{0.25: 3.5, 0.5: 6.0, 0.75: 8.5}
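# A hedged sketch of how the same quartiles come out of plain Dask,
# assuming df wraps a dask DataFrame with a numeric "num" column:
quartiles = df["num"].quantile([0.25, 0.5, 0.75]).compute()
print(quartiles.to_dict())  # e.g. {0.25: 3.5, 0.5: 6.0, 0.75: 8.5}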
df.rows.select((df["num"] >= 6.8) & (df["num"] <= 99.3)).rows.limit(10).ext.display()
C:\Users\argenisleon\Anaconda3\lib\site-packages\dask\dataframe\core.py:4382: UserWarning: Insufficient elements for `head`. 5 elements requested, only 4 elements available. Try passing larger `npartitions` to `head`. warnings.warn(msg.format(n, len(r)))
   num  idk
6    7    3
7    8    4
8    9    5
9   10    6
[OrderedDict([('num', 7), ('idk', 3)]), OrderedDict([('num', 8), ('idk', 4)]), OrderedDict([('num', 9), ('idk', 5)]), OrderedDict([('num', 10), ('idk', 6)])]
num (int64) | idk (int64)
---|---
7 | 3
8 | 4
9 | 5
10 | 6
df.outliers.tukey("num").select().ext.display()
{'num': {'percentile': {'0.25': 3.5, '0.5': 6.0, '0.75': 8.5}}} {'num': {'percentile': {'0.25': 3.5, '0.5': 6.0, '0.75': 8.5}}}
C:\Users\argenisleon\Anaconda3\lib\site-packages\dask\dataframe\core.py:4382: UserWarning: Insufficient elements for `head`. 10 elements requested, only 1 elements available. Try passing larger `npartitions` to `head`. warnings.warn(msg.format(n, len(r)))
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-65-d3c1de2af819> in <module> ----> 1 df.outliers.tukey("num").select().ext.display() ~\Documents\Optimus\optimus\engines\dask\extension.py in display(limit, columns, title, truncate) 332 def display(limit=None, columns=None, title=None, truncate=True): 333 # TODO: limit, columns, title, truncate --> 334 Ext.table(limit, columns, title, truncate) 335 336 @staticmethod ~\Documents\Optimus\optimus\engines\dask\extension.py in table(limit, columns, title, truncate) 338 try: 339 if __IPYTHON__ and DataFrame.output is "html": --> 340 result = Ext.table_html(title=title, limit=limit, columns=columns, truncate=truncate) 341 print_html(result) 342 else: ~\Documents\Optimus\optimus\engines\dask\extension.py in table_html(limit, columns, title, full, truncate, count) 288 data = df.cols.select(columns).ext.to_dict() 289 else: --> 290 data = df.cols.select(columns).rows.limit(limit).ext.to_dict() 291 292 # Load the Jinja template ~\AppData\Roaming\Python\Python37\site-packages\pandas\core\generic.py in __getattr__(self, name) 5065 if self._info_axis._can_hold_identifiers_and_holds_name(name): 5066 return self[name] -> 5067 return object.__getattribute__(self, name) 5068 5069 def __setattr__(self, name, value): AttributeError: 'DataFrame' object has no attribute 'ext'
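# tukey().select() fails because the selection falls back to a plain dask
# DataFrame that has no .ext accessor. A hedged sketch of the same
# Tukey-fence selection using plain Dask operations:
q = df["num"].quantile([0.25, 0.75]).compute()
iqr = q[0.75] - q[0.25]
lower, upper = q[0.25] - 1.5 * iqr, q[0.75] + 1.5 * iqr
print(df[(df["num"] < lower) | (df["num"] > upper)].compute())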
from dask import dataframe as dd
df = dd.read_csv("data/foo.csv", sep=",").head(20)
C:\Users\argenisleon\Anaconda3\lib\site-packages\dask\dataframe\core.py:4382: UserWarning: Insufficient elements for `head`. 20 elements requested, only 19 elements available. Try passing larger `npartitions` to `head`. warnings.warn(msg.format(n, len(r)))
df.rows.between("id",1,5).ext.display()
id (int64) | firstName (object) | lastName (object) | billingId (float64) | product (object) | price (float64) | birth (object) | dummyCol (object)
---|---|---|---|---|---|---|---
2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna
3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give
4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you
df1 = df.rows.append(df)
df1.ext.display()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-66-44bea7b62d53> in <module> ----> 1 df1.ext.display() NameError: name 'df1' is not defined
#https://github.com/dask/dask/pull/4229#issuecomment-449123512
df["id"].mode().compute()
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-178-4167ff9155e5> in <module> ----> 1 df["id"].mode().compute() AttributeError: 'Series' object has no attribute 'mode'
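# Series.mode is missing in this dask version (see the PR linked above).
# A hedged equivalent via value_counts: the mode is every value whose
# count equals the maximum count.
counts = df["id"].value_counts().compute()
print(counts[counts == counts.max()].index.tolist())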
import pandas as pd
df = pd.read_csv("https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv", sep=",", header=0)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-9-ae93fea6af71> in <module> ----> 1 df = pd.read_csv("https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv", sep=",", header=0) NameError: name 'pd' is not defined
df.head(20)
C:\Users\argenisleon\Anaconda3\lib\site-packages\dask\dataframe\core.py:4382: UserWarning: Insufficient elements for `head`. 20 elements requested, only 13 elements available. Try passing larger `npartitions` to `head`. warnings.warn(msg.format(n, len(r)))
id | firstName | lastName | billingId | product | price | birth | dummyCol | |
---|---|---|---|---|---|---|---|---|
0 | 1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never |
1 | 2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna |
2 | 3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give |
3 | 4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you |
4 | 5 | Albert | Einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up |
5 | 6 | Galileo | GALiLEI | 672.0 | arepa | 5.0 | 1930/08/12 | never |
6 | 7 | CaRL | Ga%%%uss | 323.0 | taco | 3.0 | 1970/07/13 | gonna |
7 | 8 | David | H$$$ilbert | 624.0 | taaaccoo | 3.0 | 1950/07/14 | let |
8 | 9 | Johannes | KEPLER | 735.0 | taco | 3.0 | 1920/04/22 | you |
9 | 10 | JaMES | M$$ax%%well | 875.0 | taco | 3.0 | 1923/03/12 | down |
10 | 11 | Isaac | Newton | 992.0 | pasta | 9.0 | 1999/02/15 | never |
11 | 12 | Emmy%% | Nöether$ | 234.0 | pasta | 9.0 | 1993/12/08 | gonna |
12 | 13 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
df.cols.names()
['id', 'firstName', 'lastName', 'billingId', 'product', 'price', 'birth', 'dummyCol']
df = op.load.csv("https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv", sep=",", header=True, infer_schema='false', null_value="None")
https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv
df.rows.limit(5).cols.lower("lastName").ext.display()
id (int64) | firstName (object) | lastName (object) | billingId (float64) | product (object) | price (float64) | birth (object) | dummyCol (object)
---|---|---|---|---|---|---|---
1 | Luis | alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never
2 | André | ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna
3 | NiELS | böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give
4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you
5 | Albert | einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up
df.rows.limit(5).cols.min_max_scaler("billingId").ext.display()
..\optimus\engines\base\dask\columns.py:160: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result. scaler.transform(_df)[input_cols]
--------------------------------------------------------------------------- IndexError Traceback (most recent call last) <ipython-input-8-1db9dc043f56> in <module> ----> 1 df.rows.limit(5).cols.min_max_scaler("billingId").ext.display() ~\Documents\Optimus\optimus\engines\base\dask\columns.py in min_max_scaler(self, input_cols, output_cols) 158 _df = df[input_cols] 159 scaler.fit(_df) --> 160 scaler.transform(_df)[input_cols] 161 return df 162 IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices
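# The IndexError above: scaler.transform returns a NumPy array, which
# cannot be indexed with column labels. A hedged pandas-level sketch of
# the intended scaling, on hypothetical standalone data:
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
pdf = pd.DataFrame({"billingId": [123.0, 423.0, 551.0, 521.0, 634.0]})
pdf["billingId"] = MinMaxScaler().fit_transform(pdf[["billingId"]]).ravel()
print(pdf)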
df.ext.display(13)
id (int64) | firstName (object) | lastName (object) | billingId (float64) | product (object) | price (float64) | birth (object) | dummyCol (object)
---|---|---|---|---|---|---|---
1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never
2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna
3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give
4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you
5 | Albert | Einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up
6 | Galileo | ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI | 672.0 | arepa | 5.0 | 1930/08/12 | never
7 | CaRL | Ga%%%uss | 323.0 | taco | 3.0 | 1970/07/13 | gonna
8 | David | H$$$ilbert | 624.0 | taaaccoo | 3.0 | 1950/07/14 | let
9 | Johannes | KEPLER | 735.0 | taco | 3.0 | 1920/04/22 | you
10 | JaMES | M$$ax%%well | 875.0 | taco | 3.0 | 1923/03/12 | down
11 | Isaac | Newton | 992.0 | pasta | 9.0 | 1999/02/15 | never⋅
12 | Emmy%% | Nöether$ | 234.0 | pasta | 9.0 | 1993/12/08 | gonna
13 | nan | nan | nan | nan | nan | nan | nan
df.cols.impute("billingId",output_cols="hola").ext.display(13)
id (int64) | firstName (object) | lastName (object) | billingId (float64) | product (object) | price (float64) | birth (object) | dummyCol (object) | hola (float64)
---|---|---|---|---|---|---|---|---
1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never | 123.0
2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna | 423.0
3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give | 551.0
4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you | 521.0
5 | Albert | Einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up | 634.0
6 | Galileo | ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI | 672.0 | arepa | 5.0 | 1930/08/12 | never | 672.0
7 | CaRL | Ga%%%uss | 323.0 | taco | 3.0 | 1970/07/13 | gonna | 323.0
8 | David | H$$$ilbert | 624.0 | taaaccoo | 3.0 | 1950/07/14 | let | 624.0
9 | Johannes | KEPLER | 735.0 | taco | 3.0 | 1920/04/22 | you | 735.0
10 | JaMES | M$$ax%%well | 875.0 | taco | 3.0 | 1923/03/12 | down | 875.0
11 | Isaac | Newton | 992.0 | pasta | 9.0 | 1999/02/15 | never⋅ | 992.0
12 | Emmy%% | Nöether$ | 234.0 | pasta | 9.0 | 1993/12/08 | gonna | 234.0
13 | nan | nan | nan | nan | nan | nan | nan | 558.9166666666666
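# The value filled into "hola" on row 13 is just the column mean. A hedged
# cross-check with scikit-learn's SimpleImputer on the same twelve values:
from sklearn.impute import SimpleImputer
import numpy as np
vals = np.array([[123.0], [423.0], [551.0], [521.0], [634.0], [672.0],
                 [323.0], [624.0], [735.0], [875.0], [992.0], [234.0],
                 [np.nan]])
print(SimpleImputer(strategy="mean").fit_transform(vals)[-1])  # [558.91666667]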
df.cols.impute("billingId",output_cols="new_col").ext.display(13)
df.cols.count_na("*")
{'billingId': 1, 'id': 0, 'dummyCol': 1, 'product': 1, 'firstName': 1, 'birth': 1, 'lastName': 1, 'price': 1}
import pandas as pd
data = {'col_0': [9, -3, 0, -1, 5], 'col_1': [-2, -7, 6, 8, -5]}
df = pd.DataFrame(data)
df.head()
col_0 | col_1 | |
---|---|---|
0 | 9 | -2 |
1 | -3 | -7 |
2 | 0 | 6 |
3 | -1 | 8 |
4 | 5 | -5 |
df["col_0"].clip( 1, 5)
0    5
1    1
2    1
3    1
4    5
Name: col_0, dtype: int64
df.head(10)
id | firstName | lastName | billingId | product | price | birth | dummyCol | |
---|---|---|---|---|---|---|---|---|
0 | 1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never |
1 | 2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna |
2 | 3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give |
3 | 4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you |
4 | 5 | Albert | Einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up |
5 | 5 | Galileo | GALiLEI | 672.0 | arepa | 5.0 | 1930/08/12 | never |
6 | 5 | CaRL | Ga%%%uss | 323.0 | taco | 3.0 | 1970/07/13 | gonna |
7 | 5 | David | H$$$ilbert | 624.0 | taaaccoo | 3.0 | 1950/07/14 | let |
8 | 5 | Johannes | KEPLER | 735.0 | taco | 3.0 | 1920/04/22 | you |
9 | 5 | JaMES | M$$ax%%well | 875.0 | taco | 3.0 | 1923/03/12 | down |
df = op.load.csv("https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv", sep=",", header=True, infer_schema='false', null_value="None")
https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv
df.cols.min("id")
{'min': {'id': 1, 'min': [('min', id 1 dtype: int64)]}}
df.cols.min("*")
{'min': {'id': 1.0, 'billingId': 123.0, 'price': 3.0, 'min': [('min', id 1.0 billingId 123.0 price 3.0 dtype: float64)]}}
df.cols.iqr(["id","price"])
VALUE dict_values([0.25 4.0 0.50 7.0 0.75 10.0 Name: id, dtype: float64, 0.25 3.00 0.50 8.00 0.75 8.25 Name: price, dtype: float64])
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-53-997938715317> in <module> ----> 1 df.cols.iqr(["id","price"]) ~\Documents\Optimus\optimus\engines\base\dask\columns.py in iqr(self, columns, more, relative_error) 122 check_column_numbers(columns, "*") 123 --> 124 quartile = df.cols.percentile(columns, [0.25, 0.5, 0.75], relative_error=relative_error) 125 print(quartile) 126 for col_name in columns: ~\Documents\Optimus\optimus\engines\base\columns.py in percentile(self, columns, values, relative_error) 158 if values is None: 159 values = [0.5] --> 160 return self.agg_exprs(columns, df.functions.percentile_agg, df, values, relative_error) 161 162 def median(self, columns, relative_error=RELATIVE_ERROR): ~\Documents\Optimus\optimus\engines\base\columns.py in agg_exprs(self, columns, funcs, *args) 134 :return: 135 """ --> 136 return self.exec_agg(self.create_exprs(columns, funcs, *args)) 137 138 @staticmethod ~\Documents\Optimus\optimus\engines\base\dask\columns.py in exec_agg(exprs) 465 if agg_name == "percentile": 466 --> 467 agg_parsed = parse_percentile(columns.values()) 468 elif agg_name == "hist": 469 agg_parsed = parse_hist(agg_results) ~\Documents\Optimus\optimus\engines\base\dask\columns.py in parse_percentile(value) 444 _result = {} 445 print("VALUE", value) --> 446 for (p_value, p_result) in value.iteritems(): 447 _result.setdefault(p_value, p_result) 448 AttributeError: 'dict_values' object has no attribute 'iteritems'
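# parse_percentile chokes on dict_values here. A hedged manual IQR built
# from the quantiles directly, assuming df behaves like a dask DataFrame:
out = {}
for col in ["id", "price"]:
    q = df[col].quantile([0.25, 0.75]).compute()
    out[col] = {"iqr": float(q[0.75] - q[0.25])}
print(out)  # e.g. {'id': {'iqr': 6.0}, 'price': {'iqr': 5.25}}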
df.cols.min("*")
{'min': {'id': 1.0, 'billingId': 123.0, 'price': 3.0}}
df.head(12)
id | firstName | lastName | billingId | product | price | birth | dummyCol | |
---|---|---|---|---|---|---|---|---|
0 | 1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never |
1 | 2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna |
2 | 3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give |
3 | 4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you |
4 | 5 | Albert | Einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up |
5 | 5 | Galileo | GALiLEI | 672.0 | arepa | 5.0 | 1930/08/12 | never |
6 | 5 | CaRL | Ga%%%uss | 323.0 | taco | 3.0 | 1970/07/13 | gonna |
7 | 5 | David | H$$$ilbert | 624.0 | taaaccoo | 3.0 | 1950/07/14 | let |
8 | 5 | Johannes | KEPLER | 735.0 | taco | 3.0 | 1920/04/22 | you |
9 | 5 | JaMES | M$$ax%%well | 875.0 | taco | 3.0 | 1923/03/12 | down |
10 | 5 | Isaac | Newton | 992.0 | pasta | 9.0 | 1999/02/15 | never |
11 | 5 | Emmy%% | Nöether$ | 234.0 | pasta | 9.0 | 1993/12/08 | gonna |
df.cols.clip("id",1,5).head(10)
id | firstName | lastName | billingId | product | price | birth | dummyCol | |
---|---|---|---|---|---|---|---|---|
0 | 1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never |
1 | 2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna |
2 | 3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give |
3 | 4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you |
4 | 5 | Albert | Einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up |
5 | 5 | Galileo | GALiLEI | 672.0 | arepa | 5.0 | 1930/08/12 | never |
6 | 5 | CaRL | Ga%%%uss | 323.0 | taco | 3.0 | 1970/07/13 | gonna |
7 | 5 | David | H$$$ilbert | 624.0 | taaaccoo | 3.0 | 1950/07/14 | let |
8 | 5 | Johannes | KEPLER | 735.0 | taco | 3.0 | 1920/04/22 | you |
9 | 5 | JaMES | M$$ax%%well | 875.0 | taco | 3.0 | 1923/03/12 | down |
df.cols.qcut()
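# The qcut cell above has no recorded output. A hedged pandas-level sketch
# of what a 2-bucket quantile cut produces on the price column's values:
import pandas as pd
prices = pd.Series([10.0, 8.0, 8.0, 8.0, 8.0, 5.0, 3.0, 3.0, 3.0, 3.0, 9.0, 9.0])
print(pd.qcut(prices, 2).value_counts())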
df.ext.display(13)
id (int64) | firstName (object) | lastName (object) | billingId (float64) | product (object) | price (float64) | birth (object) | dummyCol (object)
---|---|---|---|---|---|---|---
1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never
2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna
3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give
4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you
5 | Albert | Einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up
6 | Galileo | ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI | 672.0 | arepa | 5.0 | 1930/08/12 | never
7 | CaRL | Ga%%%uss | 323.0 | taco | 3.0 | 1970/07/13 | gonna
8 | David | H$$$ilbert | 624.0 | taaaccoo | 3.0 | 1950/07/14 | let
9 | Johannes | KEPLER | 735.0 | taco | 3.0 | 1920/04/22 | you
10 | JaMES | M$$ax%%well | 875.0 | taco | 3.0 | 1923/03/12 | down
11 | Isaac | Newton | 992.0 | pasta | 9.0 | 1999/02/15 | never⋅
12 | Emmy%% | Nöether$ | 234.0 | pasta | 9.0 | 1993/12/08 | gonna
13 | nan | nan | 558.9166666666666 | nan | nan | nan | nan
import numpy as np
pd.DataFrame([[np.nan, 'dogs', 3]], index=df.index)
from sklearn.preprocessing import MinMaxScaler
import dask.dataframe as dd
import dask.array as da
scaler = MinMaxScaler()
columns = ['billingId','price']
b =df[columns]
scaler.fit(b)
c = dd.from_dask_array(da.from_array(scaler.transform(b), chunks=100),columns)
print(c.head())
# df.assign(e=c['price'])
# # print(dd.from_dask_array(c, columns).head(10))
# df[columns]= dd.from_dask_array(c)
   billingId     price
0   0.000000  1.000000
1   0.345224  0.714286
2   0.492520  0.714286
3   0.457998  0.714286
4   0.588032  0.714286
print(df1)
[[0.         1.        ]
 [0.3452244  0.71428571]
 [0.49252014 0.71428571]
 [0.4579977  0.71428571]
 [0.58803222 0.71428571]
 [0.63176064 0.28571429]
 [0.2301496  0.        ]
 [0.57652474 0.        ]
 [0.70425777 0.        ]
 [0.86536249 0.        ]
 [1.         0.85714286]
 [0.12773303 0.85714286]
 [       nan        nan]]
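# Assigning the scaled dask array back onto df can fail when divisions do
# not line up (see the commented attempts above). A hedged fallback for
# data this small: compute to pandas and scale in place.
pdf = df[columns].compute()            # assumes df exposes dask's .compute()
pdf[columns] = scaler.transform(pdf)   # NumPy array, shape (n_rows, 2)
print(pdf.head())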
df.ext.display()
df.ext.sample(5).ext.display()
id (int64) | firstName (object) | lastName (object) | billingId (float64) | product (object) | price (float64) | birth (object) | dummyCol (object)
---|---|---|---|---|---|---|---
7 | CaRL | Ga%%%uss | 323.0 | taco | 3.0 | 1970/07/13 | gonna
4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you
10 | JaMES | M$$ax%%well | 875.0 | taco | 3.0 | 1923/03/12 | down
12 | Emmy%% | Nöether$ | 234.0 | pasta | 9.0 | 1993/12/08 | gonna
1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never
5 | Albert | Einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up
df.ext.stratified_sample("firstName").ext.display()
..\optimus\engines\dask\extension.py:156: UserWarning: `meta` is not specified, inferred from partial data. Please provide `meta` if the result is unexpected. Before: .apply(func) After: .apply(func, meta={'x': 'f8', 'y': 'f8'}) for dataframe result or: .apply(func, meta=('x', 'f8')) for series result df_ = df.groupby(col_name).apply(lambda x: x.sample(2)) distributed.worker - WARNING - Compute Failed Function: subgraph_callable args: () kwargs: {} Exception: ValueError("Cannot take a larger sample than population when 'replace=False'")
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) ~\AppData\Roaming\Python\Python37\site-packages\pandas\core\groupby\groupby.py in apply(self, func, *args, **kwargs) 688 try: --> 689 result = self._python_apply_general(f) 690 except Exception: ~\AppData\Roaming\Python\Python37\site-packages\pandas\core\groupby\groupby.py in _python_apply_general(self, f) 706 keys, values, mutated = self.grouper.apply(f, self._selected_obj, --> 707 self.axis) 708 ~\AppData\Roaming\Python\Python37\site-packages\pandas\core\groupby\ops.py in apply(self, f, data, axis) 189 group_axes = _get_axes(group) --> 190 res = f(group) 191 if not _is_indexed_like(res, group_axes): ~\Documents\Optimus\optimus\engines\dask\extension.py in <lambda>(x) 155 n = min(5, df[col_name].value_counts().min()) --> 156 df = df.groupby(col_name).apply(lambda x: x.sample(2)) 157 # df_.index = df_.index.droplevel(0) ~\AppData\Roaming\Python\Python37\site-packages\pandas\core\generic.py in sample(self, n, frac, replace, weights, random_state, axis) 4864 -> 4865 locs = rs.choice(axis_length, size=n, replace=replace, p=weights) 4866 return self.take(locs, axis=axis, is_copy=False) mtrand.pyx in mtrand.RandomState.choice() ValueError: Cannot take a larger sample than population when 'replace=False' During handling of the above exception, another exception occurred: ValueError Traceback (most recent call last) <ipython-input-23-2a6d5278e89b> in <module> ----> 1 df.ext.stratified_sample("firstName").ext.display() ~\Documents\Optimus\optimus\engines\dask\extension.py in display(limit, columns, title, truncate) 328 def display(limit=None, columns=None, title=None, truncate=True): 329 # TODO: limit, columns, title, truncate --> 330 Ext.table(limit, columns, title, truncate) 331 332 @staticmethod ~\Documents\Optimus\optimus\engines\dask\extension.py in table(limit, columns, title, truncate) 334 try: 335 if __IPYTHON__ and DataFrame.output is "html": --> 336 result = Ext.table_html(title=title, limit=limit, columns=columns, truncate=truncate) 337 print_html(result) 338 else: ~\Documents\Optimus\optimus\engines\dask\extension.py in table_html(limit, columns, title, full, truncate, count) 286 data = df.cols.select(columns).ext.to_dict() 287 else: --> 288 data = df.cols.select(columns).rows.limit(limit).ext.to_dict() 289 290 # Load the Jinja template ~\Documents\Optimus\optimus\engines\dask\extension.py in to_dict() 66 67 # Because asDict can return messed columns names we order ---> 68 for index, row in df.iterrows(): 69 # _row = row.asDict() 70 r = collections.OrderedDict() ~\Anaconda3\lib\site-packages\dask\dataframe\core.py in iterrows(self) 2872 def iterrows(self): 2873 for i in range(self.npartitions): -> 2874 df = self.get_partition(i).compute() 2875 for row in df.iterrows(): 2876 yield row ~\Anaconda3\lib\site-packages\dask\base.py in compute(self, **kwargs) 154 dask.base.compute 155 """ --> 156 (result,) = compute(self, traverse=False, **kwargs) 157 return result 158 ~\Anaconda3\lib\site-packages\dask\base.py in compute(*args, **kwargs) 395 keys = [x.__dask_keys__() for x in collections] 396 postcomputes = [x.__dask_postcompute__() for x in collections] --> 397 results = schedule(dsk, keys, **kwargs) 398 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)]) 399 ~\Anaconda3\lib\site-packages\distributed\client.py in get(self, dsk, keys, restrictions, loose_restrictions, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, 
**kwargs) 2319 try: 2320 results = self.gather(packed, asynchronous=asynchronous, -> 2321 direct=direct) 2322 finally: 2323 for f in futures.values(): ~\Anaconda3\lib\site-packages\distributed\client.py in gather(self, futures, errors, maxsize, direct, asynchronous) 1653 return self.sync(self._gather, futures, errors=errors, 1654 direct=direct, local_worker=local_worker, -> 1655 asynchronous=asynchronous) 1656 1657 @gen.coroutine ~\Anaconda3\lib\site-packages\distributed\client.py in sync(self, func, *args, **kwargs) 671 return future 672 else: --> 673 return sync(self.loop, func, *args, **kwargs) 674 675 def __repr__(self): ~\Anaconda3\lib\site-packages\distributed\utils.py in sync(loop, func, *args, **kwargs) 275 e.wait(10) 276 if error[0]: --> 277 six.reraise(*error[0]) 278 else: 279 return result[0] ~\Anaconda3\lib\site-packages\six.py in reraise(tp, value, tb) 691 if value.__traceback__ is not tb: 692 raise value.with_traceback(tb) --> 693 raise value 694 finally: 695 value = None ~\Anaconda3\lib\site-packages\distributed\utils.py in f() 260 if timeout is not None: 261 future = gen.with_timeout(timedelta(seconds=timeout), future) --> 262 result[0] = yield future 263 except Exception as exc: 264 error[0] = sys.exc_info() ~\Anaconda3\lib\site-packages\tornado\gen.py in run(self) 1131 1132 try: -> 1133 value = future.result() 1134 except Exception: 1135 self.had_exception = True ~\Anaconda3\lib\site-packages\tornado\gen.py in run(self) 1139 if exc_info is not None: 1140 try: -> 1141 yielded = self.gen.throw(*exc_info) 1142 finally: 1143 # Break up a reference to itself ~\Anaconda3\lib\site-packages\distributed\client.py in _gather(self, futures, errors, direct, local_worker) 1498 six.reraise(type(exception), 1499 exception, -> 1500 traceback) 1501 if errors == 'skip': 1502 bad_keys.add(key) ~\Anaconda3\lib\site-packages\six.py in reraise(tp, value, tb) 690 value = tp() 691 if value.__traceback__ is not tb: --> 692 raise value.with_traceback(tb) 693 raise value 694 finally: ~\Anaconda3\lib\site-packages\dask\optimization.py in __call__(self, *args) 940 % (len(self.inkeys), len(args))) 941 return _get_recursive(self.dsk, self.outkey, --> 942 dict(zip(self.inkeys, args))) 943 944 def __reduce__(self): ~\Anaconda3\lib\site-packages\dask\core.py in _get_recursive(dsk, x, cache) 130 return cache[x] 131 elif hashable and x in dsk: --> 132 res = cache[x] = _get_recursive(dsk, dsk[x], cache) 133 return res 134 elif type(x) is tuple and x and callable(x[0]): # istask ~\Anaconda3\lib\site-packages\dask\core.py in _get_recursive(dsk, x, cache) 134 elif type(x) is tuple and x and callable(x[0]): # istask 135 func, args = x[0], x[1:] --> 136 args2 = [_get_recursive(dsk, k, cache) for k in args] 137 return func(*args2) 138 return x ~\Anaconda3\lib\site-packages\dask\core.py in <listcomp>(.0) 134 elif type(x) is tuple and x and callable(x[0]): # istask 135 func, args = x[0], x[1:] --> 136 args2 = [_get_recursive(dsk, k, cache) for k in args] 137 return func(*args2) 138 return x ~\Anaconda3\lib\site-packages\dask\core.py in _get_recursive(dsk, x, cache) 135 func, args = x[0], x[1:] 136 args2 = [_get_recursive(dsk, k, cache) for k in args] --> 137 return func(*args2) 138 return x 139 ~\Anaconda3\lib\site-packages\dask\dataframe\core.py in apply_and_enforce(func, args, kwargs, meta) 3682 3683 Ensures the output has the same columns, even if empty.""" -> 3684 df = func(*args, **kwargs) 3685 if isinstance(df, (pd.DataFrame, pd.Series, pd.Index)): 3686 if len(df) == 0: 
~\Anaconda3\lib\site-packages\dask\dataframe\groupby.py in _groupby_slice_apply(df, grouper, key, func, *args, **kwargs) 145 if key: 146 g = g[key] --> 147 return g.apply(func, *args, **kwargs) 148 149 ~\AppData\Roaming\Python\Python37\site-packages\pandas\core\groupby\groupby.py in apply(self, func, *args, **kwargs) 699 700 with _group_selection_context(self): --> 701 return self._python_apply_general(f) 702 703 return result ~\AppData\Roaming\Python\Python37\site-packages\pandas\core\groupby\groupby.py in _python_apply_general(self, f) 705 def _python_apply_general(self, f): 706 keys, values, mutated = self.grouper.apply(f, self._selected_obj, --> 707 self.axis) 708 709 return self._wrap_applied_output( ~\AppData\Roaming\Python\Python37\site-packages\pandas\core\groupby\ops.py in apply(self, f, data, axis) 188 # group might be modified 189 group_axes = _get_axes(group) --> 190 res = f(group) 191 if not _is_indexed_like(res, group_axes): 192 mutated = True ~\Documents\Optimus\optimus\engines\dask\extension.py in <lambda>(x) 154 df = self 155 n = min(5, df[col_name].value_counts().min()) --> 156 df = df.groupby(col_name).apply(lambda x: x.sample(2)) 157 # df_.index = df_.index.droplevel(0) 158 return df ~\AppData\Roaming\Python\Python37\site-packages\pandas\core\generic.py in sample(self, n, frac, replace, weights, random_state, axis) 4863 "provide positive value.") 4864 -> 4865 locs = rs.choice(axis_length, size=n, replace=replace, p=weights) 4866 return self.take(locs, axis=axis, is_copy=False) 4867 mtrand.pyx in mtrand.RandomState.choice() ValueError: Cannot take a larger sample than population when 'replace=False'
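# stratified_sample fails because groups with fewer rows than the requested
# n cannot be sampled without replacement. A hedged pandas-level sketch
# that caps the draw at each group's size:
pdf = df.compute()  # small data; assumes df exposes dask's .compute()
sampled = pdf.groupby("firstName", group_keys=False).apply(
    lambda g: g.sample(min(2, len(g))))
print(sampled.head())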
df.rows.limit(5).ext.display()
id (int64) | firstName (object) | lastName (object) | billingId (float64) | product (object) | price (float64) | birth (object) | dummyCol (object)
---|---|---|---|---|---|---|---
1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never
2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna
3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give
4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you
5 | Albert | Einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up
6 | Galileo | ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI | 672.0 | arepa | 5.0 | 1930/08/12 | never
df.cols.min("")
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-7-562bbc151fba> in <module> ----> 1 df.cols.min() TypeError: min() missing 1 required positional argument: 'columns'
df.rows.sort("billingId","asc").ext.display()
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-21-7a9c606e0857> in <module> ----> 1 df.rows.sort("billingId","asc").ext.display() ~\Anaconda3\lib\site-packages\multipledispatch\dispatcher.py in __call__(self, *args, **kwargs) 276 self._cache[types] = func 277 try: --> 278 return func(*args, **kwargs) 279 280 except MDNotImplementedError: ~\Documents\Optimus\optimus\engines\dask\rows.py in sort(columns, order) 116 """ 117 columns = parse_columns(self, columns) --> 118 return self.rows.sort([(columns, order,)]) 119 120 @staticmethod ~\Anaconda3\lib\site-packages\multipledispatch\dispatcher.py in __call__(self, *args, **kwargs) 276 self._cache[types] = func 277 try: --> 278 return func(*args, **kwargs) 279 280 except MDNotImplementedError: ~\Documents\Optimus\optimus\engines\dask\rows.py in sort(col_sort) 145 df = df.meta.preserve(self, Actions.SORT_ROW.value, col_name) 146 --> 147 c = df.cols.names() 148 # It seems that is on posible to order rows in Dask using set_index. It only return data in ascendent way. 149 # We should fins a way to make it work desc and form multiple columns ~\Documents\Optimus\optimus\engines\dask\columns.py in cols(self) 966 """ 967 return Cols.exec_agg(Cols.create_exprs(columns, funcs, *args)) --> 968 969 970 TypeError: Can't instantiate abstract class Cols with abstract methods apply_by_dtypes, apply_expr, astype, boxplot, bucketizer, cell, clip, copy, correlation, count_mismatch, count_na, count_uniques, count_zeros, drop, frequency_by_group, get_meta, impute, index_to_string, iqr, is_na, keep, max_abs_scaler, min_max_scaler, move, nunique, qcut, remove, remove_accents, remove_special_chars, remove_white_spaces, replace_regex, reverse, scatter, select_by_dtypes, set, set_meta, sort, string_to_index, to_timestamp, unique, value_counts, values_to_cols, years_between, z_score
df.cols.max("price")
{'max': {'price': 10.0}}
df.cols.create_exprs()
df.dropna(how='any', subset=['price'])
id | firstName | lastName | billingId | product | price | birth | dummyCol | |
---|---|---|---|---|---|---|---|---|
npartitions=1 | ||||||||
int64 | object | object | float64 | object | float64 | object | object | |
... | ... | ... | ... | ... | ... | ... | ... |
df.rows.drop_na("price").ext.display()
any price
id (int64) | firstName (object) | lastName (object) | billingId (float64) | product (object) | price (float64) | birth (object) | dummyCol (object)
---|---|---|---|---|---|---|---
1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never
2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna
3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give
4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you
5 | Albert | Einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up
6 | Galileo | ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI | 672.0 | arepa | 5.0 | 1930/08/12 | never
7 | CaRL | Ga%%%uss | 323.0 | taco | 3.0 | 1970/07/13 | gonna
8 | David | H$$$ilbert | 624.0 | taaaccoo | 3.0 | 1950/07/14 | let
9 | Johannes | KEPLER | 735.0 | taco | 3.0 | 1920/04/22 | you
10 | JaMES | M$$ax%%well | 875.0 | taco | 3.0 | 1923/03/12 | down
11 | Isaac | Newton | 992.0 | pasta | 9.0 | 1999/02/15 | never⋅
c = df.cols.names()
df.set_index("billingId").reset_index()[c].head()
C:\Users\argenisleon\Anaconda3\lib\site-packages\numpy\lib\function_base.py:3652: RuntimeWarning: Invalid value encountered in percentile interpolation=interpolation)
id | firstName | lastName | billingId | product | price | birth | dummyCol | |
---|---|---|---|---|---|---|---|---|
0 | 1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never |
1 | 12 | Emmy%% | Nöether$ | 234.0 | pasta | 9.0 | 1993/12/08 | gonna |
2 | 7 | CaRL | Ga%%%uss | 323.0 | taco | 3.0 | 1970/07/13 | gonna |
3 | 2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna |
4 | 4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you |
df.cols.mode("id")
price | |
---|---|
price | |
3.0 | 3.0 |
5.0 | 5.0 |
8.0 | 8.0 |
9.0 | 9.0 |
10.0 | 10.0 |
df.ext.display(20)
id (int64) | firstName (object) | lastName (object) | billingId (float64) | product (object) | price (float64) | birth (object) | dummyCol (object)
---|---|---|---|---|---|---|---
1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never
2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna
3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give
4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you
5 | Albert | Einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up
6 | Galileo | ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI | 672.0 | arepa | 5.0 | 1930/08/12 | never
7 | CaRL | Ga%%%uss | 323.0 | taco | 3.0 | 1970/07/13 | gonna
8 | David | H$$$ilbert | 624.0 | taaaccoo | 3.0 | 1950/07/14 | let
9 | Johannes | KEPLER | 735.0 | taco | 3.0 | 1920/04/22 | you
10 | JaMES | M$$ax%%well | 875.0 | taco | 3.0 | 1923/03/12 | down
11 | Isaac | Newton | 992.0 | pasta | 9.0 | 1999/02/15 | never⋅
12 | Emmy%% | Nöether$ | 234.0 | pasta | 9.0 | 1993/12/08 | gonna
13 | nan | nan | nan | nan | nan | nan | nan
df1 = op.load.csv("data/foo.csv", sep=",", header=True, infer_schema='true', charset="ISO-8859-1", null_value="None")
data/foo.csv
df1.ext.display(20)
id (int64) | firstName (object) | lastName (object) | billingId (int64) | product (object) | price (int64) | birth (object) | dummyCol (object)
---|---|---|---|---|---|---|---
1 | Luis | Alvarez$$%! | 123 | Cake | 10 | 1980/07/07 | never
2 | André | Ampère | 423 | piza | 8 | 1950/07/08 | gonna
3 | NiELS | Böhr//((%% | 551 | pizza | 8 | 1990/07/09 | give
4 | PAUL | dirac$ | 521 | pizza | 8 | 1954/07/10 | you
5 | Albert | Einstein | 634 | pizza | 8 | 1990/07/11 | up
6 | Galileo | ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI | 672 | arepa | 5 | 1930/08/12 | never
7 | CaRL | Ga%%%uss | 323 | taco | 3 | 1970/07/13 | gonna
8 | David | H$$$ilbert | 624 | taaaccoo | 3 | 1950/07/14 | let
9 | Johannes | KEPLER | 735 | taco | 3 | 1920/04/22 | you
10 | JaMES | M$$ax%%well | 875 | taco | 3 | 1923/03/12 | down
11 | Isaac | Newton | 992 | pasta | 9 | 1999/02/15 | never⋅
12 | Emmy%% | Nöether$ | 234 | pasta | 9 | 1993/12/08 | gonna
13 | Max!!! | Planck!!! | 111 | hamburguer | 4 | 1994/01/04 | run⋅
14 | Fred | Hoy&&&le | 553 | pizzza | 8 | 1997/06/27 | around
15 | (((⋅⋅⋅Heinrich⋅))))) | Hertz | 116 | pizza | 8 | 1956/11/30 | and
16 | William | Gilbert### | 886 | BEER | 2 | 1958/03/26 | desert
17 | Marie | CURIE | 912 | Rice | 1 | 2000/03/22 | you
18 | Arthur | COM%%%pton | 812 | 110790 | 5 | 1899/01/01 | #
19 | JAMES | Chadwick | 467 | nan | 10 | 1921/05/03 | #
df.cols.dtypes()
{'id': 'int64', 'firstName': 'object', 'lastName': 'object', 'billingId': 'float64', 'product': 'object', 'price': 'float64', 'birth': 'object', 'dummyCol': 'object'}
df.ext.display(20)
20
id (int64) | firstName (object) | lastName (object) | billingId (float64) | product (object) | price (float64) | birth (object) | dummyCol (object)
---|---|---|---|---|---|---|---
1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never
2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna
3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give
4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you
5 | Albert | Einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up
6 | Galileo | ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI | 672.0 | arepa | 5.0 | 1930/08/12 | never
7 | CaRL | Ga%%%uss | 323.0 | taco | 3.0 | 1970/07/13 | gonna
8 | David | H$$$ilbert | 624.0 | taaaccoo | 3.0 | 1950/07/14 | let
9 | Johannes | KEPLER | 735.0 | taco | 3.0 | 1920/04/22 | you
10 | JaMES | M$$ax%%well | 875.0 | taco | 3.0 | 1923/03/12 | down
11 | Isaac | Newton | 992.0 | pasta | 9.0 | 1999/02/15 | never⋅
df.save.csv("data/dask/*.csv")
df.save.parquet("data/dask/foo.parquet")
df.save.json("data/dask/*.json")
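# A hedged sketch of the plain Dask writers these save.* calls presumably
# wrap; the "*" in the path becomes one file per partition, and to_parquet
# needs pyarrow or fastparquet installed.
import dask.dataframe as dd
ddf = dd.read_csv("data/foo.csv")
ddf.to_csv("data/dask/*.csv", index=False)
ddf.to_parquet("data/dask/foo.parquet")
ddf.to_json("data/dask/*.json")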
df.rows.create_id()
Dask DataFrame Structure:
                  id firstName lastName billingId product    price   birth dummyCol
npartitions=1
               int64    object   object   float64  object  float64  object   object
                 ...       ...      ...       ...     ...      ...     ...      ...
Dask Name: from-delayed, 3 tasks
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-12-d2c38e26c0a8> in <module> ----> 1 df.rows.create_id() ~\Documents\Optimus\optimus\dask\rows.py in create_id(column) 14 df = self 15 print(df) ---> 16 a = da.arange(df.divisions[-1] + 1, chunks=df.divisions[1:]) 17 df[column] = dd.from_dask_array(a) 18 return df TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'
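# create_id breaks because divisions are unknown (df.divisions[-1] is None).
# A hedged sketch that needs no divisions at all: cumsum over a constant
# column yields a sequential id across partitions. Assumes df behaves like
# a dask DataFrame.
ddf = df.assign(row_id=1)
ddf["row_id"] = ddf["row_id"].cumsum() - 1
print(ddf.head())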
df1 = df[(df.id > 0) & (df.id <= 7)]
df2 = df1[(df.id > 0) & (df.id <= 3)]
df2.compute().head()
C:\Users\argenisleon\Anaconda3\lib\site-packages\dask\utils.py:694: UserWarning: Boolean Series key will be reindexed to match DataFrame index. return getattr(obj, self.method)(*args, **kwargs)
id | firstName | lastName | billingId | product | price | birth | dummyCol | |
---|---|---|---|---|---|---|---|---|
0 | 1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never |
1 | 2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna |
2 | 3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give |
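# The reindex warning above comes from filtering df1 with a boolean mask
# built on df. Building the mask on the frame being filtered avoids it:
df2 = df1[(df1.id > 0) & (df1.id <= 3)]
df2.compute().head()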
df.rows.select((df.id ==1 ) ).ext.display()
id (int64) | firstName (object) | lastName (object) | billingId (float64) | product (object) | price (float64) | birth (object) | dummyCol (object)
---|---|---|---|---|---|---|---
1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never
df.rows.select_by_dtypes("id", "str").ext.display()
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) ~\Anaconda3\lib\site-packages\pyspark\sql\types.py in _parse_datatype_string(s) 845 # For backwards compatibility, "integer", "struct<fieldname: datatype>" and etc. --> 846 return from_ddl_datatype(s) 847 except: ~\Anaconda3\lib\site-packages\pyspark\sql\types.py in from_ddl_datatype(type_str) 837 return _parse_datatype_json_string( --> 838 sc._jvm.org.apache.spark.sql.api.python.PythonSQLUtils.parseDataType(type_str).json()) 839 AttributeError: 'NoneType' object has no attribute '_jvm' During handling of the above exception, another exception occurred: AttributeError Traceback (most recent call last) ~\Anaconda3\lib\site-packages\pyspark\sql\types.py in _parse_datatype_string(s) 849 # For backwards compatibility, "fieldname: datatype, fieldname: datatype" case. --> 850 return from_ddl_datatype("struct<%s>" % s.strip()) 851 except: ~\Anaconda3\lib\site-packages\pyspark\sql\types.py in from_ddl_datatype(type_str) 837 return _parse_datatype_json_string( --> 838 sc._jvm.org.apache.spark.sql.api.python.PythonSQLUtils.parseDataType(type_str).json()) 839 AttributeError: 'NoneType' object has no attribute '_jvm' During handling of the above exception, another exception occurred: AttributeError Traceback (most recent call last) <ipython-input-58-96533379ad09> in <module> ----> 1 df.rows.select_by_dtypes("id", "str").ext.display() ~\Documents\Optimus\optimus\dask\rows.py in select_by_dtypes(input_cols, data_type) 43 # self.cols.apply() 44 ---> 45 return self.where(fbdt(input_cols, data_type)) 46 47 @staticmethod ~\Documents\Optimus\optimus\audf.py in filter_row_by_data_type(col_name, data_type, get_type) 129 130 col_name = one_list_to_val(col_name) --> 131 return F.pandas_udf(pandas_udf_func, return_data_type)(col_name) ~\Anaconda3\lib\site-packages\pyspark\sql\functions.py in pandas_udf(f, returnType, functionType) 2304 return functools.partial(_create_udf, returnType=return_type, evalType=eval_type) 2305 else: -> 2306 return _create_udf(f=f, returnType=return_type, evalType=eval_type) 2307 2308 ~\Anaconda3\lib\site-packages\pyspark\sql\udf.py in _create_udf(f, returnType, evalType) 70 udf_obj = UserDefinedFunction( 71 f, returnType=returnType, name=None, evalType=evalType, deterministic=True) ---> 72 return udf_obj._wrapped() 73 74 ~\Anaconda3\lib\site-packages\pyspark\sql\udf.py in _wrapped(self) 193 194 wrapper.func = self.func --> 195 wrapper.returnType = self.returnType 196 wrapper.evalType = self.evalType 197 wrapper.deterministic = self.deterministic ~\Anaconda3\lib\site-packages\pyspark\sql\udf.py in returnType(self) 117 self._returnType_placeholder = self._returnType 118 else: --> 119 self._returnType_placeholder = _parse_datatype_string(self._returnType) 120 121 if self.evalType == PythonEvalType.SQL_SCALAR_PANDAS_UDF: ~\Anaconda3\lib\site-packages\pyspark\sql\types.py in _parse_datatype_string(s) 850 return from_ddl_datatype("struct<%s>" % s.strip()) 851 except: --> 852 raise e 853 854 ~\Anaconda3\lib\site-packages\pyspark\sql\types.py in _parse_datatype_string(s) 840 try: 841 # DDL format, "fieldname datatype, fieldname datatype". 
--> 842 return from_ddl_schema(s) 843 except Exception as e: 844 try: ~\Anaconda3\lib\site-packages\pyspark\sql\types.py in from_ddl_schema(type_str) 832 def from_ddl_schema(type_str): 833 return _parse_datatype_json_string( --> 834 sc._jvm.org.apache.spark.sql.types.StructType.fromDDL(type_str).json()) 835 836 def from_ddl_datatype(type_str): AttributeError: 'NoneType' object has no attribute '_jvm'
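The root cause sits at the bottom of the traceback: this Dask backend falls through to Spark's pandas_udf machinery, and with no SparkContext alive sc is None, hence the '_jvm' AttributeError. A plain-Dask stand-in for "select rows whose id is a string" could test numeric parsability per partition (the coercion rule and ddf are assumptions):

import pandas as pd

mask = ddf["id"].map_partitions(
    lambda s: pd.to_numeric(s, errors="coerce").isna(),  # True where not numeric
    meta=("id", "bool"),
)
ddf[mask].compute()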
df.set_index('price').compute()
C:\Users\argenisleon\Anaconda3\lib\site-packages\numpy\lib\function_base.py:3652: RuntimeWarning: Invalid value encountered in percentile interpolation=interpolation)
price | id | firstName | lastName | billingId | product | birth | dummyCol |
---|---|---|---|---|---|---|---|
3.0 | 7 | CaRL | Ga%%%uss | 323.0 | taco | 1970/07/13 | gonna |
3.0 | 8 | David | H$$$ilbert | 624.0 | taaaccoo | 1950/07/14 | let |
3.0 | 9 | Johannes | KEPLER | 735.0 | taco | 1920/04/22 | you |
3.0 | 10 | JaMES | M$$ax%%well | 875.0 | taco | 1923/03/12 | down |
5.0 | 6 | Galileo | GALiLEI | 672.0 | arepa | 1930/08/12 | never |
8.0 | 2 | André | Ampère | 423.0 | piza | 1950/07/08 | gonna |
8.0 | 3 | NiELS | Böhr//((%% | 551.0 | pizza | 1990/07/09 | give |
8.0 | 4 | PAUL | dirac$ | 521.0 | pizza | 1954/07/10 | you |
8.0 | 5 | Albert | Einstein | 634.0 | pizza | 1990/07/11 | up |
9.0 | 11 | Isaac | Newton | 992.0 | pasta | 1999/02/15 | never |
9.0 | 12 | Emmy%% | Nöether$ | 234.0 | pasta | 1993/12/08 | gonna |
10.0 | 1 | Luis | Alvarez$$%! | 123.0 | Cake | 1980/07/07 | never |
NaN | 13 | NaN | NaN | NaN | NaN | NaN | NaN |
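set_index shuffles and sorts the whole frame by the new index (note the NaN price lands last), and afterwards Dask knows the divisions, which makes range lookups cheap. A quick sketch of exploiting that:

df_by_price = df.set_index("price")
print(df_by_price.divisions)        # partition boundaries along price
df_by_price.loc[3.0:8.0].compute()  # served from the sorted index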
%%time
df.cols.min(["billingId"])
Wall time: 195 ms
{'min': {'billingId': 123.0}}
%%time
df.cols.max(["billingId"])
Wall time: 152 ms
{'max': {'billingId': 992.0}}
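Each of the calls above scans the data separately; on the raw frame the two aggregates can share a single pass by going through one dask.compute call (ddf again stands in for the wrapped dask DataFrame):

import dask
mn, mx = dask.compute(ddf["billingId"].min(), ddf["billingId"].max())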
df.cols.names()
['id', 'firstName', 'lastName', 'billingId', 'product', 'price', 'birth', 'dummyCol']
df.meta.get()
{'file_name': 'foo.csv', 'transformations': {'actions': {}}}
df.cols.rename("id", "id1")
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-12-6e558dfa1a9f> in <module> ----> 1 df.cols.rename("id", "id1") ~\Anaconda3\lib\site-packages\multipledispatch\dispatcher.py in __call__(self, *args, **kwargs) 276 self._cache[types] = func 277 try: --> 278 return func(*args, **kwargs) 279 280 except MDNotImplementedError: ~\Documents\Optimus\optimus\dask\columns.py in rename(old_column, new_column) 186 @dispatch(str, str) 187 def rename(old_column, new_column): --> 188 return Cols.rename([(old_column, new_column)], None) 189 190 @staticmethod ~\Anaconda3\lib\site-packages\multipledispatch\dispatcher.py in __call__(self, *args, **kwargs) 276 self._cache[types] = func 277 try: --> 278 return func(*args, **kwargs) 279 280 except MDNotImplementedError: ~\Documents\Optimus\optimus\dask\columns.py in rename(columns_old_new, func) 162 163 if old_col_name != col_name: --> 164 df = df.rename({old_col_name: col_name[1]}) 165 166 df.ext.meta = self.ext.meta ~\Anaconda3\lib\site-packages\dask\dataframe\core.py in rename(self, index, columns) 2707 def rename(self, index=None, columns=None): 2708 if index is not None: -> 2709 raise ValueError("Cannot rename index.") 2710 2711 # *args here is index, columns but columns arg is already used ValueError: Cannot rename index.
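The rename fails because dask's DataFrame.rename treats a positional argument as index labels, which it refuses to rename. Passing a columns= mapping is the call that works on the raw frame:

ddf = ddf.rename(columns={"id": "id1"})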
df.ext.display("all")
id (int64, not nullable) | firstName (object, not nullable) | lastName (object, not nullable) | billingId (float64, not nullable) | product (object, not nullable) | price (float64, not nullable) | birth (object, not nullable) | dummyCol (object, not nullable) |
---|---|---|---|---|---|---|---|
1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never |
2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna |
3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give |
4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you |
5 | Albert | Einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up |
6 | Galileo | ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI | 672.0 | arepa | 5.0 | 1930/08/12 | never |
7 | CaRL | Ga%%%uss | 323.0 | taco | 3.0 | 1970/07/13 | gonna |
8 | David | H$$$ilbert | 624.0 | taaaccoo | 3.0 | 1950/07/14 | let |
9 | Johannes | KEPLER | 735.0 | taco | 3.0 | 1920/04/22 | you |
10 | JaMES | M$$ax%%well | 875.0 | taco | 3.0 | 1923/03/12 | down |
11 | Isaac | Newton | 992.0 | pasta | 9.0 | 1999/02/15 | never⋅ |
12 | Emmy%% | Nöether$ | 234.0 | pasta | 9.0 | 1993/12/08 | gonna |
13 | nan | nan | nan | nan | nan | nan | nan |
df.cols.remove_special_chars("lastName")
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-20-8a110b84c317> in <module> ----> 1 df.cols.remove_special_chars("lastName") AttributeError: 'Cols' object has no attribute 'remove_special_chars'
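The Dask Cols backend simply does not implement this method yet. A plain-Dask stand-in that strips everything except letters, digits and spaces (the regex is an assumption about what counts as a special character):

ddf["lastName"] = ddf["lastName"].str.replace(r"[^A-Za-z0-9 ]", "", regex=True)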
df.ext.send()
Send!
df.ext.display()
id (int64, not nullable) | firstName (object, not nullable) | lastName (object, not nullable) | billingId (float64, not nullable) | product (object, not nullable) | price (float64, not nullable) | birth (object, not nullable) | dummyCol (object, not nullable) |
---|---|---|---|---|---|---|---|
1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never |
2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna |
3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give |
4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you |
5 | Albert | Einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up |
6 | Galileo | ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI | 672.0 | arepa | 5.0 | 1930/08/12 | never |
7 | CaRL | Ga%%%uss | 323.0 | taco | 3.0 | 1970/07/13 | gonna |
8 | David | H$$$ilbert | 624.0 | taaaccoo | 3.0 | 1950/07/14 | let |
9 | Johannes | KEPLER | 735.0 | taco | 3.0 | 1920/04/22 | you |
10 | JaMES | M$$ax%%well | 875.0 | taco | 3.0 | 1923/03/12 | down |
11 | Isaac | Newton | 992.0 | pasta | 9.0 | 1999/02/15 | never⋅ |
df.cols.replace("lastName","$", "","chars")
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-25-bd454deb3472> in <module> ----> 1 df.cols.replace("lastName","$", "","chars") ~\Documents\Optimus\optimus\dask\columns.py in replace(input_cols, search, replace_by, search_by, output_cols) 354 355 --> 356 check_column_numbers(input_cols, "*") 357 output_cols = get_output_cols(input_cols, output_cols) 358 ~\Documents\Optimus\optimus\helpers\columns.py in check_column_numbers(columns, number) 198 """ 199 if columns is None: --> 200 RaiseIt.value_error(columns, "not None") 201 202 count = len(columns) ~\Documents\Optimus\optimus\helpers\raiseit.py in value_error(var, data_values) 76 type=divisor.join(map( 77 lambda x: "'" + x + "'", ---> 78 data_values)), var_type=one_list_to_val(var))) 79 80 @staticmethod ValueError: 'columns' must be 'not None', received 'None'
from optimus.profiler.profiler import Profiler
p = Profiler()
df.cols.count_by_dtypes("*")
{'id': {'int64': 13}, 'firstName': {'object': 13}, 'lastName': {'object': 13}, 'billingId': {'float64': 13}, 'product': {'object': 13}, 'price': {'float64': 13}, 'birth': {'object': 13}, 'dummyCol': {'object': 13}}
{'id': {'int': 13, 'float': 0, 'object': 0}, 'firstName': {'int': 0, 'float': 0, 'object': 13}, 'lastName': {'int': 0, 'float': 0, 'object': 13}, 'billingId': {'int': 0, 'float': 13, 'object': 0}, 'product': {'int': 0, 'float': 0, 'object': 13}, 'price': {'int': 0, 'float': 13, 'object': 0}, 'birth': {'int': 0, 'float': 0, 'object': 13}, 'dummyCol': {'int': 0, 'float': 0, 'object': 13}}
import dask
import dask.datasets
import numpy as np
import time
from distributed import Client
client = Client()
client
df = dask.datasets.timeseries()       # synthetic frame with id, name, x, y columns
df = df.repartition(npartitions=300)  # split into 300 smaller partitions
df = client.persist(df)               # materialize the partitions in worker memory
C:\Users\argenisleon\Anaconda3\lib\site-packages\dask\dataframe\io\demo.py:91: FutureWarning: Creating a DatetimeIndex by passing range endpoints is deprecated. Use `pandas.date_range` instead. freq=partition_freq)) C:\Users\argenisleon\Anaconda3\lib\site-packages\dask\dataframe\io\demo.py:45: FutureWarning: Creating a DatetimeIndex by passing range endpoints is deprecated. Use `pandas.date_range` instead. index = pd.DatetimeIndex(start=start, end=end, freq=freq, name='timestamp')
def random_indexer(df):
    # AND together up to 15 random threshold comparisons on the x/y columns.
    indexer = ~df.index.isnull()
    for i in range(np.random.randint(15) + 1):
        col = np.random.choice(['x', 'y'])
        value = np.random.uniform(-1, 1)
        op = np.random.choice([lambda x, y: x < y, lambda x, y: x > y])
        indexer = np.logical_and(indexer, op(df[col], value))
    return indexer

def random_statistic(indexer, df):
    # Apply a random aggregate to a random column of the masked rows.
    # The print below emits the (very long) lazy-Series repr on every call.
    print(indexer)
    col = np.random.choice(['x', 'y', 'name'])
    if col == 'name':
        op = np.random.choice([lambda x: x.unique().size, np.min, np.max])
    else:
        op = np.random.choice([lambda x: x.unique().size, np.min, np.max, np.sum, np.mean])
    return op(df[col][indexer])
# Build 200 lazy statistics; compute/gather stay commented out below,
# so the timings only measure printing the lazy objects, not real work.
np.random.seed(137)
stats = []
for i in range(10):
    ind = random_indexer(df)
    for k in range(20):
        stats.append(random_statistic(ind, df))
st = time.time()
print(stats)
# stat_computed = client.compute(stats)
ft = time.time()
print(ft - st)
st = time.time()
# stat_results = client.gather(stat_computed)
ft = time.time()
print(ft - st)
Dask Series Structure: npartitions=300 2000-01-01 00:00:00 bool 2000-01-01 02:24:00 ... ... 2000-01-30 21:36:00 ... 2000-01-31 00:00:00 ... Name: x, dtype: bool Dask Name: logical_and, 10800 tasks
(the lazy indexer repr above is printed once per statistic, 200 times in total, with Name alternating between x and y and task counts ranging from 2100 to 10800)
[dd.Scalar<series-..., dtype=float64>, dd.Scalar<series-..., dtype=float64>, dd.Scalar<series-..., dtype=<U3>, dd.Scalar<size-ag..., dtype=int32>, ... 200 lazy dd.Scalar objects in total ...]
0.0030128955841064453
0.0
df.head()
--------------------------------------------------------------------------- CancelledError Traceback (most recent call last) <ipython-input-9-c42a15b2c7cf> in <module> ----> 1 df.head() ~\Anaconda3\lib\site-packages\dask\dataframe\core.py in head(self, n, npartitions, compute) 874 875 if compute: --> 876 result = result.compute() 877 return result 878 ~\Anaconda3\lib\site-packages\dask\base.py in compute(self, **kwargs) 154 dask.base.compute 155 """ --> 156 (result,) = compute(self, traverse=False, **kwargs) 157 return result 158 ~\Anaconda3\lib\site-packages\dask\base.py in compute(*args, **kwargs) 395 keys = [x.__dask_keys__() for x in collections] 396 postcomputes = [x.__dask_postcompute__() for x in collections] --> 397 results = schedule(dsk, keys, **kwargs) 398 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)]) 399 ~\Anaconda3\lib\site-packages\distributed\client.py in get(self, dsk, keys, restrictions, loose_restrictions, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, **kwargs) 2307 retries=retries, 2308 user_priority=priority, -> 2309 actors=actors, 2310 ) 2311 packed = pack_data(keys, futures) ~\Anaconda3\lib\site-packages\distributed\client.py in _graph_to_futures(self, dsk, keys, restrictions, loose_restrictions, priority, user_priority, resources, retries, fifo_timeout, actors) 2232 for v in s: 2233 if v not in self.futures: -> 2234 raise CancelledError(v) 2235 2236 dependencies = {k: get_dependencies(dsk, k) for k in dsk} CancelledError: ('repartition-merge-39715e6237c8baf832ed85d511f135a3', 0)
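A CancelledError on a persisted frame usually means the futures backing it were released (for example after a scheduler or client restart). Re-persisting rebuilds the partitions so head() can run again:

df = client.persist(df)
df.head()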
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-10-d243315effc1> in <module> ----> 1 df = op.load.csv("https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv", sep=",", header=True, infer_schema='false', null_value="None") NameError: name 'op' is not defined
stat_computed = client.compute(np.mode(df["id"]))
stat_results = client.gather(stat_computed)
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-14-a799f70d7185> in <module> ----> 1 stat_computed = client.compute(np.mode(df["id"])) 2 stat_results = client.gather(stat_computed) AttributeError: module 'numpy' has no attribute 'mode'
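NumPy has no mode function, exactly as the AttributeError says. For a Dask series, one lazy alternative is value_counts, whose most frequent label is the mode:

mode_id = df["id"].value_counts().idxmax()
print(mode_id.compute())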
print(stat_results)
1