%load_ext autoreload
%autoreload 2
import sys
sys.path.append("..")
from optimus import Optimus
C:\Users\argenisleon\Anaconda3\lib\site-packages\dask\config.py:161: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.
  data = yaml.load(f.read()) or {}
C:\Users\argenisleon\Anaconda3\lib\site-packages\statsmodels\compat\pandas.py:49: FutureWarning: The Panel class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version
  data_klasses = (pandas.Series, pandas.DataFrame, pandas.Panel)
op = Optimus("dask_cudf", comm=True)
df = op.load.csv("https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv", sep=",", header=True, infer_schema='true', charset="UTF-8").ext.cache()
df.ext.display()
df = df.ext.send(output="json", infer=False, advanced_stats=False)
df.cols.count_uniques("*")
{'count_uniques': {'price': 6.000274674963478}}
df.cols.count_na("*")
1
from optimus.profiler.profiler import Profiler
p = Profiler()
p.run(df)
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-11-e792527df240> in <module> 1 from optimus.profiler.profiler import Profiler 2 p = Profiler() ----> 3 p.run(df) ~\Documents\Optimus\optimus\helpers\decorators.py in timed(*args, **kw) 8 def timed(*args, **kw): 9 start_time = timeit.default_timer() ---> 10 f = method(*args, **kw) 11 _time = round(timeit.default_timer() - start_time, 2) 12 logger.print("{name}() executed in {time} sec".format(name=method.__name__, time=_time)) ~\Documents\Optimus\optimus\profiler\profiler.py in run(self, df, columns, buckets, infer, relative_error, approx_count, mismatch, advanced_stats) 72 columns = parse_columns(df, columns) 73 output = self.dataset(df, columns, buckets, infer, relative_error, approx_count, format="dict", ---> 74 mismatch=mismatch, advanced_stats=advanced_stats) 75 76 # Load jinja ~\Documents\Optimus\optimus\profiler\profiler.py in dataset(self, df, columns, buckets, infer, relative_error, approx_count, sample, stats, format, mismatch, advanced_stats) 325 self.cols_count = cols_count = len(df.columns) 326 updated_columns = self.columns_stats(df, cols_to_profile, buckets, infer, relative_error, approx_count, --> 327 mismatch, advanced_stats) 328 329 output_columns = update_dict(output_columns, updated_columns) ~\Documents\Optimus\optimus\profiler\profiler.py in columns_stats(self, df, columns, buckets, infer, relative_error, approx_count, mismatch, advanced_stats) 436 437 # Aggregation --> 438 stats = self.columns_agg(df, columns, buckets, relative_error, approx_count, advanced_stats) 439 440 # Calculate Frequency ~\Documents\Optimus\optimus\profiler\profiler.py in columns_agg(self, df, columns, buckets, relative_error, approx_count, advanced_stats) 493 funcs = [df.functions.stddev, df.functions.kurtosis, df.functions.mean, df.functions.skewness, 494 df.functions.sum, df.functions.variance, df.functions.zeros_agg] --> 495 exprs.extend(df.cols.create_exprs(cols, funcs)) 496 497 # TODO: None in basic calculation ~\Documents\Optimus\optimus\engines\base\dask\columns.py in create_exprs(self, columns, funcs, *args) 537 exprs[col_name].update(func(col_name, args)(df)) 538 else: --> 539 exprs[col_name] = func(col_name, args)(df) 540 541 result = {} ~\Documents\Optimus\optimus\engines\dask\functions.py in _kurtoris(serie) 132 def kurtosis(col_name, args): 133 def _kurtoris(serie): --> 134 result = {"kurtosis": float(stats.kurtosis(serie[col_name]))} 135 return result 136 ~\Anaconda3\lib\site-packages\dask\array\stats.py in kurtosis(a, axis, fisher, bias, nan_policy) 227 olderr = np.seterr(all='ignore') 228 try: --> 229 vals = da.where(zero, 0, m4 / m2**2.0) 230 finally: 231 np.seterr(**olderr) TypeError: unsupported operand type(s) for ** or pow(): 'Array' and 'float'
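# Hedged workaround sketch for the kurtosis failure above: this dask
# version's `m4 / m2**2.0` path breaks on Array ** float, so pull the
# column down to pandas (fine for data this small) and let scipy compute
# the statistic directly. Assumes scipy is installed and df still wraps
# a dask DataFrame with a numeric "price" column.
from scipy import stats
kurt = float(stats.kurtosis(df["price"].compute().dropna()))
print({"price": {"kurtosis": kurt}})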
op= Optimus("dask", comm=True)
# url = "https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/crime.csv"
url = "data/crime.csv"
import pandas as pd
pd.read_csv(url, encoding='latin1')
INCIDENT_NUMBER | OFFENSE_CODE | OFFENSE_CODE_GROUP | OFFENSE_DESCRIPTION | DISTRICT | REPORTING_AREA | SHOOTING | OCCURRED_ON_DATE | YEAR | MONTH | DAY_OF_WEEK | HOUR | UCR_PART | STREET | Lat | Long | Location | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | I182070945 | 619 | Larceny | LARCENY ALL OTHERS | D14 | 808 | NaN | 2018-09-02 13:00:00 | 2018 | 9 | Sunday | 13 | Part One | LINCOLN ST | 42.357791 | -71.139371 | (42.35779134, -71.13937053) |
1 | I182070943 | 1402 | Vandalism | VANDALISM | C11 | 347 | NaN | 2018-08-21 00:00:00 | 2018 | 8 | Tuesday | 0 | Part Two | HECLA ST | 42.306821 | -71.060300 | (42.30682138, -71.06030035) |
2 | I182070941 | 3410 | Towed | TOWED MOTOR VEHICLE | D4 | 151 | NaN | 2018-09-03 19:27:00 | 2018 | 9 | Monday | 19 | Part Three | CAZENOVE ST | 42.346589 | -71.072429 | (42.34658879, -71.07242943) |
3 | I182070940 | 3114 | Investigate Property | INVESTIGATE PROPERTY | D4 | 272 | NaN | 2018-09-03 21:16:00 | 2018 | 9 | Monday | 21 | Part Three | NEWCOMB ST | 42.334182 | -71.078664 | (42.33418175, -71.07866441) |
4 | I182070938 | 3114 | Investigate Property | INVESTIGATE PROPERTY | B3 | 421 | NaN | 2018-09-03 21:05:00 | 2018 | 9 | Monday | 21 | Part Three | DELHI ST | 42.275365 | -71.090361 | (42.27536542, -71.09036101) |
5 | I182070936 | 3820 | Motor Vehicle Accident Response | M/V ACCIDENT INVOLVING PEDESTRIAN - INJURY | C11 | 398 | NaN | 2018-09-03 21:09:00 | 2018 | 9 | Monday | 21 | Part Three | TALBOT AVE | 42.290196 | -71.071590 | (42.29019621, -71.07159012) |
6 | I182070933 | 724 | Auto Theft | AUTO THEFT | B2 | 330 | NaN | 2018-09-03 21:25:00 | 2018 | 9 | Monday | 21 | Part One | NORMANDY ST | 42.306072 | -71.082733 | (42.30607218, -71.08273260) |
7 | I182070932 | 3301 | Verbal Disputes | VERBAL DISPUTE | B2 | 584 | NaN | 2018-09-03 20:39:37 | 2018 | 9 | Monday | 20 | Part Three | LAWN ST | 42.327016 | -71.105551 | (42.32701648, -71.10555088) |
8 | I182070931 | 301 | Robbery | ROBBERY - STREET | C6 | 177 | NaN | 2018-09-03 20:48:00 | 2018 | 9 | Monday | 20 | Part One | MASSACHUSETTS AVE | 42.331521 | -71.070853 | (42.33152148, -71.07085307) |
9 | I182070929 | 3301 | Verbal Disputes | VERBAL DISPUTE | C11 | 364 | NaN | 2018-09-03 20:38:00 | 2018 | 9 | Monday | 20 | Part Three | LESLIE ST | 42.295147 | -71.058608 | (42.29514664, -71.05860832) |
10 | I182070928 | 3301 | Verbal Disputes | VERBAL DISPUTE | C6 | 913 | NaN | 2018-09-03 19:55:00 | 2018 | 9 | Monday | 19 | Part Three | OCEAN VIEW DR | 42.319579 | -71.040328 | (42.31957856, -71.04032766) |
11 | I182070927 | 3114 | Investigate Property | INVESTIGATE PROPERTY | C6 | 936 | NaN | 2018-09-03 20:19:00 | 2018 | 9 | Monday | 20 | Part Three | DALESSIO CT | 42.340115 | -71.053390 | (42.34011469, -71.05339029) |
12 | I182070923 | 3108 | Fire Related Reports | FIRE REPORT - HOUSE, BUILDING, ETC. | D4 | 139 | NaN | 2018-09-03 19:58:00 | 2018 | 9 | Monday | 19 | Part Three | MARLBOROUGH ST | 42.350388 | -71.087853 | (42.35038760, -71.08785290) |
13 | I182070922 | 2647 | Other | THREATS TO DO BODILY HARM | B3 | 429 | NaN | 2018-09-03 20:39:00 | 2018 | 9 | Monday | 20 | Part Two | WOODROW AVE | 42.286470 | -71.087147 | (42.28647012, -71.08714661) |
14 | I182070921 | 3201 | Property Lost | PROPERTY - LOST | B3 | 469 | NaN | 2018-09-02 14:00:00 | 2018 | 9 | Sunday | 14 | Part Three | MULVEY ST | 42.279241 | -71.096674 | (42.27924052, -71.09667382) |
15 | I182070920 | 3006 | Medical Assistance | SICK/INJURED/MEDICAL - PERSON | NaN | NaN | 2018-09-03 19:43:00 | 2018 | 9 | Monday | 19 | Part Three | NaN | 42.352875 | -71.073830 | (42.35287456, -71.07382970) | |
16 | I182070919 | 3301 | Verbal Disputes | VERBAL DISPUTE | C11 | 341 | NaN | 2018-09-03 18:52:00 | 2018 | 9 | Monday | 18 | Part Three | STONEHURST ST | 42.305264 | -71.066838 | (42.30526428, -71.06683755) |
17 | I182070918 | 3305 | Assembly or Gathering Violations | DEMONSTRATIONS/RIOT | D4 | 130 | NaN | 2018-09-03 17:00:00 | 2018 | 9 | Monday | 17 | Part Three | HUNTINGTON AVE | 42.348577 | -71.077720 | (42.34857652, -71.07772012) |
18 | I182070917 | 2647 | Other | THREATS TO DO BODILY HARM | B2 | 901 | NaN | 2018-09-03 19:52:00 | 2018 | 9 | Monday | 19 | Part Two | HORADAN WAY | 42.333717 | -71.096658 | (42.33371742, -71.09665806) |
19 | I182070915 | 614 | Larceny From Motor Vehicle | LARCENY THEFT FROM MV - NON-ACCESSORY | B2 | 181 | NaN | 2018-09-02 18:00:00 | 2018 | 9 | Sunday | 18 | Part One | SHIRLEY ST | 42.325695 | -71.068168 | (42.32569490, -71.06816778) |
20 | I182070913 | 3006 | Medical Assistance | SICK/INJURED/MEDICAL - PERSON | NaN | NaN | 2018-09-03 18:46:00 | 2018 | 9 | Monday | 18 | Part Three | WOLCOTT | -1.000000 | -1.000000 | (-1.00000000, -1.00000000) | |
21 | I182070911 | 3801 | Motor Vehicle Accident Response | M/V ACCIDENT - OTHER | A1 | 69 | NaN | 2018-09-03 18:30:00 | 2018 | 9 | Monday | 18 | Part Three | BEACON ST | 42.355644 | -71.071681 | (42.35564426, -71.07168077) |
22 | I182070910 | 3006 | Medical Assistance | SICK/INJURED/MEDICAL - PERSON | B3 | 434 | NaN | 2018-09-03 18:42:00 | 2018 | 9 | Monday | 18 | Part Three | CAPEN ST | 42.283402 | -71.080797 | (42.28340243, -71.08079740) |
23 | I182070909 | 3803 | Motor Vehicle Accident Response | M/V ACCIDENT - PERSONAL INJURY | E5 | 550 | NaN | 2018-09-03 18:33:00 | 2018 | 9 | Monday | 18 | Part Three | WASHINGTON ST | 42.275818 | -71.139913 | (42.27581799, -71.13991259) |
24 | I182070908 | 522 | Residential Burglary | BURGLARY - RESIDENTIAL - NO FORCE | B2 | 911 | NaN | 2018-09-03 18:38:00 | 2018 | 9 | Monday | 18 | Part One | ANNUNCIATION RD | 42.335062 | -71.093168 | (42.33506218, -71.09316781) |
25 | I182070906 | 3831 | Motor Vehicle Accident Response | M/V - LEAVING SCENE - PROPERTY DAMAGE | NaN | NaN | 2018-09-03 18:20:00 | 2018 | 9 | Monday | 18 | Part Three | NaN | 42.283593 | -71.055657 | (42.28359328, -71.05565683) | |
26 | I182070905 | 3006 | Medical Assistance | SICK/INJURED/MEDICAL - PERSON | D4 | 172 | NaN | 2018-09-03 18:50:00 | 2018 | 9 | Monday | 18 | Part Three | MASSACHUSETTS AVE | 42.333112 | -71.072764 | (42.33311189, -71.07276370) |
27 | I182070904 | 802 | Simple Assault | ASSAULT SIMPLE - BATTERY | C11 | 242 | NaN | 2018-09-03 18:34:00 | 2018 | 9 | Monday | 18 | Part Two | ANNAPOLIS ST | 42.317319 | -71.061509 | (42.31731905, -71.06150882) |
28 | I182070904 | 2007 | Restraining Order Violations | VIOL. OF RESTRAINING ORDER W NO ARREST | C11 | 242 | NaN | 2018-09-03 18:34:00 | 2018 | 9 | Monday | 18 | Part Two | ANNAPOLIS ST | 42.317319 | -71.061509 | (42.31731905, -71.06150882) |
29 | I182070903 | 2900 | Other | VAL - VIOLATION OF AUTO LAW - OTHER | B3 | 463 | NaN | 2018-09-03 18:55:00 | 2018 | 9 | Monday | 18 | Part Two | BLUE HILL AVE | 42.295904 | -71.087733 | (42.29590385, -71.08773294) |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
319043 | I110551302-00 | 3125 | Warrant Arrests | WARRANT ARREST | D4 | 171 | NaN | 2015-07-22 22:00:00 | 2015 | 7 | Wednesday | 22 | Part Three | HARRISON AVE | 42.335560 | -71.074364 | (42.33555954, -71.07436364) |
319044 | I110551302-00 | 623 | Larceny | LARCENY SHOPLIFTING $50 TO $199 | D4 | 171 | NaN | 2015-07-22 22:00:00 | 2015 | 7 | Wednesday | 22 | Part One | HARRISON AVE | 42.335560 | -71.074364 | (42.33555954, -71.07436364) |
319045 | I110372326-00 | 403 | Aggravated Assault | ASSAULT & BATTERY D/W - OTHER | A1 | 97 | NaN | 2016-06-14 09:40:00 | 2016 | 6 | Tuesday | 9 | Part One | SCHOOL ST | 42.357428 | -71.058326 | (42.35742837, -71.05832551) |
319046 | I110372326-00 | 3125 | Warrant Arrests | WARRANT ARREST | A1 | 97 | NaN | 2016-06-14 09:40:00 | 2016 | 6 | Tuesday | 9 | Part Three | SCHOOL ST | 42.357428 | -71.058326 | (42.35742837, -71.05832551) |
319047 | I110261417-00 | 3125 | Warrant Arrests | WARRANT ARREST | B2 | 324 | NaN | 2016-07-29 00:00:00 | 2016 | 7 | Friday | 0 | Part Three | BOWDOIN ST | 42.307038 | -71.066153 | (42.30703835, -71.06615319) |
319048 | I110261417-00 | 619 | Larceny | LARCENY OTHER $200 & OVER | B2 | 324 | NaN | 2016-07-29 00:00:00 | 2016 | 7 | Friday | 0 | Part One | BOWDOIN ST | 42.307038 | -71.066153 | (42.30703835, -71.06615319) |
319049 | I110177502-00 | 3125 | Warrant Arrests | WARRANT ARREST | B2 | 318 | NaN | 2015-10-02 21:00:00 | 2015 | 10 | Friday | 21 | Part Three | HOMESTEAD ST | 42.311277 | -71.089093 | (42.31127726, -71.08909334) |
319050 | I110177502-00 | 802 | Simple Assault | ASSAULT & BATTERY | B2 | 318 | NaN | 2015-10-02 21:00:00 | 2015 | 10 | Friday | 21 | Part Two | HOMESTEAD ST | 42.311277 | -71.089093 | (42.31127726, -71.08909334) |
319051 | I110177502-00 | 3125 | Warrant Arrests | WARRANT ARREST | B2 | 318 | NaN | 2015-10-02 21:00:00 | 2015 | 10 | Friday | 21 | Part Three | HOMESTEAD ST | 42.311277 | -71.089093 | (42.31127726, -71.08909334) |
319052 | I100636670-00 | 629 | Larceny | LARCENY OTHER $50 TO $199 | D4 | 285 | NaN | 2016-06-05 17:23:00 | 2016 | 6 | Sunday | 17 | Part One | COVENTRY ST | 42.336951 | -71.085748 | (42.33695098, -71.08574813) |
319053 | I100636670-00 | 3125 | Warrant Arrests | WARRANT ARREST | D4 | 285 | NaN | 2016-06-05 17:23:00 | 2016 | 6 | Sunday | 17 | Part Three | COVENTRY ST | 42.336951 | -71.085748 | (42.33695098, -71.08574813) |
319054 | I100340225-00 | 3125 | Warrant Arrests | WARRANT ARREST | A1 | 77 | NaN | 2015-07-27 10:47:00 | 2015 | 7 | Monday | 10 | Part Three | BOWDOIN SQ | 42.361645 | -71.062299 | (42.36164502, -71.06229949) |
319055 | I100340225-00 | 339 | Robbery | ROBBERY - UNARMED - STREET | A1 | 77 | NaN | 2015-07-27 10:47:00 | 2015 | 7 | Monday | 10 | Part One | BOWDOIN SQ | 42.361645 | -71.062299 | (42.36164502, -71.06229949) |
319056 | I100222105-02 | 3125 | Warrant Arrests | WARRANT ARREST | E13 | 572 | NaN | 2015-08-03 16:22:00 | 2015 | 8 | Monday | 16 | Part Three | COLUMBUS AVE | 42.313628 | -71.095603 | (42.31362799, -71.09560307) |
319057 | I100033064-00 | 2907 | Violations | VAL - OPERATING AFTER REV/SUSP. | B2 | 304 | NaN | 2016-07-29 18:20:00 | 2016 | 7 | Friday | 18 | Part Two | SLAYTON WAY | 42.321770 | -71.097798 | (42.32177032, -71.09779774) |
319058 | I100033064-00 | 2910 | Violations | VAL - OPERATING AFTER REV/SUSP. | B2 | 304 | NaN | 2016-07-29 18:20:00 | 2016 | 7 | Friday | 18 | Part Two | SLAYTON WAY | 42.321770 | -71.097798 | (42.32177032, -71.09779774) |
319059 | I090321958-00 | 3125 | Warrant Arrests | WARRANT ARREST | C11 | 355 | NaN | 2016-02-01 01:43:00 | 2016 | 2 | Monday | 1 | Part Three | GENEVA AVE | NaN | NaN | (0.00000000, 0.00000000) |
319060 | I090321958-00 | 3125 | Warrant Arrests | WARRANT ARREST | C11 | 355 | NaN | 2016-02-01 01:43:00 | 2016 | 2 | Monday | 1 | Part Three | GENEVA AVE | NaN | NaN | (0.00000000, 0.00000000) |
319061 | I090317057-00 | 403 | Aggravated Assault | ASSAULT & BATTERY D/W - OTHER | B3 | 458 | NaN | 2015-11-20 11:15:00 | 2015 | 11 | Friday | 11 | Part One | BLUE HILL AVE | 42.301897 | -71.085549 | (42.30189690, -71.08554944) |
319062 | I090317057-00 | 3125 | Warrant Arrests | WARRANT ARREST | B3 | 458 | NaN | 2015-11-20 11:15:00 | 2015 | 11 | Friday | 11 | Part Three | BLUE HILL AVE | 42.301897 | -71.085549 | (42.30189690, -71.08554944) |
319063 | I080542626-00 | 3125 | Warrant Arrests | WARRANT ARREST | A1 | 111 | NaN | 2015-08-12 12:00:00 | 2015 | 8 | Wednesday | 12 | Part Three | BOYLSTON ST | 42.352312 | -71.063705 | (42.35231190, -71.06370510) |
319064 | I080542626-00 | 1848 | Drug Violation | DRUGS - POSS CLASS B - INTENT TO MFR DIST DISP | A1 | 111 | NaN | 2015-08-12 12:00:00 | 2015 | 8 | Wednesday | 12 | Part Two | BOYLSTON ST | 42.352312 | -71.063705 | (42.35231190, -71.06370510) |
319065 | I080542626-00 | 1849 | Drug Violation | DRUGS - POSS CLASS B - COCAINE, ETC. | A1 | 111 | NaN | 2015-08-12 12:00:00 | 2015 | 8 | Wednesday | 12 | Part Two | BOYLSTON ST | 42.352312 | -71.063705 | (42.35231190, -71.06370510) |
319066 | I060168073-00 | 1864 | Drug Violation | DRUGS - POSS CLASS D - INTENT MFR DIST DISP | E13 | 912 | NaN | 2018-01-27 14:01:00 | 2018 | 1 | Saturday | 14 | Part Two | CENTRE ST | 42.322838 | -71.100967 | (42.32283759, -71.10096723) |
319067 | I060168073-00 | 3125 | Warrant Arrests | WARRANT ARREST | E13 | 912 | NaN | 2018-01-27 14:01:00 | 2018 | 1 | Saturday | 14 | Part Three | CENTRE ST | 42.322838 | -71.100967 | (42.32283759, -71.10096723) |
319068 | I050310906-00 | 3125 | Warrant Arrests | WARRANT ARREST | D4 | 285 | NaN | 2016-06-05 17:25:00 | 2016 | 6 | Sunday | 17 | Part Three | COVENTRY ST | 42.336951 | -71.085748 | (42.33695098, -71.08574813) |
319069 | I030217815-08 | 111 | Homicide | MURDER, NON-NEGLIGIENT MANSLAUGHTER | E18 | 520 | NaN | 2015-07-09 13:38:00 | 2015 | 7 | Thursday | 13 | Part One | RIVER ST | 42.255926 | -71.123172 | (42.25592648, -71.12317207) |
319070 | I030217815-08 | 3125 | Warrant Arrests | WARRANT ARREST | E18 | 520 | NaN | 2015-07-09 13:38:00 | 2015 | 7 | Thursday | 13 | Part Three | RIVER ST | 42.255926 | -71.123172 | (42.25592648, -71.12317207) |
319071 | I010370257-00 | 3125 | Warrant Arrests | WARRANT ARREST | E13 | 569 | NaN | 2016-05-31 19:35:00 | 2016 | 5 | Tuesday | 19 | Part Three | NEW WASHINGTON ST | 42.302333 | -71.111565 | (42.30233307, -71.11156487) |
319072 | 142052550 | 3125 | Warrant Arrests | WARRANT ARREST | D4 | 903 | NaN | 2015-06-22 00:12:00 | 2015 | 6 | Monday | 0 | Part Three | WASHINGTON ST | 42.333839 | -71.080290 | (42.33383935, -71.08029038) |
319073 rows × 17 columns
from dask import dataframe as dd
df = dd.read_csv(url, encoding='latin1').reset_index()
df.rows.limit(5).ext.display()
index (int64) | num (int64) | idk (int64)
---|---|---
0 | 1 | 2
1 | 2 | 3
2 | 3 | 4
3 | 4 | 5
4 | 5 | 6
df.cols.min("num")
{'num': {'min': 1}}
df.cols.min("*")
{'num': {'min': 1}, 'idk': {'min': 2}}
df.cols.percentile("num")
{'num': {'percentile': {'0.5': 6.0}}}
df.cols.percentile("*")
{'num': {'percentile': {'0.5': 6.0}}, 'idk': {'percentile': {'0.5': 4.0}}}
a = {0.25: 3.5, 0.5: 6.0, 0.75: 8.5}
print(a)
{0.25: 3.5, 0.5: 6.0, 0.75: 8.5}
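# A hedged sketch of how the same quartiles come out of plain Dask,
# assuming df wraps a dask DataFrame with a numeric "num" column:
quartiles = df["num"].quantile([0.25, 0.5, 0.75]).compute()
print(quartiles.to_dict())  # e.g. {0.25: 3.5, 0.5: 6.0, 0.75: 8.5}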
df.rows.select((df["num"] >= 6.8) & (df["num"] <= 99.3)).rows.limit(10).ext.display()
C:\Users\argenisleon\Anaconda3\lib\site-packages\dask\dataframe\core.py:4382: UserWarning: Insufficient elements for `head`. 5 elements requested, only 4 elements available. Try passing larger `npartitions` to `head`. warnings.warn(msg.format(n, len(r)))
   num  idk
6    7    3
7    8    4
8    9    5
9   10    6
[OrderedDict([('num', 7), ('idk', 3)]), OrderedDict([('num', 8), ('idk', 4)]), OrderedDict([('num', 9), ('idk', 5)]), OrderedDict([('num', 10), ('idk', 6)])]
num (int64) | idk (int64)
---|---
7 | 3
8 | 4
9 | 5
10 | 6
df.outliers.tukey("num").select().ext.display()
{'num': {'percentile': {'0.25': 3.5, '0.5': 6.0, '0.75': 8.5}}} {'num': {'percentile': {'0.25': 3.5, '0.5': 6.0, '0.75': 8.5}}}
C:\Users\argenisleon\Anaconda3\lib\site-packages\dask\dataframe\core.py:4382: UserWarning: Insufficient elements for `head`. 10 elements requested, only 1 elements available. Try passing larger `npartitions` to `head`. warnings.warn(msg.format(n, len(r)))
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-65-d3c1de2af819> in <module> ----> 1 df.outliers.tukey("num").select().ext.display() ~\Documents\Optimus\optimus\engines\dask\extension.py in display(limit, columns, title, truncate) 332 def display(limit=None, columns=None, title=None, truncate=True): 333 # TODO: limit, columns, title, truncate --> 334 Ext.table(limit, columns, title, truncate) 335 336 @staticmethod ~\Documents\Optimus\optimus\engines\dask\extension.py in table(limit, columns, title, truncate) 338 try: 339 if __IPYTHON__ and DataFrame.output is "html": --> 340 result = Ext.table_html(title=title, limit=limit, columns=columns, truncate=truncate) 341 print_html(result) 342 else: ~\Documents\Optimus\optimus\engines\dask\extension.py in table_html(limit, columns, title, full, truncate, count) 288 data = df.cols.select(columns).ext.to_dict() 289 else: --> 290 data = df.cols.select(columns).rows.limit(limit).ext.to_dict() 291 292 # Load the Jinja template ~\AppData\Roaming\Python\Python37\site-packages\pandas\core\generic.py in __getattr__(self, name) 5065 if self._info_axis._can_hold_identifiers_and_holds_name(name): 5066 return self[name] -> 5067 return object.__getattribute__(self, name) 5068 5069 def __setattr__(self, name, value): AttributeError: 'DataFrame' object has no attribute 'ext'
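# tukey().select() fails because the selection falls back to a plain dask
# DataFrame that has no .ext accessor. A hedged sketch of the same
# Tukey-fence selection using plain Dask operations:
q = df["num"].quantile([0.25, 0.75]).compute()
iqr = q[0.75] - q[0.25]
lower, upper = q[0.25] - 1.5 * iqr, q[0.75] + 1.5 * iqr
print(df[(df["num"] < lower) | (df["num"] > upper)].compute())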
from dask import dataframe as dd
df = dd.read_csv("data/foo.csv", sep=",").head(20)
C:\Users\argenisleon\Anaconda3\lib\site-packages\dask\dataframe\core.py:4382: UserWarning: Insufficient elements for `head`. 20 elements requested, only 19 elements available. Try passing larger `npartitions` to `head`. warnings.warn(msg.format(n, len(r)))
df.rows.between("id",1,5).ext.display()
id (int64) | firstName (object) | lastName (object) | billingId (float64) | product (object) | price (float64) | birth (object) | dummyCol (object)
---|---|---|---|---|---|---|---
2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna
3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give
4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you
df1 = df.rows.append(df)
df1.ext.display()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-66-44bea7b62d53> in <module> ----> 1 df1.ext.display() NameError: name 'df1' is not defined
#https://github.com/dask/dask/pull/4229#issuecomment-449123512
df["id"].mode().compute()
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-178-4167ff9155e5> in <module> ----> 1 df["id"].mode().compute() AttributeError: 'Series' object has no attribute 'mode'
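# Series.mode is missing in this dask version (see the PR linked above).
# A hedged equivalent via value_counts: the mode is every value whose
# count equals the maximum count.
counts = df["id"].value_counts().compute()
print(counts[counts == counts.max()].index.tolist())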
import pandas as pd
df = pd.read_csv("https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv", sep=",", header=0)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-9-ae93fea6af71> in <module> ----> 1 df = pd.read_csv("https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv", sep=",", header=0) NameError: name 'pd' is not defined
df.head(20)
C:\Users\argenisleon\Anaconda3\lib\site-packages\dask\dataframe\core.py:4382: UserWarning: Insufficient elements for `head`. 20 elements requested, only 13 elements available. Try passing larger `npartitions` to `head`. warnings.warn(msg.format(n, len(r)))
id | firstName | lastName | billingId | product | price | birth | dummyCol | |
---|---|---|---|---|---|---|---|---|
0 | 1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never |
1 | 2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna |
2 | 3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give |
3 | 4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you |
4 | 5 | Albert | Einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up |
5 | 6 | Galileo | GALiLEI | 672.0 | arepa | 5.0 | 1930/08/12 | never |
6 | 7 | CaRL | Ga%%%uss | 323.0 | taco | 3.0 | 1970/07/13 | gonna |
7 | 8 | David | H$$$ilbert | 624.0 | taaaccoo | 3.0 | 1950/07/14 | let |
8 | 9 | Johannes | KEPLER | 735.0 | taco | 3.0 | 1920/04/22 | you |
9 | 10 | JaMES | M$$ax%%well | 875.0 | taco | 3.0 | 1923/03/12 | down |
10 | 11 | Isaac | Newton | 992.0 | pasta | 9.0 | 1999/02/15 | never |
11 | 12 | Emmy%% | Nöether$ | 234.0 | pasta | 9.0 | 1993/12/08 | gonna |
12 | 13 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
df.cols.names()
['id', 'firstName', 'lastName', 'billingId', 'product', 'price', 'birth', 'dummyCol']
df = op.load.csv("https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv", sep=",", header=True, infer_schema='false', null_value="None")
https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv
df.rows.limit(5).cols.lower("lastName").ext.display()
id (int64) | firstName (object) | lastName (object) | billingId (float64) | product (object) | price (float64) | birth (object) | dummyCol (object)
---|---|---|---|---|---|---|---
1 | Luis | alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never
2 | André | ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna
3 | NiELS | böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give
4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you
5 | Albert | einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up
df.rows.limit(5).cols.min_max_scaler("billingId").ext.display()
..\optimus\engines\base\dask\columns.py:160: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result. scaler.transform(_df)[input_cols]
--------------------------------------------------------------------------- IndexError Traceback (most recent call last) <ipython-input-8-1db9dc043f56> in <module> ----> 1 df.rows.limit(5).cols.min_max_scaler("billingId").ext.display() ~\Documents\Optimus\optimus\engines\base\dask\columns.py in min_max_scaler(self, input_cols, output_cols) 158 _df = df[input_cols] 159 scaler.fit(_df) --> 160 scaler.transform(_df)[input_cols] 161 return df 162 IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices
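# The IndexError above: scaler.transform returns a NumPy array, which
# cannot be indexed with column labels. A hedged pandas-level sketch of
# the intended scaling, on hypothetical standalone data:
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
pdf = pd.DataFrame({"billingId": [123.0, 423.0, 551.0, 521.0, 634.0]})
pdf["billingId"] = MinMaxScaler().fit_transform(pdf[["billingId"]]).ravel()
print(pdf)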
df.ext.display(13)
id (int64) | firstName (object) | lastName (object) | billingId (float64) | product (object) | price (float64) | birth (object) | dummyCol (object)
---|---|---|---|---|---|---|---
1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never
2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna
3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give
4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you
5 | Albert | Einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up
6 | Galileo | ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI | 672.0 | arepa | 5.0 | 1930/08/12 | never
7 | CaRL | Ga%%%uss | 323.0 | taco | 3.0 | 1970/07/13 | gonna
8 | David | H$$$ilbert | 624.0 | taaaccoo | 3.0 | 1950/07/14 | let
9 | Johannes | KEPLER | 735.0 | taco | 3.0 | 1920/04/22 | you
10 | JaMES | M$$ax%%well | 875.0 | taco | 3.0 | 1923/03/12 | down
11 | Isaac | Newton | 992.0 | pasta | 9.0 | 1999/02/15 | never⋅
12 | Emmy%% | Nöether$ | 234.0 | pasta | 9.0 | 1993/12/08 | gonna
13 | nan | nan | nan | nan | nan | nan | nan
df.cols.impute("billingId",output_cols="hola").ext.display(13)
id (int64) | firstName (object) | lastName (object) | billingId (float64) | product (object) | price (float64) | birth (object) | dummyCol (object) | hola (float64)
---|---|---|---|---|---|---|---|---
1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never | 123.0
2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna | 423.0
3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give | 551.0
4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you | 521.0
5 | Albert | Einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up | 634.0
6 | Galileo | ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI | 672.0 | arepa | 5.0 | 1930/08/12 | never | 672.0
7 | CaRL | Ga%%%uss | 323.0 | taco | 3.0 | 1970/07/13 | gonna | 323.0
8 | David | H$$$ilbert | 624.0 | taaaccoo | 3.0 | 1950/07/14 | let | 624.0
9 | Johannes | KEPLER | 735.0 | taco | 3.0 | 1920/04/22 | you | 735.0
10 | JaMES | M$$ax%%well | 875.0 | taco | 3.0 | 1923/03/12 | down | 875.0
11 | Isaac | Newton | 992.0 | pasta | 9.0 | 1999/02/15 | never⋅ | 992.0
12 | Emmy%% | Nöether$ | 234.0 | pasta | 9.0 | 1993/12/08 | gonna | 234.0
13 | nan | nan | nan | nan | nan | nan | nan | 558.9166666666666
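# The value filled into "hola" on row 13 is just the column mean. A hedged
# cross-check with scikit-learn's SimpleImputer on the same twelve values:
from sklearn.impute import SimpleImputer
import numpy as np
vals = np.array([[123.0], [423.0], [551.0], [521.0], [634.0], [672.0],
                 [323.0], [624.0], [735.0], [875.0], [992.0], [234.0],
                 [np.nan]])
print(SimpleImputer(strategy="mean").fit_transform(vals)[-1])  # [558.91666667]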
df.cols.impute("billingId",output_cols="new_col").ext.display(13)
df.cols.count_na("*")
{'billingId': 1, 'id': 0, 'dummyCol': 1, 'product': 1, 'firstName': 1, 'birth': 1, 'lastName': 1, 'price': 1}
import pandas as pd
data = {'col_0': [9, -3, 0, -1, 5], 'col_1': [-2, -7, 6, 8, -5]}
df = pd.DataFrame(data)
df.head()
col_0 | col_1 | |
---|---|---|
0 | 9 | -2 |
1 | -3 | -7 |
2 | 0 | 6 |
3 | -1 | 8 |
4 | 5 | -5 |
df["col_0"].clip( 1, 5)
0    5
1    1
2    1
3    1
4    5
Name: col_0, dtype: int64
df.head(10)
id | firstName | lastName | billingId | product | price | birth | dummyCol | |
---|---|---|---|---|---|---|---|---|
0 | 1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never |
1 | 2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna |
2 | 3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give |
3 | 4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you |
4 | 5 | Albert | Einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up |
5 | 5 | Galileo | GALiLEI | 672.0 | arepa | 5.0 | 1930/08/12 | never |
6 | 5 | CaRL | Ga%%%uss | 323.0 | taco | 3.0 | 1970/07/13 | gonna |
7 | 5 | David | H$$$ilbert | 624.0 | taaaccoo | 3.0 | 1950/07/14 | let |
8 | 5 | Johannes | KEPLER | 735.0 | taco | 3.0 | 1920/04/22 | you |
9 | 5 | JaMES | M$$ax%%well | 875.0 | taco | 3.0 | 1923/03/12 | down |
df = op.load.csv("https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv", sep=",", header=True, infer_schema='false', null_value="None")
https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv
df.cols.min("id")
{'min': {'id': 1, 'min': [('min', id 1 dtype: int64)]}}
df.cols.min("*")
{'min': {'id': 1.0, 'billingId': 123.0, 'price': 3.0, 'min': [('min', id 1.0 billingId 123.0 price 3.0 dtype: float64)]}}
df.cols.iqr(["id","price"])
VALUE dict_values([0.25 4.0 0.50 7.0 0.75 10.0 Name: id, dtype: float64, 0.25 3.00 0.50 8.00 0.75 8.25 Name: price, dtype: float64])
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-53-997938715317> in <module> ----> 1 df.cols.iqr(["id","price"]) ~\Documents\Optimus\optimus\engines\base\dask\columns.py in iqr(self, columns, more, relative_error) 122 check_column_numbers(columns, "*") 123 --> 124 quartile = df.cols.percentile(columns, [0.25, 0.5, 0.75], relative_error=relative_error) 125 print(quartile) 126 for col_name in columns: ~\Documents\Optimus\optimus\engines\base\columns.py in percentile(self, columns, values, relative_error) 158 if values is None: 159 values = [0.5] --> 160 return self.agg_exprs(columns, df.functions.percentile_agg, df, values, relative_error) 161 162 def median(self, columns, relative_error=RELATIVE_ERROR): ~\Documents\Optimus\optimus\engines\base\columns.py in agg_exprs(self, columns, funcs, *args) 134 :return: 135 """ --> 136 return self.exec_agg(self.create_exprs(columns, funcs, *args)) 137 138 @staticmethod ~\Documents\Optimus\optimus\engines\base\dask\columns.py in exec_agg(exprs) 465 if agg_name == "percentile": 466 --> 467 agg_parsed = parse_percentile(columns.values()) 468 elif agg_name == "hist": 469 agg_parsed = parse_hist(agg_results) ~\Documents\Optimus\optimus\engines\base\dask\columns.py in parse_percentile(value) 444 _result = {} 445 print("VALUE", value) --> 446 for (p_value, p_result) in value.iteritems(): 447 _result.setdefault(p_value, p_result) 448 AttributeError: 'dict_values' object has no attribute 'iteritems'
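# parse_percentile chokes on dict_values here. A hedged manual IQR built
# from the quantiles directly, assuming df behaves like a dask DataFrame:
out = {}
for col in ["id", "price"]:
    q = df[col].quantile([0.25, 0.75]).compute()
    out[col] = {"iqr": float(q[0.75] - q[0.25])}
print(out)  # e.g. {'id': {'iqr': 6.0}, 'price': {'iqr': 5.25}}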
df.cols.min("*")
{'min': {'id': 1.0, 'billingId': 123.0, 'price': 3.0}}
df.head(12)
id | firstName | lastName | billingId | product | price | birth | dummyCol | |
---|---|---|---|---|---|---|---|---|
0 | 1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never |
1 | 2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna |
2 | 3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give |
3 | 4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you |
4 | 5 | Albert | Einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up |
5 | 5 | Galileo | GALiLEI | 672.0 | arepa | 5.0 | 1930/08/12 | never |
6 | 5 | CaRL | Ga%%%uss | 323.0 | taco | 3.0 | 1970/07/13 | gonna |
7 | 5 | David | H$$$ilbert | 624.0 | taaaccoo | 3.0 | 1950/07/14 | let |
8 | 5 | Johannes | KEPLER | 735.0 | taco | 3.0 | 1920/04/22 | you |
9 | 5 | JaMES | M$$ax%%well | 875.0 | taco | 3.0 | 1923/03/12 | down |
10 | 5 | Isaac | Newton | 992.0 | pasta | 9.0 | 1999/02/15 | never |
11 | 5 | Emmy%% | Nöether$ | 234.0 | pasta | 9.0 | 1993/12/08 | gonna |
df.cols.clip("id",1,5).head(10)
id | firstName | lastName | billingId | product | price | birth | dummyCol | |
---|---|---|---|---|---|---|---|---|
0 | 1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never |
1 | 2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna |
2 | 3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give |
3 | 4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you |
4 | 5 | Albert | Einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up |
5 | 5 | Galileo | GALiLEI | 672.0 | arepa | 5.0 | 1930/08/12 | never |
6 | 5 | CaRL | Ga%%%uss | 323.0 | taco | 3.0 | 1970/07/13 | gonna |
7 | 5 | David | H$$$ilbert | 624.0 | taaaccoo | 3.0 | 1950/07/14 | let |
8 | 5 | Johannes | KEPLER | 735.0 | taco | 3.0 | 1920/04/22 | you |
9 | 5 | JaMES | M$$ax%%well | 875.0 | taco | 3.0 | 1923/03/12 | down |
df.cols.qcut()
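# The qcut cell above has no recorded output. A hedged pandas-level sketch
# of what a 2-bucket quantile cut produces on the price column's values:
import pandas as pd
prices = pd.Series([10.0, 8.0, 8.0, 8.0, 8.0, 5.0, 3.0, 3.0, 3.0, 3.0, 9.0, 9.0])
print(pd.qcut(prices, 2).value_counts())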
df.ext.display(13)
id (int64) | firstName (object) | lastName (object) | billingId (float64) | product (object) | price (float64) | birth (object) | dummyCol (object)
---|---|---|---|---|---|---|---
1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never
2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna
3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give
4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you
5 | Albert | Einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up
6 | Galileo | ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI | 672.0 | arepa | 5.0 | 1930/08/12 | never
7 | CaRL | Ga%%%uss | 323.0 | taco | 3.0 | 1970/07/13 | gonna
8 | David | H$$$ilbert | 624.0 | taaaccoo | 3.0 | 1950/07/14 | let
9 | Johannes | KEPLER | 735.0 | taco | 3.0 | 1920/04/22 | you
10 | JaMES | M$$ax%%well | 875.0 | taco | 3.0 | 1923/03/12 | down
11 | Isaac | Newton | 992.0 | pasta | 9.0 | 1999/02/15 | never⋅
12 | Emmy%% | Nöether$ | 234.0 | pasta | 9.0 | 1993/12/08 | gonna
13 | nan | nan | 558.9166666666666 | nan | nan | nan | nan
import numpy as np
pd.DataFrame([[np.nan, 'dogs', 3]], index=df.index)
from sklearn.preprocessing import MinMaxScaler
import dask.dataframe as dd
import dask.array as da
scaler = MinMaxScaler()
columns = ['billingId','price']
b =df[columns]
scaler.fit(b)
c = dd.from_dask_array(da.from_array(scaler.transform(b), chunks=100),columns)
print(c.head())
# df.assign(e=c['price'])
# # print(dd.from_dask_array(c, columns).head(10))
# df[columns]= dd.from_dask_array(c)
   billingId     price
0   0.000000  1.000000
1   0.345224  0.714286
2   0.492520  0.714286
3   0.457998  0.714286
4   0.588032  0.714286
print(df1)
[[0.         1.        ]
 [0.3452244  0.71428571]
 [0.49252014 0.71428571]
 [0.4579977  0.71428571]
 [0.58803222 0.71428571]
 [0.63176064 0.28571429]
 [0.2301496  0.        ]
 [0.57652474 0.        ]
 [0.70425777 0.        ]
 [0.86536249 0.        ]
 [1.         0.85714286]
 [0.12773303 0.85714286]
 [       nan        nan]]
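# Assigning the scaled dask array back onto df can fail when divisions do
# not line up (see the commented attempts above). A hedged fallback for
# data this small: compute to pandas and scale in place.
pdf = df[columns].compute()            # assumes df exposes dask's .compute()
pdf[columns] = scaler.transform(pdf)   # NumPy array, shape (n_rows, 2)
print(pdf.head())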
df.ext.display()
df.ext.sample(5).ext.display()
id (int64) | firstName (object) | lastName (object) | billingId (float64) | product (object) | price (float64) | birth (object) | dummyCol (object)
---|---|---|---|---|---|---|---
7 | CaRL | Ga%%%uss | 323.0 | taco | 3.0 | 1970/07/13 | gonna
4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you
10 | JaMES | M$$ax%%well | 875.0 | taco | 3.0 | 1923/03/12 | down
12 | Emmy%% | Nöether$ | 234.0 | pasta | 9.0 | 1993/12/08 | gonna
1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never
5 | Albert | Einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up
df.ext.stratified_sample("firstName").ext.display()
..\optimus\engines\dask\extension.py:156: UserWarning: `meta` is not specified, inferred from partial data. Please provide `meta` if the result is unexpected. Before: .apply(func) After: .apply(func, meta={'x': 'f8', 'y': 'f8'}) for dataframe result or: .apply(func, meta=('x', 'f8')) for series result df_ = df.groupby(col_name).apply(lambda x: x.sample(2)) distributed.worker - WARNING - Compute Failed Function: subgraph_callable args: () kwargs: {} Exception: ValueError("Cannot take a larger sample than population when 'replace=False'")
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) ~\AppData\Roaming\Python\Python37\site-packages\pandas\core\groupby\groupby.py in apply(self, func, *args, **kwargs) 688 try: --> 689 result = self._python_apply_general(f) 690 except Exception: ~\AppData\Roaming\Python\Python37\site-packages\pandas\core\groupby\groupby.py in _python_apply_general(self, f) 706 keys, values, mutated = self.grouper.apply(f, self._selected_obj, --> 707 self.axis) 708 ~\AppData\Roaming\Python\Python37\site-packages\pandas\core\groupby\ops.py in apply(self, f, data, axis) 189 group_axes = _get_axes(group) --> 190 res = f(group) 191 if not _is_indexed_like(res, group_axes): ~\Documents\Optimus\optimus\engines\dask\extension.py in <lambda>(x) 155 n = min(5, df[col_name].value_counts().min()) --> 156 df = df.groupby(col_name).apply(lambda x: x.sample(2)) 157 # df_.index = df_.index.droplevel(0) ~\AppData\Roaming\Python\Python37\site-packages\pandas\core\generic.py in sample(self, n, frac, replace, weights, random_state, axis) 4864 -> 4865 locs = rs.choice(axis_length, size=n, replace=replace, p=weights) 4866 return self.take(locs, axis=axis, is_copy=False) mtrand.pyx in mtrand.RandomState.choice() ValueError: Cannot take a larger sample than population when 'replace=False' During handling of the above exception, another exception occurred: ValueError Traceback (most recent call last) <ipython-input-23-2a6d5278e89b> in <module> ----> 1 df.ext.stratified_sample("firstName").ext.display() ~\Documents\Optimus\optimus\engines\dask\extension.py in display(limit, columns, title, truncate) 328 def display(limit=None, columns=None, title=None, truncate=True): 329 # TODO: limit, columns, title, truncate --> 330 Ext.table(limit, columns, title, truncate) 331 332 @staticmethod ~\Documents\Optimus\optimus\engines\dask\extension.py in table(limit, columns, title, truncate) 334 try: 335 if __IPYTHON__ and DataFrame.output is "html": --> 336 result = Ext.table_html(title=title, limit=limit, columns=columns, truncate=truncate) 337 print_html(result) 338 else: ~\Documents\Optimus\optimus\engines\dask\extension.py in table_html(limit, columns, title, full, truncate, count) 286 data = df.cols.select(columns).ext.to_dict() 287 else: --> 288 data = df.cols.select(columns).rows.limit(limit).ext.to_dict() 289 290 # Load the Jinja template ~\Documents\Optimus\optimus\engines\dask\extension.py in to_dict() 66 67 # Because asDict can return messed columns names we order ---> 68 for index, row in df.iterrows(): 69 # _row = row.asDict() 70 r = collections.OrderedDict() ~\Anaconda3\lib\site-packages\dask\dataframe\core.py in iterrows(self) 2872 def iterrows(self): 2873 for i in range(self.npartitions): -> 2874 df = self.get_partition(i).compute() 2875 for row in df.iterrows(): 2876 yield row ~\Anaconda3\lib\site-packages\dask\base.py in compute(self, **kwargs) 154 dask.base.compute 155 """ --> 156 (result,) = compute(self, traverse=False, **kwargs) 157 return result 158 ~\Anaconda3\lib\site-packages\dask\base.py in compute(*args, **kwargs) 395 keys = [x.__dask_keys__() for x in collections] 396 postcomputes = [x.__dask_postcompute__() for x in collections] --> 397 results = schedule(dsk, keys, **kwargs) 398 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)]) 399 ~\Anaconda3\lib\site-packages\distributed\client.py in get(self, dsk, keys, restrictions, loose_restrictions, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, 
**kwargs) 2319 try: 2320 results = self.gather(packed, asynchronous=asynchronous, -> 2321 direct=direct) 2322 finally: 2323 for f in futures.values(): ~\Anaconda3\lib\site-packages\distributed\client.py in gather(self, futures, errors, maxsize, direct, asynchronous) 1653 return self.sync(self._gather, futures, errors=errors, 1654 direct=direct, local_worker=local_worker, -> 1655 asynchronous=asynchronous) 1656 1657 @gen.coroutine ~\Anaconda3\lib\site-packages\distributed\client.py in sync(self, func, *args, **kwargs) 671 return future 672 else: --> 673 return sync(self.loop, func, *args, **kwargs) 674 675 def __repr__(self): ~\Anaconda3\lib\site-packages\distributed\utils.py in sync(loop, func, *args, **kwargs) 275 e.wait(10) 276 if error[0]: --> 277 six.reraise(*error[0]) 278 else: 279 return result[0] ~\Anaconda3\lib\site-packages\six.py in reraise(tp, value, tb) 691 if value.__traceback__ is not tb: 692 raise value.with_traceback(tb) --> 693 raise value 694 finally: 695 value = None ~\Anaconda3\lib\site-packages\distributed\utils.py in f() 260 if timeout is not None: 261 future = gen.with_timeout(timedelta(seconds=timeout), future) --> 262 result[0] = yield future 263 except Exception as exc: 264 error[0] = sys.exc_info() ~\Anaconda3\lib\site-packages\tornado\gen.py in run(self) 1131 1132 try: -> 1133 value = future.result() 1134 except Exception: 1135 self.had_exception = True ~\Anaconda3\lib\site-packages\tornado\gen.py in run(self) 1139 if exc_info is not None: 1140 try: -> 1141 yielded = self.gen.throw(*exc_info) 1142 finally: 1143 # Break up a reference to itself ~\Anaconda3\lib\site-packages\distributed\client.py in _gather(self, futures, errors, direct, local_worker) 1498 six.reraise(type(exception), 1499 exception, -> 1500 traceback) 1501 if errors == 'skip': 1502 bad_keys.add(key) ~\Anaconda3\lib\site-packages\six.py in reraise(tp, value, tb) 690 value = tp() 691 if value.__traceback__ is not tb: --> 692 raise value.with_traceback(tb) 693 raise value 694 finally: ~\Anaconda3\lib\site-packages\dask\optimization.py in __call__(self, *args) 940 % (len(self.inkeys), len(args))) 941 return _get_recursive(self.dsk, self.outkey, --> 942 dict(zip(self.inkeys, args))) 943 944 def __reduce__(self): ~\Anaconda3\lib\site-packages\dask\core.py in _get_recursive(dsk, x, cache) 130 return cache[x] 131 elif hashable and x in dsk: --> 132 res = cache[x] = _get_recursive(dsk, dsk[x], cache) 133 return res 134 elif type(x) is tuple and x and callable(x[0]): # istask ~\Anaconda3\lib\site-packages\dask\core.py in _get_recursive(dsk, x, cache) 134 elif type(x) is tuple and x and callable(x[0]): # istask 135 func, args = x[0], x[1:] --> 136 args2 = [_get_recursive(dsk, k, cache) for k in args] 137 return func(*args2) 138 return x ~\Anaconda3\lib\site-packages\dask\core.py in <listcomp>(.0) 134 elif type(x) is tuple and x and callable(x[0]): # istask 135 func, args = x[0], x[1:] --> 136 args2 = [_get_recursive(dsk, k, cache) for k in args] 137 return func(*args2) 138 return x ~\Anaconda3\lib\site-packages\dask\core.py in _get_recursive(dsk, x, cache) 135 func, args = x[0], x[1:] 136 args2 = [_get_recursive(dsk, k, cache) for k in args] --> 137 return func(*args2) 138 return x 139 ~\Anaconda3\lib\site-packages\dask\dataframe\core.py in apply_and_enforce(func, args, kwargs, meta) 3682 3683 Ensures the output has the same columns, even if empty.""" -> 3684 df = func(*args, **kwargs) 3685 if isinstance(df, (pd.DataFrame, pd.Series, pd.Index)): 3686 if len(df) == 0: 
~\Anaconda3\lib\site-packages\dask\dataframe\groupby.py in _groupby_slice_apply(df, grouper, key, func, *args, **kwargs) 145 if key: 146 g = g[key] --> 147 return g.apply(func, *args, **kwargs) 148 149 ~\AppData\Roaming\Python\Python37\site-packages\pandas\core\groupby\groupby.py in apply(self, func, *args, **kwargs) 699 700 with _group_selection_context(self): --> 701 return self._python_apply_general(f) 702 703 return result ~\AppData\Roaming\Python\Python37\site-packages\pandas\core\groupby\groupby.py in _python_apply_general(self, f) 705 def _python_apply_general(self, f): 706 keys, values, mutated = self.grouper.apply(f, self._selected_obj, --> 707 self.axis) 708 709 return self._wrap_applied_output( ~\AppData\Roaming\Python\Python37\site-packages\pandas\core\groupby\ops.py in apply(self, f, data, axis) 188 # group might be modified 189 group_axes = _get_axes(group) --> 190 res = f(group) 191 if not _is_indexed_like(res, group_axes): 192 mutated = True ~\Documents\Optimus\optimus\engines\dask\extension.py in <lambda>(x) 154 df = self 155 n = min(5, df[col_name].value_counts().min()) --> 156 df = df.groupby(col_name).apply(lambda x: x.sample(2)) 157 # df_.index = df_.index.droplevel(0) 158 return df ~\AppData\Roaming\Python\Python37\site-packages\pandas\core\generic.py in sample(self, n, frac, replace, weights, random_state, axis) 4863 "provide positive value.") 4864 -> 4865 locs = rs.choice(axis_length, size=n, replace=replace, p=weights) 4866 return self.take(locs, axis=axis, is_copy=False) 4867 mtrand.pyx in mtrand.RandomState.choice() ValueError: Cannot take a larger sample than population when 'replace=False'
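# stratified_sample fails because groups with fewer rows than the requested
# n cannot be sampled without replacement. A hedged pandas-level sketch
# that caps the draw at each group's size:
pdf = df.compute()  # small data; assumes df exposes dask's .compute()
sampled = pdf.groupby("firstName", group_keys=False).apply(
    lambda g: g.sample(min(2, len(g))))
print(sampled.head())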
df.rows.limit(5).ext.display()
id (int64) | firstName (object) | lastName (object) | billingId (float64) | product (object) | price (float64) | birth (object) | dummyCol (object)
---|---|---|---|---|---|---|---
1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never
2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna
3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give
4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you
5 | Albert | Einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up
6 | Galileo | ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI | 672.0 | arepa | 5.0 | 1930/08/12 | never
df.cols.min("")
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-7-562bbc151fba> in <module> ----> 1 df.cols.min() TypeError: min() missing 1 required positional argument: 'columns'
df.rows.sort("billingId","asc").ext.display()
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-21-7a9c606e0857> in <module> ----> 1 df.rows.sort("billingId","asc").ext.display() ~\Anaconda3\lib\site-packages\multipledispatch\dispatcher.py in __call__(self, *args, **kwargs) 276 self._cache[types] = func 277 try: --> 278 return func(*args, **kwargs) 279 280 except MDNotImplementedError: ~\Documents\Optimus\optimus\engines\dask\rows.py in sort(columns, order) 116 """ 117 columns = parse_columns(self, columns) --> 118 return self.rows.sort([(columns, order,)]) 119 120 @staticmethod ~\Anaconda3\lib\site-packages\multipledispatch\dispatcher.py in __call__(self, *args, **kwargs) 276 self._cache[types] = func 277 try: --> 278 return func(*args, **kwargs) 279 280 except MDNotImplementedError: ~\Documents\Optimus\optimus\engines\dask\rows.py in sort(col_sort) 145 df = df.meta.preserve(self, Actions.SORT_ROW.value, col_name) 146 --> 147 c = df.cols.names() 148 # It seems that is on posible to order rows in Dask using set_index. It only return data in ascendent way. 149 # We should fins a way to make it work desc and form multiple columns ~\Documents\Optimus\optimus\engines\dask\columns.py in cols(self) 966 """ 967 return Cols.exec_agg(Cols.create_exprs(columns, funcs, *args)) --> 968 969 970 TypeError: Can't instantiate abstract class Cols with abstract methods apply_by_dtypes, apply_expr, astype, boxplot, bucketizer, cell, clip, copy, correlation, count_mismatch, count_na, count_uniques, count_zeros, drop, frequency_by_group, get_meta, impute, index_to_string, iqr, is_na, keep, max_abs_scaler, min_max_scaler, move, nunique, qcut, remove, remove_accents, remove_special_chars, remove_white_spaces, replace_regex, reverse, scatter, select_by_dtypes, set, set_meta, sort, string_to_index, to_timestamp, unique, value_counts, values_to_cols, years_between, z_score
df.cols.max("price")
{'max': {'price': 10.0}}
df.cols.create_exprs()
df.dropna(how='any', subset=['price'])
id | firstName | lastName | billingId | product | price | birth | dummyCol | |
---|---|---|---|---|---|---|---|---|
npartitions=1 | ||||||||
int64 | object | object | float64 | object | float64 | object | object | |
... | ... | ... | ... | ... | ... | ... | ... |
df.rows.drop_na("price").ext.display()
any price
id (int64) | firstName (object) | lastName (object) | billingId (float64) | product (object) | price (float64) | birth (object) | dummyCol (object)
---|---|---|---|---|---|---|---
1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never
2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna
3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give
4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you
5 | Albert | Einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up
6 | Galileo | ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI | 672.0 | arepa | 5.0 | 1930/08/12 | never
7 | CaRL | Ga%%%uss | 323.0 | taco | 3.0 | 1970/07/13 | gonna
8 | David | H$$$ilbert | 624.0 | taaaccoo | 3.0 | 1950/07/14 | let
9 | Johannes | KEPLER | 735.0 | taco | 3.0 | 1920/04/22 | you
10 | JaMES | M$$ax%%well | 875.0 | taco | 3.0 | 1923/03/12 | down
11 | Isaac | Newton | 992.0 | pasta | 9.0 | 1999/02/15 | never⋅
c = df.cols.names()
df.set_index("billingId").reset_index()[c].head()
C:\Users\argenisleon\Anaconda3\lib\site-packages\numpy\lib\function_base.py:3652: RuntimeWarning: Invalid value encountered in percentile interpolation=interpolation)
id | firstName | lastName | billingId | product | price | birth | dummyCol | |
---|---|---|---|---|---|---|---|---|
0 | 1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never |
1 | 12 | Emmy%% | Nöether$ | 234.0 | pasta | 9.0 | 1993/12/08 | gonna |
2 | 7 | CaRL | Ga%%%uss | 323.0 | taco | 3.0 | 1970/07/13 | gonna |
3 | 2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna |
4 | 4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you |
df.cols.mode("id")
price | |
---|---|
price | |
3.0 | 3.0 |
5.0 | 5.0 |
8.0 | 8.0 |
9.0 | 9.0 |
10.0 | 10.0 |
df.ext.display(20)
id (int64) | firstName (object) | lastName (object) | billingId (float64) | product (object) | price (float64) | birth (object) | dummyCol (object)
---|---|---|---|---|---|---|---
1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never
2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna
3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give
4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you
5 | Albert | Einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up
6 | Galileo | ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI | 672.0 | arepa | 5.0 | 1930/08/12 | never
7 | CaRL | Ga%%%uss | 323.0 | taco | 3.0 | 1970/07/13 | gonna
8 | David | H$$$ilbert | 624.0 | taaaccoo | 3.0 | 1950/07/14 | let
9 | Johannes | KEPLER | 735.0 | taco | 3.0 | 1920/04/22 | you
10 | JaMES | M$$ax%%well | 875.0 | taco | 3.0 | 1923/03/12 | down
11 | Isaac | Newton | 992.0 | pasta | 9.0 | 1999/02/15 | never⋅
12 | Emmy%% | Nöether$ | 234.0 | pasta | 9.0 | 1993/12/08 | gonna
13 | nan | nan | nan | nan | nan | nan | nan
df1 = op.load.csv("data/foo.csv", sep=",", header=True, infer_schema='true', charset="ISO-8859-1", null_value="None")
data/foo.csv
df1.ext.display(20)
id (int64) | firstName (object) | lastName (object) | billingId (int64) | product (object) | price (int64) | birth (object) | dummyCol (object)
---|---|---|---|---|---|---|---
1 | Luis | Alvarez$$%! | 123 | Cake | 10 | 1980/07/07 | never
2 | André | Ampère | 423 | piza | 8 | 1950/07/08 | gonna
3 | NiELS | Böhr//((%% | 551 | pizza | 8 | 1990/07/09 | give
4 | PAUL | dirac$ | 521 | pizza | 8 | 1954/07/10 | you
5 | Albert | Einstein | 634 | pizza | 8 | 1990/07/11 | up
6 | Galileo | ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI | 672 | arepa | 5 | 1930/08/12 | never
7 | CaRL | Ga%%%uss | 323 | taco | 3 | 1970/07/13 | gonna
8 | David | H$$$ilbert | 624 | taaaccoo | 3 | 1950/07/14 | let
9 | Johannes | KEPLER | 735 | taco | 3 | 1920/04/22 | you
10 | JaMES | M$$ax%%well | 875 | taco | 3 | 1923/03/12 | down
11 | Isaac | Newton | 992 | pasta | 9 | 1999/02/15 | never⋅
12 | Emmy%% | Nöether$ | 234 | pasta | 9 | 1993/12/08 | gonna
13 | Max!!! | Planck!!! | 111 | hamburguer | 4 | 1994/01/04 | run⋅
14 | Fred | Hoy&&&le | 553 | pizzza | 8 | 1997/06/27 | around
15 | (((⋅⋅⋅Heinrich⋅))))) | Hertz | 116 | pizza | 8 | 1956/11/30 | and
16 | William | Gilbert### | 886 | BEER | 2 | 1958/03/26 | desert
17 | Marie | CURIE | 912 | Rice | 1 | 2000/03/22 | you
18 | Arthur | COM%%%pton | 812 | 110790 | 5 | 1899/01/01 | #
19 | JAMES | Chadwick | 467 | nan | 10 | 1921/05/03 | #
df.cols.dtypes()
{'id': 'int64', 'firstName': 'object', 'lastName': 'object', 'billingId': 'float64', 'product': 'object', 'price': 'float64', 'birth': 'object', 'dummyCol': 'object'}
df.ext.display(20)
20
id (int64) | firstName (object) | lastName (object) | billingId (float64) | product (object) | price (float64) | birth (object) | dummyCol (object)
---|---|---|---|---|---|---|---
1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never
2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna
3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give
4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you
5 | Albert | Einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up
6 | Galileo | ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI | 672.0 | arepa | 5.0 | 1930/08/12 | never
7 | CaRL | Ga%%%uss | 323.0 | taco | 3.0 | 1970/07/13 | gonna
8 | David | H$$$ilbert | 624.0 | taaaccoo | 3.0 | 1950/07/14 | let
9 | Johannes | KEPLER | 735.0 | taco | 3.0 | 1920/04/22 | you
10 | JaMES | M$$ax%%well | 875.0 | taco | 3.0 | 1923/03/12 | down
11 | Isaac | Newton | 992.0 | pasta | 9.0 | 1999/02/15 | never⋅
df.save.csv("data/dask/*.csv")
df.save.parquet("data/dask/foo.parquet")
df.save.json("data/dask/*.json")
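# A hedged sketch of the plain Dask writers these save.* calls presumably
# wrap; the "*" in the path becomes one file per partition, and to_parquet
# needs pyarrow or fastparquet installed.
import dask.dataframe as dd
ddf = dd.read_csv("data/foo.csv")
ddf.to_csv("data/dask/*.csv", index=False)
ddf.to_parquet("data/dask/foo.parquet")
ddf.to_json("data/dask/*.json")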
df.rows.create_id()
Dask DataFrame Structure:
                  id firstName lastName billingId product    price   birth dummyCol
npartitions=1
               int64    object   object   float64  object  float64  object   object
                 ...       ...      ...       ...     ...      ...     ...      ...
Dask Name: from-delayed, 3 tasks
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-12-d2c38e26c0a8> in <module> ----> 1 df.rows.create_id() ~\Documents\Optimus\optimus\dask\rows.py in create_id(column) 14 df = self 15 print(df) ---> 16 a = da.arange(df.divisions[-1] + 1, chunks=df.divisions[1:]) 17 df[column] = dd.from_dask_array(a) 18 return df TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'
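# create_id breaks because divisions are unknown (df.divisions[-1] is None).
# A hedged sketch that needs no divisions at all: cumsum over a constant
# column yields a sequential id across partitions. Assumes df behaves like
# a dask DataFrame.
ddf = df.assign(row_id=1)
ddf["row_id"] = ddf["row_id"].cumsum() - 1
print(ddf.head())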
df1 = df[(df.id > 0) & (df.id <= 7)]
df2 = df1[(df.id > 0) & (df.id <= 3)]
df2.compute().head()
C:\Users\argenisleon\Anaconda3\lib\site-packages\dask\utils.py:694: UserWarning: Boolean Series key will be reindexed to match DataFrame index. return getattr(obj, self.method)(*args, **kwargs)
id | firstName | lastName | billingId | product | price | birth | dummyCol | |
---|---|---|---|---|---|---|---|---|
0 | 1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never |
1 | 2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna |
2 | 3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give |
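# The reindex warning above comes from filtering df1 with a boolean mask
# built on df. Building the mask on the frame being filtered avoids it:
df2 = df1[(df1.id > 0) & (df1.id <= 3)]
df2.compute().head()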
df.rows.select((df.id ==1 ) ).ext.display()
id (int64) | firstName (object) | lastName (object) | billingId (float64) | product (object) | price (float64) | birth (object) | dummyCol (object)
---|---|---|---|---|---|---|---
1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never
df.rows.select_by_dtypes("id", "str").ext.display()
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) ~\Anaconda3\lib\site-packages\pyspark\sql\types.py in _parse_datatype_string(s) 845 # For backwards compatibility, "integer", "struct<fieldname: datatype>" and etc. --> 846 return from_ddl_datatype(s) 847 except: ~\Anaconda3\lib\site-packages\pyspark\sql\types.py in from_ddl_datatype(type_str) 837 return _parse_datatype_json_string( --> 838 sc._jvm.org.apache.spark.sql.api.python.PythonSQLUtils.parseDataType(type_str).json()) 839 AttributeError: 'NoneType' object has no attribute '_jvm' During handling of the above exception, another exception occurred: AttributeError Traceback (most recent call last) ~\Anaconda3\lib\site-packages\pyspark\sql\types.py in _parse_datatype_string(s) 849 # For backwards compatibility, "fieldname: datatype, fieldname: datatype" case. --> 850 return from_ddl_datatype("struct<%s>" % s.strip()) 851 except: ~\Anaconda3\lib\site-packages\pyspark\sql\types.py in from_ddl_datatype(type_str) 837 return _parse_datatype_json_string( --> 838 sc._jvm.org.apache.spark.sql.api.python.PythonSQLUtils.parseDataType(type_str).json()) 839 AttributeError: 'NoneType' object has no attribute '_jvm' During handling of the above exception, another exception occurred: AttributeError Traceback (most recent call last) <ipython-input-58-96533379ad09> in <module> ----> 1 df.rows.select_by_dtypes("id", "str").ext.display() ~\Documents\Optimus\optimus\dask\rows.py in select_by_dtypes(input_cols, data_type) 43 # self.cols.apply() 44 ---> 45 return self.where(fbdt(input_cols, data_type)) 46 47 @staticmethod ~\Documents\Optimus\optimus\audf.py in filter_row_by_data_type(col_name, data_type, get_type) 129 130 col_name = one_list_to_val(col_name) --> 131 return F.pandas_udf(pandas_udf_func, return_data_type)(col_name) ~\Anaconda3\lib\site-packages\pyspark\sql\functions.py in pandas_udf(f, returnType, functionType) 2304 return functools.partial(_create_udf, returnType=return_type, evalType=eval_type) 2305 else: -> 2306 return _create_udf(f=f, returnType=return_type, evalType=eval_type) 2307 2308 ~\Anaconda3\lib\site-packages\pyspark\sql\udf.py in _create_udf(f, returnType, evalType) 70 udf_obj = UserDefinedFunction( 71 f, returnType=returnType, name=None, evalType=evalType, deterministic=True) ---> 72 return udf_obj._wrapped() 73 74 ~\Anaconda3\lib\site-packages\pyspark\sql\udf.py in _wrapped(self) 193 194 wrapper.func = self.func --> 195 wrapper.returnType = self.returnType 196 wrapper.evalType = self.evalType 197 wrapper.deterministic = self.deterministic ~\Anaconda3\lib\site-packages\pyspark\sql\udf.py in returnType(self) 117 self._returnType_placeholder = self._returnType 118 else: --> 119 self._returnType_placeholder = _parse_datatype_string(self._returnType) 120 121 if self.evalType == PythonEvalType.SQL_SCALAR_PANDAS_UDF: ~\Anaconda3\lib\site-packages\pyspark\sql\types.py in _parse_datatype_string(s) 850 return from_ddl_datatype("struct<%s>" % s.strip()) 851 except: --> 852 raise e 853 854 ~\Anaconda3\lib\site-packages\pyspark\sql\types.py in _parse_datatype_string(s) 840 try: 841 # DDL format, "fieldname datatype, fieldname datatype". 
--> 842 return from_ddl_schema(s) 843 except Exception as e: 844 try: ~\Anaconda3\lib\site-packages\pyspark\sql\types.py in from_ddl_schema(type_str) 832 def from_ddl_schema(type_str): 833 return _parse_datatype_json_string( --> 834 sc._jvm.org.apache.spark.sql.types.StructType.fromDDL(type_str).json()) 835 836 def from_ddl_datatype(type_str): AttributeError: 'NoneType' object has no attribute '_jvm'
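The root cause sits at the bottom of the traceback: this Dask backend falls through to Spark's pandas_udf machinery, and with no SparkContext alive sc is None, hence the '_jvm' AttributeError. A plain-Dask stand-in for "select rows whose id is a string" could test numeric parsability per partition (the coercion rule and ddf are assumptions):

import pandas as pd

mask = ddf["id"].map_partitions(
    lambda s: pd.to_numeric(s, errors="coerce").isna(),  # True where not numeric
    meta=("id", "bool"),
)
ddf[mask].compute()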
df.set_index('price').compute()
C:\Users\argenisleon\Anaconda3\lib\site-packages\numpy\lib\function_base.py:3652: RuntimeWarning: Invalid value encountered in percentile interpolation=interpolation)
price | id | firstName | lastName | billingId | product | birth | dummyCol |
---|---|---|---|---|---|---|---|
3.0 | 7 | CaRL | Ga%%%uss | 323.0 | taco | 1970/07/13 | gonna |
3.0 | 8 | David | H$$$ilbert | 624.0 | taaaccoo | 1950/07/14 | let |
3.0 | 9 | Johannes | KEPLER | 735.0 | taco | 1920/04/22 | you |
3.0 | 10 | JaMES | M$$ax%%well | 875.0 | taco | 1923/03/12 | down |
5.0 | 6 | Galileo | GALiLEI | 672.0 | arepa | 1930/08/12 | never |
8.0 | 2 | André | Ampère | 423.0 | piza | 1950/07/08 | gonna |
8.0 | 3 | NiELS | Böhr//((%% | 551.0 | pizza | 1990/07/09 | give |
8.0 | 4 | PAUL | dirac$ | 521.0 | pizza | 1954/07/10 | you |
8.0 | 5 | Albert | Einstein | 634.0 | pizza | 1990/07/11 | up |
9.0 | 11 | Isaac | Newton | 992.0 | pasta | 1999/02/15 | never |
9.0 | 12 | Emmy%% | Nöether$ | 234.0 | pasta | 1993/12/08 | gonna |
10.0 | 1 | Luis | Alvarez$$%! | 123.0 | Cake | 1980/07/07 | never |
NaN | 13 | NaN | NaN | NaN | NaN | NaN | NaN |
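set_index shuffles and sorts the whole frame by the new index (note the NaN price lands last), and afterwards Dask knows the divisions, which makes range lookups cheap. A quick sketch of exploiting that:

df_by_price = df.set_index("price")
print(df_by_price.divisions)        # partition boundaries along price
df_by_price.loc[3.0:8.0].compute()  # served from the sorted index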
%%time
df.cols.min(["billingId"])
Wall time: 195 ms
{'min': {'billingId': 123.0}}
%%time
df.cols.max(["billingId"])
Wall time: 152 ms
{'max': {'billingId': 992.0}}
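Each of the calls above scans the data separately; on the raw frame the two aggregates can share a single pass by going through one dask.compute call (ddf again stands in for the wrapped dask DataFrame):

import dask
mn, mx = dask.compute(ddf["billingId"].min(), ddf["billingId"].max())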
df.cols.names()
['id', 'firstName', 'lastName', 'billingId', 'product', 'price', 'birth', 'dummyCol']
df.meta.get()
{'file_name': 'foo.csv', 'transformations': {'actions': {}}}
df.cols.rename("id", "id1")
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-12-6e558dfa1a9f> in <module> ----> 1 df.cols.rename("id", "id1") ~\Anaconda3\lib\site-packages\multipledispatch\dispatcher.py in __call__(self, *args, **kwargs) 276 self._cache[types] = func 277 try: --> 278 return func(*args, **kwargs) 279 280 except MDNotImplementedError: ~\Documents\Optimus\optimus\dask\columns.py in rename(old_column, new_column) 186 @dispatch(str, str) 187 def rename(old_column, new_column): --> 188 return Cols.rename([(old_column, new_column)], None) 189 190 @staticmethod ~\Anaconda3\lib\site-packages\multipledispatch\dispatcher.py in __call__(self, *args, **kwargs) 276 self._cache[types] = func 277 try: --> 278 return func(*args, **kwargs) 279 280 except MDNotImplementedError: ~\Documents\Optimus\optimus\dask\columns.py in rename(columns_old_new, func) 162 163 if old_col_name != col_name: --> 164 df = df.rename({old_col_name: col_name[1]}) 165 166 df.ext.meta = self.ext.meta ~\Anaconda3\lib\site-packages\dask\dataframe\core.py in rename(self, index, columns) 2707 def rename(self, index=None, columns=None): 2708 if index is not None: -> 2709 raise ValueError("Cannot rename index.") 2710 2711 # *args here is index, columns but columns arg is already used ValueError: Cannot rename index.
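The rename fails because dask's DataFrame.rename treats a positional argument as index labels, which it refuses to rename. Passing a columns= mapping is the call that works on the raw frame:

ddf = ddf.rename(columns={"id": "id1"})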
df.ext.display("all")
id (int64, not nullable) | firstName (object, not nullable) | lastName (object, not nullable) | billingId (float64, not nullable) | product (object, not nullable) | price (float64, not nullable) | birth (object, not nullable) | dummyCol (object, not nullable) |
---|---|---|---|---|---|---|---|
1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never |
2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna |
3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give |
4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you |
5 | Albert | Einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up |
6 | Galileo | ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI | 672.0 | arepa | 5.0 | 1930/08/12 | never |
7 | CaRL | Ga%%%uss | 323.0 | taco | 3.0 | 1970/07/13 | gonna |
8 | David | H$$$ilbert | 624.0 | taaaccoo | 3.0 | 1950/07/14 | let |
9 | Johannes | KEPLER | 735.0 | taco | 3.0 | 1920/04/22 | you |
10 | JaMES | M$$ax%%well | 875.0 | taco | 3.0 | 1923/03/12 | down |
11 | Isaac | Newton | 992.0 | pasta | 9.0 | 1999/02/15 | never⋅ |
12 | Emmy%% | Nöether$ | 234.0 | pasta | 9.0 | 1993/12/08 | gonna |
13 | nan | nan | nan | nan | nan | nan | nan |
df.cols.remove_special_chars("lastName")
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-20-8a110b84c317> in <module> ----> 1 df.cols.remove_special_chars("lastName") AttributeError: 'Cols' object has no attribute 'remove_special_chars'
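The Dask Cols backend simply does not implement this method yet. A plain-Dask stand-in that strips everything except letters, digits and spaces (the regex is an assumption about what counts as a special character):

ddf["lastName"] = ddf["lastName"].str.replace(r"[^A-Za-z0-9 ]", "", regex=True)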
df.ext.send()
Send!
df.ext.display()
id (int64, not nullable) | firstName (object, not nullable) | lastName (object, not nullable) | billingId (float64, not nullable) | product (object, not nullable) | price (float64, not nullable) | birth (object, not nullable) | dummyCol (object, not nullable) |
---|---|---|---|---|---|---|---|
1 | Luis | Alvarez$$%! | 123.0 | Cake | 10.0 | 1980/07/07 | never |
2 | André | Ampère | 423.0 | piza | 8.0 | 1950/07/08 | gonna |
3 | NiELS | Böhr//((%% | 551.0 | pizza | 8.0 | 1990/07/09 | give |
4 | PAUL | dirac$ | 521.0 | pizza | 8.0 | 1954/07/10 | you |
5 | Albert | Einstein | 634.0 | pizza | 8.0 | 1990/07/11 | up |
6 | Galileo | ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI | 672.0 | arepa | 5.0 | 1930/08/12 | never |
7 | CaRL | Ga%%%uss | 323.0 | taco | 3.0 | 1970/07/13 | gonna |
8 | David | H$$$ilbert | 624.0 | taaaccoo | 3.0 | 1950/07/14 | let |
9 | Johannes | KEPLER | 735.0 | taco | 3.0 | 1920/04/22 | you |
10 | JaMES | M$$ax%%well | 875.0 | taco | 3.0 | 1923/03/12 | down |
11 | Isaac | Newton | 992.0 | pasta | 9.0 | 1999/02/15 | never⋅ |
df.cols.replace("lastName","$", "","chars")
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-25-bd454deb3472> in <module> ----> 1 df.cols.replace("lastName","$", "","chars") ~\Documents\Optimus\optimus\dask\columns.py in replace(input_cols, search, replace_by, search_by, output_cols) 354 355 --> 356 check_column_numbers(input_cols, "*") 357 output_cols = get_output_cols(input_cols, output_cols) 358 ~\Documents\Optimus\optimus\helpers\columns.py in check_column_numbers(columns, number) 198 """ 199 if columns is None: --> 200 RaiseIt.value_error(columns, "not None") 201 202 count = len(columns) ~\Documents\Optimus\optimus\helpers\raiseit.py in value_error(var, data_values) 76 type=divisor.join(map( 77 lambda x: "'" + x + "'", ---> 78 data_values)), var_type=one_list_to_val(var))) 79 80 @staticmethod ValueError: 'columns' must be 'not None', received 'None'
from optimus.profiler.profiler import Profiler
p = Profiler()
df.cols.count_by_dtypes("*")
{'id': {'int64': 13}, 'firstName': {'object': 13}, 'lastName': {'object': 13}, 'billingId': {'float64': 13}, 'product': {'object': 13}, 'price': {'float64': 13}, 'birth': {'object': 13}, 'dummyCol': {'object': 13}}
{'id': {'int': 13, 'float': 0, 'object': 0}, 'firstName': {'int': 0, 'float': 0, 'object': 13}, 'lastName': {'int': 0, 'float': 0, 'object': 13}, 'billingId': {'int': 0, 'float': 13, 'object': 0}, 'product': {'int': 0, 'float': 0, 'object': 13}, 'price': {'int': 0, 'float': 13, 'object': 0}, 'birth': {'int': 0, 'float': 0, 'object': 13}, 'dummyCol': {'int': 0, 'float': 0, 'object': 13}}
import dask
import dask.datasets
import numpy as np
import time
from distributed import Client
client = Client()
client
df = dask.datasets.timeseries()       # synthetic frame with id, name, x, y columns
df = df.repartition(npartitions=300)  # split into 300 smaller partitions
df = client.persist(df)               # materialize the partitions in worker memory
C:\Users\argenisleon\Anaconda3\lib\site-packages\dask\dataframe\io\demo.py:91: FutureWarning: Creating a DatetimeIndex by passing range endpoints is deprecated. Use `pandas.date_range` instead. freq=partition_freq)) C:\Users\argenisleon\Anaconda3\lib\site-packages\dask\dataframe\io\demo.py:45: FutureWarning: Creating a DatetimeIndex by passing range endpoints is deprecated. Use `pandas.date_range` instead. index = pd.DatetimeIndex(start=start, end=end, freq=freq, name='timestamp')
def random_indexer(df):
    # AND together up to 15 random threshold comparisons on the x/y columns.
    indexer = ~df.index.isnull()
    for i in range(np.random.randint(15) + 1):
        col = np.random.choice(['x', 'y'])
        value = np.random.uniform(-1, 1)
        op = np.random.choice([lambda x, y: x < y, lambda x, y: x > y])
        indexer = np.logical_and(indexer, op(df[col], value))
    return indexer

def random_statistic(indexer, df):
    # Apply a random aggregate to a random column of the masked rows.
    # The print below emits the (very long) lazy-Series repr on every call.
    print(indexer)
    col = np.random.choice(['x', 'y', 'name'])
    if col == 'name':
        op = np.random.choice([lambda x: x.unique().size, np.min, np.max])
    else:
        op = np.random.choice([lambda x: x.unique().size, np.min, np.max, np.sum, np.mean])
    return op(df[col][indexer])
# Build 200 lazy statistics; compute/gather stay commented out below,
# so the timings only measure printing the lazy objects, not real work.
np.random.seed(137)
stats = []
for i in range(10):
    ind = random_indexer(df)
    for k in range(20):
        stats.append(random_statistic(ind, df))
st = time.time()
print(stats)
# stat_computed = client.compute(stats)
ft = time.time()
print(ft - st)
st = time.time()
# stat_results = client.gather(stat_computed)
ft = time.time()
print(ft - st)
Dask Series Structure: npartitions=300 2000-01-01 00:00:00 bool 2000-01-01 02:24:00 ... ... 2000-01-30 21:36:00 ... 2000-01-31 00:00:00 ... Name: x, dtype: bool Dask Name: logical_and, 10800 tasks
(the lazy indexer repr above is printed once per statistic, 200 times in total, with Name alternating between x and y and task counts ranging from 2100 to 10800)
[dd.Scalar<series-..., dtype=float64>, dd.Scalar<series-..., dtype=float64>, dd.Scalar<series-..., dtype=<U3>, dd.Scalar<size-ag..., dtype=int32>, ... 200 lazy dd.Scalar objects in total ...]
0.0030128955841064453
0.0
df.head()
--------------------------------------------------------------------------- CancelledError Traceback (most recent call last) <ipython-input-9-c42a15b2c7cf> in <module> ----> 1 df.head() ~\Anaconda3\lib\site-packages\dask\dataframe\core.py in head(self, n, npartitions, compute) 874 875 if compute: --> 876 result = result.compute() 877 return result 878 ~\Anaconda3\lib\site-packages\dask\base.py in compute(self, **kwargs) 154 dask.base.compute 155 """ --> 156 (result,) = compute(self, traverse=False, **kwargs) 157 return result 158 ~\Anaconda3\lib\site-packages\dask\base.py in compute(*args, **kwargs) 395 keys = [x.__dask_keys__() for x in collections] 396 postcomputes = [x.__dask_postcompute__() for x in collections] --> 397 results = schedule(dsk, keys, **kwargs) 398 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)]) 399 ~\Anaconda3\lib\site-packages\distributed\client.py in get(self, dsk, keys, restrictions, loose_restrictions, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, **kwargs) 2307 retries=retries, 2308 user_priority=priority, -> 2309 actors=actors, 2310 ) 2311 packed = pack_data(keys, futures) ~\Anaconda3\lib\site-packages\distributed\client.py in _graph_to_futures(self, dsk, keys, restrictions, loose_restrictions, priority, user_priority, resources, retries, fifo_timeout, actors) 2232 for v in s: 2233 if v not in self.futures: -> 2234 raise CancelledError(v) 2235 2236 dependencies = {k: get_dependencies(dsk, k) for k in dsk} CancelledError: ('repartition-merge-39715e6237c8baf832ed85d511f135a3', 0)
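A CancelledError on a persisted frame usually means the futures backing it were released (for example after a scheduler or client restart). Re-persisting rebuilds the partitions so head() can run again:

df = client.persist(df)
df.head()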
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-10-d243315effc1> in <module> ----> 1 df = op.load.csv("https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv", sep=",", header=True, infer_schema='false', null_value="None") NameError: name 'op' is not defined
stat_computed = client.compute(np.mode(df["id"]))
stat_results = client.gather(stat_computed)
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-14-a799f70d7185> in <module> ----> 1 stat_computed = client.compute(np.mode(df["id"])) 2 stat_results = client.gather(stat_computed) AttributeError: module 'numpy' has no attribute 'mode'
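NumPy has no mode function, exactly as the AttributeError says. For a Dask series, one lazy alternative is value_counts, whose most frequent label is the mode:

mode_id = df["id"].value_counts().idxmax()
print(mode_id.compute())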
print(stat_results)
1