Your results won't look exactly like this notebook, as I did slightly different processing locally.
import dask.dataframe as dd
from dask.distributed import Client, progress
# Create a dask.distributed Client with default arguments. The instance is not
# bound to a name; the Client / Cluster text that follows appears to be its
# summary as displayed by the notebook.
Client()
Client
|
Cluster
|
# Open the parquet dataset, reading only the three columns used below.
parquet_path = '/home/bird/Data/tt/full/overscripted.parquet/'
wanted_columns = ('argument_0', 'symbol', 'operation')
df = dd.read_parquet(parquet_path, columns=wanted_columns)
# Peek at the first rows as a quick sanity check.
df.head()
argument_0 | symbol | operation | |
---|---|---|---|
0 | window.navigator.userAgent | get | |
1 | window.navigator.userAgent | get | |
2 | window.navigator.userAgent | get | |
3 | window.navigator.appName | get | |
4 | window.navigator.appVersion | get |
# Keep only the rows recording CanvasRenderingContext2D.fillText and persist
# the filtered frame so the following cells can reuse it.
is_fill_text = df.symbol == 'CanvasRenderingContext2D.fillText'
fillText = df[is_fill_text].persist()
# Text-mode progress bar for the persisted frame.
progress(fillText, notebook=False)
fillText.head()
argument_0 | symbol | operation | |
---|---|---|---|
call_id | |||
1_0001213aecc8140d73918b7fcd11af181a850ce5b7d258f82771a4b3.json__125 | Soft Ruddy Foothold 2 | CanvasRenderingContext2D.fillText | call |
1_0001213aecc8140d73918b7fcd11af181a850ce5b7d258f82771a4b3.json__132 | !H71JCaj)]# 1@# | CanvasRenderingContext2D.fillText | call |
1_0001213aecc8140d73918b7fcd11af181a850ce5b7d258f82771a4b3.json__188 | <@nv45. F1n63r,Pr1n71n6! | CanvasRenderingContext2D.fillText | call |
1_0001213aecc8140d73918b7fcd11af181a850ce5b7d258f82771a4b3.json__197 | 668 | CanvasRenderingContext2D.fillText | call |
1_00021485d883465dc356bceabf4203dec5012044c643ab3498da2d1c.json__30 | Soft Ruddy Foothold 2 | CanvasRenderingContext2D.fillText | call |
# Count how often each distinct first argument was passed to fillText.
first_argument = fillText['argument_0']
_arg_counts = first_argument.value_counts().persist()
# Text-mode progress bar while the counts are computed.
progress(_arg_counts, notebook=False)
[########################################] | 100% Completed | 8.6s
# Materialise the counts as a two-column frame ('argument_0', 'count'), most
# frequent first.
#
# The index and the values are named explicitly instead of renaming whatever
# reset_index() happens to produce. Older pandas emits 'index'/'argument_0'
# columns here, but pandas >= 2.0 already names the value_counts() result
# 'count' and its index 'argument_0'; renaming 'argument_0' -> 'count' on that
# output would create two 'count' columns and break sort_values('count').
arg_counts = (
    _arg_counts.compute()
    .rename_axis('argument_0')
    .reset_index(name='count')
    .sort_values('count', ascending=False)
)
# Show the 40 most common fillText arguments.
arg_counts.head(40)
argument_0 | count | |
---|---|---|
0 | 😃 | 37327 |
1 | Cwm fjordbank glyphs vext quiz, 😃 | 21436 |
2 | 14313 | |
3 | 🇺🇳 | 12062 |
4 | 🧚♂️ | 10422 |
5 | 🧚♂️ | 10422 |
6 | 🇺🇳 | 10422 |
7 | 45 | 8637 |
8 | 38 | 8340 |
9 | !H71JCaj)]# 1@# | 8149 |
10 | Soft Ruddy Foothold 2 | 8149 |
11 | !image! | 7301 |
12 | e | 6045 |
13 | 201708 | 5785 |
14 | 201706 | 5785 |
15 | 201704 | 5785 |
16 | 0 | 5550 |
17 | a | 4787 |
18 | i | 4471 |
19 | n | 4194 |
20 | o | 4096 |
21 | t | 3974 |
22 | http://valve.github.io | 3912 |
23 | r | 3621 |
24 | s | 3447 |
25 | <@nv45. F1n63r,Pr1n71n6! | 3347 |
26 | 🐨 | 3315 |
27 | 10 | 3289 |
28 | 11 | 3230 |
29 | 月份 | 3087 |
30 | 06 | 2799 |
31 | 07 | 2794 |
32 | 08 | 2785 |
33 | 09 | 2782 |
34 | 2,000 | 2706 |
41 | $201000 | 2697 |
39 | $197000 | 2697 |
40 | $196000 | 2697 |
36 | $200000 | 2697 |
38 | $198000 | 2697 |
# Count the fillText rows by the kind of operation that was recorded.
operation_column = fillText['operation']
_operation_counts = operation_column.value_counts().persist()
# Text-mode progress bar while the counts are computed.
progress(_operation_counts, notebook=False)
[########################################] | 100% Completed | 8.5s
# Collect the persisted per-operation counts into a concrete result.
operation_counts = _operation_counts.compute()
# Bare expression: display the counts as the cell output.
operation_counts
call            542838
set                 58
set (failed)         0
get                  0
Name: operation, dtype: int64