Query the 1000 most-downloaded files on PyPI for a single day (BigQuery).
-- Top 1000 file URLs by download count for one day. Each row of the
-- file_downloads table is counted once toward its url (aliased `downloads`).
SELECT
    url,
    COUNT(*) AS downloads
FROM `the-psf.pypi.file_downloads`
WHERE DATE(timestamp) = DATE '2020-04-16'
GROUP BY url
-- url is a tie-breaker so the set of rows kept by LIMIT is deterministic.
ORDER BY downloads DESC, url
LIMIT 1000
Download the query results as JSON, then attach the Content-Length (file size in bytes) to each file record.
# Annotate every record with the size reported in the Content-Length header
# of a HEAD request for the file, and coerce the download count to an int.
# NOTE(review): `session` and `data` are created outside this snippet;
# `session` is presumably a requests.Session -- confirm.
prefix = 'https://files.pythonhosted.org'
for item in data:
    response = session.head(prefix + item['url'])
    item['length'] = int(response.headers['Content-Length'])
    item['downloads'] = int(item['downloads'])
import json

# Load the saved per-file records. The context manager closes the file handle
# as soon as parsing finishes instead of leaving it open until garbage
# collection.
with open('pypipopular.json') as records_file:
    data = json.load(records_file)
# Aggregate cost of each file: its size multiplied by its download count.
for entry in data:
    entry['aggregate'] = entry['length'] * entry['downloads']

# Popularity rank by number of downloads; rank 1 is the most-downloaded file.
data.sort(key=lambda entry: entry['downloads'], reverse=True)
for position, entry in enumerate(data, start=1):
    entry['rank'] = position

# Re-order by downloads * file size, largest first.
data.sort(key=lambda entry: entry['aggregate'], reverse=True)

# One (file name, GiB, rank) tuple for each of the top 128 entries.
rows = []
for entry in data[:128]:
    filename = entry['url'].split('/')[-1]
    gib_per_day = "{:.0f}".format(entry['aggregate'] / 2**30)
    rows.append((filename, gib_per_day, entry['rank']))

# Assemble the HTML table: a header row followed by newline-joined data rows.
header = "<tr><th>file</th><th>GiB/day</th><th>popularity</th></tr>"
body = "\n".join(
    "<tr><td>{}</td><td>{}</td><td>{}</td></tr>".format(*row) for row in rows
)
table = "<table>" + header + body + "</table>"
import IPython.display
# Wrap the HTML string in a rich-display object. Presumably this is the final
# expression of a notebook cell, so the notebook renders the returned object.
IPython.display.HTML(table)
file | GiB/day | popularity |
---|---|---|
pyspark-2.4.5.tar.gz | 15345 | 369 |
tensorflow-2.1.0-cp36-cp36m-manylinux2010_x86_64.whl | 15083 | 643 |
tensorflow-2.1.0-cp37-cp37m-manylinux2010_x86_64.whl | 9552 | 905 |
tensorflow-2.0.0-cp27-cp27mu-manylinux2010_x86_64.whl | 8004 | 298 |
botocore-1.15.39-py2.py3-none-any.whl | 7769 | 12 |
numpy-1.18.2-cp36-cp36m-manylinux1_x86_64.whl | 7001 | 60 |
scipy-1.4.1-cp36-cp36m-manylinux1_x86_64.whl | 4243 | 177 |
xgboost-1.0.2-py3-none-manylinux1_x86_64.whl | 3488 | 705 |
scipy-1.1.0-cp27-cp27mu-manylinux1_x86_64.whl | 3200 | 269 |
numpy-1.18.2-cp37-cp37m-manylinux1_x86_64.whl | 3096 | 185 |
xgboost-0.90-py2.py3-none-manylinux1_x86_64.whl | 3054 | 939 |
scipy-1.4.1-cp37-cp37m-manylinux1_x86_64.whl | 2426 | 297 |
numpy-1.16.6-cp27-cp27mu-manylinux1_x86_64.whl | 2376 | 202 |
pyarrow-0.16.0-cp36-cp36m-manylinux1_x86_64.whl | 2185 | 659 |
gensim-3.6.0-cp27-cp27mu-manylinux1_x86_64.whl | 2137 | 310 |
botocore-1.15.40-py2.py3-none-any.whl | 2080 | 63 |
scipy-1.2.3-cp27-cp27mu-manylinux1_x86_64.whl | 2064 | 329 |
awscli-1.18.39-py2.py3-none-any.whl | 1971 | 31 |
pip-20.0.2-py2.py3-none-any.whl | 1822 | 13 |
ansible-2.9.6.tar.gz | 1551 | 254 |
numpy-1.18.2-cp35-cp35m-manylinux1_x86_64.whl | 1432 | 364 |
Babel-2.8.0-py2.py3-none-any.whl | 1417 | 171 |
pyarrow-0.16.0-cp36-cp36m-manylinux2014_x86_64.whl | 1393 | 921 |
pandas-1.0.3-cp36-cp36m-manylinux1_x86_64.whl | 1383 | 206 |
botocore-1.13.50-py2.py3-none-any.whl | 1300 | 118 |
matplotlib-3.2.1-cp36-cp36m-manylinux1_x86_64.whl | 1246 | 278 |
botocore-1.12.253-py2.py3-none-any.whl | 1235 | 124 |
notebook-5.7.8-py2.py3-none-any.whl | 1121 | 232 |
virtualenv-20.0.17-py2.py3-none-any.whl | 1059 | 113 |
scipy-1.4.1-cp35-cp35m-manylinux1_x86_64.whl | 1012 | 603 |
pandas-1.0.3-cp37-cp37m-manylinux1_x86_64.whl | 975 | 291 |
notebook-6.0.3-py3-none-any.whl | 903 | 296 |
pycryptodomex-3.9.7-cp37-cp37m-manylinux1_x86_64.whl | 867 | 413 |
matplotlib-3.2.1-cp37-cp37m-manylinux1_x86_64.whl | 864 | 371 |
pandas-0.24.2-cp27-cp27mu-manylinux1_x86_64.whl | 801 | 340 |
mypy-0.770-cp37-cp37m-manylinux1_x86_64.whl | 772 | 644 |
grpcio-1.28.1.tar.gz | 767 | 597 |
numpy-1.16.4-cp35-cp35m-manylinux1_x86_64.whl | 752 | 548 |
cryptography-2.9-cp35-abi3-manylinux2010_x86_64.whl | 717 | 88 |
pandas-0.24.2-cp35-cp35m-manylinux1_x86_64.whl | 680 | 380 |
mlflow-1.7.2-py3-none-any.whl | 636 | 590 |
virtualenv-20.0.18-py2.py3-none-any.whl | 635 | 207 |
awscli-1.18.40-py2.py3-none-any.whl | 558 | 143 |
pytz-2019.3-py2.py3-none-any.whl | 546 | 15 |
setuptools-46.1.3-py3-none-any.whl | 544 | 18 |
scikit_learn-0.22.2.post1-cp36-cp36m-manylinux1_x86_64.whl | 542 | 347 |
matplotlib-2.2.5-cp27-cp27mu-manylinux1_x86_64.whl | 540 | 562 |
numpy-1.18.2-cp38-cp38-manylinux1_x86_64.whl | 535 | 819 |
pycryptodome-3.9.7-cp37-cp37m-manylinux1_x86_64.whl | 492 | 642 |
snowflake_connector_python-2.2.4-cp37-cp37m-manylinux2010_x86_64.whl | 491 | 588 |
docutils-0.15.2-py2-none-any.whl | 474 | 22 |
plotly-4.6.0-py2.py3-none-any.whl | 472 | 391 |
docutils-0.15.2-py3-none-any.whl | 468 | 23 |
python_dateutil-2.8.1-py2.py3-none-any.whl | 447 | 4 |
widgetsnbextension-3.5.1-py2.py3-none-any.whl | 445 | 130 |
scikit_learn-0.20.3-cp36-cp36m-manylinux1_x86_64.whl | 445 | 331 |
future-0.18.2.tar.gz | 428 | 39 |
numpy-1.17.4-cp35-cp35m-manylinux1_x86_64.whl | 424 | 938 |
numpy-1.17.4-cp37-cp37m-manylinux1_x86_64.whl | 412 | 959 |
numpy-1.18.1-cp37-cp37m-manylinux1_x86_64.whl | 405 | 984 |
SQLAlchemy-1.3.16.tar.gz | 391 | 404 |
SQLAlchemy-1.3.13.tar.gz | 389 | 403 |
awscli-1.16.314-py2.py3-none-any.whl | 384 | 204 |
boto-2.49.0-py2.py3-none-any.whl | 377 | 85 |
PyYAML-5.3.1.tar.gz | 363 | 11 |
tensorboard-2.0.2-py2-none-any.whl | 349 | 304 |
cryptography-2.9-cp35-abi3-manylinux1_x86_64.whl | 345 | 228 |
pipenv-2018.11.26-py3-none-any.whl | 344 | 389 |
lxml-4.5.0-cp36-cp36m-manylinux1_x86_64.whl | 337 | 435 |
pycryptodomex-3.9.7-cp36-cp36m-manylinux1_x86_64.whl | 335 | 857 |
Django-3.0.5-py3-none-any.whl | 334 | 538 |
statsmodels-0.11.1-cp36-cp36m-manylinux1_x86_64.whl | 331 | 611 |
lxml-4.5.0-cp37-cp37m-manylinux1_x86_64.whl | 323 | 445 |
certifi-2020.4.5.1-py2.py3-none-any.whl | 312 | 3 |
pandas-0.25.3-cp36-cp36m-manylinux1_x86_64.whl | 299 | 760 |
grpcio-1.28.1-cp36-cp36m-manylinux2010_x86_64.whl | 294 | 272 |
scikit_learn-0.22.2.post1-cp37-cp37m-manylinux1_x86_64.whl | 290 | 572 |
Pygments-2.6.1-py3-none-any.whl | 289 | 71 |
pandas-0.24.2-cp36-cp36m-manylinux1_x86_64.whl | 273 | 793 |
chardet-3.0.4-py2.py3-none-any.whl | 272 | 2 |
pandas-0.25.3-cp37-cp37m-manylinux1_x86_64.whl | 271 | 815 |
pandas-0.23.4-cp37-cp37m-manylinux1_x86_64.whl | 271 | 722 |
matplotlib-3.0.3-cp35-cp35m-manylinux1_x86_64.whl | 262 | 980 |
sphinx_rtd_theme-0.4.3-py2.py3-none-any.whl | 252 | 596 |
Pillow-6.2.2-cp27-cp27mu-manylinux1_x86_64.whl | 248 | 241 |
tensorboard-2.1.1-py3-none-any.whl | 244 | 412 |
botocore-1.14.17-py2.py3-none-any.whl | 244 | 569 |
setuptools-44.1.0-py2.py3-none-any.whl | 243 | 54 |
h5py-2.10.0-cp36-cp36m-manylinux1_x86_64.whl | 237 | 333 |
mysql-connector-2.2.9.tar.gz | 236 | 1000 |
protobuf-3.11.3-cp36-cp36m-manylinux1_x86_64.whl | 226 | 151 |
cryptography-2.9-cp27-cp27mu-manylinux2010_x86_64.whl | 225 | 328 |
pycountry-19.8.18.tar.gz | 221 | 920 |
protobuf-3.11.3-cp37-cp37m-manylinux1_x86_64.whl | 212 | 164 |
urllib3-1.25.8-py2.py3-none-any.whl | 210 | 7 |
scikit_learn-0.20.4-cp27-cp27mu-manylinux1_x86_64.whl | 209 | 620 |
networkx-2.4-py3-none-any.whl | 208 | 222 |
tensorflow_data_validation-0.15.0-cp27-cp27mu-manylinux2010_x86_64.whl | 208 | 313 |
lxml-4.5.0-cp27-cp27mu-manylinux1_x86_64.whl | 204 | 641 |
distlib-0.3.0.zip | 195 | 64 |
cryptography-2.9-cp27-cp27mu-manylinux1_x86_64.whl | 182 | 387 |
cryptography-2.8-cp34-abi3-manylinux2010_x86_64.whl | 177 | 344 |
Django-2.2.12-py3-none-any.whl | 176 | 883 |
scikit_learn-0.22.2.post1-cp35-cp35m-manylinux1_x86_64.whl | 175 | 838 |
numpy-1.18.2.zip | 174 | 699 |
botocore-1.13.20-py2.py3-none-any.whl | 171 | 714 |
tfx_bsl-0.15.3-cp27-cp27mu-manylinux2010_x86_64.whl | 169 | 314 |
tensorboard-1.15.0-py3-none-any.whl | 169 | 533 |
Pygments-2.5.2-py2.py3-none-any.whl | 168 | 141 |
grpcio-1.28.1-cp37-cp37m-manylinux2010_x86_64.whl | 168 | 432 |
jedi-0.17.0-py2.py3-none-any.whl | 164 | 194 |
protobuf-3.11.3-cp35-cp35m-manylinux1_x86_64.whl | 163 | 223 |
protobuf-3.11.3-cp27-cp27mu-manylinux1_x86_64.whl | 162 | 225 |
scikit_learn-0.21.3-cp36-cp36m-manylinux1_x86_64.whl | 161 | 864 |
protobuf-3.6.0-cp36-cp36m-manylinux1_x86_64.whl | 149 | 950 |
docutils-0.16-py2.py3-none-any.whl | 146 | 89 |
netaddr-0.7.19-py2.py3-none-any.whl | 144 | 316 |
lightgbm-2.3.1-py2.py3-none-manylinux1_x86_64.whl | 142 | 248 |
Werkzeug-1.0.1-py2.py3-none-any.whl | 142 | 47 |
tensorboard-1.14.0-py3-none-any.whl | 140 | 541 |
pyzmq-19.0.0-cp27-cp27mu-manylinux1_x86_64.whl | 136 | 221 |
SQLAlchemy-1.3.15.tar.gz | 135 | 915 |
Sphinx-3.0.1-py3-none-any.whl | 133 | 518 |
joblib-0.14.1-py2.py3-none-any.whl | 133 | 51 |
h5py-2.10.0-cp27-cp27mu-manylinux1_x86_64.whl | 132 | 512 |
imageio-2.8.0-py3-none-any.whl | 129 | 604 |
Pillow-7.1.1-cp37-cp37m-manylinux1_x86_64.whl | 127 | 429 |
tensorflow_model_analysis-0.15.4-py2-none-any.whl | 126 | 312 |