import keyring
from getpass import getpass
# Reuse the readthedocs.org API token stored in the system keyring. When no
# (non-empty) token is stored yet, prompt for it without echoing and save it
# so later runs skip the prompt.
if not (token := keyring.get_password("readthedocs.org", "_api")):
    token = getpass("readthedocs.org token: ")
    keyring.set_password("readthedocs.org", "_api", token)
import requests
import requests_cache
from yarl import URL
# Root of the Read the Docs v3 REST API; endpoint URLs are built from it with `/`.
readthedocs_api = URL("https://readthedocs.org/api/v3/")

# Shared caching HTTP session; every request carries the API token.
s = requests_cache.CachedSession()
s.headers.update({"Authorization": f"Token {token}"})
def list_projects(url=readthedocs_api / "projects"):
    """Yield every project dict from the paginated projects endpoint.

    Starting at *url*, fetch a page with the shared session ``s``, yield each
    entry of its ``"results"`` list, then continue with the page's ``"next"``
    link until that link is empty.

    Raises:
        requests.HTTPError: if any page request returns an error status.
    """
    page_url = url
    while True:
        response = s.get(page_url)
        response.raise_for_status()
        payload = response.json()
        yield from payload["results"]
        page_url = payload["next"]
        if not page_url:
            # No further page: the listing is complete.
            return
# Fetch all pages eagerly so the project list can be indexed and counted.
projects = list(list_projects())
# Notebook display expression: shows how many projects were returned.
len(projects)
59
projects[0]
{'_links': {'_self': 'https://readthedocs.org/api/v3/projects/binderhub/', 'builds': 'https://readthedocs.org/api/v3/projects/binderhub/builds/', 'environmentvariables': 'https://readthedocs.org/api/v3/projects/binderhub/environmentvariables/', 'notifications': 'https://readthedocs.org/api/v3/projects/binderhub/notifications/', 'redirects': 'https://readthedocs.org/api/v3/projects/binderhub/redirects/', 'subprojects': 'https://readthedocs.org/api/v3/projects/binderhub/subprojects/', 'superproject': 'https://readthedocs.org/api/v3/projects/binderhub/superproject/', 'translations': 'https://readthedocs.org/api/v3/projects/binderhub/translations/', 'versions': 'https://readthedocs.org/api/v3/projects/binderhub/versions/'}, 'created': '2017-05-31T16:38:53.966781Z', 'default_branch': 'main', 'default_version': 'latest', 'external_builds_privacy_level': 'public', 'homepage': 'https://github.com/jupyterhub/binderhub', 'id': 112923, 'language': {'code': 'en', 'name': 'English'}, 'modified': '2022-10-20T13:26:35.096249Z', 'name': 'binderhub', 'privacy_level': 'public', 'programming_language': {'code': 'py', 'name': 'Python'}, 'repository': {'type': 'git', 'url': 'https://github.com/jupyterhub/binderhub'}, 'single_version': False, 'slug': 'binderhub', 'subproject_of': None, 'tags': ['binder', 'ipython', 'jupyter', 'jupyterhub'], 'translation_of': None, 'urls': {'builds': 'https://readthedocs.org/projects/binderhub/builds/', 'documentation': 'https://binderhub.readthedocs.io/en/latest/', 'home': 'https://readthedocs.org/projects/binderhub/', 'versions': 'https://readthedocs.org/projects/binderhub/versions/'}, 'users': [{'username': 'yuvipanda'}, {'username': 'willingc'}, {'username': 'minrk'}, {'username': 'betatim'}, {'username': 'choldgraf'}, {'username': 'consideRatio'}], 'versioning_scheme': 'multiple_versions_with_translations'}
# Collect the slug of every project returned by the API.
project_names = [project["slug"] for project in projects]
from pathlib import Path
import json
from playwright.async_api import Page, async_playwright
# File (relative to the working directory) where browser cookies are cached as JSON.
cookie_path = Path("cookies.json")
# Login page; the `next` query parameter points at the dashboard.
login_url = "https://readthedocs.org/accounts/login/?next=/dashboard/"
# Dashboard root; used to detect a finished login and to build per-project URLs.
dashboard_url = "https://readthedocs.org/dashboard/"
async def login() -> list[dict]:
    """Return readthedocs.org browser cookies, logging in interactively if needed.

    If ``cookie_path`` already exists, its JSON content is returned as-is
    (the cookies are not checked for validity or expiry). Otherwise a visible
    Firefox window is opened on the login page and the function waits up to
    120 seconds for the browser to reach the dashboard; the resulting cookies
    are written to ``cookie_path`` and returned.

    Returns:
        The cookies as a list of dicts, as produced by Playwright's
        ``BrowserContext.cookies()``.

    Raises:
        playwright.async_api.TimeoutError: if the dashboard is not reached
            within the timeout.
    """
    if cookie_path.exists():
        with cookie_path.open() as f:
            # TODO: check if valid
            return json.load(f)

    async with async_playwright() as p:
        # Headed browser: the user has to type their credentials.
        browser = await p.firefox.launch(headless=False)
        try:
            page = await browser.new_page()
            await page.goto(login_url)
            print("Login to readthedocs.org ...")
            # The login page redirects to the dashboard once login succeeds.
            await page.wait_for_url(dashboard_url + "*", timeout=120_000)
            cookies = await browser.contexts[0].cookies()
        finally:
            # Close the window even when the login wait times out.
            await browser.close()

    print(f"Saving cookies to {cookie_path}")
    with cookie_path.open("w") as f:
        json.dump(cookies, f)
    return cookies
# Log in once up front so the cookie file exists before any download starts.
# NOTE: top-level `await` needs an interpreter that supports it (e.g. IPython/Jupyter).
cookies = await login()
# Short hand-picked subset of project slugs (each listed once).
project_list = [
    "jupyterhub",
    "oauthenticator",
    "zero-to-jupyterhub",
    "jupyterhub-kubespawner",
]
# Directory the downloaded analytics files are saved into; created if missing.
stats_dir = Path("stats")
stats_dir.mkdir(exist_ok=True)
async def download_stats(project_name: str):
    """Download the traffic and search analytics exports for one project.

    Opens a visible Firefox window with the cookies from ``login()``, visits
    the project's ``traffic-analytics`` and ``search-analytics`` dashboard
    pages in turn, clicks "Download all data" on each, and saves the file
    into ``stats_dir`` under the name suggested by the server.

    Args:
        project_name: Project slug as used in dashboard URLs.
    """
    cookies = await login()
    async with async_playwright() as p:
        browser = await p.firefox.launch(headless=False)
        try:
            page = await browser.new_page()
            # new_page() created the browser's only context; authenticate it.
            await browser.contexts[0].add_cookies(cookies)
            for kind in ("traffic", "search"):
                url = URL(dashboard_url) / project_name / f"{kind}-analytics"
                await page.goto(str(url))
                btn = page.get_by_text("Download all data", exact=True)
                # Register the download listener before the click triggers it.
                async with page.expect_download() as download_info:
                    await btn.click()
                download = await download_info.value
                dest = stats_dir / download.suggested_filename
                print(f"Downloading {dest}")
                await download.save_as(dest)
        finally:
            # Close the window on success and on failure alike.
            await browser.close()
# Project slugs to download analytics for; manually derived from the API
# listing above. One slug per line, split on whitespace into a list.
project_names = """
binderhub
ipykernel
ipyparallel
ipython
ipywidgets
jupyter
jupyter-client
jupyter-console
jupyter-core
jupyter-docker-stacks
jupyterhub
jupyterhub-deploy-teaching
jupyterhub-dockerspawner
jupyterhub-grafana
jupyterhub-kubespawner
jupyterhub-python-repo-template
jupyterhub-team-compass
jupyterhub-traefik-proxy
jupyterhub-tutorial
jupyter-notebook
jupyter-server
jupyter-server-proxy
jupyter-software-steering-council-team-compass
ltiauthenticator
mybinder-sre
nbconvert
nbdime
nbformat
nbgitpuller
oauthenticator
pytest-jupyterhub
qtconsole
repo2docker
the-littlest-jupyterhub
traitlets
zero-to-jupyterhub
""".split()
import asyncio
# Maximum number of simultaneous downloads for the semaphore-based variant
# (that variant is currently commented out further down).
concurrency = 5
async def concurrent_call(semaphore, f, *args, **kwargs):
    """Await ``f(*args, **kwargs)`` while holding one slot of *semaphore*.

    Used to cap how many calls run at once, because too many simultaneous
    Playwright instances can crash.

    Returns:
        Whatever the awaited call returns; its exceptions propagate unchanged.
    """
    await semaphore.acquire()
    try:
        outcome = await f(*args, **kwargs)
    finally:
        # Free the slot whether the call succeeded or raised.
        semaphore.release()
    return outcome
# Earlier serial variant, kept for reference (same as the active loop below):
# for project_name in project_names:
#     await download_stats(project_name)
# Concurrent variant (disabled): at most `concurrency` downloads at a time.
# semaphore = asyncio.Semaphore(concurrency)
# await asyncio.gather(*[concurrent_call(semaphore, download_stats, project_name) for project_name in project_names])
# download serially
# NOTE: top-level `await` needs an interpreter that supports it (e.g. IPython/Jupyter).
for project_name in project_names:
    await download_stats(project_name)
Downloading stats/readthedocs_traffic_analytics_binderhub_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_binderhub_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_ipykernel_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_ipykernel_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_ipyparallel_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_ipyparallel_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_ipython_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_ipython_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_ipywidgets_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_ipywidgets_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_jupyter_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_jupyter_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_jupyter-client_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_jupyter-client_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_jupyter-console_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_jupyter-console_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_jupyter-core_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_jupyter-core_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_jupyter-docker-stacks_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_jupyter-docker-stacks_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_jupyterhub_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_jupyterhub_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_jupyterhub-deploy-teaching_2023-12-28_2024-03-27.csv 
Downloading stats/readthedocs_search_analytics_jupyterhub-deploy-teaching_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_jupyterhub-dockerspawner_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_jupyterhub-dockerspawner_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_jupyterhub-grafana_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_jupyterhub-grafana_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_jupyterhub-kubespawner_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_jupyterhub-kubespawner_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_jupyterhub-python-repo-template_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_jupyterhub-python-repo-template_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_jupyterhub-team-compass_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_jupyterhub-team-compass_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_jupyterhub-traefik-proxy_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_jupyterhub-traefik-proxy_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_jupyterhub-tutorial_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_jupyterhub-tutorial_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_jupyter-notebook_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_jupyter-notebook_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_jupyter-server_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_jupyter-server_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_jupyter-server-proxy_2023-12-28_2024-03-27.csv Downloading 
stats/readthedocs_search_analytics_jupyter-server-proxy_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_jupyter-software-steering-council-team-compass_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_jupyter-software-steering-council-team-compass_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_ltiauthenticator_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_ltiauthenticator_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_mybinder-sre_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_mybinder-sre_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_nbconvert_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_nbconvert_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_nbdime_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_nbdime_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_nbformat_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_nbformat_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_nbgitpuller_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_nbgitpuller_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_oauthenticator_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_oauthenticator_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_pytest-jupyterhub_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_pytest-jupyterhub_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_qtconsole_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_qtconsole_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_repo2docker_2023-12-28_2024-03-27.csv Downloading 
stats/readthedocs_search_analytics_repo2docker_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_the-littlest-jupyterhub_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_the-littlest-jupyterhub_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_traitlets_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_traitlets_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_traffic_analytics_zero-to-jupyterhub_2023-12-28_2024-03-27.csv Downloading stats/readthedocs_search_analytics_zero-to-jupyterhub_2023-12-28_2024-03-27.csv
!open stats