#!/usr/bin/env python
# coding: utf-8

# # COVID-19 Literature Publication Times extracted from PubMed

# In[1]:


import lzma
import pathlib
import urllib.request

import pandas
import tqdm.notebook

from pubmedpy.eutilities import download_pubmed_ids


# Download articles that are part of [LitCovid](https://www.ncbi.nlm.nih.gov/research/coronavirus/):
#
# > **Keep up with the latest coronavirus research**
# Qingyu Chen, Alexis Allot, Zhiyong Lu
# *Nature* (2020-03-10)
# DOI: [10.1038/d41586-020-00694-1](https://doi.org/10.1038/d41586-020-00694-1) · PMID: [32157233](https://www.ncbi.nlm.nih.gov/pubmed/32157233)
#

# In[2]:


url = "https://www.ncbi.nlm.nih.gov/research/coronavirus-api/export/tsv"
# urlretrieve does not create missing parent directories, so make sure the
# output directory exists before downloading (no-op when it already does).
pathlib.Path("data").mkdir(parents=True, exist_ok=True)
_filename, headers = urllib.request.urlretrieve(url, filename="data/litcovid.tsv")
# show dated filename
headers.get("Content-Disposition")


# In[3]:


# Lines beginning with "#" in the export are treated as comments and skipped.
litcovid_df = pandas.read_table("data/litcovid.tsv", comment="#")
litcovid_df.head()


# In[4]:


print(f'{len(litcovid_df):,} articles in litcovid.tsv')


# In[5]:


# Destination for the downloaded esummary records (xz-compressed text).
path = "data/litcovid-esummaries.xml.xz"


# In[6]:


# PubMed IDs from the LitCovid table, coerced to int and sorted ascending.
pubmed_ids = sorted(map(int, litcovid_df.pmid))
print(f'{len(pubmed_ids):,}')

# Query the "esummary" endpoint for every ID and write the results to the
# compressed file opened in text mode.
# NOTE(review): retmax/retmin presumably bound the per-request batch size and
# sleep/error_sleep the delays between requests and after errors -- confirm
# against the pubmedpy documentation.
with lzma.open(path, 'wt') as write_file:
    download_pubmed_ids(
        pubmed_ids,
        write_file,
        endpoint='esummary',
        retmax=200,
        retmin=50,
        sleep=0,
        error_sleep=1,
        tqdm=tqdm.notebook.tqdm,
    )