import lzma
import urllib.request
import pandas
import tqdm.notebook
from pubmedpy.eutilities import download_pubmed_ids
Download articles that are part of LitCovid:
Keep up with the latest coronavirus research
Qingyu Chen, Alexis Allot, Zhiyong Lu
Nature (2020-03-10) https://doi.org/ggq9fd
DOI: 10.1038/d41586-020-00694-1 · PMID: 32157233
# Fetch the LitCovid TSV export and save it locally as data/litcovid.tsv.
url = "https://www.ncbi.nlm.nih.gov/research/coronavirus-api/export/tsv"
_filename, headers = urllib.request.urlretrieve(
    url=url,
    filename="data/litcovid.tsv",
)
# The Content-Disposition response header exposes the dated export filename.
headers.get("Content-Disposition")
'attachment; filename=05272020.litcovid.export.tsv'
# Load the tab-separated export; lines starting with "#" are treated as
# comments and skipped by the parser.
litcovid_df = pandas.read_csv(
    "data/litcovid.tsv",
    sep="\t",
    comment="#",
)
# Preview the first five rows.
litcovid_df.head(n=5)
pmid | title | journal | |
---|---|---|---|
0 | 32450607 | Gastrointestinal: Bowel ischemia in a suspecte... | J Gastroenterol Hepatol |
1 | 32450565 | Novel Coronavirus-Induced Right Ventricular Fa... | Cardiology |
2 | 32450560 | COVID-19, Low-Molecular-Weight Heparin, and He... | Kidney Blood Press Res |
3 | 32450492 | Mental health and COVID-19 in Nepal: A case of... | Asian J Psychiatr |
4 | 32450477 | Hardware versus heartware: The need to address... | J Clin Anesth |
# Report the number of rows (one per article) with thousands separators.
print(f"{litcovid_df.shape[0]:,} articles in litcovid.tsv")
16,405 articles in litcovid.tsv
# Destination for the xz-compressed eSummary XML output.
path = "data/litcovid-esummaries.xml.xz"
# PubMed IDs from the LitCovid table, converted to int and sorted ascending.
pubmed_ids = sorted(map(int, litcovid_df.pmid))
print(f'{len(pubmed_ids):,}')
# Open in text mode with an explicit UTF-8 encoding so the output does not
# depend on the platform's locale default encoding.
with lzma.open(path, 'wt', encoding='utf-8') as write_file:
    download_pubmed_ids(
        pubmed_ids, write_file, endpoint='esummary',
        # Presumably retmax/retmin bound the number of IDs per request
        # (TODO confirm against pubmedpy).
        # NOTE(review): sleep=0 looks like it removes the pause between
        # requests; NCBI's E-utilities usage guidelines cap clients without
        # an API key at 3 requests/second -- confirm this is intended.
        retmax=200, retmin=50, sleep=0, error_sleep=1,
        tqdm=tqdm.notebook.tqdm,
    )
16,405
HBox(children=(FloatProgress(value=0.0, max=16405.0), HTML(value='')))
WARNING:root:1 successive error: 200 IDs[32355056 … 32358406] threw HTTPSConnectionPool(host='eutils.ncbi.nlm.nih.gov', port=443): Max retries exceeded with url: /entrez/eutils/esummary.fcgi?db=pubmed&id=32355056%2C32355099%2C32355107%2C32355114%2C32355115%2C32355116%2C32355117%2C32355118%2C32355119%2C32355131%2C32355132%2C32355222%2C32355243%2C32355260%2C32355296%2C32355299%2C32355328%2C32355329%2C32355330%2C32355392%2C32355394%2C32355415%2C32355424%2C32355435%2C32355447%2C32355450%2C32355509%2C32355510%2C32355546%2C32355547%2C32355555%2C32355556%2C32355564%2C32355606%2C32355607%2C32355634%2C32355638%2C32355651%2C32355653%2C32355658%2C32355659%2C32355694%2C32355837%2C32355863%2C32355869%2C32355889%2C32355892%2C32355904%2C32355949%2C32355961%2C32355962%2C32355982%2C32355985%2C32356025%2C32356031%2C32356032%2C32356040%2C32356047%2C32356096%2C32356161%2C32356164%2C32356225%2C32356251%2C32356252%2C32356294%2C32356298%2C32356301%2C32356302%2C32356307%2C32356322%2C32356356%2C32356382%2C32356422%2C32356423%2C32356460%2C32356508%2C32356510%2C32356516%2C32356569%2C32356573%2C32356577%2C32356578%2C32356580%2C32356583%2C32356590%2C32356601%2C32356603%2C32356625%2C32356626%2C32356627%2C32356628%2C32356639%2C32356640%2C32356641%2C32356642%2C32356654%2C32356672%2C32356698%2C32356760%2C32356761%2C32356777%2C32356849%2C32356857%2C32356858%2C32356863%2C32356866%2C32356867%2C32356869%2C32356871%2C32356896%2C32356900%2C32356908%2C32356910%2C32356926%2C32356927%2C32356944%2C32356945%2C32356955%2C32357070%2C32357072%2C32357074%2C32357084%2C32357086%2C32357206%2C32357209%2C32357210%2C32357257%2C32357273%2C32357275%2C32357277%2C32357281%2C32357288%2C32357307%2C32357377%2C32357378%2C32357379%2C32357380%2C32357381%2C32357424%2C32357457%2C32357471%2C32357503%2C32357545%2C32357553%2C32357575%2C32357582%2C32357655%2C32357675%2C32357808%2C32357883%2C32357949%2C32357950%2C32357952%2C32357954%2C32357957%2C32357959%2C32357975%2C32357976%2C32357977%2C32357978%2C32357994%2C32357995%2C32357996%2C323
57997%2C32357998%2C32358044%2C32358045%2C32358057%2C32358098%2C32358099%2C32358105%2C32358107%2C32358120%2C32358121%2C32358130%2C32358131%2C32358134%2C32358142%2C32358176%2C32358180%2C32358202%2C32358203%2C32358216%2C32358217%2C32358218%2C32358227%2C32358228%2C32358229%2C32358230%2C32358231%2C32358232%2C32358233%2C32358234%2C32358250%2C32358303%2C32358311%2C32358312%2C32358325%2C32358326%2C32358406&rettype=xml (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f3764988280>: Failed to establish a new connection: [Errno -2] Name or service not known'))