Mining acknowledgments in ADS

Adapted from code written by Thomas P. Robitaille (Homepage)

NOTE: The background and results are discussed in this blog post.

This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License.

license

In [1]:
%matplotlib inline
import os

import brewer2mpl
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import requests
import pandas as pd
from prettyplotlib.utils import remove_chartjunk

# Default line colours: every second entry of the 12-colour qualitative
# 'Paired' ColorBrewer map, plus one extra magenta-like RGB triple.
# NOTE(review): 'axes.color_cycle' was superseded by 'axes.prop_cycle' in
# later matplotlib releases -- confirm which matplotlib version this targets.
paired_colors = brewer2mpl.get_map('Paired', 'qualitative', 12).mpl_colors
line_palette = paired_colors[1::2] + [(0.94, 0.01, 0.50)]
mpl.rcParams['axes.color_cycle'] = line_palette

# Global figure defaults used by every plot in this notebook.
mpl.rcParams['figure.figsize'] = (9, 6)
mpl.rcParams['font.size'] = 14
In [2]:
# Developer key sent as the 'dev_key' request parameter; read from the
# environment, so a missing ADS_DEV_KEY variable raises KeyError here.
DEV_KEY = os.environ['ADS_DEV_KEY']
# Search endpoint queried by yearly_counts() and results().
BASE_URL = 'http://adslabs.org/adsabs/api/search/'

def yearly_counts(query, ack=False):
    """Return per-year counts of refereed astronomy records matching a query.

    Sends one search request to ``BASE_URL`` with faceting on ``year`` and
    reads the counts from the ``year`` facet of the JSON response.

    Parameters
    ----------
    query : str
        Search expression sent as the ``q`` parameter.
    ack : bool, optional
        If True, the expression is sent as ``ack:<query>`` (presumably
        restricting the match to the acknowledgments field -- confirm
        against the API documentation).

    Returns
    -------
    pandas.Series
        Counts indexed by year for 2000 through 2014 inclusive. The index
        is named ``'year'`` and the series is named ``query``. Years that
        are absent from the returned facet are NaN.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    q = ('q', 'ack:%s' % query) if ack else ('q', query)

    params = [q,
              ('filter', 'database:astronomy'),
              ('filter', 'property:refereed'),
              ('rows', 200),
              ('dev_key', DEV_KEY),
              ('facet', 'year'),
              ('start', 0),
              ]

    response = requests.get(BASE_URL, params=params)
    response.raise_for_status()

    # The facet is a flat sequence alternating year, count, year, count, ...
    # Build a real list: on Python 3 ``map`` returns an iterator, which
    # cannot be sliced.
    raw_facet = response.json()['results']['facets']['facet_fields']['year']
    facet = [int(value) for value in raw_facet]
    years, counts = facet[::2], facet[1::2]

    series = pd.Series(counts, index=years, name=query)
    series.index.name = 'year'

    # reindex() fills years missing from the facet with NaN; selecting with
    # a list of labels raises KeyError for missing labels on current pandas.
    return series.reindex(list(range(2000, 2015)))

def results(query):
    """Fetch the ``results`` section of the search response for *query*.

    The request is limited to refereed astronomy records, asks for
    highlighting on ``body``, the ``pubdate`` field, a ``year`` facet and
    up to 200 rows starting at offset 0.

    Parameters
    ----------
    query : str
        Search expression sent as the ``q`` parameter.

    Returns
    -------
    The value stored under the ``'results'`` key of the decoded JSON body.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    # A list of pairs (not a dict) because 'filter' appears twice.
    query_params = [
        ('q', query),
        ('hl', 'body'),
        ('fl', 'pubdate'),
        ('filter', 'database:astronomy'),
        ('filter', 'property:refereed'),
        ('rows', 200),
        ('dev_key', DEV_KEY),
        ('facet', 'year'),
        ('start', 0),
    ]

    response = requests.get(BASE_URL, params=query_params)
    response.raise_for_status()

    payload = response.json()
    return payload['results']
In [436]:
def trendlines(df, labels=None):
    """Plot one trend line per column of *df* on the current axes.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric values indexed by year, one column per plotted series.
        The caller's frame is not modified.
    labels : dict, optional
        Maps column names to legend labels; columns without an entry are
        labelled with their own name.

    Notes
    -----
    The row for 2014, when present, is multiplied by 1.5 before plotting
    to compensate for the year being incomplete.
    """
    labels = labels or {}

    # Work on a float copy: scaling an integer row by 1.5 in place would
    # otherwise depend on implicit dtype upcasting during assignment.
    df = df.astype(float)

    # adjust for partial year (skipped when the frame has no 2014 row)
    if 2014 in df.index:
        # .loc is the label-based replacement for the removed .ix indexer
        df.loc[2014] *= 1.5

    for col in df.columns:
        plt.plot(df.index, df[col].values, label=labels.get(col, col),
                 lw=4, alpha=0.8)

    plt.xlim(2000, 2014)
    plt.xlabel('Year')
    plt.ylabel('Refereed Publication Mentions')
    plt.legend(loc='upper left', frameon=False)
    remove_chartjunk(plt.gca(), ['top', 'right'])
    
In [442]:
# Search expressions, one per programming language. Single-letter language
# names are matched through longer phrases rather than the bare letter.
LANGS = ['IDL',
         'Python',
         '"MATLAB" OR "Matlab"',
         '"Fortran" OR "FORTRAN"',
         'Java',
         '"C programming language" OR "C language" OR "C code" OR "C library" OR "C module"',
         '"R programming language" OR "R language" OR "R code" OR "R library" OR "R module"',
    ]

# Short display names for the multi-term queries. The keys are the query
# strings themselves because yearly_counts() names each series after its query.
renames = {LANGS[2]: 'MATLAB', LANGS[3]: 'FORTRAN', LANGS[-2]: "C/C++", LANGS[-1]:'R'}

# One column of yearly counts per language, aligned on the year index.
lang_data = pd.concat([yearly_counts(lang) for lang in LANGS], axis=1)

# rename(columns=...) relabels the columns on every pandas version, whereas
# rename_axis() with a mapping no longer relabels them on current pandas.
lang_data = lang_data.rename(columns=renames)
In [443]:
# Draw the per-language trend lines, then write the current figure to
# 'languages.eps'. The order matters: savefig() saves whatever has been
# drawn on the current figure so far.
trendlines(lang_data)
plt.savefig('languages.eps')
In [444]:
# Software package names whose yearly counts are compared.
PROGS = ['Aladin', 'ds9', 'Topcat', 'Starlink']

# Fetch one yearly-count series per package, join them column-wise on the
# year index, and plot the result.
per_program_counts = [yearly_counts(prog) for prog in PROGS]
prog_data = pd.concat(per_program_counts, axis=1)
trendlines(prog_data)