Artificial Intelligence Publications¶

In [1]:

%load_ext watermark
%watermark -a 'Sebastian Raschka' -d -v -p plotly,pandas

Sebastian Raschka 08/31/2015 

CPython 3.4.3
IPython 4.0.0

plotly 1.8.3
pandas 0.16.2

Preparing the Data¶

In [2]:

import pandas as pd

# Lookup table pairing each country name with its three-letter code; only
# these two columns of the template file are kept, renamed to title case.
templ = (
    pd.read_csv('./data/template.csv')
      .loc[:, ['COUNTRY', 'CODE']]
      .rename(columns={'COUNTRY': 'Country', 'CODE': 'Code'})
)
templ.head()

Out[2]:

	Country	Code
0	Afghanistan	AFG
1	Albania	ALB
2	Algeria	DZA
3	American Samoa	ASM
4	Andorra	AND

In [3]:

# Per-country ranking table for the "Artificial Intelligence" category
# (one row per country: documents, citations, self-citations, ...).
ai_rank_path = './data/scimagojr_ai_countryrank.csv'
rank_ai = pd.read_csv(ai_rank_path)
rank_ai.head(n=5)

Out[3]:

	Rank	Country	Documents	Citations	Self-Citations	Citations per Document	H index
0	1	China	80685	286809	179188	11.22	150
1	2	United States	65565	1063805	339426	23.22	345
2	3	Japan	27049	139799	45035	8.44	116
3	4	United Kingdom	22460	266440	55920	18.00	174
4	5	Germany	17364	146713	30620	14.99	142

In [4]:

# Citations per document once each country's self-citations are subtracted.
external_citations = rank_ai['Citations'] - rank_ai['Self-Citations']
rank_ai['Clean Citation/Doc'] = external_citations / rank_ai['Documents']

# Keep only the columns that the later merge and the plots read.
kept_columns = [
    'Country',
    'Documents',
    'Citations per Document',
    'Clean Citation/Doc',
]
rank_ai = rank_ai[kept_columns]
rank_ai.head()

Out[4]:

	Country	Documents	Citations per Document	Clean Citation/Doc
0	China	80685	11.22	1.333841
1	United States	65565	23.22	11.048257
2	Japan	27049	8.44	3.503420
3	United Kingdom	22460	18.00	9.373108
4	Germany	17364	14.99	6.685844

In [5]:

# Publication counts per country from the "all" ranking file; the count column
# is renamed so it does not clash with the AI-only 'Documents' column.
rank_all = (
    pd.read_csv('./data/scimagojr_all_countryrank.csv')
      .loc[:, ['Country', 'Documents']]
      .rename(columns={'Documents': 'Documents_all'})
)

# Join the two rankings on country name, then express the AI publication count
# as a percentage of the overall count.
rank = pd.merge(rank_ai, rank_all, on='Country')
ai_fraction = rank['Documents'] / rank['Documents_all']
rank['AI/All Documents'] = ai_fraction * 100

rank.head()

Out[5]:

	Country	Documents	Citations per Document	Clean Citation/Doc	Documents_all	AI/All Documents
0	China	80685	11.22	1.333841	3617355	2.230497
1	United States	65565	23.22	11.048257	8626193	0.760069
2	Japan	27049	8.44	3.503420	2074872	1.303647
3	United Kingdom	22460	18.00	9.373108	2397817	0.936685
4	Germany	17364	14.99	6.685844	2176860	0.797663

In [6]:

# Attach the country codes and persist the combined table; the plotting cells
# reload it from this CSV.
# NOTE(review): merge defaults to an inner join, so any country whose name is
# spelled differently in the template is dropped here -- confirm that is intended.
df = pd.merge(rank, templ, on='Country')
df.to_csv('./data/citations.csv', index=False)

Number of "Artificial Intelligence" Publications from 1996 to 2014¶

In [7]:

import plotly.plotly as py
from plotly.graph_objs import Annotation, Annotations
import pandas as pd

# Reload the table written by the data-preparation step.
df = pd.read_csv('./data/citations.csv')

# Blue-to-light-grey colour ramp given as [position, colour] stops; the trace
# below applies it reversed.
blue_scale = [
    [0, "rgb(5, 10, 172)"],
    [0.35, "rgb(40, 60, 190)"],
    [0.5, "rgb(70, 100, 245)"],
    [0.6, "rgb(90, 120, 245)"],
    [0.7, "rgb(106, 137, 247)"],
    [1, "rgb(220, 220, 220)"],
]

# World map coloured by the number of AI publications per country.
# NOTE(review): tick0/dtick sit on the trace rather than inside `colorbar`;
# validate=False lets them through -- confirm they have the intended effect.
documents_trace = {
    'type': 'choropleth',
    'locations': df['Code'],
    'z': df['Documents'],
    'text': df['Country'],
    'colorscale': blue_scale,
    'autocolorscale': False,
    'reversescale': True,
    'marker': {'line': {'color': 'rgb(180,180,180)', 'width': 0.5}},
    'tick0': 0,
    'zmin': 0,
    'dtick': 1000,
    'colorbar': {
        'autotick': False,
        'tickprefix': '',
        'title': 'Number of Publications',
    },
}
data = [documents_trace]

# Source/author credit pinned to the bottom-left corner of the figure.
credit = Annotation(
    text=('Data source: http://www.scimagojr.com<br>'
          'Author: Sebastian Raschka<br>'
          '(sebastianraschka.com, @rasbt)'),
    xref='paper',
    yref='paper',
    align='left',
    x=0,
    y=0,
    yanchor='bottom',
    showarrow=False)

layout = {
    'title': 'Number of "Artificial Intelligence" Publications from 1996 to 2014',
    'geo': {
        'showframe': False,
        'showcoastlines': False,
        'projection': {'type': 'Mercator'},
    },
    'annotations': Annotations([credit]),
}

fig = {'data': data, 'layout': layout}
# Static export, currently disabled:
#py.image.save_as({'data': data}, './images/ai_publications_1.svg')
py.iplot(fig, validate=False, filename='ai-publications-chloropleth-1')

Out[7]:

Number of Citations/Publication (self-citations included)¶

In [8]:

# Blue-to-light-grey colour ramp given as [position, colour] stops; the trace
# below applies it reversed.
blue_scale = [
    [0, "rgb(5, 10, 172)"],
    [0.35, "rgb(40, 60, 190)"],
    [0.5, "rgb(70, 100, 245)"],
    [0.6, "rgb(90, 120, 245)"],
    [0.7, "rgb(106, 137, 247)"],
    [1, "rgb(220, 220, 220)"],
]

# World map coloured by citations per document, self-citations included.
# NOTE(review): tick0/dtick sit on the trace rather than inside `colorbar`, and
# dtick=1000 matches the publication-count map rather than this ratio's range
# -- confirm they have the intended effect.
data = [dict(
    type='choropleth',
    locations=df['Code'],
    z=df['Citations per Document'],
    text=df['Country'],
    colorscale=blue_scale,
    autocolorscale=False,
    reversescale=True,
    marker=dict(line=dict(color='rgb(180,180,180)', width=0.5)),
    tick0=0,
    zmin=0,
    dtick=1000,
    colorbar=dict(
        autotick=False,
        tickprefix='',
        title='Number of Citations/Publication<br>(self-citations included)'))]

# Source/author credit pinned to the bottom-left corner of the figure.
# Both xref and yref are set to 'paper' so x=0, y=0 are figure-relative,
# matching the credit annotation on the publication-count map.
credit = Annotation(
    text=('Data source: http://www.scimagojr.com<br>'
          'Author: Sebastian Raschka<br>'
          '(sebastianraschka.com, @rasbt)'),
    xref='paper',
    yref='paper',
    align='left',
    x=0,
    y=0,
    yanchor='bottom',
    showarrow=False)

layout = dict(
    title=('Number Citations per Publication in "Artificial Intelligence" Research from 1996 to 2014'
           '<br>(self-citations included)'),
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection=dict(type='Mercator')),
    annotations=Annotations([credit]))

fig = dict(data=data, layout=layout)
# Static export, currently disabled:
#py.image.save_as({'data': data}, './images/ai_publications_2.svg')
py.iplot(fig, validate=False, filename='ai-publications-chloropleth-2')

Out[8]:

Number of Citations/Publication (self-citations excluded)¶

In [9]:

# Blue-to-light-grey colour ramp given as [position, colour] stops; the trace
# below applies it reversed.
blue_scale = [
    [0, "rgb(5, 10, 172)"],
    [0.35, "rgb(40, 60, 190)"],
    [0.5, "rgb(70, 100, 245)"],
    [0.6, "rgb(90, 120, 245)"],
    [0.7, "rgb(106, 137, 247)"],
    [1, "rgb(220, 220, 220)"],
]

# World map coloured by citations per document, self-citations excluded.
# NOTE(review): tick0/dtick sit on the trace rather than inside `colorbar`, and
# dtick=1000 matches the publication-count map rather than this ratio's range
# -- confirm they have the intended effect.
data = [dict(
    type='choropleth',
    locations=df['Code'],
    z=df['Clean Citation/Doc'],
    text=df['Country'],
    colorscale=blue_scale,
    autocolorscale=False,
    reversescale=True,
    marker=dict(line=dict(color='rgb(180,180,180)', width=0.5)),
    tick0=0,
    zmin=0,
    dtick=1000,
    colorbar=dict(
        autotick=False,
        tickprefix='',
        title='Number of Citations/Publication<br>(self-citations excluded)'))]

# Source/author credit pinned to the bottom-left corner of the figure.
# Both xref and yref are set to 'paper' so x=0, y=0 are figure-relative,
# matching the credit annotation on the publication-count map.
credit = Annotation(
    text=('Data source: http://www.scimagojr.com<br>'
          'Author: Sebastian Raschka<br>'
          '(sebastianraschka.com, @rasbt)'),
    xref='paper',
    yref='paper',
    align='left',
    x=0,
    y=0,
    yanchor='bottom',
    showarrow=False)

layout = dict(
    title=('Number Citations per Publication in "Artificial Intelligence" Research from 1996 to 2014'
           '<br>(self-citations excluded)'),
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection=dict(type='Mercator')),
    annotations=Annotations([credit]))

fig = dict(data=data, layout=layout)
# Static export, currently disabled:
#py.image.save_as({'data': data}, './images/ai_publications_3.svg')
py.iplot(fig, validate=False, filename='ai-publications-chloropleth-3')

Out[9]:

AI Publications per Total Publications¶

In [10]:

# Blue-to-light-grey colour ramp given as [position, colour] stops; the trace
# below applies it reversed.
blue_scale = [
    [0, "rgb(5, 10, 172)"],
    [0.35, "rgb(40, 60, 190)"],
    [0.5, "rgb(70, 100, 245)"],
    [0.6, "rgb(90, 120, 245)"],
    [0.7, "rgb(106, 137, 247)"],
    [1, "rgb(220, 220, 220)"],
]

# World map coloured by AI publications as a percentage of all publications.
# NOTE(review): tick0/dtick sit on the trace rather than inside `colorbar`, and
# dtick=1000 matches the publication-count map rather than a percentage range
# -- confirm they have the intended effect.
data = [dict(
    type='choropleth',
    locations=df['Code'],
    z=df['AI/All Documents'],
    text=df['Country'],
    colorscale=blue_scale,
    autocolorscale=False,
    reversescale=True,
    marker=dict(line=dict(color='rgb(180,180,180)', width=0.5)),
    tick0=0,
    zmin=0,
    dtick=1000,
    colorbar=dict(
        autotick=False,
        tickprefix='',
        title='AI/all publications in percent'))]

# Source/author credit pinned to the bottom-left corner of the figure.
# Both xref and yref are set to 'paper' so x=0, y=0 are figure-relative,
# matching the credit annotation on the publication-count map.
credit = Annotation(
    text=('Data source: http://www.scimagojr.com<br>'
          'Author: Sebastian Raschka<br>'
          '(sebastianraschka.com, @rasbt)'),
    xref='paper',
    yref='paper',
    align='left',
    x=0,
    y=0,
    yanchor='bottom',
    showarrow=False)

layout = dict(
    title='Percentage of Publications in "Artificial Intelligence" Research from 1996 to 2014',
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection=dict(type='Mercator')),
    annotations=Annotations([credit]))

fig = dict(data=data, layout=layout)
# Static export, currently disabled:
#py.image.save_as({'data': data}, './images/ai_publications_4.svg')
py.iplot(fig, validate=False, filename='ai-publications-chloropleth-4')

Out[10]:

Scatterplots¶

In [11]:

import plotly.plotly as py
# Explicit names instead of a wildcard import, so it is clear where each
# graph object used below comes from.
from plotly.graph_objs import (
    Annotation, Annotations, Data, Figure, Font, Layout, Legend, Marker,
    Scatter, XAxis, YAxis)

# Countries with fewer A.I. publications than this are left off the plot
# (stated in the footnote annotation below).
MIN_DOCUMENTS = 10000
top_publishers = df[df['Documents'] >= MIN_DOCUMENTS]

# x, y and the text labels are all taken from the same filtered frame so that
# every marker is labelled with its own country, whatever the row order of df.
trace0 = Scatter(
    x=top_publishers['Documents'],
    y=top_publishers['Clean Citation/Doc'],
    mode='markers+text',
    text=top_publishers['Country'],
    textposition='top center',
    textfont=Font(family='Arial'),
    marker=Marker(size=12),)


data = Data([trace0])
layout = Layout(
    xaxis=XAxis(autorange=True, title='Number of A.I. documents published'),
    # The plotted value is 'Clean Citation/Doc', i.e. a per-document ratio.
    yaxis=YAxis(autorange=True,
                title='Number of citations per document (self-citations excluded)'),
    legend=Legend(
        y=0.5,
        yref='paper',
        font=Font(size=18)),
    annotations=Annotations([
        # Source/author credit.
        # NOTE(review): no xref is given, so x=-0.5 is presumably interpreted
        # in x-axis data units rather than figure-relative -- confirm placement.
        Annotation(
            text='Data source: http://www.scimagojr.com<br>'\
                  'Author: Sebastian Raschka<br>'\
                  '(sebastianraschka.com, @rasbt)',
            yref='paper',
            align='left',
            x=-0.5,
            y=-0.2,
            yanchor='bottom',
            showarrow=False,
            font={'size': 7}),

        # Footnote explaining the asterisk in the title, centred above the plot.
        Annotation(
            text='*Countries with < 10k publications excluded.',
            yref='paper',
            xref='paper',
            align='center',
            y=1.05,
            x=0.5,
            yanchor='bottom',
            showarrow=False,
            font={'size': 12})]),
    title='A.I. Publications By Country* from 1996-2014',)

fig = Figure(data=data, layout=layout)
py.iplot(fig, filename='ai-publications-scatter-1')

Out[11]:

In [ ]:

Artificial Intelligence Publications¶

Preparing the Data¶

Number of "Artificial Intelligence" Publications from 1996 to 2014¶

Number of Citations/Publication (self-citations included)¶

Number of Citations/Publication (self-citations excluded)

AI Publications per Total Publications¶

Scatterplots¶

Number of Citations/Publication
(self-citations excluded)