%load_ext watermark
%watermark -a 'Sebastian Raschka' -d -v -p plotly,pandas
Sebastian Raschka 08/31/2015 CPython 3.4.3 IPython 4.0.0 plotly 1.8.3 pandas 0.16.2
import pandas as pd
# Country-name -> three-letter country-code lookup (e.g. Afghanistan -> AFG).
# Only the two columns needed for the later join are kept, and they are
# renamed so that 'Country' matches the column name used by the ranking tables.
raw_template = pd.read_csv('./data/template.csv')
templ = raw_template[['COUNTRY', 'CODE']].rename(
    columns={'COUNTRY': 'Country', 'CODE': 'Code'})
templ.head()
| | Country | Code |
---|---|---|
0 | Afghanistan | AFG |
1 | Albania | ALB |
2 | Algeria | DZA |
3 | American Samoa | ASM |
4 | Andorra | AND |
# Per-country ranking table for the AI subject area
# (presumably exported from scimagojr.com -- see the plot annotations).
ai_rank_csv = './data/scimagojr_ai_countryrank.csv'
rank_ai = pd.read_csv(ai_rank_csv)
rank_ai.head()
| | Rank | Country | Documents | Citable documents | Citations | Self-Citations | Citations per Document | H index |
---|---|---|---|---|---|---|---|---|
0 | 1 | China | 80685 | 0 | 286809 | 179188 | 11.22 | 150 |
1 | 2 | United States | 65565 | 0 | 1063805 | 339426 | 23.22 | 345 |
2 | 3 | Japan | 27049 | 0 | 139799 | 45035 | 8.44 | 116 |
3 | 4 | United Kingdom | 22460 | 0 | 266440 | 55920 | 18.00 | 174 |
4 | 5 | Germany | 17364 | 0 | 146713 | 30620 | 14.99 | 142 |
# "Clean" citation ratio: citations with self-citations subtracted,
# divided by the number of documents.
external_citations = rank_ai['Citations'].sub(rank_ai['Self-Citations'])
rank_ai['Clean Citation/Doc'] = external_citations.div(rank_ai['Documents'])

# Keep only the columns that are used further down.
kept_columns = [
    'Country',
    'Documents',
    'Citations per Document',
    'Clean Citation/Doc',
]
rank_ai = rank_ai[kept_columns]
rank_ai.head()
| | Country | Documents | Citations per Document | Clean Citation/Doc |
---|---|---|---|---|
0 | China | 80685 | 11.22 | 1.333841 |
1 | United States | 65565 | 23.22 | 11.048257 |
2 | Japan | 27049 | 8.44 | 3.503420 |
3 | United Kingdom | 22460 | 18.00 | 9.373108 |
4 | Germany | 17364 | 14.99 | 6.685844 |
# Overall (all subject areas) document counts per country; the count column
# is renamed so it does not collide with the AI 'Documents' column on merge.
all_rank_raw = pd.read_csv('./data/scimagojr_all_countryrank.csv')
rank_all = all_rank_raw[['Country', 'Documents']].rename(
    columns={'Documents': 'Documents_all'})

# Join AI and overall counts on the country name.
rank = pd.merge(rank_ai, rank_all, on='Country')

# Share of AI documents among all documents, in percent.
ai_fraction = rank['Documents'].div(rank['Documents_all'])
rank['AI/All Documents'] = ai_fraction.mul(100)
rank.head()
| | Country | Documents | Citations per Document | Clean Citation/Doc | Documents_all | AI/All Documents |
---|---|---|---|---|---|---|
0 | China | 80685 | 11.22 | 1.333841 | 3617355 | 2.230497 |
1 | United States | 65565 | 23.22 | 11.048257 | 8626193 | 0.760069 |
2 | Japan | 27049 | 8.44 | 3.503420 | 2074872 | 1.303647 |
3 | United Kingdom | 22460 | 18.00 | 9.373108 | 2397817 | 0.936685 |
4 | Germany | 17364 | 14.99 | 6.685844 | 2176860 | 0.797663 |
# Attach the three-letter country code to every ranking row and persist the
# result for the plotting cells below.
# NOTE(review): merge defaults to an inner join, so a country whose name is
# spelled differently in the two tables would be dropped silently -- confirm
# that this is acceptable.
df = pd.merge(rank, templ, on='Country')
df.to_csv(path_or_buf='./data/citations.csv', index=False)
import plotly.plotly as py
from plotly.graph_objs import Annotation, Annotations
import pandas as pd
# Choropleth 1: absolute number of AI publications per country.

# Read the merged per-country table back from disk.
df = pd.read_csv('./data/citations.csv')

# Colour stops as [position, colour] pairs; `reversescale` below flips them.
blue_scale = [
    [0, "rgb(5, 10, 172)"],
    [0.35, "rgb(40, 60, 190)"],
    [0.5, "rgb(70, 100, 245)"],
    [0.6, "rgb(90, 120, 245)"],
    [0.7, "rgb(106, 137, 247)"],
    [1, "rgb(220, 220, 220)"],
]

# Thin grey outline around every country.
country_borders = {'line': {'color': 'rgb(180,180,180)', 'width': 0.5}}

publications_colorbar = {
    'autotick': False,
    'tickprefix': '',
    'title': 'Number of Publications',
}

publications_trace = {
    'type': 'choropleth',
    'locations': df['Code'],      # three-letter country codes
    'z': df['Documents'],         # value that drives the colour
    'text': df['Country'],        # hover label
    'colorscale': blue_scale,
    'autocolorscale': False,
    'reversescale': True,
    'marker': country_borders,
    'tick0': 0,
    'zmin': 0,
    'dtick': 1000,
    'colorbar': publications_colorbar,
}
data = [publications_trace]

# Source/author credit pinned to the lower-left corner of the figure.
credit_text = '<br>'.join([
    'Data source: http://www.scimagojr.com',
    'Author: Sebastian Raschka',
    '(sebastianraschka.com, @rasbt)',
])
credit_note = Annotation(
    text=credit_text,
    xref='paper',
    yref='paper',
    align='left',
    x=0,
    y=0,
    yanchor='bottom',
    showarrow=False,
)

layout = {
    'title': 'Number of "Artificial Intelligence" Publications from 1996 to 2014',
    'geo': {
        'showframe': False,
        'showcoastlines': False,
        'projection': {'type': 'Mercator'},
    },
    'annotations': Annotations([credit_note]),
}

fig = {'data': data, 'layout': layout}
#py.image.save_as({'data': data}, './images/ai_publications_1.svg')
# validate=False: the figure is built from plain dicts and is passed through
# without plotly's client-side validation.
py.iplot(fig, validate=False, filename='ai-publications-chloropleth-1')
# Choropleth 2: citations per publication, self-citations included.

# Colour stops as [position, colour] pairs; `reversescale` below flips them.
scale_stops = [
    [0, "rgb(5, 10, 172)"],
    [0.35, "rgb(40, 60, 190)"],
    [0.5, "rgb(70, 100, 245)"],
    [0.6, "rgb(90, 120, 245)"],
    [0.7, "rgb(106, 137, 247)"],
    [1, "rgb(220, 220, 220)"],
]

# Thin grey outline around every country.
border_style = {'line': {'color': 'rgb(180,180,180)', 'width': 0.5}}

citations_colorbar = {
    'autotick': False,
    'tickprefix': '',
    'title': 'Number of Citations/Publication<br>(self-citations included)',
}

citations_trace = {
    'type': 'choropleth',
    'locations': df['Code'],
    'z': df['Citations per Document'],
    'text': df['Country'],
    'colorscale': scale_stops,
    'autocolorscale': False,
    'reversescale': True,
    'marker': border_style,
    'tick0': 0,
    'zmin': 0,
    'dtick': 1000,
    'colorbar': citations_colorbar,
}
data = [citations_trace]

# Source/author credit; only yref is set to 'paper' here (no xref).
source_credit = Annotation(
    text='<br>'.join([
        'Data source: http://www.scimagojr.com',
        'Author: Sebastian Raschka',
        '(sebastianraschka.com, @rasbt)',
    ]),
    yref='paper',
    align='left',
    x=0,
    y=0,
    yanchor='bottom',
    showarrow=False,
)

figure_title = (
    'Number Citations per Publication in "Artificial Intelligence" Research from 1996 to 2014'
    '<br>(self-citations included)'
)

layout = {
    'title': figure_title,
    'geo': {
        'showframe': False,
        'showcoastlines': False,
        'projection': {'type': 'Mercator'},
    },
    'annotations': Annotations([source_credit]),
}

fig = {'data': data, 'layout': layout}
#py.image.save_as({'data': data}, './images/ai_publications_2.svg')
py.iplot(fig, validate=False, filename='ai-publications-chloropleth-2')
# Choropleth 3: citations per publication, self-citations excluded
# (uses the 'Clean Citation/Doc' column).

# Colour stops as [position, colour] pairs; `reversescale` below flips them.
clean_scale = [
    [0, "rgb(5, 10, 172)"],
    [0.35, "rgb(40, 60, 190)"],
    [0.5, "rgb(70, 100, 245)"],
    [0.6, "rgb(90, 120, 245)"],
    [0.7, "rgb(106, 137, 247)"],
    [1, "rgb(220, 220, 220)"],
]

# Thin grey outline around every country.
outline = {'line': {'color': 'rgb(180,180,180)', 'width': 0.5}}

clean_colorbar = {
    'autotick': False,
    'tickprefix': '',
    'title': 'Number of Citations/Publication<br>(self-citations excluded)',
}

clean_trace = {
    'type': 'choropleth',
    'locations': df['Code'],
    'z': df['Clean Citation/Doc'],
    'text': df['Country'],
    'colorscale': clean_scale,
    'autocolorscale': False,
    'reversescale': True,
    'marker': outline,
    'tick0': 0,
    'zmin': 0,
    'dtick': 1000,
    'colorbar': clean_colorbar,
}
data = [clean_trace]

# Source/author credit; only yref is set to 'paper' here (no xref).
attribution = Annotation(
    text='<br>'.join([
        'Data source: http://www.scimagojr.com',
        'Author: Sebastian Raschka',
        '(sebastianraschka.com, @rasbt)',
    ]),
    yref='paper',
    align='left',
    x=0,
    y=0,
    yanchor='bottom',
    showarrow=False,
)

clean_title = (
    'Number Citations per Publication in "Artificial Intelligence" Research from 1996 to 2014'
    '<br>(self-citations excluded)'
)

layout = {
    'title': clean_title,
    'geo': {
        'showframe': False,
        'showcoastlines': False,
        'projection': {'type': 'Mercator'},
    },
    'annotations': Annotations([attribution]),
}

fig = {'data': data, 'layout': layout}
#py.image.save_as({'data': data}, './images/ai_publications_3.svg')
py.iplot(fig, validate=False, filename='ai-publications-chloropleth-3')
# Choropleth 4: AI publications as a percentage of all publications.

# Colour stops as [position, colour] pairs; `reversescale` below flips them.
share_scale = [
    [0, "rgb(5, 10, 172)"],
    [0.35, "rgb(40, 60, 190)"],
    [0.5, "rgb(70, 100, 245)"],
    [0.6, "rgb(90, 120, 245)"],
    [0.7, "rgb(106, 137, 247)"],
    [1, "rgb(220, 220, 220)"],
]

# Thin grey outline around every country.
borders = {'line': {'color': 'rgb(180,180,180)', 'width': 0.5}}

share_colorbar = {
    'autotick': False,
    'tickprefix': '',
    'title': 'AI/all publications in percent',
}

share_trace = {
    'type': 'choropleth',
    'locations': df['Code'],
    'z': df['AI/All Documents'],
    'text': df['Country'],
    'colorscale': share_scale,
    'autocolorscale': False,
    'reversescale': True,
    'marker': borders,
    'tick0': 0,
    'zmin': 0,
    'dtick': 1000,
    'colorbar': share_colorbar,
}
data = [share_trace]

# Source/author credit; only yref is set to 'paper' here (no xref).
byline = Annotation(
    text='<br>'.join([
        'Data source: http://www.scimagojr.com',
        'Author: Sebastian Raschka',
        '(sebastianraschka.com, @rasbt)',
    ]),
    yref='paper',
    align='left',
    x=0,
    y=0,
    yanchor='bottom',
    showarrow=False,
)

layout = {
    'title': 'Percentage of Publications in "Artificial Intelligence" Research from 1996 to 2014',
    'geo': {
        'showframe': False,
        'showcoastlines': False,
        'projection': {'type': 'Mercator'},
    },
    'annotations': Annotations([byline]),
}

fig = {'data': data, 'layout': layout}
#py.image.save_as({'data': data}, './images/ai_publications_4.svg')
py.iplot(fig, validate=False, filename='ai-publications-chloropleth-4')
import plotly.plotly as py
from plotly.graph_objs import *
# Scatter plot: number of AI documents (x) against citations per document
# with self-citations removed (y), one labelled marker per country.

# Only countries with at least 10k AI documents are shown. The SAME filtered
# frame feeds x, y and the text labels, so every marker is labelled with its
# own country regardless of how the rows of `df` are ordered. (Previously the
# labels came from the unfiltered column and only lined up because the table
# happened to be sorted by document count.)
top_publishers = df.loc[df['Documents'] >= 10000]

trace0 = Scatter(
    x=top_publishers['Documents'],
    y=top_publishers['Clean Citation/Doc'],
    mode='markers+text',
    text=top_publishers['Country'],
    textposition='top center',
    textfont=Font(family='Arial'),
    marker=Marker(size=12),)
data = Data([trace0])

layout = Layout(
    xaxis=XAxis(autorange=True, title='Number of A.I. documents published'),
    # The y column is a per-document ratio; the title now says so and its
    # parenthesis is closed.
    yaxis=YAxis(autorange=True,
                title='Number of citations per document (self-citations excluded)'),
    legend=Legend(
        y=0.5,
        yref='paper',
        font=Font(size=18)),
    annotations=Annotations([
        # Source/author credit.
        # NOTE(review): no xref is given, so x=-0.5 is presumably interpreted
        # in x-axis data coordinates rather than paper coordinates -- confirm
        # the intended placement.
        Annotation(
            text='Data source: http://www.scimagojr.com<br>'
                 'Author: Sebastian Raschka<br>'
                 '(sebastianraschka.com, @rasbt)',
            yref='paper',
            align='left',
            x=-0.5,
            y=-0.2,
            yanchor='bottom',
            showarrow=False,
            font={'size': 7}),
        # Footnote for the asterisk in the title, centred above the plot area.
        Annotation(
            text='*Countries with < 10k publications excluded.',
            yref='paper',
            xref='paper',
            align='center',
            y=1.05,
            x=0.5,
            yanchor='bottom',
            showarrow=False,
            font={'size': 12})]),
    title='A.I. Publications By Country* from 1996-2014',)

fig = Figure(data=data, layout=layout)
py.iplot(fig, filename='ai-publications-scatter-1')