Notebook

In [1]:

# load related library

In [2]:

# related library
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import geopandas as gpd

import ipywidgets
from ipywidgets import widgets 
from ipywidgets import *  
from IPython.display import display,clear_output

from ipywidgets import Layout
from traitlets import directional_link

from datetime import datetime
from datetime import date
from dateutil import rrule

from boto.s3.connection import S3Connection
import requests
from io import BytesIO
# When this notebook runs on mybinder, the imports above emit a RuntimeWarning:
# /srv/conda/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88
#   return f(*args, **kwds)
# According to information found online, this warning can be safely ignored.

In [3]:

#ipywidgets.__version__,matplotlib.__version__,pd.__version__,np.__version__,gpd.__version__

In [4]:

#ipywidgets.__version__,matplotlib.__version__,pd.__version__,np.__version__,gpd.__version__,quilt

In [5]:

#ipywidgets.__version__,matplotlib.__version__,pd.__version__,np.__version__,gpd.__version__

In [6]:

# prepare data for dropdown: continent list and country list

# Country table used to fill the two geolocation dropdowns; only its
# 'continent' and 'name' columns are read here.
# NOTE(review): gpd.datasets may not exist in newer geopandas releases — confirm the pinned version.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# "all" first, then the continents alphabetically. Sorting keeps the dropdown
# order stable between kernel restarts (plain set iteration order is arbitrary).
continent_list = ["all"] + sorted(set(world['continent']))

# continent -> ["all", country, ...]. No row has continent "all", so that key
# maps to just ["all"].
continent_country_dict = {
    continent: ["all"] + list(world[world['continent'] == continent]['name'])
    for continent in continent_list
}
    
#print ("continent list for dropdown:")
#print (continent_list)

In [7]:

# load dataset

# To run this locally, download the dataset from the link below and load it with pd.read_csv, as the next cell does.
# https://www.dropbox.com/sh/mt7by5f1wgl6n3z/AACddwkFPq5lPpH3ry83MgSDa?dl=0

In [11]:

# Read the four score tables from ./src in one pass:
# article/talk pages x people/events entities.
a_people, t_people, a_events, t_events = [
    pd.read_csv(csv_path)
    for csv_path in (
        "src/article_people_score_date_geo.csv",
        "src/talk_people_score_date_geo.csv",
        "src/article_events_score_date_geo.csv",
        "src/talk_events_score_date_geo.csv",
    )
]

In [16]:

# Two-level lookup used by the plotting code: complete_df[domain][group] -> DataFrame.
complete_df = {
    'Articles': {'People': a_people, 'Events': a_events},
    'Talks': {'People': t_people, 'Events': t_events},
}

In [18]:

# side functions
# Please run them before plotting

In [19]:

def exclude_BC(df):
    """Return the rows of ``df`` whose 'date' value does not start with "-".

    A leading minus sign in the 'date' text is treated as a date before
    Christ (BC). Missing or non-string dates are kept instead of raising.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'date' column.

    Returns
    -------
    pandas.DataFrame
        The non-BC rows, with their original index labels. ``df`` itself is
        not modified.
    """
    # Vectorised prefix test; astype(str) plus na=False make missing values count as "not BC".
    is_bc = df['date'].astype(str).str.startswith("-", na=False)
    return df[~is_bc]

In [20]:

def add_datetime_column(df,unit):
    """Parse the text column ``unit`` and attach the result as ``datetime_<unit>``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a column named ``unit`` holding date strings.
    unit : str
        "date" (parsed as %Y-%m-%d), "month" (%Y-%m) or "year" (%Y).

    Returns
    -------
    pandas.DataFrame
        A new frame with the extra column. For any other ``unit`` the values
        are parsed with an empty format string and no column is added.
    """
    formats = {"date": "%Y-%m-%d", "month": "%Y-%m", "year": "%Y"}
    fmt = formats.get(unit, "")
    parsed = [datetime.strptime(text, fmt) for text in df[unit]]
    if unit in formats:
        # .values drops the fresh 0..n-1 index so the column lines up by position.
        df = df.assign(**{"datetime_" + unit: pd.Series(parsed).values})
    return df

In [21]:

def add_month_column(df):
    """Return a copy of ``df`` with a 'month' column: the first 7 characters of 'date'."""
    # Positional assignment: .values discards the temporary Series index.
    month_keys = pd.Series([stamp[:7] for stamp in df['date']]).values
    return df.assign(month=month_keys)

In [22]:

def add_year_column(df):
    """Return a copy of ``df`` with a 'year' column: the first 4 characters of 'date'."""
    # Positional assignment: .values discards the temporary Series index.
    year_keys = pd.Series([stamp[:4] for stamp in df['date']]).values
    return df.assign(year=year_keys)

In [23]:

def score_median_group_by_column(df,tcn,scn):
    """Median of column ``scn`` for every distinct value of column ``tcn``.

    Parameters
    ----------
    df : pandas.DataFrame
    tcn : str
        Name of the grouping (time) column.
    scn : str
        Name of the score column to aggregate.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``tcn`` and ``scn``, one row per group, sorted by ``tcn``.
    """
    # The string name selects pandas' built-in group median; passing the
    # np.median callable to agg is deprecated in recent pandas versions.
    return df.groupby(tcn).agg({scn: "median"}).reset_index()
# df_month_score=score_median_group_by_column(monthSenti,'month','score')
# print (df_month_score.head())

In [24]:

def score_percentile_group_by_column(df,tcn,scn,percentile, lex):
    """Per-group quantile of the score, length and total columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``tcn``, ``pos_score_<lex>``, ``neg_score_<lex>``,
        'length' and 'total'.
    tcn : str
        Name of the grouping (time) column.
    scn : str
        Not used by the computation; kept so existing calls keep working.
    percentile : float
        Quantile in [0, 1], e.g. 0.25. Any value is accepted, not only the
        0.25 / 0.5 / 0.75 that ``describe()`` reports by default.
    lex : str
        Lexicon suffix of the positive/negative score columns.

    Returns
    -------
    pandas.DataFrame
        Columns ``tcn``, ``pos_score_<lex>``, ``neg_score_<lex>``, 'length',
        'total'; one row per group, sorted by ``tcn``.
    """
    # Restrict to the numeric columns of interest so text columns in df
    # (dates, country names, ...) never reach the quantile computation.
    value_columns = [f'pos_score_{lex}', f'neg_score_{lex}', 'length', 'total']
    return df.groupby(tcn)[value_columns].quantile(percentile).reset_index()

In [25]:

# plot function
def prepare_flowplot_percentiles(lexicon,sentiment,group,domain,geo,times,ra,window,unit,min_length):
    """Filter the selected score tables and aggregate them per time bucket.

    Parameters
    ----------
    lexicon : str
        Suffix of the score columns to use (the UI offers OL, MPQA, LIWC, ANEW).
    sentiment : str
        "pos", "neg" or "total"; "total" is the positive plus negative score.
    group : iterable
        Checkbox-like objects with ``.value`` (bool) and ``.description``
        ("People" / "Events"); every ticked one contributes its table.
    domain : str
        "Articles" or "Talks" (first-level key of ``complete_df``).
    geo : [continent, country]
        Either entry may be 'all'.
    times : [start_year, start_month, end_year, end_month]
        Integers; both end months are included.
    ra : bool
        When True, a centred rolling mean column "ra" is added to the
        median / 25% / 75% frames.
    window : int
        Rolling-window size, used only when ``ra`` is True.
    unit : str
        'month' or 'year'; selects the time bucket column.
    min_length : number
        Rows whose 'length' is below this value are dropped.

    Returns
    -------
    tuple
        ``(ok, median_df, p25_df, p75_df, score_column, x_values,
        size_df, statistics, start_date, end_date)``. When nothing can be
        plotted, ``ok`` is False, ``statistics`` still holds the messages
        collected so far, and every other slot is False.
    """
    statistics = []
    # Result for every early exit; the tuple holds a reference to the list,
    # so messages appended later still show up in it.
    no_result = (False,) * 7 + (statistics, False, False)

    # Fixed corpus counts quoted in the statistics text.
    dict_a_people = {"total":"1,146,257","date":"775,664","BC":"27"}
    dict_a_events = {"total":"54,071","date":"22,582","BC":"33"}
    dict_t_people = {"total":"415,124","date":"289,108","BC":"26"}
    dict_t_events = {"total":"21,621","date":"10,283","BC":"27"}
    statistics_dict = {"Articles":{"People":dict_a_people,"Events":dict_a_events},"Talks":{"People":dict_t_people,"Events":dict_t_events}}
    birth_occurrance = {"People":"birth","Events":"occurrance"}

    # One x-axis entry per month from the start month to the end month.
    start_yy,start_mm,end_yy,end_mm = times
    start_date = date(start_yy,start_mm,1)
    end_date = date(end_yy,end_mm,1)
    xaxis_value = list(rrule.rrule(rrule.MONTHLY,dtstart=start_date,until=end_date))

    time_column = 'datetime_'+unit

    # Collect the table of every ticked group, then concatenate once.
    wanted_columns = ['pos_score_'+lexicon,'neg_score_'+lexicon,'date','country','continent','length']
    frames = []
    for one in group:
        if one.value:
            group_stats = statistics_dict[domain][one.description]
            statistics.append("There are totally %s %s entities in Wikipedia %s."%(group_stats['total'],one.description,domain))
            statistics.append("Among them, %s have %s date information. Inside them we exclude %s entities whose date before Christ (BC)."%
                              (group_stats['date'],birth_occurrance[one.description],group_stats['BC']))
            frames.append(complete_df[domain][one.description][wanted_columns])

    # Empty tables contribute nothing; if none is left there is nothing to show.
    frames = [frame for frame in frames if not frame.empty]
    if not frames:
        statistics.append("Please select value for Group.")
        return no_result
    df = frames[0] if len(frames) == 1 else pd.concat(frames)
    statistics.append("After all, there are {:,d} entities with AC date information in this run.".format(len(df)))

    # filter length
    df = df[df['length']>=min_length]
    statistics.append("After filtering with the length of the document, there are {:,d} entities left.".format(len(df)))

    # filter by geolocation; continent 'all' means no geographic filter at all
    continent,country = geo
    if continent != 'all':
        if country == 'all':
            df = df[df['continent']==continent]
        else:
            # The dropdown name differs from the name stored in the data.
            if country == 'United States of America':
                country = 'United States'
            df = df[df['country']==country]
    statistics.append("After filtering with area, there are {:,d} entities left.".format(len(df)))

    if len(df)==0:
        statistics.append("No entities fit the requirements, please change the settings.")
        return no_result

    # add the time bucket column for the requested unit ...
    if unit == "month":
        df = add_month_column(df)
    elif unit == "year":
        df = add_year_column(df)

    # ... and its datetime counterpart, which is what the range filter compares
    df = add_datetime_column(df,unit)

    # keep only rows whose bucket lies on the x-axis
    df = df[df[time_column].isin(xaxis_value)]
    statistics.append("After filtering with date, there are {:,d} entities left, and collected in the plot.".format(len(df)))

    if len(df)==0:
        statistics.append("No entities fit the requirements, please change the settings.")
        return no_result

    # pick the score column for the requested sentiment
    df = df.assign(total=df["pos_score_"+lexicon]+df["neg_score_"+lexicon])
    if sentiment == "total":
        score_column = "total"
    else:
        score_column = sentiment+"_score_"+lexicon

    # median and quartiles per time bucket
    df_median = score_median_group_by_column(df,unit,score_column)
    df_median = add_datetime_column(df_median,unit)
    df_25percentile = score_percentile_group_by_column(df,time_column,score_column,0.25, lexicon)
    df_75percentile = score_percentile_group_by_column(df,time_column,score_column,0.75, lexicon)

    # Left-join onto the full x-axis so buckets without data appear with score 0.
    fill_temp = pd.DataFrame(xaxis_value,columns=[time_column])
    fill_df_median = fill_temp.merge(df_median,how='left')
    fill_df_median.loc[fill_df_median[score_column].isnull(),score_column]=0

    # Join on the unit-specific column (previously hard-coded to 'datetime_month',
    # which failed for unit == "year").
    fill_df_25percentile = fill_temp.merge(df_25percentile,how='left',on=time_column)
    fill_df_25percentile.loc[fill_df_25percentile[score_column].isnull(),score_column]=0

    fill_df_75percentile = fill_temp.merge(df_75percentile,how='left',on=time_column)
    fill_df_75percentile.loc[fill_df_75percentile[score_column].isnull(),score_column]=0

    if ra == True:
        # centred rolling mean over the zero-filled series
        fill_df_median["ra"] = fill_df_median[score_column].rolling(window,center=True).mean()
        fill_df_25percentile["ra"] = fill_df_25percentile[score_column].rolling(window,center=True).mean()
        fill_df_75percentile["ra"] = fill_df_75percentile[score_column].rolling(window,center=True).mean()

    # x values for the shaded band
    temp_x = fill_df_median[time_column].values

    # number of entities per bucket, 0 where a bucket has none
    df_time_size = df.groupby(time_column).size().reset_index(name='size')
    fill_df_time_size = fill_temp.merge(df_time_size,how='left').fillna(0)

    return True, fill_df_median,fill_df_25percentile,fill_df_75percentile,score_column,temp_x,fill_df_time_size,statistics,start_date,end_date

#     # set ax
#     myfontsize = 10
#     ax.set_title("median,percentile of "+sentiment+" score for "+group+" based on "+lexicon,fontsize=myfontsize)
#     ax.set_xlabel('time')
#     ax.set_ylabel('score')
#     ax.set_xlim([start_date,end_date])
#     [[item.set_color('b') for item in bp_dict[key]['boxes']] for key in bp.keys()]
#     [[item.set_color('b') for item in bp_dict[key]['whiskers']] for key in bp.keys()]
#     [[item.set_color('r') for item in bp_dict[key]['medians']] for key in bp.keys()]
    
#     for tick in bp_axes.get_xticklabels():
#                 tick.set_rotation(90)
    #plt.show()
#prepare_flowplot_percentiles("OL","total","People",domain,geo,times,ra,window,unit)   
#flowplot_percentiles("OL","total","People",["Europe","all"],"Articles","month",[1940,1,1],[1960,1,1])

In [26]:

# widget 1: median of sentiment score for Wikipedia concepts over time grouped by month

In [27]:

# define the action taken when the update ("Go") button is clicked
def on_button_clicked(b):
    """Click handler of the "Go" button: read the settings, aggregate, render.

    Reads the current values of the input widgets, calls
    ``prepare_flowplot_percentiles`` and then shows
    * a two-panel figure in ``box_out`` (entity counts on top, median score
      with the 25%-75% band below), and
    * the collected statistics messages in ``text_out``.

    Parameters
    ----------
    b : the clicked button; not used.
    """
    # collect the settings from the widgets
    geo = [dropdown_continent.value, dropdown_country.value]
    time = [dropdown_start_year.value, dropdown_start_month.value,
            dropdown_end_year.value, dropdown_end_month.value]
    lexicon = radio_button_lexicon.value
    sentiment = sen_dict[radio_button_sentiment.value]
    domain = radio_button_domain.value
    # the checkbox widgets themselves; their .value/.description are read downstream
    group = cb_container.children
    ra = checkbox_ra.value
    window = dropdown_ra.value
    min_length = dropdown_length.value

    # this widget always groups by month
    unit = "month"

    # box that will hold the statistics messages
    output_label_str_container = widgets.VBox(layout=Layout(width='100%',border='solid 1px'))

    exist, df50,df25,df75,score_column,percentile_x,df_time_size,output_label_list,s_date,e_date =prepare_flowplot_percentiles(lexicon,sentiment,group,domain,geo,time,ra,window,unit,min_length)

    fig = None
    if exist:
        # with rolling average enabled, plot the smoothed 'ra' column instead
        if ra == True:
            score_column = 'ra'

        # NOTE(review): with a 2x1 grid, sharex='row' does not link the two
        # panels' x-axes (each row has a single axes) — confirm this is intended.
        fig,axes = plt.subplots(2,1,sharex='row', figsize=(15,8))
        ax0,ax1 = axes.flatten()
        ax = [ax0,ax1]

        # top panel: one bar per time bucket, positioned by bucket index
        time_size_loc = np.arange(len(df_time_size['datetime_'+unit].tolist()))
        ax[0].bar(time_size_loc,df_time_size['size'],width=0.8, align='edge')

        # bottom panel: median line with the inter-quartile band
        df50.plot(x='datetime_'+unit, y=score_column, color='r', ax=ax[1])
        ax[1].fill_between(percentile_x,df25[score_column],df75[score_column],color='b',alpha=0.2)

        ax[0].set_xlim(0,time_size_loc.shape[0])
        ax[0].get_xaxis().set_visible(False)
        ax[1].get_legend().remove()

        # titles and axis labels
        plot_title_dict = {"ra":"rolling average of median score"}
        ax[0].set_title("Number of entities from %s to %s matching the current setting (grouped by month)"%(s_date.strftime("%B %Y"),e_date.strftime("%B %Y")))
        ax[0].set_ylabel("number of entities")
        ax[1].set_title("Corresponding %s for each month"%(plot_title_dict.get(score_column,"median score")))
        ax[1].set_ylabel("score")
        ax[1].set_xlabel("time")

    output_labels = [widgets.HTML(value=i) for i in output_label_list]
    output_label_str_container.children = list(output_labels)

    with box_out:
        if fig is None:
            # Nothing to plot: clear the previous figure immediately rather
            # than displaying the literal text "None".
            clear_output()
        else:
            clear_output(wait=True)
            display(fig)
    with text_out:
        clear_output(wait=True)
        display(output_label_str_container)

    # The figure was rendered explicitly above; close it so it is not kept open.
    if fig is not None:
        plt.close(fig)

In [28]:

# framework
#     lexicon container: Which lexicon you want to choose? (OL, MPQA, LIWC)
#         title html
#         radio_button
#     group container: Which group you want to show? (People, Events)
#         title html
#         checkboxes container:
#             checkbox
#             checkbox
#     area filter: Do you want to filter out the continent or country for entities?
#         title html
#         dropdown_continent
#         dropdown_country
#     domain container: What domain you want to use? (Article, Talks)
#         title html
#         checkboxes container:
#             radio_button
#     time filter: Set the start and stop of time range
#         dropdown
#         

# change label to HTML (for set style more flexible)
    
# shared layout values
container_width = 'auto'
with_border_layout = Layout(border='solid 0.5px')


# Header: the introduction / usage text shown above the controls.
header_text = """<h1>WikiSentiFlow</h1><br>
<p>Introduction
<ul>
<li>This widget is used to show the changes of sentiment scores for Wikipedia entities (concepts) with time. It shows 25%, 50% (median), 75% quantile of sentiment score for each month to present the sentiment distribution.</li>
<li>Wikipedia entities contains entities in Wikipedia Article and Wikipedia Talk. Talk page is an area for editors to discuss about corresponding article, which can be visited from upper left side of article page.</li>
<li>This widget includes entities both People and Events. The date for people indicates birth date, while the date for events indicates occurrance date.</li>
<li>The text of Articles and Talks is extracted from Wikipedia Dump, and time stamps are extracted from DBPedia.</li>
<li>The scores are calculated with term frequency for sentiment words based on certain lexicons (OL, MPQA, LIWC, ANEW). For ANEW we take valency into account too.</li>
</ul></p>
<p>How to use
<ol>
<li>Select value for Lexicon, Sentiment, Group, Domain, Geolocation, Time, Minimum Length.</li>
<li>Rolling average is optional. Tick it and set a window size then a rolling average of median will be showed. Otherwise the median will be showed.</li>
<li>Click "Go" to run it.</li>
</ol>
</p>
<p>Results
<ul><li>For each run there will be two plots sharing x-axis. 
The first plot is the number of entities matching the setting.
The second plot shows a red line with purple shadows representing the corresponding scores. 
The red line presents median (50% quantile) of scores based on your settings.
And the shadows cover 25% to 75% quantile of the scores.
If rolling average is ticked, then the shadows and red line refer to rolling average of 25%, 50%, 75% quantile.
The time is splited with month as unit.</li>
<li>In the bottom you will get the data characteristics for the current run.</li></ul></p>"""
html_header = widgets.HTML(value=header_text)
header_container = widgets.VBox(
    children=[html_header],
    layout=Layout(width='100%',border='solid 0.5px'),
)


# Lexicon column: a bold title above one radio button per lexicon.
html_lexicon = widgets.HTML(value="<b>Lexicon</b>")
radio_button_lexicon = widgets.RadioButtons(
    options=['OL', 'MPQA', 'LIWC','ANEW'],
    disabled=False,
)
# The layout object of this container is reused by the other narrow columns.
lexicon_container = widgets.VBox(
    children=[html_lexicon,radio_button_lexicon],
    layout=Layout(width='8%',border='solid 0.5px'),
)


# Sentiment column: a bold title above the three sentiment choices.
html_sentiment = widgets.HTML(value="<b>Sentiment</b>")
radio_button_sentiment = widgets.RadioButtons(
    options=['Total', 'Positive', 'Negative'],
    disabled=False,
)
# Shares the lexicon column's layout object.
sentiment_container = widgets.VBox(
    children=[html_sentiment,radio_button_sentiment],
    layout=lexicon_container.layout,
)


# Group column: a bold title above one checkbox per entity group.
html_group = widgets.HTML(value="<b>Group</b>")
# One unticked checkbox per group, each with its own layout object.
checkboxes = [
    widgets.Checkbox(description=group_label, value=False, layout=Layout(left='-80px'))
    for group_label in ('People', 'Events')
]
# The click handler reads the ticked groups from cb_container.children.
cb_container = widgets.VBox(children=list(checkboxes), layout=Layout())
# Shares the lexicon column's layout object.
group_container = widgets.VBox(
    children=[html_group,cb_container],
    layout=lexicon_container.layout,
)


# Geolocation column: continent dropdown plus a dependent country dropdown.
html_area = widgets.HTML(value="<b>Geolocation</b>")
dropdown_continent = widgets.Dropdown(
    options=continent_list,
    value='all',
    description='Continent:',
    layout=Layout(width='240px',left='-10px'),
    disabled=False,
)
# The country options are not set here; the link below supplies them.
dropdown_country = widgets.Dropdown(
    description='Country:',
    layout=Layout(width='240px',left='-10px'),
    disabled=False,
)


def transform(case):
    """Map a continent name to the country options offered for it."""
    country_options = continent_country_dict[case]
    return country_options


# One-way link: the selected continent drives the country dropdown's options.
directional_link((dropdown_continent,'value'),(dropdown_country,'options'),transform)
area_container = widgets.VBox(
    children=[html_area,dropdown_continent,dropdown_country],
    layout=Layout(width='24%',border='solid 0.5px'),
)


# Domain column: a bold title above the Articles / Talks choice.
html_domain = widgets.HTML(value="<b>Domain</b>")
radio_button_domain = widgets.RadioButtons(
    options=['Articles', 'Talks'],
    disabled=False,
)
# Shares the lexicon column's layout object.
domain_container = widgets.VBox(
    children=[html_domain,radio_button_domain],
    layout=lexicon_container.layout,
)


# Time column: a start row and an end row, each a (year, month) dropdown pair.
html_time = widgets.HTML(value="<b>Time</b>")

# selectable values: years 1500..2018 and months 1..12
year_list = list(range(1500,2019))
month_list = list(range(1,13))
time_style = {'description_width': '63%'}

# start of the range; the default year is the 100th entry from the end of year_list
dropdown_start_year = widgets.Dropdown(
    options=year_list,
    value=year_list[-100],
    description='Start (year, month):',
    style=time_style,
    layout=Layout(width='190px'),
    disabled=False,
)
dropdown_start_month = widgets.Dropdown(
    options=month_list,
    layout=Layout(width='50px'),
    disabled=False,
)
time_container_start = widgets.HBox(children=[dropdown_start_year,dropdown_start_month])

# end of the range; the default year is the last entry of year_list
dropdown_end_year = widgets.Dropdown(
    options=year_list,
    description='End (year, month):',
    style=time_style,
    value=year_list[-1],
    layout=Layout(width='190px'),
    disabled=False,
)
dropdown_end_month = widgets.Dropdown(
    options=month_list,
    layout=Layout(width='50px'),
    disabled=False,
)
time_container_end = widgets.HBox(children=[dropdown_end_year,dropdown_end_month])

time_container = widgets.VBox(
    children=[html_time,time_container_start,time_container_end],
    layout=Layout(width='26%',border='solid 0.5px'),
)


# Rolling average: a checkbox plus a window-size dropdown that is only
# enabled while the checkbox is ticked.
checkbox_ra = widgets.Checkbox(layout=Layout(left='-13px'),value=False,description='Rolling Average')
checkbox_ra_container = widgets.HBox(children=[checkbox_ra])

# window sizes 3..49
ra_list = list(range(3,50))
ra_style = {'description_width': '60%'}
dropdown_ra = widgets.Dropdown(
    options=ra_list,
    description='Windows Size:',
    style=ra_style,
    layout=Layout(width='170px',left='-6px'),
    disabled=True,
)
dropdown_ra_container = widgets.HBox(children=[dropdown_ra])


def transform_ra(case):
    """Map the checkbox state to the dropdown's ``disabled`` flag (its inverse)."""
    disabled_for = {True: False, False: True}
    return disabled_for[case]


# One-way link: ticking the checkbox enables the window-size dropdown.
directional_link((checkbox_ra,'value'),(dropdown_ra,'disabled'),transform_ra)
ra_container = widgets.VBox(
    children=[checkbox_ra_container,dropdown_ra_container],
    layout=Layout(align_items='center',width='100%'),
)

# Minimum document length: selectable values 0, 5, ..., 45.
length_list = list(range(0,50,5))
length_style = {'description_width':'70%'}
dropdown_length = widgets.Dropdown(
    options=length_list,
    description='Minimum Length:',
    style=length_style,
    layout=Layout(width='176px',left='-9px'),
    disabled=False,
)


# Right-most column: rolling-average controls, minimum length, and the "Go" button.
# The button's click handler is attached where the widget is displayed.
button = widgets.Button(description="Go",button_style='primary',layout=Layout(width='90%'))
update_container = widgets.VBox(
    children=[ra_container,dropdown_length,button],
    layout=Layout(align_items='center',width='16%'),
)


# Input panel: all control columns side by side in a single row.
input_columns = [lexicon_container, sentiment_container,group_container,domain_container,area_container,time_container,update_container]
input_container = widgets.HBox(
    children=input_columns,
    layout=Layout(
        display='flex',
        flex_flow='row',
        align_items='stretch',
        border='solid 0.5px',
    ),
)


# Output area: the figure goes into box_out, the statistics text into text_out.
box_out = ipywidgets.Output(layout=Layout(width='100%',height='500px',border='solid 0.5px'))
text_out = widgets.Output(layout=Layout(width='100%',border='solid 0.5px'))

# Stack the plot above the text.
output_container = widgets.VBox(
    children=[box_out,text_out],
    layout=Layout(border='solid 0.5px'),
)


# Sentiment radio-button label -> prefix used in the score column names.
sen_dict = {"Total":"total","Positive":"pos","Negative":"neg"}

# Top-level container: header, input panel and output area stacked vertically.
all_container = widgets.VBox(
    children=[header_container,input_container,output_container],
    layout=with_border_layout,
)

# Show the widget, then hook up the "Go" button.
display(all_container)
button.on_click(on_button_clicked)

VBox(children=(VBox(children=(HTML(value='<h1>WikiSentiFlow</h1><br>\n<p>Introduction\n<ul>\n<li>This widget i…

Link to WikiSentiScatter