import os

import cmtutils as cu
import cmtutils.nipsy as nipsy
import cmtutils.plot as plot
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import mlai as ma

reviews = nipsy.load_review_history()

# Cumulative number of reviews entered over the review period.
review_count = pd.Series(index=nipsy.review_date_range, dtype=float)
for date in nipsy.review_date_range:
    review_count.loc[date] = nipsy.reviews_before(reviews, date).Quality.shape[0]

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
review_count.plot(linewidth=3, ax=ax)
plot.deadlines(ax)
ma.write_figure(filename='review-count.svg', directory='./neurips')

# Date on which each paper's review record was last seen in the system.
lastseen = reviews.drop_duplicates(subset='ID').set_index('ID')
lastseen = lastseen['LastSeen']

# Number of reviews in place for each paper on each date.
review_count = pd.DataFrame(index=reviews.ID.unique(), columns=nipsy.review_date_range)
for date in nipsy.review_date_range:
    counts = nipsy.reviews_status(reviews, date, column='Quality').count(level='ID')
    review_count[date] = counts.fillna(0)
review_count.fillna(0, inplace=True)

# Blank out dates after a paper was last seen, so stale papers don't count.
review_count = review_count.T
for col in review_count.columns:
    if pd.notnull(lastseen[col]):
        review_count.loc[review_count.index > lastseen[col], col] = np.nan
review_count = review_count.T

# Minimum, maximum, median and mean number of reviews per paper over time.
fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
review_count.min().plot(linewidth=3, ax=ax)
review_count.max().plot(linewidth=3, ax=ax)
review_count.median().plot(linewidth=3, ax=ax)
review_count.mean().plot(linewidth=3, ax=ax)
plot.deadlines(ax)
ma.write_figure(filename='number-of-reviews-over-time.svg', directory='./neurips')

# Number of papers with fewer than three reviews on each date.
count = pd.Series(index=nipsy.review_date_range, dtype=float)
for date in nipsy.review_date_range:
    count[date] = (review_count[date] < 3).sum()

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
count.plot(linewidth=3, ax=ax)
plot.deadlines(ax)
ma.write_figure(filename='paper-short-reviews.svg', directory='./neurips')

# Evolution of review confidence over time, and early vs late comparison.
fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
plot.evolving_statistic(reviews, 'Conf', window=4, ax=ax)
ma.write_figure(filename='review-confidence-time.svg', directory='./neurips')

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
column = "Conf"
cat1, cat2 = nipsy.late_early_values(reviews, column)
plot.late_early(cat1, cat2, column=column, ylim=(3.2, 3.8), ax=ax)
ma.write_figure(filename='review-confidence-early-late.svg', directory='./neurips')

# The same analysis for the quality score.
fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
plot.evolving_statistic(reviews, column='Quality', window=4, ax=ax)
ma.write_figure(filename='review-quality-time.svg', directory='./neurips')

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
column = "Quality"
cat1, cat2 = nipsy.late_early_values(reviews, column)
plot.late_early(cat1, cat2, column=column, ylim=(5.0, 5.6), ax=ax)
ma.write_figure(filename='review-quality-early-late.svg', directory='./neurips')

# And for the impact score.
fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
plot.evolving_statistic(reviews, 'Impact', window=4, ax=ax)
ma.write_figure(filename='review-impact-time.svg', directory='./neurips')

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
column = "Impact"
cat1, cat2 = nipsy.late_early_values(reviews, column)
plot.late_early(cat1, cat2, column=column, ylim=(1, 1.4), ax=ax)
ma.write_figure(filename='review-impact-early-late.svg', directory='./neurips')

# Review length: total characters across the comments and summary fields.
reviews['length'] = reviews['Comments'].apply(len) + reviews['Summary'].apply(len)

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
plot.evolving_statistic(reviews, 'length', window=4, ax=ax)
ma.write_figure(filename='review-length-time.svg', directory='./neurips')
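# Compatibility note: the `level=` argument of Series.count and Series.sum,
# used on the output of nipsy.reviews_status in this analysis, was deprecated
# in pandas 1.3 and removed in pandas 2.0. A minimal sketch of the
# version-robust equivalent, shown on a toy MultiIndex Series:
_demo = pd.Series([8.0, np.nan, 6.0],
                  index=pd.MultiIndex.from_tuples([(1, 'r1'), (1, 'r2'), (2, 'r1')],
                                                  names=['ID', 'Reviewer']))
assert (_demo.groupby(level='ID').count() == [1, 1]).all()  # non-null values per ID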
fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
column = "length"
cat1, cat2 = nipsy.late_early_values(reviews, column)
plot.late_early(cat1, cat2, column=column, ylim=(2000, 2500), ax=ax)
ma.write_figure(filename='review-length-early-late.svg', directory='./neurips')

# Mean quality score for each paper on each date.
review_quality = pd.DataFrame(index=reviews.ID.unique(), columns=nipsy.review_date_range)
for date in nipsy.review_date_range:
    qual = nipsy.reviews_status(reviews, date, column='Quality')
    # sum/count rather than mean: mean(level='ID') appears buggy in pandas 1.2.4.
    review_quality[date] = qual.sum(level='ID') / qual.count(level='ID')

# Match up the duplicated papers to their originals.
original_pairs = pd.read_csv(os.path.join(nipsy.review_store, 'Duplicate_PaperID_Pairs.csv'),
                             index_col='original')
duplicate_pairs = pd.read_csv(os.path.join(nipsy.review_store, 'Duplicate_PaperID_Pairs.csv'),
                              index_col='duplicate')
duplicate_reviews = duplicate_pairs.join(review_quality, how="inner").set_index('original')
original_reviews = original_pairs.join(review_quality, how="inner")
del original_reviews["duplicate"]

# Correlation between the two committees' quality scores on each date.
corr_series = duplicate_reviews.corrwith(original_reviews)
corr_series.index = pd.to_datetime(corr_series.index)

def bootstrap_index(df):
    """Resample a data frame's index with replacement."""
    n = len(df.index)
    return df.index[np.random.randint(n, size=n)]

# Bootstrap the correlation by resampling the duplicated papers.
bootstrap_corr_df = pd.DataFrame(index=corr_series.index)
for i in range(1000):
    ind = bootstrap_index(original_reviews)
    b_corr_series = duplicate_reviews.loc[ind].corrwith(original_reviews.loc[ind])
    b_corr_series.index = pd.to_datetime(b_corr_series.index)
    bootstrap_corr_df[i] = b_corr_series

import datetime as dt

# Plot the bootstrap samples (centred on the final mean) behind the observed
# correlation trajectory.
fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
final_vals = bootstrap_corr_df.loc[bootstrap_corr_df.index.max()]
total_mean = final_vals.mean()
(bootstrap_corr_df - final_vals + total_mean).plot(legend=False, ax=ax,
                                                   linewidth=1, alpha=0.05, color='k')
corr_series.plot(ax=ax, linewidth=3, color="w")
ax.set_ylim(0.45, 0.65)
ax.set_xlim(dt.datetime(2014, 7, 23), nipsy.events['decisions_despatched'])
ax.set_title("Correlation of Duplicate Reviews over time")
plot.deadlines(ax)
ma.write_figure(filename='correlation-duplicate-reviews-bootstrap.svg', directory='./neurips')

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
corr_series.plot(ax=ax, linewidth=3)
ax.set_ylim(0.5, 0.6)
ax.set_xlim(dt.datetime(2014, 7, 23), nipsy.events['decisions_despatched'])
ax.set_title("Correlation of Duplicate Reviews over time")
plot.deadlines(ax)
ma.write_figure(filename='correlation-duplicate-reviews.svg', directory='./neurips')

# Second analysis: citation counts versus review scores.
import matplotlib.pyplot as plt
plt.rcParams.update({'font.size': 22})
import cmtutils as cu
import cmtutils.nipsy as nipsy
import cmtutils.plot as plot
import pandas as pd
import numpy as np

papers = cu.Papers()

UPDATE_IMPACTS = False  # Set to True to download impacts from Semantic Scholar

if UPDATE_IMPACTS:
    from datetime import datetime
    date = datetime.today().strftime('%Y-%m-%d')
else:
    date = "2021-06-11"

# Rerun to download impacts from Semantic Scholar.
if UPDATE_IMPACTS:
    semantic_ids = nipsy.load_semantic_ids()
    # Note: `citations` must already be loaded (e.g. from a previously cached
    # run) before its counts can be updated here.
    citations_dict = citations.to_dict(orient='index')
    # Need to be a bit cleverer here: Semantic Scholar will throttle this call.
    sscholar = nipsy.download_citation_counts(citations_dict=citations_dict,
                                              semantic_ids=semantic_ids)
    citations = pd.DataFrame.from_dict(citations_dict, orient="index")
    citations.to_pickle(date + '-semantic-scholar-info.pickle')
else:
    citations = nipsy.load_citation_counts(date=date)
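# The "throttle" comment above flags that bulk Semantic Scholar queries get
# rate limited. A minimal sketch of one way to be cleverer about it (not part
# of cmtutils; `call` stands in for whatever performs a single API request):
import time

def with_backoff(call, retries=5, base_delay=1.0):
    """Retry a throttled call, sleeping exponentially longer each attempt."""
    for attempt in range(retries):
        try:
            return call()
        except RuntimeError:  # e.g. raised by `call` on an HTTP 429 response
            time.sleep(base_delay * 2 ** attempt)
    raise RuntimeError("still throttled after {n} attempts".format(n=retries))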
decisions = nipsy.load_decisions()
nipsy.augment_decisions(decisions)
joindf = nipsy.join_decisions_citations(decisions, citations)

import matplotlib.pyplot as plt
import mlai as ma

# Citations against review scores for each score column and decision filter
# (x tick labels are suppressed only on the all-submissions plot).
for column, filter_col in [("average_calibrated_quality", "all"),
                           ("average_calibrated_quality", "accept"),
                           ("average_calibrated_quality", "reject"),
                           ("average_impact", "accept"),
                           ("average_confidence", "accept")]:
    fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
    plot.log_one_citations(column, joindf, filt=joindf[filter_col], ax=ax)
    if filter_col == "all":
        ax.set_xticks([])
    ma.write_figure(filename="citations-vs-{col}-{filt}.svg".format(
                        filt=filter_col, col=column.replace("_", "-")),
                    directory="./neurips")

def bootstrap_index(df):
    """Resample a data frame's index with replacement."""
    n = len(df.index)
    return df.index[np.random.randint(n, size=n)]

# Bootstrap the correlation between each score and log(1 + citations) across
# accepted papers.
for column in ["average_quality", "average_impact", "average_confidence"]:
    cor = []
    for i in range(1000):
        ind = bootstrap_index(joindf.loc[joindf.accept])
        cor.append(joindf.loc[ind][column].corr(np.log(1 + joindf.loc[ind]['numCitedBy'])))
    cora = np.array(cor)
    rho = cora.mean()
    twosd = 2 * np.sqrt(cora.var())
    print("{column}".format(column=column.replace("_", " ")))
    print("Mean correlation is {rho} +/- {twosd}".format(rho=rho, twosd=twosd))
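# An alternative summary for the bootstrap above: rather than mean +/- two
# standard deviations, a percentile interval reads the 2.5% and 97.5%
# quantiles straight off the bootstrap samples (`cora` here is the array left
# over from the last column in the loop), with no normality assumption:
low, high = np.percentile(cora, [2.5, 97.5])
print("95% bootstrap interval: [{low:.3f}, {high:.3f}]".format(low=low, high=high))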