import pandas as pd

def load_jcr_old(year):
    """Load one year's JCR table and return it indexed by ISSN.

    Reads ``data/SCIE_JCR<year>.csv`` (skipping its first two lines), keeps
    the ``ISSN``, ``Impact Factor`` and ``{<year>} Articles`` columns, and
    renames the latter two to ``IF<year>`` and ``A<year>``.

    Args:
        year: Year used both in the file name and in the column labels.

    Returns:
        A DataFrame indexed by ``ISSN`` with columns ``IF<year>`` and
        ``A<year>``.
    """
    suffix = str(year)
    # The articles column header literally contains braces, e.g. "{2000} Articles".
    source_names = ['ISSN', 'Impact Factor', '{' + suffix + '} Articles']
    target_names = ['ISSN', 'IF' + suffix, 'A' + suffix]

    raw = pd.read_csv('data/SCIE_JCR{}.csv'.format(year), skiprows=2)
    selected = raw[source_names].rename(columns=dict(zip(source_names, target_names)))
    return selected.set_index('ISSN')

# Load a single year first as a quick check of the loader, and peek at it.
tbl2000 = load_jcr_old(2000)
tbl2000.head()

# List the available data files. The original line here was the IPython
# shell shortcut `ls data`, which is a SyntaxError in plain Python; the
# os.listdir call below works both in a script and in a notebook.
import os
print(sorted(os.listdir('data')))

# One table per JCR year, 2000 through 2012 inclusive.
oldtables = [load_jcr_old(y) for y in range(2000, 2013)]

from functools import reduce
def _outer_join_on_index(left, right):
    """Outer-join two yearly tables on their (ISSN) index."""
    return pd.merge(left, right, left_index=True, right_index=True, how='outer')


# Fold all yearly tables into one wide table, then drop every journal that
# has no value in the 2012 article-count column.
alloldtables = reduce(_outer_join_on_index, oldtables).dropna(subset=('A2012',))

alloldtables.tail()

# Load the 2013 table: skip its first line, treat the text 'Not Available'
# as missing, parse numbers written with ',' thousands separators, drop
# exact duplicate rows and keep only the three columns used below.
# .copy() makes `tbl` an independent frame, because columns are assigned
# onto it further down.
tbl = pd.read_csv(
    'data/2013_IF.csv', skiprows=1, na_values=('Not Available',), thousands=','
).drop_duplicates()[['Full Journal Title', 'Journal Impact Factor', 'Total Cites']].copy()

# Upper-case the titles; any non-string entry (e.g. a missing value)
# becomes the empty string.
tbl['Full Journal Title'] = tbl['Full Journal Title'].apply(
    lambda x: x.upper() if isinstance(x, str) else '')

tbl.head()

# A2013 is derived as Total Cites / Impact Factor / 2, truncated to int, and
# named to line up with the A<year> columns of the older tables.
# A zero impact factor would make the division produce +/-inf, which cannot
# be converted to int, so zeros are masked to NaN first and end up as 0 like
# every other undefined value.
nonzero_if = tbl['Journal Impact Factor'].where(tbl['Journal Impact Factor'] != 0)
tbl['A2013'] = (tbl['Total Cites'] / nonzero_if / 2).fillna(0).astype(int)
tbl['IF2013'] = tbl['Journal Impact Factor']

tbl.head()

# Only these columns take part in the later merge by journal title.
jcr2013 = tbl[['A2013', 'IF2013', 'Full Journal Title']]

# Journal-to-category table for 2012: skip the first two lines of the file
# and keep the identifying columns plus the subject category.
category2012 = pd.read_csv(
    'data/SCIE_JCR2012-category.csv',
    skiprows=2,
)[['ISSN', 'Subject Category', 'Journal Title (Full)', 'Abbreviated Journal Title']]

category2012.head()

# Subject categories whose journals are kept for the analysis.
selected_categories = {
    'BIOCHEMICAL RESEARCH METHODS',
    'BIOCHEMISTRY & MOLECULAR BIOLOGY',
    'BIOLOGY',
    'BIOTECHNOLOGY & APPLIED MICROBIOLOGY',
    'CELL BIOLOGY',
    'BIOPHYSICS',
    'CRYSTALLOGRAPHY',
    'DEVELOPMENTAL BIOLOGY',
    'GENETICS & HEREDITY',
    'IMMUNOLOGY',
    'MATHEMATICAL & COMPUTATIONAL BIOLOGY',
    'MICROBIOLOGY',
    'MYCOLOGY',
    'MULTIDISCIPLINARY SCIENCES',
    'NEUROSCIENCES',
    'PLANT SCIENCES',
    'VIROLOGY',
}

# Rows whose subject category is one of the selected ones.
selectedcategory2012 = category2012.loc[
    category2012['Subject Category'].isin(selected_categories)]

# Row counts before and after filtering (displayed when run interactively).
len(category2012), len(selectedcategory2012)

# Attach the 2000-2012 yearly columns to the selected journals by ISSN
# (alloldtables is indexed by ISSN). The inner join drops journals missing
# from either side.
merge1 = pd.merge(selectedcategory2012, alloldtables, how='inner', left_on='ISSN', right_index=True)

# Attach the 2013 columns by full journal title, then order by 2013 impact
# factor, highest first. DataFrame.sort() no longer exists in pandas;
# sort_values() is its replacement and takes the same arguments here.
merge2 = pd.merge(merge1, jcr2013, how='inner', left_on='Journal Title (Full)',
                  right_on='Full Journal Title').sort_values('IF2013', ascending=False)

merge2.head()

# Save the merged table, then continue under the name `gtbl`. This is an
# alias, not a copy: columns added to gtbl later also appear on merge2.
merge2.to_csv('JCRMerged.csv')
gtbl = merge2

# NOTE(review): `np` is not imported in the visible part of this file; it is
# presumably provided by the interactive session -- confirm, or add
# `import numpy as np` at the top.

# Years (inclusive) over which the impact-factor trend is fitted.
trend_first_year = 2011
trend_last_year = 2013

trendfit_years = range(trend_first_year, trend_last_year+1)
trendfit_if_labels = ['IF{}'.format(y) for y in trendfit_years]

# log2 of each year's impact factor, stored in LOGIF<year> columns.
for y in trendfit_years:
    gtbl['LOGIF{}'.format(y)] = np.log2(gtbl['IF{}'.format(y)])

logif_labels = ['LOGIF{}'.format(y) for y in trendfit_years]

# Slope of a straight-line fit of log2(IF) against year, one value per row.
# The fit runs over the numeric LOGIF columns only, so each row reaches
# np.polyfit as a float array; rows taken from the full table are
# object-dtype because of its text columns, which the least-squares solver
# does not accept.
trendfit_x = list(trendfit_years)
gtbl['trend_slope'] = gtbl[logif_labels].apply(
        (lambda x: np.polyfit(trendfit_x, x.values.astype(float), 1)[0]), axis=1)

# Peek at the last six columns. `.ix` has been removed from pandas;
# `.iloc` is the positional equivalent.
gtbl.iloc[:, -6:].head()

gtbl['trend_slope'].hist(range=(-2, 2), bins=50)

# n-th root of the product of the n trend-year impact factors.
# 1.0 keeps the exponent a true fraction even under integer division.
gtbl['recent_if'] = (
    gtbl[trendfit_if_labels].product(axis=1) ** (1.0 / len(trendfit_years)))

# NOTE(review): `np`, `plt` and `DROPBOXHOME` are not defined in the visible
# part of this file; they are presumably provided by the interactive session
# or an earlier cell -- confirm before running as a standalone script.

# Years drawn in each journal's miniature impact-factor curve.
full_if_years = np.arange(2003, 2014)
full_if_labels = ['IF{}'.format(y) for y in full_if_years]
years_zero_centered = full_if_years - full_if_years.mean()

# Scale factors that shrink each miniature curve in x and y.
xshrink = 0.1
yshrink = 0.1
# The x axis shows recent_if raised to this power.
xremapscale = 0.75
# Journals to the right of this x position (recent_if above 15) get a label.
xlabelcutoff = 15 ** xremapscale
fig = plt.figure(figsize=(8.5, 7))

# Only journals whose recent_if exceeds 4 are drawn.
plottbl = gtbl[gtbl['recent_if'] > 4]

for rowi, row in plottbl.iterrows():
    # alignment positions of this journal's miniature curve
    xcenter = row['recent_if'] ** xremapscale
    ytop = row['trend_slope']

    ifs = np.array(row[full_if_labels], dtype=float)
    # Mark which years have a value. Masking by position keeps every point
    # on its own year even when a missing year is not at the start of the
    # range (counting NaNs and slicing off that many leading points would
    # misalign the curve in that case).
    present = ~np.isnan(ifs)
    if not present.any():
        continue

    # Scale relative to recent_if, then centre the curve vertically on ytop.
    yoffsets = ifs / row['recent_if'] * yshrink
    ylow = np.nanmin(yoffsets)
    yheight = np.nanmax(yoffsets) - ylow
    yoffsets = yoffsets - ylow - yheight / 2

    ypoints = (yoffsets + ytop)[present]
    xpoints = (years_zero_centered * xshrink + xcenter)[present]

    plt.plot(xpoints, ypoints, c='black', alpha=0.7)

    # Label journals far to the right or with a trend slope beyond +/-0.2,
    # anchoring the text at the last drawn point.
    if xcenter > xlabelcutoff or abs(ytop) > 0.2:
        plt.annotate(row['Abbreviated Journal Title'].title(),
                     (xpoints[-1], ypoints[-1]))

plt.axhline(0, c='black', lw=1)
plt.ylim(-0.8, 0.8)

# Tick labels show the un-remapped impact factor at its remapped position.
xtickpositions = np.array([5, 10, 15, 20, 25, 30, 35, 40, 45])
plt.xticks(xtickpositions ** xremapscale, [str(pos) for pos in xtickpositions])

plt.grid(True, ls='-', alpha=0.3)
plt.xlabel('Mean Impact Factor 2011-2013')
plt.ylabel('IF Change Trend 2011-2013')
plt.title('All Journals Related to Molecular & Cell Biology')
plt.savefig(DROPBOXHOME+'/Data/2014/jcr-trend-all.pdf')