# Next line commented out because we only want to run this once
#!python src/load_kiva_lenders_to_mongodb.py

# Next line commented out because we only want to run this once
#!python src/load_kiva_loans_lenders_to_mongodb.py

# Export the loans held in MongoDB to a dataframe file under data/predicting_funding
# (base name "loans", years 2006 through 2014, as passed below).
# The plotting cells further down read data/predicting_funding/loans_dataframe.h5,
# presumably the file this script writes -- confirm against the script.
!python src/convert_mongodb_loans_to_dataframe.py --dataDir data/predicting_funding \
                                                  --baseName loans \
                                                  --startYear 2006 \
                                                  --endYear 2014

%matplotlib inline
import pandas as pd
import numpy as np
import sys
import matplotlib.pyplot as plt
from matplotlib.dates import date2num
from pandas.io.pytables import read_hdf
import matplotlib.dates as mdates

# Load the loans dataframe from its HDF5 file, reporting progress on stderr.
loansDataFrameFile = 'data/predicting_funding/loans_dataframe.h5'
print >> sys.stderr, "Reading loans dataframe file %s ..." % loansDataFrameFile,
loansDF = read_hdf(loansDataFrameFile, 'table')
print >> sys.stderr, "done"

# Map each posting date to midnight on the first day of its calendar quarter
# (Jan-Mar -> 1 Jan, Apr-Jun -> 1 Apr, Jul-Sep -> 1 Jul, Oct-Dec -> 1 Oct).
# The earlier formula, (month / 4) * 3 + 1, put July into the April bucket,
# October and November into the July bucket, and left December on its own.
# Microseconds are zeroed too, so that all loans of a quarter share one key.
loansDF['posted_quarter'] = loansDF['posted_date'].apply(
    lambda x: x.replace(month=((x.month - 1) // 3) * 3 + 1, day=1,
                        hour=0, minute=0, second=0, microsecond=0))

# A loan counts as fully funded once the funded amount reaches the requested amount.
loansDF['fully_funded'] = loansDF['funded_amount'] >= loansDF['loan_amount']

#loansDF.head(3)


# Group the loans by (quarter, majority gender of the borrowers, fully funded or not).
# NOTE: despite the variable name, the grouping is per quarter, not per month.
loansDFGroupedByMonthAndGender = loansDF.groupby(['posted_quarter','borrower_majority_gender','fully_funded'])

# XSeries: the distinct quarters, in chronological order.
# YSeries[gender][isFullyFunded][quarter]: number of loans in that group.
XSeries = []
YSeries = {'M': {True: {},
                 False: {}},
           'F': {True: {},
                 False: {}},
           'N': {True: {},
                 False: {}}}

for k, v in sorted(loansDFGroupedByMonthAndGender.groups.items(), key=lambda x: x[0][0]):
    period, gender, isFullyFunded = k
    # Several groups share one quarter. They are adjacent after sorting, so
    # comparing with the last entry is enough to record each quarter once
    # (appending unconditionally made every bar get drawn several times).
    if not XSeries or XSeries[-1] != period:
        XSeries.append(period)
    # v holds the index labels of the loans that belong to this group
    YSeries[gender][isFullyFunded][period] = len(v)

# Combinations that have no loan in a given quarter count as zero
for p in XSeries:
    for gender in ['M', 'F', 'N']:
        for isFullyFunded in [True, False]:
            YSeries[gender][isFullyFunded].setdefault(p, 0)

# Per-quarter loan counts, one list per (gender, funding status), aligned with XSeries.
maleCounts = YSeries['M']
femaleCounts = YSeries['F']

YSeriesMaleNotFullyFunded = [maleCounts[False][quarter] for quarter in XSeries]
YSeriesMaleFullyFunded = [maleCounts[True][quarter] for quarter in XSeries]

YSeriesFemaleNotFullyFunded = [femaleCounts[False][quarter] for quarter in XSeries]
YSeriesFemaleFullyFunded = [femaleCounts[True][quarter] for quarter in XSeries]

# The 'N' (neutral) series is negligible and is not plotted.

width = 20

# Male bars sit on the quarter date itself, female bars 30 days to the right.
maleDates = date2num(XSeries)
femaleDates = maleDates + 30

fig = plt.figure(40, figsize=(12,12))
ax = fig.add_subplot(111)

# Each stack: the not-fully-funded count at the bottom, the fully-funded count on top.
stackedBars = [
    (maleDates, YSeriesMaleNotFullyFunded, YSeriesMaleFullyFunded,
     "male, not fully funded", "male, fully funded", 'lightblue', 'blue'),
    (femaleDates, YSeriesFemaleNotFullyFunded, YSeriesFemaleFullyFunded,
     "female, not fully funded", "female, fully funded", 'lightgreen', 'green'),
]
for dates, lowerCounts, upperCounts, lowerLabel, upperLabel, lowerColor, upperColor in stackedBars:
    ax.bar(dates, lowerCounts, label=lowerLabel, width=width, color=lowerColor, align='center')
    ax.bar(dates, upperCounts, bottom=lowerCounts, label=upperLabel, width=width, color=upperColor, align='center')

ax.xaxis_date()
plt.setp(ax.get_yticklabels(), fontsize="25")
plt.setp(ax.get_xticklabels(), fontsize="25", ha="left")
ax.autoscale(tight=True)
ax.legend(loc="best", fontsize="25")
ax.set_title('Number of loans per quarter\nsplit by gender and funding status', fontsize="25", y=1.03)
ax.set_ylabel('Number of loans', fontsize="25")
ax.yaxis.labelpad = 20
plt.show()

#print loansDF.head()

# amountXSeries: the distinct quarters, in chronological order.
# amountYSeries[gender][isFullyFunded][quarter]: sum of the requested loan amounts.
amountXSeries = []
amountYSeries = {'M': {True: {},
                       False: {}},
                 'F': {True: {},
                       False: {}},
                 'N': {True: {},
                       False: {}}}

for k, v in sorted(loansDFGroupedByMonthAndGender.groups.items(), key=lambda x: x[0][0]):
    period, gender, isFullyFunded = k
    # Groups of the same quarter are adjacent after sorting; record each quarter once
    # (appending unconditionally made every bar get drawn several times).
    if not amountXSeries or amountXSeries[-1] != period:
        amountXSeries.append(period)
    # v holds the index labels of the loans in this group. Every key occurs
    # exactly once in .groups, so a plain assignment is enough here.
    amountYSeries[gender][isFullyFunded][period] = loansDF.loc[v, 'loan_amount'].sum()

# Combinations that have no loan in a given quarter count as zero
for p in amountXSeries:
    for gender in ['M', 'F', 'N']:
        for isFullyFunded in [True, False]:
            amountYSeries[gender][isFullyFunded].setdefault(p, 0)
                
#print amountYSeries

# Per-quarter requested amounts, one list per (gender, funding status),
# aligned with amountXSeries.
amountYSeriesMaleNotFullyFunded = [amountYSeries['M'][False][p] for p in amountXSeries]
amountYSeriesMaleFullyFunded = [amountYSeries['M'][True][p] for p in amountXSeries]

amountYSeriesFemaleNotFullyFunded = [amountYSeries['F'][False][p] for p in amountXSeries]
amountYSeriesFemaleFullyFunded = [amountYSeries['F'][True][p] for p in amountXSeries]

# The 'N' (neutral) series is negligible and is not plotted.

width = 20

fig = plt.figure(40, figsize=(12,12))
ax1 = fig.add_subplot(111)
# Male bars sit on the quarter date, female bars 30 days to the right; in each
# stack the not-fully-funded amount is at the bottom.
ax1.bar(date2num(amountXSeries), amountYSeriesMaleNotFullyFunded,label="male, not fully funded",width=width,color='lightblue',align='center')
ax1.bar(date2num(amountXSeries), amountYSeriesMaleFullyFunded,bottom=amountYSeriesMaleNotFullyFunded,label="male, fully funded",width=width,color='blue',align='center')

ax1.bar(date2num(amountXSeries)+30, amountYSeriesFemaleNotFullyFunded,label="female, not fully funded", width=width,color='lightgreen',align='center')
ax1.bar(date2num(amountXSeries)+30, amountYSeriesFemaleFullyFunded, bottom=amountYSeriesFemaleNotFullyFunded ,label="female, fully funded", width=width,color='green',align='center')

ax1.xaxis_date()
plt.setp(ax1.get_yticklabels(), fontsize="20")
plt.setp(ax1.get_xticklabels(), fontsize="20", ha="left")
# autoscale and labelpad must target ax1: they used to be applied to `ax`,
# the axes of the loan-count chart, so this chart was never adjusted.
ax1.autoscale(tight=True)
plt.legend(loc="best", fontsize="20")
plt.title('Total requested loan amounts per quarter\nsplit by gender and funding status', fontsize="20", y=1.03)
plt.ylabel('USD', fontsize="20")
ax1.yaxis.labelpad = 20
plt.show()

# loanXSeries[gender][isFullyFunded]: the quarters that have loans in that group.
# loanYSeries[gender][isFullyFunded][quarter]: per-loan averages for that group.
loanXSeries = {genderCode: {True: [], False: []} for genderCode in ('M', 'F', 'N')}
loanYSeries = {genderCode: {True: {}, False: {}} for genderCode in ('M', 'F', 'N')}


def _columnAverage(frame, column):
    """Return the arithmetic mean of one column of the given dataframe."""
    values = frame[column]
    return sum(values) / float(len(values))


def _averageSeries(gender, isFullyFunded, field):
    """List one averaged field, in the order of the group's quarters."""
    byQuarter = loanYSeries[gender][isFullyFunded]
    return [byQuarter[quarter][field] for quarter in loanXSeries[gender][isFullyFunded]]


chronologicalGroups = sorted(loansDFGroupedByMonthAndGender.groups.items(),
                             key=lambda item: item[0][0])
for (period, gender, isFullyFunded), labels in chronologicalGroups:
    # labels are the index labels of the loans that belong to this group
    groupLoans = loansDF.loc[labels]
    loanXSeries[gender][isFullyFunded].append(period)
    loanYSeries[gender][isFullyFunded][period] = {
        'loanAmountAvg': _columnAverage(groupLoans, 'loan_amount'),
        'fundedAmountAvg': _columnAverage(groupLoans, 'funded_amount'),
        'lenderCountAvg': _columnAverage(groupLoans, 'lender_count'),
    }

loanYSeriesMaleNotFullyFunded = _averageSeries('M', False, 'loanAmountAvg')
loanYSeriesMaleFullyFunded = _averageSeries('M', True, 'loanAmountAvg')
fundedYSeriesMaleNotFullyFunded = _averageSeries('M', False, 'fundedAmountAvg')
lenderCountYSeriesMaleNotFullyFunded = _averageSeries('M', False, 'lenderCountAvg')
lenderCountYSeriesMaleFullyFunded = _averageSeries('M', True, 'lenderCountAvg')

loanYSeriesFemaleNotFullyFunded = _averageSeries('F', False, 'loanAmountAvg')
loanYSeriesFemaleFullyFunded = _averageSeries('F', True, 'loanAmountAvg')
fundedYSeriesFemaleNotFullyFunded = _averageSeries('F', False, 'fundedAmountAvg')
lenderCountYSeriesFemaleNotFullyFunded = _averageSeries('F', False, 'lenderCountAvg')
lenderCountYSeriesFemaleFullyFunded = _averageSeries('F', True, 'lenderCountAvg')

width = 20

fig = plt.figure(60, figsize=(12,8))

# Upper panel: average requested and funded amounts per quarter.
ax2 = fig.add_subplot(2,1,1)
amountCurves = [
    ('M', False, loanYSeriesMaleNotFullyFunded, '+-', "avg. loan amount, male, not fully funded", 'blue'),
    ('M', False, fundedYSeriesMaleNotFullyFunded, '*-', "avg. funded amount, male, not fully funded", 'blue'),
    ('M', True, loanYSeriesMaleFullyFunded, 'o-', "avg. loan=funded amount, male, fully funded", 'blue'),
    ('F', False, loanYSeriesFemaleNotFullyFunded, '+-', "avg. loan amount, female, not fully funded", 'green'),
    ('F', False, fundedYSeriesFemaleNotFullyFunded, '*-', "avg. funded amount, female, not fully funded", 'green'),
    ('F', True, loanYSeriesFemaleFullyFunded, 'o-', "avg. loan=funded amount, female, fully funded", 'green'),
]
for genderCode, fundedFlag, averages, lineFormat, caption, lineColor in amountCurves:
    # Each curve is drawn against the quarters of its own (gender, status) group
    ax2.plot(date2num(loanXSeries[genderCode][fundedFlag]), averages, lineFormat, label=caption, color=lineColor)

ax2.xaxis_date()
ax2.autoscale(tight=True)
ax2.set_ylabel('USD')
ax2.legend(loc="best")
ax2.set_title('Loan characteristics per quarter, split by gender and funding status')

# Lower panel: average number of lenders per loan.
ax3 = fig.add_subplot(2,1,2)
lenderCurves = [
    ('M', True, lenderCountYSeriesMaleFullyFunded, 'o-', "male, fully funded", 'blue'),
    ('F', True, lenderCountYSeriesFemaleFullyFunded, 'o-', "female, fully funded", 'green'),
    ('M', False, lenderCountYSeriesMaleNotFullyFunded, 'x-', "male, not fully funded", 'blue'),
    ('F', False, lenderCountYSeriesFemaleNotFullyFunded, 'x-', "female, not fully funded", 'green'),
]
for genderCode, fundedFlag, averages, lineFormat, caption, lineColor in lenderCurves:
    ax3.plot(date2num(loanXSeries[genderCode][fundedFlag]), averages, lineFormat, label=caption, color=lineColor)

ax3.set_ylabel("Number of lenders per loan")
ax3.xaxis_date()
ax3.autoscale(tight=True)
ax3.set_title('Average number of lenders per loan, split by gender and funding status')
ax3.legend(loc="best")

plt.show()

# Run the country-to-country loan flow script for 2012-2014, reading from and
# writing to data/predicting_funding with output base name "kiva".
# NOTE(review): --minValue 5000 presumably drops flows below that amount --
# confirm against the script.
!python src/calculate_country2country_loan_flow.py --inDataDir data/predicting_funding \
                                                   --isoCountryCodesFile data/predicting_funding/iso3166_country_codes.tsv \
                                                   --outDataDir data/predicting_funding \
                                                   --outBaseName kiva \
                                                   --startYear 2012 \
                                                   --endYear 2014 \
                                                   --minValue 5000

# Copy the loan flow JSON to d3/data.json (presumably the file that
# d3/sankey.html loads -- confirm).
!cp data/predicting_funding/kiva_country2country_loan_flows.json d3/data.json

%%HTML
<iframe src="d3/sankey.html" width=1000 height=1650></iframe>

%matplotlib inline
import pandas as pd
import numpy as np
import sys
import matplotlib.pyplot as plt
from matplotlib.dates import date2num
from pandas.io.pytables import read_hdf

loansDataFrameFile = 'data/predicting_funding/loans_dataframe.h5'
print >> sys.stderr, "Reading loans dataframe file %s ..." % loansDataFrameFile,
loansDF = read_hdf(loansDataFrameFile, 'table')
print >> sys.stderr, "done"

# We're only interested in loans from the period 2012-2014 that were not fully funded
loansDF['funding_ratio'] = loansDF['funded_amount'] / loansDF['loan_amount']

nonFundedLoansDF = loansDF[loansDF['funding_ratio'] < 1.0]

#nonFundedLoansDF.head(3)

fundingRatioSeries = nonFundedLoansDF['funding_ratio'].values

print "number of not-fully funded loans in 2012-2014 period:", len(fundingRatioSeries)

plt.title("Distribution of funding ratio\nfor not-fully funded loans applications")
for nrBins in [2,3,4,5,10,20,100]:
    plt.hist(fundingRatioSeries, nrBins, label="%d bins" % nrBins)
plt.legend()
plt.show()

nonFundedLoansFemaleDF = loansDF[(loansDF['funding_ratio'] < 1.0) & (loansDF['borrower_majority_gender'] == 'F')]
nonFundedLoansMaleDF = loansDF[(loansDF['funding_ratio'] < 1.0) & (loansDF['borrower_majority_gender'] == 'M')]

FemaleFundingRatioSeries = nonFundedLoansFemaleDF['funding_ratio'].values
MaleFundingRatioSeries = nonFundedLoansMaleDF['funding_ratio'].values

print "number of not-fully funded female loans in 2012-2014 period:", len(FemaleFundingRatioSeries)
print "number of not-fully funded male loans in 2012-2014 period:", len(MaleFundingRatioSeries)

plt.title("Distribution of funding ratio\nfor not-fully funded loans applications by FEMALES")
for nrBins in [2,3,4,5,10,20,100]:
    plt.hist(FemaleFundingRatioSeries, nrBins, label="%d bins" % nrBins)
plt.legend()
plt.show()

plt.title("Distribution of funding ratio\nfor not-fully funded loans applications by MALES")
for nrBins in [2,3,4,5,10,20,100]:
    plt.hist(MaleFundingRatioSeries, nrBins, label="%d bins" % nrBins)
plt.legend()
plt.show()


# Step 1: convert the 2012-2014 loans in MongoDB to a corpus with base name
# "kiva" plus a label file keyed on funding_ratio, both under
# data/predicting_funding. The next step reads kiva.lda-c and
# kiva-labels-with-loan-ids.dat from that directory.
# NOTE(review): the filter* options presumably prune the vocabulary -- confirm
# their exact meaning against the script.
!python src/convert_mongodb_to_blei_ldac.py --dataDir data/predicting_funding\
                                            --corpusBaseName kiva \
                                            --stopwordFile=data/predicting_funding/kiva_stopwords.tsv \
                                            --startYear 2012 \
                                            --endYear 2014 \
                                            --maxNrDocs 1000000 \
                                            --filterBelow 10 \
                                            --filterAbove 0.5 \
                                            --filterKeepN 1000 \
                                            --classificationField funding_ratio \
                                            --classLabelFileName kiva-labels-with-loan-ids.dat

# Step 2: split corpus, labels and loan ids into a training set (36000) and a
# test set (4000), written to data/predicting_funding/slda_in.
!python src/prepare_sdla_train_test_sets.py --bleiCorpusFile data/predicting_funding/kiva.lda-c \
                                            --bleiLabelFile data/predicting_funding/kiva-labels-with-loan-ids.dat \
                                            --bleiTrainCorpusFile data/predicting_funding/slda_in/kiva-train-data.dat \
                                            --bleiTrainLabelFile data/predicting_funding/slda_in/kiva-train-label.dat \
                                            --bleiTrainIdFile data/predicting_funding/slda_in/kiva-train-loan-ids.dat \
                                            --bleiTestCorpusFile data/predicting_funding/slda_in/kiva-test-data.dat \
                                            --bleiTestLabelFile data/predicting_funding/slda_in/kiva-test-label.dat \
                                            --bleiTestIdFile data/predicting_funding/slda_in/kiva-test-loan-ids.dat \
                                            --test_size 4000 \
                                            --train_size 36000

# Step 3: estimate the sLDA model ("slda est") on the training set; results go
# to data/predicting_funding/slda_out.
# NOTE: the slda binary is referenced by an absolute, machine-specific path.
# NOTE(review): "1.0 20" are presumably alpha and the number of topics (the
# topic chart below lists 20 topics) -- confirm against the slda usage text.
!/Users/frederik/install/slda-master/slda est \
                data/predicting_funding/slda_in/kiva-train-data.dat \
                data/predicting_funding/slda_in/kiva-train-label.dat \
                data/predicting_funding/slda_settings.txt 1.0 20 \
                seeded \
                data/predicting_funding/slda_out

# Step 4: run inference ("slda inf") on the test set using the model estimated
# in step 3 (slda_out/final.model); results go to the same output directory.
!/Users/frederik/install/slda-master/slda inf \
                                          data/predicting_funding/slda_in/kiva-test-data.dat \
                                          data/predicting_funding/slda_in/kiva-test-label.dat \
                                          data/predicting_funding/slda_settings.txt \
                                          data/predicting_funding/slda_out/final.model \
                                          data/predicting_funding/slda_out

# Step 5: compare the inferred labels (slda_out/inf-labels.dat) with the
# expected test labels, using weighted averaging.
!python src/evaluate_sdla.py --predictedFile data/predicting_funding/slda_out/inf-labels.dat \
                             --expectedFile data/predicting_funding/slda_in/kiva-test-label.dat \
                             --average weighted

# Step 6: print the topics of the estimated model, using the corpus vocabulary
# file and the text version of the model.
!python src/print_topics.py --vocabFile data/predicting_funding/kiva.lda-c.vocab \
                            --sldaModelFile data/predicting_funding/slda_out/final.model.text

!python src/loan_funding_predictor_poc.py --trainSldaGammaFile data/predicting_funding/slda_out/final.gamma \
                                          --trainLabelFile data/predicting_funding/slda_in/kiva-train-label.dat \
                                          --trainIdFile data/predicting_funding/slda_in/kiva-train-loan-ids.dat \
                                          --testSldaGammaFile data/predicting_funding/slda_out/inf-gamma.dat \
                                          --testLabelFile data/predicting_funding/slda_out/inf-labels.dat \
                                          --testIdFile data/predicting_funding/slda_in/kiva-test-loan-ids.dat \
                                          --feat "slda" \
                                          --feat "borrower_majority_gender" \
                                          --feat "loan_amount" \
                                          --feat "has_image" \
                                          --feat "posted_day_of_month" \
                                          --feat "posted_month" \
                                          --feat "geo_lat" \
                                          --feat "geo_lon" \
                                          --feat "repayment_term" \
                                          --feat "nr_borrowers" \
                                          --feat "bonus_credit_eligibility" \
                                          --feat "translated" \
                                          --feat "partner_rating" \
                                          --feat "partner_delinquency_rate" \
                                          --feat "partner_loans_posted" \
                                          --feat "partner_total_amount_raised" \
                                          --feat "en_description_length" \
                                            --feat "constant" \

# Run the loan funding predictor for 2012-2014, passing the path of the pickled
# logistic regression model via --logResModelFile.
# NOTE(review): whether the script writes or reads that file is not visible
# here -- confirm against the script.
!python src/loan_funding_predictor.py --startYear 2012 \
                                      --endYear 2014 \
                                      --logResModelFile data/predicting_funding/logres_out/kivaLoanFundingPredictor.pkl

%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np

fig = plt.figure(20, figsize=(8,8))

# Logistic regression coefficient per posting month, listed in descending order.
monthCoefficients = [
    ('Mar', 0.49), ('Feb', 0.42), ('Jan', 0.35), ('Apr', 0.12),
    ('Oct', -0.04), ('Nov', -0.11), ('Aug', -0.12), ('Jul', -0.12),
    ('May', -0.15), ('Dec', -0.17), ('Sep', -0.20), ('Jun', -0.28),
]
months = [monthName for monthName, _ in monthCoefficients]
logresCoefficients = [coefficient for _, coefficient in monthCoefficients]
index = np.arange(12)
bar_width = 0.50

monthAxes = fig.gca()
monthAxes.bar(index, logresCoefficients, bar_width)

monthAxes.set_xlabel('Month', fontsize="20")
monthAxes.set_ylabel('Impact on positive outcome (full funding) ', fontsize="20")
monthAxes.set_title('LogRes Model Coefficients for PostedMonth features', fontsize="20", y=1.03)
# One tick per bar, shifted right by the bar width, labelled with the month name
monthAxes.set_xticks(index + bar_width)
monthAxes.set_xticklabels(months)

plt.setp(monthAxes.get_yticklabels(), fontsize="20")
plt.setp(monthAxes.get_xticklabels(), fontsize="20", ha="center", rotation=45)

fig.tight_layout()
plt.show()

%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np

# Wide figure with a single set of axes for the topic coefficient chart.
fig = plt.figure(20, figsize=(20,8))
ax = fig.add_subplot(111)

# Ten words for each of the 20 topics.
# NOTE(review): presumably pasted from the print_topics output and therefore
# ordered by topic index -- confirm, because the plotting code pairs topics[i]
# with the i-th entry of topicIndicesAndCoefficients.
topics = [['one', 'group', 'fund', 'farmers', 'season', 'also', 'harvest', 'farmer', 'year', 'savings'],
            ['small', 'help', 'used', 'town', 'expand', 'requested', 'large', 'hopes', 'brac', 'lives'],
            ['selling', 'increase', 'shop', 'capital', 'clothing', 'started', 'sells', 'clothes', 'ago', 'working'],
            ['milk', 'cattle', 'also', 'livestock', 'farm', 'raising', 'two', 'purchase', 'pigs', 'cows'],
            ['use', 'five', 'married', 'hopes', 'goal', 'future', 'profits', 'plans', 'previous', 'kes'],
            ['needs', 'sell', 'market', 'support', 'financial', 'provide', 'opportunity', 'price', 'higher', 'however'],
            ['husband', 'woman', 'like', 'married', 'man', 'would', 'money', 'family', 'hardworking', 'purchase'],
            ['women', 'members', 'sells', 'profit', 'drinks', 'grocery', 'various', 'made', 'cosmetics', 'live'],
            ['able', 'work', 'support', 'thanks', 'help', 'continue', 'service', 'lenders', 'little', 'due'],
            ['school', 'mother', 'education', 'pay', 'father', 'young', 'year', 'single', 'parents', 'daughter'],
            ['improve', 'living', 'since', 'requesting', 'old', 'partner', 'services', 'married', 'experience', 'car'],
            ['income', 'earn', 'farming', 'rice', 'living', 'hopes', 'main', 'per', 'day', 'two'],
            ['house', 'home', 'lives', 'old', 'son', 'wife', 'vegetables', 'materials', 'live', 'requesting'],
            ['city', 'time', 'works', 'day', 'making', 'located', 'well', 'says', 'bank', 'work'],
            ['family', 'income', 'new', 'conditions', 'local', 'start', 'cover', 'purchase', 'credit', 'lives'],
            ['store', 'php', 'additional', 'future', 'nwtf', 'requested', 'general', 'save', 'earns', 'philippines'],
            ['products', 'customers', 'store', 'good', 'many', 'quality', 'sales', 'life', 'offer', 'household'],
            ['better', 'work', 'life', 'corn', 'supplies', 'give', 'crops', 'land', 'worked', 'basic'],
            ['school', 'food', 'water', 'sugar', 'selling', 'married', 'oil', 'six', 'four', 'rice'],
            ['community', 'meet', 'every', 'help', 'household', 'week', 'expenses', 'clients', 'daily', 'fellowship']]

# (topic index, logistic regression coefficient) pairs, listed from the largest
# coefficient down to the smallest.
topicIndicesAndCoefficients = [
(11, 0.34463381927852504),
(13, 0.28708132847232587),
(18, 0.21475821305783774),
(9, 0.20934126188765559),
(5, 0.20686539023306405),
(19, 0.15099384642581043),
(4, 0.078019280568203023),
(14, 0.061570138754807865),
(17, 0.042947945859326486),
(3, 0.023512600959147277),
(15, 0.019221226724594188),
(8, 0.0070316234981432591),
(10, -0.026241133761063538) ,
(1, -0.071793214438988479) ,
(7, -0.078008030184428076),
(6, -0.10954977745880268) ,
(0, -0.14755071028433084),
(2, -0.1481791870952158) ,
(12, -0.16987378700739811) ,
(16, -0.17389029153368829)]

# One bar per topic, in the order of topicIndicesAndCoefficients.
rankedTopics = topicIndicesAndCoefficients[:len(topics)]
Xtopics = ["topic_%d" % topicIndex for topicIndex, _ in rankedTopics]
topicLogresCoefficients = [coefficient for _, coefficient in rankedTopics]
index = np.arange(len(topics))
bar_width = 0.70

ax.bar(index, topicLogresCoefficients, bar_width)

ax.set_xlabel('Topic', fontsize="20")
ax.set_ylabel('Impact on positive outcome (full funding) ', fontsize="20")
ax.set_title('LogRes Model Coefficients for topical features', fontsize="20", y=1.03)
ax.set_xticks(index + bar_width)
ax.set_xticklabels(Xtopics)

# Write each topic's words next to its bar: below the axis for bars that point
# up, above the axis for bars that point down.
# NOTE(review): bar i is labelled "topic_<index>" from topicIndicesAndCoefficients
# but annotated with topics[i]. If `topics` is ordered by topic index rather
# than by coefficient rank, the words shown belong to a different topic -- verify.
for position, (words, coeff) in enumerate(zip(topics, topicLogresCoefficients)):
    y = -0.13 if coeff >= 0 else 0.02
    anchor = (index[position]+0.05, y)
    ax.annotate("\n".join(words), xy=anchor, xytext=anchor, fontsize="10")

plt.setp(ax.get_yticklabels(), fontsize="20")
plt.setp(ax.get_xticklabels(), fontsize="10", ha="right", rotation=0)
ax.autoscale(tight=True)

plt.tight_layout()
plt.show()

%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np

fig = plt.figure(20, figsize=(12,8))

rankedFeaturesAndCoeffs = [
('TotalAmountRaised', 0.51512673372977458), 
('Log10NumberOfBorrowers', 0.48883803701379364), 
('GeoLongitude', 0.38565914705597004),
('Rating', 0.077958867518446537), 
('Log10EnglishDescriptionLength', 0.076418455975725913), 
('DelinquencyRate', 0.0099085728485939755), 
('GeoLatitude', -0.071888473425193139), 
('BonusCreditEligibility', -0.16710379963027269), 
('RepaymentTerm', -0.28978714414735179), 
('LoansPosted', -0.53589567168998353), 
('MajorityGender', -0.584374830570077), 
('Log10LoanAmount', -1.4442547937444266)]

features = ["%s" % (rankedFeaturesAndCoeffs[i][0]) for i in range(len(rankedFeaturesAndCoeffs))]
print features
logresCoeffs = [rankedFeaturesAndCoeffs[i][1] for i in range(len(rankedFeaturesAndCoeffs))]
print logresCoeffs
index = np.arange(len(rankedFeaturesAndCoeffs))
bar_width = 0.50

plt.bar(index, logresCoeffs, bar_width)

plt.xlabel('Other features', fontsize="20")
plt.ylabel('Impact on positive outcome\n(full funding) ', fontsize="20")
plt.title('LogRes Model Coefficients for remaining features', fontsize="20", y=1.03)
plt.xticks(index + bar_width, features)

plt.setp(plt.gca().get_yticklabels(), fontsize="20")
plt.setp(plt.gca().get_xticklabels(), fontsize="16", ha="right", rotation=45)
plt.autoscale(tight=True)

plt.tight_layout()
plt.show()