import pandas as pd
import json
import os
import urllib
import urllib2
import numpy as np
import unicodedata
from myalchemy import MyAlchemy

# Since we only get so many Alchemy API calls, we might as well not call
# Alchemy on duplicate posts shared between subreddits. We'll merge the
# results back into the full data set later.
#df = pd.read_csv('Data/full.csv', encoding='utf-8')
#print "Original size of data set is", len(df)
#df = df.drop_duplicates('id')  # Keep only unique post ids so we don't waste Alchemy calls
#print "Size of data set with only unique posts is", len(df)
#subs = list(df['subreddit'].unique())
#dflen = len(df)
#df['alchemy'] = ['null']*dflen

# The deduplicated data set (with any Alchemy results gathered so far)
# lives in uniqueentries.csv; the merge step at the bottom needs it loaded.
df = pd.read_csv('Data/uniqueentries.csv', encoding='utf-8')

file_dir = "Data/combinedcomments/"
path, dirs, files = os.walk(file_dir).next()
csvfiles = [file_dir + i for i in files if ".csv" in i]  # Build a list of the .csv files
csvfiles.sort()

def check_null(x):
    '''Returns True if x is a usable string, False if it is NaN (or any
    other value np.isnan accepts). The name is historical: it really
    answers "is this NOT null?".'''
    try:
        np.isnan(x)
        return False
    except:
        return True

def alchemy_comments(df, start, apikey, csvfiles, end=None):
    '''Runs the Alchemy keyword analysis on the comment files for rows
    start..end of df, storing each result in df['alchemy'].'''
    # end defaults at call time, not definition time, so df need not exist
    # when the function is defined.
    if end is None:
        end = len(df)
    p = MyAlchemy(apikey)
    dfids = list(df.index)
    for i in range(start, end):
        subrequest = df['subreddit'][dfids[i]]
        commentfile = ''
        for comb in csvfiles:
            if subrequest in comb:
                commentfile = comb
        commentdf = pd.read_csv(commentfile, encoding='utf-8')
        commentdf = commentdf.drop('type', axis=1)
        commentdf = commentdf.drop_duplicates()
        commentdf = commentdf[commentdf['comment'].apply(check_null)]
        commentdf['comment'] = commentdf['comment'].apply(lambda x: unicodedata.normalize('NFKD', x).encode('ascii', 'ignore'))
        commentdf = commentdf[commentdf['post'] == df['id'][dfids[i]]]
        comments = list(commentdf['comment'])
        # Add the title to the Alchemy call
        comments.append(df['title'][dfids[i]])
        # Add the self text to the Alchemy call, if there is any
        if check_null(df['selftext'][dfids[i]]):
            comments.append(df['selftext'][dfids[i]])
        # Both joining the comments and sending Alchemy calls can be problematic
        try:
            comments = ' '.join(comments)
            if len(comments) > 8000:
                comments = comments[0:7999]
        except:
            print "Comment join error", comments
        # I'm not sure what Alchemy does once you reach the cap... so you
        # might want to check if the result is null or something here.
        try:
            df['alchemy'][dfids[i]] = p.run_method(comments, 'keywords', {'keywordExtractMode': 'strict'})
        except:
            print "Alchemy error", df['id'][dfids[i]]
    return df

# Alchemy keys
apikey1 = "dcac82649daaa2627ee783b25779cfaed4af0067"  # Jay's key
apikey2 = "e945cef59338f9e8e7bc962badde170e623fb7e5"  # Basti's key
apikey3 = "cb736ca44e57cd6764b70ec86886f4fce8f6a68d"  # Serguei's key

#df = alchemy_comments(df, 25000, apikey2, csvfiles, end=25992)
#df.to_csv('Data/uniqueentries.csv', index=False, encoding='utf-8')

# Merge the Alchemy results back onto the full (duplicate-containing) data set.
fulldf = pd.read_csv('Data/full.csv', encoding='utf-8')
fulldf['alchemy'] = ['null']*len(fulldf)
for i in fulldf.index:
    fid = fulldf['id'][i]
    # .values[0] pulls the scalar out of the one-row selection; without it
    # we would be assigning a whole Series into the cell.
    alc = df[df['id'] == fid]['alchemy'].values[0]
    fulldf['alchemy'][i] = alc
fulldf.to_csv('Data/full.csv', index=False, encoding='utf-8')

# Other Alchemy methods we could run:
#print p.run_method(comments, 'concepts')
#print p.run_method(comments, 'keywords', {'keywordExtractMode':'strict'})
#print p.run_method(comments, 'category')
#print p.run_method(comments, 'sentiment')
#print p.run_method(comments, 'entities')
#print p.run_method(reddit_base, 'urlkeywords')
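
# --- Sketch: checking the Alchemy response for quota errors --------------
# The loop above notes that it's unclear what Alchemy does once you hit
# the cap. A minimal sketch, ASSUMING run_method returns the parsed
# AlchemyAPI JSON response as a dict: the legacy AlchemyAPI reported a
# 'status' field ('OK'/'ERROR') and a 'statusInfo' message such as
# 'daily-transaction-limit-exceeded'. The helper name is hypothetical.
def alchemy_result_ok(result):
    '''Returns True if the response looks like a successful call. If
    run_method returns a raw JSON string instead of a dict, parse it
    with json.loads first.'''
    if not isinstance(result, dict):
        return False
    if result.get('status') != 'OK':
        # e.g. statusInfo == 'daily-transaction-limit-exceeded' once the
        # key's daily quota is exhausted
        print "Alchemy returned error:", result.get('statusInfo')
        return False
    return True

# Inside alchemy_comments, the call could then become:
#     result = p.run_method(comments, 'keywords', {'keywordExtractMode': 'strict'})
#     if alchemy_result_ok(result):
#         df['alchemy'][dfids[i]] = result
#     else:
#         break  # stop burning calls on an exhausted key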
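
# --- Sketch: chunked runs with checkpointing ------------------------------
# A possible driver for working through the data set in slices, saving a
# checkpoint after each slice so a crash or an exhausted key doesn't lose
# work. The slice size (1000) and the key rotation are assumptions; adjust
# them to whatever each key's daily quota actually allows.
#keys = [apikey1, apikey2, apikey3]
#chunk = 1000
#start = 25000  # resume point from the last completed run
#for n, key in enumerate(keys):
#    lo = start + n * chunk
#    hi = min(lo + chunk, len(df))
#    df = alchemy_comments(df, lo, key, csvfiles, end=hi)
#    df.to_csv('Data/uniqueentries.csv', index=False, encoding='utf-8')
#    if hi == len(df):
#        break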