#!/usr/bin/env python
# coding: utf-8
"""Round-trip test of PROV-JSON fixtures through the prov RDF serializer.

Converted from a Jupyter notebook (the ``# In[n]:`` markers delimit cells).
For each PROV-JSON test fixture this script:

1. fetches a reference Turtle/TriG serialization from the Southampton PROV
   validator service if one is not already on disk (cell 2),
2. serializes the deserialized document to RDF with prov's own serializer,
   deserializes that RDF back, and asserts equality with the original
   document (cell 4),
3. on failure, diffs the records/attributes of the two documents (cell 5).

Ported from Python 2: ``except E, e`` -> ``except E as e``, ``StringIO``
module -> ``io``, builtin ``reload`` -> ``importlib.reload``.
"""
from __future__ import print_function

# In[1]: imports and logging setup
import prov.model as pm
import rdflib as rl  # used by the commented-out ConjunctiveGraph check below
from io import StringIO  # ditto
from glob import glob
import importlib
import logging
# Re-import logging so basicConfig takes effect even if an earlier notebook
# cell (or an imported library) already configured the root logger.
importlib.reload(logging)
import sys
logging.basicConfig(format=('%(name)s - %(message)s'), stream=sys.stdout)
logger = logging.getLogger('prov.model')
logger.setLevel(logging.getLevelName('INFO'))
logger2 = logging.getLogger('rdf')
logger2.setLevel(logging.getLevelName('INFO'))
# NOTE(review): monkey-patching .info/.debug with print bypasses the logging
# machinery (levels still gate nothing on these attributes); kept because the
# notebook relies on the raw, unformatted output.
logger2.info = print
import requests
import json
import os
from numpy.testing import assert_equal


# In[2]: make sure every JSON fixture has a reference RDF file on disk,
# fetching missing ones from the online PROV validator.
json_files = sorted(glob('/software/nipy-repo/w3c/prov/prov/tests/json/*.json'))
url = 'https://provenance.ecs.soton.ac.uk/validator/provapi/documents/'
for fname in json_files:
    _, ttl_file = os.path.split(fname)
    ttl_file = os.path.join('/software/nipy-repo/w3c/prov/prov/tests/rdf/',
                            ttl_file.replace('json', 'ttl'))
    g = pm.ProvDocument.deserialize(fname)
    # Documents with bundles need named graphs, hence TriG instead of Turtle.
    # (renamed from `format` to avoid shadowing the builtin)
    if len(g.bundles) == 0:
        rdf_format = 'turtle'
        headers = {'Accept': "text/turtle", 'Content-type': "application/json"}
    else:
        rdf_format = 'trig'
        ttl_file = ttl_file.replace('ttl', 'trig')
        headers = {'Accept': "application/trig", 'Content-type': "application/json"}
    if not os.path.exists(ttl_file):
        # Context manager instead of json.load(open(...)): no leaked handle.
        with open(fname, 'r') as fp:
            payload = json.load(fp)
        resp = requests.post(url=url, json=payload, headers=headers)
        if resp.ok:
            with open(ttl_file, 'wt') as fp:
                fp.write(resp.text)
        else:
            print('could not convert: %s' % fname)


# In[3]:
def dummy_func(message):
    """No-op replacement for logger2.debug to silence debug output."""
    pass

len(json_files)  # notebook cell echo: number of fixtures found


# In[4]: round-trip every fixture: JSON -> ProvDocument -> RDF -> ProvDocument
# and assert the result equals the original. Set `target` to a fixture index
# to get verbose debug output for that one fixture.
json_files = sorted(glob('/software/nipy-repo/w3c/prov/prov/tests/json/*.json'))
start = 0
target = 397
# Known-failing fixture indices to skip.  bundles [273, 274, 275, 276]
skip = [354, 355, 356, 357, 358, 359] + [362, 363, 366, 367] + \
       [370, 371, 372, 373, 374, 375] + [378, 379]
for idx, fname in enumerate(json_files[(start):(target + 1)]):
    logger2.setLevel(logging.getLevelName('INFO'))
    logger2.debug = dummy_func
    _, ttl_file = os.path.split(fname)
    ttl_file = os.path.join('/software/nipy-repo/w3c/prov/prov/tests/rdf/',
                            ttl_file.replace('json', 'ttl'))
    if idx in [val - start for val in skip]:  # or idx != target:
        logger2.info('Skipping: %s' % fname)
        continue
    logger2.info('Testing: %d, %s' % (idx, fname))
    # Pre-bind so the except block cannot mask the real error with a
    # NameError when the failure happens before these are assigned.
    g = None
    g1 = None
    try:
        g = pm.ProvDocument.deserialize(fname)
        if idx == target - start:
            # Verbose mode for the targeted fixture only.
            logger2.setLevel(logging.getLevelName('DEBUG'))
            logger2.debug = print
        logger2.debug(g.get_provn())
        rdf_format = 'turtle' if len(g.bundles) == 0 else 'trig'
        logger2.info(rdf_format)
        if rdf_format == 'trig':
            ttl_file = ttl_file.replace('ttl', 'trig')
        if idx == target - start:
            with open(ttl_file, 'rt') as fp:
                logger2.debug('-------- Expected RDF ---------')
                logger2.debug(''.join(fp.readlines()))
            logger2.debug('-------- Converting to rdf ---------')
        # Serialize once; the original serialized a second time in the
        # non-target branch for no reason.
        rdf_graph = g.serialize(format='rdf', rdf_format=rdf_format)
        logger2.debug(rdf_graph)
        logger2.debug('-------- Deserializing ---------')
        # g0 = rl.ConjunctiveGraph().parse(StringIO(g.serialize(format='rdf', rdf_format=rdf_format)),
        #                                  format=rdf_format)
        g1 = pm.ProvDocument.deserialize(content=rdf_graph,
                                         format='rdf', rdf_format=rdf_format)
        assert_equal(g, g1)
    except Exception as e:
        print(e)
        if g is not None:
            print(g.get_provn())
        # print(g.serialize(format='rdf', rdf_format=rdf_format))
        if g1 is not None:
            print(g1.get_provn())
        raise


# In[5]: diagnostic diff of the last (failing) pair of documents:
# which records of g1 have no equal counterpart in g, and vice versa.
rec2 = [rec.copy() for rec in g._records]
missing_recs = []
for rec in g1._records:
    found = False
    for rec1 in rec2:
        if rec == rec1:
            # Remove the matched record and stop scanning: the original
            # mutated rec2 while iterating it, which skips elements.
            rec2.remove(rec1)
            found = True
            break
    if not found:
        missing_recs.append(rec)
        # print(rec)
print('----ORIGINAL----')
for rec in rec2:
    pass
    # print(rec)
print('----DIFFS----')
# With exactly one unmatched record on each side, diff their attributes.
if len(missing_recs) == 1 and len(rec2) == 1:
    attr1 = missing_recs[0].attributes
    attr2 = [pair for pair in rec2[0].attributes]
    for pair in attr1:
        if pair in attr2:
            attr2.remove(pair)
        else:
            print(pair)
    print('----ATTR2 - rec2-----')
    for val in attr2:
        print(val)


# In[ ]: dump both documents for manual inspection.
print(g.get_provn())
print(g1.get_provn())
print(g.serialize(format='rdf', rdf_format='turtle'))