#!/usr/bin/env python
# coding: utf-8
"""Round-trip test of PROV-JSON fixtures through the prov RDF serializer.

Converted from a Jupyter notebook (the ``# In[n]:`` markers delimit cells).
For each PROV-JSON test fixture this script:

1. fetches a reference Turtle/TriG serialization from the Southampton PROV
   validator service if one is not already on disk (cell 2),
2. serializes the deserialized document to RDF with prov's own serializer,
   deserializes that RDF back, and asserts equality with the original
   document (cell 4),
3. on failure, diffs the records/attributes of the two documents (cell 5).

Ported from Python 2: ``except E, e`` -> ``except E as e``, ``StringIO``
module -> ``io``, builtin ``reload`` -> ``importlib.reload``.
"""
from __future__ import print_function

# In[1]: imports and logging setup
import prov.model as pm
import rdflib as rl  # used by the commented-out ConjunctiveGraph check below
from io import StringIO  # ditto
from glob import glob
import importlib
import logging
# Re-import logging so basicConfig takes effect even if an earlier notebook
# cell (or an imported library) already configured the root logger.
importlib.reload(logging)
import sys
logging.basicConfig(format=('%(name)s - %(message)s'), stream=sys.stdout)
logger = logging.getLogger('prov.model')
logger.setLevel(logging.getLevelName('INFO'))
logger2 = logging.getLogger('rdf')
logger2.setLevel(logging.getLevelName('INFO'))
# NOTE(review): monkey-patching .info/.debug with print bypasses the logging
# machinery (levels still gate nothing on these attributes); kept because the
# notebook relies on the raw, unformatted output.
logger2.info = print
import requests
import json
import os
from numpy.testing import assert_equal


# In[2]: make sure every JSON fixture has a reference RDF file on disk,
# fetching missing ones from the online PROV validator.
json_files = sorted(glob('/software/nipy-repo/w3c/prov/prov/tests/json/*.json'))
url = 'https://provenance.ecs.soton.ac.uk/validator/provapi/documents/'
for fname in json_files:
    _, ttl_file = os.path.split(fname)
    ttl_file = os.path.join('/software/nipy-repo/w3c/prov/prov/tests/rdf/',
                            ttl_file.replace('json', 'ttl'))
    g = pm.ProvDocument.deserialize(fname)
    # Documents with bundles need named graphs, hence TriG instead of Turtle.
    # (renamed from `format` to avoid shadowing the builtin)
    if len(g.bundles) == 0:
        rdf_format = 'turtle'
        headers = {'Accept': "text/turtle", 'Content-type': "application/json"}
    else:
        rdf_format = 'trig'
        ttl_file = ttl_file.replace('ttl', 'trig')
        headers = {'Accept': "application/trig", 'Content-type': "application/json"}
    if not os.path.exists(ttl_file):
        # Context manager instead of json.load(open(...)): no leaked handle.
        with open(fname, 'r') as fp:
            payload = json.load(fp)
        resp = requests.post(url=url, json=payload, headers=headers)
        if resp.ok:
            with open(ttl_file, 'wt') as fp:
                fp.write(resp.text)
        else:
            print('could not convert: %s' % fname)


# In[3]:
def dummy_func(message):
    """No-op replacement for logger2.debug to silence debug output."""
    pass

len(json_files)  # notebook cell echo: number of fixtures found


# In[4]: round-trip every fixture: JSON -> ProvDocument -> RDF -> ProvDocument
# and assert the result equals the original. Set `target` to a fixture index
# to get verbose debug output for that one fixture.
json_files = sorted(glob('/software/nipy-repo/w3c/prov/prov/tests/json/*.json'))
start = 0
target = 397
# Known-failing fixture indices to skip.  bundles [273, 274, 275, 276]
skip = [354, 355, 356, 357, 358, 359] + [362, 363, 366, 367] + \
       [370, 371, 372, 373, 374, 375] + [378, 379]
for idx, fname in enumerate(json_files[(start):(target + 1)]):
    logger2.setLevel(logging.getLevelName('INFO'))
    logger2.debug = dummy_func
    _, ttl_file = os.path.split(fname)
    ttl_file = os.path.join('/software/nipy-repo/w3c/prov/prov/tests/rdf/',
                            ttl_file.replace('json', 'ttl'))
    if idx in [val - start for val in skip]:  # or idx != target:
        logger2.info('Skipping: %s' % fname)
        continue
    logger2.info('Testing: %d, %s' % (idx, fname))
    # Pre-bind so the except block cannot mask the real error with a
    # NameError when the failure happens before these are assigned.
    g = None
    g1 = None
    try:
        g = pm.ProvDocument.deserialize(fname)
        if idx == target - start:
            # Verbose mode for the targeted fixture only.
            logger2.setLevel(logging.getLevelName('DEBUG'))
            logger2.debug = print
        logger2.debug(g.get_provn())
        rdf_format = 'turtle' if len(g.bundles) == 0 else 'trig'
        logger2.info(rdf_format)
        if rdf_format == 'trig':
            ttl_file = ttl_file.replace('ttl', 'trig')
        if idx == target - start:
            with open(ttl_file, 'rt') as fp:
                logger2.debug('-------- Expected RDF ---------')
                logger2.debug(''.join(fp.readlines()))
            logger2.debug('-------- Converting to rdf ---------')
        # Serialize once; the original serialized a second time in the
        # non-target branch for no reason.
        rdf_graph = g.serialize(format='rdf', rdf_format=rdf_format)
        logger2.debug(rdf_graph)
        logger2.debug('-------- Deserializing ---------')
        # g0 = rl.ConjunctiveGraph().parse(StringIO(g.serialize(format='rdf', rdf_format=rdf_format)),
        #                                  format=rdf_format)
        g1 = pm.ProvDocument.deserialize(content=rdf_graph,
                                         format='rdf', rdf_format=rdf_format)
        assert_equal(g, g1)
    except Exception as e:
        print(e)
        if g is not None:
            print(g.get_provn())
        # print(g.serialize(format='rdf', rdf_format=rdf_format))
        if g1 is not None:
            print(g1.get_provn())
        raise


# In[5]: diagnostic diff of the last (failing) pair of documents:
# which records of g1 have no equal counterpart in g, and vice versa.
rec2 = [rec.copy() for rec in g._records]
missing_recs = []
for rec in g1._records:
    found = False
    for rec1 in rec2:
        if rec == rec1:
            # Remove the matched record and stop scanning: the original
            # mutated rec2 while iterating it, which skips elements.
            rec2.remove(rec1)
            found = True
            break
    if not found:
        missing_recs.append(rec)
        # print(rec)
print('----ORIGINAL----')
for rec in rec2:
    pass
    # print(rec)
print('----DIFFS----')
# With exactly one unmatched record on each side, diff their attributes.
if len(missing_recs) == 1 and len(rec2) == 1:
    attr1 = missing_recs[0].attributes
    attr2 = [pair for pair in rec2[0].attributes]
    for pair in attr1:
        if pair in attr2:
            attr2.remove(pair)
        else:
            print(pair)
    print('----ATTR2 - rec2-----')
    for val in attr2:
        print(val)


# In[ ]: dump both documents for manual inspection.
print(g.get_provn())
print(g1.get_provn())
print(g.serialize(format='rdf', rdf_format='turtle'))