"""Rebuild ``datasource.tab`` and assemble feature vectors from it.

Steps, in order:

1. Change the working directory two levels up; every path below is
   relative to that directory.
2. Unlock ``datasource.tab`` with git-annex and rewrite it as a
   tab-separated table with one row per feature source.
3. Unlock ``Y2H/Y2H.db``, build an ``ocbio.extract.FeatureVectorAssembler``
   from ``datasource.tab``, regenerate it, and assemble one output file per
   input pair list.

This script should be updated to add new features when they become
available.
"""
import csv
import os
import subprocess
import sys

try:
    from importlib import reload  # Python 3.4+
except ImportError:
    pass  # Python 2: ``reload`` is a builtin

os.chdir("../../")

# Unlock the annexed file so it can be overwritten. The exit status is not
# checked, matching the original shell-escape invocation.
subprocess.call(["git", "annex", "unlock", "datasource.tab"])

# One row per feature source, three fields each.
# NOTE(review): the fields look like (source, database, options) as consumed
# by FeatureVectorAssembler -- confirm against ocbio.extract.
datasource_rows = [
    # Gene Ontology features
    ["Gene_Ontology", "Gene_Ontology",
     "generator=geneontology/testgen.pickle"],
    # Y2H SVM feature
    ["Y2H/Y2H.txt", "Y2H/Y2H.db",
     "valindexes=(4);ignoreheader=1;zeromissing=1"],
    # ENTS feature
    ["ENTS", "ENTS", "generator=ents/human.ENTS.features.pickle"],
    # ENTS summary feature
    ["ENTS_summary", "ENTS_summary",
     "generator=ents/human.Entrez.ENTS.summary.pickle"],
]

# The context manager closes the file even if a write fails.
with open("datasource.tab", "w") as datasource_file:
    csv.writer(datasource_file, delimiter="\t").writerows(datasource_rows)

# ocbio lives in the opencast-bio checkout, so it must be on the path
# before the import below.
sys.path.append("opencast-bio/")
import ocbio.extract

# Pick up edits to ocbio.extract when this is re-run in a live session.
reload(ocbio.extract)

# Unlock the Y2H database before the assembler is built and regenerated.
subprocess.call(["git", "annex", "unlock", "Y2H/Y2H.db"])

assembler = ocbio.extract.FeatureVectorAssembler("datasource.tab",
                                                 verbose=True)
assembler.regenerate(verbose=True)

assembler.assemble("iRefIndex/human.iRefIndex.positive.pairs.txt",
                   "features/human.iRefIndex.positive.vectors.txt",
                   verbose=True)
assembler.assemble("iRefIndex/human.iRefIndex.negative.pairs.txt",
                   "features/human.iRefIndex.negative.vectors.txt",
                   verbose=True)
# NOTE(review): the source was truncated inside this call (it ended with
# `verbose=Tfeatures/`). `verbose=True)` is reconstructed from the two calls
# above; the stray `features/` fragment may belong to a later step that is
# missing here -- confirm against the original notebook.
assembler.assemble("forGAVIN/mergecode/OUT/edgelist.txt",
                   "features/human.activezone.txt",
                   verbose=True)