from bcbio import isatab
import synapseclient
import tempfile
# Log in to Synapse: create the client object and open a session with it.
# NOTE(review): login() is called without arguments, so credentials are
# presumably read from a local config file or cache - confirm.
syn=synapseclient.Synapse()
syn.login()
Welcome, Abhishek Pratap!
# Create the Synapse project that will hold the converted study.
# (Alternative project name kept from the original notebook: 'test_isa_to_synpase')
project = syn.store(synapseclient.Project('test_NSD_isa_to_synpase'))

# Parse the ISA-Tab directory and work with the first study it contains.
# (Alternative dataset kept from the original notebook:
#  '/Users/abhishek/projects/isa_tab/data/BII-S-3')
rec = isatab.parse('/Users/abhishek/projects/isa_tab/data/ISA-tab_Munoz-Heck_July26')
study = rec.studies[0]
# Build the study-level markdown that becomes the body of the project wiki.
# The sections are collected in a list and joined once at the end.
markdown_sections = [
    '###Title: %s \n' % study.metadata['Study Title'],
    '---\n',
    # The organism is not read from the ISA-Tab record; a placeholder is shown.
    '#####Organism(s): \n %s \n\n' % 'TBD',
    '>**Description:** %s \n\n' % study.metadata['Study Description'],
]

# Study design types, space separated.
design_types = ' '.join([descp['Study Design Type'] for descp in study.design_descriptors])
markdown_sections.append('#####Design(s): \n %s\n\n' % design_types)

# Publications: the author list on one line, then the title followed by a
# PubMed link. A space separates the title from the link so the two do not
# run together in the rendered text.
publication_entries = []
for publication in study.publications:
    publication_entries.append(
        '%s\n%s [Pubmed](http://www.ncbi.nlm.nih.gov/pubmed/%s) \n\n' % (
            publication['Study Publication Author List'],
            publication['Study Publication Title'],
            publication.get('Study PubMed ID', 'NA')))
publications_line = ''.join(publication_entries)
markdown_sections.append('#####Publication(s):\n %s \n ' % publications_line)

# Contact names. Every missing name part falls back to 'NA'; the last name
# uses the same default as the other parts so it never renders as 'None'.
all_full_names = []
for contact in study.contacts:
    all_full_names.append('%s %s %s' % (
        contact.get('Study Person First Name', 'NA'),
        contact.get('Study Person Mid Initials', 'NA'),
        contact.get('Study Person Last Name', 'NA')))
markdown_sections.append('**Contact(s):** \n %s \n\n' % ', '.join(all_full_names))

study_markdown_text = ''.join(markdown_sections)
# Create a wiki owned by the project, with the study markdown as its body,
# and store it in Synapse.
project_wiki = synapseclient.Wiki(owner = project,
markdown = study_markdown_text )
project_wiki = syn.store(project_wiki)
# Workaround: fetch the project's wiki back, overwrite its markdown and store
# it again, so that new markdown content ends up in the wiki.
# NOTE(review): presumably needed because the first store does not update a
# wiki that already exists from an earlier run - confirm against synapseclient.
project_wiki = syn.getWiki(project)
project_wiki.markdown = study_markdown_text
project_wiki = syn.store(project_wiki)
# Copy the study's submission and release dates onto the project as
# annotations, then persist the project.
for annotation_key, metadata_field in (
        ('Data_Submission_Date', 'Study Submission Date'),
        ('Data_Release_Date', 'Study Public Release Date')):
    project[annotation_key] = study.metadata[metadata_field]
project = syn.store(project)
# Store every study protocol as a text file under a dedicated 'Protocols' folder.
protocols_folder = synapseclient.Folder('Protocols', parent=project)
protocols_folder = syn.store(protocols_folder)

# Maps protocol name -> stored Synapse File entity. The assay upload step
# looks protocols up here when it builds provenance records.
protocols_synapse_files = {}
for protocol in study.protocols:
    protocol_name = protocol.get('Study Protocol Name', 'NA')
    # The file is written to the current working directory; spaces in the
    # protocol name are replaced so the file name contains none.
    temp_file_name = protocol_name.replace(' ', '_') + '.txt'

    protocols_markdown_text = '%s \n' % (unicode(protocol_name))
    # %r writes the repr of the description, which keeps the text ASCII-only.
    protocols_markdown_text += '%r \n\n' % (protocol.get('Study Protocol Description', 'NA'))

    # The context manager closes the file even when the write raises.
    with open(temp_file_name, 'w') as temp_file:
        temp_file.write(protocols_markdown_text)

    syn_file = synapseclient.File(path=temp_file_name, parent=protocols_folder)
    syn_file = syn.store(syn_file)
    protocols_synapse_files[protocol_name] = syn_file
..,!! Upload completed in 3 seconds. ..,!! Upload completed in 2 seconds. ..,!! Upload completed in 2 seconds. ..,!! Upload completed in 2 seconds. ..,!! Upload completed in 2 seconds. ..,!! Upload completed in 2 seconds. ..,!! Upload completed in 3 seconds. ..,!! Upload completed in 3 seconds. ..,!! Upload completed in 3 seconds. ..,!! Upload completed in 3 seconds. ..,!! Upload completed in 3 seconds.
import re

# Matches every character Synapse rejects in an entity name. Synapse accepts
# letters, numbers, spaces, underscores, hyphens, periods, plus signs and
# parentheses.
_INVALID_ENTITY_NAME_CHARS = re.compile(r'[^A-Za-z0-9 _.+()-]')


def _synapse_entity_name(location):
    """Return a Synapse-safe entity name for a file path or URL.

    The last '/'-separated segment of *location* is taken and every character
    that is not allowed in an entity name is replaced by an underscore. A
    segment that is already valid is returned unchanged.
    """
    last_segment = location.rstrip('/').rsplit('/', 1)[-1]
    return _INVALID_ENTITY_NAME_CHARS.sub('_', last_segment)


# Store assays under a separate folder, one sub-folder per assay.
assays_folder = synapseclient.Folder('Assays', parent=project)
assays_folder = syn.store(assays_folder)

for assay in study.assays:
    assay_name = (assay.metadata.get('Study Assay Measurement Type') + ' using '
                  + assay.metadata.get('Study Assay Technology Type'))
    print('Assay Name %s' % assay_name)

    # Create a folder for this assay and attach the assay-level metadata.
    individual_assay_folder = synapseclient.Folder(assay_name, parent=assays_folder)
    for key, val in assay.metadata.items():
        # Synapse does not allow spaces in annotation keys; empty values
        # are stored as 'NA'.
        individual_assay_folder[key.replace(' ', '_')] = 'NA' if val == '' else val
    individual_assay_folder = syn.store(individual_assay_folder)

    # Register each assay file as a link (synapseStore=False) in the folder.
    for node in assay.nodes:
        print('Node name %s' % node)
        node_metadata = assay.nodes[node].metadata
        node_location = node.replace('*', '')

        # An explicit, sanitized name is passed because a name derived from a
        # URL with a query string (e.g. '?acc=...&file=...') is rejected by
        # Synapse with "400 Bad Request: Invalid Entity name".
        assay_file = synapseclient.File(node_location,
                                        name=_synapse_entity_name(node_location),
                                        parent=individual_assay_folder,
                                        synapseStore=False)
        assay_file['Sample_Name'] = node_metadata['Sample Name']
        #assay_file['Derived_Data_File'] = node_metadata['Derived Data File']
        assay_file['Raw_Data_File'] = node_metadata['Raw Data File']

        # A missing or empty 'Material Type' entry is recorded as NA.
        material_types = node_metadata.get('Material Type')
        if material_types:
            material = material_types[0]
            assay_file['Material_Type'] = '%s : %s : %s' % (material.Material_Type,
                                                            material.Term_Source_REF,
                                                            material.Term_Accession_Number)
        else:
            assay_file['Material_Type'] = 'NA : NA : NA'

        # Provenance: collect the stored protocol files this node refers to.
        used_protocols_synapse_files = []
        for used_protocol in node_metadata['Protocol REF']:
            protocol_ref = used_protocol.Protocol_REF
            if not protocol_ref:
                # Blank Protocol REF cells name no protocol; nothing to look up.
                continue
            protocol_file = protocols_synapse_files.get(protocol_ref)
            if protocol_file is None:
                print('Error: protocol %s not found in the study protocols listed' % protocol_ref)
            else:
                used_protocols_synapse_files.append(protocol_file)

        # Store the file together with its provenance activity.
        assay_file_provenance_activity = synapseclient.Activity(used=used_protocols_synapse_files)
        assay_file = syn.store(assay_file, activity=assay_file_provenance_activity)
Assay Name protein expression profiling using mass spectrometry Node name ftp://ftp.pride.ebi.ac.uk/2013/02/PXD000134/OR1_091015_Javier_iPS_Singapore_Mix1_BigSCX*.raw Error: protocol not found in the study protocols listed Node name ftp://ftp.pride.ebi.ac.uk/2013/02/PXD000134/4Skin_Mix2_SCX*.raw Error: protocol not found in the study protocols listed Node name ftp://ftp.pride.ebi.ac.uk/2013/02/PXD000134/4Skin_Mix1_SCX*.raw Error: protocol not found in the study protocols listed Node name ftp://ftp.pride.ebi.ac.uk/2013/02/PXD000134/OR1_091015_Javier_iPS_Singapore_Mix2_BigSCX*.raw Error: protocol not found in the study protocols listed Assay Name transcription profiling using DNA microarray Node name
--------------------------------------------------------------------------- SynapseHTTPError Traceback (most recent call last) <ipython-input-26-f03a3a864d98> in <module>() 50 #generate the provenance 51 assay_file_provenance_activity = synapseclient.Activity(used = used_protocols_synapse_files) ---> 52 assay_file = syn.store(assay_file, activity = assay_file_provenance_activity ) /Users/abhishek/anaconda/lib/python2.7/site-packages/synapseclient/client.pyc in store(self, obj, **kwargs) 671 else: 672 try: --> 673 properties = self._createEntity(properties) 674 except SynapseHTTPError as ex: 675 if createOrUpdate and ex.response.status_code == 409: /Users/abhishek/anaconda/lib/python2.7/site-packages/synapseclient/client.pyc in _createEntity(self, entity) 1975 1976 if self.debug: print "\n\n~~~ creating ~~~\n" + json.dumps(get_properties(entity), indent=2) -> 1977 return self.restPOST(uri='/entity', body=json.dumps(get_properties(entity))) 1978 1979 /Users/abhishek/anaconda/lib/python2.7/site-packages/synapseclient/retry.pyc in with_retry(*args, **kwargs) 53 # Try making the call 54 try: ---> 55 response = fn(*args, **kwargs) 56 except Exception as ex: 57 exc_info = sys.exc_info() /Users/abhishek/anaconda/lib/python2.7/site-packages/synapseclient/client.pyc in restPOST(self, uri, body, endpoint, headers, **kwargs) 2099 if self.debug: 2100 utils.debug_response(response) -> 2101 exceptions._raise_for_status(response) 2102 2103 if response.headers.get('content-type',None) == 'application/json': /Users/abhishek/anaconda/lib/python2.7/site-packages/synapseclient/exceptions.pyc in _raise_for_status(response) 102 ## TODO: Might as well append more information to the exception message 103 --> 104 raise SynapseHTTPError(message, response=response) 105 SynapseHTTPError: 400 Client Error: Bad Request Invalid Entity name: '?acc=GSM649323&format=file&file=GSM649323%2ECEL%2Egz'. 
Entity names may only contain: letters, numbers, spaces, underscores, hypens, periods, plus signs, and parentheses
http://www.ncbi.nlm.nih.gov/geo/download/?acc=GSM649323&format=file&file=GSM649323%2ECEL%2Egz Error: protocol not found in the study protocols listed
# Debugging aid: display the parsed metadata of the node currently bound to
# `node` (presumably the one whose upload failed in the assay loop - confirm).
assay.nodes[node].metadata
{'Data Record Accession': [Attrs(Data_Record_Accession='GSE26451'), Attrs(Data_Record_Accession='')], 'Data Repository': [Attrs(Data_Repository=''), Attrs(Data_Repository='Gene Expression Omnibus')], 'Derived Data File': [''], 'Label': [Attrs(Label='')], 'Labeled Extract Name': [Attrs(Labeled_Extract_Name='')], 'Protocol REF': [Attrs(Protocol_REF='Microarray data analysis'), Attrs(Protocol_REF=''), Attrs(Protocol_REF='Transcriptome analysis'), Attrs(Protocol_REF='RNA isolation')], 'Raw Data File': ['http://www.ncbi.nlm.nih.gov/geo/download/?acc=GSM649323&format=file&file=GSM649323%2ECEL%2Egz'], 'Sample Name': ['4Skin_Fibro_1']}
# Debugging aid: show the value currently bound to the loop variable `node`.
print node
EVUSNDQ02.sff