#!/usr/bin/env python
# coding: utf-8

# # Submit Structures to MPComplete
#
# This notebook documents the process of
#
# 1. Taking and validating a collection of CIFs (e.g. in a ZIP file),
#    creating pymatgen Structure objects
# 2. Filtering for structures that are submittable to MP
#    (e.g. they are not duplicates)
# 3. Taking and validating common metadata for the structures
#    (authors, references, etc.)
# 4. Submitting the resulting StructureNL objects to MP-Complete
#    via the pymatgen MPRester

# ## Taking and validating a collection of CIFs

# We first need a filename for the ZIP archive.

# In[ ]:

zipfilename = '/Users/dwinston/Dropbox/best/structures/ever.zip'

# Let's create a list of structures from the ZIP archive's CIF files.
# Anything invalid about the ZIP archive or CIF files will raise an
# exception here.

# In[ ]:

from zipfile import ZipFile
from pymatgen.io.cif import CifParser

structures = []
# Open the archive in a `with` block so its file handle is released even if
# parsing one of the members raises.
with ZipFile(zipfilename, 'r') as myzip:
    for name in myzip.namelist():
        with myzip.open(name) as cif_file:
            structures.extend(CifParser(cif_file).get_structures())

# In[ ]:

len(structures)

# ## Filtering for structures that are submittable to MP

# Reject structures already on MP web site.

# In[ ]:

from pymatgen import MPRester

mpr = MPRester()

mp_ids = []          # ids returned by MP for structures it already has
new_structures = []  # structures for which MP returned no match
for s in structures:
    found = mpr.find_structure(s)
    if len(found) > 0:
        mp_ids.extend(found)
    else:
        new_structures.append(s)

if len(mp_ids) > 0:
    print("Filtered out structures already on MP: {}".format(mp_ids))

# In[ ]:

len(new_structures)

# Create a mock "job" for each structure, and then simulate the checks the
# submission processor does to reject jobs. The structures that pass here
# will actually spawn a ready workflow, so we will filter for such structures.
# In[ ]:

from pymatgen import Composition
from pymatgen.util.provenance import StructureNL


def get_meta_from_structure(structure):
    """Used by `structure_to_mock_job`, to "fill out" a job document.

    Returns a dict of summary fields derived from `structure` and its
    composition: site count, sorted element symbols, several formula
    representations, the chemical system, and the `is_ordered` /
    `is_valid` flags.
    """
    comp = structure.composition
    elsyms = sorted({e.symbol for e in comp.elements})
    meta = {
        'nsites': len(structure),
        'elements': elsyms,
        'nelements': len(elsyms),
        'formula': comp.formula,
        'reduced_cell_formula': comp.reduced_formula,
        'reduced_cell_formula_abc':
            Composition(comp.reduced_formula).alphabetical_formula,
        'anonymized_formula': comp.anonymized_formula,
        'chemsystem': '-'.join(elsyms),
        'is_ordered': structure.is_ordered,
        'is_valid': structure.is_valid(),
    }
    return meta


def structure_to_mock_job(structure):
    """Build a mock job dict for `structure`.

    The structure is wrapped in a StructureNL with a placeholder author,
    serialized with `as_dict()`, filled out with the metadata from
    `get_meta_from_structure` when 'is_valid' is not already present, and
    finally updated with the dict form of the sorted structure.
    """
    # Needs at least one author. This is for a mock job, so can put whatever.
    snl = StructureNL(structure, [{"name": "Evgraf Fedorov",
                                   "email": "symmetry@ftw.org"}])
    job = snl.as_dict()
    if 'is_valid' not in job:
        job.update(get_meta_from_structure(snl.structure))
    sorted_structure = snl.structure.get_sorted_structure()
    job.update(sorted_structure.as_dict())
    return job


# mpworks.processors.process_submissions.SubmissionProcessor#submit_new_workflow
MAX_SITES = 200  # SubmissionProcessor.MAX_SITES above

# from mpworks.workflows.wf_utils import NO_POTCARS
NO_POTCARS = ['Po', 'At', 'Rn', 'Fr', 'Ra', 'Am', 'Cm', 'Bk',
              'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr']


def job_is_submittable(job):
    """Return True if the mock `job` passes every rejection check.

    A job is rejected, with a message printed, when its structure has more
    than MAX_SITES sites, when job['is_valid'] is false, when any of
    job['elements'] is listed in NO_POTCARS, or when job['is_ordered'] is
    false. The checks run in that order and only the first failure is
    reported.
    """
    snl = StructureNL.from_dict(job)
    formula = snl.structure.formula
    nsites = len(snl.structure.sites)
    # Single-argument print(...) calls behave identically on Python 2 and 3.
    if nsites > MAX_SITES:
        print('REJECTED WORKFLOW FOR {} - too many sites ({})'.format(
            formula, nsites))
    elif not job['is_valid']:
        print('REJECTED WORKFLOW FOR {} - invalid structure (atoms too close)'.format(
            formula))
    elif len(set(NO_POTCARS) & set(job['elements'])) > 0:
        print('REJECTED WORKFLOW FOR {} - invalid element (No POTCAR)'.format(
            formula))
    elif not job['is_ordered']:
        print('REJECTED WORKFLOW FOR {} - invalid structure (disordered)'.format(
            formula))
    else:
        return True
    return False


# No longer need separate reference for new_structures
structures = new_structures

submittables = [s for s in structures
                if job_is_submittable(structure_to_mock_job(s))]

# ## Taking and validating common metadata for the structures

# If there are issues with the metadata, an exception will be raised on
# attempting to create `snl_list`.

# In[ ]:

# No longer need separate reference for submittables
structures = submittables

# List of (name, email) pairs
authors = [
    ('Evgraf Fedorov', 'symmetry@ftw.org'),
    ('Arthur Schoenflies', 'art@berlin.de'),
]

# BibTeX string of references.
# Raw string: the entry below contains literal backslashes (`\{`, `\}`) that
# would otherwise be treated as (invalid) escape sequences.
references = r"""
@article{Graf1961,
author = {Graf, Donald L},
journal = {American Mineralogist},
number = {11},
pages = {1283--1316},
title = {{Crystallographic tables for the rhombohedral carbonates}},
volume = {46},
year = {1961}
}

@article{Akao_1977,
author = {Akao, M and Iwai, S},
doi = {10.1107/s0567740877005834},
journal = {Acta Crystallogr Sect B},
month = {apr},
number = {4},
pages = {1273--1275},
publisher = {International Union of Crystallography ({\{}IUCr{\}})},
title = {{The hydrogen bonding of hydromagnesite}},
url = {http://dx.doi.org/10.1107/s0567740877005834},
volume = {33},
year = {1977}
}
"""

# Projects? List of strings.
projects = []

# Remarks? List of strings.
remarks = []

snl_list = StructureNL.from_structures(structures, authors,
                                       references=references,
                                       projects=projects,
                                       remarks=remarks)

# ## Submitting the structures to MP-Complete

# In[ ]:

# Using v1 endpoint
mpr = MPRester(endpoint="https://www.materialsproject.org/rest/v1")

# Submission is intentionally left disabled; uncomment to actually submit.
# mpr.submit_snl(snl_list)