From ae403871949db44cf7d04efdfb21da6f07308ff3 Mon Sep 17 00:00:00 2001 From: RealPolitiX <xrpatrick@gmail.com> Date: Fri, 3 May 2019 01:30:10 +0200 Subject: [PATCH] Update with mpes-parser content --- examples/mpes_metadata.json | 0 mpesparser/__init__.py | 176 +++++ {skeletonparser => mpesparser}/__main__.py | 4 +- mpesparser/mpes.nomadmetainfo.json | 718 +++++++++++++++++++++ setup.py | 8 +- skeletonparser/__init__.py | 96 --- skeletonparser/skeleton.nomadmetainfo.json | 20 - tests/example.metadata.json | 10 - tests/mpes_data.json | 77 +++ tests/mpes_data_redacted.json | 74 +++ 10 files changed, 1051 insertions(+), 132 deletions(-) create mode 100644 examples/mpes_metadata.json create mode 100644 mpesparser/__init__.py rename {skeletonparser => mpesparser}/__main__.py (90%) create mode 100644 mpesparser/mpes.nomadmetainfo.json delete mode 100644 skeletonparser/__init__.py delete mode 100644 skeletonparser/skeleton.nomadmetainfo.json delete mode 100644 tests/example.metadata.json create mode 100644 tests/mpes_data.json create mode 100644 tests/mpes_data_redacted.json diff --git a/examples/mpes_metadata.json b/examples/mpes_metadata.json new file mode 100644 index 0000000..e69de29 diff --git a/mpesparser/__init__.py b/mpesparser/__init__.py new file mode 100644 index 0000000..83158d7 --- /dev/null +++ b/mpesparser/__init__.py @@ -0,0 +1,176 @@ +# Copyright 2016-2018 Markus Scheidgen +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import os.path +import json +import ase +import re +import numpy as np +from datetime import datetime + +from nomadcore.simple_parser import SimpleMatcher +from nomadcore.baseclasses import ParserInterface, AbstractBaseParser + +from nomad.parsing import LocalBackend + + +class MPESParserInterface(ParserInterface): + def get_metainfo_filename(self): + """ + The parser specific metainfo. To include other metadata definitions, use + the 'dependencies' key to refer to other local nomadmetainfo.json files or + to nomadmetainfo.json files that are part of the general nomad-meta-info + submodule (i.e. ``dependencies/nomad-meta-info``). + """ + return os.path.join(os.path.dirname(__file__), 'mpes.nomadmetainfo.json') + + def get_parser_info(self): + """ Basic info about parser used in archive data and logs. """ + return { + 'name': 'mpes-parser', + 'version': '1.0.0' + } + + def setup_version(self): + """ Can be used to call :func:`setup_main_parser` differently for different code versions. """ + self.setup_main_parser(None) + + def setup_main_parser(self, _): + """ Setup the actual parser (behind this interface) """ + self.main_parser = MPESParser(self.parser_context) + + +class MPESParser(AbstractBaseParser): + + def parse(self, filepath): + backend = self.parser_context.super_backend + + with open(filepath, 'rt') as f: + data = json.load(f) + # print(data) + + # # You need to open sections before you can add values or sub sections to it. + # # The returned 'gid' can be used to reference a specific section if multiple + # # sections of the same type are opened. + root_gid = backend.openSection('section_experiment') + # # Values do not necessarily have to be read from the parsed file. + # # The backend will check the type of the given value against the metadata definition. + # backend.addValue('experiment_time', int(datetime.strptime(data.get('date'), '%d.%M.%Y').timestamp())) + # + # # Read data . 
+ # data_gid = backend.openSection('section_data') + # backend.addValue('data_repository_name', 'zenodo.org') + # backend.addValue('data_repository_url', 'https://zenodo.org/path/to/mydata') + # backend.addValue('data_preview_url', 'https://www.physicsforums.com/insights/wp-content/uploads/2015/09/fem.jpg') + # backend.closeSection('section_data', data_gid) + + # Read general experimental parameters + # general_gid = backend.openSection('section_experiment_general_parameters') + backend.addValue('general_experiment_method', data.get('experiment_method')) + backend.addValue('general_experiment_method_abbreviation', data.get('experiment_method_abbrv')) + backend.addArrayValues('general_experiment_location', np.array(re.findall(r"[\w']+", data.get('experiment_location')))) + backend.addValue('general_experiment_date', data.get('experiment_date')) + backend.addValue('general_experiment_summary', data.get('experiment_summary')) + backend.addValue('general_experiment_facility_institution', data.get('facility_institution')) + backend.addValue('general_experiment_facility_name', data.get('facility_name')) + backend.addValue('general_beamline', data.get('beamline')) + backend.addValue('general_source_pump', data.get('source_pump')) + backend.addValue('general_source_probe', data.get('source_probe')) + backend.addValue('general_equipment_description', data.get('equipment_description')) + backend.addValue('general_sample_description', data.get('sample_description')) + backend.addArrayValues('general_measurement_axis', np.array(re.findall(r"[\w']+", data.get('measurement_axis')))) + backend.addArrayValues('general_physical_axis', np.array(re.findall(r"[\w']+", data.get('physical_axis')))) + + # Read parameters related to experimental source + # source_gid = backend.openSection('section_experiment_source_parameters') + backend.addValue('source_pump_repetition_rate', data.get('pump_rep_rate')) + backend.addValue('source_pump_pulse_duration', data.get('pump_pulse_duration')) + 
backend.addValue('source_pump_wavelength', data.get('pump_wavelength')) + backend.addArrayValues('source_pump_spectrum', np.array(data.get('pump_spectrum'))) + backend.addValue('source_pump_photon_energy', data.get('pump_photon_energy')) + backend.addArrayValues('source_pump_size', np.array(data.get('pump_size'))) + backend.addArrayValues('source_pump_fluence', np.array(data.get('pump_fluence'))) + backend.addValue('source_pump_polarization', data.get('pump_polarization')) + backend.addValue('source_pump_bunch', data.get('pump_bunch')) + backend.addValue('source_probe_repetition_rate', data.get('probe_rep_rate')) + backend.addValue('source_probe_pulse_duration', data.get('probe_pulse_duration')) + backend.addValue('source_probe_wavelength', data.get('probe_wavelength')) + backend.addArrayValues('source_probe_spectrum', np.array(data.get('probe_spectrum'))) + backend.addValue('source_probe_photon_energy', data.get('probe_photon_energy')) + backend.addArrayValues('source_probe_size', np.array(data.get('probe_size'))) + backend.addArrayValues('source_probe_fluence', np.array(data.get('probe_fluence'))) + backend.addValue('source_probe_polarization', data.get('probe_polarization')) + backend.addValue('source_probe_bunch', data.get('probe_bunch')) + backend.addValue('source_temporal_resolution', data.get('temporal_resolution')) + + # Read parameters related to detector + # detector_gid = backend.openSection('section_experiment_detector_parameters') + backend.addValue('detector_extractor_voltage', data.get('extractor_voltage')) + backend.addValue('detector_work_distance', data.get('work_distance')) + backend.addArrayValues('detector_lens_names', np.array(re.findall(r"[\w']+", data.get('lens_names')))) + backend.addArrayValues('detector_lens_voltages', np.array(data.get('lens_voltages'))) + backend.addValue('detector_tof_distance', data.get('tof_distance')) + backend.addArrayValues('detector_tof_voltages', np.array(data.get('tof_voltages'))) + 
backend.addValue('detector_sample_bias', data.get('sample_bias')) + backend.addValue('detector_magnification', data.get('magnification')) + backend.addArrayValues('detector_voltages', np.array(data.get('detector_voltages'))) + backend.addValue('detector_type', data.get('detector_type')) + backend.addArrayValues('detector_sensor_size', np.array(data.get('sensor_size'))) + backend.addValue('detector_sensor_count', data.get('sensor_count')) + backend.addArrayValues('detector_sensor_pixel_size', np.array(data.get('sensor_pixel_size'))) + backend.addArrayValues('detector_calibration_x_to_momentum', np.array(data.get('calibration_x_to_momentum'))) + backend.addArrayValues('detector_calibration_y_to_momentum', np.array(data.get('calibration_y_to_momentum'))) + backend.addArrayValues('detector_calibration_tof_to_energy', np.array(data.get('calibration_tof_to_energy'))) + backend.addArrayValues('detector_calibration_stage_to_delay', np.array(data.get('calibration_stage_to_delay'))) + backend.addArrayValues('detector_calibration_other_converts', np.array(data.get('calibration_other_converts'))) + backend.addArrayValues('detector_momentum_resolution', np.array(data.get('momentum_resolution'))) + backend.addArrayValues('detector_spatial_resolution', np.array(data.get('spatial_resolution'))) + backend.addArrayValues('detector_energy_resolution', np.array(data.get('energy_resolution'))) + + # Read parameters related to sample + # sample_gid = backend.openSection('section_experiment_sample_parameters') + backend.addValue('sample_id', data.get('sample_id')) + backend.addValue('sample_state_of_matter', data.get('sample_state')) + backend.addValue('sample_purity', data.get('sample_purity')) + backend.addValue('sample_surface_termination', data.get('sample_surface_termination')) + backend.addValue('sample_layers', data.get('sample_layers')) + backend.addValue('sample_stacking_order', data.get('sample_stacking_order')) + backend.addValue('sample_space_group', 
data.get('sample_space_group')) + backend.addValue('sample_chemical_name', data.get('chemical_name')) + backend.addValue('sample_chemical_formula', data.get('chemical_formula')) + # backend.addArrayValues('sample_chemical_elements', np.array(re.findall(r"[\w']+", data.get('chemical_elements')))) + atoms = set(ase.Atoms(data.get('chemical_formula')).get_chemical_symbols()) + backend.addArrayValues('sample_atom_labels', np.array(list(atoms))) + backend.addValue('sample_chemical_id_cas', data.get('chemical_id_cas')) + backend.addValue('sample_temperature', data.get('sample_temperature')) + backend.addValue('sample_pressure', data.get('sample_pressure')) + backend.addValue('sample_growth_method', data.get('growth_method')) + backend.addValue('sample_preparation_method', data.get('preparation_method')) + backend.addValue('sample_vendor', data.get('sample_vendor')) + backend.addValue('sample_substrate_material', data.get('substrate_material')) + backend.addValue('sample_substrate_state_of_matter', data.get('substrate_state')) + backend.addValue('sample_substrate_vendor', data.get('substrate_vendor')) + + # To add arrays (vectors, matrices, etc.) use addArrayValues and provide a + # numpy array. The shape of the numpy array must match the shape defined in + # the respective metadata definition. 
+ + + # Close sections in the reverse order + # backend.closeSection('section_data', data_gid) + backend.closeSection('section_experiment', root_gid) + # backend.closeSection('section_experiment_general_parameters', general_gid) + # backend.closeSection('section_experiment_source_parameters', source_gid) + # backend.closeSection('section_experiment_detector_parameters', detector_gid) + # backend.closeSection('section_experiment_sample_parameters', sample_gid) diff --git a/skeletonparser/__main__.py b/mpesparser/__main__.py similarity index 90% rename from skeletonparser/__main__.py rename to mpesparser/__main__.py index d393f8d..02973c9 100644 --- a/skeletonparser/__main__.py +++ b/mpesparser/__main__.py @@ -14,11 +14,11 @@ import sys from nomad.parsing import LocalBackend -from skeletonparser import SkeletonParserInterface +from mpesparser import MPESParserInterface if __name__ == "__main__": # instantiate the parser via its interface with a LocalBackend - parser = SkeletonParserInterface(backend=LocalBackend) + parser = MPESParserInterface(backend=LocalBackend) # call the actual parsing with the given mainfile parser.parse(sys.argv[1]) # print the results stored in the LocalBackend diff --git a/mpesparser/mpes.nomadmetainfo.json b/mpesparser/mpes.nomadmetainfo.json new file mode 100644 index 0000000..10fe096 --- /dev/null +++ b/mpesparser/mpes.nomadmetainfo.json @@ -0,0 +1,718 @@ +{ + "type": "nomad_meta_info_1_0", + "description": "Metadata for a multidimensional photoemission spectroscopy experiment.", + "dependencies": [ + { + "metainfoPath":"general.nomadmetainfo.json" + }, + { + "metainfoPath":"general.experimental.nomadmetainfo.json" + } + ], + "metaInfos": [ + { + "description": "Name of the repository where the data is stored", + "name": "data_repository_name", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "URL of the data 
in the repository", + "name": "data_repository_url", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "URL of a preview image of the data", + "name": "data_preview_url", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Shape of the None/Null object", + "name": "none_shape", + "dtypeStr": "i", + "kindStr": "type_dimension", + "shape": [], + "superNames": ["section_experiment"] + }, + { + "description": "Full name of the experimental method in use", + "name": "general_experiment_method", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Abbreviated name (e.g. acronym) of the experimental method", + "name": "general_experiment_method_abbreviation", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Number of name segments in the experiment location", + "name": "number_of_location_names", + "dtypeStr": "i", + "kindStr": "type_dimension", + "shape": [], + "superNames": ["section_experiment"] + }, + { + "description": "Name of the city and country the experiment took place, format 'Country, City'", + "name": "general_experiment_location", + "dtypeStr": "C", + "shape": ["number_of_location_names"], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Start and end dates of the experiment, format 'DD.MM.YYYY - DD.MM.YYYY'", + "name": "general_experiment_date", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "C" + }, + { + "description": "Descriptive summary of the content of the experiment.", + "name": "general_experiment_summary", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Name of the institution hosting the experimental facility 
(e.g. in an acronym).", + "name": "general_experiment_facility_institution", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Name of the experimental facility (e.g. in an acronym).", + "name": "general_experiment_facility_name", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Name of the beamline the experiment took place.", + "name": "general_beamline", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Name or model of the pump light source.", + "name": "general_source_pump", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Name or model of the probe light source.", + "name": "general_source_probe", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Name or model of the equipment (e.g. 
in an acronym).", + "name": "general_equipment_description", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Description of the sample used in the experiment.", + "name": "general_sample_description", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Number of axes in the measurement hardware.", + "name": "number_of_axes", + "dtypeStr": "i", + "kindStr": "type_dimension", + "shape": [], + "superNames": ["section_experiment"] + }, + { + "description": "Names of the axes in the measurement hardware.", + "name": "general_measurement_axis", + "dtypeStr": "C", + "shape": ["number_of_axes"], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Names of the axes in physical terms.", + "name": "general_physical_axis", + "dtypeStr": "C", + "shape": ["number_of_axes"], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Repetition rate of the pump source.", + "name": "source_pump_repetition_rate", + "dtypeStr": "f", + "shape": [], + "superNames": ["section_experiment"], + "units": "Hz" + }, + { + "description": "Pulse duration of the pump source.", + "name": "source_pump_pulse_duration", + "dtypeStr": "f", + "shape": [], + "superNames": ["section_experiment"], + "units": "fs" + }, + { + "description": "Center wavelength of the pump source.", + "name": "source_pump_wavelength", + "dtypeStr": "f", + "shape": [], + "superNames": ["section_experiment"], + "units": "nm" + }, + { + "description": "Spectrum of the pump source.", + "name": "source_pump_spectrum", + "dtypeStr": "f", + "shape": ["length_of_spectrum"], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Photon energy of the pump source.", + "name": "source_pump_photon_energy", + "dtypeStr": "f", + "shape": [], + "superNames": ["section_experiment"], + "units": "eV" + }, + { + "description": "Full-width at 
half-maximum size of the pump source at or closest to the sample position.", + "name": "source_pump_size", + "dtypeStr": "f", + "shape": ["none_shape"], + "superNames": ["section_experiment"], + "units": "mm**2" + }, + { + "description": "Fluence of the pump source at or closest to the sample position.", + "name": "source_pump_fluence", + "dtypeStr": "f", + "shape": ["none_shape"], + "superNames": ["section_experiment"], + "units": "mJ/mm**2" + }, + { + "description": "Polarization of the pump source.", + "name": "source_pump_polarization", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Total bunch number of the pump source.", + "name": "source_pump_bunch", + "dtypeStr": "i", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Repetition rate of the probe source.", + "name": "source_probe_repetition_rate", + "dtypeStr": "f", + "shape": [], + "superNames": ["section_experiment"], + "units": "Hz" + }, + { + "description": "Pulse duration of the probe source.", + "name": "source_probe_pulse_duration", + "dtypeStr": "f", + "shape": [], + "superNames": ["section_experiment"], + "units": "fs" + }, + { + "description": "Center wavelength of the probe source.", + "name": "source_probe_wavelength", + "dtypeStr": "f", + "shape": [], + "superNames": ["section_experiment"], + "units": "nm" + }, + { + "description": "Number of pixel elements in the spectrum.", + "name": "length_of_spectrum", + "dtypeStr": "i", + "kindStr": "type_dimension", + "shape": [], + "superNames": ["section_experiment"] + }, + { + "description": "Spectrum of the probe source.", + "name": "source_probe_spectrum", + "dtypeStr": "f", + "shape": ["length_of_spectrum"], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Photon energy of the probe source.", + "name": "source_probe_photon_energy", + "dtypeStr": "f", + "shape": [], + "superNames": ["section_experiment"], + 
"units": "eV" + }, + { + "description": "Full-width at half-maximum size of the probe source at or closest to the sample position.", + "name": "source_probe_size", + "dtypeStr": "f", + "shape": ["none_shape"], + "superNames": ["section_experiment"], + "units": "mm**2" + }, + { + "description": "Fluence of the probe source at or closest to the sample position.", + "name": "source_probe_fluence", + "dtypeStr": "f", + "shape": ["none_shape"], + "superNames": ["section_experiment"], + "units": "mJ/mm**2" + }, + { + "description": "Polarization of the probe source.", + "name": "source_probe_polarization", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Total bunch number of the probe source.", + "name": "source_probe_bunch", + "dtypeStr": "i", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Full-width at half-maximum of the pump-probe cross-correlation function.", + "name": "source_temporal_resolution", + "dtypeStr": "f", + "shape": [], + "superNames": ["section_experiment"], + "units": "fs" + }, + { + "description": "Voltage between the extractor and the sample.", + "name": "detector_extractor_voltage", + "dtypeStr": "f", + "shape": [], + "superNames": ["section_experiment"], + "units": "V" + }, + { + "description": "Distance between the sample and the detector entrance.", + "name": "detector_work_distance", + "dtypeStr": "f", + "shape": [], + "superNames": ["section_experiment"], + "units": "mm" + }, + { + "description": "Number of electron lenses in the electron detector.", + "name": "number_of_lenses", + "dtypeStr": "i", + "kindStr": "type_dimension", + "shape": [], + "superNames": ["section_experiment"] + }, + { + "description": "Set of names for the electron-optic lenses.", + "name": "detector_lens_names", + "dtypeStr": "C", + "shape": ["number_of_lenses"], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Set of 
electron-optic lens voltages.", + "name": "detector_lens_voltages", + "dtypeStr": "f", + "shape": ["number_of_lenses"], + "superNames": ["section_experiment"], + "units": "V" + }, + { + "description": "Drift distance of the time-of-flight tube.", + "name": "detector_tof_distance", + "dtypeStr": "f", + "shape": [], + "superNames": ["section_experiment"], + "units": "m" + }, + { + "description": "Number of time-of-flight (TOF) drift tube voltage values in the electron detector.", + "name": "number_of_tof_voltages", + "dtypeStr": "i", + "kindStr": "type_dimension", + "shape": [], + "superNames": ["section_experiment"] + }, + { + "description": "Voltage applied to the time-of-flight tube.", + "name": "detector_tof_voltages", + "dtypeStr": "f", + "shape": ["number_of_tof_voltages"], + "superNames": ["section_experiment"], + "units": "V" + }, + { + "description": "Voltage bias applied to sample.", + "name": "detector_sample_bias", + "dtypeStr": "f", + "shape": [], + "superNames": ["section_experiment"], + "units": "V" + }, + { + "description": "Detector magnification.", + "name": "detector_magnification", + "dtypeStr": "f", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Number of detector voltage settings in the electron detector.", + "name": "number_of_detector_voltages", + "dtypeStr": "i", + "kindStr": "type_dimension", + "shape": [], + "superNames": ["section_experiment"] + }, + { + "description": "Voltage applied to detector.", + "name": "detector_voltages", + "dtypeStr": "f", + "shape": ["number_of_detector_voltages"], + "superNames": ["section_experiment"], + "units": "V" + }, + { + "description": "Description of the detector type (e.g. 
‘MCP’, ‘CCD’, ‘CMOS’, etc.).", + "name": "detector_type", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Number of detector sensor size dimensions (depending on the number of sensors).", + "name": "number_of_sensor_sizes", + "dtypeStr": "i", + "kindStr": "type_dimension", + "shape": [], + "superNames": ["section_experiment"] + }, + { + "description": "Size of each of the imaging sensor chips on the detector.", + "name": "detector_sensor_size", + "dtypeStr": "f", + "shape": ["number_of_sensor_sizes"], + "superNames": ["section_experiment"], + "units": "mm" + }, + { + "description": "Number of imaging sensor chips on the detector.", + "name": "detector_sensor_count", + "dtypeStr": "i", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Pixel size of the imaging sensor chip on the detector.", + "name": "detector_sensor_pixel_size", + "dtypeStr": "f", + "shape": ["none_shape"], + "superNames": ["section_experiment"], + "units": "um" + }, + { + "description": "Number of the momentum calibration parameters for the detector.", + "name": "number_of_momentum_calibration_coefficients", + "dtypeStr": "i", + "kindStr": "type_dimension", + "shape": [], + "superNames": ["section_experiment"] + }, + { + "description": "Pixel x axis to kx momentum calibration.", + "name": "detector_calibration_x_to_momentum", + "dtypeStr": "f", + "shape": ["number_of_momentum_calibration_coefficients"], + "superNames": ["section_experiment"], + "units": "AA**-1" + }, + { + "description": "Pixel y axis to ky momentum calibration.", + "name": "detector_calibration_y_to_momentum", + "dtypeStr": "f", + "shape": ["number_of_momentum_calibration_coefficients"], + "superNames": ["section_experiment"], + "units": "AA**-1" + }, + { + "description": "Number of the energy calibration parameters for the detector.", + "name": "number_of_energy_calibration_coefficients", + "dtypeStr": "i", + 
"kindStr": "type_dimension", + "shape": [], + "superNames": ["section_experiment"] + }, + { + "description": "Time-of-flight to energy calibration.", + "name": "detector_calibration_tof_to_energy", + "dtypeStr": "f", + "shape": ["number_of_energy_calibration_coefficients"], + "superNames": ["section_experiment"], + "units": "eV" + }, + { + "description": "Translation stage delay to pump-probe delay calibration.", + "name": "detector_calibration_stage_to_delay", + "dtypeStr": "f", + "shape": ["number_of_delay_calibration_coefficients"], + "superNames": ["section_experiment"], + "units": "fs" + }, + { + "description": "Number of the other calibration parameters for the detector.", + "name": "number_of_other_calibration_coefficients", + "dtypeStr": "i", + "kindStr": "type_dimension", + "shape": [], + "superNames": ["section_experiment"] + }, + { + "description": "Conversion factor between other measured and physical axes.", + "name": "detector_calibration_other_converts", + "dtypeStr": "f", + "shape": ["number_of_other_calibration_coefficients"], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Momentum resolution of the detector.", + "name": "detector_momentum_resolution", + "dtypeStr": "f", + "shape": ["none_shape"], + "superNames": ["section_experiment"], + "units": "AA**-1" + }, + { + "description": "Spatial resolution of the source.", + "name": "detector_spatial_resolution", + "dtypeStr": "f", + "shape": ["none_shape"], + "superNames": ["section_experiment"], + "units": "um" + }, + { + "description": "Energy resolution of the detector.", + "name": "detector_energy_resolution", + "dtypeStr": "f", + "shape": ["none_shape"], + "superNames": ["section_experiment"], + "units": "eV" + }, + { + "description": "Identification number or signatures of the sample used.", + "name": "sample_id", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Physical state of the sample.", + 
"name": "sample_state_of_matter", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Chemical purity of the sample.", + "name": "sample_purity", + "dtypeStr": "f", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Surface termination of the sample (if crystalline).", + "name": "sample_surface_termination", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Sample layer or bulk structure.", + "name": "sample_layers", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Stacking order of the solid surface (if crystalline).", + "name": "sample_stacking_order", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Space group of the sample compound (if crystalline).", + "name": "sample_space_group", + "dtypeStr": "i", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Chemical name of the sample.", + "name": "sample_chemical_name", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Chemical formula of the sample.", + "name": "sample_chemical_formula", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Number of distinct chemical elements in the sample.", + "name": "number_of_elements", + "dtypeStr": "i", + "kindStr": "type_dimension", + "shape": [], + "superNames": ["section_experiment"] + }, + { + "description": "Symbols of the chemical elements contained in the sample.", + "name": "sample_atom_labels", + "dtypeStr": "C", + "shape": ["number_of_elements"], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "CAS registry number of the sample’s chemical content.", + "name": 
"sample_chemical_id_cas", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Temperature of the sample at the time of measurement.", + "name": "sample_temperature", + "dtypeStr": "f", + "shape": [], + "superNames": ["section_experiment"], + "units": "K" + }, + { + "description": "Pressure surrounding the sample at the time of measurement.", + "name": "sample_pressure", + "dtypeStr": "f", + "shape": [], + "superNames": ["section_experiment"], + "units": "Pa" + }, + { + "description": "Sample growth method.", + "name": "sample_growth_method", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Sample preparation method.", + "name": "sample_preparation_method", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Name of the sample vendor.", + "name": "sample_vendor", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Material of the substrate the sample has immediate contact with.", + "name": "sample_substrate_material", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "State of matter of the substrate material.", + "name": "sample_substrate_state_of_matter", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + }, + { + "description": "Name of the substrate vendor.", + "name": "sample_substrate_vendor", + "dtypeStr": "C", + "shape": [], + "superNames": ["section_experiment"], + "units": "" + } + ] +} diff --git a/setup.py b/setup.py index 89a925c..d669a95 100644 --- a/setup.py +++ b/setup.py @@ -16,14 +16,14 @@ from setuptools import setup, find_packages def main(): setup( - name='skeletonparser', # replace with new name for parser's python package + name='mpesparser', version='0.1', - description='A skeleton NOMAD 
parser implementation.', # change accordingly - author='', # add your names + description='NOMAD parser implementation for multidimensional photoemission spectroscopy data.', + author='R. Patrick Xian', license='APACHE 2.0', packages=find_packages(), package_data={ - 'skeletonparser': ['*.json'] + 'mpesparser': ['*.json'] }, install_requires=[ 'nomadcore' diff --git a/skeletonparser/__init__.py b/skeletonparser/__init__.py deleted file mode 100644 index b7f0fd6..0000000 --- a/skeletonparser/__init__.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright 2016-2018 Markus Scheidgen -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import os.path -import json -import ase -import numpy as np -from datetime import datetime - -from nomadcore.simple_parser import SimpleMatcher -from nomadcore.baseclasses import ParserInterface, AbstractBaseParser - -from nomad.parsing import LocalBackend - - -class SkeletonParserInterface(ParserInterface): - - def get_metainfo_filename(self): - """ - The parser specific metainfo. To include other metadata definitions, use - the 'dependencies' key to refer to other local nomadmetainfo.json files or - to nomadmetainfo.json files that are part of the general nomad-meta-info - submodule (i.e. ``dependencies/nomad-meta-info``). - """ - return os.path.join(os.path.dirname(__file__), 'skeleton.nomadmetainfo.json') - - def get_parser_info(self): - """ Basic info about parser used in archive data and logs. 
""" - return { - 'name': 'you parser name', - 'version': '1.0.0' - } - - def setup_version(self): - """ Can be used to call :func:`setup_main_parser` differently for different code versions. """ - self.setup_main_parser(None) - - def setup_main_parser(self, _): - """ Setup the actual parser (behind this interface) """ - self.main_parser = SkeletonParser(self.parser_context) - - -class SkeletonParser(AbstractBaseParser): - def parse(self, filepath): - backend = self.parser_context.super_backend - - with open(filepath, 'rt') as f: - data = json.load(f) - - # You need to open sections before you can add values or sub sections to it. - # The returned 'gid' can be used to reference a specific section if multiple - # sections of the same type are opened. - root_gid = backend.openSection('section_experiment') - # Values are added to the open section of the given metadata definitions. In - # the following case 'experiment_location' is a quantity of 'section_experiment'. - # When multiple sections of the same type (e.g. 'section_experiment') are open, - # you can use the 'gid' as an additional argument. - backend.addValue('experiment_location', data.get('location')) - # Values do not necessarely have to be read from the parsed file. - backend.addValue('experiment_method_name', data.get('method', 'Bare eyes')) - # The backend will check the type of the given value agains the metadata definition. - backend.addValue('experiment_time', int(datetime.strptime(data.get('date'), '%d.%M.%Y').timestamp())) - - # Subsections work like before. The parent section must still be open. - data_gid = backend.openSection('section_data') - backend.addValue('data_repository_name', 'zenedo.org') - backend.addValue('data_repository_url', 'https://zenedo.org/path/to/mydata') - backend.addValue('data_preview_url', 'https://www.physicsforums.com/insights/wp-content/uploads/2015/09/fem.jpg') - backend.closeSection('section_data', data_gid) - - # Subsections work like before. 
The parent section must still be open. - sample_gid = backend.openSection('section_sample') - backend.addValue('sample_chemical_name', data.get('sample_chemical')) - backend.addValue('sample_chemical_formula', data.get('sample_formula')) - backend.addValue('sample_temperature', data.get('sample_temp')) - - atoms = set(ase.Atoms(data.get('sample_formula')).get_chemical_symbols()) - # To add arrays (vectors, matrices, etc.) use addArrayValues and provide a - # numpy array. The shape of the numpy array must match the shape defined in - # the respective metadata definition. - backend.addArrayValues('sample_atom_labels', np.array(list(atoms))) - - # Close sections in the reverse order. - backend.closeSection('section_sample', sample_gid) - backend.closeSection('section_experiment', root_gid) diff --git a/skeletonparser/skeleton.nomadmetainfo.json b/skeletonparser/skeleton.nomadmetainfo.json deleted file mode 100644 index 6720cef..0000000 --- a/skeletonparser/skeleton.nomadmetainfo.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "type": "nomad_meta_info_1_0", - "description": "Parser specific metadata definitions.", - "dependencies":[ - { - "metainfoPath":"general.nomadmetainfo.json" - }, - { - "metainfoPath":"general.experimental.nomadmetainfo.json" - } - ], - "metaInfos": [ - { - "description": "Contains information relating to an archive.", - "name": "experiment_location", - "dtypeStr": "C", - "superNames": ["section_experiment"] - } - ] -} diff --git a/tests/example.metadata.json b/tests/example.metadata.json deleted file mode 100644 index 22933e8..0000000 --- a/tests/example.metadata.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "type":"skeleton experimental metadata format 1.0", - "date":"24.12.2018", - "location":"Northpole", - "sample_formula":"H2O", - "sample_chemical":"Ice", - "sample_state":"Frozen", - "result":"https://bitcoinist.com/wp-content/uploads/2018/08/shutterstock_764225425.jpg", - "sample_temp":384 -} \ No newline at end of file diff --git 
a/tests/mpes_data.json b/tests/mpes_data.json new file mode 100644 index 0000000..8bc30b5 --- /dev/null +++ b/tests/mpes_data.json @@ -0,0 +1,77 @@ +{"GeneralParameters": + {"experiment_location": "Hamburg, Germany", + "experiment_date": "04.2018 - 05.2018", + "experiment_summary": "Characterization of excited-state circular dichroism of WSe2", + "institution": "DESY", + "facility": "FLASH", + "beamline": "PG-2", + "source_pump": "Free electron laser", + "source_probe": "Femtosecond laser", + "equipment": "HEXTOF detector", + "sample": "Bulk tungsten diselenide", + "measurement_axis": ["X", "Y", "t", "ADC"], + "physical_axis": ["kx", "ky", "E", "tpp"]}, +"SourceParameters": + {"pump_rep_rate": 1000, + "pump_pulse_duration": 100, + "pump_wavelength": 800, + "pump_spectrum": [], + "pump_photon_energy": 1.55, + "pump_size": "", + "pump_fluence": 1.5, + "pump_polarization": "linear", + "pump_bunch": 400, + "probe_rep_rate": 1000, + "probe_pulse_duration": 100, + "probe_wavelength": 800, + "probe_spectrum": [], + "probe_photon_energy": 109, + "probe_size": "", + "probe_fluence": "", + "probe_polarization": "circular", + "probe_bunch": 400, + "temporal_resolution": 100}, +"DetectorParameters": + {"extractor_voltage": 6000, + "work_distance": 4, + "lens_names": ["A", "B", "C", "D", "E", "F", "G", "H"], + "lens_voltages": [], + "tof_distance": 0.9, + "tof_voltages": 30, + "sample_bias": 15, + "magnification": [], + "detector_voltage": [], + "detector_type": "MCP", + "sensor_size": [], + "sensor_count": 4, + "sensor_pixel_size": [], + "x_to_momentum": [], + "y_to_momentum": [], + "tof_to_energy": [], + "stage_to_delay": [], + "other_converts": [], + "momentum_resolution": 0.01, + "spatial_resolution": "", + "energy_resolution": ""}, +"SampleParameters": + {"sample_id": "000", + "sample_state": "solid", + "sample_purity": 0.99, + "sample_surface_term": "", + "sample_layer": "bulk", + "sample_stacking": "2H", + "sample_space_group": 194, + "chem_formula": "WSe2", + 
"chem_elements": ["W", "Se"], + "chem_name": "tungsten diselenide", + "chem_id_cas": "12067-46-8", + "sample_temp": 300, + "sample_pressure": 1e-11, + "growth_method": "chemical vaport transport", + "preparation_method": "in-vacuum cleaving", + "sample_vendor": "HQ Graphene", + "substrate_material": "copper", + "substrate_state": "solid", + "substrate_vendor": "custom" + } +} diff --git a/tests/mpes_data_redacted.json b/tests/mpes_data_redacted.json new file mode 100644 index 0000000..30e619f --- /dev/null +++ b/tests/mpes_data_redacted.json @@ -0,0 +1,74 @@ +{ + "experiment_method": "multidimensional photoemission spectroscopy", + "experiment_method_abbrv": "MPES", + "experiment_location": "Hamburg, Germany", + "experiment_date": "04.2018 05.2018", + "experiment_summary": "Characterization of excited-state circular dichroism of WSe2", + "facility_institution": "DESY", + "facility_name": "FLASH", + "beamline": "PG-2", + "source_pump": "Free electron laser", + "source_probe": "Femtosecond laser", + "equipment_description": "HEXTOF detector", + "sample_description": "Bulk tungsten diselenide", + "measurement_axis": "X, Y, TOF, ADC", + "physical_axis": "kx, ky, E, tpp", + "pump_rep_rate": 1000, + "pump_pulse_duration": 100, + "pump_wavelength": 800, + "pump_spectrum": [], + "pump_photon_energy": 1.55, + "pump_size": [], + "pump_fluence": [], + "pump_polarization": "linear", + "pump_bunch": 400, + "probe_rep_rate": 1000, + "probe_pulse_duration": 100, + "probe_wavelength": 800, + "probe_spectrum": [], + "probe_photon_energy": 36.4970, + "probe_size": [], + "probe_fluence": [], + "probe_polarization": "circular", + "probe_bunch": 400, + "temporal_resolution": 100, + "extractor_voltage": 6030, + "work_distance": 4, + "lens_names": "A, B, C, D, E, F, G, H, I", + "lens_voltages": [], + "tof_distance": 0.9, + "tof_voltages": [20], + "sample_bias": 29, + "magnification": [-1.5], + "detector_voltages": [], + "detector_type": "MCP", + "sensor_size": [], + "sensor_count": 4, + 
"sensor_pixel_size": [], + "calibration_x_to_momentum": [], + "calibration_y_to_momentum": [], + "calibration_tof_to_energy": [], + "calibration_stage_to_delay": [], + "calibration_other_converts": [], + "momentum_resolution": [0.01], + "spatial_resolution": [], + "energy_resolution": [], + "sample_id": "000", + "sample_state": "solid", + "sample_purity": 0.99, + "sample_surface_termination": "0001", + "sample_layers": "bulk", + "sample_stacking_order": "2H", + "sample_space_group": 194, + "chemical_name": "tungsten diselenide", + "chemical_formula": "WSe2", + "chemical_id_cas": "12067-46-8", + "sample_temperature": 300, + "sample_pressure": 3.85e-10, + "growth_method": "chemical vaport transport", + "preparation_method": "in-vacuum cleaving", + "sample_vendor": "HQ Graphene", + "substrate_material": "copper", + "substrate_state": "solid", + "substrate_vendor": "custom" +} -- GitLab