diff --git a/docs/reference.rst b/docs/reference.rst index be66aeee07b2c784d1ff09e3b3e2441049653c58..1a1636e3a8ff71155123a8b8a9a8f2d9cc66bfe7 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -1,6 +1,10 @@ Reference ========= +nomad.metainfo +-------------- +.. automodule:: nomad.metainfo.metainfo + nomad.config ------------ .. automodule:: nomad.config diff --git a/nomad/app/optimade/data.py b/nomad/app/optimade/data.py new file mode 100644 index 0000000000000000000000000000000000000000..e891b3cc22530632b4b6207045fb96faaf359c51 --- /dev/null +++ b/nomad/app/optimade/data.py @@ -0,0 +1,205 @@ +from ase.data import chemical_symbols +from elasticsearch_dsl import Keyword, Integer, Float, Text, InnerDoc, Nested + +from nomad.metainfo import MObject, Section, Quantity, Enum, Units + + +class ElementRatio(InnerDoc): + element = Keyword() + ratio = Float() + + @staticmethod + def from_structure_entry(entry: 'StructureEntry'): + return [ + ElementRatio(element=entry.elements[i], ratio=entry.elements_ratios[i]) + for i in range(0, entry.nelements)] + + +class Optimade(): + def __init__(self, query: bool = False, entry: bool = False): + pass + + +class StructureEntry(MObject): + m_section = Section(a_flask=dict(skip_none=True), a_elastic=dict(type=InnerDoc)) + + elements = Quantity( + type=Enum(chemical_symbols), shape=['1..*'], + a_elastic=dict(type=Keyword), + a_optimade=Optimade(query=True, entry=True)) + """ Names of the different elements present in the structure. """ + + nelements = Quantity( + type=int, + a_elastic=dict(type=Integer), + a_optimade=Optimade(query=True, entry=True)) + """ Number of different elements in the structure as an integer. """ + + elements_ratios = Quantity( + type=float, shape=['nelements'], + a_elastic=dict(type=lambda: Nested(ElementRatio), mapping=ElementRatio.from_structure_entry), + a_optimade=Optimade(query=True, entry=True)) + """ Relative proportions of different elements in the structure. 
""" + + chemical_formula_descriptive = Quantity( + type=str, + a_elastic=dict(type=Text, other_types=dict(keyword=Keyword)), + a_optimade=Optimade(query=True, entry=True)) + """ + The chemical formula for a structure as a string in a form chosen by the API + implementation. + """ + + chemical_formula_reduced = Quantity( + type=str, + a_elastic=dict(type=Text, other_types=dict(keyword=Keyword)), + a_optimade=Optimade(query=True, entry=True)) + """ + The reduced chemical formula for a structure as a string with element symbols and + integer chemical proportion numbers. The proportion number MUST be omitted if it is 1. + """ + + chemical_formula_hill = Quantity( + type=str, + a_elastic=dict(type=Text, other_types=dict(keyword=Keyword)), + a_optimade=Optimade(query=True, entry=False)) + """ + The chemical formula for a structure in Hill form with element symbols followed by + integer chemical proportion numbers. The proportion number MUST be omitted if it is 1. + """ + + chemical_formula_anonymous = Quantity( + type=str, + a_elastic=dict(type=Text, other_types=dict(keyword=Keyword)), + a_optimade=Optimade(query=True, entry=True)) + """ + The anonymous formula is the chemical_formula_reduced, but where the elements are + instead first ordered by their chemical proportion number, and then, in order left to + right, replaced by anonymous symbols A, B, C, ..., Z, Aa, Ba, ..., Za, Ab, Bb, ... and + so on. + """ + + dimension_types = Quantity( + type=int, shape=[3], + a_elastic=dict(type=Integer, mapping=lambda a: sum(a.dimension_types)), + a_optimade=Optimade(query=True, entry=True)) + """ + List of three integers. For each of the three directions indicated by the three lattice + vectors (see property lattice_vectors). This list indicates if the direction is + periodic (value 1) or non-periodic (value 0). Note: the elements in this list each + refer to the direction of the corresponding entry in lattice_vectors and not + the Cartesian x, y, z directions. 
+ """ + + lattice_vectors = Quantity( + type=float, shape=[3, 3], unit=Units.Angstrom, + a_optimade=Optimade(query=False, entry=True)) + """ The three lattice vectors in Cartesian coordinates, in ångström (Å). """ + + cartesian_site_positions = Quantity( + type=float, shape=['nsites', 3], unit=Units.Angstrom, + a_optimade=Optimade(query=False, entry=True)) + """ + Cartesian positions of each site. A site is an atom, a site potentially occupied by + an atom, or a placeholder for a virtual mixture of atoms (e.g., in a virtual crystal + approximation). + """ + + nsites = Quantity( + type=int, + a_elastic=dict(type=Integer), + a_optimade=Optimade(query=True, entry=True)) + """ An integer specifying the length of the cartesian_site_positions property. """ + + species_at_sites = Quantity( + type=str, shape=['nsites'], + a_optimade=Optimade(query=False, entry=True)) + """ + Name of the species at each site (where values for sites are specified with the same + order of the cartesian_site_positions property). The properties of the species are + found in the species property. + """ + + # TODO assemblies + + structure_features = Quantity( + type=Enum(['disorder', 'unknown_positions', 'assemblies']), shape=['1..*'], + a_elastic=dict(type=Keyword), + a_optimade=Optimade(query=True, entry=True)) + """ + A list of strings that flag which special features are used by the structure. + + - disorder: This flag MUST be present if any one entry in the species list has a + chemical_symbols list that is longer than 1 element. + - unknown_positions: This flag MUST be present if at least one component of the + cartesian_site_positions list of lists has value null. + - assemblies: This flag MUST be present if the assemblies list is present. + """ + + +class Species(MObject): + """ + Used to describe the species of the sites of this structure. 
Species can be pure + chemical elements, or virtual-crystal atoms representing a statistical occupation of a + given site by multiple chemical elements. + """ + + m_section = Section(repeats=True, parent=StructureEntry.m_section) + + name = Quantity( + type=str, + a_optimade=Optimade(entry=True)) + """ The name of the species; the name value MUST be unique in the species list. """ + + chemical_symbols = Quantity( + type=Enum(chemical_symbols + ['x', 'vacancy']), shape=['1..*'], + a_optimade=Optimade(entry=True)) + """ + A list of strings of all chemical elements composing this species. + + It MUST be one of the following: + + - a valid chemical-element name, or + - the special value "X" to represent a non-chemical element, or + - the special value "vacancy" to represent that this site has a non-zero probability + of having a vacancy (the respective probability is indicated in the concentration + list, see below). + + If any one entry in the species list has a chemical_symbols list that is longer than 1 + element, the correct flag MUST be set in the list structure_features (see + structure_features) + """ + + concentration = Quantity( + type=float, shape=['1..*'], + a_optimade=Optimade(entry=True)) + """ + A list of floats, with same length as chemical_symbols. The numbers represent the + relative concentration of the corresponding chemical symbol in this species. The + numbers SHOULD sum to one. Cases in which the numbers do not sum to one typically fall + only in the following two categories: + + - Numerical errors when representing float numbers in fixed precision, e.g. for two + chemical symbols with concentrations 1/3 and 2/3, the concentration might look + something like [0.33333333333, 0.66666666666]. If the client is aware that the sum + is not one because of numerical precision, it can renormalize the values so that the + sum is exactly one. + - Experimental errors in the data present in the database. 
In this case, it is the + responsibility of the client to decide how to process the data. + + Note that concentrations are uncorrelated between different sites (even of the same + species). + """ + + mass = Quantity(type=float, unit=Units.amu, a_optimade=dict(entry='optional')) + + original_name = Quantity(type=str, a_optimade=dict(entry='optional')) + """ + Can be any valid Unicode string, and SHOULD contain (if specified) the name of the + species that is used internally in the source database. + + Note: With regards to "source database", we refer to the immediate source being + queried via the OPTiMaDe API implementation. The main use of this field is for source + databases that use species names, containing characters that are not allowed (see + description of the species_at_sites list). + """ diff --git a/nomad/metainfo/__init__.py b/nomad/metainfo/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..08d60fec48e5cc4594e5769ae39aad386b76468e --- /dev/null +++ b/nomad/metainfo/__init__.py @@ -0,0 +1 @@ +from .metainfo import MObject, Section, Quantity, Enum, Units diff --git a/nomad/metainfo/metainfo.py b/nomad/metainfo/metainfo.py index ac8ce3d7cd54a3eb86937e8674e1e50026a0ecb3..94c4aca8ccb860ad625ef4a5feb5467ba61b4ed8 100644 --- a/nomad/metainfo/metainfo.py +++ b/nomad/metainfo/metainfo.py @@ -12,20 +12,135 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Type, TypeVar, Union, Tuple, Iterable, List, Any, Dict, cast -import sys +""" +The NOMAD meta-info allows to define physics data quantities. These definitions are +necessary for all computer representations of respective data (e.g. in Python, +search engines, data-bases, and files). 
+This module provides various Python interfaces for -__module__ = sys.modules[__name__] -MObjectBound = TypeVar('MObjectBound', bound='MObject') +- defining meta-info data +- to create and manipulate data that follows these definitions +- to (de-)serialize meta-info data in JSON (i.e. represent data in JSON formatted files) -""" +Here is a simple example that demonstrates the definition of System related quantities: + +.. code-block:: python + + class Run(MObject): + pass + + class System(MObject): + \"\"\" + A system section includes all quantities that describe a single simulated + system (a.k.a. geometry). + \"\"\" + + m_section = Section(repeats=True, parent=Run.m_section) + + n_atoms = Quantity(type=int) + \"\"\" Defines the number of atoms in the system. \"\"\" -Discussion: ------------ + atom_labels = Quantity(type=Enum(ase.data.chemical_symbols), shape=['n_atoms']) + atom_positions = Quantity(type=float, shape=['n_atoms', 3], unit=Units.m) + simulation_cell = Quantity(type=float, shape=[3, 3], unit=Units.m) + pbc = Quantity(type=bool, shape=[3]) +Here, we define a `section` called ``System``. The section mechanism allows one to organize +related data into, well, sections. Sections form containment hierarchies. Here +containment is a parent-child (whole-part) relationship. In this example many ``Systems`` +are part of one ``Run``. Each ``System`` can contain values for the defined quantities: +``n_atoms``, ``atom_labels``, ``atom_positions``, ``simulation_cell``, and ``pbc``. +Quantities allow one to state type, shape, and physics unit to specify possible quantity +values. + +Here is an example, where we use the above definition to create, read, and manipulate +data that follows these definitions: + +.. code-block:: python + + run = Run() + system = run.m_create(System) + system.n_atoms = 3 + system.atom_labels = ['H', 'H', 'O'] + + print(system.atom_labels) + print(run.m_to_json(ident=2)) + +This last statement will produce the following JSON: + +.. 
code-block:: JSON + + { + "m_section": "Run", + "System": [ + { + "m_section": "System", + "m_parent_index": 0, + "n_atoms": 3, + "atom_labels": [ + "H", + "H", + "O" + ] + } + ] + } + +This is the JSON representation, a serialized version of the Python representation in +the example above. + +Sections can be extended with new quantities outside the original section definition. +This provides the key mechanism to extend commonly defined parts with (code) specific +quantities: + +.. code-block:: Python + + class Method(nomad.metainfo.common.Method): + x_vasp_incar_ALGO=Quantity( + type=Enum(['Normal', 'VeryFast', ...]), + links=['https://cms.mpi.univie.ac.at/wiki/index.php/ALGO']) + \"\"\" + A convenient option to specify the electronic minimisation algorithm (as of VASP.4.5) + and/or to select the type of GW calculations. + \"\"\" + + +All meta-info definitions and classes for meta-info data objects (i.e. section instances) +inherit from :class:`MObject`. This base-class provides common functions and attributes +for all meta-info data objects. Names of these common parts are prefixed with ``m_`` +to distinguish them from user defined quantities. This also constitutes the `reflection` +interface (in addition to Python's built-in ``getattr``, ``setattr``) that allows one to +create and manipulate meta-info data, without prior program time knowledge of the underlying +definitions. + +.. 
autoclass:: MObject + +The following classes can be used to define and structure meta-info data: + +- sections are defined by sub-classing :class:`MObject` and using :class:`Section` to + populate the class attribute `m_section` +- quantities are defined by assigning class attributes of a section with :class:`Quantity` + instances +- references (from one section to another) can be defined with quantities that use + section definitions as type +- dimensions can be defined by simply using quantity names in shapes +- categories (formerly `abstract type definitions`) can be given in quantity definitions + to assign quantities to additional specialization-generalization hierarchies + +See the reference of classes :class:`Section` and :class:`Quantity` for details. + +.. autoclass:: Section +.. autoclass:: Quantity """ +from typing import Type, TypeVar, Union, Tuple, Iterable, List, Any, Dict, cast +import sys + + +__module__ = sys.modules[__name__] +MObjectBound = TypeVar('MObjectBound', bound='MObject') + # Reflection @@ -66,11 +181,11 @@ class MObject(metaclass=MObjectMeta): Sub-sections and parent sections can be read and manipulated with :data:`m_parent`, :func:`m_sub_section`, :func:`m_create`. - ``` - system = run.m_create(System) - assert system.m_parent == run - assert run.m_sub_section(System, system.m_parent_index) == system - ``` + .. code-block:: python + + system = run.m_create(System) + assert system.m_parent == run + assert run.m_sub_section(System, system.m_parent_index) == system Attributes: m_section: The section definition that defines this sections, its possible @@ -407,3 +522,13 @@ class Definition(MObject): m_section = Section(extends=[Section.m_section, Quantity.m_section, Package.m_section]) description = Quantity(type=str) + + +class Unit: + pass + + +class Units: + + Angstrom = Unit() + amu = Unit()