Commit 1a0cce8e authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Added metainfo documentation. Added optimade structure entry as an example section.

parent 89c1b8f7
Reference
=========
nomad.metainfo
--------------
.. automodule:: nomad.metainfo.metainfo
nomad.config
------------
.. automodule:: nomad.config
......
from ase.data import chemical_symbols
from elasticsearch_dsl import Keyword, Integer, Float, Text, InnerDoc, Nested
from nomad.metainfo import MObject, Section, Quantity, Enum, Units
class ElementRatio(InnerDoc):
    """
    Elasticsearch inner document that pairs one element with its ratio.

    Instances are created by :func:`from_structure_entry`, which
    ``StructureEntry.elements_ratios`` uses as its elastic ``mapping``.
    """
    element = Keyword()
    ratio = Float()

    @staticmethod
    def from_structure_entry(entry: 'StructureEntry') -> list:
        """
        Builds one :class:`ElementRatio` per element of the given structure entry.

        Arguments:
            entry: The structure entry to read ``elements`` and ``elements_ratios`` from.

        Returns:
            A list of :class:`ElementRatio` objects, in the order of ``entry.elements``.
        """
        # Pair the two lists directly instead of indexing up to ``entry.nelements``;
        # this does not fail when ``nelements`` is unset and never indexes past the
        # end of either list (pairing stops at the shorter one).
        return [
            ElementRatio(element=element, ratio=ratio)
            for element, ratio in zip(entry.elements, entry.elements_ratios)]
class Optimade:
    """
    Annotation object that quantity definitions pass as ``a_optimade=Optimade(...)``.

    Arguments:
        query: Flag given by the quantity definition; presumably states whether the
            quantity can be used in OPTiMaDe queries -- TODO confirm.
        entry: Flag given by the quantity definition; presumably states whether the
            quantity is part of returned OPTiMaDe entries -- TODO confirm.

    Attributes:
        query: The value passed as ``query``.
        entry: The value passed as ``entry``.
    """
    def __init__(self, query: bool = False, entry: bool = False):
        # Keep the flags so that code evaluating the annotation can read them back;
        # previously both arguments were silently discarded.
        self.query = query
        self.entry = entry
class StructureEntry(MObject):
    # Example section definition for an OPTiMaDe structure entry. Every quantity below
    # carries an ``a_optimade`` annotation and, where it is mapped to elasticsearch, an
    # ``a_elastic`` annotation. The bare string that follows each quantity is its
    # description.
    m_section = Section(a_flask=dict(skip_none=True), a_elastic=dict(type=InnerDoc))

    elements = Quantity(
        type=Enum(chemical_symbols), shape=['1..*'],
        a_elastic=dict(type=Keyword),
        a_optimade=Optimade(query=True, entry=True))
    """ Names of the different elements present in the structure. """

    nelements = Quantity(
        type=int,
        a_elastic=dict(type=Integer),
        a_optimade=Optimade(query=True, entry=True))
    """ Number of different elements in the structure as an integer. """

    # The elastic type is wrapped in a lambda so that ``Nested(ElementRatio)`` is only
    # constructed when the lambda is called; the elastic value itself is computed from
    # the whole entry by ``ElementRatio.from_structure_entry``.
    elements_ratios = Quantity(
        type=float, shape=['nelements'],
        a_elastic=dict(type=lambda: Nested(ElementRatio), mapping=ElementRatio.from_structure_entry),
        a_optimade=Optimade(query=True, entry=True))
    """ Relative proportions of different elements in the structure. """

    chemical_formula_descriptive = Quantity(
        type=str,
        a_elastic=dict(type=Text, other_types=dict(keyword=Keyword)),
        a_optimade=Optimade(query=True, entry=True))
    """
    The chemical formula for a structure as a string in a form chosen by the API
    implementation.
    """

    chemical_formula_reduced = Quantity(
        type=str,
        a_elastic=dict(type=Text, other_types=dict(keyword=Keyword)),
        a_optimade=Optimade(query=True, entry=True))
    """
    The reduced chemical formula for a structure as a string with element symbols and
    integer chemical proportion numbers. The proportion number MUST be omitted if it is 1.
    """

    # NOTE(review): this is the only formula quantity annotated with ``entry=False``;
    # confirm that this is intended.
    chemical_formula_hill = Quantity(
        type=str,
        a_elastic=dict(type=Text, other_types=dict(keyword=Keyword)),
        a_optimade=Optimade(query=True, entry=False))
    """
    The chemical formula for a structure in Hill form with element symbols followed by
    integer chemical proportion numbers. The proportion number MUST be omitted if it is 1.
    """

    chemical_formula_anonymous = Quantity(
        type=str,
        a_elastic=dict(type=Text, other_types=dict(keyword=Keyword)),
        a_optimade=Optimade(query=True, entry=True))
    """
    The anonymous formula is the chemical_formula_reduced, but where the elements are
    instead first ordered by their chemical proportion number, and then, in order left to
    right, replaced by anonymous symbols A, B, C, ..., Z, Aa, Ba, ..., Za, Ab, Bb, ... and
    so on.
    """

    # The elastic mapping stores the sum of the three 0/1 flags as a single integer
    # instead of the list itself.
    dimension_types = Quantity(
        type=int, shape=[3],
        a_elastic=dict(type=Integer, mapping=lambda a: sum(a.dimension_types)),
        a_optimade=Optimade(query=True, entry=True))
    """
    List of three integers. For each of the three directions indicated by the three lattice
    vectors (see property lattice_vectors). This list indicates if the direction is
    periodic (value 1) or non-periodic (value 0). Note: the elements in this list each
    refer to the direction of the corresponding entry in lattice_vectors and not
    the Cartesian x, y, z directions.
    """

    # No ``a_elastic`` annotation is given for this quantity.
    lattice_vectors = Quantity(
        type=float, shape=[3, 3], unit=Units.Angstrom,
        a_optimade=Optimade(query=False, entry=True))
    """ The three lattice vectors in Cartesian coordinates, in ångström (Å). """

    # The shape refers to the quantity ``nsites`` by name; ``nsites`` is defined
    # further below.
    cartesian_site_positions = Quantity(
        type=float, shape=['nsites', 3], unit=Units.Angstrom,
        a_optimade=Optimade(query=False, entry=True))
    """
    Cartesian positions of each site. A site is an atom, a site potentially occupied by
    an atom, or a placeholder for a virtual mixture of atoms (e.g., in a virtual crystal
    approximation).
    """

    nsites = Quantity(
        type=int,
        a_elastic=dict(type=Integer),
        a_optimade=Optimade(query=True, entry=True))
    """ An integer specifying the length of the cartesian_site_positions property. """

    species_at_sites = Quantity(
        type=str, shape=['nsites'],
        a_optimade=Optimade(query=False, entry=True))
    """
    Name of the species at each site (where values for sites are specified with the same
    order of the cartesian_site_positions property). The properties of the species are
    found in the species property.
    """

    # TODO assemblies

    structure_features = Quantity(
        type=Enum(['disorder', 'unknown_positions', 'assemblies']), shape=['1..*'],
        a_elastic=dict(type=Keyword),
        a_optimade=Optimade(query=True, entry=True))
    """
    A list of strings that flag which special features are used by the structure.
    - disorder: This flag MUST be present if any one entry in the species list has a
    chemical_symbols list that is longer than 1 element.
    - unknown_positions: This flag MUST be present if at least one component of the
    cartesian_site_positions list of lists has value null.
    - assemblies: This flag MUST be present if the assemblies list is present.
    """
class Species(MObject):
    """
    Used to describe the species of the sites of this structure. Species can be pure
    chemical elements, or virtual-crystal atoms representing a statistical occupation of a
    given site by multiple chemical elements.
    """
    # Declared as a repeating section whose parent is the StructureEntry section.
    m_section = Section(repeats=True, parent=StructureEntry.m_section)

    name = Quantity(
        type=str,
        a_optimade=Optimade(entry=True))
    """ The name of the species; the name value MUST be unique in the species list. """

    # On the right-hand side ``chemical_symbols`` is still the module-level list
    # imported from ase.data; the class attribute of the same name is only bound once
    # this assignment completes.
    # NOTE(review): the enum adds a lower-case 'x' while the description below speaks of
    # the special value "X" -- confirm which spelling is intended.
    chemical_symbols = Quantity(
        type=Enum(chemical_symbols + ['x', 'vacancy']), shape=['1..*'],
        a_optimade=Optimade(entry=True))
    """
    A list of strings of all chemical elements composing this species.
    It MUST be one of the following:
    - a valid chemical-element name, or
    - the special value "X" to represent a non-chemical element, or
    - the special value "vacancy" to represent that this site has a non-zero probability
    of having a vacancy (the respective probability is indicated in the concentration
    list, see below).
    If any one entry in the species list has a chemical_symbols list that is longer than 1
    element, the correct flag MUST be set in the list structure_features (see
    structure_features)
    """

    concentration = Quantity(
        type=float, shape=['1..*'],
        a_optimade=Optimade(entry=True))
    """
    A list of floats, with same length as chemical_symbols. The numbers represent the
    relative concentration of the corresponding chemical symbol in this species. The
    numbers SHOULD sum to one. Cases in which the numbers do not sum to one typically fall
    only in the following two categories:
    - Numerical errors when representing float numbers in fixed precision, e.g. for two
    chemical symbols with concentrations 1/3 and 2/3, the concentration might look
    something like [0.33333333333, 0.66666666666]. If the client is aware that the sum
    is not one because of numerical precision, it can renormalize the values so that the
    sum is exactly one.
    - Experimental errors in the data present in the database. In this case, it is the
    responsibility of the client to decide how to process the data.
    Note that concentrations are uncorrelated between different sites (even of the same
    species).
    """

    # A float quantity with unit ``Units.amu``; no description string follows it.
    # NOTE(review): ``mass`` and ``original_name`` use a plain dict with
    # ``entry='optional'`` as annotation, whereas all other quantities use
    # ``Optimade(...)`` with boolean flags -- confirm that this difference is intended.
    mass = Quantity(type=float, unit=Units.amu, a_optimade=dict(entry='optional'))

    original_name = Quantity(type=str, a_optimade=dict(entry='optional'))
    """
    Can be any valid Unicode string, and SHOULD contain (if specified) the name of the
    species that is used internally in the source database.
    Note: With regards to "source database", we refer to the immediate source being
    queried via the OPTiMaDe API implementation. The main use of this field is for source
    databases that use species names, containing characters that are not allowed (see
    description of the species_at_sites list).
    """
from .metainfo import MObject, Section, Quantity, Enum, Units
......@@ -12,20 +12,135 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Type, TypeVar, Union, Tuple, Iterable, List, Any, Dict, cast
import sys
"""
The NOMAD meta-info allows one to define physics data quantities. These definitions are
necessary for all computer representations of respective data (e.g. in Python,
search engines, data-bases, and files).
This module provides various Python interfaces for
__module__ = sys.modules[__name__]
MObjectBound = TypeVar('MObjectBound', bound='MObject')
- defining meta-info data
- to create and manipulate data that follows these definitions
- to (de-)serialize meta-info data in JSON (i.e. represent data in JSON formatted files)
"""
Here is a simple example that demonstrates the definition of System related quantities:
.. code-block:: python
class Run(MObject):
pass
class System(MObject):
\"\"\"
A system section includes all quantities that describe a single simulated
system (a.k.a. geometry).
\"\"\"
m_section = Section(repeats=True, parent=Run.m_section)
n_atoms = Quantity(type=int)
\"\"\" Defines the number of atoms in the system. \"\"\"
Discussion:
-----------
atom_labels = Quantity(type=Enum(ase.data.chemical_symbols), shape=['n_atoms'])
atom_positions = Quantity(type=float, shape=['n_atoms', 3], unit=Units.m)
simulation_cell = Quantity(type=float, shape=[3, 3], unit=Units.m)
pbc = Quantity(type=bool, shape=[3])
Here, we define a `section` called ``System``. The section mechanism allows to organize
related data into, well, sections. Sections form containment hierarchies. Here
containment is a parent-child (whole-part) relationship. In this example many ``Systems``
are part of one ``Run``. Each ``System`` can contain values for the defined quantities:
``n_atoms``, ``atom_labels``, ``atom_positions``, ``simulation_cell``, and ``pbc``.
Quantities allow to state type, shape, and physics unit to specify possible quantity
values.
Here is an example, where we use the above definition to create, read, and manipulate
data that follows these definitions:
.. code-block:: python
run = Run()
system = run.m_create(System)
system.n_atoms = 3
system.atom_labels = ['H', 'H', 'O']
print(system.atom_labels)
print(run.m_to_json(ident=2))
This last statement will produce the following JSON:
.. code-block:: JSON
{
"m_section": "Run",
"System": [
{
"m_section": "System",
"m_parent_index": 0,
"n_atoms": 3,
"atom_labels": [
"H",
"H",
"O"
]
}
]
}
This is the JSON representation, a serialized version of the Python representation in
the example above.
Sections can be extended with new quantities outside the original section definition.
This provides the key mechanism to extend commonly defined parts with (code) specific
quantities:
.. code-block:: Python
class Method(nomad.metainfo.common.Method):
x_vasp_incar_ALGO=Quantity(
type=Enum(['Normal', 'VeryFast', ...]),
links=['https://cms.mpi.univie.ac.at/wiki/index.php/ALGO'])
\"\"\"
A convenient option to specify the electronic minimisation algorithm (as of VASP.4.5)
and/or to select the type of GW calculations.
\"\"\"
All meta-info definitions and classes for meta-info data objects (i.e. section instances)
inherit from :class:`MObject`. This base-class provides common functions and attributes
for all meta-info data objects. Names of these common parts are prefixed with ``m_``
to distinguish them from user defined quantities. This also constitutes the `reflection`
interface (in addition to Python's built-in ``getattr``, ``setattr``) that allows to
create and manipulate meta-info data, without prior program time knowledge of the underlying
definitions.
.. autoclass:: MObject
The following classes can be used to define and structure meta-info data:
- sections are defined by sub-classing :class:`MObject` and using :class:`Section` to
populate the class attribute `m_section`
- quantities are defined by assigning class attributes of a section with :class:`Quantity`
instances
- references (from one section to another) can be defined with quantities that use
section definitions as type
- dimensions can be defined by simply using quantity names in shapes
- categories (former `abstract type definitions`) can be given in quantity definitions
to assign quantities to additional specialization-generalization hierarchies
See the reference of classes :class:`Section` and :class:`Quantity` for details.
.. autoclass:: Section
.. autoclass:: Quantity
"""
from typing import Type, TypeVar, Union, Tuple, Iterable, List, Any, Dict, cast
import sys
__module__ = sys.modules[__name__]
MObjectBound = TypeVar('MObjectBound', bound='MObject')
# Reflection
......@@ -66,11 +181,11 @@ class MObject(metaclass=MObjectMeta):
Sub-sections and parent sections can be read and manipulated with :data:`m_parent`,
:func:`m_sub_section`, :func:`m_create`.
```
system = run.m_create(System)
assert system.m_parent == run
assert run.m_sub_section(System, system.m_parent_index) == system
```
.. code-block:: python
system = run.m_create(System)
assert system.m_parent == run
assert run.m_sub_section(System, system.m_parent_index) == system
Attributes:
m_section: The section definition that defines this section, its possible
......@@ -407,3 +522,13 @@ class Definition(MObject):
m_section = Section(extends=[Section.m_section, Quantity.m_section, Package.m_section])
description = Quantity(type=str)
class Unit:
    """ Placeholder type for physical units; it defines no attributes or methods. """
class Units:
    """ Namespace holding the :class:`Unit` instances that quantities refer to. """
    # One distinct Unit instance per name, created in the same order as before.
    Angstrom, amu = Unit(), Unit()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment