diff --git a/MANIFEST.in b/MANIFEST.in index 18c39cefaa38740d5fb7d30238e70438b18d8769..fe15e110b79a7b3e7e4c51e35750ebe64b8f8dde 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -8,6 +8,7 @@ include auto_complete_install.sh include setup.json recursive-include nomad/app/static/gui/ *.css *.ico *.html *.json *.js *.map *.txt *.svg *.png recursive-include docs/build/ *.css *.ico *.html *.json *.js *.map *.txt *.svg *.png +recursive-include dependencies/parsers/ metadata.yaml recursive-include dependencies/parsers/nexus/nexusparser/definitions/base_classes/ *.xml recursive-include dependencies/parsers/nexus/nexusparser/definitions/applications/ *.xml recursive-include dependencies/parsers/nexus/nexusparser/definitions/contributed_definitions/ *.xml diff --git a/dependencies/parsers/atomistic b/dependencies/parsers/atomistic index c7fed17419d917ea95d3ab5875916099ade8479e..35eef1fa4705ddd18d3787d4519f08a37f6a8644 160000 --- a/dependencies/parsers/atomistic +++ b/dependencies/parsers/atomistic @@ -1 +1 @@ -Subproject commit c7fed17419d917ea95d3ab5875916099ade8479e +Subproject commit 35eef1fa4705ddd18d3787d4519f08a37f6a8644 diff --git a/dependencies/parsers/database b/dependencies/parsers/database index 051781cff4d21a8eaf69eb1d992de52dd42eba31..806fda467d037dfcc2d665c5b735650fd3862632 160000 --- a/dependencies/parsers/database +++ b/dependencies/parsers/database @@ -1 +1 @@ -Subproject commit 051781cff4d21a8eaf69eb1d992de52dd42eba31 +Subproject commit 806fda467d037dfcc2d665c5b735650fd3862632 diff --git a/dependencies/parsers/eelsdb b/dependencies/parsers/eelsdb index 79e687d2c83d3240dfa2f399155c4487c94f984f..007ddd3bf4884a09bead33114dda5167b8b51f96 160000 --- a/dependencies/parsers/eelsdb +++ b/dependencies/parsers/eelsdb @@ -1 +1 @@ -Subproject commit 79e687d2c83d3240dfa2f399155c4487c94f984f +Subproject commit 007ddd3bf4884a09bead33114dda5167b8b51f96 diff --git a/dependencies/parsers/electronic b/dependencies/parsers/electronic index ffe7f5c7105688d38421edb33caa8054d4aee997..5c077f48adcda8e6ec2f5cfb2220adc04100c3e6 160000 --- a/dependencies/parsers/electronic +++ b/dependencies/parsers/electronic @@ -1 +1 @@ -Subproject commit ffe7f5c7105688d38421edb33caa8054d4aee997 +Subproject commit 5c077f48adcda8e6ec2f5cfb2220adc04100c3e6 diff --git a/dependencies/parsers/nexus b/dependencies/parsers/nexus index a866117f5bc61558dba734ee9c91aaf5ddf94522..78b721318cb824e404234702c0aef19a7d8b6dba 160000 --- a/dependencies/parsers/nexus +++ b/dependencies/parsers/nexus @@ -1 +1 @@ -Subproject commit a866117f5bc61558dba734ee9c91aaf5ddf94522 +Subproject commit 78b721318cb824e404234702c0aef19a7d8b6dba diff --git a/dependencies/parsers/workflow b/dependencies/parsers/workflow index b3565e46b741c6c36c70e233dc55caa444b81fd8..ed2592c0953ebad15e59ca6f60b2b7eb3c2afcd1 160000 --- a/dependencies/parsers/workflow +++ b/dependencies/parsers/workflow @@ -1 +1 @@ -Subproject commit b3565e46b741c6c36c70e233dc55caa444b81fd8 +Subproject commit ed2592c0953ebad15e59ca6f60b2b7eb3c2afcd1 diff --git a/gui/src/components/About.js b/gui/src/components/About.js index fb7a3176d2ca08a421f938c14cfef1636bcd94fd..d5a74561c5a65b2e4ca225f1462770a02014bacd 100644 --- a/gui/src/components/About.js +++ b/gui/src/components/About.js @@ -19,13 +19,27 @@ import React, { useLayoutEffect, useRef, useCallback, useEffect, useState } from import { ReactComponent as AboutSvg } from '../images/about.svg' import PropTypes from 'prop-types' import Markdown from './Markdown' +import { isNil } from 'lodash' import { appBase, debug, aitoolkitEnabled, encyclopediaBase } from '../config' import packageJson from '../../package.json' -import { Grid, Card, CardContent, Typography, makeStyles, Link, Dialog, DialogTitle, DialogContent, DialogActions, Button } from '@material-ui/core' +import { + Button, + Card, + CardContent, + Dialog, + DialogTitle, + DialogContent, + DialogActions, + Grid, + Link, + makeStyles, + Typography +} from '@material-ui/core' import { Link as RouterLink, useHistory } from 'react-router-dom' import tutorials from '../toolkitMetadata' import parserMetadata from '../parserMetadata' import { useInfo } from './api' +import { pluralize } from '../utils' function CodeInfo({code, ...props}) { if (!code) { @@ -76,41 +90,84 @@ CodeInfo.propTypes = { onClose: PropTypes.func } -export const CodeList = ({withUploadInstructions}) => { +export const CodeList = React.memo(({withUploadInstructions}) => { const [selected, setSelected] = useState(null) - const codes = Object.keys(parserMetadata).map(code => { - const metadata = parserMetadata[code] - if (!metadata) { - return code + // Create lists containing code name and category + const codes = [] + const categorySizes = {} + Object.entries(parserMetadata).forEach(([code, metadata]) => { + const name = metadata.codeLabel || code + const category = metadata.codeCategory + if (categorySizes[category]) { + categorySizes[category] += 1 + } else { + categorySizes[category] = 1 } - - if (withUploadInstructions) { - return <Link - href="#" key={code} onClick={() => setSelected(code)} - >{code.codeLabel || code}</Link> + if (!metadata || code === 'example') { + return } - if (metadata.codeUrl) { - return <Link href={metadata.codeUrl} key={code} target="code">{code.codeLabel || code}</Link> - } + const link = withUploadInstructions + ? [code, category, <Link href="#" key={code} onClick={() => setSelected(code)}>{name}</Link>] + : metadata.codeUrl + ? [code, category, <Link href={metadata.codeUrl} key={code} target="code">{name}</Link>] + : [code, category, name] - return code + codes.push(link) }) - const toRender = codes.reduce((list, value, index) => { - if (index !== 0) { - list.push(', ') + // Sort by category size, then by program name. Codes without category go to + // the end. + codes.sort((a, b) => { + const nameA = a[0] + const nameB = b[0] + const categoryA = a[1] + const categoryB = b[1] + const sizeA = categorySizes[categoryA] + const sizeB = categorySizes[categoryB] + if (isNil(categoryA) && !isNil(categoryB)) return 1 + if (isNil(categoryB) && !isNil(categoryA)) return -1 + if (sizeA > sizeB) return -1 + if (sizeA < sizeB) return 1 + if (nameA > nameB) return 1 + if (nameA < nameB) return -1 + return 0 + }) + + // Create a renderable version + let currentCategory = null + let categoryIndex = 0 + const codeshtml = codes.reduce((list, value) => { + let index = -1 + const categoryTmp = value[1] + const html = value[2] + const category = categoryTmp ? pluralize(categoryTmp) : 'Miscellaneous' + + if (currentCategory !== category) { + index = 0 + if (categoryIndex !== 0) { + list.push(', ') + } + list.push(<b> {category}: </b>) + categoryIndex += 1 + currentCategory = category + } + + if (html) { + if (index !== 0) { + list.push(', ') + } + list.push(html) } - list.push(value) return list }, []) - return <React.Fragment> - {toRender} + return <span data-testid="code-list"> + {codeshtml} <CodeInfo code={selected} onClose={() => setSelected(null)} /> - </React.Fragment> -} + </span> +}) CodeList.propTypes = { withUploadInstructions: PropTypes.bool } @@ -370,7 +427,6 @@ export default function About() { - version (GUI): \`${packageJson.version}/${packageJson.commit}\` - git: \`${info ? info.git.ref : 'loading'}; ${info ? info.git.version : 'loading'}\` - last commit message: *${info ? info.git.log : 'loading'}* - - supported codes: ${info ? info.codes.map(code => code.code_name).join(', ') : 'loading'} - parsers: ${info ? info.parsers.join(', ') : 'loading'} - normalizers: ${info ? info.normalizers.join(', ') : 'loading'} `}</Markdown> diff --git a/gui/src/components/About.spec.js b/gui/src/components/About.spec.js new file mode 100644 index 0000000000000000000000000000000000000000..6acb6d71c77aeaae05f04a414af8de85eac9f5c1 --- /dev/null +++ b/gui/src/components/About.spec.js @@ -0,0 +1,31 @@ +/* + * Copyright The NOMAD Authors. + * + * This file is part of NOMAD. See https://nomad-lab.eu for further info. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import React from 'react' +import { render, screen } from './conftest.spec' +import {CodeList} from './About' + +test('list of codes renders correctly', async () => { + render(<CodeList />) + // Check that the list of parsers is printed correctly within the correct categories. + // This list should be updated if new codes are added or the code names are updated. + // This test value is hardcoded so that any unintentional changes to the code names and + // categories can be avoided. + const list = /Atomistic codes: ABACUS, ABINIT, AMS, ASAP, Amber, BigDFT, CASTEP, CHARMM, CP2K, CPMD, CRYSTAL, DFTB\+, DL_POLY, DMol3, Elk, FHI-aims, FLEUR, FPLO, GAMESS, GPAW, GROMACS, GROMOS, GULP, Gaussian, LAMMPS, MOPAC, Molcas, NAMD, NWChem, ONETEP, ORCA, Octopus, OpenMX, Psi4, Qball, Qbox, QuantumATK, QuantumESPRESSO, SIESTA, TURBOMOLE, Tinker, VASP, WIEN2k, YAMBO, exciting, libAtoms, Workflow managers: AFLOW, ASR, Atomate, ElaStic, FHI-vibes, LOBSTER, MOFStructures, phonopy, Database managers: EELSDB, NeXus, OpenKIM$/ + expect(screen.getByTestId('code-list')).toHaveTextContent(list) +}) diff --git a/gui/src/utils.js b/gui/src/utils.js index dae17ab559fd7c1042bae17cae0249ac2e09998c..4d1ea21113268920276d3cf4e5b8a6737d7c037f 100644 --- a/gui/src/utils.js +++ b/gui/src/utils.js @@ -609,7 +609,7 @@ export function delay(func) { * Returns the correct form (plural/singular) for the given word. The syntax is * similar to the "pluralize"-library. * - * @param {string} word The word to plurarize + * @param {string} word The word to pluralize * @param {count} number How many of the words exist * @param {boolean} inclusive Whether to prefix with the number (e.g. 3 ducks) * @param {boolean} format Whether to format the number. @@ -627,12 +627,18 @@ export function pluralize(word, count, inclusive, format = true, prefix) { 'material': 'materials', 'dataset': 'datasets', 'item': 'items', - 'upload': 'uploads' + 'upload': 'uploads', + 'code': 'codes', + 'manager': 'managers' } - const plural = dictionary[word] + const words = word.trim().split(" ") + const lastWord = words[words.length - 1] + let plural = dictionary[lastWord] if (isNil(plural)) { throw Error(`The word ${word} is not in the dictionary, please add it.`) } + words[words.length - 1] = plural + plural = words.join(" ") const form = count === 1 ? word : plural diff --git a/nomad/app/v1/routers/info.py b/nomad/app/v1/routers/info.py index dd799de6ecb220a5d4027cc37e5255274fddc123..8f294ca0810c2d6ace60f052aa841059161dcd19 100644 --- a/nomad/app/v1/routers/info.py +++ b/nomad/app/v1/routers/info.py @@ -29,7 +29,8 @@ from pydantic.main import BaseModel from nomad import config, normalizing, gitinfo from nomad.utils import strip from nomad.search import search -from nomad.parsing import parsers, MatchingParser +from nomad.parsing import parsers +from nomad.parsing.parsers import code_metadata from nomad.app.v1.models import Aggregation, StatisticsAggregation from nomad.metainfo.elasticsearch_extension import entry_type @@ -105,15 +106,6 @@ def statistics(): response_model=InfoModel) async def get_info(): ''' Return information about the nomad backend and its configuration. ''' - codes_dict = {} - for parser in parsers.parser_dict.values(): - if isinstance(parser, MatchingParser) and parser.domain == 'dft': - code_name = parser.code_name - if code_name in codes_dict: - continue - codes_dict[code_name] = dict(code_name=code_name, code_homepage=parser.code_homepage) - codes = sorted(list(codes_dict.values()), key=lambda code_info: code_info['code_name'].lower()) - return { 'parsers': [ key[key.index('/') + 1:] @@ -121,7 +113,10 @@ async def get_info(): 'metainfo_packages': ['general', 'general.experimental', 'common', 'public'] + sorted([ key[key.index('/') + 1:] for key in parsers.parser_dict.keys()]), - 'codes': codes, + 'codes': [ + {'code_name': x['codeLabel'], 'code_homepage': x['codeUrl']} + for x in sorted(code_metadata.values(), key=lambda info: info['codeLabel'].lower()) + ], 'normalizers': [normalizer.__name__ for normalizer in normalizing.normalizers], 'statistics': statistics(), 'search_quantities': { diff --git a/nomad/cli/dev.py b/nomad/cli/dev.py index 13c8d296f48e3cfa8a60b4c4dc40399d3c3c1aac..278d272e6b398547fcceea32e722dd7c436260e5 100644 --- a/nomad/cli/dev.py +++ b/nomad/cli/dev.py @@ -162,23 +162,9 @@ def search_quantities(): @dev.command(help='Generates a JSON file that compiles all the parser metadata from each parser project.') def parser_metadata(): import json - import yaml - import os - import os.path - from glob import glob - - parsers_metadata = {} - parsers_path = './dependencies/parsers' - for parser_metadata_file in sorted(glob(f'{parsers_path}/**/metadata.yaml', recursive=True)): - with open(parser_metadata_file) as f: - parser_metadata = yaml.load(f, Loader=yaml.FullLoader) - parsers_metadata[os.path.basename(os.path.dirname(parser_metadata_file))] = parser_metadata - - parsers_metadata = { - key: parsers_metadata[key] - for _, key in sorted([(key.lower(), key) for key in parsers_metadata], key=lambda x: x[0])} + from nomad.parsing.parsers import code_metadata - print(json.dumps(parsers_metadata, indent=2)) + print(json.dumps(code_metadata, indent=2, sort_keys=True)) @dev.command(help='Generates a JSON file from example-uploads metadata in the YAML file.') diff --git a/nomad/datamodel/metainfo/simulation/calculation.py b/nomad/datamodel/metainfo/simulation/calculation.py index 8eb6ec93a2ccda46b8123d7707b8978ad4d80271..01f16dea689190964901a8c7e37ea2bdf9227348 100644 --- a/nomad/datamodel/metainfo/simulation/calculation.py +++ b/nomad/datamodel/metainfo/simulation/calculation.py @@ -21,7 +21,7 @@ import typing # pylint: disable=unused-import from nomad.metainfo import ( # pylint: disable=unused-import MSection, MCategory, Category, Package, Quantity, Section, SubSection, SectionProxy, Reference, MEnum, derived) -from nomad.datamodel.metainfo.simulation.system import System +from nomad.datamodel.metainfo.simulation.system import System, AtomsGroup from nomad.datamodel.metainfo.simulation.method import Method from ..common import FastAccess @@ -178,6 +178,44 @@ class AtomicValues(MSection): ''') +class AtomicGroup(MSection): + ''' + Generic section containing the values and information reqarding a molecular or sub-molecular + quantity that is a function of an atomic group such as radius of gyration... + ''' + + m_def = Section(validate=False) + + kind = Quantity( + type=str, + shape=[], + description=''' + Kind of the quantity. + ''') + + +class AtomicGroupValues(MSection): + ''' + Generic section containing information regarding the values of a trajectory property. + ''' + + m_def = Section(validate=False) + + label = Quantity( + type=str, + shape=[], + description=''' + Describes the atoms or molecule types involved in determining the property. + ''') + + atomsgroup_ref = Quantity( + type=Reference(AtomsGroup.m_def), + shape=[1], + description=''' + References to the atoms_group section containing the molecule for which Rg was calculated. + ''') + + class EnergyEntry(Atomic): ''' Section describing a type of energy or a contribution to the total energy. @@ -1538,6 +1576,34 @@ class VibrationalFrequencies(MSection): infrared = SubSection(sub_section=VibrationalFrequenciesValues.m_def, repeats=False) +class RadiusOfGyrationValues(AtomicGroupValues): + ''' + Section containing information regarding the values of + radius of gyration (Rg). + ''' + + m_def = Section(validate=False) + + value = Quantity( + type=np.dtype(np.float64), + shape=[], + unit='m', + description=''' + Value of Rg. + ''') + + +class RadiusOfGyration(AtomicGroup): + ''' + Section containing information about the calculation of + radius of gyration (Rg). + ''' + + m_def = Section(validate=False) + + radius_of_gyration_values = SubSection(sub_section=RadiusOfGyrationValues.m_def, repeats=True) + + class BaseCalculation(MSection): ''' Contains computed properties of a configuration as defined by the corresponding @@ -1675,6 +1741,8 @@ class BaseCalculation(MSection): density_charge = SubSection(sub_section=Density.m_def, repeats=True) + radius_of_gyration = SubSection(sub_section=RadiusOfGyration.m_def, repeats=True) + volume = Quantity( type=np.dtype(np.float64), shape=[], diff --git a/nomad/parsing/parser.py b/nomad/parsing/parser.py index fbe0d530ae5fc767beb1f87991e77301d5db45c2..e488a22d94a807d179430c7c5f02c1f86fdf3aff 100644 --- a/nomad/parsing/parser.py +++ b/nomad/parsing/parser.py @@ -16,19 +16,35 @@ # limitations under the License. # -from typing import List, Iterable, Dict, Union +from typing import List, Iterable, Dict, Union, Any, Optional from abc import ABCMeta, abstractmethod import re import os import os.path +from enum import Enum from functools import lru_cache import importlib +from pydantic import BaseModel, Extra # pylint: disable=unused-import +import yaml from nomad import config, utils from nomad.datamodel import EntryArchive, EntryMetadata from nomad.metainfo import Package +class ParserStatus(Enum): + PRODUCTION = "production" + BETA = "beta" + + +class ParserMetadata(BaseModel, use_enum_values=True, extra=Extra.allow): + codeLabel: str + codeUrl: Optional[str] + codeCategory: str + codeName: str + status: ParserStatus + + class Parser(metaclass=ABCMeta): ''' Instances specify a parser. It allows to find *main files* from given uploaded @@ -44,6 +60,7 @@ class Parser(metaclass=ABCMeta): def __init__(self): self.domain = 'dft' + self.metadata = None @abstractmethod def is_mainfile( @@ -161,8 +178,11 @@ class MatchingParser(Parser): A parser implementation that uses regular expressions to match mainfiles. Arguments: - code_name: The name of the code or input format + name: The internally used name for the parser. The prefix 'parser/' will + be automatically added for legacy reasons. + code_name: The displayed name for the parser code_homepage: The homepage of the code or input format + code_catogory: An optional category for the code. mainfile_mime_re: A regexp that is used to match against a files mime type mainfile_contents_re: A regexp that is used to match the first 1024 bytes of a potential mainfile. @@ -173,7 +193,12 @@ class MatchingParser(Parser): supported_compressions: A list of [gz, bz2], if the parser supports compressed files ''' def __init__( - self, name: str, code_name: str, code_homepage: str = None, + self, + name: str = None, + code_name: str = None, + code_homepage: str = None, + code_category: str = None, + metadata_path: str = None, mainfile_contents_re: str = None, mainfile_binary_header: bytes = None, mainfile_binary_header_re: bytes = None, @@ -184,14 +209,42 @@ class MatchingParser(Parser): supported_compressions: List[str] = []) -> None: super().__init__() + self.name = name self.code_name = code_name self.code_homepage = code_homepage + self.code_category = code_category + + # If a metainfo path is given, read the code metainfo from there. + self.metadata_path = metadata_path + metadata_keys = { + 'code_name': 'codeLabel', + 'code_homepage': 'codeUrl', + 'code_category': 'codeCategory', + 'status': 'status', + 'name': 'codeName' + } + if metadata_path is not None: + metadata = ParserMetadata(**self.read_metadata_file(metadata_path)) + self.metadata = metadata + for key_var, key_file in metadata_keys.items(): + val_local = locals().get(key_var) + val_file = getattr(metadata, key_file) + if val_local is not None: + raise ValueError( + f'{key_var} specified both in metadata file ({val_file}) and in ' + f'parser constructor ({val_local})') + if key_file == 'codeName': + val_file = f'parsers/{val_file}' + setattr(self, key_var, val_file) + + assert self.code_name, f'please provide a code name for {name}' self.domain = domain self._mainfile_binary_header = mainfile_binary_header self._mainfile_mime_re = re.compile(mainfile_mime_re) self._mainfile_name_re = re.compile(mainfile_name_re) self._mainfile_alternative = mainfile_alternative + # Assign private variable this way to avoid static check issue. if mainfile_contents_re is not None: self._mainfile_contents_re = re.compile(mainfile_contents_re) @@ -205,6 +258,20 @@ class MatchingParser(Parser): self._ls = lru_cache(maxsize=16)(lambda directory: os.listdir(directory)) + def read_metadata_file(self, metadata_file: str) -> Dict[str, Any]: + ''' + Read parser metadata from a yaml file. + ''' + logger = utils.get_logger(__name__) + try: + with open(metadata_file, 'r', encoding='UTF-8') as f: + parser_metadata = yaml.load(f, Loader=yaml.FullLoader) + except Exception as e: + logger.warning('failed to read parser metadata', exc_info=e) + raise + + return parser_metadata + def is_mainfile( self, filename: str, mime: str, buffer: bytes, decoded_buffer: str, compression: str = None) -> Union[bool, Iterable[str]]: @@ -283,15 +350,17 @@ class MatchingParserInterface(MatchingParser): self.mainfile_parser.parse(mainfile, archive, logger) def import_parser_class(self): + logger = utils.get_logger(__name__) try: module_path, parser_class = self._parser_class_name.rsplit('.', 1) module = importlib.import_module(module_path) - return getattr(module, parser_class) + parser = getattr(module, parser_class) except Exception as e: - logger = utils.get_logger(__name__) logger.error('cannot import parser', exc_info=e) raise e + return parser + class ArchiveParser(MatchingParser): def __init__(self): diff --git a/nomad/parsing/parsers.py b/nomad/parsing/parsers.py index b70f391218f063c3f278f6c8b0189d256008ad98..654623fe3bd3f294bda5b6d9fa72ca90c4ad82f0 100644 --- a/nomad/parsing/parsers.py +++ b/nomad/parsing/parsers.py @@ -19,6 +19,8 @@ import os.path from typing import Tuple, List, Dict from collections.abc import Iterable +import pkgutil +from pathlib import Path from nomad import config from nomad.datamodel import EntryArchive, EntryMetadata, results @@ -196,30 +198,37 @@ def run_parser( return entry_archives +# Here we resolve the path of the installation directories of various parsers. +# Note that this should be done in a way that does not yet import the modules +# themselves (this takes a while). The parser modules are imported lazily later. +prefix_electronic = f'{Path(pkgutil.get_loader("electronicparsers").path).parent.absolute()}' # type: ignore +prefix_atomistic = f'{Path(pkgutil.get_loader("atomisticparsers").path).parent.absolute()}' # type: ignore +prefix_workflow = f'{Path(pkgutil.get_loader("workflowparsers").path).parent.absolute()}' # type: ignore +prefix_database = f'{Path(pkgutil.get_loader("databaseparsers").path).parent.absolute()}' # type: ignore +prefix_eels = f'{Path(pkgutil.get_loader("eelsdbparser").path).parent.absolute()}' # type: ignore + parsers = [ GenerateRandomParser(), TemplateParser(), ChaosParser(), MatchingParserInterface( 'electronicparsers.AbinitParser', - name='parsers/abinit', code_name='ABINIT', code_homepage='https://www.abinit.org/', + metadata_path=f'{prefix_electronic}/abinit/metadata.yaml', mainfile_contents_re=(r'^\n*\.Version\s*[0-9.]*\s*of ABINIT\s*') ), - MatchingParserInterface( - 'electronicparsers.ATKParser', - name='parsers/atk', code_name='AtomistixToolKit', - code_homepage='https://www.synopsys.com/silicon/quantumatk.html', - mainfile_name_re=r'^.*\.nc', mainfile_mime_re=r'application/octet-stream' - ), MatchingParserInterface( 'electronicparsers.AMSParser', - name='parsers/ams', code_name='AMS', - code_homepage='https://www.scm.com', + metadata_path=f'{prefix_electronic}/ams/metadata.yaml', mainfile_contents_re=r'\* +\| +A M S +\| +\*' ), + MatchingParserInterface( + 'electronicparsers.ATKParser', + metadata_path=f'{prefix_electronic}/atk/metadata.yaml', + mainfile_name_re=r'^.*\.nc', mainfile_mime_re=r'application/octet-stream' + ), MatchingParserInterface( 'electronicparsers.BigDFTParser', - name='parsers/bigdft', code_name='BigDFT', code_homepage='http://bigdft.org/', + metadata_path=f'{prefix_electronic}/bigdft/metadata.yaml', mainfile_contents_re=( # r'__________________________________ A fast and precise DFT wavelet code\s*' # r'\| \| \| \| \| \|\s*' @@ -250,18 +259,18 @@ parsers = [ ), MatchingParserInterface( 'electronicparsers.CastepParser', - name='parsers/castep', code_name='CASTEP', code_homepage='http://www.castep.org/', + metadata_path=f'{prefix_electronic}/castep/metadata.yaml', mainfile_contents_re=(r'\s\|\s*CCC\s*AA\s*SSS\s*TTTTT\s*EEEEE\s*PPPP\s*\|\s*') ), MatchingParserInterface( 'electronicparsers.CharmmParser', - name='parsers/charmm', code_name='Charmm', domain='dft', + metadata_path=f'{prefix_electronic}/charmm/metadata.yaml', mainfile_contents_re=r'\s*Chemistry\s*at\s*HARvard\s*Macromolecular\s*Mechanics\s*', mainfile_mime_re=r'text/.*' ), MatchingParserInterface( 'electronicparsers.CP2KParser', - name='parsers/cp2k', code_name='CP2K', code_homepage='https://www.cp2k.org/', + metadata_path=f'{prefix_electronic}/cp2k/metadata.yaml', mainfile_contents_re=( r'\*\*\*\* \*\*\*\* \*\*\*\*\*\* \*\* PROGRAM STARTED AT\s.*\n' r' \*\*\*\*\* \*\* \*\*\* \*\*\* \*\* PROGRAM STARTED ON\s*.*\n' @@ -271,59 +280,52 @@ parsers = [ ), MatchingParserInterface( 'electronicparsers.CPMDParser', - name='parsers/cpmd', code_name='CPMD', - code_homepage='https://www.lcrc.anl.gov/for-users/software/available-software/cpmd/', + metadata_path=f'{prefix_electronic}/cpmd/metadata.yaml', mainfile_contents_re=(r'\*\*\* \*\* \*\*\* \*\* \*\*\*\* \*\* \*\* \*\*\*') ), MatchingParserInterface( 'electronicparsers.CrystalParser', - name='parsers/crystal', - code_name='Crystal', - code_homepage='https://www.crystal.unito.it/', + metadata_path=f'{prefix_electronic}/crystal/metadata.yaml', mainfile_contents_re=( fr'(\r?\n \*\s+CRYSTAL[\d]+\s+\*\r?\n \*\s*[a-zA-Z]+ : \d+[\.\d+]*)') ), MatchingParserInterface( 'electronicparsers.Dmol3Parser', - name='parsers/dmol', code_name='DMol3', - code_homepage='http://dmol3.web.psi.ch/dmol3.html', domain='dft', + metadata_path=f'{prefix_electronic}/dmol3/metadata.yaml', mainfile_name_re=r'.*\.outmol', mainfile_contents_re=r'Materials Studio DMol\^3' ), MatchingParserInterface( 'electronicparsers.ElkParser', - name='parsers/elk', code_name='elk', code_homepage='http://elk.sourceforge.net/', + metadata_path=f'{prefix_electronic}/elk/metadata.yaml', mainfile_contents_re=r'\| Elk version [0-9.a-zA-Z]+ started \|' ), MatchingParserInterface( 'electronicparsers.ExcitingParser', - name='parsers/exciting', code_name='exciting', code_homepage='http://exciting-code.org/', + metadata_path=f'{prefix_electronic}/exciting/metadata.yaml', mainfile_name_re=r'^.*.OUT(\.[^/]*)?$', mainfile_contents_re=(r'EXCITING.*started') ), MatchingParserInterface( 'electronicparsers.FHIAimsParser', - name='parsers/fhi-aims', code_name='FHI-aims', - code_homepage='https://aimsclub.fhi-berlin.mpg.de/', + metadata_path=f'{prefix_electronic}/fhiaims/metadata.yaml', mainfile_contents_re=( r'^(.*\n)*' r'?\s*Invoking FHI-aims \.\.\.') ), MatchingParserInterface( 'electronicparsers.FleurParser', - name='parsers/fleur', code_name='fleur', - code_homepage='https://www.flapw.de/', domain='dft', + metadata_path=f'{prefix_electronic}/fleur/metadata.yaml', mainfile_contents_re=r'This output is generated by fleur.' ), MatchingParserInterface( 'electronicparsers.FploParser', - name='parsers/fplo', code_name='fplo', domain='dft', + metadata_path=f'{prefix_electronic}/fplo/metadata.yaml', mainfile_contents_re=r'\s*\|\s*FULL-POTENTIAL LOCAL-ORBITAL MINIMUM BASIS BANDSTRUCTURE CODE\s*\|\s*', mainfile_mime_re=r'text/.*' ), MatchingParserInterface( 'electronicparsers.GamessParser', - name='parsers/gamess', code_name='GAMESS', - code_homepage='https://www.msg.chem.iastate.edu/gamess/versions.html', + metadata_path=f'{prefix_electronic}/gamess/metadata.yaml', mainfile_contents_re=( r'\s*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*' r'\s*\*\s*GAMESS VERSION =\s*(.*)\*\s*' @@ -331,48 +333,55 @@ parsers = [ ), MatchingParserInterface( 'electronicparsers.GaussianParser', - name='parsers/gaussian', code_name='Gaussian', code_homepage='http://gaussian.com/', - mainfile_mime_re=r'.*', mainfile_contents_re=( + metadata_path=f'{prefix_electronic}/gaussian/metadata.yaml', + mainfile_mime_re=r'.*', + mainfile_contents_re=( r'\s*Cite this work as:' r'\s*Gaussian [0-9]+, Revision [A-Za-z0-9\.]*,') ), MatchingParserInterface( 'electronicparsers.GPAWParser', - name='parsers/gpaw', code_name='GPAW', code_homepage='https://wiki.fysik.dtu.dk/gpaw/', + metadata_path=f'{prefix_electronic}/gpaw/metadata.yaml', mainfile_name_re=(r'^.*\.(gpw2|gpw)$'), mainfile_mime_re=r'application/(x-tar|octet-stream)' ), MatchingParserInterface( 'electronicparsers.MolcasParser', - name='parsers/molcas', code_name='MOLCAS', code_homepage='http://www.molcas.org/', - domain='dft', mainfile_contents_re=r'M O L C A S' + metadata_path=f'{prefix_electronic}/molcas/metadata.yaml', + mainfile_contents_re=r'M O L C A S' + ), + MatchingParserInterface( + 'electronicparsers.MopacParser', + metadata_path=f'{prefix_electronic}/mopac/metadata.yaml', + mainfile_contents_re=r'\s*\*\*\s*MOPAC\s*([0-9a-zA-Z]*)\s*\*\*\s*', + mainfile_mime_re=r'text/.*', ), MatchingParserInterface( 'electronicparsers.NWChemParser', - name='parsers/nwchem', code_name='NWChem', code_homepage='http://www.nwchem-sw.org/', + metadata_path=f'{prefix_electronic}/nwchem/metadata.yaml', mainfile_contents_re=( r'Northwest Computational Chemistry Package \(NWChem\) (\d+\.)+\d+') ), MatchingParserInterface( 'electronicparsers.OctopusParser', - name='parsers/octopus', code_name='Octopus', code_homepage='https://octopus-code.org/', + metadata_path=f'{prefix_electronic}/octopus/metadata.yaml', mainfile_contents_re=(r'\|0\) ~ \(0\) \|') ), MatchingParserInterface( 'electronicparsers.OnetepParser', - name='parsers/onetep', code_name='ONETEP', code_homepage='https://www.onetep.org/', - domain='dft', mainfile_contents_re=r'####### # # ####### ####### ####### ######' + metadata_path=f'{prefix_electronic}/onetep/metadata.yaml', + mainfile_contents_re=r'####### # # ####### ####### ####### ######' ), MatchingParserInterface( 'electronicparsers.OpenmxParser', - name='parsers/openmx', code_name='OpenMX', code_homepage='http://www.openmx-square.org/', + metadata_path=f'{prefix_electronic}/openmx/metadata.yaml', mainfile_mime_re=r'(text/.*)', mainfile_name_re=r'.*\.out$', mainfile_contents_re=(r'^\*{59}\s+\*{59}\s+This calculation was performed by OpenMX'), ), MatchingParserInterface( 'electronicparsers.OrcaParser', - name='parsers/orca', code_name='ORCA', code_homepage='https://orcaforum.kofo.mpg.de/', + metadata_path=f'{prefix_electronic}/orca/metadata.yaml', mainfile_contents_re=( r'\s+\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*' r'\s+\* O R C A \*\s*' @@ -382,45 +391,41 @@ parsers = [ ), MatchingParserInterface( 'electronicparsers.Psi4Parser', - name='parsers/psi4', code_name='Psi4', code_homepage='https://psicode.org/', + metadata_path=f'{prefix_electronic}/psi4/metadata.yaml', mainfile_contents_re=(r'Psi4: An Open-Source Ab Initio Electronic Structure Package') ), MatchingParserInterface( 'electronicparsers.QBallParser', - name='parsers/qball', - code_name='qball', + metadata_path=f'{prefix_electronic}/qball/metadata.yaml', mainfile_contents_re='qball', supported_compressions=["gz", "bz2", "xz"] ), MatchingParserInterface( 'electronicparsers.QboxParser', - name='parsers/qbox', code_name='qbox', code_homepage='http://qboxcode.org/', - domain='dft', mainfile_mime_re=r'(application/xml)|(text/.*)', + metadata_path=f'{prefix_electronic}/qbox/metadata.yaml', + mainfile_mime_re=r'(application/xml)|(text/.*)', mainfile_contents_re=(r'http://qboxcode.org') ), MatchingParserInterface( 'electronicparsers.QuantumEspressoParser', - name='parsers/quantumespresso', code_name='Quantum Espresso', - code_homepage='https://www.quantum-espresso.org/', + metadata_path=f'{prefix_electronic}/quantumespresso/metadata.yaml', mainfile_contents_re=(r'(Program PWSCF.*starts)|(Current dimensions of program PWSCF are)') ), MatchingParserInterface( 'electronicparsers.SiestaParser', - name='parsers/siesta', code_name='Siesta', - code_homepage='https://departments.icmab.es/leem/siesta/', + metadata_path=f'{prefix_electronic}/siesta/metadata.yaml', mainfile_contents_re=( r'(Siesta Version: siesta-|SIESTA [0-9]\.[0-9]\.[0-9])|' r'(\*\s*WELCOME TO SIESTA\s*\*)') ), MatchingParserInterface( 'electronicparsers.TurbomoleParser', - name='parsers/turbomole', code_name='turbomole', - code_homepage='https://www.turbomole.org/', + metadata_path=f'{prefix_electronic}/turbomole/metadata.yaml', mainfile_contents_re=(r'Copyright \(C\) [0-9]+ TURBOMOLE GmbH, Karlsruhe') ), MatchingParserInterface( 'electronicparsers.VASPParser', - name='parsers/vasp', code_name='VASP', code_homepage='https://www.vasp.at/', + metadata_path=f'{prefix_electronic}/vasp/metadata.yaml', mainfile_mime_re=r'(application/.*)|(text/.*)', mainfile_name_re=r'.*[^/]*xml[^/]*', # only the alternative mainfile name should match mainfile_contents_re=( @@ -433,56 +438,55 @@ parsers = [ ), MatchingParserInterface( 'electronicparsers.Wien2kParser', - name='parsers/wien2k', code_name='WIEN2k', code_homepage='http://www.wien2k.at/', + metadata_path=f'{prefix_electronic}/wien2k/metadata.yaml', mainfile_name_re=r'.*\.scf$', mainfile_alternative=True, mainfile_contents_re=r'\s*---------\s*:ITE[0-9]+:\s*[0-9]+\.\s*ITERATION\s*---------' ), MatchingParserInterface( 'electronicparsers.YamboParser', - name='parsers/yambo', code_name='YAMBO', code_homepage='https://yambo-code.org/', + metadata_path=f'{prefix_electronic}/yambo/metadata.yaml', mainfile_contents_re=(r'Build.+\s+http://www\.yambo-code\.org') ), MatchingParserInterface( 'electronicparsers.ABACUSParser', - name='parsers/abacus', code_name='ABACUS', code_homepage='http://abacus.ustc.edu.cn/', + metadata_path=f'{prefix_electronic}/abacus/metadata.yaml', mainfile_contents_re=(r'\s*\n\s*WELCOME TO ABACUS') ), MatchingParserInterface( 'atomisticparsers.AmberParser', - name='parsers/amber', code_name='Amber', domain='dft', + metadata_path=f'{prefix_atomistic}/amber/metadata.yaml', mainfile_contents_re=r'\s*Amber\s[0-9]+\s[A-Z]+\s*[0-9]+' ), MatchingParserInterface( 'atomisticparsers.AsapParser', - name='parsers/asap', code_name='ASAP', domain='dft', + metadata_path=f'{prefix_atomistic}/asap/metadata.yaml', mainfile_name_re=r'.*.traj$', mainfile_mime_re=r'application/octet-stream' ), MatchingParserInterface( 'atomisticparsers.DFTBPlusParser', - name='parsers/dftbplus', code_name='DFTB+', domain='dft', + metadata_path=f'{prefix_atomistic}/dftbplus/metadata.yaml', mainfile_contents_re=r'\| DFTB\+', mainfile_mime_re=r'text/.*' ), MatchingParserInterface( 'atomisticparsers.DLPolyParser', - name='parsers/dl-poly', code_name='DL_POLY', - code_homepage='https://www.scd.stfc.ac.uk/Pages/DL_POLY.aspx', + metadata_path=f'{prefix_atomistic}/dlpoly/metadata.yaml', mainfile_contents_re=(r'\*\* DL_POLY \*\*'), ), MatchingParserInterface( 'atomisticparsers.GromacsParser', - name='parsers/gromacs', code_name='Gromacs', code_homepage='http://www.gromacs.org/', - domain='dft', mainfile_contents_re=r'gmx mdrun, (VERSION|version)' + metadata_path=f'{prefix_atomistic}/gromacs/metadata.yaml', + mainfile_contents_re=r'gmx mdrun, (VERSION|version)' ), MatchingParserInterface( 'atomisticparsers.GromosParser', - name='parsers/gromos', code_name='Gromos', domain='dft', + metadata_path=f'{prefix_atomistic}/gromos/metadata.yaml', mainfile_contents_re=r'Bugreports to http://www.gromos.net' ), MatchingParserInterface( 'atomisticparsers.GulpParser', - name='parsers/gulp', code_name='gulp', code_homepage='http://gulp.curtin.edu.au/gulp/', + metadata_path=f'{prefix_atomistic}/gulp/metadata.yaml', mainfile_contents_re=( r'\s*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*' r'\*\*\*\*\*\*\*\*\*\*\*\*\*\s*' @@ -490,40 +494,28 @@ parsers = [ ), MatchingParserInterface( 'atomisticparsers.LammpsParser', - name='parsers/lammps', code_name='LAMMPS', code_homepage='https://lammps.sandia.gov/', - domain='dft', mainfile_contents_re=r'^LAMMPS' + metadata_path=f'{prefix_atomistic}/lammps/metadata.yaml', + mainfile_contents_re=r'^LAMMPS' ), MatchingParserInterface( 'atomisticparsers.LibAtomsParser', - name='parsers/lib-atoms', code_name='libAtoms', code_homepage='https://libatoms.github.io/', + metadata_path=f'{prefix_atomistic}/libatoms/metadata.yaml', mainfile_contents_re=(r'\s*<GAP_params\s'), ), - MatchingParserInterface( - 'electronicparsers.MopacParser', - name='parsers/mopac', code_name='MOPAC', domain='dft', - mainfile_contents_re=r'\s*\*\*\s*MOPAC\s*([0-9a-zA-Z]*)\s*\*\*\s*', - mainfile_mime_re=r'text/.*', - ), MatchingParserInterface( 'atomisticparsers.NAMDParser', - name='parsers/namd', code_name='Namd', domain='dft', + metadata_path=f'{prefix_atomistic}/namd/metadata.yaml', mainfile_contents_re=r'\s*Info:\s*NAMD\s*[0-9.]+\s*for\s*', mainfile_mime_re=r'text/.*', ), - MatchingParserInterface( - 'databaseparsers.OpenKIMParser', - name='parsers/openkim', code_name='OpenKIM', domain='dft', - mainfile_mime_re=r'(application/json)|(text/.*)', - mainfile_contents_re=r'openkim|OPENKIM|OpenKIM' - ), MatchingParserInterface( 'atomisticparsers.TinkerParser', - name='parsers/tinker', code_name='TINKER', domain='dft', + metadata_path=f'{prefix_atomistic}/tinker/metadata.yaml', mainfile_contents_re=r'TINKER --- Software Tools for Molecular Design' ), MatchingParserInterface( 'workflowparsers.AFLOWParser', - name='parsers/aflow', code_name='AFlow', code_homepage='http://www.aflowlib.org/', + metadata_path=f'{prefix_workflow}/aflow/metadata.yaml', mainfile_mime_re=r'(application/json)|(text/.*)', mainfile_name_re=r'.*aflowlib\.json.*', # only the alternative mainfile name should match mainfile_contents_re=( @@ -541,59 +533,60 @@ parsers = [ ), MatchingParserInterface( 'workflowparsers.ASRParser', - name='parsers/asr', code_name='ASR', - code_homepage='https://asr.readthedocs.io/en/latest/index.html', + metadata_path=f'{prefix_workflow}/asr/metadata.yaml', mainfile_mime_re=r'(application/json)|(text/.*)', mainfile_name_re=r'.*archive_.*\.json', mainfile_contents_re=(r'"name": "ASR"') ), MatchingParserInterface( 'workflowparsers.ElasticParser', - name='parsers/elastic', code_name='elastic', code_homepage='http://exciting-code.org/elastic', + metadata_path=f'{prefix_workflow}/elastic/metadata.yaml', mainfile_contents_re=r'\s*Order of elastic constants\s*=\s*[0-9]+\s*', mainfile_name_re=(r'.*/INFO_ElaStic') ), MatchingParserInterface( 'workflowparsers.FHIVibesParser', - name='parsers/fhi-vibes', code_name='FHI-vibes', - code_homepage='https://vibes.fhi-berlin.mpg.de/', + metadata_path=f'{prefix_workflow}/fhivibes/metadata.yaml', mainfile_name_re=(r'^.*\.(nc)$'), mainfile_mime_re=r'(application/x-hdf)', mainfile_binary_header_re=br'^\x89HDF' ), MatchingParserInterface( 'workflowparsers.LobsterParser', - name='parsers/lobster', code_name='LOBSTER', - code_homepage='http://schmeling.ac.rwth-aachen.de/cohp/', + metadata_path=f'{prefix_workflow}/lobster/metadata.yaml', mainfile_name_re=r'.*lobsterout$', mainfile_contents_re=(r'^LOBSTER\s*v[\d\.]+.*'), ), MatchingParserInterface( 'workflowparsers.AtomateParser', - name='parsers/atomate', code_name='Atomate', - code_homepage='https://materialsproject.org', + metadata_path=f'{prefix_workflow}/atomate/metadata.yaml', mainfile_mime_re=r'(application/json)|(text/.*)', mainfile_name_re=r'.*mp.+materials\.json', mainfile_contents_re=(r'"pymatgen_version":') ), MatchingParserInterface( 'workflowparsers.PhonopyParser', - name='parsers/phonopy', code_name='Phonopy', code_homepage='https://phonopy.github.io/phonopy/', + metadata_path=f'{prefix_workflow}/phonopy/metadata.yaml', mainfile_name_re=(r'(.*/phonopy-FHI-aims-displacement-0*1/control.in$)|(.*/phon.+yaml)') ), MatchingParserInterface( 'eelsdbparser.EELSDBParser', - name='parsers/eels', code_name='eels', code_homepage='https://eelsdb.eu/', - domain='ems', + metadata_path=f'{prefix_eels}/metadata.yaml', mainfile_mime_re=r'application/json', mainfile_contents_re=(r'https://eelsdb.eu/spectra') ), MatchingParserInterface( 'workflowparsers.MOFStructuresParser', - name='parsers/mofstructures', code_name='MOF Structures', + metadata_path=f'{prefix_workflow}/mofstructures/metadata.yaml', mainfile_mime_re=r'(application/json)|(text/.*)', mainfile_name_re=r'.*mof_.*\.json', mainfile_contents_re=r'MOF Structures' ), + MatchingParserInterface( + 'databaseparsers.OpenKIMParser', + metadata_path=f'{prefix_database}/openkim/metadata.yaml', + mainfile_mime_re=r'(application/json)|(text/.*)', + mainfile_contents_re=r'openkim|OPENKIM|OpenKIM' + ), NexusParser(), TabularDataParser(), ArchiveParser() @@ -602,22 +595,18 @@ parsers = [ empty_parsers = [ EmptyParser( name='missing/octopus', code_name='Octopus', code_homepage='https://octopus-code.org/', - domain='dft', mainfile_name_re=r'(inp)|(.*/inp)' ), EmptyParser( - name='missing/crystal', code_name='Crystal', code_homepage='https://www.crystal.unito.it/index.php', - domain='dft', + name='missing/crystal', code_name='CRYSTAL', code_homepage='https://www.crystal.unito.it/index.php', mainfile_name_re=r'.*\.cryst\.out' ), EmptyParser( name='missing/wien2k', code_name='WIEN2k', code_homepage='http://www.wien2k.at/', - domain='dft', mainfile_name_re=r'.*\.scf' ), EmptyParser( name='missing/fhi-aims', code_name='FHI-aims', code_homepage='https://aimsclub.fhi-berlin.mpg.de/', - domain='dft', mainfile_name_re=r'.*\.fhiaims' ) ] @@ -645,12 +634,15 @@ parser_dict['parser/onetep'] = parser_dict['parsers/onetep'] # register code names as possible statistic value to the dft datamodel code_names = [] +code_metadata = {} for parser in parsers: + code_name = getattr(parser, 'code_name', None) if parser.domain == 'dft' and \ - getattr(parser, 'code_name', None) is not None and \ - getattr(parser, 'code_name') != 'currupted mainfile' and \ - getattr(parser, 'code_name') != 'Template': - code_names.append(getattr(parser, 'code_name')) + code_name is not None and \ + code_name != 'currupted mainfile' and \ + code_name != 'Template': + code_names.append(code_name) + code_metadata[code_name] = parser.metadata.dict() code_names = sorted(set(code_names), key=lambda code_name: code_name.lower()) results.Simulation.program_name.a_elasticsearch[0].values = code_names + [ config.services.unavailable_value, config.services.not_processed_value] diff --git a/nomad/parsing/tabular.py b/nomad/parsing/tabular.py index d75307e8e4382b0a647800c6785c61ac8bba5331..a450e595923e2b6652d3cefd3db38f77b92432cf 100644 --- a/nomad/parsing/tabular.py +++ b/nomad/parsing/tabular.py @@ -234,6 +234,7 @@ class TabularDataParser(MatchingParser): def __init__(self) -> None: super().__init__( name='parser/tabular', code_name='tabular data', + domain=None, mainfile_mime_re=r'text/.*|application/.*', mainfile_name_re=r'.*\.archive\.(csv|xlsx?)$')