__init__.py 19.1 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
16
The *parsing* module is an interface for the existing NOMAD-coe parsers.
17
18
This module redefines some of the old NOMAD-coe python-common functionality to create a
more coherent interface to the parsers.
19
20
21
22
23

Assumption about parsers
------------------------
For now, we make a few assumptions about parsers:
- they always work on the same *meta-info* version
Markus Scheidgen's avatar
Markus Scheidgen committed
24
- they have no conflicting python requirements
25
26
- they can be loaded at the same time and can be used within the same python process
- they are uniquely identified by a GIT URL and publicly accessible
27
- their version is uniquely identified by a GIT commit SHA
28

Markus Scheidgen's avatar
Markus Scheidgen committed
29
Each parser is defined via an instance of :class:`Parser`. The implementation :class:`LegacyParser` is used for most NOMAD-coe parsers.
30
31
32
33

.. autoclass:: nomad.parsing.Parser
    :members:

Markus Scheidgen's avatar
Markus Scheidgen committed
34
35
36
37
38
39
40
41
42
43
44
45
There are sub-classes for parsers with special purposes.

.. autoclass:: nomad.parsing.Parser
.. autoclass:: nomad.parsing.MatchingParser
.. autoclass:: nomad.parsing.MissingParser
.. autoclass:: nomad.parsing.BrokenParser
.. autoclass:: nomad.parsing.TemplateParser
.. autoclass:: nomad.parsing.GenerateRandomParser
.. autoclass:: nomad.parsing.ChaosParser
.. autoclass:: nomad.parsing.EmptyParser


Markus Scheidgen's avatar
Markus Scheidgen committed
46
47
48
49
The implementation :class:`LegacyParser` is used for most NOMAD-coe parsers.

.. autoclass:: nomad.parsing.LegacyParser

Markus Scheidgen's avatar
Markus Scheidgen committed
50

51
52
53
54
55
The parser definitions are available via the following two variables.

.. autodata:: nomad.parsing.parsers
.. autodata:: nomad.parsing.parser_dict

Markus Scheidgen's avatar
Markus Scheidgen committed
56
Parsers are reused for multiple calculations.
Markus Scheidgen's avatar
Markus Scheidgen committed
57

58
59
Parsers and calculation files are matched via regular expressions.

Markus Scheidgen's avatar
Markus Scheidgen committed
60
.. autofunction:: nomad.parsing.match_parser
61

Markus Scheidgen's avatar
Markus Scheidgen committed
62
Parsers in NOMAD-coe use a *backend* to create output. There are different NOMAD-coe
Markus Scheidgen's avatar
Markus Scheidgen committed
63
backends. In nomad@FAIRDI, we currently only use a single backend: a version of
Markus Scheidgen's avatar
Markus Scheidgen committed
64
65
66
NOMAD-coe's *LocalBackend*. It stores all parser results in memory. The following
classes provide an interface definition for *backends* as an ABC and a concrete implementation
based on NOMAD-coe's *python-common* module.
67
68
69
70
71
72

.. autoclass:: nomad.parsing.AbstractParserBackend
    :members:
.. autoclass:: nomad.parsing.LocalBackend
    :members:
"""
Markus Scheidgen's avatar
Markus Scheidgen committed
73

74
from typing import Callable, IO, Union
75
import magic
76
77
import gzip
import bz2
78
import os.path
79

80
from nomad import files, config
81
82

from nomad.parsing.backend import AbstractParserBackend, LocalBackend, LegacyLocalBackend, JSONStreamWriter, BadContextURI, WrongContextState
83
from nomad.parsing.parser import Parser, LegacyParser, VaspOutcarParser, BrokenParser, MissingParser, MatchingParser
84
from nomad.parsing.artificial import TemplateParser, GenerateRandomParser, ChaosParser, EmptyParser
85

86

87
88
89
90
91
92
# Maps the first three bytes of a file to a ``(compression name, open function)``
# pair. ``match_parser`` looks up the leading bytes of a mainfile here and falls
# back to ``(None, open)`` when they are not listed.
_compressions = {
    # leading bytes checked for gzip files
    b'\x1f\x8b\x08': ('gz', gzip.open),
    # leading bytes checked for bzip2 files (ASCII 'BZh')
    b'\x42\x5a\x68': ('bz2', bz2.open)
}


93
def match_parser(mainfile: str, upload_files: Union[str, files.StagingUploadFiles], strict=True) -> 'Parser':
    """
    Performs parser matching. The given mainfile is opened (transparently
    decompressed if its leading bytes are listed in ``_compressions``), the first
    ``config.parser_matching_size`` bytes are read, and the first entry of
    :data:`parsers` that accepts the file is returned.

    Each parser decides via its ``is_mainfile`` method, which receives the file path,
    the mime type detected from the read bytes, the read bytes themselves, and the
    detected compression.

    Arguments:
        mainfile: The upload relative path to the mainfile
        upload_files: Either a :class:`files.StagingUploadFiles` object or a directory name.
            Directory name + mainfile needs to point to the file.
        strict: Only match strict parsers, e.g. no artificial parsers for missing or empty entries.

    Returns: The parser, or None if no parser could be matched.
    """
    # Resolve the path of the mainfile on disk.
    if not isinstance(upload_files, str):
        path = upload_files.raw_file_object(mainfile).os_path
    else:
        path = os.path.join(upload_files, mainfile)

    # Sniff the leading bytes to decide how the file has to be opened.
    with open(path, 'rb') as raw:
        leading_bytes = raw.read(3)
    compression, opener = _compressions.get(leading_bytes, (None, open))

    # Read the (possibly decompressed) beginning of the file for matching.
    with opener(path, 'rb') as stream:
        head = stream.read(config.parser_matching_size)

    mime_type = magic.from_buffer(head, mime=True)

    # Parser types that are skipped when strict matching is requested.
    non_strict_types = (MissingParser, EmptyParser)

    for candidate in parsers:
        if strict and isinstance(candidate, non_strict_types):
            continue

        if candidate.domain != config.domain:
            continue

        if candidate.is_mainfile(path, mime_type, head, compression):
            # TODO: deal with multiple possible parser specs
            return candidate

    return None


136
# The order of this list matters: ``match_parser`` iterates it from the top and
# returns the first parser whose ``is_mainfile`` accepts the file.
parsers = [
    GenerateRandomParser(),
    TemplateParser(),
    ChaosParser(),
    LegacyParser(
        name='parsers/phonopy', code_name='Phonopy',
        parser_class_name='phonopyparser.PhonopyParserWrapper',
        # mainfile_contents_re=r'',  # Empty regex since this code calls other DFT codes.
        mainfile_name_re=(r'.*/phonopy-FHI-aims-displacement-0*1/control\.in$')
    ),
    LegacyParser(
        name='parsers/vasp', code_name='VASP',
        parser_class_name='vaspparser.VASPRunParserInterface',
        mainfile_mime_re=r'(application/xml)|(text/.*)',
        mainfile_contents_re=(
            r'^\s*<\?xml version="1\.0" encoding="ISO-8859-1"\?>\s*'
            r'?\s*<modeling>'
            r'?\s*<generator>'
            r'?\s*<i name="program" type="string">\s*vasp\s*</i>'
            r'?'),
        supported_compressions=['gz', 'bz2']
    ),
    VaspOutcarParser(
        name='parsers/vasp-outcar', code_name='VASP',
        parser_class_name='vaspparser.VaspOutcarParser',
        mainfile_name_re=r'(.*/)?OUTCAR(\.[^\.]*)?',
        mainfile_contents_re=(r'^\svasp\.')
    ),
    LegacyParser(
        name='parsers/exciting', code_name='exciting',
        parser_class_name='excitingparser.ExcitingParser',
        # NOTE(review): the '.' before OUT is an unescaped wildcard and the trailing
        # 'T?' makes the final T optional -- confirm the intended file name pattern.
        mainfile_name_re=r'^.*.OUT?',
        mainfile_contents_re=(r'EXCITING.*started')
    ),
    LegacyParser(
        name='parsers/fhi-aims', code_name='FHI-aims',
        parser_class_name='fhiaimsparser.FHIaimsParser',
        mainfile_contents_re=(
            r'^(.*\n)*'
            r'?\s*Invoking FHI-aims \.\.\.'
            # r'?\s*Version'
        )
    ),
    LegacyParser(
        name='parsers/cp2k', code_name='CP2K',
        parser_class_name='cp2kparser.CP2KParser',
        mainfile_contents_re=(
            r'\*\*\*\* \*\*\*\* \*\*\*\*\*\*  \*\*  PROGRAM STARTED AT\s.*\n'
            r' \*\*\*\*\* \*\* \*\*\*  \*\*\* \*\*   PROGRAM STARTED ON\s*.*\n'
            r' \*\*    \*\*\*\*   \*\*\*\*\*\*    PROGRAM STARTED BY .*\n'
            r' \*\*\*\*\* \*\*    \*\* \*\* \*\*   PROGRAM PROCESS ID .*\n'
            r'  \*\*\*\* \*\*  \*\*\*\*\*\*\*  \*\*  PROGRAM STARTED IN .*\n'
        )
    ),
    LegacyParser(
        name='parsers/crystal', code_name='Crystal',
        parser_class_name='crystalparser.CrystalParser',
        mainfile_contents_re=(
            r'(CRYSTAL\s*\n0 0 0)|('
            r'\s*\*\s{10,}CRYSTAL(?P<majorVersion>[\d]+)\s{10,}\*'
            r'\s*\*\s{10,}public \: (?P<minorVersion>[\d\.]+) \- .*\*)'
        )
    ),
    # The main contents regex of CPMD was causing a catastrophic backtracking issue
    # when searching through the first 500 bytes of main files. We decided
    # to use only a portion of the regex to avoid that issue.
    LegacyParser(
        name='parsers/cpmd', code_name='CPMD',
        parser_class_name='cpmdparser.CPMDParser',
        mainfile_contents_re=(
            # r'\s+\*\*\*\*\*\*  \*\*\*\*\*\*    \*\*\*\*  \*\*\*\*  \*\*\*\*\*\*\s*'
            # r'\s+\*\*\*\*\*\*\*  \*\*\*\*\*\*\*   \*\*\*\*\*\*\*\*\*\*  \*\*\*\*\*\*\*\s+'
            r'\*\*\*       \*\*   \*\*\*  \*\* \*\*\*\* \*\*  \*\*   \*\*\*'
            # r'\s+\*\*        \*\*   \*\*\*  \*\*  \*\*  \*\*  \*\*    \*\*\s+'
            # r'\s+\*\*        \*\*\*\*\*\*\*   \*\*      \*\*  \*\*    \*\*\s+'
            # r'\s+\*\*\*       \*\*\*\*\*\*    \*\*      \*\*  \*\*   \*\*\*\s+'
            # r'\s+\*\*\*\*\*\*\*  \*\*        \*\*      \*\*  \*\*\*\*\*\*\*\s+'
            # r'\s+\*\*\*\*\*\*  \*\*        \*\*      \*\*  \*\*\*\*\*\*\s+'
        )
    ),
    LegacyParser(
        name='parsers/nwchem', code_name='NWChem',
        parser_class_name='nwchemparser.NWChemParser',
        mainfile_contents_re=(
            r'Northwest Computational Chemistry Package \(NWChem\) (\d+\.)+\d+'
        )
    ),
    LegacyParser(
        name='parsers/bigdft', code_name='BigDFT',
        parser_class_name='bigdftparser.BigDFTParser',
        mainfile_contents_re=(
            # r'__________________________________ A fast and precise DFT wavelet code\s*'
            # r'\|     \|     \|     \|     \|     \|\s*'
            # r'\|     \|     \|     \|     \|     \|      BBBB         i       gggggg\s*'
            # r'\|_____\|_____\|_____\|_____\|_____\|     B    B               g\s*'
            # r'\|     \|  :  \|  :  \|     \|     \|    B     B        i     g\s*'
            # r'\|     \|-0\+--\|-0\+--\|     \|     \|    B    B         i     g        g\s*'
            r'\|_____\|__:__\|__:__\|_____\|_____\|___ BBBBB          i     g         g\s*'
            # r'\|  :  \|     \|     \|  :  \|     \|    B    B         i     g         g\s*'
            # r'\|--\+0-\|     \|     \|-0\+--\|     \|    B     B     iiii     g         g\s*'
            # r'\|__:__\|_____\|_____\|__:__\|_____\|    B     B        i      g        g\s*'
            # r'\|     \|  :  \|  :  \|     \|     \|    B BBBB        i        g      g\s*'
            # r'\|     \|-0\+--\|-0\+--\|     \|     \|    B        iiiii          gggggg\s*'
            # r'\|_____\|__:__\|__:__\|_____\|_____\|__BBBBB\s*'
            # r'\|     \|     \|     \|  :  \|     \|                           TTTTTTTTT\s*'
            # r'\|     \|     \|     \|--\+0-\|     \|  DDDDDD          FFFFF        T\s*'
            # r'\|_____\|_____\|_____\|__:__\|_____\| D      D        F        TTTT T\s*'
            # r'\|     \|     \|     \|  :  \|     \|D        D      F        T     T\s*'
            # r'\|     \|     \|     \|--\+0-\|     \|D         D     FFFF     T     T\s*'
            # r'\|_____\|_____\|_____\|__:__\|_____\|D___      D     F         T    T\s*'
            # r'\|     \|     \|  :  \|     \|     \|D         D     F          TTTTT\s*'
            # r'\|     \|     \|--\+0-\|     \|     \| D        D     F         T    T\s*'
            # r'\|_____\|_____\|__:__\|_____\|_____\|          D     F        T     T\s*'
            # r'\|     \|     \|     \|     \|     \|         D               T    T\s*'
            # r'\|     \|     \|     \|     \|     \|   DDDDDD       F         TTTT\s*'
            # r'\|_____\|_____\|_____\|_____\|_____\|______                    www\.bigdft\.org'
        )
    ),
    LegacyParser(
        name='parsers/wien2k', code_name='WIEN2k',
        parser_class_name='wien2kparser.Wien2kParser',
        mainfile_contents_re=r':LABEL\d+: using WIEN2k_\d+\.\d+'
    ),
    LegacyParser(
        name='parsers/band', code_name='BAND',
        parser_class_name='bandparser.BANDParser',
        mainfile_contents_re=r' +\* +Amsterdam Density Functional +\(ADF\)'),
    LegacyParser(
        name='parsers/gaussian', code_name='Gaussian',
        parser_class_name='gaussianparser.GaussianParser',
        mainfile_contents_re=(
            r'\s*Cite this work as:'
            r'\s*Gaussian [0-9]+, Revision [A-Za-z0-9.]*,'
            r'\s\*\*\*\*\*\*\*\*\*\*\*\**'
            r'\s*Gaussian\s*([0-9]+):\s*([A-Za-z0-9-.]+)\s*([0-9][0-9]?\-[A-Z][a-z][a-z]\-[0-9]+)'
            r'\s*([0-9][0-9]?\-[A-Z][a-z][a-z]\-[0-9]+)')),
    LegacyParser(
        name='parsers/quantumespresso', code_name='Quantum Espresso',
        parser_class_name='quantumespressoparser.QuantumEspressoParserPWSCF',
        mainfile_contents_re=r'Program PWSCF.*starts'
        #    r'^(.*\n)*'
        #    r'\s*Program (\S+)\s+v\.(\S+)(?:\s+\(svn\s+rev\.\s+'
        #    r'(\d+)\s*\))?\s+starts[^\n]+'
        #    r'(?:\s*\n?)*This program is part of the open-source Quantum')
    ),
    LegacyParser(
        name='parsers/abinit', code_name='ABINIT',
        parser_class_name='abinitparser.AbinitParser',
        mainfile_contents_re=(r'^\n*\.Version\s*[0-9.]*\s*of ABINIT\s*')
    ),
    LegacyParser(
        name='parsers/orca', code_name='ORCA',
        parser_class_name='orcaparser.OrcaParser',
        mainfile_contents_re=(
            r'\s+\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*'
            r'\s+\* O   R   C   A \*\s*'
            r'\s+\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*'
            r'\s*'
            r'\s*--- An Ab Initio, DFT and Semiempirical electronic structure package ---\s*')
    ),
    LegacyParser(
        name='parsers/castep', code_name='CASTEP',
        parser_class_name='castepparser.CastepParser',
        mainfile_contents_re=(r'\s\|\s*CCC\s*AA\s*SSS\s*TTTTT\s*EEEEE\s*PPPP\s*\|\s*')
    ),
    LegacyParser(
        name='parsers/dl-poly', code_name='DL_POLY',
        parser_class_name='dlpolyparser.DlPolyParserWrapper',
        mainfile_contents_re=(r'\*\* DL_POLY \*\*')
    ),
    LegacyParser(
        name='parsers/lib-atoms', code_name='libAtoms',
        parser_class_name='libatomsparser.LibAtomsParserWrapper',
        mainfile_contents_re=(r'\s*<GAP_params\s')
    ),
    LegacyParser(
        name='parsers/octopus', code_name='Octopus',
        parser_class_name='octopusparser.OctopusParserWrapper',
        mainfile_contents_re=(r'\|0\) ~ \(0\) \|')
        # We decided to use the octopus eyes instead of
        # r'\*{32} Grid \*{32}Simulation Box:' since it was so far down in the file.
    ),
    LegacyParser(
        name='parsers/gpaw', code_name='GPAW',
        parser_class_name='gpawparser.GPAWParserWrapper',
        mainfile_name_re=(r'^.*\.gpw$'),
        mainfile_mime_re=r'application/x-tar'
    ),
    LegacyParser(
        name='parsers/gpaw2', code_name='GPAW',
        parser_class_name='gpawparser.GPAWParser2Wrapper',
        # mainfile_contents_re=r'',  # We can't read .gpw2 to match AFFormatGPAW'
        mainfile_name_re=(r'^.*\.gpw2$'),
        mainfile_mime_re=r'application/x-tar'
    ),
    LegacyParser(
        name='parsers/atk', code_name='ATK',
        parser_class_name='atkparser.ATKParserWrapper',
        # mainfile_contents_re=r'',  # We can't read .gpw as txt - of UlmGPAW|AFFormatGPAW'
        mainfile_name_re=r'^.*\.nc',
        # The previously used mime type r'application/x-netcdf' wasn't found by magic library.
        mainfile_mime_re=r'application/octet-stream'
    ),
    LegacyParser(
        name='parsers/gulp', code_name='gulp',
        parser_class_name='gulpparser.GULPParser',
        mainfile_contents_re=(
            r'\s*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*'
            r'\*\*\*\*\*\*\*\*\*\*\*\*\*\s*'
            r'\s*\*\s*GENERAL UTILITY LATTICE PROGRAM\s*\*\s*')
    ),
    LegacyParser(
        name='parsers/siesta', code_name='Siesta',
        parser_class_name='siestaparser.SiestaParser',
        mainfile_contents_re=(
            r'(Siesta Version: siesta-|SIESTA [0-9]\.[0-9]\.[0-9])')
    ),
    LegacyParser(
        name='parsers/elk', code_name='elk',
        parser_class_name='elkparser.ElkParser',
        mainfile_contents_re=r'\| Elk version [0-9.a-zA-Z]+ started \|'
    ),
    LegacyParser(
        name='parsers/elastic', code_name='elastic',
        parser_class_name='elasticparser.ElasticParser',
        mainfile_contents_re=r'\s*Order of elastic constants\s*=\s*[0-9]+\s*'
    ),
    LegacyParser(
        name='parsers/gamess', code_name='GAMESS',
        parser_class_name='gamessparser.GamessParser',
        mainfile_contents_re=(
            r'\s*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*'
            r'\s*\*\s*GAMESS VERSION =\s*(.*)\*\s*'
            r'\s*\*\s*FROM IOWA STATE UNIVERSITY\s*\*\s*')
    ),
    LegacyParser(
        name='parsers/turbomole', code_name='turbomole',
        parser_class_name='turbomoleparser.TurbomoleParser',
        mainfile_contents_re=(
            r'Copyright \(C\) [0-9]+ TURBOMOLE GmbH, Karlsruhe')
    ),
    LegacyParser(
        name='parsers/skeleton', code_name='skeleton', domain='EMS',
        parser_class_name='skeletonparser.SkeletonParserInterface',
        mainfile_mime_re=r'(application/json)|(text/.*)',
        mainfile_contents_re=(r'skeleton experimental metadata format')
    ),
    LegacyParser(
        name='parsers/mpes', code_name='mpes', domain='EMS',
        parser_class_name='mpesparser.MPESParserInterface',
        mainfile_mime_re=r'(application/json)|(text/.*)',
        mainfile_name_re=(r'.*\.meta'),
        mainfile_contents_re=(r'"data_repository_name": "zenodo\.org"')
    ),
    LegacyParser(
        # NOTE(review): code_name is 'mpes' here as well -- confirm this is intended
        # for the aptfim parser.
        name='parsers/aptfim', code_name='mpes', domain='EMS',
        parser_class_name='aptfimparser.APTFIMParserInterface',
        mainfile_mime_re=r'(application/json)|(text/.*)',
        mainfile_name_re=(r'.*\.aptfim')
    ),
    LegacyParser(
        name='parsers/qbox', code_name='qbox', domain='DFT',
        parser_class_name='qboxparser.QboxParser',
        mainfile_mime_re=r'(application/xml)|(text/.*)',
        mainfile_contents_re=(r'http://qboxcode\.org')
    ),
    LegacyParser(
        name='parsers/dmol', code_name='DMol3', domain='DFT',
        parser_class_name='dmol3parser.Dmol3Parser',
        mainfile_name_re=r'.*\.outmol',
        mainfile_contents_re=r'Materials Studio DMol\^3'
    ),
    # NOTE(review): the following three names use the prefix 'parser/' while all other
    # entries use 'parsers/'. They are left unchanged because ``parser_dict`` is keyed
    # by these names -- confirm before renaming.
    LegacyParser(
        name='parser/fleur', code_name='fleur', domain='DFT',
        parser_class_name='fleurparser.FleurParser',
        # NOTE(review): the trailing '.' is an unescaped wildcard -- confirm whether a
        # literal dot is meant.
        mainfile_contents_re=r'This output is generated by fleur.'
    ),
    LegacyParser(
        name='parser/molcas', code_name='MOLCAS', domain='DFT',
        parser_class_name='molcasparser.MolcasParser',
        mainfile_contents_re=r'M O L C A S'
    ),
    LegacyParser(
        name='parser/onetep', code_name='ONETEP', domain='DFT',
        parser_class_name='onetepparser.OnetepParser',
        mainfile_contents_re=r'####### #     # ####### ####### ####### ######'
    ),
    # There are some entries with PIDs that have mainfiles which do not match what
    # the actual parsers expect. We use the EmptyParser to produce placeholder entries
    # to keep the PIDs. These parsers will not match for new, non migrated data.
    EmptyParser(
        name='missing/octopus', code_name='Octopus', domain='DFT',
        mainfile_name_re=r'(inp)|(.*/inp)'
    ),
    EmptyParser(
        name='missing/crystal', code_name='Crystal',
        mainfile_name_re=r'.*\.cryst\.out'
    ),
    EmptyParser(
        name='missing/wien2k', code_name='WIEN2k',
        mainfile_name_re=r'.*\.scf'
    ),
    EmptyParser(
        name='missing/fhi-aims', code_name='FHI-aims', domain='DFT',
        mainfile_name_re=r'.*\.fhiaims'
    ),
    BrokenParser()
]

""" Instantiation and constructor based config of all parsers. """
446

Markus Scheidgen's avatar
Markus Scheidgen committed
447
# Index of the ``parsers`` list by parser name; should two entries ever share a
# name, the one that comes later in the list replaces the earlier one.
parser_dict = {entry.name: entry for entry in parsers}  # type: ignore
""" A dict to access parsers by name. Usually 'parsers/<...>', e.g. 'parsers/vasp'. """