__init__.py 19 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
16
The *parsing* module is an interface for the existing NOMAD-coe parsers.
17
18
This module redefines some of the old NOMAD-coe python-common functionality to create a
more coherent interface to the parsers.
19
20
21
22
23

Assumption about parsers
------------------------
For now, we make a few assumptions about parsers:
- they always work on the same *meta-info* version
Markus Scheidgen's avatar
Markus Scheidgen committed
24
- they have no conflicting python requirements
25
26
- they can be loaded at the same time and can be used within the same python process
- they are uniquely identified by a GIT URL and publicly accessible
27
- their version is uniquely identified by a GIT commit SHA
28

Markus Scheidgen's avatar
Markus Scheidgen committed
29
Each parser is defined via an instance of :class:`Parser`. The implementation :class:`LegacyParser` is used for most NOMAD-coe parsers.
30
31
32
33

.. autoclass:: nomad.parsing.Parser
    :members:

Markus Scheidgen's avatar
Markus Scheidgen committed
34
35
36
37
38
39
40
41
42
43
44
45
There are sub-classes for parsers with special purposes.

.. autoclass:: nomad.parsing.Parser
.. autoclass:: nomad.parsing.MatchingParser
.. autoclass:: nomad.parsing.MissingParser
.. autoclass:: nomad.parsing.BrokenParser
.. autoclass:: nomad.parsing.TemplateParser
.. autoclass:: nomad.parsing.GenerateRandomParser
.. autoclass:: nomad.parsing.ChaosParser
.. autoclass:: nomad.parsing.EmptyParser


Markus Scheidgen's avatar
Markus Scheidgen committed
46
47
48
49
The implementation :class:`LegacyParser` is used for most NOMAD-coe parsers.

.. autoclass:: nomad.parsing.LegacyParser

Markus Scheidgen's avatar
Markus Scheidgen committed
50

51
52
53
54
55
The parser definitions are available via the following two variables.

.. autodata:: nomad.parsing.parsers
.. autodata:: nomad.parsing.parser_dict

Markus Scheidgen's avatar
Markus Scheidgen committed
56
Parsers are reused for multiple calculations.
Markus Scheidgen's avatar
Markus Scheidgen committed
57

58
59
Parsers and calculation files are matched via regular expressions.

Markus Scheidgen's avatar
Markus Scheidgen committed
60
.. autofunction:: nomad.parsing.match_parser
61

Markus Scheidgen's avatar
Markus Scheidgen committed
62
Parsers in NOMAD-coe use a *backend* to create output. There are different NOMAD-coe
Markus Scheidgen's avatar
Markus Scheidgen committed
63
backends. In nomad@FAIRDI, we currently only use a single backend: a version of
Markus Scheidgen's avatar
Markus Scheidgen committed
64
65
66
NOMAD-coe's *LocalBackend*. It stores all parser results in memory. The following
classes provide an interface definition for *backends* as an ABC and a concrete implementation
based on NOMAD-coe's *python-common* module.
67
68
69
70
71
72

.. autoclass:: nomad.parsing.AbstractParserBackend
    :members:
.. autoclass:: nomad.parsing.LocalBackend
    :members:
"""
Markus Scheidgen's avatar
Markus Scheidgen committed
73

74
from typing import Callable, IO, Union
75
import magic
76
77
import gzip
import bz2
78
import os.path
79

80
from nomad import files, config
81
82

from nomad.parsing.backend import AbstractParserBackend, LocalBackend, LegacyLocalBackend, JSONStreamWriter, BadContextURI, WrongContextState
83
from nomad.parsing.metainfo import MetainfoBackend
84
from nomad.parsing.parser import Parser, LegacyParser, VaspOutcarParser, BrokenParser, MissingParser, MatchingParser
85
from nomad.parsing.artificial import TemplateParser, GenerateRandomParser, ChaosParser, EmptyParser
86

87

88
89
90
91
92
93
# Maps the first three bytes of a file to a (compression name, open function) pair.
# Files whose leading bytes match none of these keys are treated as uncompressed.
_compressions = {
    # gzip header: ID bytes 1f 8b followed by compression method 08 (deflate)
    b'\x1f\x8b\x08': ('gz', gzip.open),
    # bzip2 header: the ASCII characters 'BZh'
    b'\x42\x5a\x68': ('bz2', bz2.open),
}


94
def match_parser(mainfile: str, upload_files: Union[str, files.StagingUploadFiles], strict=True) -> 'Parser':
    """
    Performs parser matching: returns the first parser in :data:`parsers` that
    accepts the given mainfile.

    The first bytes of the file are inspected to detect gzip or bz2 compression. The
    beginning of the (decompressed) content, limited to ``config.parser_matching_size``
    bytes, is read and its mime type is determined with ``magic``. Each parser of the
    configured domain is then asked via ``is_mainfile``, which receives the file path,
    the mime type, the read content, and the detected compression.

    Arguments:
        mainfile: The upload relative path to the mainfile
        upload_files: Either a :class:`files.StagingUploadFiles` object or a directory name.
            Directory name + mainfile needs to point to the file.
        strict: Only match strict parsers, e.g. no artificial parsers for missing or empty entries.

    Returns: The parser, or None if no parser could be matched.
    """
    if not isinstance(upload_files, str):
        path = upload_files.raw_file_object(mainfile).os_path
    else:
        path = os.path.join(upload_files, mainfile)

    # The leading three bytes select the compression; anything unknown is opened plainly.
    with open(path, 'rb') as raw_file:
        magic_bytes = raw_file.read(3)
    compression, opener = _compressions.get(magic_bytes, (None, open))

    # Only the head of the content is needed for matching.
    with opener(path, 'rb') as content_file:
        head = content_file.read(config.parser_matching_size)

    mime_type = magic.from_buffer(head, mime=True)

    for candidate in parsers:
        # In strict mode the placeholder parsers for missing/empty entries never match.
        placeholder = strict and isinstance(candidate, (MissingParser, EmptyParser))
        if placeholder or candidate.domain != config.domain:
            continue

        # TODO: deal with multiple possible parser specs
        if candidate.is_mainfile(path, mime_type, head, compression):
            return candidate

    return None


137
# The order of this list is significant: match_parser returns the first parser whose
# is_mainfile accepts a file, so more specific parsers must come before generic ones.
parsers = [
    GenerateRandomParser(),
    TemplateParser(),
    ChaosParser(),
    LegacyParser(
        name='parsers/phonopy', code_name='Phonopy',
        parser_class_name='phonopyparser.PhonopyParserWrapper',
        # mainfile_contents_re=r'',  # Empty regex since this code calls other DFT codes.
        mainfile_name_re=(r'.*/phonopy-FHI-aims-displacement-0*1/control\.in$')
    ),
    LegacyParser(
        name='parsers/vasp', code_name='VASP',
        parser_class_name='vaspparser.VASPRunParserInterface',
        mainfile_mime_re=r'(application/xml)|(text/.*)',
        # After string concatenation each leading '?' attaches to the end of the
        # previous fragment: '\s*?' becomes a lazy match and the final '</i>?' makes
        # the closing '>' optional.
        mainfile_contents_re=(
            r'^\s*<\?xml version="1\.0" encoding="ISO-8859-1"\?>\s*'
            r'?\s*<modeling>'
            r'?\s*<generator>'
            r'?\s*<i name="program" type="string">\s*vasp\s*</i>'
            r'?'),
        supported_compressions=['gz', 'bz2']
    ),
    VaspOutcarParser(
        name='parsers/vasp-outcar', code_name='VASP',
        parser_class_name='vaspparser.VaspOutcarParser',
        mainfile_name_re=r'(.*/)?OUTCAR(\.[^\.]*)?',
        mainfile_contents_re=(r'^\svasp\.')
    ),
    LegacyParser(
        name='parsers/exciting', code_name='exciting',
        parser_class_name='excitingparser.ExcitingParser',
        mainfile_name_re=r'^.*\.OUT?',
        mainfile_contents_re=(r'EXCITING.*started')
    ),
    LegacyParser(
        name='parsers/fhi-aims', code_name='FHI-aims',
        parser_class_name='fhiaimsparser.FHIaimsParser',
        mainfile_contents_re=(
            r'^(.*\n)*'
            r'?\s*Invoking FHI-aims \.\.\.'
            # r'?\s*Version'
        )
    ),
    LegacyParser(
        name='parsers/cp2k', code_name='CP2K',
        parser_class_name='cp2kparser.CP2KParser',
        mainfile_contents_re=(
            r'\*\*\*\* \*\*\*\* \*\*\*\*\*\*  \*\*  PROGRAM STARTED AT\s.*\n'
            r' \*\*\*\*\* \*\* \*\*\*  \*\*\* \*\*   PROGRAM STARTED ON\s*.*\n'
            r' \*\*    \*\*\*\*   \*\*\*\*\*\*    PROGRAM STARTED BY .*\n'
            r' \*\*\*\*\* \*\*    \*\* \*\* \*\*   PROGRAM PROCESS ID .*\n'
            r'  \*\*\*\* \*\*  \*\*\*\*\*\*\*  \*\*  PROGRAM STARTED IN .*\n'
        )
    ),
    LegacyParser(
        name='parsers/crystal', code_name='Crystal',
        parser_class_name='crystalparser.CrystalParser',
        mainfile_contents_re=(
            r'(CRYSTAL\s*\n0 0 0)|('
            r'\s*\*\s{10,}CRYSTAL(?P<majorVersion>[\d]+)\s{10,}\*'
            r'\s*\*\s{10,}public \: (?P<minorVersion>[\d\.]+) \- .*\*)'
        )
    ),
    # The main contents regex of CPMD was causing a catastrophic backtracking issue
    # when searching through the first 500 bytes of main files. We decided
    # to use only a portion of the regex to avoid that issue.
    LegacyParser(
        name='parsers/cpmd', code_name='CPMD',
        parser_class_name='cpmdparser.CPMDParser',
        mainfile_contents_re=(
            # r'\s+\*\*\*\*\*\*  \*\*\*\*\*\*    \*\*\*\*  \*\*\*\*  \*\*\*\*\*\*\s*'
            # r'\s+\*\*\*\*\*\*\*  \*\*\*\*\*\*\*   \*\*\*\*\*\*\*\*\*\*  \*\*\*\*\*\*\*\s+'
            r'\*\*\*       \*\*   \*\*\*  \*\* \*\*\*\* \*\*  \*\*   \*\*\*'
            # r'\s+\*\*        \*\*   \*\*\*  \*\*  \*\*  \*\*  \*\*    \*\*\s+'
            # r'\s+\*\*        \*\*\*\*\*\*\*   \*\*      \*\*  \*\*    \*\*\s+'
            # r'\s+\*\*\*       \*\*\*\*\*\*    \*\*      \*\*  \*\*   \*\*\*\s+'
            # r'\s+\*\*\*\*\*\*\*  \*\*        \*\*      \*\*  \*\*\*\*\*\*\*\s+'
            # r'\s+\*\*\*\*\*\*  \*\*        \*\*      \*\*  \*\*\*\*\*\*\s+'
        )
    ),
    LegacyParser(
        name='parsers/nwchem', code_name='NWChem',
        parser_class_name='nwchemparser.NWChemParser',
        mainfile_contents_re=(
            r'Northwest Computational Chemistry Package \(NWChem\) (\d+\.)+\d+'
        )
    ),
    LegacyParser(
        name='parsers/bigdft', code_name='BigDFT',
        parser_class_name='bigdftparser.BigDFTParser',
        mainfile_contents_re=(
            # r'__________________________________ A fast and precise DFT wavelet code\s*'
            # r'\|     \|     \|     \|     \|     \|\s*'
            # r'\|     \|     \|     \|     \|     \|      BBBB         i       gggggg\s*'
            # r'\|_____\|_____\|_____\|_____\|_____\|     B    B               g\s*'
            # r'\|     \|  :  \|  :  \|     \|     \|    B     B        i     g\s*'
            # r'\|     \|-0\+--\|-0\+--\|     \|     \|    B    B         i     g        g\s*'
            r'\|_____\|__:__\|__:__\|_____\|_____\|___ BBBBB          i     g         g\s*'
            # r'\|  :  \|     \|     \|  :  \|     \|    B    B         i     g         g\s*'
            # r'\|--\+0-\|     \|     \|-0\+--\|     \|    B     B     iiii     g         g\s*'
            # r'\|__:__\|_____\|_____\|__:__\|_____\|    B     B        i      g        g\s*'
            # r'\|     \|  :  \|  :  \|     \|     \|    B BBBB        i        g      g\s*'
            # r'\|     \|-0\+--\|-0\+--\|     \|     \|    B        iiiii          gggggg\s*'
            # r'\|_____\|__:__\|__:__\|_____\|_____\|__BBBBB\s*'
            # r'\|     \|     \|     \|  :  \|     \|                           TTTTTTTTT\s*'
            # r'\|     \|     \|     \|--\+0-\|     \|  DDDDDD          FFFFF        T\s*'
            # r'\|_____\|_____\|_____\|__:__\|_____\| D      D        F        TTTT T\s*'
            # r'\|     \|     \|     \|  :  \|     \|D        D      F        T     T\s*'
            # r'\|     \|     \|     \|--\+0-\|     \|D         D     FFFF     T     T\s*'
            # r'\|_____\|_____\|_____\|__:__\|_____\|D___      D     F         T    T\s*'
            # r'\|     \|     \|  :  \|     \|     \|D         D     F          TTTTT\s*'
            # r'\|     \|     \|--\+0-\|     \|     \| D        D     F         T    T\s*'
            # r'\|_____\|_____\|__:__\|_____\|_____\|          D     F        T     T\s*'
            # r'\|     \|     \|     \|     \|     \|         D               T    T\s*'
            # r'\|     \|     \|     \|     \|     \|   DDDDDD       F         TTTT\s*'
            # r'\|_____\|_____\|_____\|_____\|_____\|______                    www\.bigdft\.org'
        )
    ),
    LegacyParser(
        name='parsers/wien2k', code_name='WIEN2k',
        parser_class_name='wien2kparser.Wien2kParser',
        mainfile_contents_re=r':LABEL\d+: using WIEN2k_\d+\.\d+'
    ),
    LegacyParser(
        name='parsers/band', code_name='BAND',
        parser_class_name='bandparser.BANDParser',
        mainfile_contents_re=r' +\* +Amsterdam Density Functional +\(ADF\)'),
    LegacyParser(
        name='parsers/gaussian', code_name='Gaussian',
        parser_class_name='gaussianparser.GaussianParser',
        mainfile_contents_re=(
            r'\s*Cite this work as:'
            r'\s*Gaussian [0-9]+, Revision [A-Za-z0-9\.]*,')
    ),
    LegacyParser(
        name='parsers/quantumespresso', code_name='Quantum Espresso',
        parser_class_name='quantumespressoparser.QuantumEspressoParserPWSCF',
        mainfile_contents_re=r'Program PWSCF.*starts'
        #    r'^(.*\n)*'
        #    r'\s*Program (\S+)\s+v\.(\S+)(?:\s+\(svn\s+rev\.\s+'
        #    r'(\d+)\s*\))?\s+starts[^\n]+'
        #    r'(?:\s*\n?)*This program is part of the open-source Quantum')
    ),
    LegacyParser(
        name='parsers/abinit', code_name='ABINIT',
        parser_class_name='abinitparser.AbinitParser',
        mainfile_contents_re=(r'^\n*\.Version\s*[0-9.]*\s*of ABINIT\s*')
    ),
    LegacyParser(
        name='parsers/orca', code_name='ORCA',
        parser_class_name='orcaparser.OrcaParser',
        mainfile_contents_re=(
            r'\s+\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*'
            r'\s+\* O   R   C   A \*\s*'
            r'\s+\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*'
            r'\s*'
            r'\s*--- An Ab Initio, DFT and Semiempirical electronic structure package ---\s*')
    ),
    LegacyParser(
        name='parsers/castep', code_name='CASTEP',
        parser_class_name='castepparser.CastepParser',
        mainfile_contents_re=(r'\s\|\s*CCC\s*AA\s*SSS\s*TTTTT\s*EEEEE\s*PPPP\s*\|\s*')
    ),
    LegacyParser(
        name='parsers/dl-poly', code_name='DL_POLY',
        parser_class_name='dlpolyparser.DlPolyParserWrapper',
        mainfile_contents_re=(r'\*\* DL_POLY \*\*')
    ),
    LegacyParser(
        name='parsers/lib-atoms', code_name='libAtoms',
        parser_class_name='libatomsparser.LibAtomsParserWrapper',
        mainfile_contents_re=(r'\s*<GAP_params\s')
    ),
    LegacyParser(
        name='parsers/octopus', code_name='Octopus',
        parser_class_name='octopusparser.OctopusParserWrapper',
        mainfile_contents_re=(r'\|0\) ~ \(0\) \|')
        # We decided to use the octopus eyes instead of
        # r'\*{32} Grid \*{32}Simulation Box:' since it was so far down in the file.
    ),
    # match gpaw2 first, other .gpw files are then considered to be "gpaw1"
    LegacyParser(
        name='parsers/gpaw2', code_name='GPAW',
        parser_class_name='gpawparser.GPAWParser2Wrapper',
        mainfile_binary_header=b'GPAW',
        mainfile_name_re=(r'^.*\.(gpw2|gpw)$'),
        mainfile_mime_re=r'application/(x-tar|octet-stream)'
    ),
    LegacyParser(
        name='parsers/gpaw', code_name='GPAW',
        parser_class_name='gpawparser.GPAWParserWrapper',
        mainfile_name_re=(r'^.*\.gpw$'),
        mainfile_mime_re=r'application/(x-tar|octet-stream)'
    ),
    LegacyParser(
        name='parsers/atk', code_name='ATK',
        parser_class_name='atkparser.ATKParserWrapper',
        # mainfile_contents_re=r'',  # We can't read .gpw as txt - of UlmGPAW|AFFormatGPAW'
        mainfile_name_re=r'^.*\.nc',
        # The previously used mime type r'application/x-netcdf' wasn't found by magic library.
        mainfile_mime_re=r'application/octet-stream'
    ),
    LegacyParser(
        name='parsers/gulp', code_name='gulp',
        parser_class_name='gulpparser.GULPParser',
        mainfile_contents_re=(
            r'\s*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*'
            r'\*\*\*\*\*\*\*\*\*\*\*\*\*\s*'
            r'\s*\*\s*GENERAL UTILITY LATTICE PROGRAM\s*\*\s*')
    ),
    LegacyParser(
        name='parsers/siesta', code_name='Siesta',
        parser_class_name='siestaparser.SiestaParser',
        mainfile_contents_re=(
            r'(Siesta Version: siesta-|SIESTA [0-9]\.[0-9]\.[0-9])')
    ),
    LegacyParser(
        name='parsers/elk', code_name='elk',
        parser_class_name='elkparser.ElkParser',
        mainfile_contents_re=r'\| Elk version [0-9.a-zA-Z]+ started \|'
    ),
    LegacyParser(
        name='parsers/elastic', code_name='elastic',
        parser_class_name='elasticparser.ElasticParser',
        mainfile_contents_re=r'\s*Order of elastic constants\s*=\s*[0-9]+\s*'
    ),
    LegacyParser(
        name='parsers/gamess', code_name='GAMESS',
        parser_class_name='gamessparser.GamessParser',
        mainfile_contents_re=(
            r'\s*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*'
            r'\s*\*\s*GAMESS VERSION =\s*(.*)\*\s*'
            r'\s*\*\s*FROM IOWA STATE UNIVERSITY\s*\*\s*')
    ),
    LegacyParser(
        name='parsers/turbomole', code_name='turbomole',
        parser_class_name='turbomoleparser.TurbomoleParser',
        mainfile_contents_re=(
            r'Copyright \(C\) [0-9]+ TURBOMOLE GmbH, Karlsruhe')
    ),
    LegacyParser(
        name='parsers/skeleton', code_name='skeleton', domain='EMS',
        parser_class_name='skeletonparser.SkeletonParserInterface',
        mainfile_mime_re=r'(application/json)|(text/.*)',
        mainfile_contents_re=(r'skeleton experimental metadata format')
    ),
    LegacyParser(
        name='parsers/mpes', code_name='mpes', domain='EMS',
        parser_class_name='mpesparser.MPESParserInterface',
        mainfile_mime_re=r'(application/json)|(text/.*)',
        mainfile_name_re=(r'.*\.meta'),
        mainfile_contents_re=(r'"data_repository_name": "zenodo\.org"')
    ),
    LegacyParser(
        # NOTE(review): code_name is 'mpes' here although this is the aptfim parser;
        # looks like a copy from the entry above - confirm before changing.
        name='parsers/aptfim', code_name='mpes', domain='EMS',
        parser_class_name='aptfimparser.APTFIMParserInterface',
        mainfile_mime_re=r'(application/json)|(text/.*)',
        mainfile_name_re=(r'.*\.aptfim')
    ),
    LegacyParser(
        name='parsers/qbox', code_name='qbox', domain='DFT',
        parser_class_name='qboxparser.QboxParser',
        mainfile_mime_re=r'(application/xml)|(text/.*)',
        mainfile_contents_re=(r'http://qboxcode\.org')
    ),
    LegacyParser(
        name='parsers/dmol', code_name='DMol3', domain='DFT',
        parser_class_name='dmol3parser.Dmol3Parser',
        mainfile_name_re=r'.*\.outmol',
        mainfile_contents_re=r'Materials Studio DMol\^3'
    ),
    # NOTE(review): the next three names use the prefix 'parser/' while every other
    # entry uses 'parsers/'. They are left unchanged because the name is the lookup key
    # of parser_dict - confirm that nothing depends on them before renaming.
    LegacyParser(
        name='parser/fleur', code_name='fleur', domain='DFT',
        parser_class_name='fleurparser.FleurParser',
        mainfile_contents_re=r'This output is generated by fleur\.'
    ),
    LegacyParser(
        name='parser/molcas', code_name='MOLCAS', domain='DFT',
        parser_class_name='molcasparser.MolcasParser',
        mainfile_contents_re=r'M O L C A S'
    ),
    LegacyParser(
        name='parser/onetep', code_name='ONETEP', domain='DFT',
        parser_class_name='onetepparser.OnetepParser',
        mainfile_contents_re=r'####### #     # ####### ####### ####### ######'
    ),
    # There are some entries with PIDs that have mainfiles which do not match what
    # the actual parsers expect. We use the EmptyParser to produce placeholder entries
    # to keep the PIDs. These parsers will not match for new, non migrated data.
    EmptyParser(
        name='missing/octopus', code_name='Octopus', domain='DFT',
        mainfile_name_re=r'(inp)|(.*/inp)'
    ),
    EmptyParser(
        name='missing/crystal', code_name='Crystal',
        mainfile_name_re=r'.*\.cryst\.out'
    ),
    EmptyParser(
        name='missing/wien2k', code_name='WIEN2k',
        mainfile_name_re=r'.*\.scf'
    ),
    EmptyParser(
        name='missing/fhi-aims', code_name='FHI-aims', domain='DFT',
        mainfile_name_re=r'.*\.fhiaims'
    ),
    BrokenParser()
]
444

445
""" Instantiation and constructor based config of all parsers. """
446

Markus Scheidgen's avatar
Markus Scheidgen committed
447
# Built in list order: if two parsers shared a name, the later one would win.
parser_dict = {p.name: p for p in parsers}  # type: ignore
448
""" A dict to access parsers by name. Usually 'parsers/<...>', e.g. 'parsers/vasp'. """