__init__.py 18.6 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
The *parsing* module is an interface for the existing NOMAD-coe parsers.
This module redefines some of the old NOMAD-coe python-common functionality to create a
more coherent interface to the parsers.

Assumptions about parsers
-------------------------
For now, we make a few assumptions about parsers:

- they always work on the same *meta-info* version
- they have no conflicting python requirements
- they can be loaded at the same time and can be used within the same python process
- they are uniquely identified by a GIT URL and publicly accessible
- their version is uniquely identified by a GIT commit SHA

Each parser is defined via an instance of :class:`Parser`.

.. autoclass:: nomad.parsing.Parser
    :members:

The implementation :class:`LegacyParser` is used for most NOMAD-coe parsers.

.. autoclass:: nomad.parsing.LegacyParser

The parser definitions are available via the following two variables.

.. autodata:: nomad.parsing.parsers
.. autodata:: nomad.parsing.parser_dict

Parsers are reused for multiple calculations.

Parsers and calculation files are matched via regular expressions.

.. autofunction:: nomad.parsing.match_parser

Parsers in NOMAD-coe use a *backend* to create output. There are different NOMAD-coe
backends. In nomad@FAIRDI, we currently only use a single backend: a version of
NOMAD-coe's *LocalBackend*. It stores all parser results in memory. The following
classes provide an interface definition for *backends* as an ABC and a concrete implementation
based on NOMAD-coe's *python-common* module.

.. autoclass:: nomad.parsing.AbstractParserBackend
    :members:
.. autoclass:: nomad.parsing.LocalBackend
    :members:

"""
61
from typing import Callable, IO, Union
62
import magic
63
64
import gzip
import bz2
65
import os.path
66

67
from nomad import files, config
68
69

from nomad.parsing.backend import AbstractParserBackend, LocalBackend, LegacyLocalBackend, JSONStreamWriter, BadContextURI, WrongContextState
70
from nomad.parsing.parser import Parser, LegacyParser, VaspOutcarParser, BrokenParser, MissingParser, MatchingParser
71
from nomad.parsing.artificial import TemplateParser, GenerateRandomParser, ChaosParser, EmptyParser
72

73

74
75
76
77
78
79
# Supported compression formats, keyed by the first three bytes of a file.
# Each value pairs the short compression name with the function used to open
# such a file; match_parser falls back to (None, open) for unknown prefixes.
_compressions = dict([
    (b'\x1f\x8b\x08', ('gz', gzip.open)),
    (b'\x42\x5a\x68', ('bz2', bz2.open)),
])


80
def match_parser(mainfile: str, upload_files: Union[str, files.StagingUploadFiles], strict=True) -> 'Parser':
    """
    Finds the first parser in :data:`parsers` that accepts the given mainfile.

    The first three bytes of the file are compared against the known compression
    signatures in ``_compressions``. The file is then opened with the matching opener
    (plain ``open`` if there is none) and its first 2048 bytes are read. The mime
    type is derived from these bytes. Path, mime type, these bytes, and the
    compression name are handed to each candidate's ``is_mainfile``.

    Arguments:
        mainfile: The upload relative path to the mainfile
        upload_files: Either a :class:`files.StagingUploadFiles` object or a directory name.
            Directory name + mainfile needs to point to the file.
        strict: If true, :class:`MissingParser` and :class:`EmptyParser` instances
            are never returned.

    Returns: The parser, or None if no parser could be matched.
    """
    if not isinstance(upload_files, str):
        path = upload_files.raw_file_object(mainfile).os_path
    else:
        path = os.path.join(upload_files, mainfile)

    # sniff the leading bytes to find out how the file has to be opened
    with open(path, 'rb') as raw:
        signature = raw.read(3)
    compression, opener = _compressions.get(signature, (None, open))

    with opener(path, 'rb') as stream:
        head = stream.read(2048)

    mime_type = magic.from_buffer(head, mime=True)

    placeholder_types = (MissingParser, EmptyParser)
    for candidate in parsers:
        if strict and isinstance(candidate, placeholder_types):
            continue

        # only parsers of the configured domain are considered
        if candidate.domain != config.domain:
            continue

        if candidate.is_mainfile(path, mime_type, head, compression):
            # TODO: deal with multiple possible parser specs
            return candidate

    return None


123
# NOTE: the order of this list matters. match_parser walks it front to back and
# returns the first parser whose is_mainfile accepts the file.
#
# In several multi-line patterns a continuation line starts with '?'. Because the
# string literals are concatenated, that '?' applies to the end of the previous
# line (e.g. it turns a trailing '\s*' into the lazy '\s*?').
parsers = [
    GenerateRandomParser(),
    TemplateParser(),
    ChaosParser(),
    LegacyParser(
        name='parsers/phonopy', code_name='Phonopy',
        parser_class_name='phonopyparser.PhonopyParserWrapper',
        # mainfile_contents_re=r'',  # Empty regex since this code calls other DFT codes.
        mainfile_name_re=(r'.*/phonopy-FHI-aims-displacement-0*1/control\.in$')
    ),
    LegacyParser(
        name='parsers/vasp', code_name='VASP',
        parser_class_name='vaspparser.VASPRunParserInterface',
        mainfile_mime_re=r'(application/xml)|(text/.*)',
        # NOTE(review): the final r'?' makes the '>' of '</i>' optional; this is
        # probably an artifact of the '?' continuation style -- confirm.
        mainfile_contents_re=(
            r'^\s*<\?xml version="1\.0" encoding="ISO-8859-1"\?>\s*'
            r'?\s*<modeling>'
            r'?\s*<generator>'
            r'?\s*<i name="program" type="string">\s*vasp\s*</i>'
            r'?'),
        supported_compressions=['gz', 'bz2']
    ),
    VaspOutcarParser(
        name='parsers/vasp-outcar', code_name='VASP',
        parser_class_name='vaspparser.VaspOutcarParser',
        mainfile_name_re=r'(.*/)?OUTCAR(\.[^\.]*)?',
        mainfile_contents_re=(r'^\svasp\.')
    ),
    LegacyParser(
        name='parsers/exciting', code_name='exciting',
        parser_class_name='excitingparser.ExcitingParser',
        # NOTE(review): the trailing 'T?' also accepts names ending in '.OU';
        # kept as is, only the dot of the file extension is escaped.
        mainfile_name_re=r'^.*\.OUT?',
        mainfile_contents_re=(r'EXCITING.*started')
    ),
    LegacyParser(
        name='parsers/fhi-aims', code_name='FHI-aims',
        parser_class_name='fhiaimsparser.FHIaimsParser',
        mainfile_contents_re=(
            r'^(.*\n)*'
            r'?\s*Invoking FHI-aims \.\.\.'
            # r'?\s*Version'
        )
    ),
    LegacyParser(
        name='parsers/cp2k', code_name='CP2K',
        parser_class_name='cp2kparser.CP2KParser',
        mainfile_contents_re=(
            r'\*\*\*\* \*\*\*\* \*\*\*\*\*\*  \*\*  PROGRAM STARTED AT\s.*\n'
            r' \*\*\*\*\* \*\* \*\*\*  \*\*\* \*\*   PROGRAM STARTED ON\s*.*\n'
            r' \*\*    \*\*\*\*   \*\*\*\*\*\*    PROGRAM STARTED BY .*\n'
            r' \*\*\*\*\* \*\*    \*\* \*\* \*\*   PROGRAM PROCESS ID .*\n'
            r'  \*\*\*\* \*\*  \*\*\*\*\*\*\*  \*\*  PROGRAM STARTED IN .*\n'
        )
    ),
    LegacyParser(
        name='parsers/crystal', code_name='Crystal',
        parser_class_name='crystalparser.CrystalParser',
        mainfile_contents_re=(
            r'(CRYSTAL\s*\n0 0 0)|('
            r'\s*\*\s{10,}CRYSTAL(?P<majorVersion>[\d]+)\s{10,}\*'
            r'\s*\*\s{10,}public \: (?P<minorVersion>[\d\.]+) \- .*\*)'
        )
    ),
    # The main contents regex of CPMD was causing a catastrophic backtracking issue
    # when searching through the first 500 bytes of main files. We decided
    # to use only a portion of the regex to avoid that issue.
    LegacyParser(
        name='parsers/cpmd', code_name='CPMD',
        parser_class_name='cpmdparser.CPMDParser',
        mainfile_contents_re=(
            # r'\s+\*\*\*\*\*\*  \*\*\*\*\*\*    \*\*\*\*  \*\*\*\*  \*\*\*\*\*\*\s*'
            # r'\s+\*\*\*\*\*\*\*  \*\*\*\*\*\*\*   \*\*\*\*\*\*\*\*\*\*  \*\*\*\*\*\*\*\s+'
            r'\*\*\*       \*\*   \*\*\*  \*\* \*\*\*\* \*\*  \*\*   \*\*\*'
            # r'\s+\*\*        \*\*   \*\*\*  \*\*  \*\*  \*\*  \*\*    \*\*\s+'
            # r'\s+\*\*        \*\*\*\*\*\*\*   \*\*      \*\*  \*\*    \*\*\s+'
            # r'\s+\*\*\*       \*\*\*\*\*\*    \*\*      \*\*  \*\*   \*\*\*\s+'
            # r'\s+\*\*\*\*\*\*\*  \*\*        \*\*      \*\*  \*\*\*\*\*\*\*\s+'
            # r'\s+\*\*\*\*\*\*  \*\*        \*\*      \*\*  \*\*\*\*\*\*\s+'
        )
    ),
    LegacyParser(
        name='parsers/nwchem', code_name='NWChem',
        parser_class_name='nwchemparser.NWChemParser',
        mainfile_contents_re=(
            r'Northwest Computational Chemistry Package \(NWChem\) (\d+\.)+\d+'
        )
    ),
    LegacyParser(
        name='parsers/bigdft', code_name='BigDFT',
        parser_class_name='bigdftparser.BigDFTParser',
        mainfile_contents_re=(
            # r'__________________________________ A fast and precise DFT wavelet code\s*'
            # r'\|     \|     \|     \|     \|     \|\s*'
            # r'\|     \|     \|     \|     \|     \|      BBBB         i       gggggg\s*'
            # r'\|_____\|_____\|_____\|_____\|_____\|     B    B               g\s*'
            # r'\|     \|  :  \|  :  \|     \|     \|    B     B        i     g\s*'
            # r'\|     \|-0\+--\|-0\+--\|     \|     \|    B    B         i     g        g\s*'
            r'\|_____\|__:__\|__:__\|_____\|_____\|___ BBBBB          i     g         g\s*'
            # r'\|  :  \|     \|     \|  :  \|     \|    B    B         i     g         g\s*'
            # r'\|--\+0-\|     \|     \|-0\+--\|     \|    B     B     iiii     g         g\s*'
            # r'\|__:__\|_____\|_____\|__:__\|_____\|    B     B        i      g        g\s*'
            # r'\|     \|  :  \|  :  \|     \|     \|    B BBBB        i        g      g\s*'
            # r'\|     \|-0\+--\|-0\+--\|     \|     \|    B        iiiii          gggggg\s*'
            # r'\|_____\|__:__\|__:__\|_____\|_____\|__BBBBB\s*'
            # r'\|     \|     \|     \|  :  \|     \|                           TTTTTTTTT\s*'
            # r'\|     \|     \|     \|--\+0-\|     \|  DDDDDD          FFFFF        T\s*'
            # r'\|_____\|_____\|_____\|__:__\|_____\| D      D        F        TTTT T\s*'
            # r'\|     \|     \|     \|  :  \|     \|D        D      F        T     T\s*'
            # r'\|     \|     \|     \|--\+0-\|     \|D         D     FFFF     T     T\s*'
            # r'\|_____\|_____\|_____\|__:__\|_____\|D___      D     F         T    T\s*'
            # r'\|     \|     \|  :  \|     \|     \|D         D     F          TTTTT\s*'
            # r'\|     \|     \|--\+0-\|     \|     \| D        D     F         T    T\s*'
            # r'\|_____\|_____\|__:__\|_____\|_____\|          D     F        T     T\s*'
            # r'\|     \|     \|     \|     \|     \|         D               T    T\s*'
            # r'\|     \|     \|     \|     \|     \|   DDDDDD       F         TTTT\s*'
            # r'\|_____\|_____\|_____\|_____\|_____\|______                    www\.bigdft\.org'
        )
    ),
    LegacyParser(
        name='parsers/wien2k', code_name='WIEN2k',
        parser_class_name='wien2kparser.Wien2kParser',
        mainfile_contents_re=r':LABEL\d+: using WIEN2k_\d+\.\d+'
    ),
    LegacyParser(
        name='parsers/band', code_name='BAND',
        parser_class_name='bandparser.BANDParser',
        mainfile_contents_re=r' +\* +Amsterdam Density Functional +\(ADF\)'),
    LegacyParser(
        name='parsers/gaussian', code_name='Gaussian',
        parser_class_name='gaussianparser.GaussianParser',
        # This previous file matching string was too far down the line.
        # r'\s*Cite this work as:'
        # r'\s*Gaussian [0-9]+, Revision [A-Za-z0-9.]*,'
        # r'\s\*\*\*\*\*\*\*\*\*\*\*\**'
        # r'\s*Gaussian\s*([0-9]+):\s*([A-Za-z0-9-.]+)\s*([0-9][0-9]?\-[A-Z][a-z][a-z]\-[0-9]+)'
        # r'\s*([0-9][0-9]?\-[A-Z][a-z][a-z]\-[0-9]+)')
        mainfile_contents_re=r'Gaussian, Inc'),
    LegacyParser(
        name='parsers/quantumespresso', code_name='Quantum Espresso',
        parser_class_name='quantumespressoparser.QuantumEspressoParserPWSCF',
        mainfile_contents_re=r'Program PWSCF.*starts'
        #    r'^(.*\n)*'
        #    r'\s*Program (\S+)\s+v\.(\S+)(?:\s+\(svn\s+rev\.\s+'
        #    r'(\d+)\s*\))?\s+starts[^\n]+'
        #    r'(?:\s*\n?)*This program is part of the open-source Quantum')
    ),
    LegacyParser(
        name='parsers/abinit', code_name='ABINIT',
        parser_class_name='abinitparser.AbinitParser',
        mainfile_contents_re=(r'^\n*\.Version\s*[0-9.]*\s*of ABINIT\s*')
    ),
    LegacyParser(
        name='parsers/orca', code_name='ORCA',
        parser_class_name='orcaparser.OrcaParser',
        mainfile_contents_re=(
            r'\s+\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*'
            r'\s+\* O   R   C   A \*\s*'
            r'\s+\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*'
            r'\s*'
            r'\s*--- An Ab Initio, DFT and Semiempirical electronic structure package ---\s*')
    ),
    LegacyParser(
        name='parsers/castep', code_name='CASTEP',
        parser_class_name='castepparser.CastepParser',
        mainfile_contents_re=(r'\s\|\s*CCC\s*AA\s*SSS\s*TTTTT\s*EEEEE\s*PPPP\s*\|\s*')
    ),
    LegacyParser(
        name='parsers/dl-poly', code_name='DL_POLY',
        parser_class_name='dlpolyparser.DlPolyParserWrapper',
        mainfile_contents_re=(r'\*\* DL_POLY \*\*')
    ),
    LegacyParser(
        name='parsers/lib-atoms', code_name='libAtoms',
        parser_class_name='libatomsparser.LibAtomsParserWrapper',
        mainfile_contents_re=(r'\s*<GAP_params\s')
    ),
    LegacyParser(
        name='parsers/octopus', code_name='Octopus',
        parser_class_name='octopusparser.OctopusParserWrapper',
        mainfile_contents_re=(r'\|0\) ~ \(0\) \|')
        # We decided to use the octopus eyes instead of
        # r'\*{32} Grid \*{32}Simulation Box:' since it was so far down in the file.
    ),
    LegacyParser(
        name='parsers/gpaw', code_name='GPAW',
        parser_class_name='gpawparser.GPAWParserWrapper',
        mainfile_name_re=(r'^.*\.gpw$'),
        mainfile_mime_re=r'application/x-tar'
    ),
    LegacyParser(
        name='parsers/gpaw2', code_name='GPAW',
        parser_class_name='gpawparser.GPAWParser2Wrapper',
        # mainfile_contents_re=r'',  # We can't read .gpw2 to match AFFormatGPAW'
        mainfile_name_re=(r'^.*\.gpw2$'),
        mainfile_mime_re=r'application/x-tar'
    ),
    LegacyParser(
        name='parsers/atk', code_name='ATK',
        parser_class_name='atkparser.ATKParserWrapper',
        # mainfile_contents_re=r'',  # We can't read .gpw as txt - of UlmGPAW|AFFormatGPAW'
        mainfile_name_re=r'^.*\.nc',
        # The previously used mime type r'application/x-netcdf' wasn't found by magic library.
        mainfile_mime_re=r'application/octet-stream'
    ),
    LegacyParser(
        name='parsers/gulp', code_name='gulp',
        parser_class_name='gulpparser.GULPParser',
        mainfile_contents_re=(
            r'\s*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*'
            r'\*\*\*\*\*\*\*\*\*\*\*\*\*\s*'
            r'\s*\*\s*GENERAL UTILITY LATTICE PROGRAM\s*\*\s*')
    ),
    LegacyParser(
        name='parsers/siesta', code_name='Siesta',
        parser_class_name='siestaparser.SiestaParser',
        mainfile_contents_re=(
            r'(Siesta Version: siesta-|SIESTA [0-9]\.[0-9]\.[0-9])')
    ),
    LegacyParser(
        name='parsers/elk', code_name='elk',
        parser_class_name='elkparser.ElkParser',
        mainfile_contents_re=r'\| Elk version [0-9.a-zA-Z]+ started \|'
    ),
    LegacyParser(
        name='parsers/elastic', code_name='elastic',
        parser_class_name='elasticparser.ElasticParser',
        mainfile_contents_re=r'\s*Order of elastic constants\s*=\s*[0-9]+\s*'
    ),
    LegacyParser(
        name='parsers/gamess', code_name='GAMESS',
        parser_class_name='gamessparser.GamessParser',
        mainfile_contents_re=(
            r'\s*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*'
            r'\s*\*\s*GAMESS VERSION =\s*(.*)\*\s*'
            r'\s*\*\s*FROM IOWA STATE UNIVERSITY\s*\*\s*')
    ),
    LegacyParser(
        name='parsers/turbomole', code_name='turbomole',
        parser_class_name='turbomoleparser.TurbomoleParser',
        mainfile_contents_re=(
            r'Copyright \(C\) [0-9]+ TURBOMOLE GmbH, Karlsruhe')
    ),
    LegacyParser(
        name='parsers/skeleton', code_name='skeleton', domain='EMS',
        parser_class_name='skeletonparser.SkeletonParserInterface',
        mainfile_mime_re=r'(application/json)|(text/.*)',
        mainfile_contents_re=(r'skeleton experimental metadata format')
    ),
    LegacyParser(
        name='parsers/mpes', code_name='mpes', domain='EMS',
        parser_class_name='mpesparser.MPESParserInterface',
        mainfile_mime_re=r'(application/json)|(text/.*)',
        mainfile_name_re=(r'.*\.meta'),
        mainfile_contents_re=(r'"data_repository_name": "zenodo\.org"')
    ),
    LegacyParser(
        # NOTE(review): code_name is 'mpes' although this is the aptfim parser;
        # looks like a copy/paste leftover -- confirm before changing.
        name='parsers/aptfim', code_name='mpes', domain='EMS',
        parser_class_name='aptfimparser.APTFIMParserInterface',
        mainfile_mime_re=r'(application/json)|(text/.*)',
        mainfile_name_re=(r'.*\.aptfim')
    ),
    LegacyParser(
        name='parsers/qbox', code_name='qbox', domain='DFT',
        parser_class_name='qboxparser.QboxParser',
        mainfile_mime_re=r'(application/xml)|(text/.*)',
        mainfile_contents_re=(r'http://qboxcode\.org')
    ),
    LegacyParser(
        name='parsers/dmol', code_name='DMol3', domain='DFT',
        parser_class_name='dmol3parser.Dmol3Parser',
        mainfile_name_re=r'.*\.outmol',
        mainfile_contents_re=r'Materials Studio DMol\^3'
    ),
    # NOTE(review): the following three names use the prefix 'parser/' while all
    # other entries use 'parsers/'. The names are the keys of parser_dict; they are
    # left untouched here because they might be referenced elsewhere -- confirm.
    LegacyParser(
        name='parser/fleur', code_name='fleur', domain='DFT',
        parser_class_name='fleurparser.FleurParser',
        mainfile_contents_re=r'This output is generated by fleur.'
    ),
    LegacyParser(
        name='parser/molcas', code_name='MOLCAS', domain='DFT',
        parser_class_name='molcasparser.MolcasParser',
        mainfile_contents_re=r'M O L C A S'
    ),
    LegacyParser(
        name='parser/onetep', code_name='ONETEP', domain='DFT',
        parser_class_name='onetepparser.OnetepParser',
        mainfile_contents_re=r'####### #     # ####### ####### ####### ######'
    ),
    # There are some entries with PIDs that have mainfiles which do not match what
    # the actual parsers expect. We use the EmptyParser to produce placeholder entries
    # to keep the PIDs. These parsers will not match for new, non migrated data.
    EmptyParser(
        name='missing/octopus', code_name='Octopus', domain='DFT',
        mainfile_name_re=r'(inp)|(.*/inp)'
    ),
    EmptyParser(
        name='missing/crystal', code_name='Crystal',
        mainfile_name_re=r'.*\.cryst\.out'
    ),
    EmptyParser(
        name='missing/wien2k', code_name='WIEN2k',
        mainfile_name_re=r'.*\.scf'
    ),
    EmptyParser(
        name='missing/fhi-aims', code_name='FHI-aims', domain='DFT',
        mainfile_name_re=r'.*\.fhiaims'
    ),
    BrokenParser()
]

""" Instantiation and constructor based config of all parsers. """
434

Markus Scheidgen's avatar
Markus Scheidgen committed
435
# Name -> parser lookup built from the parsers list; if two parsers shared a name,
# the one later in the list would win.
parser_dict = dict((entry.name, entry) for entry in parsers)  # type: ignore
""" A dict to access parsers by name. Usually 'parsers/<...>', e.g. 'parsers/vasp'. """