__init__.py 18.8 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
16
The *parsing* module is an interface for the existing NOMAD-coe parsers.
17
18
This module redefines some of the old NOMAD-coe python-common functionality to create a
more coherent interface to the parsers.
19
20
21
22
23
24
25
26

Assumptions about parsers
-------------------------
For now, we make a few assumptions about parsers:

- they always work on the same *meta-info* version
- they have no conflicting python requirements
- they can be loaded at the same time and can be used within the same python process
- they are uniquely identified by a GIT URL and publicly accessible
27
- their version is uniquely identified by a GIT commit SHA
28
29
30
31
32
33

Each parser is defined via an instance of :class:`Parser`.

.. autoclass:: nomad.parsing.Parser
    :members:

Markus Scheidgen's avatar
Markus Scheidgen committed
34
35
36
37
The implementation :class:`LegacyParser` is used for most NOMAD-coe parsers.

.. autoclass:: nomad.parsing.LegacyParser

38
39
40
41
42
The parser definitions are available via the following two variables.

.. autodata:: nomad.parsing.parsers
.. autodata:: nomad.parsing.parser_dict

Markus Scheidgen's avatar
Markus Scheidgen committed
43
44
Parsers are reused for multiple calculations.

45
46
Parsers and calculation files are matched via regular expressions.

Markus Scheidgen's avatar
Markus Scheidgen committed
47
.. autofunction:: nomad.parsing.match_parser
48

Markus Scheidgen's avatar
Markus Scheidgen committed
49
Parsers in NOMAD-coe use a *backend* to create output. There are different NOMAD-coe
Markus Scheidgen's avatar
Markus Scheidgen committed
50
backends. In nomad@FAIRDI, we currently only use a single backend, a version of
Markus Scheidgen's avatar
Markus Scheidgen committed
51
52
53
NOMAD-coe's *LocalBackend*. It stores all parser results in memory. The following
classes provide an interface definition for *backends* as an ABC and a concrete implementation
based on NOMAD-coe's *python-common* module.
54
55
56
57
58

.. autoclass:: nomad.parsing.AbstractParserBackend
    :members:
.. autoclass:: nomad.parsing.LocalBackend
    :members:
Markus Scheidgen's avatar
Markus Scheidgen committed
59

60
"""
61
from typing import Callable, IO, Union
62
import magic
63
64
import gzip
import bz2
65
import os.path
66

67
from nomad import files, config
68
69

from nomad.parsing.backend import AbstractParserBackend, LocalBackend, LegacyLocalBackend, JSONStreamWriter, BadContextURI, WrongContextState
70
from nomad.parsing.parser import Parser, LegacyParser, VaspOutcarParser, BrokenParser, MissingParser
71
from nomad.parsing.artificial import TemplateParser, GenerateRandomParser, ChaosParser
72

73

74
75
76
77
78
79
# Maps the first three bytes of a file to a pair of (compression name, open function).
# ``match_parser`` looks up the leading bytes of a mainfile here; the name is handed
# to ``Parser.is_mainfile`` and the function is used to read the file's contents.
# Files whose leading bytes are not listed are treated as uncompressed.
_compressions = {
    # gzip signature
    b'\x1f\x8b\x08': ('gz', gzip.open),
    # bzip2 signature (the ASCII bytes b'BZh')
    b'\x42\x5a\x68': ('bz2', bz2.open)
}


80
def match_parser(mainfile: str, upload_files: Union[str, files.StagingUploadFiles]) -> 'Parser':
    """
    Finds the first registered parser that accepts the given mainfile.

    The mainfile is resolved to a path on disk and its first bytes are read. If the
    file starts with a known compression signature (see ``_compressions``), the bytes
    are read through the respective decompressing ``open`` function. Each parser in
    :data:`parsers` that belongs to the configured domain is then asked, in list
    order, via ``is_mainfile`` whether it accepts the file. The decision is based on
    the file path, the mime type guessed from the leading bytes, the leading bytes
    themselves, and the detected compression.

    Arguments:
        mainfile: The upload relative path to the mainfile
        upload_files: Either a :class:`files.StagingUploadFiles` object or a directory name.
            Directory name + mainfile needs to point to the file.

    Returns: The parser, or None if no parser could be matched.
    """
    if isinstance(upload_files, str):
        path = os.path.join(upload_files, mainfile)
    else:
        path = upload_files.raw_file_object(mainfile).os_path

    # Sniff the first three bytes to decide how the file has to be opened.
    with open(path, 'rb') as raw:
        signature = raw.read(3)
    compression, opener = _compressions.get(signature, (None, open))

    # Only the first 2048 bytes (after decompression, if any) are used for matching.
    with opener(path, 'rb') as stream:
        head = stream.read(2048)

    mime_type = magic.from_buffer(head, mime=True)

    # TODO: deal with multiple possible parser specs
    # The generator is lazy: parsers are probed in list order and probing stops at
    # the first one that accepts the file.
    candidates = (
        candidate for candidate in parsers
        if candidate.domain == config.domain
        and candidate.is_mainfile(path, mime_type, head, compression))
    return next(candidates, None)


117
# All parser instances known to this module, configured via constructor arguments.
#
# The order of this list is significant: ``match_parser`` walks it from top to bottom
# and returns the first parser whose ``is_mainfile`` accepts the file. More specific
# parsers therefore have to come before more general ones; the filename-only
# fallbacks and ``BrokenParser`` sit at the very end.
#
# Adjacent raw string literals inside parentheses are concatenated by Python into a
# single regular expression before they are passed to the parser.
parsers = [
    GenerateRandomParser(),
    TemplateParser(),
    ChaosParser(),
    LegacyParser(
        name='parsers/phonopy', code_name='Phonopy',
        parser_class_name='phonopyparser.PhonopyParserWrapper',
        # mainfile_contents_re=r'',  # Empty regex since this code calls other DFT codes.
        mainfile_name_re=(r'.*/phonopy-FHI-aims-displacement-0*1/control.in$')
    ),
    LegacyParser(
        name='parsers/vasp', code_name='VASP',
        parser_class_name='vaspparser.VASPRunParserInterface',
        mainfile_mime_re=r'(application/xml)|(text/.*)',
        # After concatenation, each leading ``?`` attaches to the ``\s*`` (or ``</i>``)
        # that ends the previous literal, e.g. ``\s*?`` (lazy whitespace).
        mainfile_contents_re=(
            r'^\s*<\?xml version="1\.0" encoding="ISO-8859-1"\?>\s*'
            r'?\s*<modeling>'
            r'?\s*<generator>'
            r'?\s*<i name="program" type="string">\s*vasp\s*</i>'
            r'?'),
        supported_compressions=['gz', 'bz2']
    ),
    VaspOutcarParser(
        name='parsers/vasp-outcar', code_name='VASP',
        parser_class_name='vaspparser.VaspOutcarParser',
        mainfile_name_re=r'(.*/)?OUTCAR(\.[^\.]*)?',
        mainfile_contents_re=(r'^\svasp\.')
    ),
    LegacyParser(
        name='parsers/exciting', code_name='exciting',
        parser_class_name='excitingparser.ExcitingParser',
        # NOTE(review): the ``.`` before ``OUT`` is unescaped (matches any character) and
        # the trailing ``?`` makes the ``T`` optional -- confirm this is intended.
        mainfile_name_re=r'^.*.OUT?',
        mainfile_contents_re=(r'EXCITING.*started')
    ),
    LegacyParser(
        name='parsers/fhi-aims', code_name='FHI-aims',
        parser_class_name='fhiaimsparser.FHIaimsParser',
        # After concatenation this reads ``^(.*\n)*?\s*Invoking FHI-aims \.\.\.``,
        # i.e. lazily skip leading lines until the banner is found.
        mainfile_contents_re=(
            r'^(.*\n)*'
            r'?\s*Invoking FHI-aims \.\.\.'
            # r'?\s*Version'
        )
    ),
    LegacyParser(
        name='parsers/cp2k', code_name='CP2K',
        parser_class_name='cp2kparser.CP2KParser',
        mainfile_contents_re=(
            r'\*\*\*\* \*\*\*\* \*\*\*\*\*\*  \*\*  PROGRAM STARTED AT\s.*\n'
            r' \*\*\*\*\* \*\* \*\*\*  \*\*\* \*\*   PROGRAM STARTED ON\s*.*\n'
            r' \*\*    \*\*\*\*   \*\*\*\*\*\*    PROGRAM STARTED BY .*\n'
            r' \*\*\*\*\* \*\*    \*\* \*\* \*\*   PROGRAM PROCESS ID .*\n'
            r'  \*\*\*\* \*\*  \*\*\*\*\*\*\*  \*\*  PROGRAM STARTED IN .*\n'
        )
    ),
    LegacyParser(
        name='parsers/crystal', code_name='Crystal',
        parser_class_name='crystalparser.CrystalParser',
        mainfile_contents_re=(
            r'(CRYSTAL\s*\n0 0 0)|('
            r'\s*\*\s{10,}CRYSTAL(?P<majorVersion>[\d]+)\s{10,}\*'
            r'\s*\*\s{10,}public \: (?P<minorVersion>[\d\.]+) \- .*\*)'
        )
    ),
    # The main contents regex of CPMD was causing a catastrophic backtracking issue
    # when searching through the first 500 bytes of main files. We decided
    # to use only a portion of the regex to avoid that issue.
    # NOTE(review): ``match_parser`` currently reads 2048 bytes, not 500 -- the number
    # above may be outdated.
    LegacyParser(
        name='parsers/cpmd', code_name='CPMD',
        parser_class_name='cpmdparser.CPMDParser',
        mainfile_contents_re=(
            # r'\s+\*\*\*\*\*\*  \*\*\*\*\*\*    \*\*\*\*  \*\*\*\*  \*\*\*\*\*\*\s*'
            # r'\s+\*\*\*\*\*\*\*  \*\*\*\*\*\*\*   \*\*\*\*\*\*\*\*\*\*  \*\*\*\*\*\*\*\s+'
            r'\*\*\*       \*\*   \*\*\*  \*\* \*\*\*\* \*\*  \*\*   \*\*\*'
            # r'\s+\*\*        \*\*   \*\*\*  \*\*  \*\*  \*\*  \*\*    \*\*\s+'
            # r'\s+\*\*        \*\*\*\*\*\*\*   \*\*      \*\*  \*\*    \*\*\s+'
            # r'\s+\*\*\*       \*\*\*\*\*\*    \*\*      \*\*  \*\*   \*\*\*\s+'
            # r'\s+\*\*\*\*\*\*\*  \*\*        \*\*      \*\*  \*\*\*\*\*\*\*\s+'
            # r'\s+\*\*\*\*\*\*  \*\*        \*\*      \*\*  \*\*\*\*\*\*\s+'
        )
    ),
    LegacyParser(
        name='parsers/nwchem', code_name='NWChem',
        parser_class_name='nwchemparser.NWChemParser',
        mainfile_contents_re=(
            r'Northwest Computational Chemistry Package \(NWChem\) (\d+\.)+\d+'
        )
    ),
    LegacyParser(
        name='parsers/bigdft', code_name='BigDFT',
        parser_class_name='bigdftparser.BigDFTParser',
        # Only a single line of the BigDFT ASCII-art banner is matched; the remaining
        # banner lines are kept below as commented-out reference.
        mainfile_contents_re=(
            # r'__________________________________ A fast and precise DFT wavelet code\s*'
            # r'\|     \|     \|     \|     \|     \|\s*'
            # r'\|     \|     \|     \|     \|     \|      BBBB         i       gggggg\s*'
            # r'\|_____\|_____\|_____\|_____\|_____\|     B    B               g\s*'
            # r'\|     \|  :  \|  :  \|     \|     \|    B     B        i     g\s*'
            # r'\|     \|-0\+--\|-0\+--\|     \|     \|    B    B         i     g        g\s*'
            r'\|_____\|__:__\|__:__\|_____\|_____\|___ BBBBB          i     g         g\s*'
            # r'\|  :  \|     \|     \|  :  \|     \|    B    B         i     g         g\s*'
            # r'\|--\+0-\|     \|     \|-0\+--\|     \|    B     B     iiii     g         g\s*'
            # r'\|__:__\|_____\|_____\|__:__\|_____\|    B     B        i      g        g\s*'
            # r'\|     \|  :  \|  :  \|     \|     \|    B BBBB        i        g      g\s*'
            # r'\|     \|-0\+--\|-0\+--\|     \|     \|    B        iiiii          gggggg\s*'
            # r'\|_____\|__:__\|__:__\|_____\|_____\|__BBBBB\s*'
            # r'\|     \|     \|     \|  :  \|     \|                           TTTTTTTTT\s*'
            # r'\|     \|     \|     \|--\+0-\|     \|  DDDDDD          FFFFF        T\s*'
            # r'\|_____\|_____\|_____\|__:__\|_____\| D      D        F        TTTT T\s*'
            # r'\|     \|     \|     \|  :  \|     \|D        D      F        T     T\s*'
            # r'\|     \|     \|     \|--\+0-\|     \|D         D     FFFF     T     T\s*'
            # r'\|_____\|_____\|_____\|__:__\|_____\|D___      D     F         T    T\s*'
            # r'\|     \|     \|  :  \|     \|     \|D         D     F          TTTTT\s*'
            # r'\|     \|     \|--\+0-\|     \|     \| D        D     F         T    T\s*'
            # r'\|_____\|_____\|__:__\|_____\|_____\|          D     F        T     T\s*'
            # r'\|     \|     \|     \|     \|     \|         D               T    T\s*'
            # r'\|     \|     \|     \|     \|     \|   DDDDDD       F         TTTT\s*'
            # r'\|_____\|_____\|_____\|_____\|_____\|______                    www\.bigdft\.org'
        )
    ),
    LegacyParser(
        name='parsers/wien2k', code_name='WIEN2k',
        parser_class_name='wien2kparser.Wien2kParser',
        mainfile_contents_re=r':LABEL\d+: using WIEN2k_\d+\.\d+'
    ),
    LegacyParser(
        name='parsers/band', code_name='BAND',
        parser_class_name='bandparser.BANDParser',
        mainfile_contents_re=r' +\* +Amsterdam Density Functional +\(ADF\)'),
    LegacyParser(
        name='parsers/gaussian', code_name='Gaussian',
        parser_class_name='gaussianparser.GaussianParser',
        # This previous file matching string was too far down the line.
        # r'\s*Cite this work as:'
        # r'\s*Gaussian [0-9]+, Revision [A-Za-z0-9.]*,'
        # r'\s\*\*\*\*\*\*\*\*\*\*\*\**'
        # r'\s*Gaussian\s*([0-9]+):\s*([A-Za-z0-9-.]+)\s*([0-9][0-9]?\-[A-Z][a-z][a-z]\-[0-9]+)'
        # r'\s*([0-9][0-9]?\-[A-Z][a-z][a-z]\-[0-9]+)')
        mainfile_contents_re=r'Gaussian, Inc'),
    LegacyParser(
        name='parsers/quantumespresso', code_name='Quantum Espresso',
        parser_class_name='quantumespressoparser.QuantumEspressoParserPWSCF',
        mainfile_contents_re=r'(Program PWSCF)(.|\n)*(This program is part of the open-source Quantum)'
        #    r'^(.*\n)*'
        #    r'\s*Program (\S+)\s+v\.(\S+)(?:\s+\(svn\s+rev\.\s+'
        #    r'(\d+)\s*\))?\s+starts[^\n]+'
        #    r'(?:\s*\n?)*This program is part of the open-source Quantum')
    ),
    LegacyParser(
        name='parsers/abinit', code_name='ABINIT',
        parser_class_name='abinitparser.AbinitParser',
        mainfile_contents_re=(r'^\n*\.Version\s*[0-9.]*\s*of ABINIT\s*')
    ),
    LegacyParser(
        name='parsers/orca', code_name='ORCA',
        parser_class_name='orcaparser.OrcaParser',
        mainfile_contents_re=(
            r'\s+\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*'
            r'\s+\* O   R   C   A \*\s*'
            r'\s+\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*'
            r'\s*'
            r'\s*--- An Ab Initio, DFT and Semiempirical electronic structure package ---\s*')
    ),
    LegacyParser(
        name='parsers/castep', code_name='CASTEP',
        parser_class_name='castepparser.CastepParser',
        mainfile_contents_re=(r'\s\|\s*CCC\s*AA\s*SSS\s*TTTTT\s*EEEEE\s*PPPP\s*\|\s*')
    ),
    LegacyParser(
        name='parsers/dl-poly', code_name='DL_POLY',
        parser_class_name='dlpolyparser.DlPolyParserWrapper',
        mainfile_contents_re=(r'\*\* DL_POLY \*\*')
    ),
    LegacyParser(
        name='parsers/lib-atoms', code_name='libAtoms',
        parser_class_name='libatomsparser.LibAtomsParserWrapper',
        mainfile_contents_re=(r'\s*<GAP_params\s')
    ),
    LegacyParser(
        name='parsers/octopus', code_name='Octopus',
        parser_class_name='octopusparser.OctopusParserWrapper',
        mainfile_contents_re=(r'\|0\) ~ \(0\) \|')
        # We decided to use the octopus eyes instead of
        # r'\*{32} Grid \*{32}Simulation Box:' since it was so far down in the file.
    ),
    LegacyParser(
        name='parsers/gpaw', code_name='GPAW',
        parser_class_name='gpawparser.GPAWParserWrapper',
        mainfile_name_re=(r'^.*\.gpw$'),
        mainfile_mime_re=r'application/x-tar'
    ),
    LegacyParser(
        name='parsers/gpaw2', code_name='GPAW',
        parser_class_name='gpawparser.GPAWParser2Wrapper',
        # mainfile_contents_re=r'',  # We can't read .gpw2 to match AFFormatGPAW'
        mainfile_name_re=(r'^.*\.gpw2$'),
        mainfile_mime_re=r'application/x-tar'
    ),
    LegacyParser(
        name='parsers/atk', code_name='ATK',
        parser_class_name='atkparser.ATKParserWrapper',
        # mainfile_contents_re=r'',  # We can't read .gpw as txt - of UlmGPAW|AFFormatGPAW'
        mainfile_name_re=r'^.*\.nc',
        # The previously used mime type r'application/x-netcdf' wasn't found by magic library.
        mainfile_mime_re=r'application/octet-stream'
    ),
    LegacyParser(
        name='parsers/gulp', code_name='gulp',
        parser_class_name='gulpparser.GULPParser',
        mainfile_contents_re=(
            r'\s*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*'
            r'\*\*\*\*\*\*\*\*\*\*\*\*\*\s*'
            r'\s*\*\s*GENERAL UTILITY LATTICE PROGRAM\s*\*\s*')
    ),
    LegacyParser(
        name='parsers/siesta', code_name='Siesta',
        parser_class_name='siestaparser.SiestaParser',
        mainfile_contents_re=(
            r'(Siesta Version: siesta-|SIESTA [0-9]\.[0-9]\.[0-9])')
    ),
    LegacyParser(
        name='parsers/elk', code_name='elk',
        parser_class_name='elkparser.ElkParser',
        mainfile_contents_re=r'\| Elk version [0-9.a-zA-Z]+ started \|'
    ),
    LegacyParser(
        name='parsers/elastic', code_name='elastic',
        parser_class_name='elasticparser.ElasticParser',
        mainfile_contents_re=r'\s*Order of elastic constants\s*=\s*[0-9]+\s*'
    ),
    LegacyParser(
        name='parsers/gamess', code_name='GAMESS',
        parser_class_name='gamessparser.GamessParser',
        mainfile_contents_re=(
            r'\s*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*'
            r'\s*\*\s*GAMESS VERSION =\s*(.*)\*\s*'
            r'\s*\*\s*FROM IOWA STATE UNIVERSITY\s*\*\s*')
    ),
    LegacyParser(
        name='parsers/turbomole', code_name='turbomole',
        parser_class_name='turbomoleparser.TurbomoleParser',
        mainfile_contents_re=(
            r'Copyright \(C\) [0-9]+ TURBOMOLE GmbH, Karlsruhe')
    ),
    # The following three parsers are registered for the 'EMS' domain; ``match_parser``
    # only considers parsers whose domain equals ``config.domain``.
    LegacyParser(
        name='parsers/skeleton', code_name='skeleton', domain='EMS',
        parser_class_name='skeletonparser.SkeletonParserInterface',
        mainfile_mime_re=r'(application/json)|(text/.*)',
        mainfile_contents_re=(r'skeleton experimental metadata format')
    ),
    LegacyParser(
        name='parsers/mpes', code_name='mpes', domain='EMS',
        parser_class_name='mpesparser.MPESParserInterface',
        mainfile_mime_re=r'(application/json)|(text/.*)',
        # NOTE(review): the ``.`` characters before ``meta`` and in ``zenodo.org`` are
        # unescaped and match any character -- confirm whether ``\.`` was intended.
        mainfile_name_re=(r'.*.meta'),
        mainfile_contents_re=(r'"data_repository_name": "zenodo.org"')
    ),
    LegacyParser(
        # NOTE(review): code_name is 'mpes' although this is the aptfim parser --
        # possibly a copy-paste leftover; verify.
        name='parsers/aptfim', code_name='mpes', domain='EMS',
        parser_class_name='aptfimparser.APTFIMParserInterface',
        mainfile_mime_re=r'(application/json)|(text/.*)',
        # NOTE(review): unescaped ``.`` before ``aptfim`` matches any character.
        mainfile_name_re=(r'.*.aptfim')
    ),
    LegacyParser(
        name='parsers/qbox', code_name='qbox', domain='DFT',
        parser_class_name='qboxparser.QboxParser',
        mainfile_mime_re=r'(application/xml)|(text/.*)',
        # NOTE(review): unescaped ``.`` in ``qboxcode.org`` matches any character.
        mainfile_contents_re=(r'http://qboxcode.org')
    ),
    LegacyParser(
        name='parsers/dmol', code_name='DMol3', domain='DFT',
        parser_class_name='dmol3parser.Dmol3Parser',
        mainfile_name_re=r'.*\.outmol',
        mainfile_contents_re=r'Materials Studio DMol\^3'
    ),
    # NOTE(review): the next three names use the prefix 'parser/' while every other
    # entry uses 'parsers/' -- confirm whether this is intentional before relying on
    # name-based lookups.
    LegacyParser(
        name='parser/fleur', code_name='fleur', domain='DFT',
        parser_class_name='fleurparser.FleurParser',
        # NOTE(review): the trailing ``.`` is unescaped and matches any character.
        mainfile_contents_re=r'This output is generated by fleur.'
    ),
    LegacyParser(
        name='parser/molcas', code_name='MOLCAS', domain='DFT',
        parser_class_name='molcasparser.MolcasParser',
        mainfile_contents_re=r'M O L C A S'
    ),
    LegacyParser(
        name='parser/onetep', code_name='ONETEP', domain='DFT',
        parser_class_name='onetepparser.OnetepParser',
        mainfile_contents_re=r'####### #     # ####### ####### ####### ######'
    ),
    # These are supposedly octopus files, but they do not look like octopus files at all
    # TODO We have migrated the wrong octopus mainfiles .. this should be removed now
    # MissingParser(
    #     name='parser/octopus', code_name='Octopus', domain='DFT',
    #     mainfile_name_re=r'(inp)|(.*/inp)'
    # ),
    # The three entries below reuse the names of parsers defined further up and only
    # differ in matching by filename instead of by contents. Because they come later
    # in the list, they are only tried when the contents-based entries did not match.
    # We already have crystal with mainfile_contents_re, but this one does not always properly match
    LegacyParser(
        name='parsers/crystal', code_name='Crystal',
        parser_class_name='crystalparser.CrystalParser',
        mainfile_name_re=r'.*\.cryst\.out'
    ),
    # We already have wien2k with mainfile_contents_re, but this one does not always properly match
    LegacyParser(
        name='parsers/wien2k', code_name='WIEN2k',
        parser_class_name='wien2kparser.Wien2kParser',
        mainfile_name_re=r'.*\.scf'
    ),
    # We already have fhi-aims with mainfile_contents_re, but this one does not always properly match
    LegacyParser(
        name='parsers/fhi-aims', code_name='FHI-aims',
        parser_class_name='fhiaimsparser.FHIaimsParser',
        mainfile_name_re=r'.*\.fhiaims'
    ),
    BrokenParser()
]
431

432
""" Instantiation and constructor based config of all parsers. """
433

Markus Scheidgen's avatar
Markus Scheidgen committed
434
# Index of the parser instances in ``parsers`` by their ``name`` attribute.
# Several entries in ``parsers`` are constructed with the same name (e.g.
# 'parsers/crystal', 'parsers/wien2k', 'parsers/fhi-aims'). Assuming ``name`` reflects
# the constructor argument, the dict comprehension keeps only the instance that
# appears last in the list for each such name.
parser_dict = {parser.name: parser for parser in parsers}  # type: ignore
435
""" A dict to access parsers by name. Usually 'parsers/<...>', e.g. 'parsers/vasp'. """