__init__.py 18.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
16
The *parsing* module is an interface for the existing NOMAD-coe parsers.
17
18
This module redefines some of the old NOMAD-coe python-common functionality to create a
more coherent interface to the parsers.
19
20
21
22
23
24
25
26

Assumption about parsers
------------------------
For now, we make a few assumptions about parsers
- they always work on the same *meta-info* version
- they have no conflicting python requirements
- they can be loaded at the same time and can be used within the same python process
- they are uniquely identified by a GIT URL and publicly accessible
27
- their version is uniquely identified by a GIT commit SHA
28
29
30
31
32
33

Each parser is defined via an instance of :class:`Parser`.

.. autoclass:: nomad.parsing.Parser
    :members:

Markus Scheidgen's avatar
Markus Scheidgen committed
34
35
36
37
The implementation :class:`LegacyParser` is used for most NOMAD-coe parsers.

.. autoclass:: nomad.parsing.LegacyParser

38
39
40
41
42
The parser definitions are available via the following two variables.

.. autodata:: nomad.parsing.parsers
.. autodata:: nomad.parsing.parser_dict

Markus Scheidgen's avatar
Markus Scheidgen committed
43
44
Parsers are reused for multiple calculations.

45
46
Parsers and calculation files are matched via regular expressions.

Markus Scheidgen's avatar
Markus Scheidgen committed
47
.. autofunction:: nomad.parsing.match_parser
48

Markus Scheidgen's avatar
Markus Scheidgen committed
49
Parsers in NOMAD-coe use a *backend* to create output. There are different NOMAD-coe
backends. In nomad@FAIRDI, we currently only use a single backend: a version of
NOMAD-coe's *LocalBackend*. It stores all parser results in memory. The following
classes provide an interface definition for *backends* as an ABC and a concrete implementation
based on NOMAD-coe's *python-common* module.
54
55
56
57
58

.. autoclass:: nomad.parsing.AbstractParserBackend
    :members:
.. autoclass:: nomad.parsing.LocalBackend
    :members:
Markus Scheidgen's avatar
Markus Scheidgen committed
59

60
"""
61
62
from typing import Callable, IO
import magic
63
64
65
import gzip
import bz2

66
from nomad import files, config
67
68

from nomad.parsing.backend import AbstractParserBackend, LocalBackend, LegacyLocalBackend, JSONStreamWriter, BadContextURI, WrongContextState
69
from nomad.parsing.parser import Parser, LegacyParser, VaspOutcarParser, BrokenParser, MissingParser
70
from nomad.parsing.artificial import TemplateParser, GenerateRandomParser, ChaosParser
71

72

73
74
75
76
77
78
79
# Maps the leading three bytes of a file (its "magic number") to the name of the
# compression and a function that opens such a file decompressed.
# b'\x1f\x8b\x08' is the gzip header (with the deflate method byte) and
# b'\x42\x5a\x68' is the ASCII text 'BZh' that starts bzip2 files.
_compressions = {
    b'\x1f\x8b\x08': ('gz', gzip.open),
    b'\x42\x5a\x68': ('bz2', bz2.open)
}


def match_parser(mainfile: str, upload_files: files.StagingUploadFiles) -> 'Parser':
    """
    Performs parser matching. This means it takes the given mainfile, reads the
    beginning of it from the given upload files, and tries to identify a parser
    that can parse the file.

    This is determined by filename (e.g. *.out), mime type (e.g. text/*, application/xml),
    and beginning file contents.

    Only parsers whose domain equals the configured ``config.domain`` are considered;
    the first such parser (in the order of ``parsers``) that accepts the file wins.

    Arguments:
        mainfile: The upload relative path to the mainfile
        upload_files: The upload files that the mainfile is read from

    Returns: The parser, or None if no parser could be matched.
    """
    # Sniff the first three bytes to detect a compressed mainfile. Anything that is
    # not a known compression is read as is with the builtin ``open``.
    with upload_files.raw_file(mainfile, 'rb') as f:
        compression, open_compressed = _compressions.get(f.read(3), (None, open))

    # Open the file again via its os path, this time with the (possibly
    # decompressing) opener, so that the parsers get to see uncompressed contents.
    mainfile_path = upload_files.raw_file_object(mainfile).os_path
    with open_compressed(mainfile_path, 'rb') as f:
        buffer = f.read(2048)

    mime_type = magic.from_buffer(buffer, mime=True)
    for parser in parsers:
        if parser.domain != config.domain:
            continue

        if parser.is_mainfile(mainfile_path, mime_type, buffer, compression):
            # TODO: deal with multiple possible parser specs
            return parser

    return None


110
# The order of this list matters: match_parser returns the first entry that accepts
# a file. Name-only fallback matchers and the BrokenParser are therefore placed
# at the end.
#
# Several ``mainfile_contents_re`` values are written as adjacent raw strings where a
# continuation starts with ``?``. After concatenation this ``?`` directly follows the
# ``*`` that ends the previous string and turns it into a lazy ``*?`` quantifier.
parsers = [
    GenerateRandomParser(),
    TemplateParser(),
    ChaosParser(),
    LegacyParser(
        name='parsers/phonopy', code_name='Phonopy',
        parser_class_name='phonopyparser.PhonopyParserWrapper',
        # mainfile_contents_re=r'',  # Empty regex since this code calls other DFT codes.
        mainfile_name_re=(r'.*/phonopy-FHI-aims-displacement-0*1/control.in$')
    ),
    LegacyParser(
        name='parsers/vasp', code_name='VASP',
        parser_class_name='vaspparser.VASPRunParserInterface',
        mainfile_mime_re=r'(application/xml)|(text/.*)',
        mainfile_contents_re=(
            r'^\s*<\?xml version="1\.0" encoding="ISO-8859-1"\?>\s*'
            r'?\s*<modeling>'
            r'?\s*<generator>'
            r'?\s*<i name="program" type="string">\s*vasp\s*</i>'
            r'?'),
        supported_compressions=['gz', 'bz2']
    ),
    VaspOutcarParser(
        name='parsers/vasp-outcar', code_name='VASP',
        parser_class_name='vaspparser.VaspOutcarParser',
        mainfile_name_re=r'(.*/)?OUTCAR(\.[^\.]*)?',
        mainfile_contents_re=(r'^\svasp\.')
    ),
    LegacyParser(
        name='parsers/exciting', code_name='exciting',
        parser_class_name='excitingparser.ExcitingParser',
        mainfile_name_re=r'^.*.OUT?',
        mainfile_contents_re=(r'EXCITING.*started')
    ),
    LegacyParser(
        name='parsers/fhi-aims', code_name='FHI-aims',
        parser_class_name='fhiaimsparser.FHIaimsParser',
        mainfile_contents_re=(
            r'^(.*\n)*'
            r'?\s*Invoking FHI-aims \.\.\.'
            r'?\s*Version'),
        # The negative lookahead keeps phonopy displacement runs away from this parser;
        # those paths are claimed by the phonopy entry above.
        mainfile_name_re=r'^.(?!.*phonopy-FHI-aims-displacement)'
    ),
    LegacyParser(
        name='parsers/cp2k', code_name='CP2K',
        parser_class_name='cp2kparser.CP2KParser',
        mainfile_contents_re=(
            r'\*\*\*\* \*\*\*\* \*\*\*\*\*\*  \*\*  PROGRAM STARTED AT\s.*\n'
            r' \*\*\*\*\* \*\* \*\*\*  \*\*\* \*\*   PROGRAM STARTED ON\s*.*\n'
            r' \*\*    \*\*\*\*   \*\*\*\*\*\*    PROGRAM STARTED BY .*\n'
            r' \*\*\*\*\* \*\*    \*\* \*\* \*\*   PROGRAM PROCESS ID .*\n'
            r'  \*\*\*\* \*\*  \*\*\*\*\*\*\*  \*\*  PROGRAM STARTED IN .*\n'
        )
    ),
    LegacyParser(
        name='parsers/crystal', code_name='Crystal',
        parser_class_name='crystalparser.CrystalParser',
        mainfile_contents_re=(
            r'(CRYSTAL\s*\n0 0 0)|('
            r'\s*\*\s{10,}CRYSTAL(?P<majorVersion>[\d]+)\s{10,}\*'
            r'\s*\*\s{10,}public \: (?P<minorVersion>[\d\.]+) \- .*\*)'
        )
    ),
    # The main contents regex of CPMD was causing a catastrophic backtracking issue
    # when searching through the first 500 bytes of main files. We decided
    # to use only a portion of the regex to avoid that issue.
    LegacyParser(
        name='parsers/cpmd', code_name='CPMD',
        parser_class_name='cpmdparser.CPMDParser',
        mainfile_contents_re=(
            # r'\s+\*\*\*\*\*\*  \*\*\*\*\*\*    \*\*\*\*  \*\*\*\*  \*\*\*\*\*\*\s*'
            # r'\s+\*\*\*\*\*\*\*  \*\*\*\*\*\*\*   \*\*\*\*\*\*\*\*\*\*  \*\*\*\*\*\*\*\s+'
            r'\*\*\*       \*\*   \*\*\*  \*\* \*\*\*\* \*\*  \*\*   \*\*\*'
            # r'\s+\*\*        \*\*   \*\*\*  \*\*  \*\*  \*\*  \*\*    \*\*\s+'
            # r'\s+\*\*        \*\*\*\*\*\*\*   \*\*      \*\*  \*\*    \*\*\s+'
            # r'\s+\*\*\*       \*\*\*\*\*\*    \*\*      \*\*  \*\*   \*\*\*\s+'
            # r'\s+\*\*\*\*\*\*\*  \*\*        \*\*      \*\*  \*\*\*\*\*\*\*\s+'
            # r'\s+\*\*\*\*\*\*  \*\*        \*\*      \*\*  \*\*\*\*\*\*\s+'
        )
    ),
    LegacyParser(
        name='parsers/nwchem', code_name='NWChem',
        parser_class_name='nwchemparser.NWChemParser',
        mainfile_contents_re=(
            r'Northwest Computational Chemistry Package \(NWChem\) (\d+\.)+\d+'
        )
    ),
    LegacyParser(
        name='parsers/bigdft', code_name='BigDFT',
        parser_class_name='bigdftparser.BigDFTParser',
        # Only a single line of the banner is matched; the remaining banner lines are
        # kept as comments for reference.
        mainfile_contents_re=(
            # r'__________________________________ A fast and precise DFT wavelet code\s*'
            # r'\|     \|     \|     \|     \|     \|\s*'
            # r'\|     \|     \|     \|     \|     \|      BBBB         i       gggggg\s*'
            # r'\|_____\|_____\|_____\|_____\|_____\|     B    B               g\s*'
            # r'\|     \|  :  \|  :  \|     \|     \|    B     B        i     g\s*'
            # r'\|     \|-0\+--\|-0\+--\|     \|     \|    B    B         i     g        g\s*'
            r'\|_____\|__:__\|__:__\|_____\|_____\|___ BBBBB          i     g         g\s*'
            # r'\|  :  \|     \|     \|  :  \|     \|    B    B         i     g         g\s*'
            # r'\|--\+0-\|     \|     \|-0\+--\|     \|    B     B     iiii     g         g\s*'
            # r'\|__:__\|_____\|_____\|__:__\|_____\|    B     B        i      g        g\s*'
            # r'\|     \|  :  \|  :  \|     \|     \|    B BBBB        i        g      g\s*'
            # r'\|     \|-0\+--\|-0\+--\|     \|     \|    B        iiiii          gggggg\s*'
            # r'\|_____\|__:__\|__:__\|_____\|_____\|__BBBBB\s*'
            # r'\|     \|     \|     \|  :  \|     \|                           TTTTTTTTT\s*'
            # r'\|     \|     \|     \|--\+0-\|     \|  DDDDDD          FFFFF        T\s*'
            # r'\|_____\|_____\|_____\|__:__\|_____\| D      D        F        TTTT T\s*'
            # r'\|     \|     \|     \|  :  \|     \|D        D      F        T     T\s*'
            # r'\|     \|     \|     \|--\+0-\|     \|D         D     FFFF     T     T\s*'
            # r'\|_____\|_____\|_____\|__:__\|_____\|D___      D     F         T    T\s*'
            # r'\|     \|     \|  :  \|     \|     \|D         D     F          TTTTT\s*'
            # r'\|     \|     \|--\+0-\|     \|     \| D        D     F         T    T\s*'
            # r'\|_____\|_____\|__:__\|_____\|_____\|          D     F        T     T\s*'
            # r'\|     \|     \|     \|     \|     \|         D               T    T\s*'
            # r'\|     \|     \|     \|     \|     \|   DDDDDD       F         TTTT\s*'
            # r'\|_____\|_____\|_____\|_____\|_____\|______                    www\.bigdft\.org'
        )
    ),
    LegacyParser(
        name='parsers/wien2k', code_name='WIEN2k',
        parser_class_name='wien2kparser.Wien2kParser',
        mainfile_contents_re=r':LABEL\d+: using WIEN2k_\d+\.\d+'
    ),
    LegacyParser(
        name='parsers/band', code_name=config.services.not_processed_value,
        parser_class_name='bandparser.BANDParser',
        mainfile_contents_re=r' +\* +Amsterdam Density Functional +\(ADF\)'),
    LegacyParser(
        name='parsers/gaussian', code_name='Gaussian',
        parser_class_name='gaussianparser.GaussianParser',
        # This previous file matching string was too far down the line.
        # r'\s*Cite this work as:'
        # r'\s*Gaussian [0-9]+, Revision [A-Za-z0-9.]*,'
        # r'\s\*\*\*\*\*\*\*\*\*\*\*\**'
        # r'\s*Gaussian\s*([0-9]+):\s*([A-Za-z0-9-.]+)\s*([0-9][0-9]?\-[A-Z][a-z][a-z]\-[0-9]+)'
        # r'\s*([0-9][0-9]?\-[A-Z][a-z][a-z]\-[0-9]+)')
        mainfile_contents_re=r'Gaussian, Inc'),
    LegacyParser(
        name='parsers/quantumespresso', code_name='Quantum Espresso',
        parser_class_name='quantumespressoparser.QuantumEspressoParserPWSCF',
        mainfile_contents_re=r'(Program PWSCF)|(This program is part of the open-source Quantum)'
        #    r'^(.*\n)*'
        #    r'\s*Program (\S+)\s+v\.(\S+)(?:\s+\(svn\s+rev\.\s+'
        #    r'(\d+)\s*\))?\s+starts[^\n]+'
        #    r'(?:\s*\n?)*This program is part of the open-source Quantum')
    ),
    LegacyParser(
        name='parsers/abinit', code_name='ABINIT',
        parser_class_name='abinitparser.AbinitParser',
        mainfile_contents_re=(r'^\n*\.Version\s*[0-9.]*\s*of ABINIT\s*')
    ),
    LegacyParser(
        name='parsers/orca', code_name='ORCA',
        parser_class_name='orcaparser.OrcaParser',
        mainfile_contents_re=(
            r'\s+\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*'
            r'\s+\* O   R   C   A \*\s*'
            r'\s+\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*'
            r'\s*'
            r'\s*--- An Ab Initio, DFT and Semiempirical electronic structure package ---\s*')
    ),
    LegacyParser(
        name='parsers/castep', code_name='CASTEP',
        parser_class_name='castepparser.CastepParser',
        mainfile_contents_re=(r'\s\|\s*CCC\s*AA\s*SSS\s*TTTTT\s*EEEEE\s*PPPP\s*\|\s*')
    ),
    LegacyParser(
        name='parsers/dl-poly', code_name='DL_POLY',
        parser_class_name='dlpolyparser.DlPolyParserWrapper',
        mainfile_contents_re=(r'\*\* DL_POLY \*\*')
    ),
    LegacyParser(
        name='parsers/lib-atoms', code_name='libAtoms',
        parser_class_name='libatomsparser.LibAtomsParserWrapper',
        mainfile_contents_re=(r'\s*<GAP_params\s')
    ),
    LegacyParser(
        name='parsers/octopus', code_name='Octopus',
        parser_class_name='octopusparser.OctopusParserWrapper',
        mainfile_contents_re=(r'\|0\) ~ \(0\) \|')
        # We decided to use the octopus eyes instead of
        # r'\*{32} Grid \*{32}Simulation Box:' since it was so far down in the file.
    ),
    LegacyParser(
        name='parsers/gpaw', code_name='GPAW',
        parser_class_name='gpawparser.GPAWParserWrapper',
        mainfile_name_re=(r'^.*\.gpw$'),
        mainfile_mime_re=r'application/x-tar'
    ),
    LegacyParser(
        name='parsers/gpaw2', code_name='GPAW',
        parser_class_name='gpawparser.GPAWParser2Wrapper',
        # mainfile_contents_re=r'',  # We can't read .gpw2 to match AFFormatGPAW'
        mainfile_name_re=(r'^.*\.gpw2$'),
        mainfile_mime_re=r'application/x-tar'
    ),
    LegacyParser(
        name='parsers/atk', code_name='ATK',
        parser_class_name='atkparser.ATKParserWrapper',
        # mainfile_contents_re=r'',  # We can't read .gpw as txt - of UlmGPAW|AFFormatGPAW'
        mainfile_name_re=r'^.*\.nc',
        # The previously used mime type r'application/x-netcdf' wasn't found by magic library.
        mainfile_mime_re=r'application/octet-stream'
    ),
    LegacyParser(
        name='parsers/gulp', code_name='gulp',
        parser_class_name='gulpparser.GULPParser',
        mainfile_contents_re=(
            r'\s*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*'
            r'\*\*\*\*\*\*\*\*\*\*\*\*\*\s*'
            r'\s*\*\s*GENERAL UTILITY LATTICE PROGRAM\s*\*\s*')
    ),
    LegacyParser(
        name='parsers/siesta', code_name='Siesta',
        parser_class_name='siestaparser.SiestaParser',
        mainfile_contents_re=(
            r'(Siesta Version: siesta-|SIESTA [0-9]\.[0-9]\.[0-9])')
    ),
    LegacyParser(
        name='parsers/elk', code_name='elk',
        parser_class_name='elkparser.ElkParser',
        mainfile_contents_re=r'\| Elk version [0-9.a-zA-Z]+ started \|'
    ),
    LegacyParser(
        name='parsers/elastic', code_name='elastic',
        parser_class_name='elasticparser.ElasticParser',
        mainfile_contents_re=r'\s*Order of elastic constants\s*=\s*[0-9]+\s*'
    ),
    LegacyParser(
        name='parsers/gamess', code_name='GAMESS',
        parser_class_name='gamessparser.GamessParser',
        mainfile_contents_re=(
            r'\s*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*'
            r'\s*\*\s*GAMESS VERSION =\s*(.*)\*\s*'
            r'\s*\*\s*FROM IOWA STATE UNIVERSITY\s*\*\s*')
    ),
    LegacyParser(
        name='parsers/turbomole', code_name='turbomole',
        parser_class_name='turbomoleparser.TurbomoleParser',
        mainfile_contents_re=(
            r'Copyright \(C\) [0-9]+ TURBOMOLE GmbH, Karlsruhe')
    ),
    LegacyParser(
        name='parsers/skeleton', code_name='skeleton', domain='EMS',
        parser_class_name='skeletonparser.SkeletonParserInterface',
        mainfile_mime_re=r'(application/json)|(text/.*)',
        mainfile_contents_re=(r'skeleton experimental metadata format')
    ),
    LegacyParser(
        name='parsers/mpes', code_name='mpes', domain='EMS',
        parser_class_name='mpesparser.MPESParserInterface',
        mainfile_mime_re=r'(application/json)|(text/.*)',
        mainfile_name_re=(r'.*_data.meta'),
        mainfile_contents_re=(r'"data_repository_name": "zenodo.org"')
    ),
    # NOTE(review): code_name is 'mpes' although this is the aptfim parser; this looks
    # like a copy of the entry above - confirm the intended code name.
    LegacyParser(
        name='parsers/aptfim', code_name='mpes', domain='EMS',
        parser_class_name='aptfimparser.APTFIMParserInterface',
        mainfile_mime_re=r'(application/json)|(text/.*)',
        mainfile_name_re=(r'.*.aptfim')
    ),
    MissingParser(
        name='parsers/qbox', code_name='qbox', domain='DFT',
        mainfile_contents_re=(r'http://qboxcode.org')
    ),
    MissingParser(
        name='parsers/dmol', code_name='DMol3', domain='DFT',
        mainfile_name_re=r'.*\.outmol'
    ),
    # NOTE(review): the following MissingParser entries use the prefix 'parser/' while
    # all other entries use 'parsers/' - confirm whether this is intended.
    MissingParser(
        name='parser/fleur', code_name='fleur', domain='DFT',
        mainfile_contents_re=r'This output is generated by fleur.'
    ),
    # The two molcas entries share one name and differ only in the matched contents.
    MissingParser(
        name='parser/molcas', code_name='MOLCAS', domain='DFT',
        mainfile_contents_re=r'M O L C A S'
    ),
    MissingParser(
        name='parser/molcas', code_name='MOLCAS', domain='DFT',
        mainfile_contents_re=r'####### #     # ####### ####### ####### ######'
    ),
    # These are supposedly octopus files, but they do not look like octopus files at all
    MissingParser(
        name='parser/octopus', code_name='Octopus', domain='DFT',
        mainfile_name_re=r'(inp)|(.*/inp)'
    ),
    # The three name-based fallbacks below reuse the names of the content-based entries
    # further up, so a mapping that is keyed by parser name can only hold one entry
    # per name.
    # We already have crystal with mainfile_contents_re, but this one does not always properly match
    LegacyParser(
        name='parsers/crystal', code_name='Crystal',
        parser_class_name='crystalparser.CrystalParser',
        mainfile_name_re=r'.*\.cryst\.out'
    ),
    # We already have wien2k with mainfile_contents_re, but this one does not always properly match
    LegacyParser(
        name='parsers/wien2k', code_name='WIEN2k',
        parser_class_name='wien2kparser.Wien2kParser',
        mainfile_name_re=r'.*\.scf'
    ),
    # We already have fhi-aims with mainfile_contents_re, but this one does not always properly match
    LegacyParser(
        name='parsers/fhi-aims', code_name='FHI-aims',
        parser_class_name='fhiaimsparser.FHIaimsParser',
        mainfile_name_re=r'.*\.fhiaims'
    ),
    BrokenParser()
]

""" Instantiation and constructor based config of all parsers. """
418

Markus Scheidgen's avatar
Markus Scheidgen committed
419
# Keyed by parser name; when several entries of ``parsers`` share a name, the one
# that comes last in the list is the one kept here.
parser_dict = dict((parser.name, parser) for parser in parsers)  # type: ignore
""" A dict to access parsers by name. Usually 'parsers/<...>', e.g. 'parsers/vasp'. """