__init__.py 18.9 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
16
The *parsing* module is an interface for the existing NOMAD-coe parsers.
17
18
This module redefines some of the old NOMAD-coe python-common functionality to create a
more coherent interface to the parsers.
19
20
21
22
23

Assumption about parsers
------------------------
For now, we make a few assumptions about parsers:
- they always work on the same *meta-info* version
Markus Scheidgen's avatar
Markus Scheidgen committed
24
- they have no conflicting python requirements
25
26
- they can be loaded at the same time and can be used within the same python process
- they are uniquely identified by a GIT URL and publicly accessible
27
- their version is uniquely identified by a GIT commit SHA
28

Markus Scheidgen's avatar
Markus Scheidgen committed
29
Each parser is defined via an instance of :class:`Parser`. The implementation :class:`LegacyParser` is used for most NOMAD-coe parsers.
30
31
32
33

.. autoclass:: nomad.parsing.Parser
    :members:

Markus Scheidgen's avatar
Markus Scheidgen committed
34
35
36
37
38
39
40
41
42
43
44
45
There are sub-classes for parsers with special purposes.

.. autoclass:: nomad.parsing.Parser
.. autoclass:: nomad.parsing.MatchingParser
.. autoclass:: nomad.parsing.MissingParser
.. autoclass:: nomad.parsing.BrokenParser
.. autoclass:: nomad.parsing.TemplateParser
.. autoclass:: nomad.parsing.GenerateRandomParser
.. autoclass:: nomad.parsing.ChaosParser
.. autoclass:: nomad.parsing.EmptyParser


Markus Scheidgen's avatar
Markus Scheidgen committed
46
47
48
49
The implementation :class:`LegacyParser` is used for most NOMAD-coe parsers.

.. autoclass:: nomad.parsing.LegacyParser

Markus Scheidgen's avatar
Markus Scheidgen committed
50

51
52
53
54
55
The parser definitions are available via the following two variables.

.. autodata:: nomad.parsing.parsers
.. autodata:: nomad.parsing.parser_dict

Markus Scheidgen's avatar
Markus Scheidgen committed
56
Parsers are reused for multiple calculations.
Markus Scheidgen's avatar
Markus Scheidgen committed
57

58
59
Parsers and calculation files are matched via regular expressions.

Markus Scheidgen's avatar
Markus Scheidgen committed
60
.. autofunction:: nomad.parsing.match_parser
61

Markus Scheidgen's avatar
Markus Scheidgen committed
62
Parsers in NOMAD-coe use a *backend* to create output. There are different NOMAD-coe
Markus Scheidgen's avatar
Markus Scheidgen committed
63
backends. In nomad@FAIRDI, we currently only use a single backend: a version of
Markus Scheidgen's avatar
Markus Scheidgen committed
64
65
66
NOMAD-coe's *LocalBackend*. It stores all parser results in memory. The following
classes provide an interface definition for *backends* as an ABC and a concrete implementation
based on NOMAD-coe's *python-common* module.
67
68
69
70
71
72

.. autoclass:: nomad.parsing.AbstractParserBackend
    :members:
.. autoclass:: nomad.parsing.LocalBackend
    :members:
"""
Markus Scheidgen's avatar
Markus Scheidgen committed
73

74
from typing import Callable, IO, Union
75
import magic
76
77
import gzip
import bz2
78
import os.path
79

80
from nomad import files, config
81
82

from nomad.parsing.backend import AbstractParserBackend, LocalBackend, LegacyLocalBackend, JSONStreamWriter, BadContextURI, WrongContextState
83
from nomad.parsing.parser import Parser, LegacyParser, VaspOutcarParser, BrokenParser, MissingParser, MatchingParser
84
from nomad.parsing.artificial import TemplateParser, GenerateRandomParser, ChaosParser, EmptyParser
85

86

87
88
89
90
91
92
# Maps the first three bytes of a file (its magic signature) to a pair of
# (compression name, opener callable). match_parser looks files up here and
# falls back to (None, open) when the signature is not listed.
_compressions = {
    signature: (label, opener)
    for signature, label, opener in (
        (b'\x1f\x8b\x08', 'gz', gzip.open),
        (b'\x42\x5a\x68', 'bz2', bz2.open),
    )
}


93
def match_parser(mainfile: str, upload_files: Union[str, files.StagingUploadFiles], strict=True) -> 'Parser':
    """
    Determines the parser that is responsible for the given mainfile.

    The beginning of the file (up to ``config.parser_matching_size`` bytes) is read
    and its mime type is derived from these bytes. Files that start with a known gzip
    or bz2 signature are read through the respective decompressing opener. Each parser
    in :data:`parsers` is then asked, in list order, whether it accepts the file based
    on its path, mime type (e.g. text/*, application/xml), the read contents, and
    the detected compression. Parsers of a domain other than ``config.domain`` are
    skipped.

    Arguments:
        mainfile: The upload relative path to the mainfile
        upload_files: Either a :class:`files.StagingUploadFiles` object or a directory name.
            Directory name + mainfile needs to point to the file.
        strict: Only match strict parsers, i.e. skip instances of
            :class:`MissingParser` and :class:`EmptyParser`.

    Returns: The first parser that accepts the file, or None if no parser could be matched.
    """
    if not isinstance(upload_files, str):
        mainfile_path = upload_files.raw_file_object(mainfile).os_path
    else:
        mainfile_path = os.path.join(upload_files, mainfile)

    # Peek at the leading bytes to recognise compressed files; everything else
    # is read with the plain builtin open.
    with open(mainfile_path, 'rb') as raw:
        signature = raw.read(3)
    compression, open_compressed = _compressions.get(signature, (None, open))

    with open_compressed(mainfile_path, 'rb') as stream:
        head = stream.read(config.parser_matching_size)

    mime_type = magic.from_buffer(head, mime=True)

    for candidate in parsers:
        if strict and isinstance(candidate, (MissingParser, EmptyParser)):
            continue

        if candidate.domain != config.domain:
            continue

        if candidate.is_mainfile(mainfile_path, mime_type, head, compression):
            # TODO: deal with multiple possible parser specs
            return candidate

    return None


136
# Ordered list of all parser instances. match_parser walks this list from top
# to bottom and returns the first parser whose is_mainfile() accepts the file,
# so the position of an entry determines its precedence.
#
# Note on the multi-line regular expressions below: adjacent string literals
# are concatenated by Python, so a literal that starts with '?' attaches that
# '?' to whatever token ends the previous literal (e.g. a trailing `\s*`
# becomes the lazy `\s*?`).
parsers = [
    GenerateRandomParser(),
    TemplateParser(),
    ChaosParser(),
    LegacyParser(
        name='parsers/phonopy', code_name='Phonopy',
        parser_class_name='phonopyparser.PhonopyParserWrapper',
        # mainfile_contents_re=r'',  # Empty regex since this code calls other DFT codes.
        mainfile_name_re=(r'.*/phonopy-FHI-aims-displacement-0*1/control.in$')
    ),
    LegacyParser(
        name='parsers/vasp', code_name='VASP',
        parser_class_name='vaspparser.VASPRunParserInterface',
        mainfile_mime_re=r'(application/xml)|(text/.*)',
        # NOTE(review): where the previous literal ends in '>' the leading '?'
        # makes that '>' optional rather than making whitespace lazy - confirm
        # this is intended.
        mainfile_contents_re=(
            r'^\s*<\?xml version="1\.0" encoding="ISO-8859-1"\?>\s*'
            r'?\s*<modeling>'
            r'?\s*<generator>'
            r'?\s*<i name="program" type="string">\s*vasp\s*</i>'
            r'?'),
        supported_compressions=['gz', 'bz2']
    ),
    VaspOutcarParser(
        name='parsers/vasp-outcar', code_name='VASP',
        parser_class_name='vaspparser.VaspOutcarParser',
        mainfile_name_re=r'(.*/)?OUTCAR(\.[^\.]*)?',
        mainfile_contents_re=(r'^\svasp\.')
    ),
    LegacyParser(
        name='parsers/exciting', code_name='exciting',
        parser_class_name='excitingparser.ExcitingParser',
        # NOTE(review): the '.' before OUT is unescaped (matches any character)
        # and 'T?' makes the T optional; presumably r'\.OUT' was meant - confirm.
        mainfile_name_re=r'^.*.OUT?',
        mainfile_contents_re=(r'EXCITING.*started')
    ),
    LegacyParser(
        name='parsers/fhi-aims', code_name='FHI-aims',
        parser_class_name='fhiaimsparser.FHIaimsParser',
        # The leading '?' of the second literal turns `(.*\n)*` into the lazy
        # `(.*\n)*?`.
        mainfile_contents_re=(
            r'^(.*\n)*'
            r'?\s*Invoking FHI-aims \.\.\.'
            # r'?\s*Version'
        )
    ),
    LegacyParser(
        name='parsers/cp2k', code_name='CP2K',
        parser_class_name='cp2kparser.CP2KParser',
        mainfile_contents_re=(
            r'\*\*\*\* \*\*\*\* \*\*\*\*\*\*  \*\*  PROGRAM STARTED AT\s.*\n'
            r' \*\*\*\*\* \*\* \*\*\*  \*\*\* \*\*   PROGRAM STARTED ON\s*.*\n'
            r' \*\*    \*\*\*\*   \*\*\*\*\*\*    PROGRAM STARTED BY .*\n'
            r' \*\*\*\*\* \*\*    \*\* \*\* \*\*   PROGRAM PROCESS ID .*\n'
            r'  \*\*\*\* \*\*  \*\*\*\*\*\*\*  \*\*  PROGRAM STARTED IN .*\n'
        )
    ),
    LegacyParser(
        name='parsers/crystal', code_name='Crystal',
        parser_class_name='crystalparser.CrystalParser',
        mainfile_contents_re=(
            r'(CRYSTAL\s*\n0 0 0)|('
            r'\s*\*\s{10,}CRYSTAL(?P<majorVersion>[\d]+)\s{10,}\*'
            r'\s*\*\s{10,}public \: (?P<minorVersion>[\d\.]+) \- .*\*)'
        )
    ),
    # The main contents regex of CPMD was causing a catastrophic backtracking issue
    # when searching through the first 500 bytes of main files. We decided
    # to use only a portion of the regex to avoid that issue.
    LegacyParser(
        name='parsers/cpmd', code_name='CPMD',
        parser_class_name='cpmdparser.CPMDParser',
        mainfile_contents_re=(
            # r'\s+\*\*\*\*\*\*  \*\*\*\*\*\*    \*\*\*\*  \*\*\*\*  \*\*\*\*\*\*\s*'
            # r'\s+\*\*\*\*\*\*\*  \*\*\*\*\*\*\*   \*\*\*\*\*\*\*\*\*\*  \*\*\*\*\*\*\*\s+'
            r'\*\*\*       \*\*   \*\*\*  \*\* \*\*\*\* \*\*  \*\*   \*\*\*'
            # r'\s+\*\*        \*\*   \*\*\*  \*\*  \*\*  \*\*  \*\*    \*\*\s+'
            # r'\s+\*\*        \*\*\*\*\*\*\*   \*\*      \*\*  \*\*    \*\*\s+'
            # r'\s+\*\*\*       \*\*\*\*\*\*    \*\*      \*\*  \*\*   \*\*\*\s+'
            # r'\s+\*\*\*\*\*\*\*  \*\*        \*\*      \*\*  \*\*\*\*\*\*\*\s+'
            # r'\s+\*\*\*\*\*\*  \*\*        \*\*      \*\*  \*\*\*\*\*\*\s+'
        )
    ),
    LegacyParser(
        name='parsers/nwchem', code_name='NWChem',
        parser_class_name='nwchemparser.NWChemParser',
        mainfile_contents_re=(
            r'Northwest Computational Chemistry Package \(NWChem\) (\d+\.)+\d+'
        )
    ),
    LegacyParser(
        name='parsers/bigdft', code_name='BigDFT',
        parser_class_name='bigdftparser.BigDFTParser',
        # Only a single line of the banner is matched; the remaining banner
        # lines are kept below as commented-out reference.
        mainfile_contents_re=(
            # r'__________________________________ A fast and precise DFT wavelet code\s*'
            # r'\|     \|     \|     \|     \|     \|\s*'
            # r'\|     \|     \|     \|     \|     \|      BBBB         i       gggggg\s*'
            # r'\|_____\|_____\|_____\|_____\|_____\|     B    B               g\s*'
            # r'\|     \|  :  \|  :  \|     \|     \|    B     B        i     g\s*'
            # r'\|     \|-0\+--\|-0\+--\|     \|     \|    B    B         i     g        g\s*'
            r'\|_____\|__:__\|__:__\|_____\|_____\|___ BBBBB          i     g         g\s*'
            # r'\|  :  \|     \|     \|  :  \|     \|    B    B         i     g         g\s*'
            # r'\|--\+0-\|     \|     \|-0\+--\|     \|    B     B     iiii     g         g\s*'
            # r'\|__:__\|_____\|_____\|__:__\|_____\|    B     B        i      g        g\s*'
            # r'\|     \|  :  \|  :  \|     \|     \|    B BBBB        i        g      g\s*'
            # r'\|     \|-0\+--\|-0\+--\|     \|     \|    B        iiiii          gggggg\s*'
            # r'\|_____\|__:__\|__:__\|_____\|_____\|__BBBBB\s*'
            # r'\|     \|     \|     \|  :  \|     \|                           TTTTTTTTT\s*'
            # r'\|     \|     \|     \|--\+0-\|     \|  DDDDDD          FFFFF        T\s*'
            # r'\|_____\|_____\|_____\|__:__\|_____\| D      D        F        TTTT T\s*'
            # r'\|     \|     \|     \|  :  \|     \|D        D      F        T     T\s*'
            # r'\|     \|     \|     \|--\+0-\|     \|D         D     FFFF     T     T\s*'
            # r'\|_____\|_____\|_____\|__:__\|_____\|D___      D     F         T    T\s*'
            # r'\|     \|     \|  :  \|     \|     \|D         D     F          TTTTT\s*'
            # r'\|     \|     \|--\+0-\|     \|     \| D        D     F         T    T\s*'
            # r'\|_____\|_____\|__:__\|_____\|_____\|          D     F        T     T\s*'
            # r'\|     \|     \|     \|     \|     \|         D               T    T\s*'
            # r'\|     \|     \|     \|     \|     \|   DDDDDD       F         TTTT\s*'
            # r'\|_____\|_____\|_____\|_____\|_____\|______                    www\.bigdft\.org'
        )
    ),
    LegacyParser(
        name='parsers/wien2k', code_name='WIEN2k',
        parser_class_name='wien2kparser.Wien2kParser',
        mainfile_contents_re=r':LABEL\d+: using WIEN2k_\d+\.\d+'
    ),
    LegacyParser(
        name='parsers/band', code_name='BAND',
        parser_class_name='bandparser.BANDParser',
        mainfile_contents_re=r' +\* +Amsterdam Density Functional +\(ADF\)'),
    LegacyParser(
        name='parsers/gaussian', code_name='Gaussian',
        parser_class_name='gaussianparser.GaussianParser',
        mainfile_contents_re=(
            r'\s*Cite this work as:'
            r'\s*Gaussian [0-9]+, Revision [A-Za-z0-9\.]*,')
    ),
    LegacyParser(
        name='parsers/quantumespresso', code_name='Quantum Espresso',
        parser_class_name='quantumespressoparser.QuantumEspressoParserPWSCF',
        mainfile_contents_re=r'Program PWSCF.*starts'
        #    r'^(.*\n)*'
        #    r'\s*Program (\S+)\s+v\.(\S+)(?:\s+\(svn\s+rev\.\s+'
        #    r'(\d+)\s*\))?\s+starts[^\n]+'
        #    r'(?:\s*\n?)*This program is part of the open-source Quantum')
    ),
    LegacyParser(
        name='parsers/abinit', code_name='ABINIT',
        parser_class_name='abinitparser.AbinitParser',
        mainfile_contents_re=(r'^\n*\.Version\s*[0-9.]*\s*of ABINIT\s*')
    ),
    LegacyParser(
        name='parsers/orca', code_name='ORCA',
        parser_class_name='orcaparser.OrcaParser',
        mainfile_contents_re=(
            r'\s+\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*'
            r'\s+\* O   R   C   A \*\s*'
            r'\s+\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*'
            r'\s*'
            r'\s*--- An Ab Initio, DFT and Semiempirical electronic structure package ---\s*')
    ),
    LegacyParser(
        name='parsers/castep', code_name='CASTEP',
        parser_class_name='castepparser.CastepParser',
        mainfile_contents_re=(r'\s\|\s*CCC\s*AA\s*SSS\s*TTTTT\s*EEEEE\s*PPPP\s*\|\s*')
    ),
    LegacyParser(
        name='parsers/dl-poly', code_name='DL_POLY',
        parser_class_name='dlpolyparser.DlPolyParserWrapper',
        mainfile_contents_re=(r'\*\* DL_POLY \*\*')
    ),
    LegacyParser(
        name='parsers/lib-atoms', code_name='libAtoms',
        parser_class_name='libatomsparser.LibAtomsParserWrapper',
        mainfile_contents_re=(r'\s*<GAP_params\s')
    ),
    LegacyParser(
        name='parsers/octopus', code_name='Octopus',
        parser_class_name='octopusparser.OctopusParserWrapper',
        mainfile_contents_re=(r'\|0\) ~ \(0\) \|')
        # We decided to use the octopus eyes instead of
        # r'\*{32} Grid \*{32}Simulation Box:' since it was so far down in the file.
    ),
    LegacyParser(
        name='parsers/gpaw', code_name='GPAW',
        parser_class_name='gpawparser.GPAWParserWrapper',
        mainfile_name_re=(r'^.*\.gpw$'),
        mainfile_mime_re=r'application/x-tar'
    ),
    LegacyParser(
        name='parsers/gpaw2', code_name='GPAW',
        parser_class_name='gpawparser.GPAWParser2Wrapper',
        # mainfile_contents_re=r'',  # We can't read .gpw2 to match AFFormatGPAW'
        mainfile_name_re=(r'^.*\.gpw2$'),
        mainfile_mime_re=r'application/x-tar'
    ),
    LegacyParser(
        name='parsers/atk', code_name='ATK',
        parser_class_name='atkparser.ATKParserWrapper',
        # mainfile_contents_re=r'',  # We can't read .gpw as txt - of UlmGPAW|AFFormatGPAW'
        mainfile_name_re=r'^.*\.nc',
        # The previously used mime type r'application/x-netcdf' wasn't found by magic library.
        mainfile_mime_re=r'application/octet-stream'
    ),
    LegacyParser(
        name='parsers/gulp', code_name='gulp',
        parser_class_name='gulpparser.GULPParser',
        mainfile_contents_re=(
            r'\s*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*'
            r'\*\*\*\*\*\*\*\*\*\*\*\*\*\s*'
            r'\s*\*\s*GENERAL UTILITY LATTICE PROGRAM\s*\*\s*')
    ),
    LegacyParser(
        name='parsers/siesta', code_name='Siesta',
        parser_class_name='siestaparser.SiestaParser',
        mainfile_contents_re=(
            r'(Siesta Version: siesta-|SIESTA [0-9]\.[0-9]\.[0-9])')
    ),
    LegacyParser(
        name='parsers/elk', code_name='elk',
        parser_class_name='elkparser.ElkParser',
        mainfile_contents_re=r'\| Elk version [0-9.a-zA-Z]+ started \|'
    ),
    LegacyParser(
        name='parsers/elastic', code_name='elastic',
        parser_class_name='elasticparser.ElasticParser',
        mainfile_contents_re=r'\s*Order of elastic constants\s*=\s*[0-9]+\s*'
    ),
    LegacyParser(
        name='parsers/gamess', code_name='GAMESS',
        parser_class_name='gamessparser.GamessParser',
        mainfile_contents_re=(
            r'\s*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*'
            r'\s*\*\s*GAMESS VERSION =\s*(.*)\*\s*'
            r'\s*\*\s*FROM IOWA STATE UNIVERSITY\s*\*\s*')
    ),
    LegacyParser(
        name='parsers/turbomole', code_name='turbomole',
        parser_class_name='turbomoleparser.TurbomoleParser',
        mainfile_contents_re=(
            r'Copyright \(C\) [0-9]+ TURBOMOLE GmbH, Karlsruhe')
    ),
    LegacyParser(
        name='parsers/skeleton', code_name='skeleton', domain='EMS',
        parser_class_name='skeletonparser.SkeletonParserInterface',
        mainfile_mime_re=r'(application/json)|(text/.*)',
        mainfile_contents_re=(r'skeleton experimental metadata format')
    ),
    LegacyParser(
        name='parsers/mpes', code_name='mpes', domain='EMS',
        parser_class_name='mpesparser.MPESParserInterface',
        mainfile_mime_re=r'(application/json)|(text/.*)',
        # NOTE(review): the '.' before 'meta' is unescaped and matches any character.
        mainfile_name_re=(r'.*.meta'),
        mainfile_contents_re=(r'"data_repository_name": "zenodo.org"')
    ),
    LegacyParser(
        # NOTE(review): code_name is 'mpes', the same as for parsers/mpes above -
        # confirm this is not a copy-and-paste leftover.
        name='parsers/aptfim', code_name='mpes', domain='EMS',
        parser_class_name='aptfimparser.APTFIMParserInterface',
        mainfile_mime_re=r'(application/json)|(text/.*)',
        # NOTE(review): the '.' before 'aptfim' is unescaped and matches any character.
        mainfile_name_re=(r'.*.aptfim')
    ),
    LegacyParser(
        name='parsers/qbox', code_name='qbox', domain='DFT',
        parser_class_name='qboxparser.QboxParser',
        mainfile_mime_re=r'(application/xml)|(text/.*)',
        mainfile_contents_re=(r'http://qboxcode.org')
    ),
    LegacyParser(
        name='parsers/dmol', code_name='DMol3', domain='DFT',
        parser_class_name='dmol3parser.Dmol3Parser',
        mainfile_name_re=r'.*\.outmol',
        mainfile_contents_re=r'Materials Studio DMol\^3'
    ),
    # NOTE(review): the following three names use the prefix 'parser/' whereas
    # all other LegacyParser entries use 'parsers/'. The name is the key in
    # parser_dict, so confirm with callers before changing it.
    LegacyParser(
        name='parser/fleur', code_name='fleur', domain='DFT',
        parser_class_name='fleurparser.FleurParser',
        mainfile_contents_re=r'This output is generated by fleur.'
    ),
    LegacyParser(
        name='parser/molcas', code_name='MOLCAS', domain='DFT',
        parser_class_name='molcasparser.MolcasParser',
        mainfile_contents_re=r'M O L C A S'
    ),
    LegacyParser(
        name='parser/onetep', code_name='ONETEP', domain='DFT',
        parser_class_name='onetepparser.OnetepParser',
        mainfile_contents_re=r'####### #     # ####### ####### ####### ######'
    ),
    # There are some entries with PIDs that have mainfiles which do not match what
    # the actual parsers expect. We use the EmptyParser to produce placeholder entries
    # to keep the PIDs. These parsers will not match for new, non migrated data.
    EmptyParser(
        name='missing/octopus', code_name='Octopus', domain='DFT',
        mainfile_name_re=r'(inp)|(.*/inp)'
    ),
    EmptyParser(
        name='missing/crystal', code_name='Crystal',
        mainfile_name_re=r'.*\.cryst\.out'
    ),
    EmptyParser(
        name='missing/wien2k', code_name='WIEN2k',
        mainfile_name_re=r'.*\.scf'
    ),
    EmptyParser(
        name='missing/fhi-aims', code_name='FHI-aims', domain='DFT',
        mainfile_name_re=r'.*\.fhiaims'
    ),
    BrokenParser()
]
442

443
""" Instantiation and constructor based config of all parsers. """
444

Markus Scheidgen's avatar
Markus Scheidgen committed
445
parser_dict = {entry.name: entry for entry in parsers}  # type: ignore
446
""" A dict to access parsers by name. Usually 'parsers/<...>', e.g. 'parsers/vasp'. """