diff --git a/nomad/app/api/info.py b/nomad/app/api/info.py
index ce67893f0063c05f5a88fcc1cbdaf7510027699a..c5d07d6cfc343a4904239e2fe93d371fff566210 100644
--- a/nomad/app/api/info.py
+++ b/nomad/app/api/info.py
@@ -20,7 +20,8 @@ from typing import Dict, Any
 from flask_restplus import Resource, fields
 from datetime import datetime
 
-from nomad import config, parsing, normalizing, datamodel, gitinfo, search
+from nomad import config, normalizing, datamodel, gitinfo, search
+from nomad.parsing import parsers, MatchingParser
 
 from .api import api
 
@@ -94,8 +95,8 @@ class InfoResource(Resource):
     def get(self):
         ''' Return information about the nomad backend and its configuration. '''
         codes_dict = {}
-        for parser in parsing.parser_dict.values():
-            if isinstance(parser, parsing.MatchingParser) and parser.domain == 'dft':
+        for parser in parsers.parser_dict.values():
+            if isinstance(parser, MatchingParser) and parser.domain == 'dft':
                 code_name = parser.code_name
                 if code_name in codes_dict:
                     continue
@@ -105,10 +106,10 @@ class InfoResource(Resource):
         return {
             'parsers': [
                 key[key.index('/') + 1:]
-                for key in parsing.parser_dict.keys()],
+                for key in parsers.parser_dict.keys()],
             'metainfo_packages': ['general', 'general.experimental', 'common', 'public'] + sorted([
                 key[key.index('/') + 1:]
-                for key in parsing.parser_dict.keys()]),
+                for key in parsers.parser_dict.keys()]),
             'codes': codes,
             'normalizers': [normalizer.__name__ for normalizer in normalizing.normalizers],
             'statistics': statistics(),
diff --git a/nomad/app/api/metainfo.py b/nomad/app/api/metainfo.py
index 1b0979f16514eadb1115a4e67357eabb60f0b6eb..be6d05cd38f1b1300287251a2c5ae4b50987e104 100644
--- a/nomad/app/api/metainfo.py
+++ b/nomad/app/api/metainfo.py
@@ -22,7 +22,7 @@ import importlib
 
 from nomad.metainfo.legacy import python_package_mapping, LegacyMetainfoEnvironment
 from nomad.metainfo import Package
-from nomad.parsing import parsers
+from nomad.parsing.parsers import parsers
 
 from .api import api
 
diff --git a/nomad/cli/parse.py b/nomad/cli/parse.py
index 445afa554b5845b24c260fd6ad9d61a6f1f8719a..2ed3c9b0f3b0e12f38a03532d8f2c66622259726 100644
--- a/nomad/cli/parse.py
+++ b/nomad/cli/parse.py
@@ -4,10 +4,9 @@ import json
 import click
 import sys
 
-from nomad import utils
-from nomad import parsing
-from nomad import normalizing
-from nomad import datamodel
+from nomad import utils, parsing, normalizing, datamodel
+from nomad.parsing.parsers import parser_dict, match_parser
+
 import nomadcore
 
 from .cli import cli
@@ -27,10 +26,10 @@ def parse(
     if logger is None:
         logger = utils.get_logger(__name__)
     if parser_name is not None:
-        parser = parsing.parser_dict.get(parser_name)
+        parser = parser_dict.get(parser_name)
         assert parser is not None, 'the given parser must exist'
     else:
-        parser = parsing.match_parser(mainfile_path, strict=strict)
+        parser = match_parser(mainfile_path, strict=strict)
         if isinstance(parser, parsing.MatchingParser):
             parser_name = parser.name
         else:
diff --git a/nomad/datamodel/dft.py b/nomad/datamodel/dft.py
index 5e5ae809bff908e830b135a594e4b15b196af261..6b117a39539ac9ae464d8e3713695bf0f3a31fb4 100644
--- a/nomad/datamodel/dft.py
+++ b/nomad/datamodel/dft.py
@@ -266,7 +266,7 @@ class DFTMetadata(MSection):
     def code_name_from_parser(self):
         entry = self.m_parent
         if entry.parser_name is not None:
-            from nomad.parsing import parser_dict
+            from nomad.parsing.parsers import parser_dict
             parser = parser_dict.get(entry.parser_name)
             if hasattr(parser, 'code_name'):
                 return parser.code_name
diff --git a/nomad/parsing/__init__.py b/nomad/parsing/__init__.py
index 942e0f5a79bd979d73631ab163199813361a0b95..4cfa35be822e101485dcf5f5016b0151ad5ea18f 100644
--- a/nomad/parsing/__init__.py
+++ b/nomad/parsing/__init__.py
@@ -50,14 +50,14 @@ The implementation :class:`LegacyParser` is used for most NOMAD-coe parsers.
 
 The parser definitions are available via the following two variables.
 
-.. autodata:: nomad.parsing.parsers
-.. autodata:: nomad.parsing.parser_dict
+.. autodata:: nomad.parsing.parsers.parsers
+.. autodata:: nomad.parsing.parsers.parser_dict
 
 Parsers are reused for multiple calculations.
 
 Parsers and calculation files are matched via regular expressions.
 
-.. autofunction:: nomad.parsing.match_parser
+.. autofunction:: nomad.parsing.parsers.match_parser
 
 Parsers in NOMAD-coe use a *backend* to create output. There are different NOMAD-coe
 basends. In nomad@FAIRDI, we only currently only use a single backed. The following
@@ -70,503 +70,6 @@ based on nomad@fairdi's metainfo:
     :members:
 '''
 
-from typing import Callable, IO, Union, Dict
-import os.path
-
-from nomad import config, datamodel
-
-from nomad.parsing.legacy import (
-    AbstractParserBackend, Backend, BackendError, BadContextUri, LegacyParser, VaspOutcarParser)
+from nomad.parsing.legacy import AbstractParserBackend, Backend, BackendError, BadContextUri, LegacyParser
 from nomad.parsing.parser import Parser, BrokenParser, MissingParser, MatchingParser
-from nomad.parsing.artificial import (
-    TemplateParser, GenerateRandomParser, ChaosParser, EmptyParser)
-from eelsparser import EelsParser
-from mpesparser import MPESParser
-from aptfimparser import APTFIMParser
-
-try:
-    # these packages are not available without parsing extra, which is ok, if the
-    # parsers are only initialized to load their metainfo definitions
-    import magic
-    import gzip
-    import bz2
-    import lzma
-
-    _compressions = {
-        b'\x1f\x8b\x08': ('gz', gzip.open),
-        b'\x42\x5a\x68': ('bz2', bz2.open),
-        b'\xfd\x37\x7a': ('xz', lzma.open)
-    }
-
-    encoding_magic = magic.Magic(mime_encoding=True)
-
-except ImportError:
-    pass
-
-
-def match_parser(mainfile_path: str, strict=True) -> 'Parser':
-    '''
-    Performs parser matching. This means it take the given mainfile and potentially
-    opens it with the given callback and tries to identify a parser that can parse
-    the file.
-
-    This is determined by filename (e.g. *.out), mime type (e.g. text/*, application/xml),
-    and beginning file contents.
-
-    Arguments:
-        mainfile_path: Path to the mainfile
-        strict: Only match strict parsers, e.g. no artificial parsers for missing or empty entries.
-
-    Returns: The parser, or None if no parser could be matched.
-    '''
-    mainfile = os.path.basename(mainfile_path)
-    if mainfile.startswith('.') or mainfile.startswith('~'):
-        return None
-
-    with open(mainfile_path, 'rb') as f:
-        compression, open_compressed = _compressions.get(f.read(3), (None, open))
-
-    with open_compressed(mainfile_path, 'rb') as cf:  # type: ignore
-        buffer = cf.read(config.parser_matching_size)
-
-    mime_type = magic.from_buffer(buffer, mime=True)
-
-    decoded_buffer = None
-    encoding = None
-    try:  # Try to open the file as a string for regex matching.
-        decoded_buffer = buffer.decode('utf-8')
-    except UnicodeDecodeError:
-        # This file is either binary or has wrong encoding
-        encoding = encoding_magic.from_buffer(buffer)
-
-        if config.services.force_raw_file_decoding:
-            encoding = 'iso-8859-1'
-
-        if encoding in ['iso-8859-1']:
-            try:
-                decoded_buffer = buffer.decode(encoding)
-            except Exception:
-                pass
-
-    for parser in parsers:
-        if strict and isinstance(parser, (MissingParser, EmptyParser)):
-            continue
-
-        if parser.is_mainfile(mainfile_path, mime_type, buffer, decoded_buffer, compression):
-            # potentially convert the file
-            if encoding in ['iso-8859-1']:
-                try:
-                    with open(mainfile_path, 'rb') as binary_file:
-                        content = binary_file.read().decode(encoding)
-                except Exception:
-                    pass
-                else:
-                    with open(mainfile_path, 'wt') as text_file:
-                        text_file.write(content)
-
-            # TODO: deal with multiple possible parser specs
-            return parser
-
-    return None
-
-
-parsers = [
-    GenerateRandomParser(),
-    TemplateParser(),
-    ChaosParser(),
-    LegacyParser(
-        name='parsers/phonopy', code_name='Phonopy', code_homepage='https://phonopy.github.io/phonopy/',
-        parser_class_name='phonopyparser.PhonopyParserWrapper',
-        # mainfile_contents_re=r'',  # Empty regex since this code calls other DFT codes.
-        mainfile_name_re=(r'.*/phonopy-FHI-aims-displacement-0*1/control.in$')
-    ),
-    LegacyParser(
-        name='parsers/vasp', code_name='VASP', code_homepage='https://www.vasp.at/',
-        parser_class_name='vaspparser.VASPRunParser',
-        mainfile_mime_re=r'(application/.*)|(text/.*)',
-        mainfile_contents_re=(
-            r'^\s*<\?xml version="1\.0" encoding="ISO-8859-1"\?>\s*'
-            r'?\s*<modeling>'
-            r'?\s*<generator>'
-            r'?\s*<i name="program" type="string">\s*vasp\s*</i>'
-            r'?'),
-        supported_compressions=['gz', 'bz2', 'xz']
-    ),
-    VaspOutcarParser(
-        name='parsers/vasp-outcar', code_name='VASP', code_homepage='https://www.vasp.at/',
-        parser_class_name='vaspparser.VaspOutcarParser',
-        mainfile_name_re=r'(.*/)?OUTCAR(\.[^\.]*)?',
-        mainfile_contents_re=(r'^\svasp\.')
-    ),
-    LegacyParser(
-        name='parsers/exciting', code_name='exciting', code_homepage='http://exciting-code.org/',
-        parser_class_name='excitingparser.ExcitingParser',
-        mainfile_name_re=r'^.*.OUT(\.[^/]*)?$',
-        mainfile_contents_re=(r'EXCITING.*started')
-    ),
-    LegacyParser(
-        name='parsers/fhi-aims', code_name='FHI-aims', code_homepage='https://aimsclub.fhi-berlin.mpg.de/',
-        parser_class_name='fhiaimsparser.FHIaimsParser',
-        mainfile_contents_re=(
-            r'^(.*\n)*'
-            r'?\s*Invoking FHI-aims \.\.\.'
-            # r'?\s*Version'
-        )
-    ),
-    LegacyParser(
-        name='parsers/cp2k', code_name='CP2K', code_homepage='https://www.cp2k.org/',
-        parser_class_name='cp2kparser.CP2KParser',
-        mainfile_contents_re=(
-            r'\*\*\*\* \*\*\*\* \*\*\*\*\*\*  \*\*  PROGRAM STARTED AT\s.*\n'
-            r' \*\*\*\*\* \*\* \*\*\*  \*\*\* \*\*   PROGRAM STARTED ON\s*.*\n'
-            r' \*\*    \*\*\*\*   \*\*\*\*\*\*    PROGRAM STARTED BY .*\n'
-            r' \*\*\*\*\* \*\*    \*\* \*\* \*\*   PROGRAM PROCESS ID .*\n'
-            r'  \*\*\*\* \*\*  \*\*\*\*\*\*\*  \*\*  PROGRAM STARTED IN .*\n'
-        )
-    ),
-    LegacyParser(
-        name='parsers/crystal', code_name='Crystal', code_homepage='https://www.crystal.unito.it/',
-        parser_class_name='crystalparser.CrystalParser',
-        mainfile_contents_re=(
-            r'(CRYSTAL\s*\n\d+ \d+ \d+)|(CRYSTAL will run on \d+ processors)|'
-            r'(\s*\*\s*CRYSTAL[\d]+\s*\*\s*\*\s*(public|Release) \: [\d\.]+.*\*)|'
-            r'(Executable:\s*[/_\-a-zA-Z0-9]*MPPcrystal)'
-        )
-    ),
-    # The main contents regex of CPMD was causing a catostrophic backtracking issue
-    # when searching through the first 500 bytes of main files. We decided
-    # to use only a portion of the regex to avoid that issue.
-    LegacyParser(
-        name='parsers/cpmd', code_name='CPMD', code_homepage='https://www.lcrc.anl.gov/for-users/software/available-software/cpmd/',
-        parser_class_name='cpmdparser.CPMDParser',
-        mainfile_contents_re=(
-            # r'\s+\*\*\*\*\*\*  \*\*\*\*\*\*    \*\*\*\*  \*\*\*\*  \*\*\*\*\*\*\s*'
-            # r'\s+\*\*\*\*\*\*\*  \*\*\*\*\*\*\*   \*\*\*\*\*\*\*\*\*\*  \*\*\*\*\*\*\*\s+'
-            r'\*\*\*       \*\*   \*\*\*  \*\* \*\*\*\* \*\*  \*\*   \*\*\*'
-            # r'\s+\*\*        \*\*   \*\*\*  \*\*  \*\*  \*\*  \*\*    \*\*\s+'
-            # r'\s+\*\*        \*\*\*\*\*\*\*   \*\*      \*\*  \*\*    \*\*\s+'
-            # r'\s+\*\*\*       \*\*\*\*\*\*    \*\*      \*\*  \*\*   \*\*\*\s+'
-            # r'\s+\*\*\*\*\*\*\*  \*\*        \*\*      \*\*  \*\*\*\*\*\*\*\s+'
-            # r'\s+\*\*\*\*\*\*  \*\*        \*\*      \*\*  \*\*\*\*\*\*\s+'
-        )
-    ),
-    LegacyParser(
-        name='parsers/nwchem', code_name='NWChem', code_homepage='http://www.nwchem-sw.org/',
-        parser_class_name='nwchemparser.NWChemParser',
-        mainfile_contents_re=(
-            r'Northwest Computational Chemistry Package \(NWChem\) (\d+\.)+\d+'
-        )
-    ),
-    LegacyParser(
-        name='parsers/bigdft', code_name='BigDFT', code_homepage='http://bigdft.org/',
-        parser_class_name='bigdftparser.BigDFTParser',
-        mainfile_contents_re=(
-            # r'__________________________________ A fast and precise DFT wavelet code\s*'
-            # r'\|     \|     \|     \|     \|     \|\s*'
-            # r'\|     \|     \|     \|     \|     \|      BBBB         i       gggggg\s*'
-            # r'\|_____\|_____\|_____\|_____\|_____\|     B    B               g\s*'
-            # r'\|     \|  :  \|  :  \|     \|     \|    B     B        i     g\s*'
-            # r'\|     \|-0\+--\|-0\+--\|     \|     \|    B    B         i     g        g\s*'
-            r'\|_____\|__:__\|__:__\|_____\|_____\|___ BBBBB          i     g         g\s*'
-            # r'\|  :  \|     \|     \|  :  \|     \|    B    B         i     g         g\s*'
-            # r'\|--\+0-\|     \|     \|-0\+--\|     \|    B     B     iiii     g         g\s*'
-            # r'\|__:__\|_____\|_____\|__:__\|_____\|    B     B        i      g        g\s*'
-            # r'\|     \|  :  \|  :  \|     \|     \|    B BBBB        i        g      g\s*'
-            # r'\|     \|-0\+--\|-0\+--\|     \|     \|    B        iiiii          gggggg\s*'
-            # r'\|_____\|__:__\|__:__\|_____\|_____\|__BBBBB\s*'
-            # r'\|     \|     \|     \|  :  \|     \|                           TTTTTTTTT\s*'
-            # r'\|     \|     \|     \|--\+0-\|     \|  DDDDDD          FFFFF        T\s*'
-            # r'\|_____\|_____\|_____\|__:__\|_____\| D      D        F        TTTT T\s*'
-            # r'\|     \|     \|     \|  :  \|     \|D        D      F        T     T\s*'
-            # r'\|     \|     \|     \|--\+0-\|     \|D         D     FFFF     T     T\s*'
-            # r'\|_____\|_____\|_____\|__:__\|_____\|D___      D     F         T    T\s*'
-            # r'\|     \|     \|  :  \|     \|     \|D         D     F          TTTTT\s*'
-            # r'\|     \|     \|--\+0-\|     \|     \| D        D     F         T    T\s*'
-            # r'\|_____\|_____\|__:__\|_____\|_____\|          D     F        T     T\s*'
-            # r'\|     \|     \|     \|     \|     \|         D               T    T\s*'
-            # r'\|     \|     \|     \|     \|     \|   DDDDDD       F         TTTT\s*'
-            # r'\|_____\|_____\|_____\|_____\|_____\|______                    www\.bigdft\.org'
-        )
-    ),
-    LegacyParser(
-        name='parsers/wien2k', code_name='WIEN2k', code_homepage='http://www.wien2k.at/',
-        parser_class_name='wien2kparser.Wien2kParser',
-        mainfile_contents_re=r'\s*---------\s*:ITE[0-9]+:\s*[0-9]+\.\s*ITERATION\s*---------'
-    ),
-    LegacyParser(
-        name='parsers/band', code_name='BAND', code_homepage='https://www.scm.com/product/band_periodicdft/',
-        parser_class_name='bandparser.BANDParser',
-        mainfile_contents_re=r' +\* +Amsterdam Density Functional +\(ADF\)'),
-    LegacyParser(
-        name='parsers/gaussian', code_name='Gaussian', code_homepage='http://gaussian.com/',
-        parser_class_name='gaussianparser.GaussianParser',
-        mainfile_mime_re=r'.*',
-        mainfile_contents_re=(
-            r'\s*Cite this work as:'
-            r'\s*Gaussian [0-9]+, Revision [A-Za-z0-9\.]*,')
-    ),
-    LegacyParser(
-        name='parsers/quantumespresso', code_name='Quantum Espresso', code_homepage='https://www.quantum-espresso.org/',
-        parser_class_name='quantumespressoparser.QuantumEspressoParserPWSCF',
-        mainfile_contents_re=(
-            r'(Program PWSCF.*starts)|'
-            r'(Current dimensions of program PWSCF are)')
-        #    r'^(.*\n)*'
-        #    r'\s*Program (\S+)\s+v\.(\S+)(?:\s+\(svn\s+rev\.\s+'
-        #    r'(\d+)\s*\))?\s+starts[^\n]+'
-        #    r'(?:\s*\n?)*This program is part of the open-source Quantum')
-    ),
-    LegacyParser(
-        name='parsers/abinit', code_name='ABINIT', code_homepage='https://www.abinit.org/',
-        parser_class_name='abinitparser.AbinitParser',
-        mainfile_contents_re=(r'^\n*\.Version\s*[0-9.]*\s*of ABINIT\s*')
-    ),
-    LegacyParser(
-        name='parsers/orca', code_name='ORCA', code_homepage='https://orcaforum.kofo.mpg.de/',
-        parser_class_name='orcaparser.OrcaParser',
-        mainfile_contents_re=(
-            r'\s+\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*'
-            r'\s+\* O   R   C   A \*\s*'
-            r'\s+\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*'
-            r'\s*'
-            r'\s*--- An Ab Initio, DFT and Semiempirical electronic structure package ---\s*')
-    ),
-    LegacyParser(
-        name='parsers/castep', code_name='CASTEP', code_homepage='http://www.castep.org/',
-        parser_class_name='castepparser.CastepParser',
-        mainfile_contents_re=(r'\s\|\s*CCC\s*AA\s*SSS\s*TTTTT\s*EEEEE\s*PPPP\s*\|\s*')
-    ),
-    LegacyParser(
-        name='parsers/dl-poly', code_name='DL_POLY', code_homepage='https://www.scd.stfc.ac.uk/Pages/DL_POLY.aspx',
-        parser_class_name='dlpolyparser.DlPolyParserWrapper',
-        mainfile_contents_re=(r'\*\* DL_POLY \*\*')
-    ),
-    LegacyParser(
-        name='parsers/lib-atoms', code_name='libAtoms', code_homepage='https://libatoms.github.io/',
-        parser_class_name='libatomsparser.LibAtomsParserWrapper',
-        mainfile_contents_re=(r'\s*<GAP_params\s')
-    ),
-    LegacyParser(
-        name='parsers/octopus', code_name='Octopus', code_homepage='https://octopus-code.org/',
-        parser_class_name='octopusparser.OctopusParserWrapper',
-        mainfile_contents_re=(r'\|0\) ~ \(0\) \|')
-        # We decided to use the octopus eyes instead of
-        # r'\*{32} Grid \*{32}Simulation Box:' since it was so far down in the file.
-    ),
-    # match gpaw2 first, other .gpw files are then considered to be "gpaw1"
-    LegacyParser(
-        name='parsers/gpaw2', code_name='GPAW', code_homepage='https://wiki.fysik.dtu.dk/gpaw/',
-        parser_class_name='gpawparser.GPAWParser2Wrapper',
-        mainfile_binary_header=b'GPAW',
-        mainfile_name_re=(r'^.*\.(gpw2|gpw)$'),
-        mainfile_mime_re=r'application/(x-tar|octet-stream)'
-    ),
-    LegacyParser(
-        name='parsers/gpaw', code_name='GPAW', code_homepage='https://wiki.fysik.dtu.dk/gpaw/',
-        parser_class_name='gpawparser.GPAWParserWrapper',
-        mainfile_name_re=(r'^.*\.gpw$'),
-        mainfile_mime_re=r'application/(x-tar|octet-stream)'
-    ),
-    LegacyParser(
-        name='parsers/atk', code_name='ATK', code_homepage='https://www.synopsys.com/silicon/quantumatk.html',
-        parser_class_name='atkparser.ATKParserWrapper',
-        # mainfile_contents_re=r'',  # We can't read .gpw as txt - of UlmGPAW|AFFormatGPAW'
-        mainfile_name_re=r'^.*\.nc',
-        # The previously used mime type r'application/x-netcdf' wasn't found by magic library.
-        mainfile_mime_re=r'application/octet-stream'
-    ),
-    LegacyParser(
-        name='parsers/gulp', code_name='gulp', code_homepage='http://gulp.curtin.edu.au/gulp/',
-        parser_class_name='gulpparser.GULPParser',
-        mainfile_contents_re=(
-            r'\s*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*'
-            r'\*\*\*\*\*\*\*\*\*\*\*\*\*\s*'
-            r'\s*\*\s*GENERAL UTILITY LATTICE PROGRAM\s*\*\s*')
-    ),
-    LegacyParser(
-        name='parsers/siesta', code_name='Siesta', code_homepage='https://departments.icmab.es/leem/siesta/',
-        parser_class_name='siestaparser.SiestaParser',
-        mainfile_contents_re=(
-            r'(Siesta Version: siesta-|SIESTA [0-9]\.[0-9]\.[0-9])|'
-            r'(\*\s*WELCOME TO SIESTA\s*\*)')
-    ),
-    LegacyParser(
-        name='parsers/elk', code_name='elk', code_homepage='http://elk.sourceforge.net/',
-        parser_class_name='elkparser.ElkParser',
-        mainfile_contents_re=r'\| Elk version [0-9.a-zA-Z]+ started \|'
-    ),
-    LegacyParser(
-        name='parsers/elastic', code_name='elastic', code_homepage='http://exciting-code.org/elastic',
-        parser_class_name='elasticparser.ElasticParser',
-        mainfile_contents_re=r'\s*Order of elastic constants\s*=\s*[0-9]+\s*'
-    ),
-    LegacyParser(
-        name='parsers/gamess', code_name='GAMESS', code_homepage='https://www.msg.chem.iastate.edu/gamess/versions.html',
-        parser_class_name='gamessparser.GamessParser',
-        mainfile_contents_re=(
-            r'\s*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*'
-            r'\s*\*\s*GAMESS VERSION =\s*(.*)\*\s*'
-            r'\s*\*\s*FROM IOWA STATE UNIVERSITY\s*\*\s*')
-    ),
-    LegacyParser(
-        name='parsers/turbomole', code_name='turbomole', code_homepage='https://www.turbomole.org/',
-        parser_class_name='turbomoleparser.TurbomoleParser',
-        mainfile_contents_re=(
-            r'Copyright \(C\) [0-9]+ TURBOMOLE GmbH, Karlsruhe')
-    ),
-    LegacyParser(
-        name='parsers/skeleton', code_name='skeleton', code_homepage=None,
-        domain='ems',
-        parser_class_name='skeletonparser.SkeletonParserInterface',
-        mainfile_mime_re=r'(application/json)|(text/.*)',
-        mainfile_contents_re=(r'skeleton experimental metadata format')
-    ),
-    MPESParser(),
-    APTFIMParser(),
-    EelsParser(),
-    LegacyParser(
-        name='parsers/qbox', code_name='qbox', code_homepage='http://qboxcode.org/', domain='dft',
-        parser_class_name='qboxparser.QboxParser',
-        mainfile_mime_re=r'(application/xml)|(text/.*)',
-        mainfile_contents_re=(r'http://qboxcode.org')
-    ),
-    LegacyParser(
-        name='parsers/dmol', code_name='DMol3', code_homepage='http://dmol3.web.psi.ch/dmol3.html', domain='dft',
-        parser_class_name='dmol3parser.Dmol3Parser',
-        mainfile_name_re=r'.*\.outmol',
-        mainfile_contents_re=r'Materials Studio DMol\^3'
-    ),
-    LegacyParser(
-        name='parsers/fleur', code_name='fleur', code_homepage='https://www.flapw.de/', domain='dft',
-        parser_class_name='fleurparser.FleurParser',
-        mainfile_contents_re=r'This output is generated by fleur.'
-    ),
-    LegacyParser(
-        name='parsers/molcas', code_name='MOLCAS', code_homepage='http://www.molcas.org/', domain='dft',
-        parser_class_name='molcasparser.MolcasParser',
-        mainfile_contents_re=r'M O L C A S'
-    ),
-    LegacyParser(
-        name='parsers/onetep', code_name='ONETEP', code_homepage='https://www.onetep.org/', domain='dft',
-        parser_class_name='onetepparser.OnetepParser',
-        mainfile_contents_re=r'####### #     # ####### ####### ####### ######'
-    ),
-    LegacyParser(
-        name='parsers/openkim', code_name='OpenKIM', domain='dft',
-        parser_class_name='openkimparser.OpenKIMParser',
-        mainfile_contents_re=r'OPENKIM'
-    ),
-    LegacyParser(
-        name='parsers/tinker', code_name='TINKER', domain='dft',
-        parser_class_name='tinkerparser.TinkerParser',
-        mainfile_contents_re=r'TINKER  ---  Software Tools for Molecular Design'
-    ),
-    LegacyParser(
-        name='parsers/lammps', code_name='lammps', domain='dft',
-        parser_class_name='lammpsparser.LammpsParser',
-        mainfile_contents_re=r'^LAMMPS'
-    ),
-    LegacyParser(
-        name='parsers/amber', code_name='Amber', domain='dft',
-        parser_class_name='amberparser.AMBERParser',
-        mainfile_contents_re=r'\s*Amber\s[0-9]+\s[A-Z]+\s*[0-9]+'
-    ),
-    LegacyParser(
-        name='parsers/gromacs', code_name='Gromacs', domain='dft',
-        parser_class_name='gromacsparser.GROMACSParser',
-        mainfile_contents_re=r'GROMACS - gmx mdrun'
-    ),
-    LegacyParser(
-        name='parsers/gromos', code_name='Gromos', domain='dft',
-        parser_class_name='gromosparser.GromosParser',
-        mainfile_contents_re=r'Bugreports to http://www.gromos.net'
-    ),
-    LegacyParser(
-        name='parsers/namd', code_name='Namd', domain='dft',
-        parser_class_name='namdparser.NamdParser',
-        mainfile_contents_re=r'\s*Info:\s*NAMD\s*[0-9.]+\s*for\s*',
-        mainfile_mime_re=r'text/.*',
-    ),
-    LegacyParser(
-        name='parsers/charmm', code_name='Charmm', domain='dft',
-        parser_class_name='charmmparser.CharmmParser',
-        mainfile_contents_re=r'\s*Chemistry\s*at\s*HARvard\s*Macromolecular\s*Mechanics\s*',
-        mainfile_mime_re=r'text/.*',
-    ),
-    LegacyParser(
-        name='parsers/dftbplus', code_name='DFTb plus', domain='dft',
-        parser_class_name='dftbplusparser.DFTBPlusParser',
-        mainfile_contents_re=r'^ Fermi distribution function\s*',
-        mainfile_mime_re=r'text/.*',
-    ),
-    LegacyParser(
-        name='parsers/asap', code_name='ASAP', domain='dft',
-        parser_class_name='asapparser.AsapParser',
-        mainfile_name_re=r'.*.traj$',
-        mainfile_mime_re=r'application/octet-stream',
-    ),
-    LegacyParser(
-        name='parsers/fplo', code_name='fplo', domain='dft',
-        parser_class_name='fploparser.FploParser',
-        mainfile_contents_re=r'\s*\|\s*FULL-POTENTIAL LOCAL-ORBITAL MINIMUM BASIS BANDSTRUCTURE CODE\s*\|\s*',
-        mainfile_mime_re=r'text/.*',
-    ),
-    LegacyParser(
-        name='parsers/mopac', code_name='MOPAC', domain='dft',
-        parser_class_name='mopacparser.MopacParser',
-        mainfile_contents_re=r'\s*\*\*\s*MOPAC\s*([0-9a-zA-Z]*)\s*\*\*\s*',
-        mainfile_mime_re=r'text/.*',
-    )
-]
-
-empty_parsers = [
-    EmptyParser(
-        name='missing/octopus', code_name='Octopus', code_homepage='https://octopus-code.org/',
-        domain='dft',
-        mainfile_name_re=r'(inp)|(.*/inp)'
-    ),
-    EmptyParser(
-        name='missing/crystal', code_name='Crystal', code_homepage='https://www.crystal.unito.it/index.php',
-        domain='dft',
-        mainfile_name_re=r'.*\.cryst\.out'
-    ),
-    EmptyParser(
-        name='missing/wien2k', code_name='WIEN2k', code_homepage='http://www.wien2k.at/',
-        domain='dft',
-        mainfile_name_re=r'.*\.scf'
-    ),
-    EmptyParser(
-        name='missing/fhi-aims', code_name='FHI-aims', code_homepage='https://aimsclub.fhi-berlin.mpg.de/',
-        domain='dft',
-        mainfile_name_re=r'.*\.fhiaims'
-    )
-]
-
-if config.use_empty_parsers:
-    # There are some entries with PIDs that have mainfiles which do not match what
-    # the actual parsers expect. We use the EmptyParser to produce placeholder entries
-    # to keep the PIDs. These parsers will not match for new, non migrated data.
-    parsers.extend(empty_parsers)
-
-parsers.append(BrokenParser())
-
-''' Instantiation and constructor based config of all parsers. '''
-
-parser_dict = {parser.name: parser for parser in parsers + empty_parsers}  # type: ignore
-''' A dict to access parsers by name. Usually 'parsers/<...>', e.g. 'parsers/vasp'. '''
-
-# renamed parsers
-parser_dict['parser/broken'] = parser_dict['parsers/broken']
-parser_dict['parser/fleur'] = parser_dict['parsers/fleur']
-parser_dict['parser/molcas'] = parser_dict['parsers/molcas']
-parser_dict['parser/octopus'] = parser_dict['parsers/octopus']
-parser_dict['parser/onetep'] = parser_dict['parsers/onetep']
-
-# register code names as possible statistic value to the dft datamodel
-code_names = sorted(
-    set([
-        getattr(parser, 'code_name')
-        for parser in parsers
-        if parser.domain == 'dft' and getattr(parser, 'code_name', None) is not None and getattr(parser, 'code_name') != 'currupted mainfile']),
-    key=lambda code_name: code_name.lower())
-datamodel.DFTMetadata.code_name.a_search.statistic_values = code_names + [config.services.unavailable_value, config.services.not_processed_value]
+from nomad.parsing.artificial import TemplateParser, GenerateRandomParser, ChaosParser, EmptyParser
diff --git a/nomad/parsing/parsers.py b/nomad/parsing/parsers.py
new file mode 100644
index 0000000000000000000000000000000000000000..c477e0dcbe9d706f6212a4c418f8b25aa91aefce
--- /dev/null
+++ b/nomad/parsing/parsers.py
@@ -0,0 +1,513 @@
+# Copyright 2018 Markus Scheidgen
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import os.path
+
+from nomad import config, datamodel
+
+from .parser import MissingParser, BrokenParser, Parser
+from .legacy import LegacyParser, VaspOutcarParser
+from .artificial import EmptyParser, GenerateRandomParser, TemplateParser, ChaosParser
+
+from eelsparser import EelsParser
+from mpesparser import MPESParser
+from aptfimparser import APTFIMParser
+
+try:
+    # these packages are not available without the parsing extra, which is ok if the
+    # parsers are only initialized to load their metainfo definitions
+    import magic
+    import gzip
+    import bz2
+    import lzma
+
+    _compressions = {  # first 3 bytes of a file -> (compression name, open function)
+        b'\x1f\x8b\x08': ('gz', gzip.open),
+        b'\x42\x5a\x68': ('bz2', bz2.open),
+        b'\xfd\x37\x7a': ('xz', lzma.open)
+    }
+
+    encoding_magic = magic.Magic(mime_encoding=True)  # used by match_parser for non utf-8 files
+
+except ImportError:
+    pass  # NOTE: match_parser uses the names above and raises NameError without them
+
+
+def match_parser(mainfile_path: str, strict=True) -> Parser:
+    '''
+    Performs parser matching. This means it takes the given mainfile, reads the first
+    config.parser_matching_size bytes (transparently decompressing gz, bz2, and xz), and
+    tries to identify a parser that can parse the file.
+
+    This is determined by filename (e.g. *.out), mime type (e.g. text/*, application/xml),
+    and beginning file contents. The first parser in the parsers list that matches wins.
+
+    Arguments:
+        mainfile_path: Path to the mainfile
+        strict: Only match strict parsers, e.g. no artificial parsers for missing or empty entries.
+
+    Returns: The parser, or None if no parser could be matched.
+    '''
+    mainfile = os.path.basename(mainfile_path)
+    if mainfile.startswith('.') or mainfile.startswith('~'):  # such file names are never matched
+        return None
+
+    with open(mainfile_path, 'rb') as f:  # sniff the first 3 bytes for a known compression signature
+        compression, open_compressed = _compressions.get(f.read(3), (None, open))
+
+    with open_compressed(mainfile_path, 'rb') as cf:  # type: ignore
+        buffer = cf.read(config.parser_matching_size)  # only the beginning of the file is used for matching
+
+    mime_type = magic.from_buffer(buffer, mime=True)
+
+    decoded_buffer = None
+    encoding = None
+    try:  # Try to decode the buffer as a string for regex matching.
+        decoded_buffer = buffer.decode('utf-8')
+    except UnicodeDecodeError:
+        # This file is either binary or not utf-8 encoded; let magic guess the encoding
+        encoding = encoding_magic.from_buffer(buffer)
+
+        if config.services.force_raw_file_decoding:  # config override for any non utf-8 file
+            encoding = 'iso-8859-1'
+
+        if encoding in ['iso-8859-1']:
+            try:
+                decoded_buffer = buffer.decode(encoding)
+            except Exception:
+                pass  # decoded_buffer stays None
+
+    for parser in parsers:
+        if strict and isinstance(parser, (MissingParser, EmptyParser)):  # artificial parsers are skipped
+            continue
+
+        if parser.is_mainfile(mainfile_path, mime_type, buffer, decoded_buffer, compression):
+            # potentially convert the file: an iso-8859-1 mainfile is rewritten in place in text mode
+            if encoding in ['iso-8859-1']:
+                try:
+                    with open(mainfile_path, 'rb') as binary_file:
+                        content = binary_file.read().decode(encoding)
+                except Exception:
+                    pass  # best effort: the file is left untouched if it cannot be read or decoded
+                else:
+                    with open(mainfile_path, 'wt') as text_file:
+                        text_file.write(content)
+
+            # TODO: deal with multiple possible parser specs
+            return parser
+
+    return None  # NOTE(review): the annotation says Parser, but None signals that nothing matched
+
+
+parsers = [
+    GenerateRandomParser(),
+    TemplateParser(),
+    ChaosParser(),
+    LegacyParser(
+        name='parsers/phonopy', code_name='Phonopy', code_homepage='https://phonopy.github.io/phonopy/',
+        parser_class_name='phonopyparser.PhonopyParserWrapper',
+        # mainfile_contents_re=r'',  # Empty regex since this code calls other DFT codes.
+        mainfile_name_re=(r'.*/phonopy-FHI-aims-displacement-0*1/control.in$')
+    ),
+    LegacyParser(
+        name='parsers/vasp', code_name='VASP', code_homepage='https://www.vasp.at/',
+        parser_class_name='vaspparser.VASPRunParser',
+        mainfile_mime_re=r'(application/.*)|(text/.*)',
+        mainfile_contents_re=(
+            r'^\s*<\?xml version="1\.0" encoding="ISO-8859-1"\?>\s*'
+            r'?\s*<modeling>'
+            r'?\s*<generator>'
+            r'?\s*<i name="program" type="string">\s*vasp\s*</i>'
+            r'?'),
+        supported_compressions=['gz', 'bz2', 'xz']
+    ),
+    VaspOutcarParser(
+        name='parsers/vasp-outcar', code_name='VASP', code_homepage='https://www.vasp.at/',
+        parser_class_name='vaspparser.VaspOutcarParser',
+        mainfile_name_re=r'(.*/)?OUTCAR(\.[^\.]*)?',
+        mainfile_contents_re=(r'^\svasp\.')
+    ),
+    LegacyParser(
+        name='parsers/exciting', code_name='exciting', code_homepage='http://exciting-code.org/',
+        parser_class_name='excitingparser.ExcitingParser',
+        mainfile_name_re=r'^.*.OUT(\.[^/]*)?$',
+        mainfile_contents_re=(r'EXCITING.*started')
+    ),
+    LegacyParser(
+        name='parsers/fhi-aims', code_name='FHI-aims', code_homepage='https://aimsclub.fhi-berlin.mpg.de/',
+        parser_class_name='fhiaimsparser.FHIaimsParser',
+        mainfile_contents_re=(
+            r'^(.*\n)*'
+            r'?\s*Invoking FHI-aims \.\.\.'
+            # r'?\s*Version'
+        )
+    ),
+    LegacyParser(
+        name='parsers/cp2k', code_name='CP2K', code_homepage='https://www.cp2k.org/',
+        parser_class_name='cp2kparser.CP2KParser',
+        mainfile_contents_re=(
+            r'\*\*\*\* \*\*\*\* \*\*\*\*\*\*  \*\*  PROGRAM STARTED AT\s.*\n'
+            r' \*\*\*\*\* \*\* \*\*\*  \*\*\* \*\*   PROGRAM STARTED ON\s*.*\n'
+            r' \*\*    \*\*\*\*   \*\*\*\*\*\*    PROGRAM STARTED BY .*\n'
+            r' \*\*\*\*\* \*\*    \*\* \*\* \*\*   PROGRAM PROCESS ID .*\n'
+            r'  \*\*\*\* \*\*  \*\*\*\*\*\*\*  \*\*  PROGRAM STARTED IN .*\n'
+        )
+    ),
+    LegacyParser(
+        name='parsers/crystal', code_name='Crystal', code_homepage='https://www.crystal.unito.it/',
+        parser_class_name='crystalparser.CrystalParser',
+        mainfile_contents_re=(
+            r'(CRYSTAL\s*\n\d+ \d+ \d+)|(CRYSTAL will run on \d+ processors)|'
+            r'(\s*\*\s*CRYSTAL[\d]+\s*\*\s*\*\s*(public|Release) \: [\d\.]+.*\*)|'
+            r'(Executable:\s*[/_\-a-zA-Z0-9]*MPPcrystal)'
+        )
+    ),
+    # The main contents regex of CPMD was causing a catastrophic backtracking issue
+    # when searching through the first 500 bytes of main files. We decided
+    # to use only a portion of the regex to avoid that issue.
+    LegacyParser(
+        name='parsers/cpmd', code_name='CPMD', code_homepage='https://www.lcrc.anl.gov/for-users/software/available-software/cpmd/',
+        parser_class_name='cpmdparser.CPMDParser',
+        mainfile_contents_re=(
+            # r'\s+\*\*\*\*\*\*  \*\*\*\*\*\*    \*\*\*\*  \*\*\*\*  \*\*\*\*\*\*\s*'
+            # r'\s+\*\*\*\*\*\*\*  \*\*\*\*\*\*\*   \*\*\*\*\*\*\*\*\*\*  \*\*\*\*\*\*\*\s+'
+            r'\*\*\*       \*\*   \*\*\*  \*\* \*\*\*\* \*\*  \*\*   \*\*\*'
+            # r'\s+\*\*        \*\*   \*\*\*  \*\*  \*\*  \*\*  \*\*    \*\*\s+'
+            # r'\s+\*\*        \*\*\*\*\*\*\*   \*\*      \*\*  \*\*    \*\*\s+'
+            # r'\s+\*\*\*       \*\*\*\*\*\*    \*\*      \*\*  \*\*   \*\*\*\s+'
+            # r'\s+\*\*\*\*\*\*\*  \*\*        \*\*      \*\*  \*\*\*\*\*\*\*\s+'
+            # r'\s+\*\*\*\*\*\*  \*\*        \*\*      \*\*  \*\*\*\*\*\*\s+'
+        )
+    ),
+    LegacyParser(
+        name='parsers/nwchem', code_name='NWChem', code_homepage='http://www.nwchem-sw.org/',
+        parser_class_name='nwchemparser.NWChemParser',
+        mainfile_contents_re=(
+            r'Northwest Computational Chemistry Package \(NWChem\) (\d+\.)+\d+'
+        )
+    ),
+    LegacyParser(
+        name='parsers/bigdft', code_name='BigDFT', code_homepage='http://bigdft.org/',
+        parser_class_name='bigdftparser.BigDFTParser',
+        mainfile_contents_re=(
+            # r'__________________________________ A fast and precise DFT wavelet code\s*'
+            # r'\|     \|     \|     \|     \|     \|\s*'
+            # r'\|     \|     \|     \|     \|     \|      BBBB         i       gggggg\s*'
+            # r'\|_____\|_____\|_____\|_____\|_____\|     B    B               g\s*'
+            # r'\|     \|  :  \|  :  \|     \|     \|    B     B        i     g\s*'
+            # r'\|     \|-0\+--\|-0\+--\|     \|     \|    B    B         i     g        g\s*'
+            r'\|_____\|__:__\|__:__\|_____\|_____\|___ BBBBB          i     g         g\s*'
+            # r'\|  :  \|     \|     \|  :  \|     \|    B    B         i     g         g\s*'
+            # r'\|--\+0-\|     \|     \|-0\+--\|     \|    B     B     iiii     g         g\s*'
+            # r'\|__:__\|_____\|_____\|__:__\|_____\|    B     B        i      g        g\s*'
+            # r'\|     \|  :  \|  :  \|     \|     \|    B BBBB        i        g      g\s*'
+            # r'\|     \|-0\+--\|-0\+--\|     \|     \|    B        iiiii          gggggg\s*'
+            # r'\|_____\|__:__\|__:__\|_____\|_____\|__BBBBB\s*'
+            # r'\|     \|     \|     \|  :  \|     \|                           TTTTTTTTT\s*'
+            # r'\|     \|     \|     \|--\+0-\|     \|  DDDDDD          FFFFF        T\s*'
+            # r'\|_____\|_____\|_____\|__:__\|_____\| D      D        F        TTTT T\s*'
+            # r'\|     \|     \|     \|  :  \|     \|D        D      F        T     T\s*'
+            # r'\|     \|     \|     \|--\+0-\|     \|D         D     FFFF     T     T\s*'
+            # r'\|_____\|_____\|_____\|__:__\|_____\|D___      D     F         T    T\s*'
+            # r'\|     \|     \|  :  \|     \|     \|D         D     F          TTTTT\s*'
+            # r'\|     \|     \|--\+0-\|     \|     \| D        D     F         T    T\s*'
+            # r'\|_____\|_____\|__:__\|_____\|_____\|          D     F        T     T\s*'
+            # r'\|     \|     \|     \|     \|     \|         D               T    T\s*'
+            # r'\|     \|     \|     \|     \|     \|   DDDDDD       F         TTTT\s*'
+            # r'\|_____\|_____\|_____\|_____\|_____\|______                    www\.bigdft\.org'
+        )
+    ),
+    LegacyParser(
+        name='parsers/wien2k', code_name='WIEN2k', code_homepage='http://www.wien2k.at/',
+        parser_class_name='wien2kparser.Wien2kParser',
+        mainfile_contents_re=r'\s*---------\s*:ITE[0-9]+:\s*[0-9]+\.\s*ITERATION\s*---------'
+    ),
+    LegacyParser(
+        name='parsers/band', code_name='BAND', code_homepage='https://www.scm.com/product/band_periodicdft/',
+        parser_class_name='bandparser.BANDParser',
+        mainfile_contents_re=r' +\* +Amsterdam Density Functional +\(ADF\)'),
+    LegacyParser(
+        name='parsers/gaussian', code_name='Gaussian', code_homepage='http://gaussian.com/',
+        parser_class_name='gaussianparser.GaussianParser',
+        mainfile_mime_re=r'.*',
+        mainfile_contents_re=(
+            r'\s*Cite this work as:'
+            r'\s*Gaussian [0-9]+, Revision [A-Za-z0-9\.]*,')
+    ),
+    LegacyParser(
+        name='parsers/quantumespresso', code_name='Quantum Espresso', code_homepage='https://www.quantum-espresso.org/',
+        parser_class_name='quantumespressoparser.QuantumEspressoParserPWSCF',
+        mainfile_contents_re=(
+            r'(Program PWSCF.*starts)|'
+            r'(Current dimensions of program PWSCF are)')
+        #    r'^(.*\n)*'
+        #    r'\s*Program (\S+)\s+v\.(\S+)(?:\s+\(svn\s+rev\.\s+'
+        #    r'(\d+)\s*\))?\s+starts[^\n]+'
+        #    r'(?:\s*\n?)*This program is part of the open-source Quantum')
+    ),
+    LegacyParser(
+        name='parsers/abinit', code_name='ABINIT', code_homepage='https://www.abinit.org/',
+        parser_class_name='abinitparser.AbinitParser',
+        mainfile_contents_re=(r'^\n*\.Version\s*[0-9.]*\s*of ABINIT\s*')
+    ),
+    LegacyParser(
+        name='parsers/orca', code_name='ORCA', code_homepage='https://orcaforum.kofo.mpg.de/',
+        parser_class_name='orcaparser.OrcaParser',
+        mainfile_contents_re=(
+            r'\s+\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*'
+            r'\s+\* O   R   C   A \*\s*'
+            r'\s+\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*'
+            r'\s*'
+            r'\s*--- An Ab Initio, DFT and Semiempirical electronic structure package ---\s*')
+    ),
+    LegacyParser(
+        name='parsers/castep', code_name='CASTEP', code_homepage='http://www.castep.org/',
+        parser_class_name='castepparser.CastepParser',
+        mainfile_contents_re=(r'\s\|\s*CCC\s*AA\s*SSS\s*TTTTT\s*EEEEE\s*PPPP\s*\|\s*')
+    ),
+    LegacyParser(
+        name='parsers/dl-poly', code_name='DL_POLY', code_homepage='https://www.scd.stfc.ac.uk/Pages/DL_POLY.aspx',
+        parser_class_name='dlpolyparser.DlPolyParserWrapper',
+        mainfile_contents_re=(r'\*\* DL_POLY \*\*')
+    ),
+    LegacyParser(
+        name='parsers/lib-atoms', code_name='libAtoms', code_homepage='https://libatoms.github.io/',
+        parser_class_name='libatomsparser.LibAtomsParserWrapper',
+        mainfile_contents_re=(r'\s*<GAP_params\s')
+    ),
+    LegacyParser(
+        name='parsers/octopus', code_name='Octopus', code_homepage='https://octopus-code.org/',
+        parser_class_name='octopusparser.OctopusParserWrapper',
+        mainfile_contents_re=(r'\|0\) ~ \(0\) \|')
+        # We decided to use the octopus eyes instead of
+        # r'\*{32} Grid \*{32}Simulation Box:' since it was so far down in the file.
+    ),
+    # match gpaw2 first, other .gpw files are then considered to be "gpaw1"
+    LegacyParser(
+        name='parsers/gpaw2', code_name='GPAW', code_homepage='https://wiki.fysik.dtu.dk/gpaw/',
+        parser_class_name='gpawparser.GPAWParser2Wrapper',
+        mainfile_binary_header=b'GPAW',
+        mainfile_name_re=(r'^.*\.(gpw2|gpw)$'),
+        mainfile_mime_re=r'application/(x-tar|octet-stream)'
+    ),
+    LegacyParser(
+        name='parsers/gpaw', code_name='GPAW', code_homepage='https://wiki.fysik.dtu.dk/gpaw/',
+        parser_class_name='gpawparser.GPAWParserWrapper',
+        mainfile_name_re=(r'^.*\.gpw$'),
+        mainfile_mime_re=r'application/(x-tar|octet-stream)'
+    ),
+    LegacyParser(
+        name='parsers/atk', code_name='ATK', code_homepage='https://www.synopsys.com/silicon/quantumatk.html',
+        parser_class_name='atkparser.ATKParserWrapper',
+        # mainfile_contents_re=r'',  # We can't read .gpw as txt - of UlmGPAW|AFFormatGPAW'
+        mainfile_name_re=r'^.*\.nc',
+        # The previously used mime type r'application/x-netcdf' wasn't found by magic library.
+        mainfile_mime_re=r'application/octet-stream'
+    ),
+    LegacyParser(
+        name='parsers/gulp', code_name='gulp', code_homepage='http://gulp.curtin.edu.au/gulp/',
+        parser_class_name='gulpparser.GULPParser',
+        mainfile_contents_re=(
+            r'\s*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*'
+            r'\*\*\*\*\*\*\*\*\*\*\*\*\*\s*'
+            r'\s*\*\s*GENERAL UTILITY LATTICE PROGRAM\s*\*\s*')
+    ),
+    LegacyParser(
+        name='parsers/siesta', code_name='Siesta', code_homepage='https://departments.icmab.es/leem/siesta/',
+        parser_class_name='siestaparser.SiestaParser',
+        mainfile_contents_re=(
+            r'(Siesta Version: siesta-|SIESTA [0-9]\.[0-9]\.[0-9])|'
+            r'(\*\s*WELCOME TO SIESTA\s*\*)')
+    ),
+    LegacyParser(
+        name='parsers/elk', code_name='elk', code_homepage='http://elk.sourceforge.net/',
+        parser_class_name='elkparser.ElkParser',
+        mainfile_contents_re=r'\| Elk version [0-9.a-zA-Z]+ started \|'
+    ),
+    LegacyParser(
+        name='parsers/elastic', code_name='elastic', code_homepage='http://exciting-code.org/elastic',
+        parser_class_name='elasticparser.ElasticParser',
+        mainfile_contents_re=r'\s*Order of elastic constants\s*=\s*[0-9]+\s*'
+    ),
+    LegacyParser(
+        name='parsers/gamess', code_name='GAMESS', code_homepage='https://www.msg.chem.iastate.edu/gamess/versions.html',
+        parser_class_name='gamessparser.GamessParser',
+        mainfile_contents_re=(
+            r'\s*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*'
+            r'\s*\*\s*GAMESS VERSION =\s*(.*)\*\s*'
+            r'\s*\*\s*FROM IOWA STATE UNIVERSITY\s*\*\s*')
+    ),
+    LegacyParser(
+        name='parsers/turbomole', code_name='turbomole', code_homepage='https://www.turbomole.org/',
+        parser_class_name='turbomoleparser.TurbomoleParser',
+        mainfile_contents_re=(
+            r'Copyright \(C\) [0-9]+ TURBOMOLE GmbH, Karlsruhe')
+    ),
+    LegacyParser(
+        name='parsers/skeleton', code_name='skeleton', code_homepage=None,
+        domain='ems',
+        parser_class_name='skeletonparser.SkeletonParserInterface',
+        mainfile_mime_re=r'(application/json)|(text/.*)',
+        mainfile_contents_re=(r'skeleton experimental metadata format')
+    ),
+    MPESParser(),
+    APTFIMParser(),
+    EelsParser(),
+    LegacyParser(
+        name='parsers/qbox', code_name='qbox', code_homepage='http://qboxcode.org/', domain='dft',
+        parser_class_name='qboxparser.QboxParser',
+        mainfile_mime_re=r'(application/xml)|(text/.*)',
+        mainfile_contents_re=(r'http://qboxcode.org')
+    ),
+    LegacyParser(
+        name='parsers/dmol', code_name='DMol3', code_homepage='http://dmol3.web.psi.ch/dmol3.html', domain='dft',
+        parser_class_name='dmol3parser.Dmol3Parser',
+        mainfile_name_re=r'.*\.outmol',
+        mainfile_contents_re=r'Materials Studio DMol\^3'
+    ),
+    LegacyParser(
+        name='parsers/fleur', code_name='fleur', code_homepage='https://www.flapw.de/', domain='dft',
+        parser_class_name='fleurparser.FleurParser',
+        mainfile_contents_re=r'This output is generated by fleur.'
+    ),
+    LegacyParser(
+        name='parsers/molcas', code_name='MOLCAS', code_homepage='http://www.molcas.org/', domain='dft',
+        parser_class_name='molcasparser.MolcasParser',
+        mainfile_contents_re=r'M O L C A S'
+    ),
+    LegacyParser(
+        name='parsers/onetep', code_name='ONETEP', code_homepage='https://www.onetep.org/', domain='dft',
+        parser_class_name='onetepparser.OnetepParser',
+        mainfile_contents_re=r'####### #     # ####### ####### ####### ######'
+    ),
+    LegacyParser(
+        name='parsers/openkim', code_name='OpenKIM', domain='dft',
+        parser_class_name='openkimparser.OpenKIMParser',
+        mainfile_contents_re=r'OPENKIM'
+    ),
+    LegacyParser(
+        name='parsers/tinker', code_name='TINKER', domain='dft',
+        parser_class_name='tinkerparser.TinkerParser',
+        mainfile_contents_re=r'TINKER  ---  Software Tools for Molecular Design'
+    ),
+    LegacyParser(
+        name='parsers/lammps', code_name='lammps', domain='dft',
+        parser_class_name='lammpsparser.LammpsParser',
+        mainfile_contents_re=r'^LAMMPS'
+    ),
+    LegacyParser(
+        name='parsers/amber', code_name='Amber', domain='dft',
+        parser_class_name='amberparser.AMBERParser',
+        mainfile_contents_re=r'\s*Amber\s[0-9]+\s[A-Z]+\s*[0-9]+'
+    ),
+    LegacyParser(
+        name='parsers/gromacs', code_name='Gromacs', domain='dft',
+        parser_class_name='gromacsparser.GROMACSParser',
+        mainfile_contents_re=r'GROMACS - gmx mdrun'
+    ),
+    LegacyParser(
+        name='parsers/gromos', code_name='Gromos', domain='dft',
+        parser_class_name='gromosparser.GromosParser',
+        mainfile_contents_re=r'Bugreports to http://www.gromos.net'
+    ),
+    LegacyParser(
+        name='parsers/namd', code_name='Namd', domain='dft',
+        parser_class_name='namdparser.NamdParser',
+        mainfile_contents_re=r'\s*Info:\s*NAMD\s*[0-9.]+\s*for\s*',
+        mainfile_mime_re=r'text/.*',
+    ),
+    LegacyParser(
+        name='parsers/charmm', code_name='Charmm', domain='dft',
+        parser_class_name='charmmparser.CharmmParser',
+        mainfile_contents_re=r'\s*Chemistry\s*at\s*HARvard\s*Macromolecular\s*Mechanics\s*',
+        mainfile_mime_re=r'text/.*',
+    ),
+    LegacyParser(
+        name='parsers/dftbplus', code_name='DFTb plus', domain='dft',
+        parser_class_name='dftbplusparser.DFTBPlusParser',
+        mainfile_contents_re=r'^ Fermi distribution function\s*',
+        mainfile_mime_re=r'text/.*',
+    ),
+    LegacyParser(
+        name='parsers/asap', code_name='ASAP', domain='dft',
+        parser_class_name='asapparser.AsapParser',
+        mainfile_name_re=r'.*.traj$',
+        mainfile_mime_re=r'application/octet-stream',
+    ),
+    LegacyParser(
+        name='parsers/fplo', code_name='fplo', domain='dft',
+        parser_class_name='fploparser.FploParser',
+        mainfile_contents_re=r'\s*\|\s*FULL-POTENTIAL LOCAL-ORBITAL MINIMUM BASIS BANDSTRUCTURE CODE\s*\|\s*',
+        mainfile_mime_re=r'text/.*',
+    ),
+    LegacyParser(
+        name='parsers/mopac', code_name='MOPAC', domain='dft',
+        parser_class_name='mopacparser.MopacParser',
+        mainfile_contents_re=r'\s*\*\*\s*MOPAC\s*([0-9a-zA-Z]*)\s*\*\*\s*',
+        mainfile_mime_re=r'text/.*',
+    )
+]
+
+empty_parsers = [  # matched by file name only; always in parser_dict, in parsers only if config.use_empty_parsers
+    EmptyParser(
+        name='missing/octopus', code_name='Octopus', code_homepage='https://octopus-code.org/',
+        domain='dft',
+        mainfile_name_re=r'(inp)|(.*/inp)'
+    ),
+    EmptyParser(
+        name='missing/crystal', code_name='Crystal', code_homepage='https://www.crystal.unito.it/index.php',
+        domain='dft',
+        mainfile_name_re=r'.*\.cryst\.out'
+    ),
+    EmptyParser(
+        name='missing/wien2k', code_name='WIEN2k', code_homepage='http://www.wien2k.at/',
+        domain='dft',
+        mainfile_name_re=r'.*\.scf'
+    ),
+    EmptyParser(
+        name='missing/fhi-aims', code_name='FHI-aims', code_homepage='https://aimsclub.fhi-berlin.mpg.de/',
+        domain='dft',
+        mainfile_name_re=r'.*\.fhiaims'
+    )
+]
+
+if config.use_empty_parsers:
+    # There are some entries with PIDs that have mainfiles which do not match what
+    # the actual parsers expect. We use the EmptyParser to produce placeholder entries
+    # to keep the PIDs. These parsers will not match for new, non-migrated data.
+    parsers.extend(empty_parsers)
+
+parsers.append(BrokenParser())  # appended last, so match_parser tries it after all other parsers
+
+''' Instantiation and constructor based config of all parsers. '''
+
+parser_dict = {parser.name: parser for parser in parsers + empty_parsers}  # type: ignore
+''' A dict to access parsers by name. Usually 'parsers/<...>', e.g. 'parsers/vasp'. '''
+
+# renamed parsers: the 'parser/<name>' keys are aliases for the corresponding 'parsers/<name>' entries
+parser_dict['parser/broken'] = parser_dict['parsers/broken']
+parser_dict['parser/fleur'] = parser_dict['parsers/fleur']
+parser_dict['parser/molcas'] = parser_dict['parsers/molcas']
+parser_dict['parser/octopus'] = parser_dict['parsers/octopus']
+parser_dict['parser/onetep'] = parser_dict['parsers/onetep']
+
+# register code names as possible statistic values to the dft datamodel
+code_names = sorted(  # unique code names of all dft parsers in the parsers list
+    set([
+        getattr(parser, 'code_name')
+        for parser in parsers
+        if parser.domain == 'dft' and getattr(parser, 'code_name', None) is not None and getattr(parser, 'code_name') != 'currupted mainfile']),
+    key=lambda code_name: code_name.lower())  # case-insensitive ordering
+datamodel.DFTMetadata.code_name.a_search.statistic_values = code_names + [config.services.unavailable_value, config.services.not_processed_value]
diff --git a/nomad/processing/data.py b/nomad/processing/data.py
index 8e52799d21cd4d63eebe724b68acd1252eeed9e4..fee1f526bcf5cd759d8619b3592ac43ce26ed019 100644
--- a/nomad/processing/data.py
+++ b/nomad/processing/data.py
@@ -38,7 +38,8 @@ from structlog.processors import StackInfoRenderer, format_exc_info, TimeStamper
 from nomad import utils, config, infrastructure, search, datamodel
 from nomad.files import PathObject, UploadFiles, ExtractError, ArchiveBasedStagingUploadFiles, PublicUploadFiles, StagingUploadFiles
 from nomad.processing.base import Proc, process, task, PENDING, SUCCESS, FAILURE
-from nomad.parsing import parser_dict, match_parser, Backend
+from nomad.parsing import Backend
+from nomad.parsing.parsers import parser_dict, match_parser
 from nomad.normalizing import normalizers
 from nomad.datamodel import EntryArchive
 from nomad.archive import query_archive
@@ -1036,7 +1037,7 @@ class Upload(Proc):
             modified_upload = self._get_collection().find_one_and_update(
                 {'_id': self.upload_id, 'joined': {'$ne': True}},
                 {'$set': {'joined': True}})
-            if modified_upload['joined'] is False:
+            if modified_upload is None or modified_upload['joined'] is False:
                 self.get_logger().info('join')
 
                 # Before cleaning up, run an additional normalizer on phonon
diff --git a/tests/parser_measurement.py b/tests/parser_measurement.py
index e0cec2adac49c8dcca22cb1125ec0d590effa8c8..bc5905bb1cb2a39035889f2a24c21df7abd1c8a1 100644
--- a/tests/parser_measurement.py
+++ b/tests/parser_measurement.py
@@ -3,7 +3,7 @@ if __name__ == '__main__':
     import logging
     import time
     from nomad import config, utils
-    from nomad.parsing import parser_dict
+    from nomad.parsing.parsers import parser_dict
     from nomad.cli.parse import normalize_all
     from nomad.metainfo.legacy import LegacyMetainfoEnvironment
     from nomad.parsing.legacy import Backend
diff --git a/tests/test_datamodel.py b/tests/test_datamodel.py
index 54d46db39997c2a4ead0b5a2b9bb81e94dde7562..d60de974fe57b47cadd7bbc8fa8b5c35fda86e25 100644
--- a/tests/test_datamodel.py
+++ b/tests/test_datamodel.py
@@ -22,7 +22,8 @@ import datetime
 from ase.data import chemical_symbols
 from ase.spacegroup import Spacegroup
 
-from nomad import datamodel, parsing, utils, files
+from nomad import datamodel, utils, files
+from nomad.parsing.parsers import parser_dict
 
 number_of = 20
 
@@ -37,7 +38,7 @@ systems = ['atom', 'molecule/cluster', '2D/surface', 'bulk']
 comments = [gen.sentence() for _ in range(0, number_of)]
 references = [(i + 1, gen.url()) for i in range(0, number_of)]
 datasets = [(i + 1, gen.slug()) for i in range(0, number_of)]
-codes = list(set([parser.code_name for parser in parsing.parser_dict.values() if hasattr(parser, 'code_name')]))  # type: ignore
+codes = list(set([parser.code_name for parser in parser_dict.values() if hasattr(parser, 'code_name')]))  # type: ignore
 filepaths = ['/'.join(gen.url().split('/')[3:]) for _ in range(0, number_of)]
 
 low_numbers_for_atoms = [1, 1, 2, 2, 2, 2, 2, 3, 3, 4]
diff --git a/tests/test_parsing.py b/tests/test_parsing.py
index 6f1271a647f72055729db7ae4a208e54da11d23d..d0b88a96b0df3cafd15b01f596c382c5b3c1244d 100644
--- a/tests/test_parsing.py
+++ b/tests/test_parsing.py
@@ -20,7 +20,8 @@ import os
 from shutil import copyfile
 
 from nomad import utils, files, datamodel
-from nomad.parsing import parser_dict, match_parser, BrokenParser, BadContextUri, Backend
+from nomad.parsing import BrokenParser, BadContextUri, Backend
+from nomad.parsing.parsers import parser_dict, match_parser
 from nomad.app import dump_json
 from nomad.metainfo import MSection