config.py 12.5 KB
Newer Older
Markus Scheidgen's avatar
Markus Scheidgen committed
1
2
3
4
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
Markus Scheidgen's avatar
Markus Scheidgen committed
5
6
7
8
9
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
Markus Scheidgen's avatar
Markus Scheidgen committed
10
#     http://www.apache.org/licenses/LICENSE-2.0
Markus Scheidgen's avatar
Markus Scheidgen committed
11
12
#
# Unless required by applicable law or agreed to in writing, software
Markus Scheidgen's avatar
Markus Scheidgen committed
13
# distributed under the License is distributed on an "AS IS" BASIS,
Markus Scheidgen's avatar
Markus Scheidgen committed
14
15
16
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Markus Scheidgen's avatar
Markus Scheidgen committed
17
#
Markus Scheidgen's avatar
Markus Scheidgen committed
18

19
'''
Markus Scheidgen's avatar
Markus Scheidgen committed
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
This module describes all configurable parameters for the nomad python code. The
configuration is used for all executed python code including API, worker, CLI, and other
scripts. To use the configuration in your own scripts or new modules, simply import
this module.

All parameters are structured into objects for two reasons. First, to have
categories. Second, to allow runtime manipulation that is not affected
by python import logic. The categories are chosen along infrastructure components:
``mongo``, ``elastic``, etc.

This module also provides utilities to read the configuration from environment variables
and .yaml files. This is done automatically on import. The precedence is env over .yaml
over defaults.

.. autoclass:: nomad.config.NomadConfig
.. autofunction:: nomad.config.load_config
36
'''
Markus Scheidgen's avatar
Markus Scheidgen committed
37

Markus Scheidgen's avatar
Markus Scheidgen committed
38
import logging
39
40
41
import os
import os.path
import yaml
Markus Scheidgen's avatar
Markus Scheidgen committed
42
43
import warnings

44
45
46
47
48
49
50
51
52
53
try:
    from nomad import gitinfo
except ImportError:
    # The gitinfo module could not be imported: run the ./gitinfo.sh script from
    # the directory above this package and retry the import (presumably the
    # script generates the module -- confirm against gitinfo.sh). The script is
    # started with ``cwd=`` instead of ``os.chdir`` so that the working directory
    # of this process is never changed, even if starting the script fails.
    import subprocess

    git_root = os.path.join(os.path.dirname(__file__), '..')
    # Best effort, like the former os.system call: a failing script is not an
    # error here, the import below will raise if gitinfo is still missing.
    subprocess.run('./gitinfo.sh', shell=True, cwd=git_root, check=False)

    from nomad import gitinfo
Markus Scheidgen's avatar
Markus Scheidgen committed
54
55


Markus Scheidgen's avatar
Markus Scheidgen committed
56
57
# Suppress warnings whose message starts with numpy's "size changed" notices
# for dtype and ufunc.
warnings.filterwarnings('ignore', message='numpy.dtype size changed')
warnings.filterwarnings('ignore', message='numpy.ufunc size changed')
Markus Scheidgen's avatar
Markus Scheidgen committed
58

59

60
class NomadConfig(dict):
    '''
    A class for configuration categories. It is a dict subclass that uses attributes as
    key/value pairs: ``config.key`` reads, writes, and deletes ``config['key']``.
    '''
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def __getattr__(self, name):
        # Only called when regular attribute lookup fails, i.e. dict methods
        # still take precedence over keys of the same name.
        try:
            return self[name]
        except KeyError:
            raise AttributeError("No such attribute: " + name) from None

    def __setattr__(self, name, value):
        # Attributes are never stored on the instance, always as dict items.
        self[name] = value

    def __delattr__(self, name):
        try:
            del self[name]
        except KeyError:
            raise AttributeError("No such attribute: " + name) from None
82
83


84
85
# Routing modes; ``celery.routing`` is set to one of these values.
CELERY_WORKER_ROUTING = 'worker'
CELERY_QUEUE_ROUTING = 'queue'


# Host and credentials that ``rabbitmq_url`` assembles into the broker URL.
rabbitmq = NomadConfig(
    host='localhost',
    user='rabbitmq',
    password='rabbitmq'
)
92

93

94
95
def rabbitmq_url():
    ''' Returns the ``pyamqp`` URL built from the current ``rabbitmq`` settings. '''
    return f'pyamqp://{rabbitmq.user}:{rabbitmq.password}@{rabbitmq.host}//'
96

Lauri Himanen's avatar
Lauri Himanen committed
97

98
99
100
celery = NomadConfig(
    max_memory=64e6,  # 64 GB (i.e. the value is presumably given in kB -- confirm)
    timeout=1800,  # 1/2 h
    acks_late=False,
    routing=CELERY_QUEUE_ROUTING,
    # Priority per task name.
    # NOTE(review): whether a larger number means higher priority is not
    # visible in this module -- confirm where the priorities are consumed.
    priorities={
        'Upload.process_upload': 5,
        'Upload.delete_upload': 9,
        'Upload.publish_upload': 10
    }
)
109

110
111
112
113
# File system locations. The relative defaults live below ``.volumes/fs``.
fs = NomadConfig(
    tmp='.volumes/fs/tmp',
    staging='.volumes/fs/staging',
    public='.volumes/fs/public',
    local_tmp='/tmp',
    prefix_size=2,
    archive_version_suffix=None,
    # Captured once, when this module is imported.
    working_directory=os.getcwd()
)
119

120
121
122
# Elasticsearch connection settings and index names.
elastic = NomadConfig(
    host='localhost',
    port=9200,
    index_name='nomad_fairdi_calcs',
    materials_index_name='nomad_fairdi_materials'
)
126

127
# Keycloak server, realm, and client settings.
# NOTE(review): password='*' looks like a placeholder that deployments are
# expected to override -- confirm.
keycloak = NomadConfig(
    server_url='https://nomad-lab.eu/fairdi/keycloak/auth/',
    realm_name='fairdi_nomad_prod',
    username='admin',
    password='*',
    client_id='nomad_public',
    client_secret=None,
    oasis=False
)
135

136
137
138
139
140
# MongoDB connection settings and database name.
mongo = NomadConfig(
    host='localhost',
    port=27017,
    db_name='nomad_fairdi'
)
141

142
# Logstash settings; disabled by default.
logstash = NomadConfig(
    enabled=False,
    host='localhost',
    # Kept as a string: ``_apply`` casts overrides to the type of the default.
    tcp_port='5000',
    # Overrides given as level names are converted by ``normalize_loglevel``.
    level=logging.DEBUG
)
Markus Scheidgen's avatar
Markus Scheidgen committed
148

149
150
151
# Settings of the API service. ``api_host``, ``api_port``, ``api_base_path``,
# and ``https`` are combined into URLs by ``api_url``.
services = NomadConfig(
    api_host='localhost',
    api_port=8000,
    api_base_path='/fairdi/nomad/latest',
    api_secret='defaultApiSecret',
    api_chaos=0,
    admin_user_id='00000000-0000-0000-0000-000000000000',
    not_processed_value='not processed',
    unavailable_value='unavailable',
    https=False,
    https_upload=False,
    upload_limit=10,
    force_raw_file_decoding=False,
    download_scan_size=500,
    download_scan_timeout='30m'
)

166
167
# Settings that point to the central NOMAD installation.
oasis = NomadConfig(
    central_nomad_api_url='https://nomad-lab.eu/prod/rae/api',
    central_nomad_deployment_id='nomad-lab.eu/prod/rae',
    allowed_users=None  # a list of usernames or user account emails
)

Markus Scheidgen's avatar
Markus Scheidgen committed
172
173
174
175
# Settings for tests.
# NOTE(review): default_timeout is presumably given in seconds -- confirm.
tests = NomadConfig(
    default_timeout=30
)

176

177
def api_url(ssl: bool = True, api: str = 'api'):
    '''
    Returns the URL of an API, assembled from the ``services`` settings.

    Arguments:
        ssl: Set to False to get an ``http`` URL even if ``services.https`` is set.
        api: The last path segment of the URL, default is ``api``.

    Returns: ``<protocol>://<host>[:<port>]/[<base path>/]<api>``. The port is
        omitted for the standard ports 80 and 443.
    '''
    protocol = 'https' if services.https and ssl else 'http'

    host_and_port = services.api_host.strip('/')
    if services.api_port not in [80, 443]:
        host_and_port += ':' + str(services.api_port)

    # An empty base path (e.g. '' or '/') must not contribute a segment,
    # otherwise the URL would contain a double slash after the host.
    base_path = services.api_base_path.strip('/')
    segments = [host_and_port, base_path, api] if base_path else [host_and_port, api]

    return f'{protocol}://' + '/'.join(segments)
184

185

186
def gui_url(page: str = None):
    '''
    Returns the URL of the GUI, which is served under ``gui`` next to the API.

    Arguments:
        page: Optional path of a page within the GUI that is appended to the URL.
    '''
    # Ask for the URL with an empty last segment instead of cutting the
    # literal length of 'api' from the default URL, then drop trailing slashes.
    base = api_url(True, api='').rstrip('/')

    if page is not None:
        return '%s/gui/%s' % (base, page)

    return '%s/gui' % base

196

197
def _check_config():
    """Validates the loaded configuration. Meant to be called once, after the
    final config is loaded.

    Besides the check, a ``normalize.springer_db_path`` that points to a
    non-existing file is reset to ``None``.

    Raises:
        AssertionError: if there is a contradiction or invalid values in the
            config file settings.
    """
    # The AFLOW symmetry information is checked once on import
    if normalize.prototype_symmetry_tolerance != normalize.symmetry_tolerance:
        raise AssertionError(
            "The AFLOW prototype information is outdated due to changed tolerance "
            "for symmetry detection. Please update the AFLOW prototype information "
            "by running the CLI command 'nomad admin ops prototype-update "
            "--matches-only'."
        )

    springer_db_path = normalize.springer_db_path
    if springer_db_path and not os.path.exists(springer_db_path):
        normalize.springer_db_path = None

219

220
# Settings for sending mails; disabled by default.
mail = NomadConfig(
    enabled=False,
    with_login=False,
    host='',
    port=8995,
    user='',
    password='',
    from_address='support@nomad-lab.eu',
    cc_address='support@nomad-lab.eu'
)

normalize = NomadConfig(
    # The system size limit for running the dimensionality analysis. For very
    # large systems the dimensionality analysis will get too expensive.
    system_classification_with_clusters_threshold=64,
    # Symmetry tolerance controls the precision used by spglib in order to find
    # symmetries. The atoms are allowed to move 1/2*symmetry_tolerance from
    # their symmetry positions in order for spglib to still detect symmetries.
    # The unit is angstroms. The value of 0.1 is used e.g. by Materials Project
    # according to
    # https://pymatgen.org/pymatgen.symmetry.analyzer.html#pymatgen.symmetry.analyzer.SpacegroupAnalyzer
    symmetry_tolerance=0.1,
    # The symmetry tolerance used in aflow prototype matching. Should only be
    # changed before re-running the prototype detection. ``_check_config``
    # rejects a configuration where this differs from ``symmetry_tolerance``.
    prototype_symmetry_tolerance=0.1,
    # Maximum number of atoms in the single cell of a 2D material for it to be
    # considered valid.
    max_2d_single_cell_size=7,
    # The distance tolerance between atoms for grouping them into the same
    # cluster. Used in detecting system type.
    cluster_threshold=2.5,
    # Defines the "bin size" for rounding cell angles for the material hash
    angle_rounding=10.0,  # unit: degree
    # The threshold for a system to be considered "flat". Used e.g. when
    # determining if a 2D structure is purely 2-dimensional to allow extra rigid
    # transformations that are improper in 3D but proper in 2D.
    flat_dim_threshold=0.1,
    # The threshold for point equality in k-space. Unit: 1/m.
    k_space_precision=150e6,
    # The energy threshold for how much a band can be on top or below the fermi
    # level in order to detect a gap. Unit: Joule.
    band_structure_energy_tolerance=1.6022e-20,  # 0.1 eV
    # Resolved relative to the directory of this module. ``_check_config``
    # resets it to None if the file does not exist.
    springer_db_path=os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        'normalizing/data/springer.msg'
    )
)

268
269
270
271
# NOTE(review): the purpose of ``similarity`` is not visible in this module;
# it is empty by default.
paths = NomadConfig(
    similarity="",
)

272
273
274
# Default credentials and API URL for the client.
# NOTE(review): unlike the other nomad-lab.eu URLs in this module, this one
# uses plain http -- confirm that this is intended.
client = NomadConfig(
    user='leonard.hofstadter@nomad-fairdi.tests.de',
    password='password',
    url='http://nomad-lab.eu/prod/rae/api'
)

278
279
280
281
282
283
284
285
# DataCite MDS settings; disabled by default.
# NOTE(review): the '*' user and password look like placeholders that
# deployments are expected to override -- confirm.
datacite = NomadConfig(
    mds_host='https://mds.datacite.org',
    enabled=False,
    prefix='10.17172',
    user='*',
    password='*'
)

286
# Descriptive information about this NOMAD installation.
meta = NomadConfig(
    version='0.10.4',
    # Taken from the gitinfo module imported at the top of this module.
    commit=gitinfo.commit,
    release='devel',
    deployment='standard',
    default_domain='dft',
    service='unknown nomad service',
    name='novel materials discovery (NOMAD)',
    description='A FAIR data sharing platform for materials science data',
    homepage='https://nomad-lab.eu',
    source_url='https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR',
    maintainer_email='markus.scheidgen@physik.hu-berlin.de',
    deployment_id='nomad-lab.eu/prod/rae'
)

301
302
303
304
# GitLab access settings; the token defaults to the placeholder 'not set'.
gitlab = NomadConfig(
    private_token='not set'
)

305
# Settings without a category. ``_apply`` overrides them via their full name,
# e.g. ``NOMAD_CONSOLE_LOG_LEVEL`` or ``console_log_level`` in nomad.yaml.
auxfile_cutoff = 100
parser_matching_size = 150 * 80  # 150 lines of 80 ASCII characters per line
console_log_level = logging.WARNING
max_upload_size = 32 * (1024 ** 3)  # 32 GiB, assuming the unit is bytes
raw_file_strip_cutoff = 1000
max_entry_download = 500000
use_empty_parsers = False
reprocess_match = False
reprocess_unmatched = True
reprocess_rematch = True
process_reuse_parser = True
metadata_file_name = 'nomad'
metadata_file_extensions = ('json', 'yaml', 'yml')
enable_lazy_import = True
Lauri Himanen's avatar
Merged.    
Lauri Himanen committed
319

320

321
322
def normalize_loglevel(value, default_level=logging.INFO):
    '''
    Converts a log level given as number, numeric string, or level name into
    the numeric level of the ``logging`` module.

    Arguments:
        value: The level, e.g. ``10``, ``'10'``, ``'DEBUG'``, or ``'debug'``.
            ``None`` selects the default.
        default_level: The level returned if the value is ``None``.

    Raises:
        AttributeError: If the value is a string that does not name a log level.
    '''
    if value is None:
        return default_level

    try:
        return int(value)
    except ValueError:
        # Only non-numeric strings are looked up by name.
        if not isinstance(value, str):
            raise

    # Level names are upper case constants of the logging module. The type
    # check keeps other module attributes (e.g. functions) from being
    # returned as a level.
    level = getattr(logging, value.strip().upper(), None)
    if not isinstance(level, int):
        raise AttributeError(f'unknown log level: {value}')

    return level
330
331


332
# Converters per full config key. ``_apply`` uses them instead of the plain
# cast to the type of the current value.
_transformations = {
    'console_log_level': normalize_loglevel,
    'logstash_level': normalize_loglevel
}


# use std python logger, since logging is not configured while loading configuration
logger = logging.getLogger(__name__)


342
def _coerce_config_value(full_key, current_value, value):
    '''
    Converts a new value for a setting to the type of the setting's current value.

    Values are returned unchanged if the current value is ``None`` or the types
    already match. Otherwise the converter registered in ``_transformations``
    for the key is used, or the type of the current value is called on the value.
    '''
    if current_value is None or isinstance(value, type(current_value)):
        return value

    transformation = _transformations.get(full_key)
    if transformation is not None:
        return transformation(value)

    if isinstance(current_value, bool) and isinstance(value, str):
        # bool('false') is True: strings (e.g. from environment variables)
        # have to be parsed, not cast.
        normalized = value.strip().lower()
        if normalized in ('true', '1', 'yes', 'on'):
            return True
        if normalized in ('false', '0', 'no', 'off', ''):
            return False
        raise ValueError(f'not a boolean value: {value!r}')

    return type(current_value)(value)


def _apply(key, value, raise_error: bool = True) -> None:
    '''
    Changes the config according to given key and value. The first part of a key
    (with ``_`` as a separator) is interpreted as a group of settings. E.g. ``fs_staging``
    leading to ``config.fs.staging``. If the first part is not a name in this
    module, the whole key is tried as a module-level setting instead, e.g.
    ``console_log_level``.

    Only existing settings can be changed. Problems are logged, never raised.

    Arguments:
        key: The full key of the setting.
        value: The new value. It is converted to the type of the current value.
        raise_error: If False, unknown keys are ignored silently instead of
            being logged as errors.
    '''
    full_key = key

    def report_missing():
        if raise_error:
            logger.error(f'config key does not exist: {full_key}')

    try:
        group_key, config_key = full_key.split('_', 1)
    except (ValueError, AttributeError):
        # no ``_`` in the key, or the key is not a string
        report_missing()
        return

    current = globals()

    if group_key in current:
        current = current[group_key]
        if not isinstance(current, NomadConfig) or config_key not in current:
            report_missing()
            return

        key = config_key
    elif full_key not in current:
        report_missing()
        return

    try:
        value = _coerce_config_value(full_key, current[key], value)
        current[key] = value
        # NOTE(review): this also writes secret settings (e.g. passwords) to
        # the log -- consider masking them.
        logger.info(f'set config setting {full_key}={value}')
    except Exception as e:
        logger.error(f'cannot set config setting {full_key}={value}: {e}')
386

Markus Scheidgen's avatar
Markus Scheidgen committed
387

388
def _apply_env_variables():
    '''
    Applies all ``NOMAD_*`` environment variables as settings. The prefix is
    removed and the rest is lower-cased to form the key, e.g. ``NOMAD_FS_TMP``
    becomes ``fs_tmp``. ``NOMAD_CONFIG`` is skipped. Unknown keys are ignored.
    '''
    prefix = 'NOMAD_'

    settings = {}
    for name, raw_value in os.environ.items():
        if name.startswith(prefix) and name != 'NOMAD_CONFIG':
            settings[name[len(prefix):].lower()] = raw_value

    for setting_key, raw_value in settings.items():
        _apply(setting_key, raw_value, raise_error=False)

397

398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
def _apply_nomad_yaml():
    '''
    Applies the settings from the nomad config file. The file is given by the
    ``NOMAD_CONFIG`` environment variable and defaults to ``nomad.yaml`` in the
    current working directory. A missing or empty file is ignored; a file that
    cannot be parsed or that is not a mapping is logged as an error.

    Top-level mappings are treated as groups of settings, i.e. ``fs: {tmp: x}``
    is applied as ``fs_tmp``. All other top-level values are applied directly.
    '''
    config_file = os.environ.get('NOMAD_CONFIG', 'nomad.yaml')

    if not os.path.exists(config_file):
        return

    with open(config_file, 'r') as stream:
        try:
            # NOTE(review): FullLoader can construct more than plain data;
            # consider yaml.safe_load if config files may come from untrusted
            # sources.
            config_data = yaml.load(stream, Loader=getattr(yaml, 'FullLoader'))
        except yaml.YAMLError as e:
            logger.error(f'cannot read nomad config: {e}')
            return

    if config_data is None:
        # An empty file is parsed to None, there is nothing to apply.
        return

    if not isinstance(config_data, dict):
        logger.error('cannot read nomad config: the top-level element is not a mapping')
        return

    for key, value in config_data.items():
        if isinstance(value, dict):
            for group_setting, group_value in value.items():
                _apply(f'{key}_{group_setting}', group_value)
        else:
            _apply(key, value)


def load_config():
    '''
    Loads the configuration from nomad.yaml and environment. The environment is
    applied after the file and therefore takes precedence. The resulting
    configuration is checked at the end.
    '''
    for step in (_apply_nomad_yaml, _apply_env_variables, _check_config):
        step()
427

428
429

# The configuration is loaded automatically when this module is imported.
load_config()