config.py 15.8 KB
Newer Older
Markus Scheidgen's avatar
Markus Scheidgen committed
1
2
3
4
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
Markus Scheidgen's avatar
Markus Scheidgen committed
5
6
7
8
9
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
Markus Scheidgen's avatar
Markus Scheidgen committed
10
#     http://www.apache.org/licenses/LICENSE-2.0
Markus Scheidgen's avatar
Markus Scheidgen committed
11
12
#
# Unless required by applicable law or agreed to in writing, software
Markus Scheidgen's avatar
Markus Scheidgen committed
13
# distributed under the License is distributed on an "AS IS" BASIS,
Markus Scheidgen's avatar
Markus Scheidgen committed
14
15
16
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Markus Scheidgen's avatar
Markus Scheidgen committed
17
#
Markus Scheidgen's avatar
Markus Scheidgen committed
18

19
'''
Markus Scheidgen's avatar
Markus Scheidgen committed
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
This module describes all configurable parameters for the nomad python code. The
configuration is used for all executed python code including API, worker, CLI, and other
scripts. To use the configuration in your own scripts or new modules, simply import
this module.

All parameters are structured into objects for two reasons. First, to have
categories. Second, to allow runtime manipulation that is not affected
by python import logic. The categories are chosen along infrastructure components:
``mongo``, ``elastic``, etc.

This module also provides utilities to read the configuration from environment variables
and .yaml files. This is done automatically on import. The precedence is env over .yaml
over defaults.

.. autoclass:: nomad.config.NomadConfig
.. autofunction:: nomad.config.load_config
36
'''
Markus Scheidgen's avatar
Markus Scheidgen committed
37

Markus Scheidgen's avatar
Markus Scheidgen committed
38
import logging
39
40
41
import os
import os.path
import yaml
Markus Scheidgen's avatar
Markus Scheidgen committed
42
import warnings
43
from typing import Dict, Any
Markus Scheidgen's avatar
Markus Scheidgen committed
44

45
46
47
48
49
50
51
52
53
54
try:
    from nomad import gitinfo
except ImportError:
    # The gitinfo module could not be imported. Run the gitinfo.sh script from the
    # directory above this package (presumably it generates the module — TODO confirm)
    # and retry the import.
    git_root = os.path.join(os.path.dirname(__file__), '..')
    # The script is called via a relative path, so the working directory is switched
    # for the call and restored afterwards.
    cwd = os.getcwd()
    os.chdir(git_root)
    os.system('./gitinfo.sh')
    os.chdir(cwd)

    from nomad import gitinfo
Markus Scheidgen's avatar
Markus Scheidgen committed
55
56


Markus Scheidgen's avatar
Markus Scheidgen committed
57
58
# Ignore warnings whose message starts with 'numpy.dtype size changed' or
# 'numpy.ufunc size changed'.
warnings.filterwarnings('ignore', message='numpy.dtype size changed')
warnings.filterwarnings('ignore', message='numpy.ufunc size changed')
Markus Scheidgen's avatar
Markus Scheidgen committed
59

60

61
class NomadConfig(dict):
    '''
    A class for configuration categories. It is a dict subclass that uses attributes as
    key/value pairs.

    Reading, writing, and deleting an attribute is mapped onto the dict item with the
    same name, i.e. ``config.key`` and ``config['key']`` refer to the same value.
    '''
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def __getattr__(self, name):
        # Only called if the regular attribute lookup failed; fall back to dict items.
        if name in self:
            return self[name]
        else:
            raise AttributeError("No such attribute: " + name)

    def __setattr__(self, name, value):
        # All attribute assignments are stored as dict items.
        self[name] = value

    def __delattr__(self, name):
        if name in self:
            del self[name]
        else:
            raise AttributeError("No such attribute: " + name)

    def customize(self, custom_settings: Dict[str, Any]) -> 'NomadConfig':
        '''
        Returns a new NomadConfig object, created by taking a copy of the current config and
        updating it with the settings defined in `custom_settings`. The `custom_settings` dict
        must not contain any new keys (keys not defined in this NomadConfig). If it does,
        an exception will be raised.

        The copy is shallow: values are shared between this config and the returned one.

        Arguments:
            custom_settings: The settings to override. Can be None or empty, in which
                case an unmodified copy is returned.

        Raises:
            AssertionError: If `custom_settings` contains a key that is not defined in
                this config.
        '''
        rv = NomadConfig(**self)
        if custom_settings:
            for k, v in custom_settings.items():
                # Raised explicitly (instead of using an assert statement) so that the
                # check is also performed when python runs with -O.
                if k not in rv:
                    raise AssertionError(f'Invalid setting: {k}')
                rv[k] = v
        return rv
97

98

99
100
# Values for the ``routing`` setting of the ``celery`` config; the default set there is
# CELERY_QUEUE_ROUTING.
CELERY_WORKER_ROUTING = 'worker'
CELERY_QUEUE_ROUTING = 'queue'
101

102
103
104
105
106
# Host and credentials for rabbitmq. All three values are used by rabbitmq_url() to
# build the broker url.
rabbitmq = NomadConfig(
    host='localhost',
    user='rabbitmq',
    password='rabbitmq'
)
107

108

109
110
def rabbitmq_url():
    ''' Returns the pyamqp url build from user, password, and host of the rabbitmq config. '''
    url_parts = (rabbitmq.user, rabbitmq.password, rabbitmq.host)
    return 'pyamqp://%s:%s@%s//' % url_parts
111

Lauri Himanen's avatar
Lauri Himanen committed
112

113
114
115
# Settings for celery.
celery = NomadConfig(
    max_memory=64e6,  # 64 GB
    timeout=1800,  # 1/2 h
    acks_late=False,
    # One of the CELERY_*_ROUTING constants defined in this module.
    routing=CELERY_QUEUE_ROUTING,
    # Priorities keyed by '<class>.<function>' names (presumably the names of the
    # respective process functions — TODO confirm).
    priorities={
        'Upload.process_upload': 5,
        'Upload.delete_upload': 9,
        'Upload.publish_upload': 10
    }
)
124

125
126
127
128
# File system related settings. The tmp, staging, and public paths are relative paths
# by default; working_directory is the current working directory at import time.
fs = NomadConfig(
    tmp='.volumes/fs/tmp',
    staging='.volumes/fs/staging',
    public='.volumes/fs/public',
    local_tmp='/tmp',
    prefix_size=2,
    archive_version_suffix=None,
    working_directory=os.getcwd()
)
134

135
# Settings for the elasticsearch connection and the names of the used indices.
elastic = NomadConfig(
    host='localhost',
    port=9200,
    timeout=60,
    bulk_timeout=600,
    bulk_size=1000,
    entries_per_material_cap=1000,
    entries_index='nomad_entries_v1',
    materials_index='nomad_materials_v1',
)
145

146
# Settings for the keycloak server.
# NOTE(review): username/password are placeholder defaults; presumably they are
# overridden via nomad.yaml or NOMAD_KEYCLOAK_* environment variables — confirm.
keycloak = NomadConfig(
    server_url='https://nomad-lab.eu/fairdi/keycloak/auth/',
    realm_name='fairdi_nomad_prod',
    username='admin',
    password='password',
    client_id='nomad_public',
    client_secret=None,
    oasis=False)
154

155
156
157
158
159
# Host, port, and database name for mongo.
mongo = NomadConfig(
    host='localhost',
    port=27017,
    db_name='nomad_fairdi'
)
160

161
# Settings for logstash. The `logstash_level` key is registered in `_transformations`,
# i.e. given values are converted with normalize_loglevel.
logstash = NomadConfig(
    enabled=False,
    host='localhost',
    # NOTE(review): the port is a str here, while other ports in this module are ints.
    tcp_port='5000',
    level=logging.DEBUG
)
Markus Scheidgen's avatar
Markus Scheidgen committed
167

168
169
170
# Settings for the nomad services. The values api_host, api_port, api_base_path, and
# https are used by api_url() to build the server-side url of the API.
services = NomadConfig(
    api_host='localhost',
    api_port=8000,
    api_base_path='/fairdi/nomad/latest',
    api_secret='defaultApiSecret',
    api_chaos=0,
    admin_user_id='00000000-0000-0000-0000-000000000000',
    not_processed_value='not processed',
    unavailable_value='unavailable',
    https=False,
    https_upload=False,
    upload_limit=10,
    force_raw_file_decoding=False,
    download_scan_size=500,
    download_scan_timeout=u'30m'
)

185
# Settings for oasis deployments: url and deployment id of the central NOMAD and an
# optional restriction of users.
oasis = NomadConfig(
    central_nomad_api_url='https://nomad-lab.eu/prod/v1/api',
    central_nomad_deployment_id='nomad-lab.eu/prod/v1',
    allowed_users=None  # a list of usernames or user account emails
)

Markus Scheidgen's avatar
Markus Scheidgen committed
191
192
193
194
# Settings for tests (the unit of default_timeout is presumably seconds — TODO confirm).
tests = NomadConfig(
    default_timeout=30
)

195

196
def api_url(ssl: bool = True, api: str = 'api'):
    '''
    Builds the url of the currently running nomad API from the ``services`` config.
    This url is meant for server-side use. It is not the NOMAD url to use as a client,
    use `nomad.config.client.url` instead.

    Arguments:
        ssl: If False, http is used even if ``services.https`` is set.
        api: The last path segment of the url, i.e. the name of the API.
    '''
    if services.https and ssl:
        scheme = 'https'
    else:
        scheme = 'http'

    address = services.api_host.strip('/')
    port = services.api_port
    # The port is left out if it is one of the two standard ports.
    if port not in (80, 443):
        address = address + ':' + str(port)

    path = services.api_base_path.strip('/')
    return '%s://%s/%s/%s' % (scheme, address, path, api)
207

208

209
def gui_url(page: str = None):
    '''
    Returns the url of the GUI, or of the given page within the GUI. The url is derived
    from the url returned by api_url().
    '''
    # Cut the 'api' at the end of the API url and a possibly remaining trailing slash.
    root = api_url(True)[:-3]
    root = root[:-1] if root.endswith('/') else root

    if page is None:
        return f'{root}/gui'

    return f'{root}/gui/{page}'

219

220
def _check_config():
    """Validates the current configuration. It is supposed to be called only once,
    after the final config is loaded.

    Besides the validation, the ``normalize.springer_db_path`` setting is reset to
    None if it is set to a path that does not exist.

    Raises:
        AssertionError: if there is a contradiction or invalid values in the
            config file settings.
    """
    # The AFLOW symmetry information is checked once on import
    if normalize.prototype_symmetry_tolerance != normalize.symmetry_tolerance:
        message = (
            "The AFLOW prototype information is outdated due to changed tolerance "
            "for symmetry detection. Please update the AFLOW prototype information "
            "by running the CLI command 'nomad admin ops prototype-update "
            "--matches-only'."
        )
        raise AssertionError(message)

    springer_db_path = normalize.springer_db_path
    if not springer_db_path:
        return
    if not os.path.exists(springer_db_path):
        normalize.springer_db_path = None

242

243
# Settings for sending mails: the mail server (host, port), the optional login
# (with_login, user, password), and the addresses used for the mails.
mail = NomadConfig(
    enabled=False,
    with_login=False,
    host='',
    port=8995,
    user='',
    password='',
    from_address='support@nomad-lab.eu',
    cc_address='support@nomad-lab.eu'
)

# Settings for normalizing. Note that _check_config() requires symmetry_tolerance and
# prototype_symmetry_tolerance to be equal, and that it resets springer_db_path to None
# if the path does not exist.
normalize = NomadConfig(
    # The system size limit for running the dimensionality analysis. For very
    # large systems the dimensionality analysis will get too expensive.
    system_classification_with_clusters_threshold=64,
    # Symmetry tolerance controls the precision used by spglib in order to find
    # symmetries. The atoms are allowed to move 1/2*symmetry_tolerance from
    # their symmetry positions in order for spglib to still detect symmetries.
    # The unit is angstroms. The value of 0.1 is used e.g. by Materials Project
    # according to
    # https://pymatgen.org/pymatgen.symmetry.analyzer.html#pymatgen.symmetry.analyzer.SpacegroupAnalyzer
    symmetry_tolerance=0.1,
    # The symmetry tolerance used in aflow prototype matching. Should only be
    # changed before re-running the prototype detection.
    prototype_symmetry_tolerance=0.1,
    # Maximum number of atoms in the single cell of a 2D material for it to be
    # considered valid.
    max_2d_single_cell_size=7,
    # The distance tolerance between atoms for grouping them into the same
    # cluster. Used in detecting system type.
    cluster_threshold=2.5,
    # Defines the "bin size" for rounding cell angles for the material hash
    angle_rounding=float(10.0),  # unit: degree
    # The threshold for a system to be considered "flat". Used e.g. when
    # determining if a 2D structure is purely 2-dimensional to allow extra rigid
    # transformations that are improper in 3D but proper in 2D.
    flat_dim_threshold=0.1,
    # The threshold for point equality in k-space. Unit: 1/m.
    k_space_precision=150e6,
    # The energy threshold for how much a band can be on top or below the fermi
    # level in order to still detect a gap. Unit: Joule.
    band_structure_energy_tolerance=8.01088e-21,  # 0.05 eV
    # Path to the springer.msg file, relative to the directory of this module.
    springer_db_path=os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        'normalizing/data/springer.msg'
    )
)

291
292
293
294
# Additional paths; the similarity path is empty by default.
paths = NomadConfig(
    similarity="",
)

295
296
297
# Settings for using nomad as a client: credentials and the url of the API to use.
# NOTE(review): the url uses plain http, while oasis.central_nomad_api_url uses https
# for the same host and path — confirm that this is intended.
client = NomadConfig(
    user='leonard.hofstadter@nomad-fairdi.tests.de',
    password='password',
    url='http://nomad-lab.eu/prod/v1/api'
)

301
302
303
304
305
306
307
308
# Settings for datacite: the MDS host, the prefix (presumably the DOI prefix — TODO
# confirm), and the credentials. Disabled by default.
datacite = NomadConfig(
    mds_host='https://mds.datacite.org',
    enabled=False,
    prefix='10.17172',
    user='*',
    password='*'
)

309
# Meta information about this nomad version and deployment. The commit is taken from
# the gitinfo module.
meta = NomadConfig(
    version='1.0.0',
    commit=gitinfo.commit,
    deployment='devel',
    label=None,
    default_domain='dft',
    service='unknown nomad service',
    name='novel materials discovery (NOMAD)',
    description='A FAIR data sharing platform for materials science data',
    homepage='https://nomad-lab.eu',
    source_url='https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR',
    maintainer_email='markus.scheidgen@physik.hu-berlin.de',
    deployment_id='nomad-lab.eu/prod/v1',
    beta=None
)

325
326
327
328
# Settings for gitlab; 'not set' is the placeholder for a missing token.
gitlab = NomadConfig(
    private_token='not set'
)

329
330
reprocess = NomadConfig(
    # Configures standard behaviour when reprocessing.
    # Note, the settings only matter for published uploads and entries. For uploads in
    # staging, we always reparse, add newfound entries, and delete unmatched entries.
    rematch_published=True,
    reprocess_existing_entries=True,
    use_original_parser=False,
    add_matched_entries_to_published=True,
    delete_unmatched_published_entries=False,
    # NOTE(review): the key is spelled 'invidiual'. It is part of the config interface
    # (e.g. NOMAD_REPROCESS_INDEX_INVIDIUAL_ENTRIES) and is therefore left unchanged.
    index_invidiual_entries=False
)

341
342
343
344
345
346
347
# Settings for processing. The metadata file settings give the file name (without
# extension) and the possible extensions.
process = NomadConfig(
    index_materials=True,
    reuse_parser=True,
    metadata_file_name='nomad',
    metadata_file_extensions=('json', 'yaml', 'yml')
)

348
# Settings for importing bundles. The `default_settings` are a nested NomadConfig.
bundle_import = NomadConfig(
    # Basic settings
    allow_bundles_from_oasis=True,  # If oasis admins can "push" bundles to this NOMAD deployment
    allow_unpublished_bundles_from_oasis=False,  # If oasis admins can "push" bundles of unpublished uploads
    required_nomad_version='1.0.0',  # Minimum nomad version of bundles required for import

    default_settings=NomadConfig(
        # Default settings for the import_bundle process.
        # Note, admins, and only admins, can override these settings when importing a bundle.
        # This means that if an oasis admin pushes bundles to this NOMAD deployment, these
        # default settings will be applied.
        include_raw_files=True,
        include_archive_files=False,
        include_datasets=True,
        include_bundle_info=True,  # Keeps the bundle_info.json file (not necessary but nice to have)
        keep_original_timestamps=False,  # If all time stamps (create time, publish time etc) should be imported from the bundle
        set_from_oasis=True,  # If the from_oasis flag and oasis_deployment_id should be set
        delete_upload_on_fail=False,  # If False, it is just removed from the ES index on failure
        delete_bundle_when_done=True,  # Deletes the source bundle when done (regardless of success)
        also_delete_bundle_parent_folder=True,  # Also deletes the parent folder, if it is empty.
        trigger_processing=True,  # If the upload should be processed when the import is done.

        # When importing with trigger_processing=True, the settings below control the
        # initial processing behaviour (see the config for `reprocess` for more info).
        rematch_published=True,
        reprocess_existing_entries=True,
        use_original_parser=False,
        add_matched_entries_to_published=True,
        delete_unmatched_published_entries=False
    )
)

380
auxfile_cutoff = 100
381
parser_matching_size = 150 * 80  # 150 lines of 80 ASCII characters per line
382
console_log_level = logging.WARNING
383
max_upload_size = 32 * (1024 ** 3)
384
raw_file_strip_cutoff = 1000
385
max_entry_download = 500000
386
encyclopedia_base = "https://nomad-lab.eu/prod/rae/encyclopedia/#"
387
aitoolkit_enabled = False
388
use_empty_parsers = False
Lauri Himanen's avatar
Merged.    
Lauri Himanen committed
389

390

391
392
def normalize_loglevel(value, default_level=logging.INFO):
    '''
    Converts the given value into a numeric log level.

    Arguments:
        value: The level as a number (e.g. ``10`` or ``'10'``), as the name of a level
            from the logging module in any case (e.g. ``'DEBUG'`` or ``'debug'``),
            or None.
        default_level: The level that is returned if the given value is None.

    Returns: The numeric log level.

    Raises:
        AttributeError: If the value is a string that is neither a number nor the name
            of a log level.
    '''
    plain_value = value
    if plain_value is None:
        return default_level

    try:
        return int(plain_value)
    except ValueError:
        # Not a number, so it has to be the name of a level. The level constants of the
        # logging module are upper case. The lower case names (e.g. logging.debug) are
        # functions and must not be returned as a level.
        level = getattr(logging, plain_value.upper(), None)
        if not isinstance(level, int):
            raise AttributeError(f'unknown log level: {plain_value}')
        return level
400
401


402
# Functions to convert the given values for specific (full) config keys. If a key is
# listed here, _apply uses the function instead of the type of the current value to
# convert a given value of a different type.
_transformations = {
    'console_log_level': normalize_loglevel,
    'logstash_level': normalize_loglevel
}
Markus Scheidgen's avatar
Markus Scheidgen committed
406

Markus Scheidgen's avatar
Markus Scheidgen committed
407

408
409
410
411
# Use the std python logger, since logging is not configured while loading the
# configuration.
logger = logging.getLogger(__name__)


412
def _parse_bool(value: str) -> bool:
    ''' Converts a textual boolean (e.g. from an environment variable) into a bool. '''
    normalized = value.strip().lower()
    if normalized in ('true', '1', 'yes', 'on'):
        return True
    # The empty string is included, because bool('') is False as well.
    if normalized in ('false', '0', 'no', 'off', ''):
        return False
    raise ValueError(f'not a boolean value: {value}')


def _apply(key, value, raise_error: bool = True) -> None:
    '''
    Changes the config according to given key and value. The first part of a key
    (with ``_`` as a separator) is interpreted as a group of settings. E.g. ``fs_staging``
    leading to ``config.fs.staging``.

    If the first part of the key is not a name in this module, the whole key is
    interpreted as a module-level setting, e.g. ``console_log_level``.

    If the given value does not have the type of the current value, it is converted:
    with the function registered for the key in ``_transformations``, otherwise by
    parsing textual booleans (``true``/``false``, ``1``/``0``, ``yes``/``no``,
    ``on``/``off``), otherwise by calling the type of the current value. Settings that
    are currently None take the given value unchanged.

    No exceptions are raised: problems are only logged and the setting is left as it is.

    Arguments:
        key: The full key, e.g. ``fs_staging``.
        value: The new value for the setting.
        raise_error: If False, keys that do not exist are ignored silently. Otherwise
            an error is logged for them.
    '''
    full_key = key
    try:
        group_key, config_key = full_key.split('_', 1)
    except Exception:
        # Keys without separator (or keys that are not strings) cannot be resolved.
        if raise_error:
            logger.error(f'config key does not exist: {full_key}')
        return

    current = globals()

    if group_key not in current:
        # There is no such group, the key might still be a module-level setting.
        if key not in current:
            if raise_error:
                logger.error(f'config key does not exist: {full_key}')
            return
    else:
        current = current[group_key]
        if not isinstance(current, NomadConfig):
            if raise_error:
                logger.error(f'config key does not exist: {full_key}')
            return

        if config_key not in current:
            if raise_error:
                logger.error(f'config key does not exist: {full_key}')
            return

        key = config_key

    try:
        current_value = current[key]
        if current_value is not None and not isinstance(value, type(current_value)):
            transformation = _transformations.get(full_key)
            if transformation is None:
                if isinstance(current_value, bool) and isinstance(value, str):
                    # bool('false') is True, textual booleans have to be parsed.
                    transformation = _parse_bool
                else:
                    transformation = type(current_value)
            value = transformation(value)

        current[key] = value
        logger.info(f'set config setting {full_key}={value}')
    except Exception as e:
        logger.error(f'cannot set config setting {full_key}={value}: {e}')
456

Markus Scheidgen's avatar
Markus Scheidgen committed
457

458
def _apply_env_variables():
    '''
    Applies all environment variables that start with ``NOMAD_`` to the config. The
    config key is the lower case variable name without the prefix. The ``NOMAD_CONFIG``
    variable is left out. Keys that do not exist are ignored without logging an error.
    '''
    prefix = 'NOMAD_'
    settings = {}
    for env_name, env_value in os.environ.items():
        if env_name == 'NOMAD_CONFIG' or not env_name.startswith(prefix):
            continue
        settings[env_name[len(prefix):].lower()] = env_value

    for config_key in settings:
        _apply(config_key, settings[config_key], raise_error=False)

467

468
469
470
471
472
473
474
475
476
477
478
479
480
def _apply_nomad_yaml():
    '''
    Applies the settings from the nomad config file to the config. The path of the file
    is taken from the ``NOMAD_CONFIG`` environment variable and defaults to
    ``nomad.yaml``. Nothing happens if the file does not exist or is empty.

    The top level of the file has to be a mapping. Values that are mappings themselves
    are interpreted as groups of settings, all other values as module-level settings.
    Files that cannot be read as such a mapping are skipped and an error is logged.
    '''
    config_file = os.environ.get('NOMAD_CONFIG', 'nomad.yaml')

    if not os.path.exists(config_file):
        return

    with open(config_file, 'r') as stream:
        try:
            # NOTE(review): FullLoader is not the safe loader of yaml. This is only
            # acceptable as long as the config file is trusted. Consider
            # yaml.safe_load if the config does not rely on any python specific tags.
            config_data = yaml.load(stream, Loader=getattr(yaml, 'FullLoader'))
        except yaml.YAMLError as e:
            logger.error(f'cannot read nomad config: {e}')
            return

    if not config_data:
        return

    if not isinstance(config_data, dict):
        # E.g. a file that only contains a list or a plain string.
        logger.error(
            f'cannot read nomad config: {config_file} does not contain a mapping')
        return

    for key, value in config_data.items():
        if isinstance(value, dict):
            for config_key, config_value in value.items():
                _apply(f'{key}_{config_key}', config_value)
        else:
            _apply(key, value)


def load_config():
    '''
    Loads the configuration: the settings from nomad.yaml are applied first, then the
    settings from the environment. The resulting config is checked afterwards.
    '''
    for step in (_apply_nomad_yaml, _apply_env_variables, _check_config):
        step()
500

501
502

# The configuration is loaded once, when this module is imported.
load_config()