config.py 17.6 KB
Newer Older
Markus Scheidgen's avatar
Markus Scheidgen committed
1
2
3
4
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
Markus Scheidgen's avatar
Markus Scheidgen committed
5
6
7
8
9
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
Markus Scheidgen's avatar
Markus Scheidgen committed
10
#     http://www.apache.org/licenses/LICENSE-2.0
Markus Scheidgen's avatar
Markus Scheidgen committed
11
12
#
# Unless required by applicable law or agreed to in writing, software
Markus Scheidgen's avatar
Markus Scheidgen committed
13
# distributed under the License is distributed on an "AS IS" BASIS,
Markus Scheidgen's avatar
Markus Scheidgen committed
14
15
16
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Markus Scheidgen's avatar
Markus Scheidgen committed
17
#
Markus Scheidgen's avatar
Markus Scheidgen committed
18

19
'''
Markus Scheidgen's avatar
Markus Scheidgen committed
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
This module describes all configurable parameters for the nomad python code. The
configuration is used for all executed python code including API, worker, CLI, and other
scripts. To use the configuration in your own scripts or new modules, simply import
this module.

All parameters are structured into objects for two reasons. First, to have
categories. Second, to allow runtime manipulation that is not affected
by python import logic. The categories are chosen along infrastructure components:
``mongo``, ``elastic``, etc.

This module also provides utilities to read the configuration from environment variables
and .yaml files. This is done automatically on import. The precedence is env over .yaml
over defaults.

.. autoclass:: nomad.config.NomadConfig
.. autofunction:: nomad.config.load_config
36
'''
Markus Scheidgen's avatar
Markus Scheidgen committed
37

Markus Scheidgen's avatar
Markus Scheidgen committed
38
import logging
39
40
41
import os
import os.path
import yaml
Markus Scheidgen's avatar
Markus Scheidgen committed
42
import warnings
43
from typing import Dict, Any
Markus Scheidgen's avatar
Markus Scheidgen committed
44

45
46
47
48
49
50
51
52
53
54
# Import the gitinfo module that provides the commit used in the `meta` config below.
try:
    from nomad import gitinfo
except ImportError:
    # The import failed: run ./gitinfo.sh from the directory above this package and
    # retry the import (presumably the script generates the gitinfo module; confirm).
    git_root = os.path.join(os.path.dirname(__file__), '..')
    # The script is called with a relative path, therefore the working directory is
    # switched temporarily and restored right after.
    cwd = os.getcwd()
    os.chdir(git_root)
    os.system('./gitinfo.sh')
    os.chdir(cwd)

    from nomad import gitinfo
Markus Scheidgen's avatar
Markus Scheidgen committed
55
56


Markus Scheidgen's avatar
Markus Scheidgen committed
57
58
warnings.filterwarnings('ignore', message='numpy.dtype size changed')
warnings.filterwarnings('ignore', message='numpy.ufunc size changed')
Markus Scheidgen's avatar
Markus Scheidgen committed
59

60

61
class NomadConfig(dict):
    '''
    A class for configuration categories. It is a dict subclass that uses attributes as
    key/value pairs.

    Reading, assigning, and deleting an attribute operates on the dict item of the
    same name, e.g. ``config.host`` is equivalent to ``config['host']``.
    '''
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def __getattr__(self, name):
        # Only called if the regular attribute lookup fails, so the dict methods
        # themselves stay reachable.
        if name in self:
            return self[name]
        else:
            raise AttributeError("No such attribute: " + name)

    def __setattr__(self, name, value):
        self[name] = value

    def __delattr__(self, name):
        if name in self:
            del self[name]
        else:
            raise AttributeError("No such attribute: " + name)

    def customize(self, custom_settings: Dict[str, Any]) -> 'NomadConfig':
        '''
        Returns a new NomadConfig object, created by taking a copy of the current config and
        updating it with the settings defined in `custom_settings`. The `custom_settings` dict
        must not contain any new keys (keys not defined in this NomadConfig). If it does,
        an exception will be raised.

        Arguments:
            custom_settings: The settings to override. An empty dict or None results in
                a plain copy.

        Raises:
            AssertionError: If `custom_settings` contains a key that is not defined in
                this NomadConfig.
        '''
        rv = NomadConfig(**self)
        if custom_settings:
            for k, v in custom_settings.items():
                if k not in rv:
                    # Raised explicitly instead of using an assert statement, because
                    # assert statements are removed when python runs with -O.
                    raise AssertionError(f'Invalid setting: {k}')
                rv[k] = v
        return rv
97

98

99
100
CELERY_WORKER_ROUTING = 'worker'
CELERY_QUEUE_ROUTING = 'queue'
101

102
103
104
105
106
rabbitmq = NomadConfig(
    host='localhost',
    user='rabbitmq',
    password='rabbitmq'
)
107

108

109
110
def rabbitmq_url():
    ''' Returns the pyamqp url built from the user, password, and host of the rabbitmq config. '''
    user_password_host = (rabbitmq.user, rabbitmq.password, rabbitmq.host)
    return 'pyamqp://%s:%s@%s//' % user_password_host
111

Lauri Himanen's avatar
Lauri Himanen committed
112

113
114
115
celery = NomadConfig(
    max_memory=64e6,  # 64 GB
    timeout=1800,  # 1/2 h
116
    acks_late=False,
117
    routing=CELERY_QUEUE_ROUTING,
118
119
120
121
122
    priorities={
        'Upload.process_upload': 5,
        'Upload.delete_upload': 9,
        'Upload.publish_upload': 10
    }
123
)
124

125
126
127
fs = NomadConfig(
    tmp='.volumes/fs/tmp',
    staging='.volumes/fs/staging',
128
    staging_external=None,
129
    public='.volumes/fs/public',
130
    public_external=None,
131
    local_tmp='/tmp',
132
    prefix_size=2,
133
    archive_version_suffix='v1',
134
    working_directory=os.getcwd()
135
)
136

137
138
139
140
141
142
# Initialize the external paths with the absolute paths of the staging and public
# directories. Any exception raised while doing so is swallowed.
try:
    fs.staging_external = os.path.abspath(fs.staging)
    fs.public_external = os.path.abspath(fs.public)
except Exception:
    pass

143
elastic = NomadConfig(
144
145
    host='localhost',
    port=9200,
146
    timeout=60,
147
    bulk_timeout=600,
148
149
    bulk_size=1000,
    entries_per_material_cap=1000,
150
    entries_index='nomad_entries_v1',
151
    materials_index='nomad_materials_v1',
152
)
153

154
keycloak = NomadConfig(
155
    public_server_url=None,
156
    server_url='https://nomad-lab.eu/fairdi/keycloak/auth/',
157
    realm_name='fairdi_nomad_prod',
158
    username='admin',
159
    password='password',
160
    client_id='nomad_public',
161
    client_secret=None)
162

163
164
165
mongo = NomadConfig(
    host='localhost',
    port=27017,
166
    db_name='nomad_v1'
167
)
168

169
logstash = NomadConfig(
170
    enabled=False,
171
172
173
174
    host='localhost',
    tcp_port='5000',
    level=logging.DEBUG
)
Markus Scheidgen's avatar
Markus Scheidgen committed
175

176
177
178
services = NomadConfig(
    api_host='localhost',
    api_port=8000,
179
    api_base_path='/fairdi/nomad/latest',
180
    api_secret='defaultApiSecret',
181
    api_chaos=0,
182
    admin_user_id='00000000-0000-0000-0000-000000000000',
183
    not_processed_value='not processed',
184
    unavailable_value='unavailable',
185
    https=False,
186
    https_upload=False,
187
    upload_limit=10,
188
189
    force_raw_file_decoding=False,
    download_scan_size=500,
190
    download_scan_timeout=u'30m'
191
192
)

193
oasis = NomadConfig(
194
195
    central_nomad_api_url='https://nomad-lab.eu/prod/v1/api',
    central_nomad_deployment_id='nomad-lab.eu/prod/v1',
196
197
198
    allowed_users=None,  # a list of usernames or user account emails
    uses_central_user_management=False,
    is_oasis=False
199
200
)

Markus Scheidgen's avatar
Markus Scheidgen committed
201
202
203
204
tests = NomadConfig(
    default_timeout=30
)

205

206
def api_url(ssl: bool = True, api: str = 'api', api_host: str = None, api_port: int = None):
    '''
    Returns the url of the current running nomad API. This is for server-side use.
    This is not the NOMAD url to use as a client, use `nomad.config.client.url` instead.

    Arguments:
        ssl: Set to False to get an http url even if `services.https` is enabled. The
            url only uses https if both `services.https` and this flag are set.
        api: The last path segment of the url.
        api_host: The host to use, defaults to `services.api_host`.
        api_port: The port to use, defaults to `services.api_port`. The ports 80 and 443
            are not added to the url.
    '''
    if api_port is None:
        api_port = services.api_port
    if api_host is None:
        api_host = services.api_host

    protocol = 'https' if services.https and ssl else 'http'

    host_and_port = api_host
    if api_port not in (80, 443):
        host_and_port += ':' + str(api_port)

    base_path = services.api_base_path.strip('/')
    # An empty base path (e.g. '' or '/') must not produce an empty path segment,
    # i.e. a double slash between host and api.
    path = f'{base_path}/{api}' if base_path else api
    return f'{protocol}://{host_and_port}/{path}'
221

222

223
def gui_url(page: str = None):
    '''
    Returns the url of the GUI, derived from the https-enabled api url.

    Arguments:
        page: An optional path below the GUI root that is appended to the url.
    '''
    # The default api url ends with 'api'; cut these three characters and the
    # slash in front of them to get the common base of API and GUI.
    base = api_url(True)[:-3]
    if base.endswith('/'):
        base = base[:-1]

    if page is None:
        return '%s/gui' % base

    return '%s/gui/%s' % (base, page)

233

234
def _check_config():
    """Validates the loaded configuration and fills in settings that are derived
    from other settings. Should only be called once after the final config is loaded.

    Raises:
        AssertionError: if the symmetry tolerance for the AFLOW prototype
            information differs from the symmetry tolerance of the normalizers.
    """
    # Compare the tolerance used for the AFLOW prototypes with the normalizer tolerance
    tolerances_differ = (
        normalize.prototype_symmetry_tolerance != normalize.symmetry_tolerance)
    if tolerances_differ:
        raise AssertionError(
            "The AFLOW prototype information is outdated due to changed tolerance "
            "for symmetry detection. Please update the AFLOW prototype information "
            "by running the CLI command 'nomad admin ops prototype-update "
            "--matches-only'."
        )

    # Drop the springer database path if nothing exists at that path
    springer_db = normalize.springer_db_path
    if springer_db and not os.path.exists(springer_db):
        normalize.springer_db_path = None

    # The public keycloak url defaults to the regular server url
    if keycloak.public_server_url is None:
        keycloak.public_server_url = keycloak.server_url

    # The external staging path defaults to the staging path and is made absolute
    staging_external = fs.staging_external
    if staging_external is None:
        staging_external = fs.staging
        fs.staging_external = staging_external
    if staging_external is not None and not os.path.isabs(staging_external):
        fs.staging_external = os.path.abspath(staging_external)

    # The north directories are made absolute as well
    for fs_key in ('users_fs', 'shared_fs'):
        north_path = getattr(north, fs_key)
        if north_path is not None and not os.path.isabs(north_path):
            setattr(north, fs_key, os.path.abspath(north_path))

271

272
mail = NomadConfig(
Lauri Himanen's avatar
Lauri Himanen committed
273
    enabled=False,
274
    with_login=False,
Lauri Himanen's avatar
Lauri Himanen committed
275
276
    host='',
    port=8995,
277
278
    user='',
    password='',
279
280
    from_address='support@nomad-lab.eu',
    cc_address='support@nomad-lab.eu'
281
282
283
)

normalize = NomadConfig(
284
285
    # The system size limit for running the dimensionality analysis. For very
    # large systems the dimensionality analysis will get too expensive.
286
    system_classification_with_clusters_threshold=64,
287
    # Symmetry tolerance controls the precision used by spglib in order to find
288
289
    # symmetries. The atoms are allowed to move 1/2*symmetry_tolerance from
    # their symmetry positions in order for spglib to still detect symmetries.
Lauri Himanen's avatar
Merged.    
Lauri Himanen committed
290
291
292
    # The unit is angstroms. The value of 0.1 is used e.g. by Materials Project
    # according to
    # https://pymatgen.org/pymatgen.symmetry.analyzer.html#pymatgen.symmetry.analyzer.SpacegroupAnalyzer
293
    symmetry_tolerance=0.1,
Lauri Himanen's avatar
Merged.    
Lauri Himanen committed
294
295
    # The symmetry tolerance used in aflow prototype matching. Should only be
    # changed before re-running the prototype detection.
296
    prototype_symmetry_tolerance=0.1,
297
298
299
300
301
    # Maximum number of atoms in the single cell of a 2D material for it to be
    # considered valid.
    max_2d_single_cell_size=7,
    # The distance tolerance between atoms for grouping them into the same
    # cluster. Used in detecting system type.
302
    cluster_threshold=2.5,
303
    # Defines the "bin size" for rounding cell angles for the material hash
304
305
306
307
308
    angle_rounding=float(10.0),  # unit: degree
    # The threshold for a system to be considered "flat". Used e.g. when
    # determining if a 2D structure is purely 2-dimensional to allow extra rigid
    # transformations that are improper in 3D but proper in 2D.
    flat_dim_threshold=0.1,
309
310
311
    # The threshold for point equality in k-space. Unit: 1/m.
    k_space_precision=150e6,
    # The energy threshold for how much a band can be on top or below the fermi
312
313
    # level in order to still detect a gap. Unit: Joule.
    band_structure_energy_tolerance=8.01088e-21,  # 0.05 eV
314
315
    springer_db_path=os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
316
317
        'normalizing/data/springer.msg'
    )
318
319
)

320
321
322
323
paths = NomadConfig(
    similarity="",
)

324
325
326
client = NomadConfig(
    user='leonard.hofstadter@nomad-fairdi.tests.de',
    password='password',
327
    url='http://nomad-lab.eu/prod/v1/api'
328
329
)

330
331
332
333
334
335
336
337
datacite = NomadConfig(
    mds_host='https://mds.datacite.org',
    enabled=False,
    prefix='10.17172',
    user='*',
    password='*'
)

338
meta = NomadConfig(
Markus Scheidgen's avatar
Markus Scheidgen committed
339
    version='1.1.1',
340
    commit=gitinfo.commit,
341
342
    deployment='devel',
    label=None,
343
344
345
346
    default_domain='dft',
    service='unknown nomad service',
    name='novel materials discovery (NOMAD)',
    description='A FAIR data sharing platform for materials science data',
347
    homepage='https://nomad-lab.eu',
348
    source_url='https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR',
349
    maintainer_email='markus.scheidgen@physik.hu-berlin.de',
350
351
    deployment_id='nomad-lab.eu/prod/v1',
    beta=None
352
353
)

354
355
356
357
gitlab = NomadConfig(
    private_token='not set'
)

358
359
reprocess = NomadConfig(
    # Configures standard behaviour when reprocessing.
360
361
    # Note, the settings only matter for published uploads and entries. For uploads in
    # staging, we always reparse, add newfound entries, and delete unmatched entries.
362
363
364
365
    rematch_published=True,
    reprocess_existing_entries=True,
    use_original_parser=False,
    add_matched_entries_to_published=True,
366
367
    delete_unmatched_published_entries=False,
    index_invidiual_entries=False
368
369
)

370
371
372
373
374
375
376
process = NomadConfig(
    index_materials=True,
    reuse_parser=True,
    metadata_file_name='nomad',
    metadata_file_extensions=('json', 'yaml', 'yml')
)

377
bundle_import = NomadConfig(
378
    # Basic settings
379
380
    allow_bundles_from_oasis=True,  # If oasis admins can "push" bundles to this NOMAD deployment
    allow_unpublished_bundles_from_oasis=False,  # If oasis admins can "push" bundles of unpublished uploads
381
    required_nomad_version='1.0.0',  # Minimum  nomad version of bundles required for import
382

383
    default_settings=NomadConfig(
384
385
386
387
        # Default settings for the import_bundle process.
        # Note, admins, and only admins, can override these settings when importing a bundle.
        # This means that if oasis admins pushes bundles to this NOMAD deployment, these
        # default settings will be applied.
388
389
390
        include_raw_files=True,
        include_archive_files=False,
        include_datasets=True,
391
        include_bundle_info=True,  # Keeps the bundle_info.json file (not necessary but nice to have)
392
        keep_original_timestamps=False,  # If all time stamps (create time, publish time etc) should be imported from the bundle
393
394
        set_from_oasis=True,  # If the from_oasis flag and oasis_deployment_id should be set
        delete_upload_on_fail=False,  # If False, it is just removed from the ES index on failure
395
396
        delete_bundle_when_done=True,  # Deletes the source bundle when done (regardless of success)
        also_delete_bundle_parent_folder=True,  # Also deletes the parent folder, if it is empty.
397
        trigger_processing=True,  # If the upload should be processed when the import is done.
398
399
400

        # When importing with trigger_processing=True, the settings below control the
        # initial processing behaviour (see the config for `reprocess` for more info).
401
402
403
404
405
        rematch_published=True,
        reprocess_existing_entries=True,
        use_original_parser=False,
        add_matched_entries_to_published=True,
        delete_unmatched_published_entries=False
406
    )
407
408
)

409
north = NomadConfig(
410
    hub_ip_connect='172.17.0.1',  # Set this to host.docker.internal on windows/macos.
411
    hub_connect_url=None,
412
    hub_ip='0.0.0.0',
413
    docker_network=None,
414
    hub_host='localhost',
415
    hub_port=9000,
416
417
    shared_fs='.volumes/fs/north/shared',
    users_fs='.volumes/fs/north/users',
418
    jupyterhub_crypt_key=None,
419
420
421
    windows=True,  # enable windows (as in windows the OS) hacks
)

422
423
424
425
archive = NomadConfig(
    block_size=256 * 1024,
    read_buffer_size=256 * 1024,  # GPFS needs at least 256K to achieve decent performance
    max_process_number=20,  # maximum number of processes can be assigned to process archive query
tlc@void's avatar
tlc@void committed
426
    min_entries_per_process=20  # minimum number of entries per process
427
428
)

429
430

def north_url(ssl: bool = True):
    '''
    Returns the url that `api_url` builds for the ``north`` api on the hub host and
    hub port of the north config.
    '''
    hub_location = dict(api_host=north.hub_host, api_port=north.hub_port)
    return api_url(ssl=ssl, api='north', **hub_location)
432
433


434
auxfile_cutoff = 100
435
parser_matching_size = 150 * 80  # 150 lines of 80 ASCII characters per line
436
console_log_level = logging.WARNING
437
max_upload_size = 32 * (1024 ** 3)
438
raw_file_strip_cutoff = 1000
439
max_entry_download = 500000
440
encyclopedia_base = "https://nomad-lab.eu/prod/rae/encyclopedia/#"
441
aitoolkit_enabled = False
442
use_empty_parsers = False
Lauri Himanen's avatar
Merged.    
Lauri Himanen committed
443

444

445
446
def normalize_loglevel(value, default_level=logging.INFO):
    '''
    Converts a log level given as number or as name into a numeric log level.

    Arguments:
        value: The log level as int, as numeric string (e.g. ``'10'``), or as the name
            of a level of the logging module (e.g. ``'DEBUG'``). Names are matched
            case-insensitively.
        default_level: The level that is returned if value is None.

    Returns: The numeric log level.

    Raises:
        AttributeError: If value is a string that is neither a number nor the name
            of a log level.
    '''
    if value is None:
        return default_level

    try:
        return int(value)
    except ValueError:
        pass

    # Level names are upper case attributes of the logging module. Without the upper
    # case conversion a name like 'debug' would resolve to the function logging.debug.
    level = getattr(logging, value.strip().upper(), None)
    if not isinstance(level, int):
        raise AttributeError(f'No such log level: {value}')
    return level
454
455


456
_transformations = {
457
458
    'console_log_level': normalize_loglevel,
    'logstash_level': normalize_loglevel
459
}
Markus Scheidgen's avatar
Markus Scheidgen committed
460

Markus Scheidgen's avatar
Markus Scheidgen committed
461

462
463
464
465
# use std python logger, since logging is not configured while loading configuration
logger = logging.getLogger(__name__)


466
def _parse_bool(value):
    ''' Converts a setting value to bool; the strings false, 0, no, off (and blank) are False. '''
    if isinstance(value, str):
        return value.strip().lower() not in ('', 'false', '0', 'no', 'off')
    return bool(value)


def _find_config_target(full_key):
    ''' Returns the (container, key) pair that full_key addresses, or None if there is none. '''
    if not isinstance(full_key, str) or '_' not in full_key:
        return None

    module_globals = globals()

    # Try each ``_`` as the separator between group and setting, shortest group first.
    # This way groups with an underscore in their name (e.g. ``bundle_import``) can be
    # addressed, too.
    separator = full_key.find('_')
    while separator != -1:
        group = module_globals.get(full_key[:separator])
        config_key = full_key[separator + 1:]
        if isinstance(group, NomadConfig) and config_key in group:
            return group, config_key
        separator = full_key.find('_', separator + 1)

    # Settings that are not part of a group (e.g. ``console_log_level``) are only
    # considered if the first part of the key does not name something else in this module.
    first_part = full_key.split('_', 1)[0]
    if first_part not in module_globals and full_key in module_globals:
        return module_globals, full_key

    return None


def _apply(key, value, raise_error: bool = True) -> None:
    '''
    Changes the config according to given key and value. The first part of a key
    (with ``_`` as a separator) is interpreted as a group of settings. E.g. ``fs_staging``
    leading to ``config.fs.staging``. Keys that do not address a group are applied to
    the settings defined directly in this module, e.g. ``console_log_level``.

    If the current value of the setting is not None and the given value has a different
    type, the value is converted: with the function registered for the key in
    ``_transformations``, otherwise with the type of the current value. Strings for
    bool settings are parsed, i.e. ``false``, ``0``, ``no``, and ``off`` result in False.

    Nothing is raised. Problems are only logged.

    Arguments:
        key: The full key of the setting, e.g. ``fs_staging``.
        value: The new value.
        raise_error: If False, keys that do not exist are ignored without logging an error.
    '''
    full_key = key

    target = _find_config_target(full_key)
    if target is None:
        if raise_error:
            logger.error(f'config key does not exist: {full_key}')
        return
    current, key = target

    # NOTE(review): the messages below contain the value, which might be a secret
    # (e.g. a password) — confirm that this is acceptable for the used log handlers.
    try:
        current_value = current[key]
        if current_value is not None and not isinstance(value, type(current_value)):
            transformation = _transformations.get(full_key)
            if transformation is None:
                if isinstance(current_value, bool):
                    # bool('false') would be True, strings need to be parsed instead
                    transformation = _parse_bool
                else:
                    transformation = type(current_value)
            value = transformation(value)

        current[key] = value
        logger.info(f'set config setting {full_key}={value}')
    except Exception as e:
        logger.error(f'cannot set config setting {full_key}={value}: {e}')
510

Markus Scheidgen's avatar
Markus Scheidgen committed
511

512
def _apply_env_variables():
    '''
    Applies all environment variables that start with ``NOMAD_`` (except ``NOMAD_CONFIG``)
    as settings. The setting key is the lower case variable name without the prefix.
    Keys that do not exist are ignored without logging an error.
    '''
    prefix = 'NOMAD_'

    # Collect first: variables that only differ in case end up as one setting.
    settings = {}
    for name, raw_value in os.environ.items():
        if name == 'NOMAD_CONFIG' or not name.startswith(prefix):
            continue
        settings[name[len(prefix):].lower()] = raw_value

    for setting_key, raw_value in settings.items():
        _apply(setting_key, raw_value, raise_error=False)

521

522
523
524
525
526
527
528
529
530
531
532
533
534
def _apply_nomad_yaml():
    '''
    Applies the settings from the nomad config file. The file path is read from the
    environment variable ``NOMAD_CONFIG`` and defaults to ``nomad.yaml``. Nothing
    happens if the file does not exist or is empty.

    Top-level keys with a mapping as value are treated as groups and each of their
    items is applied as ``<group>_<key>``. All other top-level keys are applied as is.
    Problems with the file are logged and do not raise.
    '''
    config_file = os.environ.get('NOMAD_CONFIG', 'nomad.yaml')

    if not os.path.exists(config_file):
        return

    with open(config_file, 'r') as stream:
        try:
            # NOTE(review): FullLoader can construct more than plain data types. The
            # config file should only contain plain data; consider yaml.safe_load if
            # the file might come from an untrusted source.
            config_data = yaml.load(stream, Loader=getattr(yaml, 'FullLoader'))
        except yaml.YAMLError as e:
            logger.error(f'cannot read nomad config: {e}')
            return

    if not config_data:
        return

    # A yaml document can also be a list or a scalar, which cannot hold settings.
    if not isinstance(config_data, dict):
        logger.error(
            'cannot read nomad config: expected a mapping of settings, '
            f'got {type(config_data).__name__}')
        return

    for group_key, group_value in config_data.items():
        if isinstance(group_value, dict):
            for config_key, config_value in group_value.items():
                _apply(f'{group_key}_{config_key}', config_value)
        else:
            _apply(group_key, group_value)


def load_config():
    '''
    Loads the configuration: first the settings from the nomad config file, then the
    settings from the environment, and finally the resulting config is checked.
    '''
    # The order matters, later steps see (and can override) the result of earlier ones.
    for step in (_apply_nomad_yaml, _apply_env_variables, _check_config):
        step()
554

555
556

load_config()