# Copyright 2018 Markus Scheidgen, empty_task
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
'''
This module describes all configurable parameters for the nomad python code. The
configuration is used for all executed python code including API, worker, CLI, and other
scripts. To use the configuration in your own scripts or new modules, simply import
this module.

All parameters are structured into objects for two reasons. First, to have
categories. Second, to allow runtime manipulation that is not affected
by python import logic. The categories are chosen along infrastructure components:
``mongo``, ``elastic``, etc.

This module also provides utilities to read the configuration from environment variables
and .yaml files. This is done automatically on import. The precedence is env over .yaml
over defaults.

.. autoclass:: nomad.config.NomadConfig
.. autofunction:: nomad.config.apply
.. autofunction:: nomad.config.load_config
'''
Markus Scheidgen's avatar
Markus Scheidgen committed
34

Markus Scheidgen's avatar
Markus Scheidgen committed
35
import logging
36
37
38
import os
import os.path
import yaml
Markus Scheidgen's avatar
Markus Scheidgen committed
39
40
import warnings

41
42
43
44
45
46
47
48
49
50
# If the ``gitinfo`` module cannot be imported, run the ``gitinfo.sh`` script and
# retry the import.
# NOTE(review): presumably gitinfo.sh generates nomad/gitinfo.py — confirm.
try:
    from nomad import gitinfo
except ImportError:
    # the script is run from the parent directory of this module's directory
    git_root = os.path.join(os.path.dirname(__file__), '..')
    cwd = os.getcwd()
    os.chdir(git_root)
    os.system('./gitinfo.sh')
    # restore the previous working directory
    os.chdir(cwd)

    from nomad import gitinfo
Markus Scheidgen's avatar
Markus Scheidgen committed
51
52


Markus Scheidgen's avatar
Markus Scheidgen committed
53
54
# Ignore warnings whose message starts with one of the following numpy texts.
# NOTE(review): presumably emitted when compiled extensions were built against a
# different numpy version — confirm.
warnings.filterwarnings('ignore', message='numpy.dtype size changed')
warnings.filterwarnings('ignore', message='numpy.ufunc size changed')
Markus Scheidgen's avatar
Markus Scheidgen committed
55

56

57
class NomadConfig(dict):
    '''
    A class for configuration categories. It is a dict subclass that uses attributes as
    key/value pairs.

    Reading ``config.name`` returns ``config['name']``, assigning ``config.name = x``
    stores ``config['name'] = x``, and ``del config.name`` removes the key. Reading or
    deleting a key that does not exist raises ``AttributeError``.
    '''

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def __getattr__(self, name):
        # Only called when the regular attribute lookup fails, so dict methods such
        # as ``items`` or ``get`` are still resolved normally.
        if name in self:
            return self[name]

        raise AttributeError("No such attribute: " + name)

    def __setattr__(self, name, value):
        # every attribute assignment becomes a dict entry
        self[name] = value

    def __delattr__(self, name):
        if name not in self:
            raise AttributeError("No such attribute: " + name)

        del self[name]
79
80


81
82
# Names of the celery routing modes.
# NOTE(review): presumably the allowed values for ``celery.routing`` — confirm.
CELERY_WORKER_ROUTING = 'worker'
CELERY_QUEUE_ROUTING = 'queue'

# Host and credentials that ``rabbitmq_url`` assembles into the broker URL.
rabbitmq = NomadConfig(
    host='localhost',
    user='rabbitmq',
    password='rabbitmq'
)
89

90

91
92
def rabbitmq_url():
    ''' Returns the ``pyamqp`` URL built from the ``rabbitmq`` user, password, and host. '''
    url_parts = (rabbitmq.user, rabbitmq.password, rabbitmq.host)
    return 'pyamqp://%s:%s@%s//' % url_parts
93

Lauri Himanen's avatar
Lauri Himanen committed
94

95
96
97
98
celery = NomadConfig(
    max_memory=64e6,  # 64 GB
    timeout=1800,  # 1/2 h
    acks_late=True,
    routing=CELERY_QUEUE_ROUTING,
    priorities={
        'Upload.process_upload': 5,
        'Upload.delete_upload': 9,
        'Upload.publish_upload': 10
    }
)

fs = NomadConfig(
    tmp='.volumes/fs/tmp',
    staging='.volumes/fs/staging',
    public='.volumes/fs/public',
    local_tmp='/tmp',
    prefix_size=2,
    # captured once, when this module is imported
    working_directory=os.getcwd()
)

elastic = NomadConfig(
    host='localhost',
    port=9200,
    index_name='nomad_fairdi_calcs',
    materials_index_name='nomad_fairdi_materials'
)

keycloak = NomadConfig(
    server_url='https://nomad-lab.eu/fairdi/keycloak/auth/',
    realm_name='fairdi_nomad_test',
    username='admin',
    password='password',
    client_id='nomad_public',
    client_secret=None,
    oasis=False
)

mongo = NomadConfig(
    host='localhost',
    port=27017,
    db_name='nomad_fairdi'
)

logstash = NomadConfig(
    enabled=False,
    host='localhost',
    # kept as a string: overrides are cast to the type of the default value
    tcp_port='5000',
    level=logging.DEBUG
)

services = NomadConfig(
    api_host='localhost',
    api_port=8000,
    api_base_path='/fairdi/nomad/latest',
    api_secret='defaultApiSecret',
    api_chaos=0,
    admin_user_id='00000000-0000-0000-0000-000000000000',
    not_processed_value='not processed',
    unavailable_value='unavailable',
    https=False,
    upload_limit=10,
    force_raw_file_decoding=False,
    download_scan_size=500,
    download_scan_timeout='30m'
)

tests = NomadConfig(
    default_timeout=30
)

165

166
def api_url(ssl: bool = True):
    '''
    Returns the URL of the API, built from ``services.api_host`` and
    ``services.api_base_path``.

    Arguments:
        ssl: Set to ``False`` to get an ``http`` URL even if ``services.https`` is
            enabled. ``https`` is only used if both ``services.https`` and ``ssl``
            are true.
    '''
    protocol = 'https' if services.https and ssl else 'http'
    host = services.api_host.strip('/')
    base_path = services.api_base_path.strip('/')

    # An empty base path (``''`` or ``'/'``) must not produce a double slash
    # (``http://host//api``) in the resulting URL.
    path = '%s/api' % base_path if base_path else 'api'

    return '%s://%s/%s' % (protocol, host, path)
171
172


173
def gui_url(page: str = None):
    '''
    Returns the URL of the GUI, derived from the API URL by replacing its trailing
    ``api`` segment with ``gui``.

    Arguments:
        page: Optional path of a page within the GUI that is appended to the URL.
    '''
    # cut the trailing 'api' and then the slash that preceded it
    base = api_url(True)[:-3]
    if base.endswith('/'):
        base = base[:-1]

    if page is None:
        return '%s/gui' % base

    return '%s/gui/%s' % (base, page)

183

184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
def check_config():
    '''
    Used to check that the current configuration is valid. Should only be
    called once after the final config is loaded.

    Besides validating, this resets ``normalize.springer_db_path`` to ``None`` if the
    configured file does not exist.

    Raises:
        AssertionError: if there is a contradiction or invalid values in the
            config file settings.
    '''
    # The AFLOW symmetry information is checked once on import
    proto_symmetry_tolerance = normalize.prototype_symmetry_tolerance
    symmetry_tolerance = normalize.symmetry_tolerance
    if proto_symmetry_tolerance != symmetry_tolerance:
        raise AssertionError(
            "The AFLOW prototype information is outdated due to changed tolerance "
            "for symmetry detection. Please update the AFLOW prototype information "
            "by running the CLI command 'nomad admin ops prototype-update "
            "--matches-only'."
        )

    # The path may already be None (e.g. if this check runs a second time);
    # os.path.exists(None) would raise a TypeError.
    springer_db_path = normalize.springer_db_path
    if springer_db_path is not None and not os.path.exists(springer_db_path):
        normalize.springer_db_path = None

206

207
mail = NomadConfig(
    enabled=False,
    with_login=False,
    host='',
    port=8995,
    user='',
    password='',
    from_address='support@nomad-lab.eu',
    cc_address='support@nomad-lab.eu'
)

normalize = NomadConfig(
    # The system size limit for running the dimensionality analysis. For very
    # large systems the dimensionality analysis will get too expensive.
    system_classification_with_clusters_threshold=64,
    # Symmetry tolerance controls the precision used by spglib in order to find
    # symmetries. The atoms are allowed to move 1/2*symmetry_tolerance from
    # their symmetry positions in order for spglib to still detect symmetries.
    # The unit is angstroms. The value of 0.1 is used e.g. by Materials Project
    # according to
    # https://pymatgen.org/pymatgen.symmetry.analyzer.html#pymatgen.symmetry.analyzer.SpacegroupAnalyzer
    symmetry_tolerance=0.1,
    # The symmetry tolerance used in aflow prototype matching. Should only be
    # changed before re-running the prototype detection.
    prototype_symmetry_tolerance=0.1,
    # Maximum number of atoms in the single cell of a 2D material for it to be
    # considered valid.
    max_2d_single_cell_size=7,
    # The distance tolerance between atoms for grouping them into the same
    # cluster. Used in detecting system type.
    cluster_threshold=2.5,
    # Defines the "bin size" for rounding cell angles for the material hash
    angle_rounding=10.0,  # unit: degree
    # The threshold for a system to be considered "flat". Used e.g. when
    # determining if a 2D structure is purely 2-dimensional to allow extra rigid
    # transformations that are improper in 3D but proper in 2D.
    flat_dim_threshold=0.1,
    # The threshold for point equality in k-space. Unit: 1/m.
    k_space_precision=150e6,
    # The energy threshold for how much a band can be on top or below the fermi
    # level in order to detect a gap. Unit: Joule.
    band_structure_energy_tolerance=1.6022e-20,  # 0.1 eV
    # Resolved relative to the directory of this module.
    springer_db_path=os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        'normalizing/data/springer.msg'
    )
)

paths = NomadConfig(
    similarity="",
)

client = NomadConfig(
    user='leonard.hofstadter@nomad-fairdi.tests.de',
    password='password',
    url='http://nomad-lab.eu/prod/rae/api'
)

datacite = NomadConfig(
    mds_host='https://mds.datacite.org',
    enabled=False,
    prefix='10.17172',
    user='*',
    password='*'
)

meta = NomadConfig(
    version='0.9.5',
    commit=gitinfo.commit,
    release='devel',
    default_domain='dft',
    service='unknown nomad service',
    name='novel materials discovery (NOMAD)',
    description='A FAIR data sharing platform for materials science data',
    homepage='https://nomad-lab.eu',
    source_url='https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR',
    maintainer_email='markus.scheidgen@physik.hu-berlin.de'
)

# Settings that are not part of a category.
auxfile_cutoff = 100
parser_matching_size = 9128
console_log_level = logging.WARNING
max_upload_size = 32 * (1024 ** 3)
raw_file_strip_cutoff = 1000
use_empty_parsers = False
reprocess_unmatched = True
metadata_file_name = 'nomad'
metadata_file_extensions = ('json', 'yaml', 'yml')
Lauri Himanen's avatar
Merged.    
Lauri Himanen committed
295

296

297
298
def normalize_loglevel(value, default_level=logging.INFO):
    '''
    Converts a log level given as a number or as a name into a numeric log level.

    Arguments:
        value: The level as a number, a numeric string (e.g. ``'10'``), or the name
            of a level constant of the ``logging`` module (e.g. ``'DEBUG'``). Names
            are matched case-insensitively.
        default_level: The level that is returned if ``value`` is ``None``.

    Raises:
        AttributeError: If ``value`` is neither numeric nor the name of a log level.
    '''
    if value is None:
        return default_level

    try:
        return int(value)
    except ValueError:
        pass

    # The level constants are upper case (``logging.DEBUG``). The lower case names
    # are the module-level logging *functions* (``logging.debug``), which must not
    # be returned as a level.
    level = getattr(logging, str(value).strip().upper(), None)
    if not isinstance(level, int):
        raise AttributeError('No such log level: %s' % value)

    return level
306
307


308
# Converters for config keys whose new values need more than a cast to the type of
# the current value. Keys are the ``_`` separated paths of the config values.
transformations = {
    'console_log_level': normalize_loglevel,
    'logstash_level': normalize_loglevel
}

# use std python logger, since logging is not configured while loading configuration
logger = logging.getLogger(__name__)


318
def apply(key, value) -> None:
    '''
    Changes the config according to given key and value. The keys are interpreted as paths
    to config values with ``_`` as a separator. E.g. ``fs_staging`` leading to
    ``config.fs.staging``

    Because config names can contain ``_`` themselves, the path segments are joined
    one by one until they name an existing entry of the current category. Keys that
    do not lead to an existing config value are silently ignored.

    If the type of ``value`` differs from the type of the current value, ``value`` is
    converted with the function registered in ``transformations`` for ``key`` or,
    if there is none, with the type of the current value. If the conversion fails,
    an error is logged and the config is left unchanged. Values for settings that are
    currently ``None`` are stored as given.

    Arguments:
        key: The ``_`` separated path of the config value, e.g. ``elastic_port``.
        value: The new value, e.g. a string taken from an environment variable.
    '''
    segments = key.split('_')
    config = globals()
    name = None
    target_key = None

    for position, segment in enumerate(segments, start=1):
        name = segment if name is None else name + '_' + segment
        is_last = position == len(segments)

        if name not in config:
            # no entry with that name (yet), try again with one more segment
            continue

        candidate = config[name]
        if isinstance(candidate, NomadConfig):
            # descend into the category and start a new name
            config, name = candidate, None
            continue

        if is_last:
            target_key = name
        elif candidate is not None:
            # a plain value cannot have children
            return
        # otherwise: an unset (None) value with segments left, keep joining

    if target_key is None:
        # the key names nothing or a whole category, both cannot be set
        return

    current_value = config[target_key]

    # A current value of None carries no type information to convert to.
    if current_value is not None and not isinstance(value, type(current_value)):
        if isinstance(current_value, bool) and isinstance(value, str) \
                and key not in transformations:
            # bool('false') is True; map the usual textual negatives to False
            value = value.strip().lower() not in ('', '0', 'false', 'no', 'off')
        else:
            try:
                value = transformations.get(key, type(current_value))(value)
            except Exception as e:
                logger.error(
                    'config key %s value %s has wrong type: %s' % (key, str(value), str(e)))
                # do not overwrite a valid setting with an invalid value
                return

    config[target_key] = value


Markus Scheidgen's avatar
Markus Scheidgen committed
371
def load_config(config_file: str = os.environ.get('NOMAD_CONFIG', 'nomad.yaml')) -> None:
    '''
    Loads the configuration from the ``config_file`` and environment.

    Values from the file override the defaults. Environment variables that start
    with ``NOMAD_`` override both; the rest of their name is lower-cased and used as
    key for :func:`apply`. Finally, the resulting configuration is checked with
    :func:`check_config`.

    Arguments:
        config_file: Override the configfile, default is file stored in env variable
            NOMAD_CONFIG or ``nomad.yaml``. The default is determined once, when this
            module is imported.
    '''
    def adapt(config, new_config, child_key=None):
        ''' Recursively copies the values of ``new_config`` into existing keys of ``config``. '''
        for key, value in new_config.items():
            qualified_key = key if child_key is None else '%s_%s' % (child_key, key)

            if key not in config:
                logger.error('config key %s does not exist' % qualified_key)
                continue

            current_value = config[key]
            if isinstance(value, dict) and isinstance(current_value, NomadConfig):
                adapt(current_value, value, qualified_key)
                continue

            # A current value of None carries no type information to convert to.
            if current_value is not None and not isinstance(value, type(current_value)):
                try:
                    value = transformations.get(qualified_key, type(current_value))(value)
                except Exception as e:
                    logger.error(
                        'config key %s value %s has wrong type: %s' % (
                            qualified_key, str(value), str(e)))
                    continue

            # also reached after a successful conversion
            config[key] = value
            logger.debug('override config key %s with value %s' % (qualified_key, str(value)))

    # load yaml and override defaults
    if os.path.exists(config_file):
        # stays None if the file is empty or cannot be parsed
        config_data = None
        with open(config_file, 'r') as stream:
            try:
                # NOTE(review): FullLoader is not meant for untrusted input; consider
                # yaml.safe_load if the config file only contains plain yaml types.
                config_data = yaml.load(stream, Loader=getattr(yaml, 'FullLoader'))
            except yaml.YAMLError as e:
                logger.error('cannot read nomad config', exc_info=e)

        if isinstance(config_data, dict):
            adapt(globals(), config_data)
        elif config_data is not None:
            logger.error('cannot read nomad config: top level is not a mapping')

    # load env and override yaml and defaults
    env_prefix = 'NOMAD_'
    for env_name, env_value in os.environ.items():
        if env_name.startswith(env_prefix):
            apply(env_name[len(env_prefix):].lower(), env_value)

    check_config()

426
427

# Load the configuration when this module is imported.
load_config()