diff --git a/gui/tests/env.js b/gui/tests/env.js index b23c00f1905a3ff455ca5a2e689fc147e1056f2f..d461fb95c023e130a2d684f98fe479a259fe2063 100644 --- a/gui/tests/env.js +++ b/gui/tests/env.js @@ -12,7 +12,6 @@ window.nomadEnv = { "globalLoginRequired": false, "servicesUploadLimit": 10, "ui": { - "default_unit_system": "Custom", "entry_context": { "overview": { "include": [ @@ -1132,6 +1131,7 @@ window.nomadEnv = { } } } - } + }, + "default_unit_system": "Custom" } } diff --git a/mkdocs.yml b/mkdocs.yml index 9c7305144f5a565d50104983194543d7549a91c7..017d9c3d4640eb00d8adeff26ab31d2db8f92ff5 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -24,7 +24,9 @@ nav: - develop/search.md - develop/parser.md - develop/normalizers.md - - Operating NOMAD (Oasis): oasis.md + - Operating NOMAD: + - oasis.md + - config.md theme: name: material palette: diff --git a/nomad/app/main.py b/nomad/app/main.py index bbe38763a2bcf626366ab495eddb5f712c697f79..030d579621dd9a6dd0d0e256ddde4779e3d23c60 100644 --- a/nomad/app/main.py +++ b/nomad/app/main.py @@ -118,7 +118,7 @@ async def http_exception_handler(request, exc): <body> <h1>NOMAD app</h1> <h2>info</h2> - {'<br/>'.join(f'{key}: {value}' for key, value in config.meta.items())} + {'<br/>'.join(f'{key}: {value}' for key, value in config.meta.dict().items())} <h2>apis</h2> <a href="{app_base}/api/v1/extensions/docs">NOMAD API v1</a><br/> <a href="{app_base}/optimade/v1/extensions/docs">Optimade API</a><br/> diff --git a/nomad/app/v1/routers/entries.py b/nomad/app/v1/routers/entries.py index 9c2d3000e9a03199a7d45d7d87bdabeb66c246f8..44ca81f0c79b55965093078119d492d2d129a402 100644 --- a/nomad/app/v1/routers/entries.py +++ b/nomad/app/v1/routers/entries.py @@ -526,11 +526,11 @@ def _answer_entries_raw_request(owner: Owner, query: Query, files: Files, user: required=MetadataRequired(include=[]), user_id=user.user_id if user is not None else None) - if response.pagination.total > config.max_entry_download: + if response.pagination.total > config.services.max_entry_download: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail='The limit of maximum number of entries in a single download (%d) has been exeeded (%d).' % ( - config.max_entry_download, response.pagination.total)) + config.services.max_entry_download, response.pagination.total)) files_params = Files() if files is None else files search_includes = ['entry_id', 'upload_id', 'mainfile'] @@ -832,12 +832,12 @@ def _answer_entries_archive_download_request( required=MetadataRequired(include=[]), user_id=user.user_id if user is not None else None) - if response.pagination.total > config.max_entry_download: + if response.pagination.total > config.services.max_entry_download: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail=( 'The limit of maximum number of entries in a single download (%d) has been ' - 'exeeded (%d).' % (config.max_entry_download, response.pagination.total))) + 'exeeded (%d).' 
% (config.services.max_entry_download, response.pagination.total))) manifest = [] search_includes = ['entry_id', 'upload_id', 'parser_name'] diff --git a/nomad/app/v1/routers/uploads.py b/nomad/app/v1/routers/uploads.py index 1dbfa20f1777119e4264ed9b93a85fa2a6a5008a..e0159e377b5382fb163f26e0e31d8412ecae9800 100644 --- a/nomad/app/v1/routers/uploads.py +++ b/nomad/app/v1/routers/uploads.py @@ -1036,8 +1036,8 @@ async def put_upload_raw_path( upload_path = upload_paths[0] full_path = os.path.join(path, os.path.basename(upload_path)) try: - reprocess_settings = dict( - index_invidiual_entries=True, reprocess_existing_entries=True) + reprocess_settings = config.Reprocess( + index_individual_entries=True, reprocess_existing_entries=True) entry = upload.put_file_and_process_local( upload_path, path, reprocess_settings=reprocess_settings) @@ -1659,17 +1659,12 @@ async def get_upload_bundle( upload = _get_upload_with_read_access(upload_id, user, include_others=True) _check_upload_not_processing(upload) - export_settings_dict: Dict[str, Any] = dict( + export_settings = config.bundle_export.default_settings.customize( + None, include_raw_files=include_raw_files, include_archive_files=include_archive_files, include_datasets=include_datasets) - for k, v in export_settings_dict.copy().items(): - if v is None: - del export_settings_dict[k] - - export_settings = config.bundle_export.default_settings.customize(export_settings_dict) - try: stream = BundleExporter( upload, @@ -1755,7 +1750,8 @@ async def post_upload_bundle( file data in the http body. Both are supported. See the POST `uploads` endpoint for examples of curl commands for uploading files. ''' - import_settings_dict: Dict[str, Any] = dict( + import_settings = config.bundle_import.default_settings.customize( + None, include_raw_files=include_raw_files, include_archive_files=include_archive_files, include_datasets=include_datasets, @@ -1764,12 +1760,6 @@ async def post_upload_bundle( set_from_oasis=set_from_oasis, trigger_processing=trigger_processing) - for k, v in import_settings_dict.copy().items(): - if v is None: - del import_settings_dict[k] - - import_settings = config.bundle_import.default_settings.customize(import_settings_dict) - bundle_importer: BundleImporter = None bundle_path: str = None @@ -1792,7 +1782,10 @@ async def post_upload_bundle( upload = bundle_importer.create_upload_skeleton() bundle_importer.close() # Run the import as a @process - upload.import_bundle(bundle_path=bundle_path, import_settings=import_settings, embargo_length=embargo_length) + upload.import_bundle( + bundle_path=bundle_path, + import_settings=import_settings.dict(), + embargo_length=embargo_length) return UploadProcDataResponse( upload_id=upload.upload_id, data=_upload_to_pydantic(upload)) diff --git a/nomad/atomutils.py b/nomad/atomutils.py index 3e512b6a62d39be384642d8a95fd9ecf0a8644e2..18aeba8142dca2e93063f1cf958b6a9b5d1d93a9 100644 --- a/nomad/atomutils.py +++ b/nomad/atomutils.py @@ -424,7 +424,7 @@ def get_hill_decomposition(atom_labels: NDArray[Any], reduced: bool = False) -> } if (names[0] in order): if (names[1] in order): - if(order[names[0]] < order[names[1]]): + if (order[names[0]] < order[names[1]]): # For non-metals: # Swap symbols and counts if first element # is more electronegative than the second one, diff --git a/nomad/bundles.py b/nomad/bundles.py index 65f63fcb9db99851005a5a412ba81e4b3b51e5fb..08b84a12b22267b39dbd68b9ca4bb3708f1ed7e4 100644 --- a/nomad/bundles.py +++ b/nomad/bundles.py @@ -26,7 +26,7 @@ from fastapi import 
HTTPException, status class BundleExporter: def __init__( self, upload: Upload, export_as_stream: bool, export_path: str, zipped: bool, overwrite: bool, - export_settings: config.NomadConfig): + export_settings: config.BundleExportSettings): ''' Class for exporting an upload as a *bundle*. Bundles are used to export and import uploads between different NOMAD installations. After instantiating a BundleExporter, @@ -48,8 +48,8 @@ class BundleExporter: If the target file/folder should be overwritten by this operation. Not applicable if `export_as_stream` is True. export_settings: - A NomadConfig with settings for controlling the bundle content. See the - `config.bundle_export.default_settings` for applicable options. + Settings for controlling the bundle content. See the + `config.BundeExportSettings` for applicable options. NOTE: the dictionary must specify a *complete* set of options. ''' BundleExporter.check_export_settings(export_settings) @@ -61,13 +61,7 @@ class BundleExporter: self.export_settings = export_settings @classmethod - def check_export_settings(cls, export_settings: config.NomadConfig): - ''' - Perform quick sanity checks of a dictionary with export settings. - NOTE: the dictionary must specify a *complete* set of options. - ''' - ''' Perform quick sanity checks of the settings. ''' - check_settings(export_settings, config.bundle_export.default_settings) + def check_export_settings(cls, export_settings: config.BundleExportSettings): assert export_settings.include_archive_files or export_settings.include_raw_files, ( 'Export must include the archive files or the raw files, or both') @@ -112,8 +106,8 @@ class BundleExporter: ''' Create the bundle_info.json data ''' bundle_info: Dict[str, Any] = dict( upload_id=self.upload.upload_id, - source=config.meta, # Information about the source system, i.e. this NOMAD installation - export_settings=self.export_settings, + source=config.meta.dict(), # Information about the source system, i.e. this NOMAD installation + export_settings=self.export_settings.dict(), upload=self.upload.to_mongo().to_dict(), entries=[entry.to_mongo().to_dict() for entry in self.upload.successful_entries]) # Handle datasets @@ -134,7 +128,7 @@ class BundleExporter: class BundleImporter: - def __init__(self, user: datamodel.User, import_settings: config.NomadConfig, embargo_length: int = None): + def __init__(self, user: datamodel.User, import_settings: config.BundleImportSettings, embargo_length: int = None): ''' Class for importing an upload from a *bundle*. @@ -143,15 +137,14 @@ class BundleImporter: The user requesting the import. Used to check permissions. Of omitted, no permission checks are done. import_settings: - A NomadConfig with settings for controlling the bundle content. See the - `config.bundle_import.default_settings` for applicable options. + Settings for controlling the bundle content. See the + `config.BundleImportSettings` for applicable options. NOTE: the dictionary must specify a complete set of options. embargo_length: Used to set the embargo length. If set to None, the value will be imported from the bundle. The value should be between 0 and 36. A value of 0 means no embargo. 
''' - BundleImporter.check_import_settings(import_settings) self.user = user self.import_settings = import_settings self.embargo_length = embargo_length @@ -177,14 +170,6 @@ class BundleImporter: return False return os.path.isfile(os.path.join(path, bundle_info_filename)) - @classmethod - def check_import_settings(cls, import_settings: config.NomadConfig): - ''' - Perform quick sanity checks of a dictionary with import settings. - NOTE: the dictionary must specify a complete set of options. - ''' - check_settings(import_settings, config.bundle_import.default_settings) - def check_api_permissions(self): ''' Checks if the specified user is allowed to import a bundle via the api. Raises a @@ -201,8 +186,8 @@ class BundleImporter: status_code=status.HTTP_401_UNAUTHORIZED, detail='User not authorized to import bundles') if not is_admin: - for k, v in self.import_settings.items(): - if v != config.bundle_import.default_settings.get(k): + for k, v in self.import_settings.dict().items(): + if v != config.bundle_import.default_settings.dict().get(k): raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, detail=f'Changing the setting {k} requires an admin user') @@ -279,7 +264,8 @@ class BundleImporter: if self.import_settings.trigger_processing: return self._reprocess_upload() return None - except Exception: + except Exception as e: + logger.error('could not import bundle', exc_info=e) self.bundle.close() if self.import_settings.delete_bundle_on_fail: self.delete_bundle() @@ -294,7 +280,7 @@ class BundleImporter: # Just ensure the upload is deleted from search with utils.timer(logger, 'upload deleted from index'): search.delete_upload(self.upload.upload_id, refresh=True) - raise + raise e def close(self): if self.bundle: @@ -515,18 +501,7 @@ class BundleImporter: refresh=True) def _reprocess_upload(self): - reprocess_settings = { - k: v for k, v in self.import_settings.items() if k in config.reprocess} - return self.upload._process_upload_local(reprocess_settings=reprocess_settings) - - -def check_settings(given_settings: config.NomadConfig, default_settings: config.NomadConfig): - ''' Check that the given settings is complete ''' - assert given_settings, 'No settings provided' - missing_keys = [key for key in default_settings.keys() if key not in given_settings] - assert not missing_keys, f'Missing value for setting "{missing_keys[0]}"' - unknown_keys = [key for key in given_settings.keys() if key not in default_settings] - assert not unknown_keys, f'Received unknown setting "{unknown_keys[0]}"' + return self.upload._process_upload_local(reprocess_settings=self.import_settings.process_settings) def keys_exist(data: Dict[str, Any], required_keys: Iterable[str], error_message: str): diff --git a/nomad/cli/admin/uploads.py b/nomad/cli/admin/uploads.py index c0a7af75be640c4aecc1e6fdb4cbdf3dc1e48f88..ebed59a634ff83735fd22ddca4169ad33913250e 100644 --- a/nomad/cli/admin/uploads.py +++ b/nomad/cli/admin/uploads.py @@ -586,18 +586,18 @@ def export_bundle(ctx, uploads, out_dir, uncompressed, overwrite, settings, igno _, uploads = _query_uploads(uploads, **ctx.obj.uploads_kwargs) - default_export_settings = config.bundle_export.default_settings.customize(config.bundle_export.default_settings_cli) - if settings: settings = json.loads(settings) try: + default_export_settings = config.bundle_export.default_settings.customize( + config.bundle_export.default_settings_cli) export_settings = default_export_settings.customize(settings) BundleExporter.check_export_settings(export_settings) - except 
AssertionError as e:
+        except Exception as e:
             # Invalid setting provided
             print(e)
             print('\nAvailable settings and their configured default values:')
-            for k, v in default_export_settings.items():
+            for k, v in default_export_settings.dict().items():
                 print(f'    {k:<40}: {v}')
             return -1
     else:
@@ -667,6 +667,8 @@ def export_bundle(ctx, uploads, out_dir, uncompressed, overwrite, settings, igno
     (the default behaviour is to abort on first failing bundle).''')
 @click.pass_context
 def import_bundle(ctx, input_path, multi, settings, embargo_length, use_celery, ignore_errors):
+    from pydantic import parse_obj_as
+
     from nomad.bundles import BundleImporter
     from nomad import infrastructure
 
@@ -696,13 +698,13 @@ def import_bundle(ctx, input_path, multi, settings, embargo_length, use_celery,
     if settings:
         settings = json.loads(settings)
         try:
-            import_settings = default_import_settings.customize(settings)
-            BundleImporter.check_import_settings(import_settings)
+            import_settings = default_import_settings.customize(
+                parse_obj_as(config.BundleImportSettings, settings))
         except Exception as e:
             # Invalid setting provided
             print(e)
             print('\nAvailable settings and their configured default values:')
-            for k, v in default_import_settings.items():
+            for k, v in default_import_settings.dict().items():
                 print(f'    {k:<40}: {v}')
             return -1
     else:
diff --git a/nomad/cli/aflow.py b/nomad/cli/aflow.py
index 1c0aafa5e56c7e089907b6beb704f77639cea4cc..b5102152b9f874c5b15a649debd793e66e59f2ab 100644
--- a/nomad/cli/aflow.py
+++ b/nomad/cli/aflow.py
@@ -426,7 +426,7 @@ class DbUpdater:
                 return 'uploaded', upload['upload_id']
 
         size = 0.0
-        max_zip_size = config.max_upload_size
+        max_zip_size = config.process.max_upload_size
         dirs = []
         for i in range(len(plist)):
             d, s = self._get_files(self.update_list[plist[i]])
diff --git a/nomad/cli/cli.py b/nomad/cli/cli.py
index 78682e115c726820edaeb207095ba45b6528a932..bb746645ca3e07bac4b220dd7c67725c30632e39 100644
--- a/nomad/cli/cli.py
+++ b/nomad/cli/cli.py
@@ -58,12 +58,12 @@ def cli(ctx, verbose: bool, debug: bool, log_label: str):
     config.meta.label = log_label
 
     if debug:
-        config.console_log_level = logging.DEBUG
+        config.services.console_log_level = logging.DEBUG
     elif verbose:
-        config.console_log_level = logging.INFO
+        config.services.console_log_level = logging.INFO
     else:
-        config.console_log_level = logging.WARNING
-    utils.set_console_log_level(config.console_log_level)
+        config.services.console_log_level = logging.WARNING
+    utils.set_console_log_level(config.services.console_log_level)
 
 
 def run_cli():
diff --git a/nomad/cli/dev.py b/nomad/cli/dev.py
index d991493a2185bafae7d5288bc59023881a2a2cfd..91cb2c8845d0860c28ecd368da8e828c6f30dd3e 100644
--- a/nomad/cli/dev.py
+++ b/nomad/cli/dev.py
@@ -234,13 +234,13 @@ def get_gui_config(proxy: bool = False) -> str:
         'keycloakRealm': config.keycloak.realm_name,
         'keycloakClientId': config.keycloak.client_id,
         'debug': False,
-        'encyclopediaBase': config.encyclopedia_base if config.encyclopedia_base else None,
-        'aitoolkitEnabled': config.aitoolkit_enabled,
+        'encyclopediaBase': config.services.encyclopedia_base if config.services.encyclopedia_base else None,
+        'aitoolkitEnabled': config.services.aitoolkit_enabled,
         'oasis': config.oasis.is_oasis,
         'version': config.meta.beta if config.meta.beta else {},
         'globalLoginRequired': config.oasis.allowed_users is not None,
         'servicesUploadLimit': config.services.upload_limit,
-        'ui': config.ui if config.ui else {}
+        'ui': config.ui.dict() if config.ui else {}
     }
 
     return f'window.nomadEnv = {json.dumps(data, 
indent=2)}' diff --git a/nomad/config.py b/nomad/config.py index ac447c05d53e0c14c2556f43f765f8fd7f7c1508..6df1d2302f194ab4d7717707ad4e06e014d6a2b8 100644 --- a/nomad/config.py +++ b/nomad/config.py @@ -30,9 +30,6 @@ by python import logic. The categories are choosen along infrastructure componen This module also provides utilities to read the configuration from environment variables and .yaml files. This is done automatically on import. The precedence is env over .yaml over defaults. - -.. autoclass:: nomad.config.NomadConfig -.. autofunction:: nomad.config.load_config ''' import logging @@ -41,8 +38,9 @@ import inspect import os.path import yaml import warnings -from typing import Dict, List, Any +from typing import TypeVar, List, Any, cast from pkg_resources import get_distribution, DistributionNotFound +from pydantic import BaseModel, Field try: __version__ = get_distribution("nomad-lab").version @@ -56,143 +54,173 @@ warnings.filterwarnings('ignore', message='numpy.ufunc size changed') warnings.filterwarnings('ignore', category=DeprecationWarning) -class NomadConfig(dict): - ''' - A class for configuration categories. It is a dict subclass that uses attributes as - key/value pairs. - ''' - def __init__(self, **kwargs): - super().__init__(**kwargs) - - def __getattr__(self, name): - if name in self: - return self[name] - else: - raise AttributeError("No such attribute: " + name) - - def __setattr__(self, name, value): - self[name] = value +NomadSettingsBound = TypeVar('NomadSettingsBound', bound='NomadSettings') - def __delattr__(self, name): - if name in self: - del self[name] - else: - raise AttributeError("No such attribute: " + name) - def customize(self, custom_settings: Dict[str, Any]) -> 'NomadConfig': +class NomadSettings(BaseModel): + def customize(self: NomadSettingsBound, custom_settings: NomadSettingsBound, **kwargs) -> NomadSettingsBound: ''' - Returns a new NomadConfig object, created by taking a copy of the current config and + Returns a new config object, created by taking a copy of the current config and updating it with the settings defined in `custom_settings`. The `custom_settings` dict - must not contain any new keys (keys not defined in this NomadConfig). If it does, + must not contain any new keys (keys not defined in this NomadSettings). If it does, an exception will be raised. 
'''
-        rv = NomadConfig(**self)
+
+        rv = self.copy(deep=True)
+
         if custom_settings:
-            for k, v in custom_settings.items():
-                assert k in rv, f'Invalid setting: {k}'
-                rv[k] = v
-        return rv
+            for field_name in custom_settings.__fields__.keys():
+                try:
+                    setattr(rv, field_name, getattr(custom_settings, field_name))
+                except Exception:
+                    raise AssertionError(f'Invalid setting: {field_name}')
+
+        for key, value in kwargs.items():
+            if value is None:
+                continue
+            try:
+                setattr(rv, key, value)
+            except Exception:
+                raise AssertionError(f'Invalid setting: {key}')
+
+        return cast(NomadSettingsBound, rv)
 
 
 CELERY_WORKER_ROUTING = 'worker'
 CELERY_QUEUE_ROUTING = 'queue'
 
-rabbitmq = NomadConfig(
-    host='localhost',
-    user='rabbitmq',
-    password='rabbitmq'
-)
+
+class RabbitMQ(NomadSettings):
+    host = 'localhost'
+    user = 'rabbitmq'
+    password = 'rabbitmq'
+
+
+rabbitmq = RabbitMQ()
 
 
 def rabbitmq_url():
     return 'pyamqp://%s:%s@%s//' % (rabbitmq.user, rabbitmq.password, rabbitmq.host)
 
 
-celery = NomadConfig(
-    max_memory=64e6,  # 64 GB
-    timeout=1800,  # 1/2 h
-    acks_late=False,
-    routing=CELERY_QUEUE_ROUTING,
-    priorities={
+class Celery(NomadSettings):
+    max_memory = 64e6  # 64 GB
+    timeout = 1800  # 1/2 h
+    acks_late = False
+    routing = CELERY_QUEUE_ROUTING
+    priorities = {
         'Upload.process_upload': 5,
         'Upload.delete_upload': 9,
         'Upload.publish_upload': 10
     }
-)
-
-fs = NomadConfig(
-    tmp='.volumes/fs/tmp',
-    staging='.volumes/fs/staging',
-    staging_external=None,
-    public='.volumes/fs/public',
-    public_external=None,
-    local_tmp='/tmp',
-    prefix_size=2,
-    archive_version_suffix='v1',
-    working_directory=os.getcwd(),
-    external_working_directory=None
-)
-
-elastic = NomadConfig(
-    host='localhost',
-    port=9200,
-    timeout=60,
-    bulk_timeout=600,
-    bulk_size=1000,
-    entries_per_material_cap=1000,
-    entries_index='nomad_entries_v1',
-    materials_index='nomad_materials_v1',
-)
-
-keycloak = NomadConfig(
-    server_url='https://nomad-lab.eu/fairdi/keycloak/auth/',
-    public_server_url=None,
-    realm_name='fairdi_nomad_prod',
-    username='admin',
-    password='password',
-    client_id='nomad_public',
-    client_secret=None)
-
-mongo = NomadConfig(
-    host='localhost',
-    port=27017,
-    db_name='nomad_v1'
-)
-
-logstash = NomadConfig(
-    enabled=False,
-    host='localhost',
-    tcp_port='5000',
-    level=logging.DEBUG
-)
-
-services = NomadConfig(
-    api_host='localhost',
-    api_port=8000,
-    api_base_path='/fairdi/nomad/latest',
-    api_secret='defaultApiSecret',
-    api_chaos=0,
-    admin_user_id='00000000-0000-0000-0000-000000000000',
-    not_processed_value='not processed',
-    unavailable_value='unavailable',
-    https=False,
-    https_upload=False,
-    upload_limit=10,
-    force_raw_file_decoding=False,
-    download_scan_size=500,
-    download_scan_timeout=u'30m'
-)
-
-oasis = NomadConfig(
-    central_nomad_deployment_url='https://nomad-lab.eu/prod/v1/api',
-    allowed_users=None,  # a list of usernames or user account emails
-    uses_central_user_management=False,
-    is_oasis=False
-)
-
-tests = NomadConfig(
-    default_timeout=60
-)
+
+
+celery = Celery()
+
+
+class FS(NomadSettings):
+    tmp = '.volumes/fs/tmp'
+    staging = '.volumes/fs/staging'
+    staging_external: str = None
+    public = '.volumes/fs/public'
+    public_external: str = None
+    local_tmp = '/tmp'
+    prefix_size = 2
+    archive_version_suffix = 'v1'
+    working_directory = os.getcwd()
+    external_working_directory: str = None
+
+
+fs = FS()
+
+
+class Elastic(NomadSettings):
+    host = 'localhost'
+    port = 9200
+    timeout = 60
+    bulk_timeout = 600
+    bulk_size = 1000
+    entries_per_material_cap = 1000
+    entries_index = 'nomad_entries_v1'
+    materials_index = 'nomad_materials_v1'
+
+
+elastic = Elastic()
+
+
+class Keycloak(NomadSettings):
+    server_url = 'https://nomad-lab.eu/fairdi/keycloak/auth/'
+    public_server_url: str = None
+    realm_name = 'fairdi_nomad_prod'
+    username = 'admin'
+    password = 'password'
+    client_id = 'nomad_public'
+    client_secret: str = None
+
+
+keycloak = Keycloak()
+
+
+class Mongo(NomadSettings):
+    ''' All settings related to connect and use mongodb.'''
+    host: str = Field('localhost', description='The name of the host that runs mongodb.')
+    port: int = Field(27017, description='The port to connect with mongodb.')
+    db_name: str = Field('nomad_v1', description='The used mongodb database name.')
+
+
+mongo = Mongo()
+
+
+class Logstash(NomadSettings):
+    enabled = False
+    host = 'localhost'
+    tcp_port = '5000'
+    level: int = logging.DEBUG
+
+
+logstash = Logstash()
+
+
+class Services(NomadSettings):
+    api_host = 'localhost'
+    api_port = 8000
+    api_base_path = '/fairdi/nomad/latest'
+    api_secret = 'defaultApiSecret'
+    api_chaos = 0
+    admin_user_id = '00000000-0000-0000-0000-000000000000'
+    not_processed_value = 'not processed'
+    unavailable_value = 'unavailable'
+    https = False
+    https_upload = False
+    upload_limit = 10
+    force_raw_file_decoding = False
+    download_scan_size = 500
+    download_scan_timeout = u'30m'
+    encyclopedia_base = "https://nomad-lab.eu/prod/rae/encyclopedia/#"
+    aitoolkit_enabled = False
+    console_log_level = logging.WARNING
+    max_entry_download = 500000
+
+
+services = Services()
+
+
+class Oasis(NomadSettings):
+    central_nomad_deployment_url = 'https://nomad-lab.eu/prod/v1/api'
+    allowed_users: str = Field(
+        None, description='A list of usernames or user account emails.')
+    uses_central_user_management = False
+    is_oasis = False
+
+
+oasis = Oasis()
+
+
+class Tests(NomadSettings):
+    default_timeout = 60
+
+
+tests = Tests()
 
 
 def api_url(ssl: bool = True, api: str = 'api', api_host: str = None, api_port: int = None):
@@ -223,276 +251,323 @@ def gui_url(page: str = None):
     return '%s/gui' % base
 
 
-def _check_config():
-    """Used to check that the current configuration is valid. Should only be
-    called once after the final config is loaded.
+class Mail(NomadSettings):
+    enabled = False
+    with_login = False
+    host = ''
+    port = 8995
+    user = ''
+    password = ''
+    from_address = 'support@nomad-lab.eu'
+    cc_address = 'support@nomad-lab.eu'
+
+
+mail = Mail()
+
+
+class Normalize(NomadSettings):
+    system_classification_with_clusters_threshold = Field(
+        64, description='''
+        The system size limit for running the dimensionality analysis. For very
+        large systems the dimensionality analysis will get too expensive.
+    ''')
+    symmetry_tolerance = Field(
+        0.1, description='''
+        Symmetry tolerance controls the precision used by spglib in order to find
+        symmetries. The atoms are allowed to move 1/2*symmetry_tolerance from
+        their symmetry positions in order for spglib to still detect symmetries.
+        The unit is angstroms. The value of 0.1 is used e.g. by Materials Project
+        according to
+        https://pymatgen.org/pymatgen.symmetry.analyzer.html#pymatgen.symmetry.analyzer.SpacegroupAnalyzer
+    ''')
+    prototype_symmetry_tolerance = Field(
+        0.1, description='''
+        The symmetry tolerance used in aflow prototype matching. Should only be
+        changed before re-running the prototype detection.
+    ''')
+    max_2d_single_cell_size = Field(
+        7, description='''
+        Maximum number of atoms in the single cell of a 2D material for it to be
+        considered valid.
+ ''') + cluster_threshold = Field( + 2.5, description=''' + The distance tolerance between atoms for grouping them into the same + cluster. Used in detecting system type. + ''') + + angle_rounding = Field( + float(10.0), description=''' + Defines the "bin size" for rounding cell angles for the material hash in degree. + ''') + flat_dim_threshold = Field( + 0.1, description=''' + The threshold for a system to be considered "flat". Used e.g. when + determining if a 2D structure is purely 2-dimensional to allow extra rigid + transformations that are improper in 3D but proper in 2D. + ''') + + k_space_precision = Field( + 150e6, description=''' + The threshold for point equality in k-space. Unit: 1/m. + ''') + band_structure_energy_tolerance = Field( + 8.01088e-21, description=''' + The energy threshold for how much a band can be on top or below the fermi + level in order to still detect a gap. Unit: Joule. + ''') + springer_db_path = Field( + os.path.join(os.path.dirname(os.path.abspath(__file__)), 'normalizing/data/springer.msg')) + + +normalize = Normalize() + + +class Resources(NomadSettings): + enabled = False + db_name = 'nomad_v1_resources' + max_time_in_mongo = Field( + 60 * 60 * 24 * 365., description=''' + Maxmimum time a resource is stored in mongodb before being updated. + ''') + download_retries = Field( + 2, description='Number of retries when downloading resources.') + download_retry_delay = Field( + 10, description='Delay between retries in seconds') + max_connections = Field( + 10, description='Maximum simultaneous connections used to download resources.') + + +resources = Resources() + + +class Client(NomadSettings): + user: str = None + password: str = None + access_token: str = None + url = 'http://nomad-lab.eu/prod/v1/api' + + +client = Client() + + +class DataCite(NomadSettings): + mds_host = 'https://mds.datacite.org' + enabled = False + prefix = '10.17172' + user = '*' + password = '*' + + +datacite = DataCite() + + +class Meta(NomadSettings): + version = __version__ + commit = '' + deployment = Field( + 'devel', description='Human-friendly name of the nomad deployment') + deployment_url = Field( + 'https://my-oasis.org/api', description='The deployment\'s url (api url).') + label: str = None + default_domain = 'dft' + service = 'unknown nomad service' + name = 'novel materials discovery (NOMAD)' + description = 'A FAIR data sharing platform for materials science data' + homepage = 'https://nomad-lab.eu' + source_url = 'https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR' + maintainer_email = 'markus.scheidgen@physik.hu-berlin.de' + beta: dict = None + + +meta = Meta() + + +class GitLab(NomadSettings): + private_token = 'not set' - Raises: - AssertionError: if there is a contradiction or invalid values in the - config file settings. - """ - # The AFLOW symmetry information is checked once on import - proto_symmetry_tolerance = normalize.prototype_symmetry_tolerance - symmetry_tolerance = normalize.symmetry_tolerance - if proto_symmetry_tolerance != symmetry_tolerance: - raise AssertionError( - "The AFLOW prototype information is outdated due to changed tolerance " - "for symmetry detection. Please update the AFLOW prototype information " - "by running the CLI command 'nomad admin ops prototype-update " - "--matches-only'." 
- ) - if normalize.springer_db_path and not os.path.exists(normalize.springer_db_path): - normalize.springer_db_path = None +gitlab = GitLab() - if keycloak.public_server_url is None: - keycloak.public_server_url = keycloak.server_url - def set_external_path(source_obj, source_key, target_obj, target_key, overwrite=False): - source_value = getattr(source_obj, source_key) - target_value = getattr(target_obj, target_key) +class Reprocess(NomadSettings): + ''' + Configures standard behaviour when reprocessing. + Note, the settings only matter for published uploads and entries. For uploads in + taging, we always reparse, add newfound entries, and delete unmatched entries. + ''' + rematch_published = True + reprocess_existing_entries = True + use_original_parser = False + add_matched_entries_to_published = True + delete_unmatched_published_entries = False + index_individual_entries = False + + +reprocess = Reprocess() + + +class Process(NomadSettings): + store_package_definition_in_mongo = Field( + False, description='Configures if to store the corresponding package definition in mongodb.') + add_definition_id_to_reference = Field( + False, description=''' + Configures if to attach definition id to `m_def`, note it is different from `m_def_id`. + The `m_def_id` will be exported with the `with_def_id=True` via `m_to_dict`. + ''') + write_definition_id_to_archive = Field(False, description='Write `m_def_id` to the archive.') + index_materials = True + reuse_parser = True + metadata_file_name = 'nomad' + metadata_file_extensions = ('json', 'yaml', 'yml') + auxfile_cutoff = 100 + parser_matching_size = 150 * 80 # 150 lines of 80 ASCII characters per line + max_upload_size = 32 * (1024 ** 3) + use_empty_parsers = False + + +process = Process() + + +class RFC3161Timestamp(NomadSettings): + server = Field( + 'http://time.certum.pl/', description='The rfc3161ng timestamping host.') + cert: str = Field( + None, description='Path to the optional rfc3161ng timestamping server certificate.') + hash_algorithm = Field( + 'sha256', description='Hash algorithm used by the rfc3161ng timestamping server.') + username: str = None + password: str = None + + +rfc3161_timestamp = RFC3161Timestamp() + + +class BundleExportSettings(NomadSettings): + include_raw_files = True + include_archive_files = True + include_datasets = True + + +class BundleExport(NomadSettings): + default_cli_bundle_export_path: str = './bundles' + default_settings = BundleExportSettings() + default_settings_cli: BundleExportSettings = Field(None, description=''' + Additional default settings, applied when exporting using the CLI (command-line interface). + This allows to override some of the settings specified in the general default settings above. + ''') + + +bundle_export = BundleExport() + + +class BundleImportSettings(NomadSettings): + process_settings = Field( + Reprocess( + rematch_published=True, + reprocess_existing_entries=True, + use_original_parser=False, + add_matched_entries_to_published=True, + delete_unmatched_published_entries=False + ), description=''' + It is possible to trigger processing of the raw files, but it is no longer the + preferred way to import bundles. If used, the settings below control the reprocessing + behaviour (see the config for `reprocess` for more info). 
+ ''' + ) - if target_value and not overwrite: - return + include_raw_files = True + include_archive_files = False + include_datasets = True + + include_bundle_info = Field( + True, description='Keeps the bundle_info.json file, not necessary but nice to have.') + keep_original_timestamps = Field( + False, description=''' + If all time stamps (create time, publish time etc) should be imported from + the bundle. + ''') + set_from_oasis = Field( + True, description='If the from_oasis flag and oasis_deployment_url should be set.') + + delete_upload_on_fail = Field( + False, description='If False, it is just removed from the ES index on failure.') + delete_bundle_on_fail = Field( + True, description='Deletes the source bundle if the import fails.') + delete_bundle_on_success = Field( + True, description='Deletes the source bundle if the import succeeds.') + delete_bundle_include_parent_folder = Field( + True, description='When deleting the bundle, also include parent folder, if empty.') + + trigger_processing = Field( + True, description='If the upload should be processed when the import is done.') + + +class BundleImport(NomadSettings): + + required_nomad_version = Field( + '1.1.2', description='Minimum nomad version of bundles required for import.') + + default_cli_bundle_import_path = './bundles' + + allow_bundles_from_oasis = Field( + False, description='If oasis admins can "push" bundles to this NOMAD deployment.') + allow_unpublished_bundles_from_oasis = Field( + False, description='If oasis admins can "push" bundles of unpublished uploads.') + + default_settings = BundleImportSettings() + + default_settings_cli = Field( + BundleImportSettings( + delete_bundle_on_fail=False, + delete_bundle_on_success=False + ), + description=''' + Additional default settings, applied when importing using the CLI (command-line interface). + This allows to override some of the settings specified in the general default settings above. + ''' + ) - if not source_value: - return - if fs.external_working_directory and not os.path.isabs(source_value): - target_value = os.path.join(fs.external_working_directory, source_value) - else: - target_value = source_value +bundle_import = BundleImport() - setattr(target_obj, target_key, target_value) - set_external_path(fs, 'staging', fs, 'staging_external') - set_external_path(fs, 'public', fs, 'public_external') - set_external_path(north, 'users_fs', north, 'users_fs', overwrite=True) - set_external_path(north, 'shared_fs', north, 'shared_fs', overwrite=True) +class North(NomadSettings): + hub_connect_ip: str = Field( + None, description='Set this to host.docker.internal on windows/macos.') + hub_connect_url: str = None + hub_ip = '0.0.0.0' + docker_network: str = None + hub_host = 'localhost' + hub_port = 9000 + shared_fs = '.volumes/fs/north/shared' + users_fs = '.volumes/fs/north/users' + jupyterhub_crypt_key: str = None + nomad_host: str = Field( + None, description='Host name to reach nomad app from spawned containers.') + windows = Field( + True, description='Enable windows (as in windows the OS) hacks.') -mail = NomadConfig( - enabled=False, - with_login=False, - host='', - port=8995, - user='', - password='', - from_address='support@nomad-lab.eu', - cc_address='support@nomad-lab.eu' -) - -normalize = NomadConfig( - # The system size limit for running the dimensionality analysis. For very - # large systems the dimensionality analysis will get too expensive. 
- system_classification_with_clusters_threshold=64, - # Symmetry tolerance controls the precision used by spglib in order to find - # symmetries. The atoms are allowed to move 1/2*symmetry_tolerance from - # their symmetry positions in order for spglib to still detect symmetries. - # The unit is angstroms. The value of 0.1 is used e.g. by Materials Project - # according to - # https://pymatgen.org/pymatgen.symmetry.analyzer.html#pymatgen.symmetry.analyzer.SpacegroupAnalyzer - symmetry_tolerance=0.1, - # The symmetry tolerance used in aflow prototype matching. Should only be - # changed before re-running the prototype detection. - prototype_symmetry_tolerance=0.1, - # Maximum number of atoms in the single cell of a 2D material for it to be - # considered valid. - max_2d_single_cell_size=7, - # The distance tolerance between atoms for grouping them into the same - # cluster. Used in detecting system type. - cluster_threshold=2.5, - # Defines the "bin size" for rounding cell angles for the material hash - angle_rounding=float(10.0), # unit: degree - # The threshold for a system to be considered "flat". Used e.g. when - # determining if a 2D structure is purely 2-dimensional to allow extra rigid - # transformations that are improper in 3D but proper in 2D. - flat_dim_threshold=0.1, - # The threshold for point equality in k-space. Unit: 1/m. - k_space_precision=150e6, - # The energy threshold for how much a band can be on top or below the fermi - # level in order to still detect a gap. Unit: Joule. - band_structure_energy_tolerance=8.01088e-21, # 0.05 eV - springer_db_path=os.path.join( - os.path.dirname(os.path.abspath(__file__)), - 'normalizing/data/springer.msg' - ) -) - -resources = NomadConfig( - enabled=False, - db_name='nomad_v1_resources', - # Maxmimum time a resource is stored in mongodb before being updated. - max_time_in_mongo=60 * 60 * 24 * 365., - # Number of download retries - download_retries=2, - # Delay in seconds before each successive retry - download_retry_delay=10, - # Maximum number of httpx connections - max_connections=10 -) - -paths = NomadConfig( - similarity="", -) - -client = NomadConfig( - user=None, - password=None, - access_token=None, - url='http://nomad-lab.eu/prod/v1/api' -) - -datacite = NomadConfig( - mds_host='https://mds.datacite.org', - enabled=False, - prefix='10.17172', - user='*', - password='*' -) - -meta = NomadConfig( - version=__version__, - commit='', - deployment='devel', # A human-friendly name of the nomad deployment - deployment_url='https://my-oasis.org/api', # The deployment's url (api url). - label=None, - default_domain='dft', - service='unknown nomad service', - name='novel materials discovery (NOMAD)', - description='A FAIR data sharing platform for materials science data', - homepage='https://nomad-lab.eu', - source_url='https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR', - maintainer_email='markus.scheidgen@physik.hu-berlin.de', - beta=None -) - -gitlab = NomadConfig( - private_token='not set' -) - -reprocess = NomadConfig( - # Configures standard behaviour when reprocessing. - # Note, the settings only matter for published uploads and entries. For uploads in - # staging, we always reparse, add newfound entries, and delete unmatched entries. 
- rematch_published=True, - reprocess_existing_entries=True, - use_original_parser=False, - add_matched_entries_to_published=True, - delete_unmatched_published_entries=False, - index_invidiual_entries=False -) - -process = NomadConfig( - # Configures if to store the corresponding package definition in mongodb. - store_package_definition_in_mongo=False, - # Configures if to attach definition id to `m_def`, note it is different from `m_def_id`. - # The `m_def_id` will be exported with the `with_def_id=True` via `m_to_dict`. - add_definition_id_to_reference=False, - # write `m_def_id` to the archive - write_definition_id_to_archive=False, - index_materials=True, - reuse_parser=True, - metadata_file_name='nomad', - metadata_file_extensions=('json', 'yaml', 'yml') -) - -rfc3161_timestamp = NomadConfig( - # rfc3161ng timestamping server - server='http://time.certum.pl/', - # cert PATH used for rfc3161ng timestamping server - cert=None, - # hash algorithm for rfc3161ng timestamping server, depends on the server - hash_algorithm='sha256', - username=None, - password=None -) - -bundle_export = NomadConfig( - # General settings - default_cli_bundle_export_path='./bundles', - - # Default settings - default_settings=NomadConfig( - # General default settings. - include_raw_files=True, - include_archive_files=True, - include_datasets=True, - ), - - default_settings_cli=NomadConfig( - # Additional default settings, applied when exporting using the CLI (command-line interface). - # This allows to override some of the settings specified in the general default settings above. - ) -) - -bundle_import = NomadConfig( - # General settings - required_nomad_version='1.1.2', # Minimum nomad version of bundles required for import - default_cli_bundle_import_path='./bundles', - - # Permission settings - allow_bundles_from_oasis=False, # If oasis admins can "push" bundles to this NOMAD deployment - allow_unpublished_bundles_from_oasis=False, # If oasis admins can "push" bundles of unpublished uploads - - # Default settings - default_settings=NomadConfig( - # General default settings. - include_raw_files=True, - include_archive_files=True, - include_datasets=True, - include_bundle_info=True, # Keeps the bundle_info.json file (not necessary but nice to have) - keep_original_timestamps=False, # If all time stamps (create time, publish time etc) should be imported from the bundle - set_from_oasis=True, # If the from_oasis flag and oasis_deployment_url should be set - # Cleanup settings - delete_upload_on_fail=False, # If False, the entries are just removed from the ES index on failure - delete_bundle_on_fail=True, # Deletes the source bundle if the import fails - delete_bundle_on_success=True, # Deletes the source bundle if the import succeeds - delete_bundle_include_parent_folder=True, # When deleting the bundle, also include parent folder, if empty. - - # It is possible to trigger processing of the raw files, but it is no longer the - # preferred way to import bundles. If used, the settings below control the reprocessing - # behaviour (see the config for `reprocess` for more info). - trigger_processing=False, # Set if you want to reprocess after import (not recommended). - rematch_published=True, - reprocess_existing_entries=True, - use_original_parser=False, - add_matched_entries_to_published=True, - delete_unmatched_published_entries=False - ), - - default_settings_cli=NomadConfig( - # Additional default settings, applied when importing using the CLI (command-line interface). 
- # This allows to override some of the settings specified in the general default settings above. - delete_bundle_on_fail=False, - delete_bundle_on_success=False - ) -) - -north = NomadConfig( - hub_connect_ip=None, # Set this to host.docker.internal on windows/macos. - hub_connect_url=None, - hub_ip='0.0.0.0', - docker_network=None, - hub_host='localhost', - hub_port=9000, - shared_fs='.volumes/fs/north/shared', - users_fs='.volumes/fs/north/users', - jupyterhub_crypt_key=None, - nomad_host=None, # host name to reach nomad app from spawned containers - windows=True, # enable windows (as in windows the OS) hacks -) - -archive = NomadConfig( - block_size=256 * 1024, - read_buffer_size=256 * 1024, # GPFS needs at least 256K to achieve decent performance - max_process_number=20, # maximum number of processes can be assigned to process archive query - min_entries_per_process=20 # minimum number of entries per process -) - -ui = NomadConfig( - default_unit_system='Custom', - entry_context={ +north = North() + + +class Archive(NomadSettings): + block_size = 256 * 1024 + read_buffer_size = Field( + 256 * 1024, description='GPFS needs at least 256K to achieve decent performance.') + max_process_number = Field( + 20, description='Maximum number of processes can be assigned to process archive query.') + min_entries_per_process = Field( + 20, description='Minimum number of entries per process.') + + +archive = Archive() + + +class UIConfig(NomadSettings): + default_unit_system = 'Custom' + entry_context: dict = { 'overview': { 'include': [ 'sections', @@ -530,8 +605,8 @@ ui = NomadConfig( 'relatedResources': {'error': 'Could not render related resources card.'}, } } - }, - search_contexts={ + } + search_contexts: dict = { "include": ["entries", "eln", "materials", "solar_cells"], "exclude": [], "options": { @@ -1176,22 +1251,13 @@ ui = NomadConfig( } } } -) -def north_url(ssl: bool = True): - return api_url(ssl=ssl, api='north', api_host=north.hub_host, api_port=north.hub_port) +ui = UIConfig() -auxfile_cutoff = 100 -parser_matching_size = 150 * 80 # 150 lines of 80 ASCII characters per line -console_log_level = logging.WARNING -max_upload_size = 32 * (1024 ** 3) -raw_file_strip_cutoff = 1000 -max_entry_download = 500000 -encyclopedia_base = "https://nomad-lab.eu/prod/rae/encyclopedia/#" -aitoolkit_enabled = False -use_empty_parsers = False +def north_url(ssl: bool = True): + return api_url(ssl=ssl, api='north', api_host=north.hub_host, api_port=north.hub_port) def normalize_loglevel(value, default_level=logging.INFO): @@ -1215,6 +1281,54 @@ _transformations = { logger = logging.getLogger(__name__) +def _check_config(): + """Used to check that the current configuration is valid. Should only be + called once after the final config is loaded. + + Raises: + AssertionError: if there is a contradiction or invalid values in the + config file settings. + """ + # The AFLOW symmetry information is checked once on import + proto_symmetry_tolerance = normalize.prototype_symmetry_tolerance + symmetry_tolerance = normalize.symmetry_tolerance + if proto_symmetry_tolerance != symmetry_tolerance: + raise AssertionError( + "The AFLOW prototype information is outdated due to changed tolerance " + "for symmetry detection. Please update the AFLOW prototype information " + "by running the CLI command 'nomad admin ops prototype-update " + "--matches-only'." 
+ ) + + if normalize.springer_db_path and not os.path.exists(normalize.springer_db_path): + normalize.springer_db_path = None + + if keycloak.public_server_url is None: + keycloak.public_server_url = keycloak.server_url + + def set_external_path(source_obj, source_key, target_obj, target_key, overwrite=False): + source_value = getattr(source_obj, source_key) + target_value = getattr(target_obj, target_key) + + if target_value and not overwrite: + return + + if not source_value: + return + + if fs.external_working_directory and not os.path.isabs(source_value): + target_value = os.path.join(fs.external_working_directory, source_value) + else: + target_value = source_value + + setattr(target_obj, target_key, target_value) + + set_external_path(fs, 'staging', fs, 'staging_external') + set_external_path(fs, 'public', fs, 'public_external') + set_external_path(north, 'users_fs', north, 'users_fs', overwrite=True) + set_external_path(north, 'shared_fs', north, 'shared_fs', overwrite=True) + + def _merge(a: dict, b: dict, path: List[str] = None) -> dict: ''' Recursively merges b into a. Will add new key-value pairs, and will @@ -1250,6 +1364,7 @@ def _apply(key, value, raise_error: bool = True) -> None: current = globals() + current_value: Any = None if group_key not in current: if key not in current: if raise_error: @@ -1257,12 +1372,14 @@ def _apply(key, value, raise_error: bool = True) -> None: return else: current = current[group_key] - if not isinstance(current, NomadConfig): + if not isinstance(current, NomadSettings): if raise_error: logger.error(f'config key does not exist: {full_key}') return - if config_key not in current: + try: + current_value = getattr(current, config_key) + except AttributeError: if raise_error: logger.error(f'config key does not exist: {full_key}') return @@ -1270,14 +1387,16 @@ def _apply(key, value, raise_error: bool = True) -> None: key = config_key try: - current_value = current[key] if current_value is not None and not isinstance(value, type(current_value)): value = _transformations.get(full_key, type(current_value))(value) if isinstance(value, dict): - value = _merge(current[key], value) + value = _merge(current_value, value) - current[key] = value + if isinstance(current, dict): + current[key] = value + else: + setattr(current, key, value) logger.info(f'set config setting {full_key}={value}') except Exception as e: logger.error(f'cannot set config setting {full_key}={value}: {e}') diff --git a/nomad/files.py b/nomad/files.py index 0e2a0c107f11708ca1032f3863775b60f28b961e..89362e50a82941cb605f55310c4167760649479c 100644 --- a/nomad/files.py +++ b/nomad/files.py @@ -680,7 +680,7 @@ class UploadFiles(DirectoryObject, metaclass=ABCMeta): utils.get_logger(__name__).error( 'could not remove empty prefix dir', directory=parent_directory, exc_info=e) - def files_to_bundle(self, export_settings: config.NomadConfig) -> Iterable[FileSource]: + def files_to_bundle(self, export_settings: config.BundleExportSettings) -> Iterable[FileSource]: ''' A generator of :class:`FileSource` objects, defining the files/folders to be included in an upload bundle when *exporting*. The arguments allows for further filtering of what to include. 
@@ -692,7 +692,7 @@ class UploadFiles(DirectoryObject, metaclass=ABCMeta): @classmethod def files_from_bundle( - cls, bundle_file_source: BrowsableFileSource, import_settings: config.NomadConfig) -> Iterable[FileSource]: + cls, bundle_file_source: BrowsableFileSource, import_settings: config.BundleImportSettings) -> Iterable[FileSource]: ''' Returns an Iterable of :class:`FileSource`, defining the files/folders to be included in an upload bundle when *importing*. Only the files specified by the import_settings are included. @@ -1141,7 +1141,7 @@ class StagingUploadFiles(UploadFiles): aux_files.append(os.path.join(entry_relative_dir, dir_element)) file_count += 1 - if with_cutoff and file_count > config.auxfile_cutoff: + if with_cutoff and file_count > config.process.auxfile_cutoff: # If there are too many of them, its probably just a directory with lots of # mainfiles/entries. In this case it does not make any sense to provide thousands of # aux files. @@ -1175,7 +1175,7 @@ class StagingUploadFiles(UploadFiles): hash.update(mainfile_key.encode('utf8')) return utils.make_websave(hash) - def files_to_bundle(self, export_settings: config.NomadConfig) -> Iterable[FileSource]: + def files_to_bundle(self, export_settings: config.BundleExportSettings) -> Iterable[FileSource]: # Defines files for upload bundles of staging uploads. if export_settings.include_raw_files: yield DiskFileSource(self.os_path, 'raw') @@ -1184,7 +1184,7 @@ class StagingUploadFiles(UploadFiles): @classmethod def files_from_bundle( - cls, bundle_file_source: BrowsableFileSource, import_settings: config.NomadConfig) -> Iterable[FileSource]: + cls, bundle_file_source: BrowsableFileSource, import_settings: config.BundleImportSettings) -> Iterable[FileSource]: # Files to import for a staging upload if import_settings.include_raw_files: yield bundle_file_source.sub_source('raw') @@ -1520,7 +1520,7 @@ class PublicUploadFiles(UploadFiles): self._raw_zip_file = self._raw_zip_file_object = None self._archive_msg_file = self._archive_msg_file_object = None - def files_to_bundle(self, export_settings: config.NomadConfig) -> Iterable[FileSource]: + def files_to_bundle(self, export_settings: config.BundleExportSettings) -> Iterable[FileSource]: # Defines files for upload bundles of published uploads. 
for filename in sorted(os.listdir(self.os_path)): if filename.startswith('raw-') and export_settings.include_raw_files: @@ -1530,7 +1530,7 @@ class PublicUploadFiles(UploadFiles): @classmethod def files_from_bundle( - cls, bundle_file_source: BrowsableFileSource, import_settings: config.NomadConfig) -> Iterable[FileSource]: + cls, bundle_file_source: BrowsableFileSource, import_settings: config.BundleImportSettings) -> Iterable[FileSource]: for filename in bundle_file_source.directory_list(''): if filename.startswith('raw-') and import_settings.include_raw_files: yield bundle_file_source.sub_source(filename) diff --git a/nomad/parsing/parsers.py b/nomad/parsing/parsers.py index 80301931c0d6e35beac860ca7de0568b04f83343..c0b706d6d8c332fd513850957bffcb562f1165f0 100644 --- a/nomad/parsing/parsers.py +++ b/nomad/parsing/parsers.py @@ -79,7 +79,7 @@ def match_parser(mainfile_path: str, strict=True, parser_name: str = None) -> Tu compression, open_compressed = _compressions.get(f.read(3), (None, open)) with open_compressed(mainfile_path, 'rb') as cf: # type: ignore - buffer = cf.read(config.parser_matching_size) + buffer = cf.read(config.process.parser_matching_size) mime_type = magic.from_buffer(buffer, mime=True) @@ -646,7 +646,7 @@ empty_parsers = [ ) ] -if config.use_empty_parsers: +if config.process.use_empty_parsers: # There are some entries with PIDs that have mainfiles which do not match what # the actual parsers expect. We use the EmptyParser to produce placeholder entries # to keep the PIDs. These parsers will not match for new, non migrated data. diff --git a/nomad/processing/data.py b/nomad/processing/data.py index 2bfd849f9f29e00c1d5b7d81accd92afc5dd22d6..9863211bf9e6df082ec29a65d6fdbbc90794d58c 100644 --- a/nomad/processing/data.py +++ b/nomad/processing/data.py @@ -29,7 +29,6 @@ entries, and files ''' import base64 from typing import Optional, cast, Any, List, Tuple, Set, Iterator, Dict, Iterable, Sequence, Union - import rfc3161ng from mongoengine import ( StringField, DateTimeField, BooleanField, IntField, ListField, DictField) @@ -44,6 +43,7 @@ from structlog.processors import StackInfoRenderer, format_exc_info, TimeStamper import requests from fastapi.exceptions import RequestValidationError from pydantic.error_wrappers import ErrorWrapper +from pydantic import parse_obj_as import validators from nomad import utils, config, infrastructure, search, datamodel, metainfo, parsing, client @@ -987,13 +987,15 @@ class Entry(Proc): 'last_status_message': 'Parent entry processing'}}) # Load the reprocess settings from the upload, and apply defaults - settings = config.reprocess.customize(self.upload.reprocess_settings) + settings = config.reprocess + if self.upload.reprocess_settings: + settings = settings.customize(parse_obj_as(config.Reprocess, self.upload.reprocess_settings)) self.set_last_status_message('Determining action') # If this entry has been processed before, or imported from a bundle, nomad_version # should be set. If not, this is the initial processing. 
self._is_initial_processing = self.nomad_version is None - self._perform_index = self._is_initial_processing or settings.get('index_invidiual_entries', True) + self._perform_index = self._is_initial_processing or settings.index_individual_entries if not self.upload.published or self._is_initial_processing: should_parse = True elif not settings.reprocess_existing_entries: @@ -1026,7 +1028,7 @@ class Entry(Proc): for entry in self._main_and_child_entries(): entry._initialize_metadata_for_processing() - if len(self._entry_metadata.files) >= config.auxfile_cutoff: + if len(self._entry_metadata.files) >= config.process.auxfile_cutoff: self.warning( 'This entry has many aux files in its directory. ' 'Have you placed many mainfiles in the same directory?') @@ -1510,7 +1512,8 @@ class Upload(Proc): @process() def process_upload( - self, file_operations: List[Dict[str, Any]] = None, reprocess_settings: Dict[str, Any] = None, + self, file_operations: List[Dict[str, Any]] = None, + reprocess_settings: Dict[str, Any] = None, path_filter: str = None, only_updated_files: bool = False): ''' A @process that executes a file operation (if provided), and matches, parses and normalizes @@ -1530,7 +1533,7 @@ class Upload(Proc): `path` (specifying the path relative to the raw folder which is to be deleted). "COPY" and "MOVE" operations require two arguments: `path_to_existing_file` and `path_to_target_file`. - reprocess_settings: An optional dictionary specifying the behaviour when reprocessing. + reprocess_settings: Optional configuration of the reprocessing behavior. Settings that are not specified are defaulted. See `config.reprocess` for available options and the configured default values. path_filter: An optional path used to filter out what should be processed. @@ -1538,10 +1541,14 @@ class Upload(Proc): folder, everything under this folder will be processed. only_updated_files: If only files updated by the file operations should be processed. ''' - return self._process_upload_local(file_operations, reprocess_settings, path_filter, only_updated_files) + return self._process_upload_local( + file_operations, + parse_obj_as(config.Reprocess, reprocess_settings) if reprocess_settings else None, + path_filter, only_updated_files) def _process_upload_local( - self, file_operations: List[Dict[str, Any]] = None, reprocess_settings: Dict[str, Any] = None, + self, file_operations: List[Dict[str, Any]] = None, + reprocess_settings: config.Reprocess = None, path_filter: str = None, only_updated_files: bool = False): ''' The function doing the actual processing, but locally, not as a @process. 
@@ -1549,8 +1556,9 @@ class Upload(Proc):
         '''
         logger = self.get_logger()
         logger.info('starting to (re)process')
-        reprocess_settings = config.reprocess.customize(reprocess_settings)  # Add default settings
-        self.reprocess_settings = reprocess_settings
+        settings = config.reprocess.customize(reprocess_settings)  # Add default settings
+        if reprocess_settings:
+            self.reprocess_settings = reprocess_settings.dict()

         # Sanity checks
         if path_filter:
@@ -1558,7 +1566,7 @@ class Upload(Proc):
         assert not (path_filter and only_updated_files), 'Cannot specify both `path_filter` and `only_updated_files`'
         if self.published:
             assert not file_operations, 'Upload is published, cannot update files'
-            assert reprocess_settings.rematch_published or reprocess_settings.reprocess_existing_entries, (
+            assert settings.rematch_published or settings.reprocess_existing_entries, (  # pylint: disable=no-member
                 'Settings do no allow reprocessing of a published upload')

         # TODO remove after worker_hostnames are handled correctly
@@ -1571,7 +1579,7 @@ class Upload(Proc):

         # All looks ok, process
         updated_files = self.update_files(file_operations, only_updated_files)
-        self.match_all(reprocess_settings, path_filter, updated_files)
+        self.match_all(settings, path_filter, updated_files)
         self.parser_level = None
         if self.parse_next_level(0, path_filter, updated_files):
             self.set_last_status_message(f'Waiting for results (level {self.parser_level})')
@@ -1580,7 +1588,7 @@ class Upload(Proc):
             self.cleanup()

     @process_local
-    def put_file_and_process_local(self, path, target_dir, reprocess_settings: Dict[str, Any] = None) -> Entry:
+    def put_file_and_process_local(self, path, target_dir, reprocess_settings: config.Reprocess = None) -> Entry:
         '''
         Pushes a raw file, matches it, and if matched, runs the processing - all as a local
         process. If the the target path exists, it will be overwritten. If matched, we return the
@@ -1594,7 +1602,8 @@ class Upload(Proc):
         if staging_upload_files.raw_path_exists(target_path):
             assert staging_upload_files.raw_path_is_file(target_path), 'Target path is a directory'

-        self.reprocess_settings = reprocess_settings
+        if reprocess_settings:
+            self.reprocess_settings = reprocess_settings.dict()

         # Push the file
         self.set_last_status_message('Putting the file')
@@ -1811,7 +1820,7 @@ class Upload(Proc):
                     'exception while matching pot. mainfile',
                     mainfile=path_info.path, exc_info=e)

-    def match_all(self, reprocess_settings, path_filter: str = None, updated_files: Set[str] = None):
+    def match_all(self, reprocess_settings: config.Reprocess, path_filter: str = None, updated_files: Set[str] = None):
         '''
         The process step used to identify mainfile/parser combinations among the upload's
         files, and create or delete respective :class:`Entry` instances (if needed).
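The local variant can be sketched the same way, now taking the typed settings object directly. The file path and target directory are made up for the example:

    from nomad import config
    from nomad.processing.data import Upload

    upload = Upload.get('some_upload_id')  # hypothetical id
    settings = config.Reprocess(index_individual_entries=True, reprocess_existing_entries=True)
    entry = upload.put_file_and_process_local(
        '/tmp/new_mainfile.json', 'raw_dir', reprocess_settings=settings)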
@@ -2219,8 +2228,9 @@ class Upload(Proc):
         from nomad.bundles import BundleImporter
         bundle_importer: BundleImporter = None
         try:
-            bundle_importer = BundleImporter(
-                None, config.bundle_import.default_settings.customize(import_settings), embargo_length)
+            import_settings_obj = parse_obj_as(config.BundleImportSettings, import_settings)
+            import_settings_obj = config.bundle_import.default_settings.customize(import_settings_obj)
+            bundle_importer = BundleImporter(None, import_settings_obj, embargo_length)
             bundle_importer.open(bundle_path)
             return bundle_importer.import_bundle(self, False)
         finally:
diff --git a/nomad/utils/__init__.py b/nomad/utils/__init__.py
index 6b937926044b470bb0d875e79e8c275711038682..71dc3654fdf16abcd23bd4a3007b5a9aa2995e79 100644
--- a/nomad/utils/__init__.py
+++ b/nomad/utils/__init__.py
@@ -92,7 +92,7 @@ except ImportError:
     def get_logger(name, **kwargs):
         return ClassicLogger(name, **kwargs)

-    def configure_logging(console_log_level=config.console_log_level):
+    def configure_logging(console_log_level=config.services.console_log_level):
         import logging
         logging.basicConfig(level=console_log_level)

diff --git a/nomad/utils/structlogging.py b/nomad/utils/structlogging.py
index dba144896a6a9d13fb228ffcf4a41a348a275864..7cfb002dc61b0f0a9f6ddd9cf7900b812bcf8a5b 100644
--- a/nomad/utils/structlogging.py
+++ b/nomad/utils/structlogging.py
@@ -290,7 +290,7 @@ root = logging.getLogger()


 # configure logging in general
-def configure_logging(console_log_level=config.console_log_level):
+def configure_logging(console_log_level=config.services.console_log_level):
     logging.basicConfig(level=logging.DEBUG)
     for handler in root.handlers:
         if not isinstance(handler, LogstashHandler):
diff --git a/ops/kubernetes/nomad/templates/nomad-configmap.yml b/ops/kubernetes/nomad/templates/nomad-configmap.yml
index e4b687766c6b7ca760550ec4483f88007c1c82af..817ea8489c5ca9b3f0b6881dea66169c0afd4c8d 100644
--- a/ops/kubernetes/nomad/templates/nomad-configmap.yml
+++ b/ops/kubernetes/nomad/templates/nomad-configmap.yml
@@ -21,7 +21,6 @@ data:
       isTest: {{ .Values.version.isTest }}
       usesBetaData: {{ .Values.version.usesBetaData }}
       officialUrl: "{{ .Values.version.officialUrl }}"
-      aitoolkit_enabled: {{ .Values.aitoolkit.enabled }}
     process:
       reuse_parser: {{ .Values.process.reuseParser }}
       index_materials: {{ .Values.process.indexMaterials }}
@@ -31,7 +30,7 @@ data:
       use_original_parser: {{ .Values.reprocess.useOriginalParser }}
       add_matched_entries_to_published: {{ .Values.reprocess.addMatchedEntriesToPublished }}
      delete_unmatched_published_entries: {{ .Values.reprocess.deleteUnmatchedPublishedEntries }}
-      index_invidiual_entries: {{ .Values.reprocess.indexIndividualEntries }}
+      index_individual_entries: {{ .Values.reprocess.indexIndividualEntries }}
     fs:
       tmp: ".volumes/fs/staging/tmp"
       prefix_size: {{ .Values.volumes.prefixSize }}
@@ -51,6 +50,7 @@ data:
       https: {{ .Values.proxy.external.https }}
       upload_limit: {{ .Values.api.uploadLimit }}
       admin_user_id: {{ .Values.keycloak.admin_user_id }}
+      aitoolkit_enabled: {{ .Values.services.aitoolkit.enabled }}
     rabbitmq:
       host: "{{ .Release.Name }}-rabbitmq"
     elastic:
diff --git a/ops/kubernetes/nomad/values.yaml b/ops/kubernetes/nomad/values.yaml
index ae54f2e5b3f3e6dcfca581ab6db46a11395dc03e..ed5e6d50b670a2cc7a036cc4d123e5f362fc3650 100644
--- a/ops/kubernetes/nomad/values.yaml
+++ b/ops/kubernetes/nomad/values.yaml
@@ -96,10 +96,6 @@ encyclopedia:
   ## enable links to the 'new' encyclopedia
   enabled: true

-aitoolkit:
-  ## enable aitoolkit references
-  enabled: false
-
 ## Everything concerning the nginx that serves the gui, proxies the api
 # It is run via NodePort service
 proxy:
@@ -200,6 +196,9 @@ datacite:
 services:
   jupyterhub:
     enabled: false
+  aitoolkit:
+    ## enable aitoolkit references
+    enabled: false

 jupyterhub:
   # fullnameOverride: null
diff --git a/tests/app/v1/routers/test_entries.py b/tests/app/v1/routers/test_entries.py
index 153814dbe33d72a8ead7255e287c4ecfdc01c4e0..54a379a34a23dedbfe061a9d25dac32f95c146fe 100644
--- a/tests/app/v1/routers/test_entries.py
+++ b/tests/app/v1/routers/test_entries.py
@@ -482,7 +482,7 @@ def test_entries_raw(
     pytest.param(perform_entries_raw_test, id='raw'),
     pytest.param(perform_entries_archive_download_test, id='archive-download')])
 def test_entries_download_max(monkeypatch, client, example_data, test_method, http_method):
-    monkeypatch.setattr('nomad.config.max_entry_download', 20)
+    monkeypatch.setattr('nomad.config.services.max_entry_download', 20)

     test_method(client, status_code=400, http_method=http_method)

diff --git a/tests/app/v1/routers/test_uploads.py b/tests/app/v1/routers/test_uploads.py
index d0ba5cebcc751685715d96af4d3f9ba297b6cc92..dd8bc92cd09ac454a6f01fc012dc39f01bbce944 100644
--- a/tests/app/v1/routers/test_uploads.py
+++ b/tests/app/v1/routers/test_uploads.py
@@ -1619,28 +1619,25 @@ def test_post_upload_action_publish(
     assert_gets_published(client, upload_id, user_auth, current_embargo_length=12, **query_args)


-@pytest.mark.parametrize('kwargs', [
+@pytest.mark.parametrize('import_settings, query_args', [
     pytest.param(
-        dict(
-            import_settings=dict(include_archive_files=False, trigger_processing=True),
-            query_args=dict(embargo_length=0)),
+        config.BundleImportSettings(include_archive_files=False, trigger_processing=True),
+        dict(embargo_length=0),
         id='trigger-processing'),
     pytest.param(
-        dict(
-            import_settings=dict(include_archive_files=True, trigger_processing=False),
-            query_args=dict(embargo_length=28)),
-        id='no-processing')])
+        config.BundleImportSettings(include_archive_files=True, trigger_processing=False),
+        dict(embargo_length=28),
+        id='no-processing')
+])
 def test_post_upload_action_publish_to_central_nomad(
         client, proc_infra, monkeypatch, oasis_publishable_upload,
-        test_users_dict, test_auth_dict, kwargs):
+        test_users_dict, test_auth_dict, import_settings, query_args):
     ''' Tests the publish action with to_central_nomad=True. '''
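A short sketch of what the parametrized values above amount to: the test now receives a typed `config.BundleImportSettings` instance, which the production code overlays on the configured defaults (assuming, as elsewhere in this change, that `customize` merges the given object into a copy of the defaults):

    from nomad import config

    import_settings = config.BundleImportSettings(
        include_archive_files=False, trigger_processing=True)
    effective = config.bundle_import.default_settings.customize(import_settings)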
     upload_id, suffix = oasis_publishable_upload
-    query_args = kwargs.get('query_args', {})
     query_args['to_central_nomad'] = True
     embargo_length = query_args.get('embargo_length')
-    import_settings = kwargs.get('import_settings', {})
-    expected_status_code = kwargs.get('expected_status_code', 200)
-    user = kwargs.get('user', 'test_user')
+    expected_status_code = 200
+    user = 'test_user'
     user_auth, __token = test_auth_dict[user]
     old_upload = Upload.get(upload_id)
diff --git a/tests/examples/test_docs.py b/tests/examples/test_docs.py
index fc1820af63407eaf69904b9ba66298c3385cbcfb..f6d7a1bacbd58c6f8001c0849ec357fe6111023a 100644
--- a/tests/examples/test_docs.py
+++ b/tests/examples/test_docs.py
@@ -38,7 +38,7 @@ def test_python_schema():
 def test_yaml_schema():
     yaml_package = _load_yaml('basic_schema/schema.archive.yaml')['definitions']
     yaml_data = _load_yaml('basic_schema/data.archive.yaml')['data']
-    del(yaml_data['m_def'])
+    del (yaml_data['m_def'])

     package = Package.m_from_dict(yaml_package)
     package.init_metainfo()
diff --git a/tests/processing/test_data.py b/tests/processing/test_data.py
index 42883286f56e87b1a6fae03a45453d6ce0d7aa9b..f1b730f397603967caabdb587197f107ca5640eb 100644
--- a/tests/processing/test_data.py
+++ b/tests/processing/test_data.py
@@ -207,6 +207,7 @@ def assert_user_metadata(entries_metadata, user_metadata):
         assert value_actual == value_expected, f'Mismatch {k}: {value_expected} != {value_actual}'


+@pytest.mark.timeout(config.tests.default_timeout)
 def test_processing(processed, no_warn, mails, monkeypatch):
     assert_processing(processed)

@@ -214,6 +215,7 @@ def test_processing(processed, no_warn, mails, monkeypatch):
     assert re.search(r'Processing completed', mails.messages[0].data.decode('utf-8')) is not None


+@pytest.mark.timeout(config.tests.default_timeout)
 def test_processing_two_runs(test_user, proc_infra, tmp):
     upload_file = create_template_upload_file(
         tmp, mainfiles=['tests/data/proc/templates/template_tworuns.json'])
@@ -221,6 +223,7 @@ def test_processing_two_runs(test_user, proc_infra, tmp):
     assert_processing(processed)


+@pytest.mark.timeout(config.tests.default_timeout)
 def test_processing_with_large_dir(test_user, proc_infra, tmp):
     upload_path = create_template_upload_file(
         tmp, mainfiles=['tests/data/proc/templates/template.json'], auxfiles=150)
@@ -230,6 +233,7 @@ def test_processing_with_large_dir(test_user, proc_infra, tmp):
         assert len(entry.warnings) == 1


+@pytest.mark.timeout(config.tests.default_timeout)
 def test_publish(non_empty_processed: Upload, no_warn, internal_example_user_metadata, monkeypatch):
     processed = non_empty_processed
     set_upload_entry_metadata(processed, internal_example_user_metadata)
@@ -252,6 +256,7 @@ def test_publish(non_empty_processed: Upload, no_warn, internal_example_user_met
     assert_processing(Upload.get(processed.upload_id), published=True, process='publish_upload')


+@pytest.mark.timeout(config.tests.default_timeout)
 def test_publish_directly(non_empty_uploaded, test_user, proc_infra, no_warn, monkeypatch):
     processed = run_processing(non_empty_uploaded, test_user, publish_directly=True)

@@ -262,6 +267,7 @@ def test_publish_directly(non_empty_uploaded, test_user, proc_infra, no_warn, mo
     assert_processing(Upload.get(processed.upload_id), published=True)


+@pytest.mark.timeout(config.tests.default_timeout)
 def test_republish(non_empty_processed: Upload, no_warn, internal_example_user_metadata, monkeypatch):
     processed = non_empty_processed
     set_upload_entry_metadata(processed, internal_example_user_metadata)
@@ -283,6 +289,7 @@ def test_republish(non_empty_processed: Upload, no_warn, internal_example_user_m
     assert_search_upload(entries, additional_keys, published=True)


+@pytest.mark.timeout(config.tests.default_timeout)
 def test_publish_failed(
         non_empty_uploaded: Tuple[str, str], internal_example_user_metadata, test_user,
         monkeypatch, proc_infra):
@@ -307,22 +314,17 @@ def test_publish_failed(
     assert_search_upload(entries, additional_keys, published=True, processed=False)


-@pytest.mark.parametrize('kwargs', [
+@pytest.mark.parametrize('import_settings, embargo_length', [
     # pytest.param(
-    #     dict(
-    #         import_settings=dict(include_archive_files=True, trigger_processing=False),
-    #         embargo_length=0),
+    #     config.BundleImportSettings(include_archive_files=True, trigger_processing=False), 0,
     #     id='no-processing'),
     pytest.param(
-        dict(
-            import_settings=dict(include_archive_files=False, trigger_processing=True),
-            embargo_length=17),
-        id='trigger-processing')])
+        config.BundleImportSettings(include_archive_files=False, trigger_processing=True), 17,
+        id='trigger-processing')
+])
 def test_publish_to_central_nomad(
-        proc_infra, monkeypatch, oasis_publishable_upload, test_user, no_warn, kwargs):
+        proc_infra, monkeypatch, oasis_publishable_upload, test_user, no_warn, import_settings, embargo_length):
     upload_id, suffix = oasis_publishable_upload
-    import_settings = kwargs.get('import_settings', {})
-    embargo_length = kwargs.get('embargo_length')
     old_upload = Upload.get(upload_id)

     import_settings = config.bundle_import.default_settings.customize(import_settings)
diff --git a/tests/test_config.py b/tests/test_config.py
index 0579dfa0c6bf2035451ff148c1222c6e2bee5fc5..d025d0013d249de154f39789735b23dfd5a19208 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -27,9 +27,9 @@ from .utils import assert_log

 @pytest.fixture
 def with_config():
-    old_values = config.fs.public, config.fs.archive_version_suffix, config.auxfile_cutoff
+    old_values = config.fs.public, config.fs.archive_version_suffix
     yield config
-    config.fs.public, config.fs.archive_version_suffix, config.auxfile_cutoff = old_values
+    config.fs.public, config.fs.archive_version_suffix = old_values


 def test_apply(with_config, caplog):
@@ -39,16 +39,13 @@ def test_apply(with_config, caplog):
     config._apply('fs_archive_version_suffix', 'test_value')
     assert config.fs.archive_version_suffix == 'test_value'

-    config._apply('auxfile_cutoff', '200')
-    assert config.auxfile_cutoff == 200
-
     config._apply('does_not_exist', 'test_value')
     assert_log(caplog, 'ERROR', 'does_not_exist does not exist')

     config._apply('fs_does_not_exist', 'test_value')
     assert_log(caplog, 'ERROR', 'fs_does_not_exist does not exist')

-    config._apply('max_entry_download', 'not_a_number')
+    config._apply('services_max_entry_download', 'not_a_number')
     assert_log(caplog, 'ERROR', 'cannot set')

     config._apply('nounderscore', 'test_value')
@@ -69,9 +66,10 @@ def test_nomad_yaml(raw_files, with_config, monkeypatch, caplog):
             'archive_version_suffix': 'test_value',
             'does_not_exist': 'test_value'
         },
-        'auxfile_cutoff': '200',
         'does_not_exist': 'test_value',
-        'max_entry_download': 'not_a_number'
+        'services': {
+            'max_entry_download': 'not_a_number'
+        }
     }

     test_nomad_yaml = os.path.join(config.fs.tmp, 'nomad_test.yaml')
@@ -85,7 +83,6 @@ def test_nomad_yaml(raw_files, with_config, monkeypatch, caplog):

     assert config.fs.public == 'test_value'
     assert config.fs.archive_version_suffix == 'test_value'
-    assert config.auxfile_cutoff == 200
     assert_log(caplog, 'ERROR', 'does_not_exist does not exist')
     assert_log(caplog, 'ERROR', 'fs_does_not_exist does not exist')
     assert_log(caplog, 'ERROR', 'cannot set')
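Finally, a hedged sketch of the two override styles that the config tests above exercise: underscored keys now address nested sections, and a `nomad.yaml` uses nested mappings instead of flat top-level options. `_apply` is the internal helper the tests use, and the concrete values are examples only:

    from nomad import config

    # environment/underscore style: '<section>_<option>'
    config._apply('services_max_entry_download', '5000')

    # nomad.yaml style: nested sections instead of flat top-level keys
    yaml_data = {
        'fs': {'archive_version_suffix': 'v1'},
        'services': {'max_entry_download': 5000},
    }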