diff --git a/nomad/api.py b/nomad/api.py index 125815550b1e982733334893970b383360b946a7..d233f8a0bb4c0bba6d4fea402b786cf1b3389aeb 100644 --- a/nomad/api.py +++ b/nomad/api.py @@ -767,6 +767,8 @@ def get_raw(upload_hash, calc_hash): :returns: the raw data in body """ archive_id = '%s/%s' % (upload_hash, calc_hash) + logger = get_logger(__name__, endpoint='raw', action='get', archive_id=archive_id) + try: repo = RepoCalc.get(id=archive_id) except NotFoundError: @@ -808,8 +810,11 @@ def get_raw(upload_hash, calc_hash): return dict(arcname=filename, iterable=iter_content()) yield write(repo.mainfile) - for auxfile in repo.aux_files: - yield write(os.path.join(os.path.dirname(repo.mainfile), auxfile)) + try: + for auxfile in repo.aux_files: + yield write(os.path.join(os.path.dirname(repo.mainfile), auxfile)) + except Exception as e: + logger.error('Exception while accessing auxfiles.', exc_info=e) zip_stream = zipstream.ZipFile(mode='w', compression=ZIP_DEFLATED) zip_stream.paths_to_write = iterator() diff --git a/nomad/config.py b/nomad/config.py index 93b20e9cc3d5b0b5c6580de11c9fa717dff13d59..0e9e1b93a667103428aa9c21c48576e0ffd055d4 100644 --- a/nomad/config.py +++ b/nomad/config.py @@ -98,4 +98,4 @@ services = NomadServicesConfig( api_secret=os.environ.get('NOMAD_API_SECRET', 'defaultApiSecret') ) -console_log_level = get_loglevel_from_env('NOMAD_CONSOLE_LOGLEVEL', default_level=logging.CRITICAL) +console_log_level = get_loglevel_from_env('NOMAD_CONSOLE_LOGLEVEL', default_level=logging.ERROR) diff --git a/nomad/infrastructure.py b/nomad/infrastructure.py index 103424b0d60f10e514e7465b8c463a383b8866dd..7a01cab9097cac4084b104ba858ef5da60caca79 100644 --- a/nomad/infrastructure.py +++ b/nomad/infrastructure.py @@ -36,6 +36,7 @@ mongo_client = None def setup(): """ Creates connections to mongodb and elastic search. 
""" global elastic_client + setup_logging() setup_mongo() setup_elastic() @@ -43,6 +44,10 @@ def setup(): user.ensure_test_users() +def setup_logging(): + utils.configure_logging() + + def setup_mongo(): """ Creates connection to mongodb. """ global mongo_client diff --git a/nomad/processing/data.py b/nomad/processing/data.py index 0edf68d2a4520aca49e0aa35054876178e28871d..d1c9385d1e7b09c48e8cbdd642f6d36809570b05 100644 --- a/nomad/processing/data.py +++ b/nomad/processing/data.py @@ -24,7 +24,7 @@ calculations, and files :members: """ -from typing import List, Any, ContextManager +from typing import List, Any, ContextManager, Tuple, Generator from datetime import datetime from elasticsearch.exceptions import NotFoundError from mongoengine import StringField, BooleanField, DateTimeField, DictField, IntField @@ -456,8 +456,32 @@ class Upload(Chord): self.fail('The same file was already uploaded and processed.', level=logging.INFO) return + def match_mainfiles(self) -> Generator[Tuple[File, str, object], None, None]: + """ + Generator function that matches all files in the upload to all parsers to + determine the upload's mainfiles. + + Returns: + Tuples of mainfile, filename, and parser + """ + for filename in self.upload_file.filelist: + potential_mainfile = self.upload_file.get_file(filename) + for parser in parsers: + try: + with potential_mainfile.open() as mainfile_f: + if parser.is_mainfile(filename, lambda fn: mainfile_f): + yield potential_mainfile, filename, parser + except Exception as e: + self.error( + 'exception while matching pot. mainfile', + mainfile=filename, exc_info=e) + @task def parse_all(self): + """ + Identifies mainfile/parser combinations among the upload's files, creates + respective :class:`Calc` instances, and triggers their processing. 
+ """ logger = self.get_logger() # TODO: deal with multiple possible parser specs @@ -466,25 +490,15 @@ class Upload(Chord): upload_size=self.upload_file.size, upload_filecount=len(self.upload_file.filelist)): total_calcs = 0 - for filename in self.upload_file.filelist: - for parser in parsers: - try: - potential_mainfile = self.upload_file.get_file(filename) - with potential_mainfile.open() as mainfile_f: - if parser.is_mainfile(filename, lambda fn: mainfile_f): - mainfile_path = potential_mainfile.os_path - calc = Calc.create( - archive_id='%s/%s' % (self.upload_hash, utils.hash(filename)), - mainfile=filename, parser=parser.name, - mainfile_tmp_path=mainfile_path, - upload_id=self.upload_id) - - calc.process() - total_calcs += 1 - except Exception as e: - self.error( - 'exception while matching pot. mainfile', - mainfile=filename, exc_info=e) + for mainfile, filename, parser in self.match_mainfiles(): + calc = Calc.create( + archive_id='%s/%s' % (self.upload_hash, utils.hash(filename)), + mainfile=filename, parser=parser.name, + mainfile_tmp_path=mainfile.os_path, + upload_id=self.upload_id) + + calc.process() + total_calcs += 1 # have to save the total_calcs information for chord management self.spwaned_childred(total_calcs) diff --git a/nomad/utils.py b/nomad/utils.py index 1aa91c1468c0683ac3411dfe2d32c80baab4ff60..a1d22d6c39526deb6ddf390a311401f17c0f1077 100644 --- a/nomad/utils.py +++ b/nomad/utils.py @@ -29,7 +29,7 @@ Depending on the configuration all logs will also be send to a central logstash. .. 
autofunc::nomad.utils.get_logger """ -from typing import Union, IO, cast +from typing import Union, IO, cast, List import hashlib import base64 import logging @@ -110,8 +110,7 @@ def add_logstash_handler(logger): logger.addHandler(logstash_handler) -_logging_is_configured = False -if not _logging_is_configured: +def configure_logging(): # configure structlog log_processors = [ StackInfoRenderer(), @@ -136,15 +135,13 @@ if not _logging_is_configured: logging.basicConfig(stream=sys.stdout) root = logging.getLogger() for handler in root.handlers: - handler.setLevel(config.console_log_level if 'pytest' not in sys.modules else logging.CRITICAL) + handler.setLevel(config.console_log_level) # configure logstash - if config.logstash.enabled and 'pytest' not in sys.modules: + if config.logstash.enabled: add_logstash_handler(root) root.info('Structlog configured for logstash') - _logging_is_configured = True - def create_uuid() -> str: """ Returns a web-save base64 encoded random uuid (type 4). """ @@ -222,3 +219,25 @@ def timer(logger, event, method='info', **kwargs): logger_method(event, exec_time=stop - start, **kwargs) else: logger.error('Uknown logger method %s.' 
% method) + + +class archive: + @staticmethod + def create(upload_hash: str, calc_hash: str) -> str: + return '%s/%s' % (upload_hash, calc_hash) + + @staticmethod + def items(archive_id: str) -> List[str]: + return archive_id.split('/') + + @staticmethod + def item(archive_id: str, index: int) -> str: + return archive.items(archive_id)[index] + + @staticmethod + def calc_hash(archive_id: str) -> str: + return archive.item(archive_id, 1) + + @staticmethod + def upload_hash(archive_id: str) -> str: + return archive.item(archive_id, 0) diff --git a/tests/conftest.py b/tests/conftest.py index 30f21c599d499c0945eeef9af11bcc00e5bb401a..6c48dda8272af94b63f8217ecea7d8238b192cd4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,10 +1,17 @@ import pytest +import logging from mongoengine import connect from mongoengine.connection import disconnect from nomad import config, user, infrastructure +@pytest.fixture(scope='session', autouse=True) +def nomad_logging(): + config.logstash = config.logstash._replace(enabled=False) + config.console_log_level = logging.CRITICAL + + @pytest.fixture(scope='session') def celery_includes(): return ['nomad.processing.base']