From a8cc781af36882a19fa8019cfa50a6b677183cfd Mon Sep 17 00:00:00 2001 From: David Sikter <david.sikter@physik.hu-berlin.de> Date: Tue, 4 Jan 2022 13:05:24 +0100 Subject: [PATCH] Rename mongo document Calc -> Entry --- nomad/app/v1/routers/datasets.py | 4 +- nomad/app/v1/routers/entries.py | 6 +- nomad/app/v1/routers/uploads.py | 4 +- nomad/cli/admin/admin.py | 4 +- nomad/cli/admin/clean.py | 4 +- nomad/cli/admin/entries.py | 2 +- nomad/cli/admin/migrate.py | 6 +- nomad/cli/admin/uploads.py | 12 ++-- nomad/processing/__init__.py | 10 +-- nomad/processing/data.py | 76 +++++++++++------------ nomad/tmp.py | 4 +- tests/app/v1/routers/test_datasets.py | 4 +- tests/app/v1/routers/test_entries_edit.py | 10 +-- tests/app/v1/routers/test_uploads.py | 8 +-- tests/processing/test_data.py | 22 +++---- tests/test_cli.py | 26 ++++---- tests/utils.py | 4 +- 17 files changed, 103 insertions(+), 103 deletions(-) diff --git a/nomad/app/v1/routers/datasets.py b/nomad/app/v1/routers/datasets.py index 7cc031d955..eb817b7e33 100644 --- a/nomad/app/v1/routers/datasets.py +++ b/nomad/app/v1/routers/datasets.py @@ -249,7 +249,7 @@ async def post_datasets( empty = len(entry_ids) == 0 if not empty: - processing.Calc._get_collection().update_many( + processing.Entry._get_collection().update_many( mongo_query, {'$push': {'datasets': dataset.dataset_id}}) update_by_query( ''' @@ -308,7 +308,7 @@ async def delete_dataset( mongo_query = {'_id': {'$in': entry_ids}} if len(entry_ids) > 0: - processing.Calc._get_collection().update_many( + processing.Entry._get_collection().update_many( mongo_query, {'$pull': {'datasets': dataset.dataset_id}}) update_by_query( ''' diff --git a/nomad/app/v1/routers/entries.py b/nomad/app/v1/routers/entries.py index ba77f001d9..5d1d105956 100644 --- a/nomad/app/v1/routers/entries.py +++ b/nomad/app/v1/routers/entries.py @@ -1134,7 +1134,7 @@ def edit(query: Query, user: User, mongo_update: Dict[str, Any] = None, re_index # perform the update on the mongo db with utils.timer(logger, 'edit mongo update executed', size=len(entry_ids)): if mongo_update is not None: - n_updated = proc.Calc.objects(calc_id__in=entry_ids).update(multi=True, **mongo_update) + n_updated = proc.Entry.objects(calc_id__in=entry_ids).update(multi=True, **mongo_update) if n_updated != len(entry_ids): logger.error('edit repo did not update all entries', payload=mongo_update) @@ -1142,7 +1142,7 @@ def edit(query: Query, user: User, mongo_update: Dict[str, Any] = None, re_index with utils.timer(logger, 'edit elastic update executed', size=len(entry_ids)): if re_index: updated_metadata: List[datamodel.EntryMetadata] = [] - for entry in proc.Calc.objects(calc_id__in=entry_ids): + for entry in proc.Entry.objects(calc_id__in=entry_ids): entry_metadata = entry.mongo_metadata(entry.upload) # Ensure that updated fields are marked as "set", even if they are cleared entry_metadata.m_update_from_dict(mongo_update) @@ -1340,7 +1340,7 @@ async def post_entry_metadata_edit( # remove potentially empty old datasets if removed_datasets is not None: for dataset in removed_datasets: - if proc.Calc.objects(datasets=dataset).first() is None: + if proc.Entry.objects(datasets=dataset).first() is None: datamodel.Dataset.m_def.a_mongo.objects(dataset_id=dataset).delete() return data diff --git a/nomad/app/v1/routers/uploads.py b/nomad/app/v1/routers/uploads.py index 83b67c875b..8dc85efd7e 100644 --- a/nomad/app/v1/routers/uploads.py +++ b/nomad/app/v1/routers/uploads.py @@ -30,7 +30,7 @@ from fastapi.exceptions import RequestValidationError from 
nomad import utils, config, files from nomad.files import UploadFiles, StagingUploadFiles, UploadBundle, is_safe_relative_path -from nomad.processing import Upload, Calc, ProcessAlreadyRunning, ProcessStatus, MetadataEditRequestHandler +from nomad.processing import Upload, Entry, ProcessAlreadyRunning, ProcessStatus, MetadataEditRequestHandler from nomad.utils import strip from nomad.search import search @@ -1553,7 +1553,7 @@ def _upload_to_pydantic(upload: Upload) -> UploadProcData: return pydantic_upload -def _entry_to_pydantic(entry: Calc) -> EntryProcData: +def _entry_to_pydantic(entry: Entry) -> EntryProcData: ''' Converts the mongo db object to an EntryProcData object''' return EntryProcData.from_orm(entry) diff --git a/nomad/cli/admin/admin.py b/nomad/cli/admin/admin.py index c7468f7f72..bbfc8881e8 100644 --- a/nomad/cli/admin/admin.py +++ b/nomad/cli/admin/admin.py @@ -67,7 +67,7 @@ def reset_processing(zero_complete_time): errors=[], warnings=[], complete_time=datetime.fromtimestamp(0) if zero_complete_time else datetime.now()) - reset_collection(proc.Calc) + reset_collection(proc.Entry) reset_collection(proc.Upload) @@ -383,7 +383,7 @@ def migrate_mongo( if entry_query: print('Quering entries...') - upload_ids = list(db_src.calc.distinct('upload_id', entry_query)) + upload_ids = list(db_src.entry.distinct('upload_id', entry_query)) if upload_ids: upload_query = {'_id': {'$in': upload_ids}} print('Quering uploads...') diff --git a/nomad/cli/admin/clean.py b/nomad/cli/admin/clean.py index 06f4f01f61..2b2d8678b7 100644 --- a/nomad/cli/admin/clean.py +++ b/nomad/cli/admin/clean.py @@ -42,7 +42,7 @@ def clean(dry, skip_entries, skip_fs, skip_es, staging_too, force): infrastructure.setup_elastic() if not skip_entries: - uploads_for_entries = mongo_client[nomad_config.mongo.db_name]['calc'].distinct('upload_id') + uploads_for_entries = mongo_client[nomad_config.mongo.db_name]['entry'].distinct('upload_id') uploads = {} for upload in mongo_client[nomad_config.mongo.db_name]['upload'].distinct('_id'): uploads[upload] = True @@ -57,7 +57,7 @@ def clean(dry, skip_entries, skip_fs, skip_es, staging_too, force): input('Will delete entries (mongo + es) for %d missing uploads. Press any key to continue ...' % len(missing_uploads)) for upload in missing_uploads: - mongo_client[nomad_config.mongo.db_name]['calc'].remove(dict(upload_id=upload)) + mongo_client[nomad_config.mongo.db_name]['entry'].remove(dict(upload_id=upload)) elasticsearch_dsl.Search(index=nomad_config.elastic.entries_index).query('term', upload_id=upload).delete() else: print('Found %s uploads that have entries in mongo, but there is no upload entry.' 
% len(missing_uploads)) diff --git a/nomad/cli/admin/entries.py b/nomad/cli/admin/entries.py index ddd1eab320..34bae838c2 100644 --- a/nomad/cli/admin/entries.py +++ b/nomad/cli/admin/entries.py @@ -43,4 +43,4 @@ def rm(entries, skip_es, skip_mongo): search.delete_entry(entry_id=entry, refresh=True, update_materials=True) if not skip_mongo: - proc.Calc.objects(calc_id__in=entries).delete() + proc.Entry.objects(calc_id__in=entries).delete() diff --git a/nomad/cli/admin/migrate.py b/nomad/cli/admin/migrate.py index 4313f52e6b..cf4847cabc 100644 --- a/nomad/cli/admin/migrate.py +++ b/nomad/cli/admin/migrate.py @@ -25,7 +25,7 @@ from pymongo import ReplaceOne from pymongo.database import Database from pymongo.cursor import Cursor from nomad import utils -from nomad.processing import ProcessStatus, Upload, Calc +from nomad.processing import ProcessStatus, Upload, Entry from nomad.processing.data import generate_entry_id from nomad.datamodel import Dataset from nomad.parsing.parsers import parser_dict @@ -68,8 +68,8 @@ def create_collections_if_needed(db_dst: Database): ''' if 'upload' not in db_dst.collection_names(): Upload.objects() - if 'calc' not in db_dst.collection_names(): - Calc.objects() + if 'entry' not in db_dst.collection_names(): + Entry.objects() if 'dataset' not in db_dst.collection_names(): Dataset.m_def.a_mongo.objects() diff --git a/nomad/cli/admin/uploads.py b/nomad/cli/admin/uploads.py index f0b21d0aa6..85df409982 100644 --- a/nomad/cli/admin/uploads.py +++ b/nomad/cli/admin/uploads.py @@ -222,7 +222,7 @@ def _query_uploads( if processing_incomplete_entries or processing_incomplete or processing_necessary: entries_mongo_query_q &= Q(process_status__in=proc.ProcessStatus.STATUSES_PROCESSING) - mongo_entry_based_uploads = set(proc.Calc.objects(entries_mongo_query_q).distinct(field="upload_id")) + mongo_entry_based_uploads = set(proc.Entry.objects(entries_mongo_query_q).distinct(field="upload_id")) if entries_query_uploads is not None: entries_query_uploads = entries_query_uploads.intersection(mongo_entry_based_uploads) else: @@ -339,13 +339,13 @@ def reset(ctx, uploads, with_entries, success, failure): i = 0 for upload in uploads: if with_entries: - entry_update = proc.Calc.reset_pymongo_update() + entry_update = proc.Entry.reset_pymongo_update() if success: entry_update['process_status'] = proc.ProcessStatus.SUCCESS if failure: entry_update['process_status'] = proc.ProcessStatus.FAILURE - proc.Calc._get_collection().update_many( + proc.Entry._get_collection().update_many( dict(upload_id=upload.upload_id), {'$set': entry_update}) upload.reset(force=True) @@ -422,7 +422,7 @@ def delete_upload(upload, skip_es: bool = False, skip_files: bool = False, skip_ # delete mongo if not skip_mongo: - proc.Calc.objects(upload_id=upload.upload_id).delete() + proc.Entry.objects(upload_id=upload.upload_id).delete() upload.delete() @@ -491,7 +491,7 @@ def stop(ctx, uploads, entries: bool, kill: bool, no_celery: bool): def stop_all(query): for process in query: logger_kwargs = dict(upload_id=process.upload_id) - if isinstance(process, proc.Calc): + if isinstance(process, proc.Entry): logger_kwargs.update(calc_id=process.calc_id) if not no_celery: @@ -518,7 +518,7 @@ def stop(ctx, uploads, entries: bool, kill: bool, no_celery: bool): process.fail('process terminate via nomad cli') running_query = query & mongoengine.Q(process_status__in=proc.ProcessStatus.STATUSES_PROCESSING) - stop_all(proc.Calc.objects(running_query)) + stop_all(proc.Entry.objects(running_query)) if not entries: 
stop_all(proc.Upload.objects(running_query)) diff --git a/nomad/processing/__init__.py b/nomad/processing/__init__.py index 52a53ffde5..c6aea6f77f 100644 --- a/nomad/processing/__init__.py +++ b/nomad/processing/__init__.py @@ -26,13 +26,13 @@ Processing is build on top of *celery* (http://www.celeryproject.org/) and Celery provides a task-based programming model for distributed computing. It uses a broker, e.g. a distributed task queue like *RabbitMQ* to distribute tasks. We use mongodb to store the current state of processing in :class:`Upload` and -:class:`Calc` documents. This combination allows us to easily distribute +:class:`Entry` documents. This combination allows us to easily distribute processing work while having the processing state, i.e. (intermediate) results, always available. This module is structured into our *celery app* and abstract process base class :class:`Proc` (``base.py``), and the concrete processing classes -:class:`Upload` and :class:`Calc` (``data.py``). +:class:`Upload` and :class:`Entry` (``data.py``). This module does not contain the functions to do the actual work. Those are encapsulated in :py:mod:`nomad.files`, :py:mod:`nomad.repo`, :py:mod:`nomad.users`, @@ -51,15 +51,15 @@ We also don't have to deal with celery result backends and synchronizing with th .. autoclass:: nomad.processing.base.Proc -There are two concrete processes :class:`Upload` and :class: `Calc`. Instances of both +There are two concrete processes :class:`Upload` and :class: `Entry`. Instances of both classes do represent the processing state, as well as the respective entity. .. autoclass:: nomad.processing.data.Upload :members: -.. autoclass:: nomad.processing.data.Calc +.. autoclass:: nomad.processing.data.Entry :members: ''' from nomad.processing.base import ( app, InvalidId, ProcNotRegistered, ProcessStatus, ProcessAlreadyRunning) -from nomad.processing.data import Upload, Calc, MetadataEditRequestHandler +from nomad.processing.data import Upload, Entry, MetadataEditRequestHandler diff --git a/nomad/processing/data.py b/nomad/processing/data.py index f5d0952c25..9d2ac4333f 100644 --- a/nomad/processing/data.py +++ b/nomad/processing/data.py @@ -22,7 +22,7 @@ data. These are information about users, their uploads and datasets, the associa entries, and files -.. autoclass:: Calc +.. autoclass:: Entry .. autoclass:: Upload @@ -317,7 +317,7 @@ class MetadataEditRequestHandler: return self._mongo_metadata(upload, self._verified_file_metadata(path_dir='')) return {} - def get_entry_mongo_metadata(self, upload: 'Upload', entry: 'Calc') -> Dict[str, Any]: + def get_entry_mongo_metadata(self, upload: 'Upload', entry: 'Entry') -> Dict[str, Any]: ''' Returns a dictionary with metadata to set on the mongo entry object. If the provided `edit_request` is a json dictionary the :func: `validate_json_request`) is assumed @@ -506,7 +506,7 @@ class MetadataEditRequestHandler: assert False, 'Unhandled value type' # Should not happen def _mongo_metadata( - self, mongo_doc: Union['Upload', 'Calc'], verified_metadata: Dict[str, Any]) -> Dict[str, Any]: + self, mongo_doc: Union['Upload', 'Entry'], verified_metadata: Dict[str, Any]) -> Dict[str, Any]: ''' Calculates the upload or entry level *mongo* metadata, given a `mongo_doc` and a dictionary with *verified* metadata. 
The mongo metadata are the key-value pairs @@ -514,7 +514,7 @@ class MetadataEditRequestHandler: ''' rv: Dict[str, Any] = {} for quantity_name, verified_value in verified_metadata.items(): - if isinstance(mongo_doc, Calc) and quantity_name not in _mongo_entry_metadata: + if isinstance(mongo_doc, Entry) and quantity_name not in _mongo_entry_metadata: continue elif isinstance(mongo_doc, Upload) and quantity_name not in _mongo_upload_metadata: continue @@ -542,7 +542,7 @@ class MetadataEditRequestHandler: new_list.remove(v) return new_list - def _get_entry_key(self, entry: 'Calc', entries_key: str) -> str: + def _get_entry_key(self, entry: 'Entry', entries_key: str) -> str: if entries_key == 'calc_id' or entries_key == 'entry_id': return entry.entry_id elif entries_key == 'mainfile': @@ -602,7 +602,7 @@ class MetadataEditRequestHandler: pass return [] - def find_request_entries(self, upload: 'Upload') -> Iterable['Calc']: + def find_request_entries(self, upload: 'Upload') -> Iterable['Entry']: ''' Finds the entries of the specified upload which are effected by the request. ''' query = self._restricted_request_query(upload.upload_id) if query: @@ -613,10 +613,10 @@ class MetadataEditRequestHandler: query=query, required=MetadataRequired(include=['calc_id'])) for result in search_result: - yield Calc.get(result['calc_id']) + yield Entry.get(result['calc_id']) else: # We have no query. Return all entries for the upload - for entry in Calc.objects(upload_id=upload.upload_id): + for entry in Entry.objects(upload_id=upload.upload_id): yield entry def _verified_file_metadata(self, path_dir: str) -> Dict[str, Any]: @@ -649,7 +649,7 @@ class MetadataEditRequestHandler: return self.verified_file_metadata_cache[path_dir] -class Calc(Proc): +class Entry(Proc): ''' Instances of this class represent entries. This class manages the elastic search index entry, files, and archive for the respective entry. @@ -725,7 +725,7 @@ class Calc(Proc): self._perform_index = True @classmethod - def get(cls, id) -> 'Calc': + def get(cls, id) -> 'Entry': return cls.get_by_id(id, 'calc_id') @property @@ -1288,7 +1288,7 @@ class Upload(Proc): def delete(self): ''' Deletes this upload and its entries. ''' - Calc.objects(upload_id=self.upload_id).delete() + Entry.objects(upload_id=self.upload_id).delete() super().delete() def delete_upload_local(self): @@ -1303,7 +1303,7 @@ class Upload(Proc): search.delete_upload(self.upload_id, refresh=True) with utils.timer(logger, 'upload partial archives deleted'): - entry_ids = [entry.entry_id for entry in Calc.objects(upload_id=self.upload_id)] + entry_ids = [entry.entry_id for entry in Entry.objects(upload_id=self.upload_id)] delete_partial_archives_from_mongo(entry_ids) with utils.timer(logger, 'upload files deleted'): @@ -1457,7 +1457,7 @@ class Upload(Proc): if config.celery.routing == config.CELERY_WORKER_ROUTING: if self.worker_hostname is None: self.worker_hostname = worker_hostname - Calc._get_collection().update_many( + Entry._get_collection().update_many( {'upload_id': self.upload_id}, {'$set': {'worker_hostname': self.worker_hostname}}) @@ -1600,7 +1600,7 @@ class Upload(Proc): def match_all(self, reprocess_settings, path_filter: str = None): ''' The process step used to identify mainfile/parser combinations among the upload's files, - and create or delete respective :class:`Calc` instances (if needed). + and create or delete respective :class:`Entry` instances (if needed). 
''' self.set_last_status_message('Matching') logger = self.get_logger() @@ -1621,7 +1621,7 @@ class Upload(Proc): old_entries = set() processing_entries = [] with utils.timer(logger, 'existing entries scanned'): - for entry in Calc.objects(upload_id=self.upload_id): + for entry in Entry.objects(upload_id=self.upload_id): if entry.process_running: processing_entries.append(entry.entry_id) if self._passes_path_filter(entry.mainfile, path_filter): @@ -1632,7 +1632,7 @@ class Upload(Proc): entry_id = generate_entry_id(self.upload_id, filename) try: - entry = Calc.get(entry_id) + entry = Entry.get(entry_id) # Matching entry already exists. # Ensure that we update the parser if in staging if not self.published and parser.name != entry.parser_name: @@ -1644,7 +1644,7 @@ class Upload(Proc): # No existing entry found if not self.published or reprocess_settings.add_matched_entries_to_published: # Create new entry - entry = Calc.create( + entry = Entry.create( calc_id=entry_id, mainfile=filename, parser_name=parser.name, @@ -1667,7 +1667,7 @@ class Upload(Proc): delete_partial_archives_from_mongo(entries_to_delete) for entry_id in entries_to_delete: search.delete_entry(entry_id=entry_id, update_materials=True) - entry = Calc.get(entry_id) + entry = Entry.get(entry_id) entry.delete() # No entries *should* be processing, but if there are, we reset them to @@ -1675,9 +1675,9 @@ class Upload(Proc): if processing_entries: logger.warn('Some entries are processing', count=len(processing_entries)) with utils.timer(logger, 'processing entries resetted'): - Calc._get_collection().update_many( + Entry._get_collection().update_many( {'calc_id__in': processing_entries}, - {'$set': Calc.reset_pymongo_update( + {'$set': Entry.reset_pymongo_update( worker_hostname=self.worker_hostname, process_status=ProcessStatus.FAILURE, errors=['process aborted'])}) @@ -1697,10 +1697,10 @@ class Upload(Proc): try: logger = self.get_logger() next_level: int = None - next_entries: List[Calc] = None + next_entries: List[Entry] = None with utils.timer(logger, 'entries processing called'): # Determine what the next level is and which entries belongs to this level - for entry in Calc.objects(upload_id=self.upload_id): + for entry in Entry.objects(upload_id=self.upload_id): parser = parser_dict.get(entry.parser_name) if parser: level = parser.level @@ -1730,11 +1730,11 @@ class Upload(Proc): raise def child_cls(self): - return Calc + return Entry def join(self): ''' - Called when all child processes (if any) on Calc are done. Process the next level + Called when all child processes (if any) on Entry are done. Process the next level of parsers (if any), otherwise cleanup and finalize the process. ''' if self.parse_next_level(self.parser_level + 1): @@ -1820,28 +1820,28 @@ class Upload(Proc): with utils.timer(self.get_logger(), 'upload staging files deleted'): staging_upload_files.delete() - def get_entry(self, entry_id) -> Calc: + def get_entry(self, entry_id) -> Entry: ''' Returns the upload entry with the given id or ``None``. ''' - return Calc.objects(upload_id=self.upload_id, calc_id=entry_id).first() + return Entry.objects(upload_id=self.upload_id, calc_id=entry_id).first() @property def processed_entries_count(self) -> int: ''' The number of entries that have finished processing (process_status == SUCCESS | FAILURE). 
''' - return Calc.objects( + return Entry.objects( upload_id=self.upload_id, process_status__in=[ ProcessStatus.SUCCESS, ProcessStatus.FAILURE]).count() @property def total_entries_count(self) -> int: ''' The total number of entries for this upload (regardless of process status). ''' - return Calc.objects(upload_id=self.upload_id).count() + return Entry.objects(upload_id=self.upload_id).count() @property def failed_entries_count(self) -> int: ''' The number of entries with failed processing. ''' - return Calc.objects(upload_id=self.upload_id, process_status=ProcessStatus.FAILURE).count() + return Entry.objects(upload_id=self.upload_id, process_status=ProcessStatus.FAILURE).count() - def entries_sublist(self, start, end, order_by=None) -> Sequence[Calc]: + def entries_sublist(self, start, end, order_by=None) -> Sequence[Entry]: ''' Returns all entries, paginated and ordered. @@ -1850,7 +1850,7 @@ class Upload(Proc): end: the end index of the requested page order_by: the property to order by ''' - query = Calc.objects(upload_id=self.upload_id)[start:end] + query = Entry.objects(upload_id=self.upload_id)[start:end] if not order_by: return query if type(order_by) == str: @@ -1859,9 +1859,9 @@ class Upload(Proc): return query.order_by(*order_by) @property - def successful_entries(self) -> Sequence[Calc]: + def successful_entries(self) -> Sequence[Entry]: ''' All successfully processed entries. ''' - return Calc.objects(upload_id=self.upload_id, process_status=ProcessStatus.SUCCESS) + return Entry.objects(upload_id=self.upload_id, process_status=ProcessStatus.SUCCESS) @contextmanager def entries_metadata(self) -> Iterator[List[EntryMetadata]]: @@ -1873,7 +1873,7 @@ class Upload(Proc): # read all entry objects first to avoid missing cursor errors yield [ entry.full_entry_metadata(self) - for entry in list(Calc.objects(upload_id=self.upload_id))] + for entry in list(Entry.objects(upload_id=self.upload_id))] finally: self.upload_files.close() # Because full_entry_metadata reads the archive files. @@ -1883,7 +1883,7 @@ class Upload(Proc): Returns a list of :class:`EntryMetadata` containing the mongo metadata only, for all entries of this upload. ''' - return [entry.mongo_metadata(self) for entry in Calc.objects(upload_id=self.upload_id)] + return [entry.mongo_metadata(self) for entry in Entry.objects(upload_id=self.upload_id)] @process() def edit_upload_metadata(self, edit_request_json: Dict[str, Any], user_id: str): @@ -1933,7 +1933,7 @@ class Upload(Proc): # Update mongo if entry_mongo_writes: with utils.timer(logger, 'Mongo bulk write completed', nupdates=len(entry_mongo_writes)): - mongo_result = Calc._get_collection().bulk_write(entry_mongo_writes) + mongo_result = Entry._get_collection().bulk_write(entry_mongo_writes) mongo_errors = mongo_result.bulk_api_result.get('writeErrors') assert not mongo_errors, ( f'Failed to update mongo! 
{len(mongo_errors)} failures, first is {mongo_errors[0]}') @@ -1944,7 +1944,7 @@ class Upload(Proc): assert not failed_es, f'Failed to update ES, there were {failed_es} fails' def entry_ids(self) -> List[str]: - return [entry.entry_id for entry in Calc.objects(upload_id=self.upload_id)] + return [entry.entry_id for entry in Entry.objects(upload_id=self.upload_id)] def export_bundle( self, export_as_stream: bool, export_path: str, @@ -2188,7 +2188,7 @@ class Upload(Proc): update['calc_id'] = entry_dict['_id'] if not settings.keep_original_timestamps: update['entry_create_time'] = current_time - entry: Calc = Calc.create(**update) + entry: Entry = Entry.create(**update) entry.process_status = entry_dict['process_status'] entry.validate() except Exception as e: diff --git a/nomad/tmp.py b/nomad/tmp.py index b98501fc65..30b48bf891 100644 --- a/nomad/tmp.py +++ b/nomad/tmp.py @@ -43,10 +43,10 @@ def transform_to_v0_10(entry: EntryMetadata) -> EntryMetadata: def set_external_db_in_mongo(upload_id: str = None): from nomad import infrastructure - from nomad.processing import Calc + from nomad.processing import Entry infrastructure.setup_mongo() - calcs = Calc._get_collection() + calcs = Entry._get_collection() print(calcs) for external_db, user_id in external_dbs.items(): diff --git a/tests/app/v1/routers/test_datasets.py b/tests/app/v1/routers/test_datasets.py index a8ca3550ca..17f40540f1 100644 --- a/tests/app/v1/routers/test_datasets.py +++ b/tests/app/v1/routers/test_datasets.py @@ -153,7 +153,7 @@ def assert_dataset(dataset, query: Query = None, entries: List[str] = None, n_en expected_n_entries = n_entries if dataset['dataset_type'] == 'owned' else 0 assert search_results.pagination.total == expected_n_entries - assert processing.Calc.objects(datasets=dataset_id).count() == expected_n_entries + assert processing.Entry.objects(datasets=dataset_id).count() == expected_n_entries def assert_dataset_deleted(dataset_id): @@ -163,7 +163,7 @@ def assert_dataset_deleted(dataset_id): search_results = search( owner='admin', query={'datasets.dataset_id': dataset_id}, user_id=admin_user_id) assert search_results.pagination.total == 0 - assert processing.Calc.objects(datasets=dataset_id).count() == 0 + assert processing.Entry.objects(datasets=dataset_id).count() == 0 @pytest.mark.parametrize('query, size, status_code', [ diff --git a/tests/app/v1/routers/test_entries_edit.py b/tests/app/v1/routers/test_entries_edit.py index baeb3ef66e..cc7e4279da 100644 --- a/tests/app/v1/routers/test_entries_edit.py +++ b/tests/app/v1/routers/test_entries_edit.py @@ -106,12 +106,12 @@ class TestEditRepo(): assert has_message == message def mongo(self, *args, edited: bool = True, **kwargs): - for calc_id in args: - calc = proc.Calc.objects(calc_id='test_entry_id_%d' % calc_id).first() - assert calc is not None + for entry_id in args: + entry = proc.Entry.objects(calc_id='test_entry_id_%d' % entry_id).first() + assert entry is not None if edited: - assert calc.last_edit_time is not None - metadata = calc.mongo_metadata(calc.upload).m_to_dict() + assert entry.last_edit_time is not None + metadata = entry.mongo_metadata(entry.upload).m_to_dict() for key, value in kwargs.items(): if metadata.get(key) != value: return False diff --git a/tests/app/v1/routers/test_uploads.py b/tests/app/v1/routers/test_uploads.py index 368626fa8f..92a5e1d9d1 100644 --- a/tests/app/v1/routers/test_uploads.py +++ b/tests/app/v1/routers/test_uploads.py @@ -33,7 +33,7 @@ from tests.processing.test_edit_metadata import ( assert_metadata_edited, 
all_coauthor_metadata, all_admin_metadata) from tests.app.v1.routers.common import assert_response from nomad import config, files, infrastructure -from nomad.processing import Upload, Calc, ProcessStatus +from nomad.processing import Upload, Entry, ProcessStatus from nomad.files import UploadFiles, StagingUploadFiles, PublicUploadFiles from nomad.datamodel import EntryMetadata @@ -166,9 +166,9 @@ def assert_file_upload_and_processing( def assert_expected_mainfiles(upload_id, expected_mainfiles): if expected_mainfiles is not None: - entries = [e.mainfile for e in Calc.objects(upload_id=upload_id)] + entries = [e.mainfile for e in Entry.objects(upload_id=upload_id)] assert set(entries) == set(expected_mainfiles), 'Wrong entries found' - for entry in Calc.objects(upload_id=upload_id): + for entry in Entry.objects(upload_id=upload_id): if type(expected_mainfiles) != dict or expected_mainfiles[entry.mainfile]: assert entry.process_status == ProcessStatus.SUCCESS else: @@ -205,7 +205,7 @@ def assert_upload_does_not_exist(client, upload_id: str, user_auth): assert_response(response, 404) assert Upload.objects(upload_id=upload_id).first() is None - assert Calc.objects(upload_id=upload_id).count() is 0 + assert Entry.objects(upload_id=upload_id).count() is 0 mongo_db = infrastructure.mongo_client[config.mongo.db_name] mongo_collection = mongo_db['archive'] diff --git a/tests/processing/test_data.py b/tests/processing/test_data.py index 8cbd7ae4f0..c813fa5b2c 100644 --- a/tests/processing/test_data.py +++ b/tests/processing/test_data.py @@ -29,7 +29,7 @@ import yaml from nomad import utils, infrastructure, config from nomad.archive import read_partial_archive_from_mongo from nomad.files import UploadFiles, StagingUploadFiles, PublicUploadFiles -from nomad.processing import Upload, Calc, ProcessStatus +from nomad.processing import Upload, Entry, ProcessStatus from nomad.processing.data import UploadContext, generate_entry_id from nomad.search import search, refresh as search_refresh @@ -88,7 +88,7 @@ def assert_processing(upload: Upload, published: bool = False, process='process_ else: assert isinstance(upload_files, StagingUploadFiles) - for entry in Calc.objects(upload_id=upload.upload_id): + for entry in Entry.objects(upload_id=upload.upload_id): assert entry.parser_name is not None assert entry.mainfile is not None assert entry.process_status == ProcessStatus.SUCCESS @@ -132,7 +132,7 @@ def assert_processing(upload: Upload, published: bool = False, process='process_ upload_files.close() search_results = search(owner=None, query={'upload_id': upload.upload_id}) - assert search_results.pagination.total == Calc.objects(upload_id=upload.upload_id).count() + assert search_results.pagination.total == Entry.objects(upload_id=upload.upload_id).count() for entry in search_results.data: assert entry['published'] == published assert entry['upload_id'] == upload.upload_id @@ -226,7 +226,7 @@ def test_publish_failed( non_empty_uploaded: Tuple[str, str], internal_example_user_metadata, test_user, monkeypatch, proc_infra): - mock_failure(Calc, 'parsing', monkeypatch) + mock_failure(Entry, 'parsing', monkeypatch) processed = run_processing(non_empty_uploaded, test_user) set_upload_entry_metadata(processed, internal_example_user_metadata) @@ -334,7 +334,7 @@ def test_re_processing(published: Upload, internal_example_user_metadata, monkey assert published.upload_files.to_staging_upload_files() is None old_upload_time = published.last_update - first_entry: Calc = published.entries_sublist(0, 1)[0] + first_entry: 
Entry = published.entries_sublist(0, 1)[0] old_entry_time = first_entry.last_processing_time with published.upload_files.read_archive(first_entry.entry_id) as archive: @@ -566,7 +566,7 @@ def test_re_pack(published: Upload): with upload_files.raw_file(path_info.path) as f: f.read() - for entry in Calc.objects(upload_id=upload_id): + for entry in Entry.objects(upload_id=upload_id): with upload_files.read_archive(entry.entry_id) as archive: archive[entry.entry_id].to_dict() @@ -589,8 +589,8 @@ def test_process_failure(monkeypatch, uploaded, function, proc_infra, test_user, # mock the function to throw exceptions if hasattr(Upload, function): cls = Upload - elif hasattr(Calc, function): - cls = Calc + elif hasattr(Entry, function): + cls = Entry else: assert False @@ -614,7 +614,7 @@ def test_process_failure(monkeypatch, uploaded, function, proc_infra, test_user, assert entry.process_status == ProcessStatus.FAILURE assert len(entry.errors) > 0 - entry = Calc.objects(upload_id=upload_id).first() + entry = Entry.objects(upload_id=upload_id).first() if entry is not None: with upload.upload_files.read_archive(entry.entry_id) as archive: entry_archive = archive[entry.entry_id] @@ -641,7 +641,7 @@ def test_malicious_parser_failure(proc_infra, failure, test_user, tmp): assert len(upload.errors) == 0 assert upload.process_status == ProcessStatus.SUCCESS - entries = Calc.objects(upload_id=upload.upload_id) + entries = Entry.objects(upload_id=upload.upload_id) assert entries.count() == 1 entry = next(entries) assert not entry.process_running @@ -724,7 +724,7 @@ def test_read_metadata_from_file(proc_infra, test_user, other_test_user, tmp): upload = run_processing(('test_upload', upload_file), test_user) - entries = Calc.objects(upload_id=upload.upload_id) + entries = Entry.objects(upload_id=upload.upload_id) entries = sorted(entries, key=lambda entry: entry.mainfile) comment = ['root entries comment 1', 'Entry 2 of 3', 'Entry 3 of 3', None] diff --git a/tests/test_cli.py b/tests/test_cli.py index 9e202932a3..540daee196 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -27,7 +27,7 @@ from nomad import processing as proc, files from nomad.search import search from nomad.cli import cli from nomad.cli.cli import POPO -from nomad.processing import Upload, Calc, ProcessStatus +from nomad.processing import Upload, Entry, ProcessStatus from tests.utils import ExampleData # TODO there is much more to test @@ -76,7 +76,7 @@ class TestAdmin: # Upload.objects(upload_id=upload_id).delete() # assert published.upload_files.exists() - # assert Calc.objects(upload_id=upload_id).first() is not None + # assert Entry.objects(upload_id=upload_id).first() is not None # search.refresh() # assert search.SearchRequest().search_parameter('upload_id', upload_id).execute()['total'] > 0 # # TODO test new index pair @@ -88,7 +88,7 @@ class TestAdmin: # assert result.exit_code == 0 # assert not published.upload_files.exists() - # assert Calc.objects(upload_id=upload_id).first() is None + # assert Entry.objects(upload_id=upload_id).first() is None # search.refresh() # assert search.SearchRequest().search_parameter('upload_id', upload_id).execute()['total'] > 0 # # TODO test new index pair @@ -102,7 +102,7 @@ class TestAdmin: upload_id = published.upload_id published.publish_time = publish_time published.save() - entry = Calc.objects(upload_id=upload_id).first() + entry = Entry.objects(upload_id=upload_id).first() assert published.upload_files.exists() assert published.with_embargo @@ -123,7 +123,7 @@ class TestAdmin: def 
test_delete_entry(self, published): upload_id = published.upload_id - entry = Calc.objects(upload_id=upload_id).first() + entry = Entry.objects(upload_id=upload_id).first() result = invoke_cli( cli, ['admin', 'entries', 'rm', entry.entry_id], catch_exceptions=False) @@ -131,7 +131,7 @@ class TestAdmin: assert result.exit_code == 0 assert 'deleting' in result.stdout assert Upload.objects(upload_id=upload_id).first() is not None - assert Calc.objects(calc_id=entry.entry_id).first() is None + assert Entry.objects(calc_id=entry.entry_id).first() is None def transform_for_index_test(entry): @@ -179,11 +179,11 @@ class TestAdminUploads: assert result.exit_code == 0 assert 'deleting' in result.stdout assert Upload.objects(upload_id=upload_id).first() is None - assert Calc.objects(upload_id=upload_id).first() is None + assert Entry.objects(upload_id=upload_id).first() is None def test_index(self, published): upload_id = published.upload_id - entry = Calc.objects(upload_id=upload_id).first() + entry = Entry.objects(upload_id=upload_id).first() entry.comment = 'specific' entry.save() @@ -214,7 +214,7 @@ class TestAdminUploads: def test_re_process(self, published, monkeypatch): monkeypatch.setattr('nomad.config.meta.version', 'test_version') upload_id = published.upload_id - entry = Calc.objects(upload_id=upload_id).first() + entry = Entry.objects(upload_id=upload_id).first() assert entry.nomad_version != 'test_version' result = invoke_cli( @@ -227,7 +227,7 @@ class TestAdminUploads: def test_re_pack(self, published, monkeypatch): upload_id = published.upload_id - entry = Calc.objects(upload_id=upload_id).first() + entry = Entry.objects(upload_id=upload_id).first() assert published.with_embargo published.embargo_length = 0 published.save() @@ -242,7 +242,7 @@ class TestAdminUploads: for path_info in upload_files.raw_directory_list(recursive=True, files_only=True): with upload_files.raw_file(path_info.path) as f: f.read() - for entry in Calc.objects(upload_id=upload_id): + for entry in Entry.objects(upload_id=upload_id): with upload_files.read_archive(entry.entry_id) as archive: assert entry.entry_id in archive @@ -278,7 +278,7 @@ class TestAdminUploads: upload_id = non_empty_processed.upload_id upload = Upload.objects(upload_id=upload_id).first() - entry = Calc.objects(upload_id=upload_id).first() + entry = Entry.objects(upload_id=upload_id).first() assert upload.process_status == ProcessStatus.SUCCESS assert entry.process_status == ProcessStatus.SUCCESS @@ -292,7 +292,7 @@ class TestAdminUploads: assert result.exit_code == 0 assert 'reset' in result.stdout upload = Upload.objects(upload_id=upload_id).first() - entry = Calc.objects(upload_id=upload_id).first() + entry = Entry.objects(upload_id=upload_id).first() expected_state = ProcessStatus.READY if success: expected_state = ProcessStatus.SUCCESS diff --git a/tests/utils.py b/tests/utils.py index 9f0d2107d0..4d2e51b97e 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -112,7 +112,7 @@ def set_upload_entry_metadata(upload, metadata: Dict[str, Any]): Sets the provided metadata values on all entries of the given upload. 
''' from nomad import processing as proc - for entry in proc.Calc.objects(upload_id=upload.upload_id): + for entry in proc.Entry.objects(upload_id=upload.upload_id): entry.set_mongo_entry_metadata(**metadata) entry.save() @@ -155,7 +155,7 @@ class ExampleData: for entry_metadata in self.entries.values(): process_status = ( proc.ProcessStatus.SUCCESS if entry_metadata.processed else proc.ProcessStatus.FAILURE) - mongo_entry = proc.Calc( + mongo_entry = proc.Entry( entry_create_time=entry_metadata.entry_create_time, calc_id=entry_metadata.calc_id, upload_id=entry_metadata.upload_id, -- GitLab
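A minimal sketch of how calling code looks after this rename, assuming a configured mongo connection via infrastructure.setup_mongo() and an existing upload_id. summarize_upload is a hypothetical helper added for illustration only; the Entry calls mirror query patterns that appear in the diff above (note that field names such as calc_id and entry_id are not renamed by this patch, only the document class and its backing collection).

from nomad import infrastructure
from nomad.processing import Entry, ProcessStatus

def summarize_upload(upload_id: str) -> None:
    # Hypothetical helper: the document class is renamed Calc -> Entry, but mongo
    # field names such as calc_id stay the same, so queries keep using them.
    infrastructure.setup_mongo()
    total = Entry.objects(upload_id=upload_id).count()
    failed = Entry.objects(
        upload_id=upload_id, process_status=ProcessStatus.FAILURE).count()
    print(f'{upload_id}: {total} entries, {failed} failed')
    for entry in Entry.objects(upload_id=upload_id)[:5]:
        # entry_id is still backed by the calc_id field (see Entry.get in data.py)
        print(entry.entry_id, entry.mainfile, entry.parser_name)
    # Raw collection access follows the same pattern the patch uses elsewhere;
    # per the changes in clean.py and migrate.py, the collection is now 'entry'
    # rather than 'calc'.
    collection = Entry._get_collection()
    print(collection.name)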