diff --git a/nomad/coe_repo/upload.py b/nomad/coe_repo/upload.py index af347ed026c90384320d52299834096c12559064..6e0214873e4f7b1bbdfce7306e67f082071c1d87 100644 --- a/nomad/coe_repo/upload.py +++ b/nomad/coe_repo/upload.py @@ -87,7 +87,7 @@ class Upload(Base): # type: ignore created = Column(DateTime) user = relationship('User') - calcs = relationship('Calc') + calcs = relationship('Calc', lazy='subquery') @staticmethod def from_upload_id(upload_id: str) -> 'Upload': diff --git a/nomad/datamodel.py b/nomad/datamodel.py index edbcee639363203c3cd3f41f1d8d8c8a371b14c8..da06580f58aa26a9603cbe588e12c04410ee5ee2 100644 --- a/nomad/datamodel.py +++ b/nomad/datamodel.py @@ -115,12 +115,14 @@ class CalcWithMetadata(): self.code_name: str = None self.code_version: str = None + self.backend = None + self.update(**kwargs) def to_dict(self): return { key: value for key, value in self.__dict__.items() - if value is not None + if value is not None and key not in ['backend'] } def update(self, **kwargs): diff --git a/nomad/parsing/backend.py b/nomad/parsing/backend.py index da09af4cc984f7bf1c6eda26b6743580c2a5672f..db91cc45d647a039e8b0bd08aef1501083e9a0db 100644 --- a/nomad/parsing/backend.py +++ b/nomad/parsing/backend.py @@ -342,7 +342,9 @@ class LocalBackend(LegacyParserBackend): if self._unknown_attributes.get(name) is None: self.logger.debug('Access of unexpected backend attribute/method', attribute=name) self._unknown_attributes[name] = name - return lambda *args, **kwargs: None + + return getattr(self._delegate, name) + # return lambda *args, **kwargs: None def finishedParsingSession(self, parserStatus, parserErrors, *args, **kwargs): self._delegate.finishedParsingSession(parserStatus, parserErrors, *args, **kwargs) @@ -552,6 +554,8 @@ class LocalBackend(LegacyParserBackend): target.files = repo_data['section_repository_info'].get('repository_filepaths', []) target.mainfile = repo_data['section_calculation_info'].get('main_file', None) + target.backend = self + return target def write_json(self, out: TextIO, pretty=True, filter: Callable[[str, Any], Any] = None): diff --git a/nomad/processing/data.py b/nomad/processing/data.py index f971563f4013bb0043ce6442c29633d92a3345a5..0f0fb24ad8a24f7a6d51a5c3032cfd712524daa3 100644 --- a/nomad/processing/data.py +++ b/nomad/processing/data.py @@ -400,7 +400,13 @@ class Upload(Proc): with utils.timer( logger, 'index updated', step='publish', upload_size=self.upload_files.size): - search.Entry.publish_upload(upload_with_metadata) + coe_upload = coe_repo.Upload.from_upload_id(upload_with_metadata.upload_id) + if coe_upload is not None: + for coe_calc in coe_upload.calcs: + entry = search.Entry.from_calc_with_metadata( + coe_calc.to_calc_with_metadata()) + entry.published = True + entry.save(refresh=True) with utils.timer( logger, 'staged upload deleted', step='publish', diff --git a/nomad/search.py b/nomad/search.py index b4572ccdc4d6629b3a0f69b7f29190c923cbb2ec..f9d06fe843c64316693a8c72f91b41add6dbc8d6 100644 --- a/nomad/search.py +++ b/nomad/search.py @@ -17,7 +17,7 @@ This module represents calculations in elastic search. """ from elasticsearch_dsl import Document, InnerDoc, Keyword, Text, Date, \ - Object, Boolean, Search + Object, Boolean, Search, Integer from nomad import config, datamodel, infrastructure, datamodel, coe_repo @@ -90,41 +90,64 @@ class Entry(Document): code_name = Keyword() code_version = Keyword() - @classmethod - def from_calc_with_metadata(cls, source: datamodel.CalcWithMetadata) -> 'Entry': - return Entry( - meta=dict(id=source.calc_id), - upload_id=source.upload_id, - upload_time=source.upload_time, - calc_id=source.calc_id, - calc_hash=source.calc_hash, - pid=str(source.pid), - mainfile=source.mainfile, - files=source.files, - uploader=User.from_user_popo(source.uploader) if source.uploader is not None else None, - - with_embargo=source.with_embargo, - published=source.published, - coauthors=[User.from_user_popo(user) for user in source.coauthors], - shared_with=[User.from_user_popo(user) for user in source.shared_with], - comment=source.comment, - references=[ref.value for ref in source.references], - datasets=[Dataset.from_dataset_popo(ds) for ds in source.datasets], - - formula=source.formula, - atoms=list(set(source.atoms)), - basis_set=source.basis_set, - xc_functional=source.xc_functional, - system=source.system, - crystal_system=source.crystal_system, - spacegroup=source.spacegroup, - code_name=source.code_name, - code_version=source.code_version) + n_total_energies = Integer() + n_geometries = Integer() + geometries = Keyword(multi=True) + quantities = Keyword(multi=True) @classmethod - def add_upload(cls, source: datamodel.UploadWithMetadata): - for calc in source.calcs: - cls.from_calc_with_metadata(calc).save() + def from_calc_with_metadata(cls, source: datamodel.CalcWithMetadata) -> 'Entry': + entry = Entry(meta=dict(id=source.calc_id)) + entry.update(source) + return entry + + def update(self, source: datamodel.CalcWithMetadata) -> None: + self.upload_id = source.upload_id + self.upload_time = source.upload_time + self.calc_id = source.calc_id + self.calc_hash = source.calc_hash + self.pid = str(source.pid) + self.mainfile = source.mainfile + self.files = source.files + self.uploader = User.from_user_popo(source.uploader) if source.uploader is not None else None + + self.with_embargo = source.with_embargo + self.published = source.published + self.coauthors = [User.from_user_popo(user) for user in source.coauthors] + self.shared_with = [User.from_user_popo(user) for user in source.shared_with] + self.comment = source.comment + self.references = [ref.value for ref in source.references] + self.datasets = [Dataset.from_dataset_popo(ds) for ds in source.datasets] + + self.formula = source.formula + self.atoms = list(set(source.atoms)) + self.basis_set = source.basis_set + self.xc_functional = source.xc_functional + self.system = source.system + self.crystal_system = source.crystal_system + self.spacegroup = source.spacegroup + self.code_name = source.code_name + self.code_version = source.code_version + + if source.backend is not None: + quantities = set() + geometries = set() + n_total_energies = 0 + n_geometries = 0 + + for meta_info, _, value in source.backend._delegate.results.traverse(): + quantities.add(meta_info) + if meta_info == 'energy_total': + n_total_energies += 1 + if meta_info == 'section_system': + n_geometries += 1 + if meta_info == 'configuration_raw_gid': + geometries.add(value) + + self.geometries = list(geometries) + self.quantities = list(quantities) + self.n_total_energies = n_total_energies + self.n_geometries = n_geometries @classmethod def update_by_query(cls, upload_id, script): diff --git a/tests/misc.http b/tests/misc.http index ecc3a3db345302835c52e94f667d6d2060f9dcfb..f601f3b93b86f34f55c5f628319d5dd5f983544c 100644 --- a/tests/misc.http +++ b/tests/misc.http @@ -13,3 +13,6 @@ content-type: application/json GET http://localhost:8000/nomad/api/v2/uploads/ HTTP/1.1 Authorization: Basic bGVvbmFyZC5ob2ZzdGFkdGVyQG5vbWFkLWZhaXJkaS50ZXN0cy5kZTo= ### + + +GET http://localhost:9200/test_nomad_fairdi_calcs/_search HTTP/1.1 \ No newline at end of file diff --git a/tests/test_api.py b/tests/test_api.py index 47179e5115e992ecef608fa3513e4a75dec13435..0e24828347e6538c47396a189b5ab7195bc745c7 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -20,6 +20,7 @@ import zipfile import io import inspect from passlib.hash import bcrypt +from datetime import datetime from nomad import config, coe_repo, search, parsing from nomad.files import UploadFiles, PublicUploadFiles @@ -29,6 +30,7 @@ from tests.conftest import create_auth_headers, clear_elastic from tests.test_files import example_file, example_file_mainfile, example_file_contents from tests.test_files import create_staging_upload, create_public_upload from tests.test_coe_repo import assert_coe_upload +from tests.test_search import assert_search_upload def test_alive(client): @@ -241,6 +243,7 @@ class TestUploads: self.assert_upload_does_not_exist(client, upload_id, test_user_auth) assert_coe_upload(upload_id, user_metadata=metadata) + assert_search_upload(upload_id, published=True) def assert_upload_does_not_exist(self, client, upload_id: str, test_user_auth): # poll until publish/delete completed @@ -358,11 +361,12 @@ class TestUploads: def test_post_metadata( self, client, proc_infra, admin_user_auth, test_user_auth, test_user, - other_test_user, no_warn): + other_test_user, no_warn, example_user_metadata): rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth) upload = self.assert_upload(rv.data) self.assert_processing(client, test_user_auth, upload['upload_id']) - metadata = dict(comment='test comment') + metadata = dict(**example_user_metadata) + metadata['_upload_time'] = datetime.now().isoformat() self.assert_unstage(client, admin_user_auth, upload['upload_id'], proc_infra, metadata) def test_post_metadata_forbidden(self, client, proc_infra, test_user_auth, no_warn): diff --git a/tests/test_parsing.py b/tests/test_parsing.py index 07f151e222ee78c1f8ed64434040f78425617a28..811d55874d551491bdd23c7e3a9cf05b1e7ccdd4 100644 --- a/tests/test_parsing.py +++ b/tests/test_parsing.py @@ -67,16 +67,6 @@ class TestLocalBackend(object): def test_meta_info(self, meta_info, no_warn): assert 'section_topology' in meta_info - def test_metadata(self, backend, no_warn): - g_index = backend.openSection('section_calculation_info') - assert g_index == 0 - backend.addValue('calc_id', 't0') - backend.closeSection('section_calculation_info', 0) - g_index = backend.openSection('section_repository_info') - backend.addValue('repository_calc_id', 1) - backend.closeSection('section_repository_info', 0) - assert json.dumps(backend.metadata()) is not None - def test_section(self, backend, no_warn): g_index = backend.openSection('section_run') assert g_index == 0 diff --git a/tests/test_search.py b/tests/test_search.py index 2aeb618ca069dab8b715779878ebe1c2f7931eb6..5f930cfd065b92ab612c35c7a15894bdb1ebc3bf 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -60,3 +60,16 @@ def assert_entry(calc_id): assert search.count() == 1 results = list(hit.to_dict() for hit in search) assert results[0]['calc_id'] == calc_id + + +def assert_search_upload(upload_id, published: bool = False): + search = Entry.search().query('match_all')[0:10] + if search.count() > 0: + for hit in search: + hit = hit.to_dict() + if published: + assert int(hit.get('pid')) > 0 + assert hit.get('published') + + for coauthor in hit.get('coauthors', []): + assert coauthor.get('name', None) is not None