Commit 1e8eef8c authored by Markus Scheidgen's avatar Markus Scheidgen

Added basic backend info and statistics to the search index. Updated search entries after publish (with user metadata and pid).
parent 7f0d9424
Pipeline #43813 passed with stages in 21 minutes and 26 seconds
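As a rough illustration of what the new index fields enable, here is a minimal sketch (not part of the commit) of querying the statistics that this change adds to each search entry. It assumes a reachable Elasticsearch instance; the connection host and the aggregation names ('total_energies', 'codes') are illustrative assumptions, while the field names (n_total_energies, n_geometries, quantities, code_name, published) come from the diff below.

# Hedged sketch, assuming the Entry document class from nomad.search as changed in this commit.
from elasticsearch_dsl import A, connections

from nomad.search import Entry

# illustrative connection; in the real code base the connection is set up by the infrastructure module
connections.create_connection(hosts=['localhost:9200'])

# only published entries, i.e. those re-indexed after publish with user metadata and pid
search = Entry.search().query('term', published=True)[0:10]
search.aggs.metric('total_energies', A('sum', field='n_total_energies'))
search.aggs.bucket('codes', A('terms', field='code_name'))

response = search.execute()
for hit in response:
    # each entry now carries the statistics gathered from the parser backend
    print(hit.calc_id, hit.n_total_energies, hit.n_geometries, len(hit.quantities))

print('total energies across published entries:', response.aggregations.total_energies.value)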
@@ -87,7 +87,7 @@ class Upload(Base): # type: ignore
     created = Column(DateTime)
     user = relationship('User')
-    calcs = relationship('Calc')
+    calcs = relationship('Calc', lazy='subquery')

     @staticmethod
     def from_upload_id(upload_id: str) -> 'Upload':
......
@@ -115,12 +115,14 @@ class CalcWithMetadata():
         self.code_name: str = None
         self.code_version: str = None

+        self.backend = None
+
         self.update(**kwargs)

     def to_dict(self):
         return {
             key: value for key, value in self.__dict__.items()
-            if value is not None
+            if value is not None and key not in ['backend']
         }

     def update(self, **kwargs):
......
@@ -342,7 +342,9 @@ class LocalBackend(LegacyParserBackend):
         if self._unknown_attributes.get(name) is None:
             self.logger.debug('Access of unexpected backend attribute/method', attribute=name)
             self._unknown_attributes[name] = name
-        return lambda *args, **kwargs: None
+
+        return getattr(self._delegate, name)
+        # return lambda *args, **kwargs: None

     def finishedParsingSession(self, parserStatus, parserErrors, *args, **kwargs):
         self._delegate.finishedParsingSession(parserStatus, parserErrors, *args, **kwargs)
@@ -552,6 +554,8 @@ class LocalBackend(LegacyParserBackend):
         target.files = repo_data['section_repository_info'].get('repository_filepaths', [])
         target.mainfile = repo_data['section_calculation_info'].get('main_file', None)

+        target.backend = self
+
         return target

     def write_json(self, out: TextIO, pretty=True, filter: Callable[[str, Any], Any] = None):
......
@@ -400,7 +400,13 @@ class Upload(Proc):
             with utils.timer(
                     logger, 'index updated', step='publish',
                     upload_size=self.upload_files.size):
-                search.Entry.publish_upload(upload_with_metadata)
+                coe_upload = coe_repo.Upload.from_upload_id(upload_with_metadata.upload_id)
+                if coe_upload is not None:
+                    for coe_calc in coe_upload.calcs:
+                        entry = search.Entry.from_calc_with_metadata(
+                            coe_calc.to_calc_with_metadata())
+                        entry.published = True
+                        entry.save(refresh=True)

             with utils.timer(
                     logger, 'staged upload deleted', step='publish',
......
@@ -17,7 +17,7 @@ This module represents calculations in elastic search.
 """

 from elasticsearch_dsl import Document, InnerDoc, Keyword, Text, Date, \
-    Object, Boolean, Search
+    Object, Boolean, Search, Integer

 from nomad import config, datamodel, infrastructure, datamodel, coe_repo
@@ -90,41 +90,64 @@ class Entry(Document):
     code_name = Keyword()
     code_version = Keyword()

-    @classmethod
-    def from_calc_with_metadata(cls, source: datamodel.CalcWithMetadata) -> 'Entry':
-        return Entry(
-            meta=dict(id=source.calc_id),
-            upload_id=source.upload_id,
-            upload_time=source.upload_time,
-            calc_id=source.calc_id,
-            calc_hash=source.calc_hash,
-            pid=str(source.pid),
-            mainfile=source.mainfile,
-            files=source.files,
-            uploader=User.from_user_popo(source.uploader) if source.uploader is not None else None,
-            with_embargo=source.with_embargo,
-            published=source.published,
-            coauthors=[User.from_user_popo(user) for user in source.coauthors],
-            shared_with=[User.from_user_popo(user) for user in source.shared_with],
-            comment=source.comment,
-            references=[ref.value for ref in source.references],
-            datasets=[Dataset.from_dataset_popo(ds) for ds in source.datasets],
-            formula=source.formula,
-            atoms=list(set(source.atoms)),
-            basis_set=source.basis_set,
-            xc_functional=source.xc_functional,
-            system=source.system,
-            crystal_system=source.crystal_system,
-            spacegroup=source.spacegroup,
-            code_name=source.code_name,
-            code_version=source.code_version)
+    n_total_energies = Integer()
+    n_geometries = Integer()
+    geometries = Keyword(multi=True)
+    quantities = Keyword(multi=True)
+
+    @classmethod
+    def add_upload(cls, source: datamodel.UploadWithMetadata):
+        for calc in source.calcs:
+            cls.from_calc_with_metadata(calc).save()
+
+    @classmethod
+    def from_calc_with_metadata(cls, source: datamodel.CalcWithMetadata) -> 'Entry':
+        entry = Entry(meta=dict(id=source.calc_id))
+        entry.update(source)
+        return entry
+
+    def update(self, source: datamodel.CalcWithMetadata) -> None:
+        self.upload_id = source.upload_id
+        self.upload_time = source.upload_time
+        self.calc_id = source.calc_id
+        self.calc_hash = source.calc_hash
+        self.pid = str(source.pid)
+        self.mainfile = source.mainfile
+        self.files = source.files
+        self.uploader = User.from_user_popo(source.uploader) if source.uploader is not None else None
+        self.with_embargo = source.with_embargo
+        self.published = source.published
+        self.coauthors = [User.from_user_popo(user) for user in source.coauthors]
+        self.shared_with = [User.from_user_popo(user) for user in source.shared_with]
+        self.comment = source.comment
+        self.references = [ref.value for ref in source.references]
+        self.datasets = [Dataset.from_dataset_popo(ds) for ds in source.datasets]
+        self.formula = source.formula
+        self.atoms = list(set(source.atoms))
+        self.basis_set = source.basis_set
+        self.xc_functional = source.xc_functional
+        self.system = source.system
+        self.crystal_system = source.crystal_system
+        self.spacegroup = source.spacegroup
+        self.code_name = source.code_name
+        self.code_version = source.code_version
+
+        if source.backend is not None:
+            quantities = set()
+            geometries = set()
+            n_total_energies = 0
+            n_geometries = 0
+
+            for meta_info, _, value in source.backend._delegate.results.traverse():
+                quantities.add(meta_info)
+                if meta_info == 'energy_total':
+                    n_total_energies += 1
+                if meta_info == 'section_system':
+                    n_geometries += 1
+                if meta_info == 'configuration_raw_gid':
+                    geometries.add(value)
+
+            self.geometries = list(geometries)
+            self.quantities = list(quantities)
+            self.n_total_energies = n_total_energies
+            self.n_geometries = n_geometries

     @classmethod
     def update_by_query(cls, upload_id, script):
......
@@ -13,3 +13,6 @@ content-type: application/json
 GET http://localhost:8000/nomad/api/v2/uploads/ HTTP/1.1
 Authorization: Basic bGVvbmFyZC5ob2ZzdGFkdGVyQG5vbWFkLWZhaXJkaS50ZXN0cy5kZTo=
+
+###
+GET http://localhost:9200/test_nomad_fairdi_calcs/_search HTTP/1.1
\ No newline at end of file
@@ -20,6 +20,7 @@ import zipfile
 import io
 import inspect
 from passlib.hash import bcrypt
+from datetime import datetime

 from nomad import config, coe_repo, search, parsing
 from nomad.files import UploadFiles, PublicUploadFiles
@@ -29,6 +30,7 @@ from tests.conftest import create_auth_headers, clear_elastic
 from tests.test_files import example_file, example_file_mainfile, example_file_contents
 from tests.test_files import create_staging_upload, create_public_upload
 from tests.test_coe_repo import assert_coe_upload
+from tests.test_search import assert_search_upload


 def test_alive(client):
@@ -241,6 +243,7 @@ class TestUploads:
         self.assert_upload_does_not_exist(client, upload_id, test_user_auth)
         assert_coe_upload(upload_id, user_metadata=metadata)
+        assert_search_upload(upload_id, published=True)

     def assert_upload_does_not_exist(self, client, upload_id: str, test_user_auth):
         # poll until publish/delete completed
@@ -358,11 +361,12 @@ class TestUploads:
     def test_post_metadata(
             self, client, proc_infra, admin_user_auth, test_user_auth, test_user,
-            other_test_user, no_warn):
+            other_test_user, no_warn, example_user_metadata):
         rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
         upload = self.assert_upload(rv.data)
         self.assert_processing(client, test_user_auth, upload['upload_id'])
-        metadata = dict(comment='test comment')
+        metadata = dict(**example_user_metadata)
+        metadata['_upload_time'] = datetime.now().isoformat()
         self.assert_unstage(client, admin_user_auth, upload['upload_id'], proc_infra, metadata)

     def test_post_metadata_forbidden(self, client, proc_infra, test_user_auth, no_warn):
......
@@ -67,16 +67,6 @@ class TestLocalBackend(object):
     def test_meta_info(self, meta_info, no_warn):
         assert 'section_topology' in meta_info

-    def test_metadata(self, backend, no_warn):
-        g_index = backend.openSection('section_calculation_info')
-        assert g_index == 0
-        backend.addValue('calc_id', 't0')
-        backend.closeSection('section_calculation_info', 0)
-        g_index = backend.openSection('section_repository_info')
-        backend.addValue('repository_calc_id', 1)
-        backend.closeSection('section_repository_info', 0)
-        assert json.dumps(backend.metadata()) is not None
-
     def test_section(self, backend, no_warn):
         g_index = backend.openSection('section_run')
         assert g_index == 0
......
@@ -60,3 +60,16 @@ def assert_entry(calc_id):
     assert search.count() == 1
     results = list(hit.to_dict() for hit in search)
     assert results[0]['calc_id'] == calc_id
+
+
+def assert_search_upload(upload_id, published: bool = False):
+    search = Entry.search().query('match_all')[0:10]
+    if search.count() > 0:
+        for hit in search:
+            hit = hit.to_dict()
+            if published:
+                assert int(hit.get('pid')) > 0
+                assert hit.get('published')
+
+            for coauthor in hit.get('coauthors', []):
+                assert coauthor.get('name', None) is not None