Commit 152ee28c authored by Scheidgen, Markus (mscheidg)'s avatar Scheidgen, Markus (mscheidg)

Merge branch 'migration' into 'master'

Migration

See merge request !31
parents 2a334678 4fac8519
Pipeline #43867 passed with stages
in 22 minutes and 57 seconds
Subproject commit 5b2d6d5ae95cbd1916bfefd4df8ff31c1976b59f
Subproject commit b68bce7db5c02ac56b1cf892ae20458fffb23b65
......@@ -99,16 +99,16 @@ class RepoCalcsResource(Resource):
if g.user is None:
q = Q('term', published=True)
else:
q = Q('term', published=True) | Q('term', uploader__user_id=g.user.user_id)
q = Q('term', published=True) | Q('term', owners__user_id=g.user.user_id)
elif owner == 'user':
if g.user is None:
abort(401, message='Authentication required for owner value user.')
q = Q('term', uploader__user_id=g.user.user_id)
q = Q('term', owners__user_id=g.user.user_id)
elif owner == 'staging':
if g.user is None:
abort(401, message='Authentication required for owner value user.')
q = Q('term', published=False) & Q('term', uploader__user_id=g.user.user_id)
q = Q('term', published=False) & Q('term', owners__user_id=g.user.user_id)
else:
abort(400, message='Invalid owner value. Valid values are all|user|staging, default is all')
......
......@@ -87,7 +87,7 @@ class Upload(Base): # type: ignore
created = Column(DateTime)
user = relationship('User')
calcs = relationship('Calc')
calcs = relationship('Calc', lazy='subquery')
@staticmethod
def from_upload_id(upload_id: str) -> 'Upload':
......
......@@ -115,12 +115,14 @@ class CalcWithMetadata():
self.code_name: str = None
self.code_version: str = None
self.backend = None
self.update(**kwargs)
def to_dict(self):
return {
key: value for key, value in self.__dict__.items()
if value is not None
if value is not None and key not in ['backend']
}
def update(self, **kwargs):
......
......@@ -342,7 +342,9 @@ class LocalBackend(LegacyParserBackend):
if self._unknown_attributes.get(name) is None:
self.logger.debug('Access of unexpected backend attribute/method', attribute=name)
self._unknown_attributes[name] = name
return lambda *args, **kwargs: None
return getattr(self._delegate, name)
# return lambda *args, **kwargs: None
def finishedParsingSession(self, parserStatus, parserErrors, *args, **kwargs):
self._delegate.finishedParsingSession(parserStatus, parserErrors, *args, **kwargs)
......@@ -552,6 +554,8 @@ class LocalBackend(LegacyParserBackend):
target.files = repo_data['section_repository_info'].get('repository_filepaths', [])
target.mainfile = repo_data['section_calculation_info'].get('main_file', None)
target.backend = self
return target
def write_json(self, out: TextIO, pretty=True, filter: Callable[[str, Any], Any] = None):
......
......@@ -339,7 +339,7 @@ class NomadCeleryRequest(Request):
if infrastructure.repository_db is None:
infrastructure.setup_repository_db()
proc = unwarp_task(self.task, *args)
proc.fail('task timeout occurred', **kwargs)
proc.fail(event, **kwargs)
proc.process_status = PROCESS_COMPLETED
proc.on_process_complete(None)
proc.save()
......
......@@ -364,7 +364,7 @@ class Upload(Proc):
with utils.timer(
logger, 'upload deleted from index', step='delete',
upload_size=self.upload_files.size):
search.Entry.delete_upload(self.upload_id)
search.delete_upload(self.upload_id)
with utils.timer(
logger, 'staged upload deleted', step='delete',
......@@ -400,7 +400,10 @@ class Upload(Proc):
with utils.timer(
logger, 'index updated', step='publish',
upload_size=self.upload_files.size):
search.Entry.publish_upload(upload_with_metadata)
coe_upload = coe_repo.Upload.from_upload_id(upload_with_metadata.upload_id)
if coe_upload is not None:
search.publish(
[coe_calc.to_calc_with_metadata() for coe_calc in coe_upload.calcs])
with utils.timer(
logger, 'staged upload deleted', step='publish',
......
This diff is collapsed.
......@@ -13,3 +13,6 @@ content-type: application/json
GET http://localhost:8000/nomad/api/v2/uploads/ HTTP/1.1
Authorization: Basic bGVvbmFyZC5ob2ZzdGFkdGVyQG5vbWFkLWZhaXJkaS50ZXN0cy5kZTo=
###
GET http://localhost:9200/test_nomad_fairdi_calcs/_search HTTP/1.1
\ No newline at end of file
......@@ -20,6 +20,7 @@ import zipfile
import io
import inspect
from passlib.hash import bcrypt
from datetime import datetime
from nomad import config, coe_repo, search, parsing
from nomad.files import UploadFiles, PublicUploadFiles
......@@ -29,6 +30,7 @@ from tests.conftest import create_auth_headers, clear_elastic
from tests.test_files import example_file, example_file_mainfile, example_file_contents
from tests.test_files import create_staging_upload, create_public_upload
from tests.test_coe_repo import assert_coe_upload
from tests.test_search import assert_search_upload
def test_alive(client):
......@@ -241,6 +243,7 @@ class TestUploads:
self.assert_upload_does_not_exist(client, upload_id, test_user_auth)
assert_coe_upload(upload_id, user_metadata=metadata)
assert_search_upload(upload_id, published=True)
def assert_upload_does_not_exist(self, client, upload_id: str, test_user_auth):
# poll until publish/delete completed
......@@ -358,11 +361,12 @@ class TestUploads:
def test_post_metadata(
self, client, proc_infra, admin_user_auth, test_user_auth, test_user,
other_test_user, no_warn):
other_test_user, no_warn, example_user_metadata):
rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
upload = self.assert_upload(rv.data)
self.assert_processing(client, test_user_auth, upload['upload_id'])
metadata = dict(comment='test comment')
metadata = dict(**example_user_metadata)
metadata['_upload_time'] = datetime.now().isoformat()
self.assert_unstage(client, admin_user_auth, upload['upload_id'], proc_infra, metadata)
def test_post_metadata_forbidden(self, client, proc_infra, test_user_auth, no_warn):
......
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import names
import random
from essential_generators import DocumentGenerator
import datetime
from ase.data import chemical_symbols
from nomad import datamodel, parsing, utils
# Size of each example pool; the fixed seed makes pool generation reproducible.
number_of = 20

random.seed(0)
gen = DocumentGenerator()

# Pools of example values that generate_calc() draws from.
users = [(i + 1, names.get_first_name(), names.get_last_name(), gen.email()) for i in range(0, number_of)]
basis_sets = ['Numeric AOs', 'Gaussians', '(L)APW+lo', 'Plane waves']
xc_functionals = ['LDA', 'GGA', 'hybrid', 'meta-GGA', 'GW', 'unknown']
crystal_systems = ['triclinic', 'monoclinic', 'orthorombic', 'tetragonal', 'hexagonal', 'cubic']
systems = ['atom', 'molecule/cluster', '2D/surface', 'bulk']
comments = [gen.sentence() for _ in range(0, number_of)]
references = [(i + 1, gen.url()) for i in range(0, number_of)]
datasets = [(i + 1, gen.slug()) for i in range(0, number_of)]
codes = [parser[8:] for parser in parsing.parser_dict.keys()]  # strip the 'parsers/' prefix
files = ['/'.join(gen.url().split('/')[3:]) for _ in range(0, number_of)]

# Choosing uniformly from these lists skews towards small counts.
low_numbers_for_atoms = [1, 1, 2, 2, 2, 2, 2, 3, 3, 4]
low_numbers_for_refs_and_datasets = [0, 0, 0, 0, 1, 1, 1, 2]
def _gen_user():
    """Pick a random entry from the module-level ``users`` pool as a POPO."""
    user_id, first_name, last_name, email = random.choice(users)
    return utils.POPO(
        id=user_id, first_name=first_name, last_name=last_name, email=email)
def _gen_dataset():
    """Pick a random dataset from the pool and attach a random DOI reference."""
    dataset_id, dataset_name = random.choice(datasets)
    # _gen_ref() is called after the dataset choice to keep the RNG sequence
    # identical to the original implementation.
    doi = _gen_ref()
    return utils.POPO(id=dataset_id, name=dataset_name, doi=doi)
def _gen_ref():
    """Pick a random (id, url) reference from the pool as a POPO."""
    ref_id, ref_url = random.choice(references)
    return utils.POPO(id=ref_id, value=ref_url)
def generate_calc(pid: int = 0) -> datamodel.CalcWithMetadata:
    """Create one random example calculation.

    Re-seeds the RNG with ``pid``, so the same pid always produces the same
    metadata; all values are drawn from the module-level example pools.

    Arguments:
        pid: The persistent id to assign; also used as the random seed.

    Returns:
        A populated :class:`datamodel.CalcWithMetadata`.
    """
    random.seed(pid)  # deterministic output per pid

    self = datamodel.CalcWithMetadata()

    # identifiers and upload bookkeeping
    self.upload_id = utils.create_uuid()
    self.calc_id = utils.create_uuid()
    self.upload_time = datetime.datetime.now()
    self.calc_hash = utils.create_uuid()
    self.pid = pid
    self.mainfile = random.choice(files)
    # the mainfile plus a small random selection of auxiliary files
    self.files = list([self.mainfile] + random.choices(files, k=random.choice(low_numbers_for_atoms)))

    # user-provided metadata
    self.uploader = _gen_user()
    self.with_embargo = random.choice([True, False])
    self.published = True
    self.coauthors = list(_gen_user() for _ in range(0, random.choice(low_numbers_for_refs_and_datasets)))
    self.shared_with = list(_gen_user() for _ in range(0, random.choice(low_numbers_for_refs_and_datasets)))
    self.comment = random.choice(comments)
    self.references = list(_gen_ref() for _ in range(0, random.choice(low_numbers_for_refs_and_datasets)))
    self.datasets = list(
        _gen_dataset()
        for _ in range(0, random.choice(low_numbers_for_refs_and_datasets)))

    # domain (DFT) metadata
    self.atoms = list(random.choices(chemical_symbols[1:], k=random.choice(low_numbers_for_atoms)))
    self.formula = ''.join('%s%d' % (atom, random.choice(low_numbers_for_atoms)) for atom in self.atoms)
    self.formula = self.formula.replace('1', '')  # drop explicit '1' counts (counts are single digits here)
    self.basis_set = random.choice(basis_sets)
    self.xc_functional = random.choice(xc_functionals)
    self.system = random.choice(systems)
    self.crystal_system = random.choice(crystal_systems)
    self.spacegroup = '1'
    self.code_name = random.choice(codes)
    self.code_version = '1.0.0'

    return self
if __name__ == '__main__':
    # Smoke test: generate a couple of example calcs and report the mean
    # generation time per calc.
    import time

    count = 2
    started = time.time()
    for example_pid in range(count):
        print(generate_calc(example_pid).to_dict())
    print('%f' % ((time.time() - started) / count))
......@@ -67,16 +67,6 @@ class TestLocalBackend(object):
    def test_meta_info(self, meta_info, no_warn):
        """The loaded meta-info must contain the section_topology definition."""
        assert 'section_topology' in meta_info
    def test_metadata(self, backend, no_warn):
        """Open calculation/repository info sections and check that the
        collected backend metadata serializes to JSON."""
        g_index = backend.openSection('section_calculation_info')
        assert g_index == 0  # first opened section gets index 0
        backend.addValue('calc_id', 't0')
        backend.closeSection('section_calculation_info', 0)

        g_index = backend.openSection('section_repository_info')
        backend.addValue('repository_calc_id', 1)
        backend.closeSection('section_repository_info', 0)

        assert json.dumps(backend.metadata()) is not None
def test_section(self, backend, no_warn):
g_index = backend.openSection('section_run')
assert g_index == 0
......
......@@ -14,8 +14,8 @@
from elasticsearch_dsl import Q
from nomad import datamodel, search, processing, parsing
from nomad.search import Entry
from nomad import datamodel, search, processing, parsing, infrastructure, config, coe_repo
from nomad.search import Entry, aggregate_search, authors
def test_init_mapping(elastic):
......@@ -47,12 +47,44 @@ def test_index_upload(elastic, processed: processing.Upload):
pass
def test_search(elastic, normalized: parsing.LocalBackend):
    """A single indexed entry must be found and aggregated by system type."""
    entry_metadata = normalized.to_calc_with_metadata()
    create_entry(entry_metadata)
    refresh_index()

    total, hits, aggs = aggregate_search()
    assert total == 1
    assert hits[0]['calc_id'] == entry_metadata.calc_id
    system_agg = aggs['system']
    assert 'Bulk' in system_agg
    assert system_agg['Bulk'] == 1
def test_authors(elastic, normalized: parsing.LocalBackend, test_user: coe_repo.User, other_test_user: coe_repo.User):
    """Paginated author aggregation: one author per page, ``after`` names the page key."""
    metadata = normalized.to_calc_with_metadata()

    # Index the same calc twice, under two different uploaders.
    metadata.uploader = test_user.to_popo()
    create_entry(metadata)

    metadata.calc_id = 'other test calc'
    metadata.uploader = other_test_user.to_popo()
    create_entry(metadata)

    refresh_index()

    results, after = authors(per_page=1)
    assert len(results) == 1
    first_author = next(iter(results.keys()))
    assert after == first_author
def refresh_index():
    """Force an Elasticsearch index refresh so freshly saved entries are searchable."""
    client = infrastructure.elastic_client
    client.indices.refresh(index=config.elastic.index_name)
def create_entry(calc_with_metadata: datamodel.CalcWithMetadata):
    """Index *calc_with_metadata* and assert it can be retrieved.

    Saves exactly once; the diff residue in this span left both the old
    ``.save(refresh=True)`` and the new ``.save()`` call, which would index
    the entry twice. The index refresh happens in :func:`assert_entry` via
    :func:`refresh_index`, so no per-save ``refresh=True`` is needed.
    """
    search.Entry.from_calc_with_metadata(calc_with_metadata).save()
    assert_entry(calc_with_metadata.calc_id)
def assert_entry(calc_id):
refresh_index()
calc = Entry.get(calc_id)
assert calc is not None
......@@ -60,3 +92,36 @@ def assert_entry(calc_id):
assert search.count() == 1
results = list(hit.to_dict() for hit in search)
assert results[0]['calc_id'] == calc_id
def assert_search_upload(upload_id, published: bool = False):
    """Check the indexed entries of an upload.

    When *published* is set, every hit must carry a positive pid and the
    published flag; in all cases every coauthor entry must have a name.
    """
    refresh_index()
    hits = Entry.search().query('match_all')[0:10]
    if hits.count() > 0:
        for hit in hits:
            entry = hit.to_dict()
            if published:
                assert int(entry.get('pid')) > 0
                assert entry.get('published')
            for coauthor in entry.get('coauthors', []):
                assert coauthor.get('name', None) is not None
if __name__ == '__main__':
    # CLI utility: fill the Elasticsearch index with random example calculations.
    from test_datamodel import generate_calc
    from elasticsearch.helpers import bulk
    import sys

    print('Generate index with random example calculation data. First arg is number of items')
    infrastructure.setup_elastic()

    n = int(sys.argv[1]) if len(sys.argv) > 1 else 100

    def gen_data():
        # Stream one bulk-index action per generated calc.
        for pid in range(n):
            entry = Entry.from_calc_with_metadata(generate_calc(pid))
            yield entry.to_dict(include_meta=True)

    bulk(infrastructure.elastic_client, gen_data())
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment