Commit a9315274 authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Rename operation->command. Test fixture for bravado client. Basic migrate test.

parent 2c490025
......@@ -44,7 +44,7 @@
"cwd": "${workspaceFolder}",
"program": "${workspaceFolder}/.pyenv/bin/pytest",
"args": [
"-sv", "tests/test_api.py::TestRaw::test_raw_files[test_data0]"
"-sv", "tests/test_client.py"
]
},
{
......
......@@ -193,7 +193,7 @@ async function commitUpload(uploadId) {
return client.apis.uploads.exec_upload_command({
upload_id: uploadId,
payload: {
operation: 'commit'
command: 'commit'
}
})
.catch(handleApiError)
......
......@@ -32,10 +32,10 @@ class AdminRemoveResource(Resource):
@login_really_required
def post(self):
"""
The ``reset`` operation will attempt to clear the contents of all databased and
The ``reset`` command will attempt to clear the contents of all databases and
indices.
Nomad can be configured to disable reset and the operation might not be available.
Nomad can be configured to disable reset and the command might not be available.
"""
if not g.user.is_admin:
abort(401, message='Only the admin user can perform reset.')
......@@ -56,10 +56,10 @@ class AdminResetResource(Resource):
@login_really_required
def post(self):
"""
The ``remove``operation will attempt to remove all databases. Expect the
The ``remove`` command will attempt to remove all databases. Expect the
api to stop functioning after this request.
Nomad can be configured to disable remove and the operation might not be available.
Nomad can be configured to disable remove and the command might not be available.
"""
if not g.user.is_admin:
abort(401, message='Only the admin user can perform remove.')
......
......@@ -24,7 +24,7 @@ from werkzeug.datastructures import FileStorage
import os.path
from nomad import config
from nomad.processing import Upload
from nomad.processing import Upload, FAILURE
from nomad.processing import ProcessAlreadyRunning
from nomad.files import ArchiveBasedStagingUploadFiles
......@@ -103,8 +103,8 @@ upload_with_calcs_model = api.inherit('UploadWithPaginatedCalculations', upload_
}))
})
upload_operation_model = api.model('UploadOperation', {
'operation': fields.String(description='Currently commit is the only operation.'),
upload_command_model = api.model('UploadCommand', {
'command': fields.String(description='Currently commit is the only command.'),
'metadata': fields.Nested(model=upload_metadata_model, description='Additional upload and calculation meta data. Will replace previously given metadata.')
})
......@@ -313,13 +313,13 @@ class UploadResource(Resource):
@api.doc('exec_upload_command')
@api.response(404, 'Upload does not exist or not in staging')
@api.response(400, 'Operation is not supported or the upload is still/already processed')
@api.response(401, 'If the operation is not allowed for the current user')
@api.response(401, 'If the command is not allowed for the current user')
@api.marshal_with(upload_model, skip_none=True, code=200, description='Upload committed successfully')
@api.expect(upload_operation_model)
@api.expect(upload_command_model)
@login_really_required
def post(self, upload_id):
"""
Execute an upload operation. Available operations: ``commit``
Execute an upload command. Available commands: ``commit``
Unstage accepts further meta data that allows to provide coauthors, comments,
external references, etc. See the model for details. The fields that start with
......@@ -340,7 +340,7 @@ class UploadResource(Resource):
if json_data is None:
json_data = {}
operation = json_data.get('operation')
command = json_data.get('command')
metadata = json_data.get('metadata', {})
for key in metadata:
......@@ -349,9 +349,11 @@ class UploadResource(Resource):
abort(401, message='Only admin users can use _metadata_keys.')
break
if operation == 'commit':
if command == 'commit':
if upload.tasks_running:
abort(400, message='The upload is not processed yet')
if upload.tasks_status == FAILURE:
abort(400, message='Cannot commit an upload that failed processing')
try:
upload.metadata = metadata
upload.commit_upload()
......@@ -360,7 +362,7 @@ class UploadResource(Resource):
return upload, 200
abort(400, message='Unsuported operation %s.' % operation)
abort(400, message='Unsupported command %s.' % command)
upload_command_model = api.model('UploadCommand', {
......
......@@ -17,7 +17,7 @@ Swagger/bravado based python client library for the API and various usefull shel
"""
from . import local, migration, misc, upload
from .main import cli
from .main import cli, create_client
if __name__ == '__main__':
......
......@@ -28,7 +28,7 @@ pw = 'password'
def create_client():
return create_client()
return _create_client()
def _create_client(
......
......@@ -18,6 +18,7 @@ import time
import click
from nomad import utils
from nomad.processing import FAILURE, SUCCESS
from .main import cli, create_client
......@@ -45,7 +46,7 @@ def upload_file(file_path: str, name: str = None, offline: bool = False, commit:
upload = client.uploads.upload(file=f, name=name).response().result
click.echo('process online: %s' % file_path)
while upload.tasks_status not in ['SUCCESS', 'FAILURE']:
while upload.tasks_status not in [SUCCESS, FAILURE]:
upload = client.uploads.get_upload(upload_id=upload.upload_id).response().result
calcs = upload.calcs.pagination
if calcs is None:
......@@ -53,7 +54,7 @@ def upload_file(file_path: str, name: str = None, offline: bool = False, commit:
else:
total, successes, failures = (calcs.total, calcs.successes, calcs.failures)
ret = '\n' if upload.tasks_status in ('SUCCESS', 'FAILURE') else '\r'
ret = '\n' if upload.tasks_status in (SUCCESS, FAILURE) else '\r'
print(
'status: %s; task: %s; parsing: %d/%d/%d %s' %
......@@ -61,12 +62,12 @@ def upload_file(file_path: str, name: str = None, offline: bool = False, commit:
time.sleep(3)
if upload.tasks_status == 'FAILURE':
if upload.tasks_status == FAILURE:
click.echo('There have been errors:')
for error in upload.errors:
click.echo(' %s' % error)
elif commit:
client.uploads.exec_upload_command(upload_id=upload.upload_id, operation='commit').reponse()
client.uploads.exec_upload_command(upload_id=upload.upload_id, command='commit').response()
return upload.upload_id
......
......@@ -59,7 +59,7 @@ class CalcMetaData(Base): # type: ignore
filenames = Column(BYTEA)
location = Column(String)
version_id = Column(Integer, ForeignKey('codeversions.version_id'))
version = relationship('CodeVersion')
version = relationship('CodeVersion', lazy='joined', uselist=False)
class UserMetaData(Base): # type: ignore
......@@ -96,12 +96,20 @@ class Spacegroup(Base): # type: ignore
n = Column(Integer)
topic_code = 220
topic_atoms = 10
topic_system_type = 50
topic_xc_treatment = 75
topic_crystal_system = 90
topic_basis_set_type = 80
class Tag(Base): # type: ignore
__tablename__ = 'tags'
calc_id = Column(Integer, ForeignKey('calculations.calc_id'), primary_key=True)
calc = relationship('Calc')
tid = Column(Integer, ForeignKey('topics.tid'), primary_key=True)
topic = relationship('Topics')
topic = relationship('Topics', lazy='joined', uselist=False)
def __repr__(self):
return '<Tag(calc_id="%d", tid="%d)>' % (int(self.calc_id), int(self.tid))
......
......@@ -19,7 +19,9 @@ from sqlalchemy.orm import relationship, aliased
from sqlalchemy.sql.expression import literal
from nomad import infrastructure, datamodel
from nomad.datamodel import CalcWithMetadata
from . import base
from .user import User
from .base import Base, calc_citation_association, ownership, co_authorship, shareship, \
Tag, Topics, CalcSet, calc_dataset_containment, Citation
......@@ -39,6 +41,8 @@ class Calc(Base, datamodel.Calc): # type: ignore
owners = relationship('User', secondary=ownership, lazy='joined')
coauthors = relationship('User', secondary=co_authorship, lazy='joined')
shared_with = relationship('User', secondary=shareship, lazy='joined')
tags = relationship('Tag', lazy='joined')
spacegroup = relationship('Spacegroup', lazy='joined', uselist=False)
parents = relationship(
'Calc',
......@@ -125,6 +129,63 @@ class Calc(Base, datamodel.Calc): # type: ignore
tag = Tag(calc=self, topic=topic)
repo_db.add(tag)
_dataset_cache: dict = {}
def to_calc_with_metadata(self):
result = CalcWithMetadata(
upload_id=self.upload.upload_id if self.upload else None,
calc_id=self.calc_id)
for topic in [tag.topic for tag in self.tags]:
if topic.cid == base.topic_code:
result.program_name = topic.topic
elif topic.cid == base.topic_basis_set_type:
result.basis_set_type = topic.topic
elif topic.cid == base.topic_xc_treatment:
result.XC_functional_name = topic.topic
elif topic.cid == base.topic_system_type:
result.system_type = topic.topic
elif topic.cid == base.topic_atoms:
result.setdefault('atom_species', []).append(topic.topic)
elif topic.cid == base.topic_crystal_system:
result.crystal_system = topic.topic
else:
raise KeyError('topic cid %s.' % str(topic.cid))
result.program_version = self.calc_metadata.version.content
result.chemical_composition = self.calc_metadata.chemical_formula
result.space_group_number = self.spacegroup.n
result.setdefault('atom_species', []).sort()
datasets: List[DataSet] = []
for parent in self.parents:
parents = Calc._dataset_cache.get(parent, None)
if parents is None:
parents = parent.all_datasets
Calc._dataset_cache[parent] = parents
datasets.append(DataSet(parent))
datasets.extend(parents)
result.pid = self.pid
result.uploader = self.uploader.user_id
result.upload_time = self.calc_metadata.added
result.datasets = list(
dict(id=ds.id, dois=ds.dois, name=ds.name)
for ds in datasets)
result.with_embargo = self.with_embargo
result.comment = self.comment
result.references = self.references
result.coauthors = list(user.user_id for user in self.coauthors)
result.shared_with = list(user.user_id for user in self.shared_with)
return {
key: value for key, value in result.items()
if value is not None and value != []
}
CalcWithMetadata.register_mapping(Calc, Calc.to_calc_with_metadata)
class DataSet:
def __init__(self, dataset_calc: Calc) -> None:
......
......@@ -47,22 +47,16 @@ import datetime
from sqlalchemy import Column, Integer, String, Boolean, DateTime, ForeignKey
from sqlalchemy.orm import relationship
from nomad import utils, infrastructure, datamodel, files
from nomad import utils, infrastructure, datamodel
from nomad.datamodel import CalcWithMetadata
from . import base
from .user import User
from .calc import Calc
from .base import Base, CalcMetaData, UserMetaData, StructRatio, CodeVersion, Spacegroup, \
CalcSet, Citation
topic_code = 220
topic_atoms = 10
topic_system_type = 50
topic_xc_treatment = 75
topic_crystal_system = 90
topic_basis_set_type = 80
class UploadMetaData:
"""
Utility class that provides per upload meta data and overriding per calculation
......@@ -156,7 +150,7 @@ class Upload(Base, datamodel.Upload): # type: ignore
has_calcs = False
for calc in upload.calcs:
has_calcs = True
coe_upload._add_calculation(calc.to(files.Calc), upload_metadata.get(calc.mainfile))
coe_upload._add_calculation(calc.to(CalcWithMetadata), upload_metadata.get(calc.mainfile))
# commit
if has_calcs:
......@@ -175,7 +169,7 @@ class Upload(Base, datamodel.Upload): # type: ignore
return result
def _add_calculation(self, calc: files.Calc, calc_metadata: dict) -> None:
def _add_calculation(self, calc: CalcWithMetadata, calc_metadata: dict) -> None:
repo_db = infrastructure.repository_db
# table based properties
......@@ -219,13 +213,13 @@ class Upload(Base, datamodel.Upload): # type: ignore
repo_db.add(spacegroup)
# topic based properties
coe_calc.set_value(topic_code, calc.program_name)
coe_calc.set_value(base.topic_code, calc.program_name)
for atom in set(calc.atom_species):
coe_calc.set_value(topic_atoms, str(atom)) # TODO atom label not number
coe_calc.set_value(topic_system_type, calc.system_type)
coe_calc.set_value(topic_xc_treatment, calc.XC_functional_name) # TODO function->treatment
coe_calc.set_value(topic_crystal_system, calc.crystal_system)
coe_calc.set_value(topic_basis_set_type, calc.basis_set_type)
coe_calc.set_value(base.topic_atoms, str(atom)) # TODO atom label not number
coe_calc.set_value(base.topic_system_type, calc.system_type)
coe_calc.set_value(base.topic_xc_treatment, calc.XC_functional_name) # TODO function->treatment
coe_calc.set_value(base.topic_crystal_system, calc.crystal_system)
coe_calc.set_value(base.topic_basis_set_type, calc.basis_set_type)
# user relations
owner_user_id = calc_metadata.get('_uploader', int(self.user_id))
......
......@@ -21,7 +21,7 @@ It is not about representing every detail, but those parts that are directly inv
api, processing, migration, mirroring, or other 'infrastructure' operations.
"""
from typing import Type, TypeVar, Union, Iterable, cast
from typing import Type, TypeVar, Union, Iterable, cast, Callable, Dict
import datetime
T = TypeVar('T')
......@@ -95,3 +95,58 @@ class Upload(Entity):
@property
def calcs(self) -> Iterable[Calc]:
raise NotImplementedError
class UploadWithMetadata(dict, Entity):
def __init__(self, upload_id):
self.upload_id = upload_id
class CalcWithMetadata(dict, Entity):
"""
A dict/POPO class that can be used for mapping calc representations with calc metadata.
We have many representations of calcs and their calc metadata. To avoid implement
mappings between all combinations, just implement mappings with the class and use
mapping transitivity. E.g. instead of A -> B, A -> this -> B.
The other calc representations can register mappings from them, in order to allow
to use this classes `load_from` method.
"""
mappings: Dict[Type[Entity], Callable[[Entity], 'CalcWithMetadata']] = dict()
@classmethod
def register_mapping(
cls, from_type: Type[T], mapping: Callable[[T], 'CalcWithMetadata']):
"""
Register a mapping from instances of another calc representation to instances of
:class:`CalcWithMetadata`.
Arguments:
from_type: The source calc type of the mapping.
mapping: The mapping itself as a callable that takes a source object of the
source calc type and returns an instance of :class:`CalcWithMetadata`.
"""
cls.mappings[from_type] = mapping
@classmethod
def load_from(cls, obj):
return CalcWithMetadata.mappings[obj.__class__](obj)
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.upload = UploadWithMetadata(kwargs['upload_id'])
def __getattr__(self, name):
if name in self:
return self[name]
else:
raise AttributeError("No such attribute: " + name)
def __setattr__(self, name, value):
self[name] = value
def __delattr__(self, name):
if name in self:
del self[name]
else:
raise AttributeError("No such attribute: " + name)
......@@ -36,7 +36,7 @@ almost readonly (beside metadata) storage.
"""
from abc import ABCMeta
from typing import IO, Generator, Dict, Iterator, Iterable, Callable, List
from typing import IO, Generator, Dict, Iterator, Iterable, Callable
import ujson
import os.path
import os
......@@ -238,7 +238,7 @@ class Restricted(Exception):
pass
class UploadFiles(DirectoryObject, metaclass=ABCMeta):
class UploadFiles(DirectoryObject, datamodel.Entity, metaclass=ABCMeta):
_archive_ext = 'json'
......@@ -684,10 +684,14 @@ class Calc(datamodel.Calc):
def __init__(self, upload_id: str, calc_id: str) -> None:
self._calc_id = calc_id
upload_files = UploadFiles.get(upload_id, is_authorized=lambda: True)
if upload_files is None:
self.upload_files = UploadFiles.get(upload_id, is_authorized=lambda: True)
if self.upload_files is None:
raise KeyError
self._data = upload_files.metadata.get(calc_id)
self._data = self.upload_files.metadata.get(calc_id)
@property
def upload(self):
return self.upload_files
@property
def calc_data(self) -> dict:
......@@ -699,44 +703,24 @@ class Calc(datamodel.Calc):
@property
def mainfile(self) -> str:
return self.files[0]
@property
def files(self) -> List[str]:
return self._data['section_repository_info']['repository_filepaths']
@property
def program_name(self) -> str:
return self.calc_data['repository_program_name']
@property
def program_version(self) -> str:
return self.calc_data['repository_code_version']
@property
def chemical_composition(self) -> str:
return self.calc_data['repository_chemical_formula']
@property
def space_group_number(self) -> int:
return self.calc_data['repository_spacegroup_nr']
@property
def atom_species(self) -> list:
return self.calc_data['repository_atomic_elements']
@property
def system_type(self) -> str:
return self.calc_data['repository_system_type']
@property
def XC_functional_name(self) -> str:
return self.calc_data['repository_xc_treatment']
@property
def crystal_system(self) -> str:
return self.calc_data['repository_crystal_system']
@property
def basis_set_type(self) -> str:
return self.calc_data['repository_basis_set_type']
return self._data['section_repository_info']['repository_filepaths'][0]
def to_calc_with_metadata(self):
target = datamodel.CalcWithMetadata(upload_id=self.upload.upload_id)
target.calc_id = self.calc_id
target.basis_set_type = self.calc_data['repository_basis_set_type']
target.crystal_system = self.calc_data['repository_crystal_system']
target.XC_functional_name = self.calc_data['repository_xc_treatment']
target.system_type = self.calc_data['repository_system_type']
target.atom_species = self.calc_data['repository_atomic_elements']
target.space_group_number = self.calc_data['repository_spacegroup_nr']
target.chemical_composition = self.calc_data['repository_chemical_formula']
target.program_version = self.calc_data['repository_code_version']
target.program_name = self.calc_data['repository_program_name']
target.files = self._data['section_repository_info']['repository_filepaths']
target.mainfile = self.mainfile
return target
datamodel.CalcWithMetadata.register_mapping(Calc, Calc.to_calc_with_metadata)
......@@ -15,21 +15,33 @@
"""
This module contains functions to read data from NOMAD coe, external sources,
other/older nomad@FAIRDI instances to mass upload it to a new nomad@FAIRDI instance.
.. autoclass:: NomadCOEMigration
.. autoclass:: SourceCalc
"""
from typing import Generator, Tuple, List
import os.path
import json
import zipstream
import math
from mongoengine import Document, IntField, StringField, DictField
from passlib.hash import bcrypt
from nomad import utils
from nomad.coe_repo import User, Calc, DataSet
from nomad import utils, config
from nomad.coe_repo import User, Calc
from nomad.datamodel import CalcWithMetadata
from nomad.processing import FAILURE, SUCCESS
class SourceCalc(Document):
"""
Mongo document used as a calculation, upload, and metadata db and index
build from a given source db.
build from a given source db. Each :class:`SourceCalc` entry relates
a pid, mainfile, upload "id" with each other for a corresponding calculation.
It might also contain the user metadata. The uploads are "id"ed via the
specific path segment that identifies an upload on the CoE repo FS(s) without
any prefixes (e.g. $EXTRACTED, /data/upload, etc.)
"""
pid = IntField(primary_key=True)
mainfile = StringField()
......@@ -40,50 +52,26 @@ class SourceCalc(Document):
sites = ['/data/nomad/extracted/', '/nomad/repository/extracted/']
prefixes = [extracted_prefix] + sites
_dataset_cache: dict = {}
meta = dict(indexes=['pid', 'upload'])
@staticmethod
def _read_metadata(calc: Calc) -> dict:
datasets: List[DataSet] = []
for parent in calc.parents:
parents = SourceCalc._dataset_cache.get(parent, None)
if parents is None:
parents = parent.all_datasets
SourceCalc._dataset_cache[parent] = parents
datasets.append(DataSet(parent))
datasets.extend(parents)
metadata = dict(
_pid=calc.pid,
_uploader=calc.uploader.user_id,
_upload_time=calc.calc_metadata.added,
_datasets=list(
dict(id=ds.id, dois=ds.dois, name=ds.name)
for ds in datasets),
with_embargo=calc.with_embargo,
comment=calc.comment,
references=calc.references,
coauthors=list(user.user_id for user in calc.coauthors),
shared_with=list(user.user_id for user in calc.shared_with)
)
return {
key: value for key, value in metadata.items()
if value is not None and value != []
}
_dataset_cache: dict = {}
@staticmethod
def index(source, drop: bool = False, with_metadata: bool = True, per_query: int = 100) \
-> Generator[Tuple['SourceCalc', int], None, None]:
"""
Creates a collection of :class:`SourceCalc` documents that represent source repo
db entries. Each document relates a calc's (pid, mainfile, upload). Where
upload is the 'id'/prefix of an upload directory or upload file in the source repo's
filesystem.
db entries.
Arguments:
source: The source db sql alchemy session
drop: True to create a new collection, update the existing otherwise, default is False.
with_metadata: True to also grab all user metadata and store it, default is True.
drop: True to drop and create a new collection, update the existing otherwise,
default is False.
with_metadata: True to also grab all metadata and store it, default is True.