From 2a70925da081442b740176844f8c45b4d9a03397 Mon Sep 17 00:00:00 2001 From: Markus Scheidgen <markus.scheidgen@gmail.com> Date: Fri, 23 Aug 2019 19:02:24 +0200 Subject: [PATCH] Removed most repository db related artifacts. --- .gitlab-ci.yml | 8 - docs/introduction.md | 12 +- docs/reference.rst | 4 - docs/setup.md | 2 +- nomad/api/app.py | 24 +- nomad/api/auth.py | 246 +-- nomad/api/upload.py | 6 +- nomad/cli/admin/uploads.py | 21 +- nomad/cli/client/migration.py | 307 --- nomad/coe_repo/__init__.py | 52 - nomad/coe_repo/base.py | 148 -- nomad/coe_repo/calc.py | 451 ---- nomad/coe_repo/upload.py | 248 --- nomad/coe_repo/user.py | 279 --- nomad/config.py | 13 - nomad/datamodel/__init__.py | 9 +- nomad/datamodel/base.py | 67 +- nomad/empty_repository_db.sql | 1962 ----------------- nomad/files.py | 7 - nomad/infrastructure.py | 381 +--- nomad/migration.py | 1749 --------------- nomad/processing/base.py | 7 +- nomad/processing/data.py | 79 +- nomad/search.py | 40 +- ops/deployments/nomad.experiments.values.yaml | 3 - ops/deployments/nomad.migration.values.yaml | 3 - ops/deployments/nomad.prod-1.values.yaml | 9 - ops/deployments/nomad.prod-2.values.yaml | 9 - ops/deployments/nomad.prod-test.values.yaml | 5 - ops/deployments/nomad.reprocess.values.yaml | 5 - ops/deployments/nomad.staging.values.yaml | 9 - ops/deployments/nomad.testing.values.yaml | 3 - .../nomad/docker-compose.override.yml | 5 - .../nomad/docker-compose.prod.yml | 5 - ops/docker-compose/nomad/docker-compose.yml | 14 - ops/helm/nomad/templates/api-deployment.yaml | 7 - ops/helm/nomad/templates/nomad-configmap.yml | 13 - .../nomad/templates/worker-deployment.yaml | 17 - ops/helm/nomad/values.yaml | 9 - requirements.txt | 2 - tests/conftest.py | 189 +- .../migration/archive/upload/archive.tar.gz | Bin 825 -> 0 bytes .../migration/baseline/upload/1/template.json | 110 - .../migration/baseline/upload/2/template.json | 110 - tests/data/migration/example.tar.gz | Bin 454 -> 0 bytes tests/data/migration/example_source_db.sql | 64 - .../failed_calc/upload/1/template.json | 110 - .../failed_calc/upload/2/template.json | 4 - .../failed_upload/upload/1/template.json | 110 - .../failed_upload/upload/2/template.json | 110 - .../missing_calc/upload/1/template.json | 110 - .../missmatch/upload/1/template.json | 110 - .../missmatch/upload/2/template.json | 110 - .../migration/new_calc/upload/1/template.json | 110 - .../migration/new_calc/upload/2/template.json | 110 - .../migration/new_calc/upload/3/template.json | 110 - .../new_upload/new_upload/1/template.json | 110 - .../new_upload/new_upload/2/template.json | 110 - .../migration/packaging/baseline/1/file.txt | 1 - .../migration/packaging/baseline/2/file.txt | 1 - .../packaging/baseline/RESTRICTED_99 | 0 .../packaging/restriction/1/file.txt | 1 - .../packaging/restriction/2/file.txt | 1 - .../packaging/restriction/RESTRICTED_24 | 0 .../migration/packaging/too_big/1/file.txt | 1 - .../migration/packaging/too_big/2/file.txt | 1 - .../baseline/archive.tar.gz | Bin 215 -> 0 bytes .../restriction/archive.tar.gz | Bin 212 -> 0 bytes .../packaging_archived/too_big/archive.tar.gz | Bin 199 -> 0 bytes .../packaging_oqmd/baseline/oqmd/one.tar.gz | Bin 194 -> 0 bytes .../packaging_oqmd/baseline/oqmd/two.tar.gz | Bin 161 -> 0 bytes .../restriction/oqmd/archive.tar.gz | Bin 212 -> 0 bytes .../too_big/oqmd/archive.tar.gz | Bin 199 -> 0 bytes tests/data/migration/too_big.tar.gz | Bin 243 -> 0 bytes tests/data/test_coe_uploads.txt | 17 - tests/processing/test_data.py | 49 +- tests/test_api.py | 49 +- 
tests/test_coe_repo.py | 242 -- tests/test_datamodel.py | 6 +- tests/test_files.py | 7 - tests/test_migration.py | 361 --- tests/test_search.py | 11 +- 82 files changed, 372 insertions(+), 8303 deletions(-) delete mode 100644 nomad/cli/client/migration.py delete mode 100644 nomad/coe_repo/__init__.py delete mode 100644 nomad/coe_repo/base.py delete mode 100644 nomad/coe_repo/calc.py delete mode 100644 nomad/coe_repo/upload.py delete mode 100644 nomad/coe_repo/user.py delete mode 100644 nomad/empty_repository_db.sql delete mode 100644 nomad/migration.py delete mode 100644 tests/data/migration/archive/upload/archive.tar.gz delete mode 100644 tests/data/migration/baseline/upload/1/template.json delete mode 100644 tests/data/migration/baseline/upload/2/template.json delete mode 100644 tests/data/migration/example.tar.gz delete mode 100644 tests/data/migration/example_source_db.sql delete mode 100644 tests/data/migration/failed_calc/upload/1/template.json delete mode 100644 tests/data/migration/failed_calc/upload/2/template.json delete mode 100644 tests/data/migration/failed_upload/upload/1/template.json delete mode 100644 tests/data/migration/failed_upload/upload/2/template.json delete mode 100644 tests/data/migration/missing_calc/upload/1/template.json delete mode 100644 tests/data/migration/missmatch/upload/1/template.json delete mode 100644 tests/data/migration/missmatch/upload/2/template.json delete mode 100644 tests/data/migration/new_calc/upload/1/template.json delete mode 100644 tests/data/migration/new_calc/upload/2/template.json delete mode 100644 tests/data/migration/new_calc/upload/3/template.json delete mode 100644 tests/data/migration/new_upload/new_upload/1/template.json delete mode 100644 tests/data/migration/new_upload/new_upload/2/template.json delete mode 100644 tests/data/migration/packaging/baseline/1/file.txt delete mode 100644 tests/data/migration/packaging/baseline/2/file.txt delete mode 100644 tests/data/migration/packaging/baseline/RESTRICTED_99 delete mode 100644 tests/data/migration/packaging/restriction/1/file.txt delete mode 100644 tests/data/migration/packaging/restriction/2/file.txt delete mode 100644 tests/data/migration/packaging/restriction/RESTRICTED_24 delete mode 100644 tests/data/migration/packaging/too_big/1/file.txt delete mode 100644 tests/data/migration/packaging/too_big/2/file.txt delete mode 100644 tests/data/migration/packaging_archived/baseline/archive.tar.gz delete mode 100644 tests/data/migration/packaging_archived/restriction/archive.tar.gz delete mode 100644 tests/data/migration/packaging_archived/too_big/archive.tar.gz delete mode 100644 tests/data/migration/packaging_oqmd/baseline/oqmd/one.tar.gz delete mode 100644 tests/data/migration/packaging_oqmd/baseline/oqmd/two.tar.gz delete mode 100644 tests/data/migration/packaging_oqmd/restriction/oqmd/archive.tar.gz delete mode 100644 tests/data/migration/packaging_oqmd/too_big/oqmd/archive.tar.gz delete mode 100644 tests/data/migration/too_big.tar.gz delete mode 100755 tests/data/test_coe_uploads.txt delete mode 100644 tests/test_coe_repo.py delete mode 100644 tests/test_migration.py diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index bb59cda902..0d6f084a31 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -76,10 +76,6 @@ tests: stage: test image: $TEST_IMAGE services: - - postgres:latest - # this will cause a warning, as the gitlab ci runner health check will test the wrong - # port on rabbitmq container: - # https://gitlab.com/gitlab-org/gitlab-runner/issues/3163 - rabbitmq - name: 
docker.elastic.co/elasticsearch/elasticsearch:6.3.2 alias: elastic @@ -87,14 +83,10 @@ tests: # https://gitlab.com/gitlab-org/gitlab-ce/issues/42214 command: [ "bin/elasticsearch", "-Ediscovery.type=single-node" ] variables: - POSTGRES_DB: nomad - POSTGRES_USER: postgres - POSTGRES_PASSWORD: nomad RABBITMQ_ERLANG_COOKIE: SWQOKODSQALRPCLNMEQG RABBITMQ_DEFAULT_USER: rabbitmq RABBITMQ_DEFAULT_PASS: rabbitmq RABBITMQ_DEFAULT_VHOST: / - NOMAD_REPOSITORY_DB_HOST: postgres NOMAD_RABBITMQ_HOST: rabbitmq NOMAD_ELASTIC_HOST: elastic NOMAD_MONGO_HOST: mongo diff --git a/docs/introduction.md b/docs/introduction.md index eadb7a6a8b..63b1a942fb 100644 --- a/docs/introduction.md +++ b/docs/introduction.md @@ -81,13 +81,9 @@ processing of uploaded files and therein contained calculations. We use [mongoengine](http://docs.mongoengine.org/) to program with mongodb. -### PostgreSQL +### Keycloak -A relational database is used to store all user provided metadata: users, datasets -(curated sets of uploaded data), references, comments, DOIs, coauthors, etc. -Furthermore, it is still used to store some of the calculation metadata derived -via parsing. *This will most likely move out of Postgres in the future.* We -use [SQLAlchemy](https://docs.sqlalchemy.org/en/latest/) as on ORM framework. +TODO ### flask, et al. @@ -124,7 +120,7 @@ The component library [Material-UI](https://material-ui.com/) ### docker To run a **nomad@FAIRDI** instance, many services have to be orchestrated: -the nomad api, nomad worker, mongodb, Elasticsearch, PostgreSQL, RabbitMQ, +the nomad api, nomad worker, mongodb, Elasticsearch, Keycloak, RabbitMQ, Elasticstack (logging), the nomad GUI, and a reverse proxy to keep everything together. Further services might be needed (e.g. JypiterHUB), when nomad grows. The container platform [Docker](https://docs.docker.com/) allows us to provide all services @@ -224,7 +220,7 @@ passed, stored, etc. by the various nomad modules. ### Implementation The different entities have often multiple implementations for different storage systems. For example, aspects of calculations are stored in files (raw files, calc metadata, archive data), -Postgres (user metadata), Elasticsearch (metadata), and mongodb (processing state). +Elasticsearch (metadata), and mongodb (metadata, processing state). Different transformation between different implementations exist. See :py:mod:`nomad.datamodel` for further information. diff --git a/docs/reference.rst b/docs/reference.rst index 7779ce4750..bbbb50e308 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -38,10 +38,6 @@ nomad.search .. automodule:: nomad.search :members: -nomad.coe_repo --------------- -.. automodule:: nomad.coe_repo - nomad.api --------- .. automodule:: nomad.api diff --git a/docs/setup.md b/docs/setup.md index baeadd8b14..d86456825d 100644 --- a/docs/setup.md +++ b/docs/setup.md @@ -263,7 +263,7 @@ The rest should be mocked or provided by the tests. Make sure that you do no run worker, as they will fight for tasks in the queue. ``` cd ops/docker-compose -docker-compose up -d elastic rabbitmq postgres +docker-compose up -d elastic rabbitmq cd ../.. 
pytest -svx tests ``` diff --git a/nomad/api/app.py b/nomad/api/app.py index 14f65dc308..a1a6a28834 100644 --- a/nomad/api/app.py +++ b/nomad/api/app.py @@ -26,10 +26,8 @@ import inspect from datetime import datetime import pytz import random -from flask_oidc import OpenIDConnect -import json -from nomad import config, utils +from nomad import config, utils, infrastructure base_path = config.services.api_base_path """ Provides the root path of the nomad APIs. """ @@ -61,24 +59,6 @@ app.config.RESTPLUS_MASK_SWAGGER = False # type: ignore app.config.SWAGGER_UI_OPERATION_ID = True # type: ignore app.config.SWAGGER_UI_REQUEST_DURATION = True # type: ignore -oidc_issuer_url = '%s/realms/%s' % (config.keycloak.server_url.rstrip('/'), config.keycloak.realm_name) -oidc_client_secrets = dict( - client_id=config.keycloak.client_id, - client_secret=config.keycloak.client_secret_key, - issuer=oidc_issuer_url, - auth_uri='%s/protocol/openid-connect/auth' % oidc_issuer_url, - token_uri='%s/protocol/openid-connect/token' % oidc_issuer_url, - userinfo_uri='%s/protocol/openid-connect/userinfo' % oidc_issuer_url, - token_introspection_uri='%s/protocol/openid-connect/token/introspect' % oidc_issuer_url, - redirect_uris=['http://localhost/fairdi/nomad/latest']) -oidc_client_secrets_file = os.path.join(config.fs.tmp, 'oidc_client_secrets') -with open(oidc_client_secrets_file, 'wt') as f: - json.dump(dict(web=oidc_client_secrets), f) -app.config.update(dict( - SECRET_KEY=config.services.api_secret, - OIDC_CLIENT_SECRETS=oidc_client_secrets_file, - OIDC_OPENID_REALM=config.keycloak.realm_name)) - def api_base_path_response(env, resp): resp('200 OK', [('Content-Type', 'text/plain')]) @@ -90,7 +70,7 @@ def api_base_path_response(env, resp): app.wsgi_app = DispatcherMiddleware( # type: ignore api_base_path_response, {config.services.api_base_path: app.wsgi_app}) -oidc = OpenIDConnect(app) +infrastructure.keycloak.configure_flask(app) CORS(app) diff --git a/nomad/api/auth.py b/nomad/api/auth.py index d6a2903d6b..d14968abe3 100644 --- a/nomad/api/auth.py +++ b/nomad/api/auth.py @@ -31,131 +31,60 @@ endpoints that require or support authentication. .. autofunction:: admin_login_required """ -from typing import Tuple from flask import g, request from flask_restplus import abort, Resource, fields -from datetime import datetime import functools -import basicauth +import jwt +import datetime -from nomad import config, processing, files, utils, coe_repo, infrastructure -from nomad.coe_repo import LoginException +from nomad import config, processing, files, utils, infrastructure, datamodel -from .app import api, RFC3339DateTime, oidc +from .app import api, RFC3339DateTime -class User: - """ - A data class that holds all information for a single user. This can be the logged in - and authenticated user, or other users (i.e. co-authors, etc.). 
- """ - def __init__( - self, email, name=None, first_name='', last_name='', affiliation=None, - created: datetime = None, **kwargs): - assert email is not None, 'Users must have an email, it is used as unique id' - - self.email = email - - first_name = kwargs.get('firstName', first_name) - last_name = kwargs.get('lastName', last_name) - name = kwargs.get('username', name) - created_timestamp = kwargs.get('createdTimestamp', None) - - if len(last_name) > 0 and len(first_name) > 0: - name = '%s, %s' % (last_name, first_name) - elif len(last_name) != 0: - name = last_name - elif len(first_name) != 0: - name = first_name - elif name is None: - name = 'unnamed user' - - self.name = name - - if created is not None: - self.created = None - elif created_timestamp is not None: - self.created = datetime.fromtimestamp(created_timestamp) - else: - self.created = None - - # TODO affliation - - -def _validate_token(require_token: bool = True, **kwargs) -> Tuple[bool, str]: - """ - Uses OIDC to check if the request carries token based authentication and if - this authentication is valid. - - Returns: A tuple with bool and potential error message - """ - token = None - if 'Authorization' in request.headers and request.headers['Authorization'].startswith('Bearer '): - token = request.headers['Authorization'].split(None, 1)[1].strip() - if 'access_token' in request.form: - token = request.form['access_token'] - elif 'access_token' in request.args: - token = request.args['access_token'] - - validity = oidc.validate_token(token, **kwargs) - - if validity: - g.oidc_id_token = g.oidc_token_info - - return (validity is True) or (not require_token), validity - - -def _get_user(): - """ - Retrieves OIDC user info and populate the global flask ``g.user`` variable. - """ - if g.oidc_id_token: - try: - g.user = User(**oidc.user_getinfo([ - 'email', 'firstName', 'lastName', 'username', 'createdTimestamp'])) - except Exception as e: - ## TODO logging - raise e - else: - g.user = None - - -def login_if_available(func): +def login_if_available(token_only: bool = True): """ A decorator for API endpoint implementations that might authenticate users, but provide limited functionality even without users. """ - @functools.wraps(func) - @api.response(401, 'Not authorized, some data require authentication and authorization') - @api.doc(security=list('OpenIDConnect Bearer Token')) - def wrapper(*args, **kwargs): - valid, msg = _validate_token(require_token=False) - if valid: - _get_user() + def decorator(func): + @functools.wraps(func) + @api.response(401, 'Not authorized, some data require authentication and authorization') + @api.doc(security=list('OpenIDConnect Bearer Token')) + def wrapper(*args, **kwargs): + user_or_error = infrastructure.keycloak.authorize_flask(token_only) + if user_or_error is None: + pass + elif isinstance(user_or_error, datamodel.User): + g.user = user_or_error + else: + abort(401, message=user_or_error) + return func(*args, **kwargs) - else: - abort(401, message=msg) - return wrapper + return wrapper + + return decorator -def login_really_required(func): +def login_really_required(token_only: bool = True): """ A decorator for API endpoint implementations that forces user authentication on endpoints. 
""" - @functools.wraps(func) - @api.response(401, 'Not authorized, this endpoint required authorization') - @api.doc(security=list('OpenIDConnect Bearer Token')) - def wrapper(*args, **kwargs): - valid, msg = _validate_token(require_token=True) - if valid: - _get_user() + def decorator(func): + @functools.wraps(func) + @api.response(401, 'Not authorized, this endpoint requires authorization') + @login_if_available(token_only) + def wrapper(*args, **kwargs): + if g.user is None: + abort(401, 'Not authorized, this endpoint requires authorization') + return func(*args, **kwargs) - else: - abort(401, message=msg) - return wrapper + return wrapper + + return decorator def admin_login_required(func): @@ -164,13 +93,12 @@ def admin_login_required(func): """ @functools.wraps(func) @api.response(401, 'Authentication required or not authorized as admin user. Only admin can access this endpoint.') - @api.doc(security=list('OpenIDConnect Bearer Token')) - @oidc.accept_token(require_token=True) + @login_really_required def wrapper(*args, **kwargs): - if oidc.user_getfield('email') == config.keycloak.adminEmail: - return func(*args, **kwargs) - else: - abort(401, message='Only the admin user can perform reset.') + if not g.user.is_admin: + abort(401, message='Only the admin user use this endpoint') + + return func(*args, **kwargs) return wrapper @@ -200,69 +128,11 @@ user_model = api.model('User', { class AuthResource(Resource): @api.doc('get_token') @api.marshal_with(user_model, skip_none=True, code=200, description='User info send') - @login_if_available + @login_really_required(token_only=False) def get(self): - if g.user is not None: - return g.user - - if 'Authorization' in request.headers and request.headers['Authorization'].startswith('Basic '): - try: - username, password = basicauth.decode(request.headers['Authorization']) - token = infrastructure.keycloak_oidc_client.token(username=username, password=password) - validity = oidc.validate_token(token['access_token']) - except Exception as e: - # TODO logging - abort(401, message='Could not authenticate Basic auth: %s' % str(e)) - - if validity is not True: - abort(401, message=validity) - else: - g.oidc_id_token = g.oidc_token_info - _get_user() - else: - abort(401, message='Authentication credentials found in your request') - - if g.user is None: - abort(401, message='User not authenticated') - return g.user -@ns.route('/user') -class UserResource(Resource): - @api.doc('create_user') - @api.expect(user_model) - @api.response(400, 'Invalid user data') - @api.marshal_with(user_model, skip_none=True, code=200, description='User created') - @admin_login_required - def put(self): - """ - Creates a new user account. Currently only the admin user is allows. The - NOMAD-CoE repository GUI should be used to create user accounts for now. - Passwords have to be encrypted by the client with bcrypt and 2y indent. - """ - data = request.get_json() - if data is None: - data = {} - - for required_key in ['last_name', 'first_name', 'password', 'email']: - if required_key not in data: - abort(400, message='The %s is missing' % required_key) - - if 'user_id' in data: - if coe_repo.User.from_user_id(data['user_id']) is not None: - abort(400, 'User with given user_id %d already exists.' 
% data['user_id']) - - user = coe_repo.User.create_user( - email=data['email'], password=data.get('password', None), crypted=True, - first_name=data['first_name'], last_name=data['last_name'], - created=data.get('created', datetime.utcnow()), - affiliation=data.get('affiliation', None), token=data.get('token', None), - user_id=data.get('user_id', None)) - - return user, 200 - - token_model = api.model('Token', { 'user': fields.Nested(user_model), 'token': fields.String(description='The short term token to sign URLs'), @@ -286,7 +156,11 @@ class TokenResource(Resource): URLs towards most API get request, e.g. for file downloads on the raw or archive api endpoints. Use the token query parameter to sign URLs. """ - token, expires_at = g.user.get_signature_token() + expires_at = datetime.datetime.utcnow() + datetime.timedelta(seconds=10) + token = jwt.encode( + dict(user=g.user.user_id, exp=expires_at), + config.services.api_secret, 'HS256').decode('utf-8') + return { 'user': g.user, 'token': token, @@ -298,18 +172,27 @@ def with_signature_token(func): """ A decorator for API endpoint implementations that validates signed URLs. """ + @functools.wraps(func) @api.response(401, 'Invalid or expired signature token') def wrapper(*args, **kwargs): token = request.args.get('token', None) if token is not None: try: - g.user = coe_repo.User.verify_signature_token(token) - except LoginException: - abort(401, 'Invalid or expired signature token') + decoded = jwt.decode(token, config.services.api_secret, algorithms=['HS256']) + user = datamodel.User.get(decoded['user']) + if user is None: + abort(401, 'User for token does not exist') + else: + g.user = user + except KeyError: + abort(401, 'Token with invalid/unexpected payload') + except jwt.ExpiredSignatureError: + abort(401, 'Expired token') + except jwt.InvalidTokenError: + abort(401, 'Invalid token') return func(*args, **kwargs) - wrapper.__name__ = func.__name__ - wrapper.__doc__ = func.__doc__ + return wrapper @@ -322,19 +205,12 @@ def create_authorization_predicate(upload_id, calc_id=None): if g.user is None: # guest users don't have authorized access to anything return False - elif g.user.user_id == 0: + elif g.user.is_admin: # the admin user does have authorization to access everything return True - # look in repository - upload = coe_repo.Upload.from_upload_id(upload_id) - if upload is not None: - return upload.user_id == g.user.user_id - - # look in staging - staging_upload = processing.Upload.get(upload_id) - if staging_upload is not None: - return str(g.user.user_id) == str(staging_upload.user_id) + # look in mongodb + return processing.Upload.get(upload_id).user_id == g.user.user_id # There are no db entries for the given resource if files.UploadFiles.get(upload_id) is not None: diff --git a/nomad/api/upload.py b/nomad/api/upload.py index 2e77466cd2..ddbc65ecb8 100644 --- a/nomad/api/upload.py +++ b/nomad/api/upload.py @@ -63,10 +63,10 @@ metadata_model = api.model('MetaData', { 'with_embargo': fields.Boolean(default=False, description='Data with embargo is only visible to the upload until the embargo period ended.'), 'comment': fields.String(description='The comment are shown in the repository for each calculation.'), 'references': fields.List(fields.String, descriptions='References allow to link calculations to external source, e.g.
URLs.'), - 'coauthors': fields.List(fields.Integer, description='A list of co-authors given by user_id.'), - 'shared_with': fields.List(fields.Integer, description='A list of users to share calculations with given by user_id.'), + 'coauthors': fields.List(fields.String, description='A list of co-authors given by user_id.'), + 'shared_with': fields.List(fields.String, description='A list of users to share calculations with given by user_id.'), '_upload_time': RFC3339DateTime(description='Overrride the upload time.'), - '_uploader': fields.Integer(description='Override the uploader with the given user id.'), + '_uploader': fields.String(description='Override the uploader with the given user id.'), 'datasets': fields.List(fields.Nested(model=dataset_model, skip_none=True), description='A list of datasets.') }) diff --git a/nomad/cli/admin/uploads.py b/nomad/cli/admin/uploads.py index 65ef9fc212..3eb1b7bfd9 100644 --- a/nomad/cli/admin/uploads.py +++ b/nomad/cli/admin/uploads.py @@ -20,7 +20,7 @@ from pymongo import UpdateOne import threading import elasticsearch_dsl as es -from nomad import processing as proc, config, infrastructure, utils, search, files, coe_repo +from nomad import processing as proc, config, infrastructure, utils, search, files, datamodel from .admin import admin @@ -90,17 +90,15 @@ def ls(ctx, uploads): @uploads.command(help='Change the owner of the upload and all its calcs.') -@click.argument('USER', nargs=1) +@click.argument('EMAIL', nargs=1) @click.argument('UPLOADS', nargs=-1) @click.pass_context -def chown(ctx, user, uploads): - infrastructure.setup_repository_db() +def chown(ctx, email, uploads): _, uploads = query_uploads(ctx, uploads) print('%d uploads selected, changing its owner ...' % uploads.count()) - user_id = user - user = coe_repo.User.from_user_id(int(user_id)) + user_id = datamodel.User.get_by_email(email).user_id for upload in uploads: upload.user_id = user_id @@ -142,26 +140,17 @@ def index(ctx, uploads): @uploads.command(help='Delete selected upload') @click.argument('UPLOADS', nargs=-1) -@click.option('--with-coe-repo', help='Also attempt to delete from repository db', is_flag=True) @click.option('--skip-es', help='Keep the elastic index version of the data.', is_flag=True) @click.option('--skip-mongo', help='Keep uploads and calcs in mongo.', is_flag=True) @click.option('--skip-files', help='Keep all related files.', is_flag=True) @click.pass_context -def rm(ctx, uploads, with_coe_repo, skip_es, skip_mongo, skip_files): +def rm(ctx, uploads, skip_es, skip_mongo, skip_files): _, uploads = query_uploads(ctx, uploads) logger = utils.get_logger(__name__) print('%d uploads selected, deleting ...' % uploads.count()) - if with_coe_repo: - from nomad import coe_repo - infrastructure.setup_repository_db() - for upload in uploads: - # delete repository db entry - if with_coe_repo: - coe_repo.Upload.delete(upload.upload_id) - # delete elastic if not skip_es: search.delete_upload(upload_id=upload.upload_id) diff --git a/nomad/cli/client/migration.py b/nomad/cli/client/migration.py deleted file mode 100644 index 71a313d8c7..0000000000 --- a/nomad/cli/client/migration.py +++ /dev/null @@ -1,307 +0,0 @@ -# Copyright 2018 Markus Scheidgen -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an"AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import click -import time -import datetime -import os -import os.path -import re -import shutil -import multiprocessing -import queue -import json - -from nomad import config, infrastructure -from nomad.migration import NomadCOEMigration, SourceCalc, Package, missing_calcs_data - -from .client import client - - -def _Migration(**kwargs) -> NomadCOEMigration: - return NomadCOEMigration(**kwargs) - - -def _setup(): - pass - - -@client.group(help='Migrate data from NOMAD CoE to nomad@FAIRDI') -@click.option('-h', '--host', default=config.migration_source_db.host, help='The migration repository source db host, default is "%s".' % config.migration_source_db.host) -@click.option('-p', '--port', default=config.migration_source_db.port, help='The migration repository source db port, default is %d.' % config.migration_source_db.port) -@click.option('-u', '--user', default=config.migration_source_db.user, help='The migration repository source db user, default is %s.' % config.migration_source_db.user) -@click.option('-w', '--password', default=config.migration_source_db.password, help='The migration repository source db password.') -@click.option('-db', '--dbname', default=config.migration_source_db.dbname, help='The migration repository source db name, default is %s.' % config.migration_source_db.dbname) -@click.option('--migration-version', default=0, type=int, help='The version number, only packages with lower or no number will be migrated.') -@click.option('--package-directory', default=config.fs.migration_packages, help='The directory used as bucket for upload packages, default is %s.' % config.fs.migration_packages) -@click.option('--compress-packages', is_flag=True, help='Turn on compression for creating migration packages') -def migration( - host, port, user, password, dbname, migration_version, package_directory, compress_packages): - global _setup - - def _setup(): - infrastructure.setup_logging() - infrastructure.setup_repository_db( - readony=True, host=host, port=port, user=user, password=password, dbname=dbname) - infrastructure.setup_mongo() - - global _Migration - - def _Migration(**kwargs): - return NomadCOEMigration( - migration_version=migration_version, package_directory=package_directory, - compress_packages=compress_packages, **kwargs) - - -@migration.command(help='Create/update the coe repository db migration index') -@click.option('--drop', help='Drop the existing index, otherwise it will only add new data.', is_flag=True) -@click.option('--with-metadata', help='Extract metadata for each calc and add it to the index.', is_flag=True) -@click.option('--per-query', default=100, help='We index many objects with one query. 
Default is 100.') -@click.option('--start-pid', type=int, default=-1, help='Only index calculations with PID greater equal the given value') -def index(drop, with_metadata, per_query, start_pid): - _setup() - start = time.time() - indexed_total = 0 - indexed_calcs = 0 - for calc, total in _Migration().source_calc_index( - drop=drop, with_metadata=with_metadata, per_query=int(per_query), start_pid=start_pid): - indexed_total += 1 - indexed_calcs += 1 if calc is not None else 0 - eta = total * ((time.time() - start) / indexed_total) - print( - 'indexed: %8d, calcs: %8d, total: %8d, ETA: %s\r' % - (indexed_total, indexed_calcs, total, datetime.timedelta(seconds=eta)), end='') - print('done') - - -@migration.command(help='Transfer migration index to elastic search') -@click.argument('tar-file', nargs=1) -@click.option('--offset', default=None, type=int, help='Start processing the tar from a specific offset, e.g. to continue') -@click.option('--upload', default=None, type=str, help='Force the whole tar contents into a given upload id') -@click.option('--compress', is_flag=True, help='Turn on compression for creating migration packages') -def package_tar(tar_file, offset, upload, compress): - infrastructure.setup_logging() - infrastructure.setup_mongo() - Package.create_packages_from_tar(tar_file, offset=offset, compress=compress, forced_upload_id=upload) - - -@migration.command(help='Reset migration version to start a new migration.') -@click.option('--delete-packages', is_flag=True, help='Also remove all packages.') -def reset(delete_packages: bool): - infrastructure.setup_logging() - infrastructure.setup_mongo() - - SourceCalc.objects(migration_version__ne=-1).update(migration_version=-1) - if delete_packages: - for subdir in os.listdir(config.fs.migration_packages): - shutil.rmtree(os.path.join(config.fs.migration_packages, subdir)) - Package.objects().delete() - else: - Package.objects(migration_version__ne=-1).update(migration_version=-1) - - -def determine_upload_paths(paths, pattern=None, all=False): - if all: - return Package.objects().distinct('upload_path') - - if len(paths) == 1 and paths[0].endswith('.json'): - with open(paths[0], 'rt') as f: - data = json.load(f) - - if isinstance(data, list): - items = data - else: - if pattern is not None: - key = pattern - else: - key = 'uploads_with_no_package' - - items = [] - for item in data[key]: - if isinstance(item, str): - items.append(item) - else: - items.append(item['id']) - - paths = [] - for upload_id in items: - exists = False - for prefix in ['/nomad/repository/data/extracted', '/nomad/repository/data/uploads']: - path = os.path.join(prefix, upload_id) - if os.path.exists(path): - exists = True - paths.append(path) - - if not exists: - # This does not really matter, to save space we deleted some source - # data after packaging it. the migration will use the packages anyways. - # We just use the full path to communicate the upload_id at the end - # for historical reasons. - paths.append(os.path.join('/does/not/exist/anymore', upload_id)) - - elif pattern is not None: - assert len(paths) == 1, "Can only apply pattern on a single directory." 
- path = paths[0] - if pattern == "ALL": - paths = [os.path.join(path, directory) for directory in os.listdir(path)] - else: - paths = [] - compiled_pattern = re.compile(pattern) - directories = os.listdir(path) - directories.sort() - for sub_directory in directories: - if re.fullmatch(compiled_pattern, sub_directory): - paths.append(os.path.join(path, sub_directory)) - - return paths - - -@migration.command(help='Delete extracted files for given packages.') -@click.argument('upload-paths', nargs=-1) -@click.option('--pattern', default=None, type=str, help='Interpret the paths as directory and migrate those subdirectory that match the given regexp') -@click.option('--extracted', default='/nomad/repository/data/extracted', type=str, help='The parent directory with all extracted uploads') -@click.option('--uploads', default='/nomad/repository/data/uploads', type=str, help='The parent directory with all "uploaded" uploads') -def delete(upload_paths, pattern, extracted, uploads): - infrastructure.setup_logging() - infrastructure.setup_mongo() - migration = _Migration() - upload_paths = determine_upload_paths(upload_paths, pattern) - for upload_path in upload_paths: - packages_iterable = Package.get_packages(upload_path, migration.package_directory) - if packages_iterable is None: - continue - - packages = list(packages_iterable) - if len(packages) == 0: - continue - - if any(not os.path.exists(package.package_path) for package in packages): - migration.logger.error('package without packaged file', source_upload_id=package.upload_id) - continue - - package = packages[0] - for package in Package.get_packages(upload_path, migration.package_directory): - deleted, cause = package.delete_files(extracted, uploads) - if deleted: - migration.logger.info('deleted extracted files', source_upload_id=package.upload_id) - else: - migration.logger.warn('delete conditions not satisfied', source_upload_id=package.upload_id, cause=cause) - # doing this for one of the uploaded packages is enough - break - - -@migration.command(help='Add an upload folder to the package index.') -@click.argument('upload-paths', nargs=-1) -@click.option('--pattern', default=None, type=str, help='Interpret the paths as directory and migrate those subdirectory that match the given regexp') -@click.option('--parallel', default=1, type=int, help='Use the given amount of parallel processes to process uploads. Default is 1.') -@click.option('--parallel-zip', default=1, type=int, help='Use the given amount of parallel processes to pack packages. Default is 1.') -def package(upload_paths, pattern, parallel, parallel_zip): - upload_paths = determine_upload_paths(upload_paths, pattern) - upload_path_queue = multiprocessing.Queue(len(upload_paths)) - - print('Package %d uploads with %d/%d processes.' 
% (len(upload_paths), parallel, parallel_zip)) - - for upload_path in upload_paths: - upload_path_queue.put(upload_path) - - def package_paths(): - infrastructure.setup_logging() - infrastructure.setup_mongo() - - migration = _Migration() - - try: - while True: - upload_path = upload_path_queue.get() - migration.package_index(upload_path, parallel=parallel_zip) - except queue.Empty: - pass - - processes = [] - for _ in range(0, parallel): - process = multiprocessing.Process(target=package_paths) - process.start() - processes.append(process) - - for process in processes: - process.join() - upload_path_queue.close() - - -@migration.command(help='Get an report over all migrated packages.') -def report(): - infrastructure.setup_logging() - infrastructure.setup_mongo() - - report = _Migration().report() - print(report) - - -@migration.command(help='Copy users from source into empty target db') -def copy_users(**kwargs): - _setup() - _Migration().copy_users() - - -@migration.command(help='Set the repo db PID calc counter.') -@click.argument('prefix', nargs=1, type=int, default=7000000) -def pid_prefix(prefix: int): - infrastructure.setup_logging() - _Migration().set_pid_prefix(prefix=prefix) - - -@migration.command(help='Upload the given upload locations. Uses the existing index to provide user metadata') -@click.argument('upload-paths', nargs=-1) -@click.option('--pattern', default=None, type=str, help='Interpret the paths as directory and migrate those subdirectory that match the given regexp') -@click.option('--delete-failed', default='', type=str, help='String from N, U, P to determine if empty (N), failed (U), or failed to publish (P) uploads should be deleted or kept for debugging.') -@click.option('--parallel', default=1, type=int, help='Use the given amount of parallel processes. Default is 1.') -@click.option('--create-packages', is_flag=True, help='Indicate that packages should be created, if they do not already exist.') -@click.option('--only-republish', is_flag=True, help='Will only republish already published packages.') -@click.option('--republish', is_flag=True, help='Will process normally and republish already published packages.') -@click.option('--all', is_flag=True, help='Go through all known packages. Ignores pattern and args.') -@click.option('--wait', default=0, type=int, help='Wait for a random (upto given) number of seconds before each upload to scatter io and compute heavy processing tasks.') -def upload( - upload_paths: list, pattern: str, parallel: int, delete_failed: str, - create_packages: bool, only_republish: bool, republish: bool, wait: int, all: bool): - - infrastructure.setup_logging() - infrastructure.setup_mongo() - - _Migration(threads=parallel).migrate( - *determine_upload_paths(upload_paths, pattern=pattern, all=all), delete_failed=delete_failed, - create_packages=create_packages, only_republish=only_republish, wait=wait, republish=republish) - - -@migration.command(help='Get an report about not migrated calcs. 
This connects directly to nomad dbs, like admin commands.') -@click.option('--start-pid', type=int, default=0, help='Only index calculations with PID greater equal the given value') -@click.option('--uploads', is_flag=True, help='Instead of extensive information, only provide a list of missing uploads') -def missing(start_pid, uploads): - infrastructure.setup_logging() - infrastructure.setup_mongo() - - report = missing_calcs_data(start_pid=start_pid) - if not uploads: - print(json.dumps(report, indent=2)) - else: - uploads = set() - for reason, upload_list in report.items(): - if reason == 'summary': - continue - for upload_information in upload_list: - source_upload_id = upload_information.get('source_upload_id', None) - if source_upload_id is not None: - uploads.add(source_upload_id) - - for source_upload_id in uploads: - print(source_upload_id) diff --git a/nomad/coe_repo/__init__.py b/nomad/coe_repo/__init__.py deleted file mode 100644 index 96fee7edb9..0000000000 --- a/nomad/coe_repo/__init__.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright 2018 Markus Scheidgen -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an"AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Interface to the NOMAD-coe repository postgres database. This implementation is based on -SQLAlchemy. There are model classes that represent entries in the *users* and *session* -tables. All DB entities are implemented as classes, but most are hidden and data -can be accessed via the various relations with :class:`User`, :class:`Calc`, :class:`Upload`. - -To load an entity from the database use :data:`nomad.infrastructure.repository_db` -(the SQLAlchemy session), e.g.: - -.. code-block:: python - - repository_db.Query(coe_repo.Calc).filter_by(upload_id=some_id) - -.. autoclass:: User - :members: - :undoc-members: - -.. autofunction:: ensure_test_user -.. autodata:: admin_user -.. autoexception:: LoginException - -.. autoclass:: UploadMetaData - :members: -.. autoclass:: Upload - :members: - :undoc-members: -.. autoclass:: Calc - :members: - :undoc-members: -.. autofunction:: create_handle -.. autoclass:: DataSet - :members: - :undoc-members: -""" - -from .user import User, ensure_test_user, admin_user, LoginException -from .calc import Calc, DataSet, create_handle -from .upload import UploadMetaData, Upload diff --git a/nomad/coe_repo/base.py b/nomad/coe_repo/base.py deleted file mode 100644 index 5f368f9694..0000000000 --- a/nomad/coe_repo/base.py +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright 2018 Markus Scheidgen -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an"AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -""" -Declarative SQLAlchemy base model definitions for the repository db schema. Does -not include the *big* datamodel entities: `User`, `Upload`, `Calc`; they can -be found in their own submodules. -""" - -from sqlalchemy import Column, Integer, String, DateTime, ForeignKey, Enum, Table -from sqlalchemy.orm import relationship -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.dialects.postgresql import BYTEA -from sqlalchemy.ext.declarative import declarative_base - -from nomad import utils - - -Base = declarative_base() - -calc_citation_association = Table( - 'metadata_citations', Base.metadata, - Column('calc_id', Integer, ForeignKey('calculations.calc_id')), - Column('citation_id', Integer, ForeignKey('citations.citation_id'))) - - -ownership = Table( - 'ownerships', Base.metadata, - Column('calc_id', Integer, ForeignKey('calculations.calc_id')), - Column('user_id', Integer, ForeignKey('users.user_id'))) - -co_authorship = Table( - 'coauthorships', Base.metadata, - Column('calc_id', Integer, ForeignKey('calculations.calc_id')), - Column('user_id', Integer, ForeignKey('users.user_id'))) - -shareship = Table( - 'shareships', Base.metadata, - Column('calc_id', Integer, ForeignKey('calculations.calc_id')), - Column('user_id', Integer, ForeignKey('users.user_id'))) - - -class CalcMetaData(Base): # type: ignore - __tablename__ = 'metadata' - - calc_id = Column(Integer, ForeignKey('calculations.calc_id'), primary_key=True) - calc = relationship('Calc') - added = Column(DateTime) - oadate = Column(DateTime) - chemical_formula = Column(String) - filenames = Column(BYTEA) - download_size = Column(Integer) - location = Column(String) - version_id = Column(Integer, ForeignKey('codeversions.version_id')) - version = relationship('CodeVersion', lazy='joined', uselist=False) - - -class UserMetaData(Base): # type: ignore - __tablename__ = 'user_metadata' - - calc_id = Column(Integer, ForeignKey('calculations.calc_id'), primary_key=True) - label = Column(String) - calc = relationship('Calc') - permission = Column(Integer) - - -class StructRatio(Base): # type: ignore - __tablename__ = 'struct_ratios' - - calc_id = Column(Integer, ForeignKey('calculations.calc_id'), primary_key=True) - calc = relationship('Calc') - formula_units = Column(Integer) - nelem = Column(Integer) - chemical_formula = Column(String) - - -class CodeVersion(Base): # type: ignore - __tablename__ = 'codeversions' - - version_id = Column(Integer, primary_key=True, autoincrement=True) - content = Column(String) - - -class Spacegroup(Base): # type: ignore - __tablename__ = 'spacegroups' - - calc_id = Column(Integer, ForeignKey('calculations.calc_id'), primary_key=True) - calc = relationship('Calc') - n = Column(Integer) - - -topic_code = 220 -topic_atoms = 10 -topic_system_type = 50 -topic_xc_treatment = 75 -topic_crystal_system = 90 -topic_basis_set_type = 80 - - -class Tag(Base): # type: ignore - __tablename__ = 'tags' - calc_id = Column(Integer, ForeignKey('calculations.calc_id'), primary_key=True) - calc = relationship('Calc') - tid = Column(Integer, ForeignKey('topics.tid'), primary_key=True) - topic = relationship('Topics', lazy='joined', uselist=False) - - def __repr__(self): - return '<Tag(calc_id="%d", tid="%d)>' % (int(self.calc_id), int(self.tid)) - - -class Topics(Base): # type: ignore - __tablename__ = 'topics' - tid = Column(Integer, primary_key=True, autoincrement=True) - cid = Column(Integer) - topic = Column(String) - - -class CalcSet(Base): # type: ignore - __tablename__ = 'calcsets' - - 
parent_calc_id = Column(Integer, ForeignKey('calculations.calc_id'), primary_key=True) - children_calc_id = Column(Integer, ForeignKey('calculations.calc_id'), primary_key=True) - - -calc_dataset_containment = Table( - 'calcsets', Base.metadata, extend_existing=True) - - -class Citation(Base): # type: ignore - __tablename__ = 'citations' - - citation_id = Column(Integer, primary_key=True) - value = Column(String) - kind = Column(Enum('INTERNAL', 'EXTERNAL', name='citation_kind_enum')) - - def to_popo(self) -> utils.POPO: - return utils.POPO(id=self.citation_id, value=self.value) diff --git a/nomad/coe_repo/calc.py b/nomad/coe_repo/calc.py deleted file mode 100644 index 11c6b8b704..0000000000 --- a/nomad/coe_repo/calc.py +++ /dev/null @@ -1,451 +0,0 @@ -# Copyright 2018 Markus Scheidgen -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an"AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import List, Dict, Any -import json -from sqlalchemy import Column, Integer, String, ForeignKey -from sqlalchemy.orm import relationship, aliased -from sqlalchemy.sql.expression import literal -from datetime import datetime -import os.path - -from nomad import infrastructure, utils, config, files -from nomad.datamodel import DFTCalcWithMetadata - -from . import base -from .user import User -from .base import Base, calc_citation_association, ownership, co_authorship, shareship, \ - Tag, Topics, CalcSet, calc_dataset_containment, Citation, Spacegroup, CalcMetaData, \ - CodeVersion, StructRatio, UserMetaData - - -handle_base = '0123456789abcdefghijklmnopqrstuvwxyz' - - -def create_handle(pid: int) -> str: - """ - Create a handle for the given pid. The pid is an autoincrement number. The handle - a 'base32' encoded string of that number. Therefore, its string representation is a - little shorter. The handle is prefixed with the configured handle prefix. - """ - - value = pid - result = '' - while value > 0: - result += handle_base[value & 31] - value = value >> 5 - - return config.repository_db.handle_prefix + result[::-1] - - -class PublishContext: - """ - Utilities necessary during adding calculations to the repo db. - Caches queries to avoid unnecessary flushes while bulk creating calcs. - Faster than even SQLAlchemy with ``autoflush=False``, because of reasons. - Access to a logger with bound data about the upload, etc. 
- """ - - def __init__(self, upload_id: str = None, **kwargs): - self._cache: Dict[str, Any] = {} - self.upload_id = upload_id - self.upload_files = None if upload_id is None else files.UploadFiles.get(upload_id, is_authorized=lambda: True) - self.logger = utils.get_logger(__name__, upload_id=upload_id, **kwargs) - - def cache(self, entity, **kwargs): - key = json.dumps(dict(entity=entity.__class__.__name__, **kwargs)) - value = self._cache.get(key, None) - if value is None: - value = infrastructure.repository_db.query(entity).filter_by(**kwargs).first() - if value is not None: - self._cache[key] = value - return value - - -class IllegalCalcMetadata(Exception): pass - - -class Calc(Base): - __tablename__ = 'calculations' - - coe_calc_id = Column('calc_id', Integer, primary_key=True, autoincrement=True) - handlepid = Column(String) - origin_id = Column(Integer, ForeignKey('uploads.upload_id')) - upload = relationship('Upload', lazy='joined') - checksum = Column(String) - - calc_metadata = relationship('CalcMetaData', uselist=False, lazy='joined') - user_metadata = relationship('UserMetaData', uselist=False, lazy='joined') - citations = relationship('Citation', secondary=calc_citation_association, lazy='joined') - owners = relationship('User', secondary=ownership, lazy='joined') - coauthors = relationship('User', secondary=co_authorship, lazy='joined') - shared_with = relationship('User', secondary=shareship, lazy='joined') - tags = relationship('Tag', lazy='subquery', join_depth=1) - spacegroup = relationship('Spacegroup', lazy='joined', uselist=False) - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.topic_ids = {} - - parents = relationship( - 'Calc', - secondary=calc_dataset_containment, - primaryjoin=calc_dataset_containment.c.children_calc_id == coe_calc_id, - secondaryjoin=calc_dataset_containment.c.parent_calc_id == coe_calc_id, - backref='children', lazy='subquery', join_depth=1) - - @staticmethod - def from_calc_id(calc_id: str) -> 'Calc': - repo_db = infrastructure.repository_db - calcs = repo_db.query(Calc).filter_by(checksum=calc_id) - assert calcs.count() <= 1, 'Calc id/checksum must be unique' - return calcs.first() - - @classmethod - def load_from(cls, obj): - repo_db = infrastructure.repository_db - return repo_db.query(Calc).filter_by(coe_calc_id=int(obj.pid)).first() - - @property - def mainfile(self) -> str: - return self.calc_metadata.location - - @property - def pid(self) -> int: - return self.coe_calc_id - - @property - def comment(self) -> str: - return self.user_metadata.label - - @property - def calc_id(self) -> str: - return self.checksum - - @property - def references(self) -> List[str]: - return list(citation.value for citation in self.citations if citation.kind == 'EXTERNAL') - - @property - def uploader(self) -> User: - assert len(self.owners) == 1, 'A calculation must have exactly one owner.' - return self.owners[0] - - @property - def with_embargo(self) -> bool: - # permission = 1 means public - # permission = 0 means not public, i.e. 
with embargo - return self.user_metadata.permission != 1 - - @property - def formula(self) -> str: - return self.calc_metadata.chemical_formula - - @property - def files(self) -> List[str]: - if self.calc_metadata is not None: - if self.calc_metadata.filenames is not None: - filenames = self.calc_metadata.filenames.decode('utf-8') - return json.loads(filenames) - - return [] - - @property - def all_datasets(self) -> List['DataSet']: - assert self.coe_calc_id is not None - repo_db = infrastructure.repository_db - query = repo_db.query(literal(self.coe_calc_id).label('coe_calc_id')).cte(recursive=True) - right = aliased(query) - left = aliased(CalcSet) - query = query.union_all(repo_db.query(left.parent_calc_id).join( - right, right.c.coe_calc_id == left.children_calc_id)) - query = repo_db.query(query) - dataset_calc_ids = list(r[0] for r in query if not r[0] == self.coe_calc_id) - if len(dataset_calc_ids) > 0: - return [ - DataSet(dataset_calc) - for dataset_calc in repo_db.query(Calc).filter(Calc.coe_calc_id.in_(dataset_calc_ids))] - else: - return [] - - @property - def direct_datasets(self) -> List['DataSet']: - return [DataSet(dataset_calc) for dataset_calc in self.parents] - - def _set_value(self, topic_cid: int, value: str, context: PublishContext) -> None: - if value is None: - return - - repo_db = infrastructure.repository_db - topic = context.cache(Topics, cid=topic_cid, topic=value) - if not topic: - topic = Topics(cid=topic_cid, topic=value) - repo_db.add(topic) - repo_db.flush() - - if topic.tid not in self.topic_ids: - tag = Tag(calc=self, topic=topic) - self.topic_ids[topic.tid] = topic.tid - repo_db.add(tag) - else: - logger = utils.get_logger( - __name__, calc_id=self.calc_id, upload_id=self.upload.upload_id) - logger.warning('double tag on same calc', cid=topic.cid, tid=topic.tid, value=topic.topic) - - _dataset_cache: dict = {} - - def apply_calc_with_metadata(self, calc: DFTCalcWithMetadata, context: PublishContext) -> None: - """ - Applies the data from ``source`` to this coe Calc object. 
- """ - repo_db = infrastructure.repository_db - - self.checksum = calc.calc_id - source_code_version = calc.code_version # TODO shorten version names - code_version_obj = context.cache(CodeVersion, content=source_code_version) - if code_version_obj is None: - code_version_obj = CodeVersion(content=source_code_version) - repo_db.add(code_version_obj) - repo_db.flush() - - if calc.upload_time is not None: - added_time = calc.upload_time - elif self.upload is not None and self.upload.upload_time is not None: - added_time = self.upload.upload_time - else: - added_time = datetime.utcnow() - - upload_id = context.upload_id - upload_files = context.upload_files - coe_files = list() - if upload_files is None: - upload_size = -1 - else: - upload_size = 0 - - for calc_file in calc.files: - if config.repository_db.mode == 'coe': - coe_file = os.path.join('$EXTRACTED', 'nomad', upload_id, calc_file).replace('/', '\\/') - else: - coe_file = calc_file - - if upload_files is not None: - upload_size += upload_files.raw_file_size(calc_file) - coe_files.append(coe_file) - - metadata = CalcMetaData( - calc=self, - added=added_time, - oadate=added_time, - chemical_formula=calc.formula, - filenames=('[%s]' % ','.join(['"%s"' % coe_file for coe_file in coe_files])).encode('utf-8'), - download_size=upload_size, - location=calc.mainfile, - version=code_version_obj) - repo_db.add(metadata) - - struct_ratio = StructRatio( - calc=self, - chemical_formula=calc.formula, - formula_units=1, nelem=len(calc.atoms)) - repo_db.add(struct_ratio) - - user_metadata = UserMetaData( - calc=self, - label=calc.comment, - permission=(0 if calc.with_embargo else 1)) - repo_db.add(user_metadata) - - if isinstance(calc.spacegroup, int) or calc.spacegroup.isdigit(): - spacegroup = Spacegroup(calc=self, n=calc.spacegroup) - else: - spacegroup = Spacegroup(calc=self, n='0') - repo_db.add(spacegroup) - - # topic based properties - self._set_value(base.topic_code, calc.code_name, context) - for atom in set(calc.atoms): - self._set_value(base.topic_atoms, str(atom), context) - self._set_value(base.topic_system_type, calc.system, context) - self._set_value(base.topic_xc_treatment, calc.xc_functional, context) - self._set_value(base.topic_crystal_system, calc.crystal_system, context) - self._set_value(base.topic_basis_set_type, calc.basis_set, context) - - # user relations - def add_users_to_relation(source_users, relation): - for source_user in source_users: - coe_user = context.cache(User, user_id=int(source_user.id)) - if coe_user is None: - raise IllegalCalcMetadata( - 'User with user_id %s does not exist.' 
% source_user.id) - source_user.update(coe_user.to_popo()) - relation.append(coe_user) - - if calc.uploader is not None: - add_users_to_relation([calc.uploader], self.owners) - elif self.upload is not None and self.upload.user is not None: - self.owners.append(self.upload.user) - calc.uploader = self.upload.user.to_popo() - - add_users_to_relation(calc.coauthors, self.coauthors) - add_users_to_relation(calc.shared_with, self.shared_with) - - # datasets - calcs_existing_datasets: List[int] = [] - for dataset in calc.datasets: - dataset_id = dataset.id - if dataset_id in calcs_existing_datasets: - continue - else: - calcs_existing_datasets.append(dataset_id) - - coe_dataset_calc: Calc = context.cache(Calc, coe_calc_id=dataset_id) - if coe_dataset_calc is None: - coe_dataset_calc = Calc(coe_calc_id=dataset_id) - repo_db.add(coe_dataset_calc) - - metadata = CalcMetaData( - calc=coe_dataset_calc, - added=self.upload.upload_time, - chemical_formula=dataset.name) - repo_db.add(metadata) - repo_db.flush() - - if dataset.doi is not None: - self._add_citation(coe_dataset_calc, dataset.doi['value'], 'INTERNAL', context) - - # cause a flush to create the backdirection of the above established - # metadata-dataset_calc relation - repo_db.flush() - - self.parents.append(coe_dataset_calc) - - dataset.update(DataSet(coe_dataset_calc).to_popo()) - - # references - for reference in calc.references: - self._add_citation(self, reference['value'], 'EXTERNAL', context) - - repo_db.flush() - - def _add_citation(self, coe_calc: 'Calc', value: str, kind: str, context: PublishContext) -> None: - if value is None or kind is None: - context.logger.warning( - 'citation without value or kind str', value=value, kind=kind, calc_id=self.calc_id) - return - - repo_db = infrastructure.repository_db - citation = context.cache(Citation, value=value, kind=kind) - - if citation is None: - citation = Citation(value=value, kind=kind) - repo_db.add(citation) - - coe_calc.citations.append(citation) - - def to_calc_with_metadata(self) -> DFTCalcWithMetadata: - """ - Creates a :class:`DFTCalcWithMetadata` instance with UCPM ids, and all UMD/CMD. - Be aware that ``upload_id`` and ``calc_id``, might be old coe repository - ``upload_name`` and calculation ``checksum`` depending on the context, i.e. used - database. - """ - result = DFTCalcWithMetadata( - upload_id=self.upload.upload_id if self.upload else None, - calc_id=self.checksum) - - result.pid = self.pid - result.mainfile = self.mainfile - result.files = self.files - - for topic in [tag.topic for tag in self.tags]: - if topic is None: - continue - - if topic.cid == base.topic_code: - result.code_name = topic.topic - elif topic.cid == base.topic_basis_set_type: - result.basis_set = topic.topic - elif topic.cid == base.topic_xc_treatment: - result.xc_functional = topic.topic - elif topic.cid == base.topic_system_type: - result.system = topic.topic - elif topic.cid == base.topic_atoms: - result.atoms.append(topic.topic) - elif topic.cid == base.topic_crystal_system: - result.crystal_system = topic.topic - elif topic.cid in [1996, 1994, 703, 702, 701, 100]: - # user/author, restriction, formulas?, another category - pass - else: - raise KeyError('topic cid %s.' 
% str(topic.cid)) - - result.code_version = self.calc_metadata.version.content - result.formula = self.calc_metadata.chemical_formula - if self.spacegroup is not None: - result.spacegroup = self.spacegroup.n - result.atoms.sort() - - datasets: List[DataSet] = [] - for parent in self.parents: - parents = Calc._dataset_cache.get(parent, None) - if parents is None: - parents = parent.all_datasets - Calc._dataset_cache[parent] = parents - datasets.append(DataSet(parent)) - datasets.extend(parents) - - result.pid = self.pid - result.uploader = self.uploader.to_popo() - result.upload_time = self.calc_metadata.added - result.datasets = list(ds.to_popo() for ds in datasets) - result.with_embargo = self.with_embargo - result.comment = self.comment - result.references = list( - citation.to_popo() for citation in self.citations - if citation.kind == 'EXTERNAL') - result.coauthors = list(user.to_popo() for user in self.coauthors) - result.shared_with = list(user.to_popo() for user in self.shared_with) - - return result - - -class DataSet: - def __init__(self, dataset_calc: Calc) -> None: - self._dataset_calc = dataset_calc - - @property - def id(self): - return self._dataset_calc.coe_calc_id - - @property - def doi(self) -> Citation: - doi = None - for citation in self._dataset_calc.citations: - if citation.kind == 'INTERNAL': - if doi is not None: - utils.get_logger(__name__).warning( - 'dataset with multiple dois', dataset_id=self.id) - doi = citation - return doi - - @property - def name(self): - return self._dataset_calc.calc_metadata.chemical_formula - - def to_popo(self): - return utils.POPO( - id=self.id, - name=self.name, - doi=self.doi.to_popo() if self.doi is not None else None) diff --git a/nomad/coe_repo/upload.py b/nomad/coe_repo/upload.py deleted file mode 100644 index 8643e1a2c4..0000000000 --- a/nomad/coe_repo/upload.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright 2018 Markus Scheidgen -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an"AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Interface to the NOMAD-coe repository postgres database. This implementation is based on -SQLAlchemy. There are model classes that represent entries in the *users* and *session* -tables. - -This module allows to authenticate users based on user password or session tokens. -It allows to access the user data like names and user_id. - -.. autoclass:: User - :members: - :undoc-members: - -.. autoclass:: Session - :members: - :undoc-members: - -.. autofunction:: ensure_test_user - -This module also provides functionality to add parsed calculation data to the db: - -.. autoclass:: UploadMetaData - :members: -.. autoclass:: Upload - :members: - :undoc-members: -.. 
autoclass:: Calc - :members: - :undoc-members: -""" - -from typing import Type, cast -import datetime -from sqlalchemy import Column, Integer, String, Boolean, DateTime, ForeignKey -from sqlalchemy.orm import relationship -import filelock -import os.path -import warnings -from sqlalchemy import exc as sa_exc - -from nomad import utils, infrastructure, config -from nomad.datamodel import UploadWithMetadata, DFTCalcWithMetadata - -from .calc import Calc, PublishContext, create_handle -from .base import Base -from .user import User - - -class UploadMetaData: - """ - Utility class that provides per upload meta data and overriding per calculation - meta data. For a given *mainfile* data is first read from the `calculations` key - (a list of calculation dict with a matching `mainfile` key), before it is read - from `metadata_dict` it self. - - The class is used to deal with user provided meta-data. - - Arguments: - metadata_dict: The python dict with the meta-data. - """ - def __init__(self, metadata_dict: dict) -> None: - self._upload_data = metadata_dict - self._calc_data: dict = { - calc['mainfile']: calc - for calc in self._upload_data.get('calculations', [])} - - def get(self, mainfile: str) -> dict: - return self._calc_data.get(mainfile, self._upload_data) - - -class Upload(Base): # type: ignore - __tablename__ = 'uploads' - - coe_upload_id = Column('upload_id', Integer, primary_key=True, autoincrement=True) - upload_name = Column(String) - target_path = Column(String) - user_id = Column(Integer, ForeignKey('users.user_id')) - is_processed = Column(Boolean) - is_all_uploaded = Column(Boolean) - created = Column(DateTime) - - user = relationship('User') - calcs = relationship('Calc', lazy='subquery', passive_deletes=True) - - @staticmethod - def from_upload_id(upload_id: str) -> 'Upload': - repo_db = infrastructure.repository_db - uploads = repo_db.query(Upload).filter_by(upload_name=upload_id) - assert uploads.count() <= 1, 'Upload id/name must be unique' - return uploads.first() - - @property - def upload_id(self) -> str: - return self.upload_name - - @property - def uploader(self) -> User: - return self.user - - @property - def upload_time(self) -> Type[datetime.datetime]: - return self.created - - @staticmethod - def delete(upload_id): - upload = Upload.from_upload_id(upload_id) - if upload is not None: - logger = utils.get_logger(__name__, upload_id=upload.upload_id) - - # there is a warning related to SQLalchemy not knowing about the delete cascades - with warnings.catch_warnings(): - warnings.simplefilter("ignore", category=sa_exc.SAWarning) - - repo_db = infrastructure.repository_db - repo_db.expunge_all() - repo_db.begin() - try: - repo_db.delete(upload) - repo_db.commit() - logger.info('deleted repo upload') - except Exception as e: - logger.error('could not delete repo upload', exc_info=e) - repo_db.rollback() - - @staticmethod - def publish(upload: UploadWithMetadata) -> int: - """ - Add the upload to the NOMAD-coe repository db. It creates an - uploads-entry, respective calculation and property entries. Everything in one - transaction. - - There are two modes (fairdi, coe). The coe mode will mimic the old python API - behaviour. An additional extracted raw-file copy needs to be stored for the old CoE - repository. Here, we will add the file path to the respective table. - In fairdi mode, only the .zip-based raw file archive is used. The file path data - will be replaced with information necessary to use nomad@fairdis raw-file API. 
- This function only handles the postgres entries. Files are created elsewhere - (e.g. nomad.processing.data). - - Arguments: - upload: The upload to add, including calculations with respective IDs, UMD, CMD. - - Returns: A callback that allows to commit or rollback the publish transaction. - The callback returns the ``coe_upload_id`` or -1, if rolledback or no upload - was created, due to no calcs in the upload. - """ - assert upload.uploader is not None - - logger = utils.get_logger(__name__, upload_id=upload.upload_id) - - last_error = None - retries = 0 - - while True: - if config.repository_db.sequential_publish: - publish_filelock = filelock.FileLock( - os.path.join(config.fs.tmp, 'publish.lock')) - logger.info('waiting for filelock') - while True: - try: - publish_filelock.acquire(timeout=15 * 60, poll_intervall=1) - logger.info('acquired filelock') - break - except filelock.Timeout: - logger.warning('could not acquire publish lock after generous timeout') - - repo_db = infrastructure.repository_db - repo_db.expunge_all() - repo_db.begin() - - try: - has_calcs = False - - # create upload - coe_upload = Upload( - upload_name=upload.upload_id, - target_path=upload.upload_id if config.repository_db.mode == 'coe' else None, - created=upload.upload_time, - user_id=upload.uploader.id, - is_processed=True, is_all_uploaded=True) - repo_db.add(coe_upload) - - # add calculations and metadata - # reuse the cache for the whole transaction to profit from repeating - # star schema entries for users, ds, topics, etc. - context = PublishContext(upload_id=upload.upload_id) - coe_calcs = [] - for calc in upload.calcs: - has_calcs = True - coe_calc = Calc( - coe_calc_id=calc.pid, - checksum=calc.calc_id, - upload=coe_upload) - repo_db.add(coe_calc) - coe_calcs.append(coe_calc) - - coe_calc.apply_calc_with_metadata( - cast(DFTCalcWithMetadata, calc), context=context) - logger.debug( - 'added calculation, not yet committed', calc_id=coe_calc.calc_id) - - logger.info('filled publish transaction') - - result = None - if has_calcs: - repo_db.flush() - for coe_calc in coe_calcs: - coe_calc.handlepid = create_handle(coe_calc.coe_calc_id) - logger.debug('created all handlepids') - - repo_db.commit() - logger.info('committed publish transaction') - result = coe_upload - else: - # empty upload case - repo_db.rollback() - return None - - logger.info('added upload') - return result - except Exception as e: - repo_db.rollback() - if last_error != str(e) and retries < 3: - last_error = str(e) - logger.error( - 'Retry publish after unexpected exception.', exc_info=e, - error=last_error, retry=retries) - retries += 1 - else: - logger.error('Unexpected exception.', exc_info=e) - raise e - finally: - if config.repository_db.sequential_publish: - publish_filelock.release() - logger.info('released filelock') diff --git a/nomad/coe_repo/user.py b/nomad/coe_repo/user.py deleted file mode 100644 index 65ce304e44..0000000000 --- a/nomad/coe_repo/user.py +++ /dev/null @@ -1,279 +0,0 @@ -# Copyright 2018 Markus Scheidgen -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an"AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Dict -from passlib.hash import bcrypt -from sqlalchemy import Column, Integer, String, ForeignKey, DateTime -from sqlalchemy.orm import relationship -import datetime -import jwt -import random -import string - -from nomad import infrastructure, config, utils - -from .base import Base - - -class Session(Base): # type: ignore - __tablename__ = 'sessions' - - token = Column(String) - user_id = Column(String, ForeignKey('users.user_id'), primary_key=True) - user = relationship('User') - - -class LoginException(Exception): - """ Exception that is raised if the user could not be logged in despite present - credentials. """ - pass - - -class Affiliation(Base): - __tablename__ = 'affiliations' - a_id = Column(Integer, primary_key=True, autoincrement=True) - name = Column(String) - address = Column(String) - email_domain = Column(String) - - -class User(Base): # type: ignore - """ - SQLAlchemy model class that represents NOMAD-coe repository postgresdb *users*. - Provides functions for authenticating via password or session token. - - It is not intended to create or update users. This should be done via the - NOMAD-coe repository GUI. - """ - __tablename__ = 'users' - - user_id = Column(Integer, primary_key=True) - affiliation_id = Column(Integer, ForeignKey('affiliations.a_id'), name='affiliation') - email = Column(String) - first_name = Column(String, name='firstname') - last_name = Column(String, name='lastname') - affiliation = relationship('Affiliation', lazy='joined') - password = Column(String) - created = Column(DateTime) - - _token_chars = string.ascii_uppercase + string.ascii_lowercase + string.digits - - def __repr__(self): - return '<User(email="%s")>' % self.email - - @staticmethod - def create_user( - email: str, password: str, crypted: bool, user_id: int = None, - affiliation: Dict[str, str] = None, token: str = None, generate_token: bool = True, - **kwargs): - repo_db = infrastructure.repository_db - repo_db.begin() - try: - if affiliation is not None: - affiliation = Affiliation(**affiliation) - repo_db.add(affiliation) - - user = User(email=email, user_id=user_id, affiliation=affiliation, **kwargs) - repo_db.add(user) - user.set_password(password, crypted) - - # TODO this has to change, e.g. 
trade for JWTs - if token is None and generate_token: - token = ''.join(random.choices(User._token_chars, k=64)) - if token is not None: - repo_db.add(Session(token=token, user=user)) - - repo_db.commit() - return user - except Exception as e: - repo_db.rollback() - utils.get_logger('__name__').error('could not create user', email=email, exc_info=e) - raise e - - def update(self, crypted: bool = True, password: str = None, **kwargs): - repo_db = infrastructure.repository_db - repo_db.begin() - try: - if password is not None: - self.set_password(password, crypted=crypted) - - for key in kwargs: - setattr(self, key, kwargs.get(key)) - - repo_db.commit() - except Exception as e: - repo_db.rollback() - utils.get_logger('__name__').error( - 'could not edit user', email=self.email, user_id=self.user_id, exc_info=e) - raise e - - def _verify_password(self, password): - return bcrypt.verify(password, self.password) - - @staticmethod - def from_user_id(user_id) -> 'User': - return infrastructure.repository_db.query(User).get(user_id) - - def get_auth_token(self): - repo_db = infrastructure.repository_db - session = repo_db.query(Session).filter_by(user_id=self.user_id).first() - - if not session: - repo_db.begin() - try: - # TODO this has to change, e.g. trade for JWTs - token = ''.join(random.choices(User._token_chars, k=64)) - session = Session(token=token, user=self) - # This will allow to add session to repo_db, even though it is already - # attached to a different repo_db SQL alchemy session. This happens during - # migration tests, where we mimic the use of two different postgres - # databases - session = repo_db.merge(session) - repo_db.add(session) - - repo_db.commit() - except Exception as e: - repo_db.rollback() - utils.get_logger('__name__').error( - 'could not generate token for user', email=self.email, user_id=self.user_id, - exc_info=e) - raise e - - return session.token.encode('utf-8') - - def get_signature_token(self, expiration=10): - """ - Genertes ver short term JWT token that can be used to sign download URLs. - - Returns: Tuple with token and expiration datetime - """ - expires_at = datetime.datetime.utcnow() + datetime.timedelta(seconds=expiration) - token = jwt.encode( - dict(user=self.email, exp=expires_at), - config.services.api_secret, 'HS256').decode('utf-8') - return token, expires_at - - def set_password(self, password: str, crypted: bool): - """ - Sets the users password. With ``crypted=True`` password is supposed to - be already bcrypted and 2y-indented. 
- """ - if password is None: - return - - if crypted: - self.password = password - else: - password_hash = bcrypt.encrypt(password, ident='2y') - self.password = password_hash - - @property - def token(self): - return self.get_auth_token().decode('utf-8') - - @property - def is_admin(self) -> bool: - return self.email == 'admin' - - @staticmethod - def verify_user_password(email, password): - if email is None or password is None or email == '' or password == '': - return None - - repo_db = infrastructure.repository_db - user = repo_db.query(User).filter_by(email=email).first() - if not user: - return None - - if user._verify_password(password): - return user - else: - raise LoginException('Wrong password') - - @staticmethod - def verify_auth_token(token): - if token is None or token == '': - return None - - repo_db = infrastructure.repository_db - session = repo_db.query(Session).filter_by(token=token).first() - if session is None: - return None - - user = repo_db.query(User).filter_by(user_id=session.user_id).first() - assert user, 'User in sessions must exist.' - return user - - @staticmethod - def verify_signature_token(token): - """ - Verifies the given JWT token. This should be used to verify URLs signed - with a short term signature token (see :func:`get_signature_token`) - """ - try: - decoded = jwt.decode(token, config.services.api_secret, algorithms=['HS256']) - repo_db = infrastructure.repository_db - user = repo_db.query(User).filter_by(email=decoded['user']).first() - if user is None: - raise LoginException('Token signed for invalid user') - - return user - except KeyError: - raise LoginException('Token with invalid/unexpected payload') - except jwt.ExpiredSignatureError: - raise LoginException('Expired token') - except jwt.InvalidTokenError: - raise LoginException('Invalid token') - - def to_popo(self) -> utils.POPO: - popo = utils.POPO( - id=self.user_id, - first_name=self.first_name, - last_name=self.last_name, - email=self.email) - if self.affiliation is not None: - popo.update(affiliation=dict( - name=self.affiliation.name, - address=self.affiliation.address)) - - return popo - - -def ensure_test_user(email): - """ - Allows tests to make sure that the default test users exist in the database. - Returns: - The user as :class:`User` instance. - """ - repo_db = infrastructure.repository_db - existing = repo_db.query(User).filter_by(email=email).first() - assert existing, 'Test user %s does not exist.' % email - - session = repo_db.query(Session).filter_by( - user_id=existing.user_id).first() - assert session, 'Test user %s has no session.' % email - assert session.token == existing.first_name.lower(), 'Test user %s session has unexpected token.' % email - - return existing - - -def admin_user(): - """ - Returns the admin user, a special user with `user_id==0`. - Its password is part of :mod:`nomad.config`. - """ - repo_db = infrastructure.repository_db - admin = repo_db.query(User).filter_by(user_id=0).first() - assert admin, 'Admin user does not exist.' 
- return admin diff --git a/nomad/config.py b/nomad/config.py index 672ac5a8c5..8112968250 100644 --- a/nomad/config.py +++ b/nomad/config.py @@ -99,7 +99,6 @@ fs = NomadConfig( tmp='.volumes/fs/tmp', staging='.volumes/fs/staging', public='.volumes/fs/public', - coe_extracted='.volumes/fs/extracted', migration_packages='.volumes/fs/migration_packages', local_tmp='/tmp', prefix_size=2, @@ -112,18 +111,6 @@ elastic = NomadConfig( index_name='nomad_fairdi_calcs' ) -repository_db = NomadConfig( - sequential_publish=False, - publish_enabled=True, - host='localhost', - port=5432, - dbname='nomad_fairdi_repo_db', - user='postgres', - password='nomad', - handle_prefix='21.11132/', - mode='fairdi' -) - keycloak = NomadConfig( server_url='http://localhost:8002/auth/', realm_name='fairdi_nomad_test', diff --git a/nomad/datamodel/__init__.py b/nomad/datamodel/__init__.py index b9956165ec..53ed3489fb 100644 --- a/nomad/datamodel/__init__.py +++ b/nomad/datamodel/__init__.py @@ -16,7 +16,7 @@ This module contains classes that allow to represent the core nomad data entities :class:`Upload` and :class:`Calc` on a high level of abstraction independent from their representation in the different modules -:py:mod:`nomad.processing`, :py:mod:`nomad.coe_repo`, :py:mod:`nomad.parsing`, +:py:mod:`nomad.processing`, :py:mod:`nomad.parsing`, :py:mod:`nomad.search`, :py:mod:`nomad.api`, :py:mod:`nomad.migration`. It is not about representing every detail, but those parts that are directly involved in @@ -45,11 +45,16 @@ quantities. :members: .. autoclass:: nomad.datamodel.DomainQuantity :members: + +The class :class:`User` is used to represent users and their attributes. + +.. autoclass:: nomad.datamodel.User + :members: """ import sys -from nomad.datamodel.base import UploadWithMetadata, CalcWithMetadata, Domain, DomainQuantity +from nomad.datamodel.base import UploadWithMetadata, CalcWithMetadata, Domain, DomainQuantity, User from nomad.datamodel import ems, dft from nomad.datamodel.dft import DFTCalcWithMetadata from nomad.datamodel.ems import EMSEntryWithMetadata diff --git a/nomad/datamodel/base.py b/nomad/datamodel/base.py index 2808f1beab..05f3a6237d 100644 --- a/nomad/datamodel/base.py +++ b/nomad/datamodel/base.py @@ -16,7 +16,56 @@ from typing import Iterable, List, Dict, Type, Tuple, Callable, Any import datetime from elasticsearch_dsl import Keyword -from nomad import utils, config +from nomad import utils, config, infrastructure + + +class User: + """ + A data class that holds all information for a single user. This can be the logged in + and authenticated user, or other users (i.e. co-authors, etc.). 
+ + # TODO legacy ids + """ + def __init__( + self, email, user_id=None, name=None, first_name='', last_name='', affiliation=None, + created: datetime.datetime = None, token=None, **kwargs): + + self.user_id = kwargs.get('id', user_id) + self.email = email + + assert self.user_id is not None, 'Users must have a unique id' + assert email is not None, 'Users must have an email' + + self.first_name = kwargs.get('firstName', first_name) + self.last_name = kwargs.get('lastName', last_name) + name = kwargs.get('username', name) + created_timestamp = kwargs.get('createdTimestamp', None) + + if len(self.last_name) > 0 and len(self.first_name) > 0: + self.name = '%s, %s' % (self.last_name, self.first_name) + elif len(self.last_name) != 0: + self.name = self.last_name + elif len(self.first_name) != 0: + self.name = self.first_name + elif name is not None: + self.name = name + else: + self.name = 'unnamed user' + + if created is not None: + self.created = None + elif created_timestamp is not None: + self.created = datetime.datetime.fromtimestamp(created_timestamp) + else: + self.created = None + + self.token = token + + # TODO affliation + + @staticmethod + def get(*args, **kwargs) -> 'User': + return infrastructure.keycloak.get_user(*args, **kwargs) # type: ignore class UploadWithMetadata(): @@ -26,7 +75,7 @@ class UploadWithMetadata(): def __init__(self, **kwargs): self.upload_id: str = None - self.uploader: utils.POPO = None + self.uploader: str = None self.upload_time: datetime.datetime = None self.calcs: Iterable['CalcWithMetadata'] = list() @@ -85,7 +134,7 @@ class CalcWithMetadata(): # basic upload and processing related metadata self.upload_time: datetime.datetime = None self.files: List[str] = None - self.uploader: utils.POPO = None + self.uploader: str = None self.processed: bool = False self.last_processing: datetime.datetime = None self.nomad_version: str = None @@ -94,8 +143,8 @@ class CalcWithMetadata(): # user metadata, i.e. 
quantities given and editable by the user self.with_embargo: bool = None self.published: bool = False - self.coauthors: List[utils.POPO] = [] - self.shared_with: List[utils.POPO] = [] + self.coauthors: List[str] = [] + self.shared_with: List[str] = [] self.comment: str = None self.references: List[utils.POPO] = [] self.datasets: List[utils.POPO] = [] @@ -136,13 +185,15 @@ class CalcWithMetadata(): self.upload_time = metadata.get('_upload_time') uploader_id = metadata.get('_uploader') if uploader_id is not None: - self.uploader = utils.POPO(id=int(uploader_id)) + self.uploader = uploader_id self.references = [utils.POPO(value=ref) for ref in metadata.get('references', [])] self.with_embargo = metadata.get('with_embargo', False) self.coauthors = [ - utils.POPO(id=int(user)) for user in metadata.get('coauthors', [])] + user_id for user_id in metadata.get('coauthors', []) + if User.get(user_id=user_id) is not None] self.shared_with = [ - utils.POPO(id=int(user)) for user in metadata.get('shared_with', [])] + user_id for user_id in metadata.get('shared_with', []) + if User.get(user_id=user_id) is not None] self.datasets = [ utils.POPO(id=int(ds['id']), doi=utils.POPO(value=ds.get('_doi')), name=ds.get('_name')) for ds in metadata.get('datasets', [])] diff --git a/nomad/empty_repository_db.sql b/nomad/empty_repository_db.sql deleted file mode 100644 index df27962449..0000000000 --- a/nomad/empty_repository_db.sql +++ /dev/null @@ -1,1962 +0,0 @@ --- --- PostgreSQL database dump --- - -SET statement_timeout = 0; -SET lock_timeout = 0; -SET client_encoding = 'UTF8'; -SET standard_conforming_strings = on; -SELECT pg_catalog.set_config('search_path', '', false); -SET check_function_bodies = false; -SET client_min_messages = warning; - --- --- Name: plpgsql; Type: EXTENSION; Schema: -; Owner: --- - -CREATE EXTENSION IF NOT EXISTS plpgsql WITH SCHEMA pg_catalog; - - --- --- Name: EXTENSION plpgsql; Type: COMMENT; Schema: -; Owner: --- - -COMMENT ON EXTENSION plpgsql IS 'PL/pgSQL procedural language'; - - --- --- Name: citation_kind_enum; Type: TYPE; Schema: public; Owner: postgres --- - -CREATE TYPE public.citation_kind_enum AS ENUM ( - 'EXTERNAL', - 'INTERNAL' -); - - -ALTER TYPE public.citation_kind_enum OWNER TO postgres; - -SET default_tablespace = ''; - -SET default_with_oids = false; - --- --- Name: affiliations; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.affiliations ( - a_id integer NOT NULL, - name character varying NOT NULL, - address character varying NOT NULL, - email_domain character varying -); - - -ALTER TABLE public.affiliations OWNER TO postgres; - --- --- Name: affiliations_a_id_seq; Type: SEQUENCE; Schema: public; Owner: postgres --- - -CREATE SEQUENCE public.affiliations_a_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE public.affiliations_a_id_seq OWNER TO postgres; - --- --- Name: affiliations_a_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: postgres --- - -ALTER SEQUENCE public.affiliations_a_id_seq OWNED BY public.affiliations.a_id; - - --- --- Name: alembic_version; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.alembic_version ( - version_num character varying(32) NOT NULL -); - - -ALTER TABLE public.alembic_version OWNER TO postgres; - --- --- Name: atoms; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.atoms ( - atom_id integer NOT NULL, - struct_id integer NOT NULL, - number integer NOT NULL, - x double 
precision NOT NULL, - y double precision NOT NULL, - z double precision NOT NULL, - charge double precision, - magmom double precision, - rmt double precision -); - - -ALTER TABLE public.atoms OWNER TO postgres; - --- --- Name: atoms_atom_id_seq; Type: SEQUENCE; Schema: public; Owner: postgres --- - -CREATE SEQUENCE public.atoms_atom_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE public.atoms_atom_id_seq OWNER TO postgres; - --- --- Name: atoms_atom_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: postgres --- - -ALTER SEQUENCE public.atoms_atom_id_seq OWNED BY public.atoms.atom_id; - - --- --- Name: basis_sets; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.basis_sets ( - calc_id integer NOT NULL, - type character varying NOT NULL, - rgkmax double precision, - lmaxapw double precision, - lmaxmat double precision, - lmaxvr double precision, - gmaxvr double precision, - repr bytea -); - - -ALTER TABLE public.basis_sets OWNER TO postgres; - --- --- Name: calcsets; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.calcsets ( - parent_calc_id integer NOT NULL, - children_calc_id integer NOT NULL -); - - -ALTER TABLE public.calcsets OWNER TO postgres; - --- --- Name: calculations; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.calculations ( - checksum character varying, - siblings_count integer, - pottype_id integer, - origin_id integer, - nested_depth integer, - frozen boolean, - calc_id integer NOT NULL, - handlepid varchar(100) NULL -); - - -ALTER TABLE public.calculations OWNER TO postgres; - --- --- Name: calculations_calc_id_seq; Type: SEQUENCE; Schema: public; Owner: postgres --- - -CREATE SEQUENCE public.calculations_calc_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE public.calculations_calc_id_seq OWNER TO postgres; - --- --- Name: calculations_calc_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: postgres --- - -ALTER SEQUENCE public.calculations_calc_id_seq OWNED BY public.calculations.calc_id; - - --- --- Name: charges; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.charges ( - calc_id integer NOT NULL, - core double precision NOT NULL, - leakage double precision NOT NULL, - valence double precision NOT NULL, - interstitial double precision NOT NULL, - muffintins double precision NOT NULL, - total double precision NOT NULL -); - - -ALTER TABLE public.charges OWNER TO postgres; - --- --- Name: citations; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.citations ( - citation_id integer NOT NULL, - value character varying NOT NULL, - kind public.citation_kind_enum -); - - -ALTER TABLE public.citations OWNER TO postgres; - --- --- Name: citations_citation_id_seq; Type: SEQUENCE; Schema: public; Owner: postgres --- - -CREATE SEQUENCE public.citations_citation_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE public.citations_citation_id_seq OWNER TO postgres; - --- --- Name: citations_citation_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: postgres --- - -ALTER SEQUENCE public.citations_citation_id_seq OWNED BY public.citations.citation_id; - - --- --- Name: coauthorships; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.coauthorships ( - calc_id integer, - user_id integer -); - - -ALTER TABLE 
public.coauthorships OWNER TO postgres; - --- --- Name: codefamilies; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.codefamilies ( - family_id integer NOT NULL, - content character varying NOT NULL -); - - -ALTER TABLE public.codefamilies OWNER TO postgres; - --- --- Name: codefamilies_family_id_seq; Type: SEQUENCE; Schema: public; Owner: postgres --- - -CREATE SEQUENCE public.codefamilies_family_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE public.codefamilies_family_id_seq OWNER TO postgres; - --- --- Name: codefamilies_family_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: postgres --- - -ALTER SEQUENCE public.codefamilies_family_id_seq OWNED BY public.codefamilies.family_id; - - --- --- Name: codeversions; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.codeversions ( - version_id integer NOT NULL, - family_id integer, - content character varying NOT NULL -); - - -ALTER TABLE public.codeversions OWNER TO postgres; - --- --- Name: codeversions_version_id_seq; Type: SEQUENCE; Schema: public; Owner: postgres --- - -CREATE SEQUENCE public.codeversions_version_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE public.codeversions_version_id_seq OWNER TO postgres; - --- --- Name: codeversions_version_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: postgres --- - -ALTER SEQUENCE public.codeversions_version_id_seq OWNED BY public.codeversions.version_id; - - --- --- Name: doi_mapping; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.doi_mapping ( - calc_id integer NOT NULL, - id_str character varying NOT NULL -); - - -ALTER TABLE public.doi_mapping OWNER TO postgres; - --- --- Name: doi_mapping_calc_id_seq; Type: SEQUENCE; Schema: public; Owner: postgres --- - -CREATE SEQUENCE public.doi_mapping_calc_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE public.doi_mapping_calc_id_seq OWNER TO postgres; - --- --- Name: doi_mapping_calc_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: postgres --- - -ALTER SEQUENCE public.doi_mapping_calc_id_seq OWNED BY public.doi_mapping.calc_id; - - --- --- Name: eigenvalues; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.eigenvalues ( - eid integer NOT NULL, - electrons_calc_id integer, - phonons_calc_id integer, - dos bytea, - bands bytea, - projected bytea, - eigenvalues bytea -); - - -ALTER TABLE public.eigenvalues OWNER TO postgres; - --- --- Name: eigenvalues_eid_seq; Type: SEQUENCE; Schema: public; Owner: postgres --- - -CREATE SEQUENCE public.eigenvalues_eid_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE public.eigenvalues_eid_seq OWNER TO postgres; - --- --- Name: eigenvalues_eid_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: postgres --- - -ALTER SEQUENCE public.eigenvalues_eid_seq OWNED BY public.eigenvalues.eid; - - --- --- Name: electrons; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.electrons ( - calc_id integer NOT NULL, - gap double precision, - is_direct integer -); - - -ALTER TABLE public.electrons OWNER TO postgres; - --- --- Name: energies; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.energies ( - calc_id integer NOT NULL, - convergence bytea, - total double precision -); - - -ALTER TABLE public.energies OWNER 
TO postgres; - --- --- Name: forces; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.forces ( - calc_id integer NOT NULL, - "values" bytea NOT NULL -); - - -ALTER TABLE public.forces OWNER TO postgres; - --- --- Name: grid; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.grid ( - calc_id integer NOT NULL, - info bytea -); - - -ALTER TABLE public.grid OWNER TO postgres; - --- --- Name: lattices; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.lattices ( - struct_id integer NOT NULL, - a double precision NOT NULL, - b double precision NOT NULL, - c double precision NOT NULL, - alpha double precision NOT NULL, - beta double precision NOT NULL, - gamma double precision NOT NULL, - a11 double precision NOT NULL, - a12 double precision NOT NULL, - a13 double precision NOT NULL, - a21 double precision NOT NULL, - a22 double precision NOT NULL, - a23 double precision NOT NULL, - a31 double precision NOT NULL, - a32 double precision NOT NULL, - a33 double precision NOT NULL -); - - -ALTER TABLE public.lattices OWNER TO postgres; - --- --- Name: login_tokens; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.login_tokens ( - user_id integer, - token character varying NOT NULL, - valid_until timestamp with time zone -); - - -ALTER TABLE public.login_tokens OWNER TO postgres; - --- --- Name: metadata; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.metadata ( - version_id int4 NULL, - "location" varchar NULL, - finished int4 NULL, - raw_input text NULL, - modeling_time float8 NULL, - chemical_formula varchar NULL, - added timestamptz NULL, - download_size int8 NULL, - filenames bytea NULL, - calc_id int4 NOT NULL, - oadate timestamptz NULL -); - -ALTER TABLE public.metadata OWNER TO postgres; - --- --- Name: metadata_citations; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.metadata_citations ( - calc_id integer, - citation_id integer -); - - -ALTER TABLE public.metadata_citations OWNER TO postgres; - --- --- Name: ownerships; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.ownerships ( - calc_id integer, - user_id integer -); - - -ALTER TABLE public.ownerships OWNER TO postgres; - --- --- Name: phonons; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.phonons ( - calc_id integer NOT NULL -); - - -ALTER TABLE public.phonons OWNER TO postgres; - --- --- Name: pottypes; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.pottypes ( - pottype_id integer NOT NULL, - name character varying NOT NULL -); - - -ALTER TABLE public.pottypes OWNER TO postgres; - --- --- Name: pottypes_pottype_id_seq; Type: SEQUENCE; Schema: public; Owner: postgres --- - -CREATE SEQUENCE public.pottypes_pottype_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE public.pottypes_pottype_id_seq OWNER TO postgres; - --- --- Name: pottypes_pottype_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: postgres --- - -ALTER SEQUENCE public.pottypes_pottype_id_seq OWNED BY public.pottypes.pottype_id; - - --- --- Name: pragma; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.pragma ( - content character varying NOT NULL -); - - -ALTER TABLE public.pragma OWNER TO postgres; - --- --- Name: recipintegs; Type: TABLE; Schema: public; Owner: 
postgres; Tablespace: --- - -CREATE TABLE public.recipintegs ( - calc_id integer NOT NULL, - kgrid character varying, - kshift double precision, - smearing double precision, - smeartype character varying -); - - -ALTER TABLE public.recipintegs OWNER TO postgres; - --- --- Name: sessions; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.sessions ( - token character varying NOT NULL, - user_id integer, - valid_until timestamp with time zone, - last_access timestamp with time zone, - permission integer -); - - -ALTER TABLE public.sessions OWNER TO postgres; - --- --- Name: shareships; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.shareships ( - calc_id integer, - user_id integer -); - - -ALTER TABLE public.shareships OWNER TO postgres; - --- --- Name: spacegroups; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.spacegroups ( - calc_id integer NOT NULL, - n integer NOT NULL -); - - -ALTER TABLE public.spacegroups OWNER TO postgres; - --- --- Name: struct_optimisation; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.struct_optimisation ( - calc_id integer NOT NULL, - tresholds bytea, - ncycles bytea -); - - -ALTER TABLE public.struct_optimisation OWNER TO postgres; - --- --- Name: struct_ratios; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.struct_ratios ( - calc_id integer NOT NULL, - chemical_formula character varying NOT NULL, - is_primitive boolean, - formula_units integer NOT NULL, - nelem integer NOT NULL, - dimensions double precision -); - - -ALTER TABLE public.struct_ratios OWNER TO postgres; - --- --- Name: structures; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.structures ( - struct_id integer NOT NULL, - calc_id integer NOT NULL, - step integer NOT NULL, - final boolean NOT NULL -); - - -ALTER TABLE public.structures OWNER TO postgres; - --- --- Name: structures_struct_id_seq; Type: SEQUENCE; Schema: public; Owner: postgres --- - -CREATE SEQUENCE public.structures_struct_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE public.structures_struct_id_seq OWNER TO postgres; - --- --- Name: structures_struct_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: postgres --- - -ALTER SEQUENCE public.structures_struct_id_seq OWNED BY public.structures.struct_id; - - --- --- Name: tags; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.tags ( - calc_id integer NOT NULL, - tid integer NOT NULL -); - - -ALTER TABLE public.tags OWNER TO postgres; - --- --- Name: topics; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.topics ( - tid integer NOT NULL, - cid integer NOT NULL, - topic character varying -); - - -ALTER TABLE public.topics OWNER TO postgres; - --- --- Name: topics_tid_seq; Type: SEQUENCE; Schema: public; Owner: postgres --- - -CREATE SEQUENCE public.topics_tid_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE public.topics_tid_seq OWNER TO postgres; - --- --- Name: topics_tid_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: postgres --- - -ALTER SEQUENCE public.topics_tid_seq OWNED BY public.topics.tid; - - --- --- Name: uploads; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.uploads ( - upload_id integer NOT NULL, - upload_name character varying NOT NULL, - 
user_id integer, - is_processed boolean NOT NULL, - created timestamp with time zone, - is_all_uploaded boolean, - target_path character varying, - skip_extraction boolean -); - - -ALTER TABLE public.uploads OWNER TO postgres; - --- --- Name: uploads_upload_id_seq; Type: SEQUENCE; Schema: public; Owner: postgres --- - -CREATE SEQUENCE public.uploads_upload_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE public.uploads_upload_id_seq OWNER TO postgres; - --- --- Name: uploads_upload_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: postgres --- - -ALTER SEQUENCE public.uploads_upload_id_seq OWNED BY public.uploads.upload_id; - - --- --- Name: user_metadata; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.user_metadata ( - calc_id integer NOT NULL, - permission integer, - label character varying -); - - -ALTER TABLE public.user_metadata OWNER TO postgres; - --- --- Name: users; Type: TABLE; Schema: public; Owner: postgres; Tablespace: --- - -CREATE TABLE public.users ( - user_id integer NOT NULL, - firstname character varying NOT NULL, - lastname character varying NOT NULL, - username character varying, - email character varying, - affiliation integer, - password character varying, - created timestamp with time zone -); - - -ALTER TABLE public.users OWNER TO postgres; - --- --- Name: users_user_id_seq; Type: SEQUENCE; Schema: public; Owner: postgres --- - -CREATE SEQUENCE public.users_user_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE public.users_user_id_seq OWNER TO postgres; - --- --- Name: users_user_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: postgres --- - -ALTER SEQUENCE public.users_user_id_seq OWNED BY public.users.user_id; - - --- --- Name: a_id; Type: DEFAULT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.affiliations ALTER COLUMN a_id SET DEFAULT nextval('public.affiliations_a_id_seq'::regclass); - - --- --- Name: atom_id; Type: DEFAULT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.atoms ALTER COLUMN atom_id SET DEFAULT nextval('public.atoms_atom_id_seq'::regclass); - - --- --- Name: calc_id; Type: DEFAULT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.calculations ALTER COLUMN calc_id SET DEFAULT nextval('public.calculations_calc_id_seq'::regclass); - - --- --- Name: citation_id; Type: DEFAULT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.citations ALTER COLUMN citation_id SET DEFAULT nextval('public.citations_citation_id_seq'::regclass); - - --- --- Name: family_id; Type: DEFAULT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.codefamilies ALTER COLUMN family_id SET DEFAULT nextval('public.codefamilies_family_id_seq'::regclass); - - --- --- Name: version_id; Type: DEFAULT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.codeversions ALTER COLUMN version_id SET DEFAULT nextval('public.codeversions_version_id_seq'::regclass); - - --- --- Name: calc_id; Type: DEFAULT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.doi_mapping ALTER COLUMN calc_id SET DEFAULT nextval('public.doi_mapping_calc_id_seq'::regclass); - - --- --- Name: eid; Type: DEFAULT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.eigenvalues ALTER COLUMN eid SET DEFAULT nextval('public.eigenvalues_eid_seq'::regclass); - - --- --- Name: pottype_id; Type: DEFAULT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.pottypes ALTER 
COLUMN pottype_id SET DEFAULT nextval('public.pottypes_pottype_id_seq'::regclass); - - --- --- Name: struct_id; Type: DEFAULT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.structures ALTER COLUMN struct_id SET DEFAULT nextval('public.structures_struct_id_seq'::regclass); - - --- --- Name: tid; Type: DEFAULT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.topics ALTER COLUMN tid SET DEFAULT nextval('public.topics_tid_seq'::regclass); - - --- --- Name: upload_id; Type: DEFAULT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.uploads ALTER COLUMN upload_id SET DEFAULT nextval('public.uploads_upload_id_seq'::regclass); - - --- --- Name: user_id; Type: DEFAULT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.users ALTER COLUMN user_id SET DEFAULT nextval('public.users_user_id_seq'::regclass); - - --- --- Data for Name: affiliations; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Name: affiliations_a_id_seq; Type: SEQUENCE SET; Schema: public; Owner: postgres --- - -SELECT pg_catalog.setval('public.affiliations_a_id_seq', 1, false); - - --- --- Data for Name: alembic_version; Type: TABLE DATA; Schema: public; Owner: postgres --- - -INSERT INTO public.alembic_version VALUES ('12ed242720e1'); - - --- --- Data for Name: atoms; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Name: atoms_atom_id_seq; Type: SEQUENCE SET; Schema: public; Owner: postgres --- - -SELECT pg_catalog.setval('public.atoms_atom_id_seq', 1, false); - - --- --- Data for Name: basis_sets; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Data for Name: calcsets; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Data for Name: calculations; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Name: calculations_calc_id_seq; Type: SEQUENCE SET; Schema: public; Owner: postgres --- - -SELECT pg_catalog.setval('public.calculations_calc_id_seq', 1, false); - - --- --- Data for Name: charges; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Data for Name: citations; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Name: citations_citation_id_seq; Type: SEQUENCE SET; Schema: public; Owner: postgres --- - -SELECT pg_catalog.setval('public.citations_citation_id_seq', 1, false); - - --- --- Data for Name: coauthorships; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Data for Name: codefamilies; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Name: codefamilies_family_id_seq; Type: SEQUENCE SET; Schema: public; Owner: postgres --- - -SELECT pg_catalog.setval('public.codefamilies_family_id_seq', 1, false); - - --- --- Data for Name: codeversions; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Name: codeversions_version_id_seq; Type: SEQUENCE SET; Schema: public; Owner: postgres --- - -SELECT pg_catalog.setval('public.codeversions_version_id_seq', 1, false); - - --- --- Data for Name: doi_mapping; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Name: doi_mapping_calc_id_seq; Type: SEQUENCE SET; Schema: public; Owner: postgres --- - -SELECT pg_catalog.setval('public.doi_mapping_calc_id_seq', 1, false); - - --- --- Data for Name: eigenvalues; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Name: eigenvalues_eid_seq; Type: SEQUENCE SET; Schema: public; Owner: postgres --- - -SELECT pg_catalog.setval('public.eigenvalues_eid_seq', 1, false); - - --- --- Data 
for Name: electrons; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Data for Name: energies; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Data for Name: forces; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Data for Name: grid; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Data for Name: lattices; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Data for Name: login_tokens; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Data for Name: metadata; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Data for Name: metadata_citations; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Data for Name: ownerships; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Data for Name: phonons; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Data for Name: pottypes; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Name: pottypes_pottype_id_seq; Type: SEQUENCE SET; Schema: public; Owner: postgres --- - -SELECT pg_catalog.setval('public.pottypes_pottype_id_seq', 1, false); - - --- --- Data for Name: pragma; Type: TABLE DATA; Schema: public; Owner: postgres --- - -INSERT INTO public.pragma VALUES ('4.59'); - - --- --- Data for Name: recipintegs; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Data for Name: sessions; Type: TABLE DATA; Schema: public; Owner: postgres --- -INSERT INTO public.sessions VALUES ('leonard', 2, '2100-12-17 09:00:00+00', NULL, NULL); -INSERT INTO public.sessions VALUES ('sheldon', 1, '2100-12-17 09:00:00+00', NULL, NULL); -INSERT INTO public.sessions VALUES ('admin', 0, '2100-12-17 09:00:00+00', NULL, NULL); - - --- --- Data for Name: shareships; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Data for Name: spacegroups; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Data for Name: struct_optimisation; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Data for Name: struct_ratios; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Data for Name: structures; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Name: structures_struct_id_seq; Type: SEQUENCE SET; Schema: public; Owner: postgres --- - -SELECT pg_catalog.setval('public.structures_struct_id_seq', 1, false); - - --- --- Data for Name: tags; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Data for Name: topics; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Name: topics_tid_seq; Type: SEQUENCE SET; Schema: public; Owner: postgres --- - -SELECT pg_catalog.setval('public.topics_tid_seq', 1, false); - - --- --- Data for Name: uploads; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Name: uploads_upload_id_seq; Type: SEQUENCE SET; Schema: public; Owner: postgres --- - -SELECT pg_catalog.setval('public.uploads_upload_id_seq', 1, false); - - --- --- Data for Name: user_metadata; Type: TABLE DATA; Schema: public; Owner: postgres --- - - - --- --- Data for Name: users; Type: TABLE DATA; Schema: public; Owner: postgres --- -INSERT INTO public.affiliations VALUES (0, 'UCAL', 'LA, California', NULL); -INSERT INTO public.users VALUES (0, 'admin', 'admin', 'admin', 'admin', NULL, NULL, NULL); -INSERT INTO public.users VALUES (1, 'Sheldon', 'Cooper', 'sheldon.cooper', 'sheldon.cooper@nomad-fairdi.tests.de', 0, 
'$2y$12$jths1LQPsLofuBQ3evVIluhQeQ/BZfbdTSZHFcPGdcNmHz2WvDj.y', NULL); -INSERT INTO public.users VALUES (2, 'Leonard', 'Hofstadter', 'leonard.hofstadter', 'leonard.hofstadter@nomad-fairdi.tests.de', 0, '$2y$12$jths1LQPsLofuBQ3evVIluhQeQ/BZfbdTSZHFcPGdcNmHz2WvDj.y', NULL); - - --- --- Name: users_user_id_seq; Type: SEQUENCE SET; Schema: public; Owner: postgres --- - -SELECT pg_catalog.setval('public.users_user_id_seq', 3, true); - - --- --- Name: affiliations_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.affiliations - ADD CONSTRAINT affiliations_pkey PRIMARY KEY (a_id); - - --- --- Name: atoms_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.atoms - ADD CONSTRAINT atoms_pkey PRIMARY KEY (atom_id); - - --- --- Name: basis_sets_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.basis_sets - ADD CONSTRAINT basis_sets_pkey PRIMARY KEY (calc_id); - - --- --- Name: calculations_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.calculations - ADD CONSTRAINT calculations_pkey PRIMARY KEY (calc_id); - - --- --- Name: charges_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.charges - ADD CONSTRAINT charges_pkey PRIMARY KEY (calc_id); - - --- --- Name: citations_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.citations - ADD CONSTRAINT citations_pkey PRIMARY KEY (citation_id); - - --- --- Name: citations_value_key; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.citations - ADD CONSTRAINT citations_value_key UNIQUE (value); - - --- --- Name: codefamilies_content_key; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.codefamilies - ADD CONSTRAINT codefamilies_content_key UNIQUE (content); - - --- --- Name: codefamilies_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.codefamilies - ADD CONSTRAINT codefamilies_pkey PRIMARY KEY (family_id); - - --- --- Name: codeversions_content_key; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.codeversions - ADD CONSTRAINT codeversions_content_key UNIQUE (content); - - --- --- Name: codeversions_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.codeversions - ADD CONSTRAINT codeversions_pkey PRIMARY KEY (version_id); - - --- --- Name: doi_mapping_id_str_key; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.doi_mapping - ADD CONSTRAINT doi_mapping_id_str_key UNIQUE (id_str); - - --- --- Name: doi_mapping_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.doi_mapping - ADD CONSTRAINT doi_mapping_pkey PRIMARY KEY (calc_id); - - --- --- Name: eigenvalues_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.eigenvalues - ADD CONSTRAINT eigenvalues_pkey PRIMARY KEY (eid); - - --- --- Name: electrons_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.electrons - ADD CONSTRAINT electrons_pkey PRIMARY KEY (calc_id); - - --- --- Name: energies_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.energies - ADD 
CONSTRAINT energies_pkey PRIMARY KEY (calc_id); - - --- --- Name: forces_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.forces - ADD CONSTRAINT forces_pkey PRIMARY KEY (calc_id); - - --- --- Name: grid_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.grid - ADD CONSTRAINT grid_pkey PRIMARY KEY (calc_id); - - --- --- Name: lattices_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.lattices - ADD CONSTRAINT lattices_pkey PRIMARY KEY (struct_id); - - --- --- Name: login_tokens_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.login_tokens - ADD CONSTRAINT login_tokens_pkey PRIMARY KEY (token); - - --- --- Name: metadata_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.metadata - ADD CONSTRAINT metadata_pkey PRIMARY KEY (calc_id); - - --- --- Name: phonons_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.phonons - ADD CONSTRAINT phonons_pkey PRIMARY KEY (calc_id); - - --- --- Name: pottypes_name_key; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.pottypes - ADD CONSTRAINT pottypes_name_key UNIQUE (name); - - --- --- Name: pottypes_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.pottypes - ADD CONSTRAINT pottypes_pkey PRIMARY KEY (pottype_id); - - --- --- Name: pragma_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.pragma - ADD CONSTRAINT pragma_pkey PRIMARY KEY (content); - - --- --- Name: recipintegs_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.recipintegs - ADD CONSTRAINT recipintegs_pkey PRIMARY KEY (calc_id); - - --- --- Name: sessions_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.sessions - ADD CONSTRAINT sessions_pkey PRIMARY KEY (token); - - --- --- Name: spacegroups_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.spacegroups - ADD CONSTRAINT spacegroups_pkey PRIMARY KEY (calc_id); - - --- --- Name: struct_optimisation_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.struct_optimisation - ADD CONSTRAINT struct_optimisation_pkey PRIMARY KEY (calc_id); - - --- --- Name: struct_ratios_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.struct_ratios - ADD CONSTRAINT struct_ratios_pkey PRIMARY KEY (calc_id); - - --- --- Name: structures_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.structures - ADD CONSTRAINT structures_pkey PRIMARY KEY (struct_id); - - --- --- Name: topics_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.topics - ADD CONSTRAINT topics_pkey PRIMARY KEY (tid); - - --- --- Name: u_children_parent_calc_id; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.calcsets - ADD CONSTRAINT u_children_parent_calc_id UNIQUE (children_calc_id, parent_calc_id); - - --- --- Name: u_coauthorships_calc_id_user; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.coauthorships - ADD CONSTRAINT 
u_coauthorships_calc_id_user UNIQUE (calc_id, user_id); - - --- --- Name: u_coauthorships_user_calc_id; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.coauthorships - ADD CONSTRAINT u_coauthorships_user_calc_id UNIQUE (user_id, calc_id); - - --- --- Name: u_metadata_citations_calc_citation; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.metadata_citations - ADD CONSTRAINT u_metadata_citations_calc_citation UNIQUE (calc_id, citation_id); - - --- --- Name: u_metadata_citations_citation_calc; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.metadata_citations - ADD CONSTRAINT u_metadata_citations_citation_calc UNIQUE (citation_id, calc_id); - - --- --- Name: u_ownerships_calc_id_user; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.ownerships - ADD CONSTRAINT u_ownerships_calc_id_user UNIQUE (calc_id, user_id); - - --- --- Name: u_ownerships_user_calc_id; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.ownerships - ADD CONSTRAINT u_ownerships_user_calc_id UNIQUE (user_id, calc_id); - - --- --- Name: u_parent_children_calc_id; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.calcsets - ADD CONSTRAINT u_parent_children_calc_id PRIMARY KEY (parent_calc_id, children_calc_id); - - --- --- Name: u_shareships_calc_id_user; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.shareships - ADD CONSTRAINT u_shareships_calc_id_user UNIQUE (calc_id, user_id); - - --- --- Name: u_shareships_user_calc_id; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.shareships - ADD CONSTRAINT u_shareships_user_calc_id UNIQUE (user_id, calc_id); - - --- --- Name: u_tags_calc_id_tid; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.tags - ADD CONSTRAINT u_tags_calc_id_tid UNIQUE (calc_id, tid); - - --- --- Name: u_tags_tid_calc_id; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.tags - ADD CONSTRAINT u_tags_tid_calc_id UNIQUE (tid, calc_id); - - --- --- Name: uploads_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.uploads - ADD CONSTRAINT uploads_pkey PRIMARY KEY (upload_id); - - --- --- Name: user_metadata_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.user_metadata - ADD CONSTRAINT user_metadata_pkey PRIMARY KEY (calc_id); - - --- --- Name: users_email_key; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.users - ADD CONSTRAINT users_email_key UNIQUE (email); - - --- --- Name: users_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.users - ADD CONSTRAINT users_pkey PRIMARY KEY (user_id); - - --- --- Name: users_username_key; Type: CONSTRAINT; Schema: public; Owner: postgres; Tablespace: --- - -ALTER TABLE ONLY public.users - ADD CONSTRAINT users_username_key UNIQUE (username); - - --- --- Name: atoms_struct_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.atoms - ADD CONSTRAINT atoms_struct_id_fkey FOREIGN KEY (struct_id) REFERENCES public.structures(struct_id); - - --- --- Name: basis_sets_calc_id_fkey; Type: FK CONSTRAINT; Schema: 
public; Owner: postgres --- - -ALTER TABLE ONLY public.basis_sets - ADD CONSTRAINT basis_sets_calc_id_fkey FOREIGN KEY (calc_id) REFERENCES public.calculations(calc_id) ON DELETE CASCADE; - - --- --- Name: calcsets_children_calc_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.calcsets - ADD CONSTRAINT calcsets_children_calc_id_fkey FOREIGN KEY (children_calc_id) REFERENCES public.calculations(calc_id) ON DELETE CASCADE; - - --- --- Name: calcsets_parent_calc_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.calcsets - ADD CONSTRAINT calcsets_parent_calc_id_fkey FOREIGN KEY (parent_calc_id) REFERENCES public.calculations(calc_id) ON DELETE CASCADE; - - --- --- Name: calculations_origin_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.calculations - ADD CONSTRAINT calculations_origin_id_fkey FOREIGN KEY (origin_id) REFERENCES public.uploads(upload_id) ON DELETE CASCADE; - - --- --- Name: calculations_pottype_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.calculations - ADD CONSTRAINT calculations_pottype_id_fkey FOREIGN KEY (pottype_id) REFERENCES public.pottypes(pottype_id); - - --- --- Name: charges_calc_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.charges - ADD CONSTRAINT charges_calc_id_fkey FOREIGN KEY (calc_id) REFERENCES public.calculations(calc_id) ON DELETE CASCADE; - - --- --- Name: coauthorships_calc_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.coauthorships - ADD CONSTRAINT coauthorships_calc_id_fkey FOREIGN KEY (calc_id) REFERENCES public.calculations(calc_id) ON DELETE CASCADE; - - --- --- Name: coauthorships_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.coauthorships - ADD CONSTRAINT coauthorships_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(user_id); - - --- --- Name: codeversions_family_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.codeversions - ADD CONSTRAINT codeversions_family_id_fkey FOREIGN KEY (family_id) REFERENCES public.codefamilies(family_id); - - --- --- Name: eigenvalues_electrons_calc_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.eigenvalues - ADD CONSTRAINT eigenvalues_electrons_calc_id_fkey FOREIGN KEY (electrons_calc_id) REFERENCES public.electrons(calc_id) ON DELETE CASCADE; - - --- --- Name: eigenvalues_phonons_calc_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.eigenvalues - ADD CONSTRAINT eigenvalues_phonons_calc_id_fkey FOREIGN KEY (phonons_calc_id) REFERENCES public.phonons(calc_id) ON DELETE CASCADE; - - --- --- Name: electrons_calc_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.electrons - ADD CONSTRAINT electrons_calc_id_fkey FOREIGN KEY (calc_id) REFERENCES public.calculations(calc_id) ON DELETE CASCADE; - - --- --- Name: energies_calc_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.energies - ADD CONSTRAINT energies_calc_id_fkey FOREIGN KEY (calc_id) REFERENCES public.calculations(calc_id) ON DELETE CASCADE; - - --- --- Name: forces_calc_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.forces - ADD CONSTRAINT forces_calc_id_fkey FOREIGN KEY 
(calc_id) REFERENCES public.calculations(calc_id) ON DELETE CASCADE; - - --- --- Name: grid_calc_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.grid - ADD CONSTRAINT grid_calc_id_fkey FOREIGN KEY (calc_id) REFERENCES public.calculations(calc_id) ON DELETE CASCADE; - - --- --- Name: lattices_struct_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.lattices - ADD CONSTRAINT lattices_struct_id_fkey FOREIGN KEY (struct_id) REFERENCES public.structures(struct_id); - - --- --- Name: login_tokens_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.login_tokens - ADD CONSTRAINT login_tokens_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(user_id); - - --- --- Name: metadata_calc_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.metadata - ADD CONSTRAINT metadata_calc_id_fkey FOREIGN KEY (calc_id) REFERENCES public.calculations(calc_id) ON DELETE CASCADE; - - --- --- Name: metadata_citations_calc_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.metadata_citations - ADD CONSTRAINT metadata_citations_calc_id_fkey FOREIGN KEY (calc_id) REFERENCES public.calculations(calc_id) ON DELETE CASCADE; - - --- --- Name: metadata_citations_citation_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.metadata_citations - ADD CONSTRAINT metadata_citations_citation_id_fkey FOREIGN KEY (citation_id) REFERENCES public.citations(citation_id); - - --- --- Name: metadata_version_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.metadata - ADD CONSTRAINT metadata_version_id_fkey FOREIGN KEY (version_id) REFERENCES public.codeversions(version_id); - - --- --- Name: ownerships_calc_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.ownerships - ADD CONSTRAINT ownerships_calc_id_fkey FOREIGN KEY (calc_id) REFERENCES public.calculations(calc_id) ON DELETE CASCADE; - - --- --- Name: ownerships_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.ownerships - ADD CONSTRAINT ownerships_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(user_id); - - --- --- Name: phonons_calc_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.phonons - ADD CONSTRAINT phonons_calc_id_fkey FOREIGN KEY (calc_id) REFERENCES public.calculations(calc_id) ON DELETE CASCADE; - - --- --- Name: recipintegs_calc_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.recipintegs - ADD CONSTRAINT recipintegs_calc_id_fkey FOREIGN KEY (calc_id) REFERENCES public.calculations(calc_id) ON DELETE CASCADE; - - --- --- Name: sessions_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.sessions - ADD CONSTRAINT sessions_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(user_id); - - --- --- Name: shareships_calc_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.shareships - ADD CONSTRAINT shareships_calc_id_fkey FOREIGN KEY (calc_id) REFERENCES public.calculations(calc_id) ON DELETE CASCADE; - - --- --- Name: shareships_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.shareships - ADD CONSTRAINT shareships_user_id_fkey FOREIGN KEY 
(user_id) REFERENCES public.users(user_id); - - --- --- Name: spacegroups_calc_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.spacegroups - ADD CONSTRAINT spacegroups_calc_id_fkey FOREIGN KEY (calc_id) REFERENCES public.calculations(calc_id) ON DELETE CASCADE; - - --- --- Name: struct_optimisation_calc_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.struct_optimisation - ADD CONSTRAINT struct_optimisation_calc_id_fkey FOREIGN KEY (calc_id) REFERENCES public.calculations(calc_id) ON DELETE CASCADE; - - --- --- Name: struct_ratios_calc_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.struct_ratios - ADD CONSTRAINT struct_ratios_calc_id_fkey FOREIGN KEY (calc_id) REFERENCES public.calculations(calc_id) ON DELETE CASCADE; - - --- --- Name: structures_calc_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.structures - ADD CONSTRAINT structures_calc_id_fkey FOREIGN KEY (calc_id) REFERENCES public.calculations(calc_id) ON DELETE CASCADE; - - --- --- Name: tags_calc_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.tags - ADD CONSTRAINT tags_calc_id_fkey FOREIGN KEY (calc_id) REFERENCES public.calculations(calc_id) ON DELETE CASCADE; - - --- --- Name: tags_tid_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.tags - ADD CONSTRAINT tags_tid_fkey FOREIGN KEY (tid) REFERENCES public.topics(tid); - - --- --- Name: uploads_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.uploads - ADD CONSTRAINT uploads_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(user_id); - - --- --- Name: user_metadata_calc_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.user_metadata - ADD CONSTRAINT user_metadata_calc_id_fkey FOREIGN KEY (calc_id) REFERENCES public.calculations(calc_id) ON DELETE CASCADE; - - --- --- Name: users_affiliation_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.users - ADD CONSTRAINT users_affiliation_fkey FOREIGN KEY (affiliation) REFERENCES public.affiliations(a_id); - - --- --- Name: SCHEMA public; Type: ACL; Schema: -; Owner: postgres --- - -REVOKE ALL ON SCHEMA public FROM PUBLIC; -REVOKE ALL ON SCHEMA public FROM postgres; -GRANT ALL ON SCHEMA public TO postgres; -GRANT ALL ON SCHEMA public TO PUBLIC; - - --- --- PostgreSQL database dump complete --- - diff --git a/nomad/files.py b/nomad/files.py index 68e87230c2..139b1b54cf 100644 --- a/nomad/files.py +++ b/nomad/files.py @@ -421,13 +421,6 @@ class StagingUploadFiles(UploadFiles): """ Returns True if this upload is already *bagged*. """ return self._frozen_file.exists() - def create_extracted_copy(self) -> None: - """ - Copies all raw-file to the extracted bucket to mimic the behavior of the old - CoE python API. TODO: should be removed after migration. - """ - copytree(self._raw_dir.os_path, os.path.join(config.fs.coe_extracted, self.upload_id)) - def pack( self, upload: UploadWithMetadata, target_dir: DirectoryObject = None, skip_raw: bool = False) -> None: diff --git a/nomad/infrastructure.py b/nomad/infrastructure.py index 3008788359..a203936804 100644 --- a/nomad/infrastructure.py +++ b/nomad/infrastructure.py @@ -19,20 +19,19 @@ is run once for each *api* and *worker* process. 
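For orientation, a minimal sketch of that once-per-process bootstrap as it looks after this change, assuming only the setup helpers kept in this hunk (logging, mongo, elastic):

    from nomad import infrastructure

    def main():
        # one-time infrastructure bootstrap for an api or worker process;
        # with this patch there is no repository postgres db left to connect to
        infrastructure.setup()

    if __name__ == '__main__':
        main()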
Individual functions for partia exist to facilitate testing, :py:mod:`nomad.migration`, aspects of :py:mod:`nomad.cli`, etc. """ +from typing import Union import os.path import shutil -from contextlib import contextmanager -import psycopg2 -import psycopg2.extensions -from sqlalchemy import create_engine -from sqlalchemy.orm import Session from elasticsearch.exceptions import RequestError from elasticsearch_dsl import connections from mongoengine import connect -from passlib.hash import bcrypt import smtplib from email.mime.text import MIMEText -import keycloak +from keycloak import KeycloakOpenID, KeycloakAdmin +from flask_oidc import OpenIDConnect +import json +from flask import g, request +import basicauth from nomad import config, utils @@ -44,16 +43,6 @@ elastic_client = None mongo_client = None """ The pymongo mongodb client. """ -repository_db = None -""" The repository postgres db sqlalchemy session. """ -repository_db_conn = None -""" The repository postgres db sqlalchemy connection. """ - -keycloak_oidc_client = None -""" The keycode OpenID connect client. """ -keycloak_admin_client = None -""" The keycode admin client. """ - def setup(): """ @@ -66,7 +55,6 @@ def setup(): setup_logging() setup_mongo() setup_elastic() - setup_repository_db(readonly=False) def setup_logging(): @@ -114,140 +102,130 @@ def setup_elastic(): return elastic_client -def setup_keycloak(): - """ Creates a keycloak client. """ - global keycloak_oidc_client - global keycloak_admin_client +class Keycloak(): + """ + A class that encapsulates all keycloak related functions for easier mocking and + configuration + """ + def __init__(self): + self._flask_oidc = None + self.__oidc_client = None + self.__admin_client = None + + def configure_flask(self, app): + oidc_issuer_url = '%s/realms/%s' % (config.keycloak.server_url.rstrip('/'), config.keycloak.realm_name) + oidc_client_secrets = dict( + client_id=config.keycloak.client_id, + client_secret=config.keycloak.client_secret_key, + issuer=oidc_issuer_url, + auth_uri='%s/protocol/openid-connect/auth' % oidc_issuer_url, + token_uri='%s/protocol/openid-connect/token' % oidc_issuer_url, + userinfo_uri='%s/protocol/openid-connect/userinfo' % oidc_issuer_url, + token_introspection_uri='%s/protocol/openid-connect/token/introspect' % oidc_issuer_url, + redirect_uris=['http://localhost/fairdi/nomad/latest']) + oidc_client_secrets_file = os.path.join(config.fs.tmp, 'oidc_client_secrets') + with open(oidc_client_secrets_file, 'wt') as f: + json.dump(dict(web=oidc_client_secrets), f) + app.config.update(dict( + SECRET_KEY=config.services.api_secret, + OIDC_CLIENT_SECRETS=oidc_client_secrets_file, + OIDC_OPENID_REALM=config.keycloak.realm_name)) + + self._flask_oidc = OpenIDConnect(app) + + @property + def _oidc_client(self): + if self.__oidc_client is None: + self.__oidc_client = KeycloakOpenID( + server_url=config.keycloak.server_url, + client_id=config.keycloak.client_id, + realm_name=config.keycloak.realm_name, + client_secret_key=config.keycloak.client_secret_key) + + return self.__oidc_client + + def authorize_flask(self, token_only: bool = True) -> Union[str, object]: + token = None + if 'Authorization' in request.headers and request.headers['Authorization'].startswith('Bearer '): + token = request.headers['Authorization'].split(None, 1)[1].strip() + elif 'access_token' in request.form: + token = request.form['access_token'] + elif 'access_token' in request.args: + token = request.args['access_token'] + elif 'Authorization' in request.headers and 
request.headers['Authorization'].startswith('Basic '): + if token_only: + return 'Basic authentication not allowed, use Bearer token instead' - keycloak_oidc_client = keycloak.KeycloakOpenID( - server_url=config.keycloak.server_url, - client_id=config.keycloak.client_id, - realm_name=config.keycloak.realm_name, - client_secret_key=config.keycloak.client_secret_key) + try: + username, password = basicauth.decode(request.headers['Authorization']) + token_info = self._oidc_client.token(username=username, password=password) + token = token_info['access_token'] + except Exception as e: + # TODO logging + return 'Could not authenticate Basic auth: %s' % str(e) - keycloak_admin_client = keycloak.KeycloakAdmin( - server_url=config.keycloak.server_url, - username=config.keycloak.username, - password=config.keycloak.password, - realm_name='master', - verify=True) - keycloak_admin_client.realm_name = config.keycloak.realm_name + if token is not None: + validity = self._flask_oidc.validate_token(token) + if validity is not True: + return validity -def setup_repository_db(**kwargs): - """ Creates a connection and stores it in the module variables. """ - repo_args = dict(readonly=False) - repo_args.update(kwargs) - connection, db = sqlalchemy_repository_db(**kwargs) + else: + g.oidc_id_token = g.oidc_token_info + return self.get_user() - global repository_db - global repository_db_conn + else: + return None - if repository_db is None: - repository_db_conn, repository_db = connection, db - logger.info('setup repository db connection') + def get_user(self, user_id: str = None, email: str = None) -> object: + from nomad import datamodel - return repository_db_conn, repository_db + if email is not None: + try: + user_id = self._admin_client.get_user_id(email) + except Exception: + raise KeyError('User does not exist') + if user_id is None and g.oidc_id_token is not None and self._flask_oidc is not None: + try: + return datamodel.User(token=g.oidc_id_token, **self._flask_oidc.user_getinfo([ + 'email', 'firstName', 'lastName', 'username', 'createdTimestamp'])) + except Exception as e: + # TODO logging + raise e -def sqlalchemy_repository_db(exists: bool = False, readonly: bool = True, **kwargs): - """ - Returns SQLAlchemy connection and session for the given db parameters. 
+ assert user_id is not None, 'Could not determine user from given kwargs' - Arguments: - exists: Set to False to check and ensure db and schema existence - readonly: Set to False for a write enabled connection - **kwargs: Overwrite `config.repository_db` parameters - """ - dbname = kwargs.get('dbname', config.repository_db.dbname) - db_exists = exists - if not db_exists: try: - with repository_db_connection(dbname=dbname): - logger.info('repository db postgres database already exists') - db_exists = True - except psycopg2.OperationalError as e: - if not ('database "%s" does not exist' % dbname) in str(e): - raise e + keycloak_user = self._admin_client.get_user(user_id) + except Exception: + raise KeyError('User does not exist') - if not db_exists: - logger.info('repository db postgres database does not exist') - try: - with repository_db_connection(dbname='postgres', with_trans=False) as con: - with con.cursor() as cursor: - cursor.execute("CREATE DATABASE %s ;" % dbname) - logger.info('repository db postgres database created') - except Exception as e: - logger.info('could not create repository db postgres database', exc_info=e) - raise e + return datamodel.User(**keycloak_user) - # ensure that the schema exists - schema_exists = exists - if not schema_exists: - with repository_db_connection(dbname=dbname) as conn: - with conn.cursor() as cur: - cur.execute( - "select exists(select * from information_schema.tables " - "where table_name='users')") - schema_exists = cur.fetchone()[0] - if not schema_exists: - logger.info('repository db postgres schema does not exists') - reset_repository_db_schema(dbname=dbname) - else: - logger.info('repository db postgres schema already exists') - - # set the admin user password - if not exists: - with repository_db_connection(dbname=dbname) as conn: - with conn.cursor() as cur: - try: - cur.execute( - "UPDATE public.users SET password='%s' WHERE user_id=0;" % - bcrypt.encrypt(config.services.admin_password, ident='2y')) - except Exception as e: - logger.warning('could not update admin password', exc_info=e) - - def no_flush(): - pass - - params = dict(**config.repository_db) - params.update(**kwargs) - url = 'postgresql://%s:%s@%s:%d/%s' % utils.to_tuple(params, 'user', 'password', 'host', 'port', 'dbname') - # We tried to set a very high isolation level, to prevent conflicts between transactions on the - # start-shaped schema, which usually involve read/writes to many tables at once. - # Unfortunately, this had week performance, and postgres wasn't even able to serialize on all - # occasions. We are now simply rollingback and retrying on conflicts. 
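The removed comment above records why the old code settled on rolling back and retrying instead of a SERIALIZABLE isolation level. A sketch of that retry pattern, assuming conflicts surface as sqlalchemy OperationalError:

    from sqlalchemy.exc import OperationalError

    def run_with_retry(session, work, max_retries=3):
        # retry the whole unit of work on transaction conflicts instead of
        # relying on a high isolation level
        for attempt in range(max_retries):
            try:
                session.begin()
                work(session)
                session.commit()
                return
            except OperationalError:
                session.rollback()
                if attempt == max_retries - 1:
                    raise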
- # engine = create_engine(url, echo=False, isolation_level="SERIALIZABLE") - engine = create_engine(url, echo=False) - - repository_db_conn = engine.connect() - repository_db = Session(bind=repository_db_conn, autocommit=True) - if readonly: - repository_db.flush = no_flush - - return repository_db_conn, repository_db - - -def set_pid_prefix(prefix=7000000, target_db=None): - if target_db is None: - target_db = repository_db - - target_db.begin() - target_db.execute('ALTER SEQUENCE calculations_calc_id_seq RESTART WITH %d' % prefix) - target_db.commit() - logger.info('set pid prefix', pid_prefix=prefix) - - -def reset(repo_content_only: bool = False): + @property + def _admin_client(self): + if self.__admin_client is None: + self.__admin_client = KeycloakAdmin( + server_url=config.keycloak.server_url, + username=config.keycloak.username, + password=config.keycloak.password, + realm_name='master', + verify=True) + self.__admin_client.realm_name = config.keycloak.realm_name + + return self.__admin_client + + +keycloak = Keycloak() + + +def reset(): """ - Resets the databases mongo, elastic/calcs, repository db and all files. Be careful. + Resets the databases mongo, elastic/calcs, and all files. Be careful. In contrast to :func:`remove`, it will only remove the contents of dbs and indicies. This function just attempts to remove everything, there is no exception handling or any warranty it will succeed. - - Arguments: - repo_content_only: True will only remove the calc/upload data from the repo db. - But still reset all other dbs. """ try: if not mongo_client: @@ -267,15 +245,6 @@ def reset(repo_content_only: bool = False): except Exception as e: logger.error('exception resetting elastic', exc_info=e) - try: - if repo_content_only: - reset_repository_db_content() - else: - reset_repository_db() - logger.info('repository db resetted') - except Exception as e: - logger.error('exception resetting repository db', exc_info=e) - try: shutil.rmtree(config.fs.staging, ignore_errors=True) shutil.rmtree(config.fs.public, ignore_errors=True) @@ -296,7 +265,7 @@ def reset(repo_content_only: bool = False): def remove(): """ - Removes the databases mongo, elastic, repository db, and all files. Be careful. + Removes the databases mongo, elastic, and all files. Be careful. This function just attempts to remove everything, there is no exception handling or any warranty it will succeed. 
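As a usage sketch for the module-level keycloak singleton added earlier in this infrastructure.py diff (assuming a Flask app and the keycloak settings in nomad.config):

    from flask import Flask
    from nomad import infrastructure

    app = Flask(__name__)
    infrastructure.keycloak.configure_flask(app)

    @app.route('/whoami')
    def whoami():
        result = infrastructure.keycloak.authorize_flask(token_only=True)
        if result is None:
            return 'no credentials given', 401
        if isinstance(result, str):
            # authorize_flask returns an error message on failed authentication
            return result, 401
        # otherwise result is the authenticated datamodel.User
        return str(result), 200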
""" @@ -316,19 +285,6 @@ def remove(): except Exception as e: logger.error('exception deleting elastic', exc_info=e) - try: - if repository_db is not None: - repository_db.expunge_all() - repository_db.invalidate() - if repository_db_conn is not None: - repository_db_conn.close() - with repository_db_connection(dbname='postgres', with_trans=False) as con: - with con.cursor() as cur: - cur.execute('DROP DATABASE IF EXISTS %s' % config.repository_db.dbname) - logger.info('repository db deleted') - except Exception as e: - logger.error('exception deleting repository db', exc_info=e) - logger.info('reset files') try: shutil.rmtree(config.fs.staging, ignore_errors=True) @@ -338,97 +294,6 @@ def remove(): logger.error('exception deleting files', exc_info=e) -@contextmanager -def repository_db_connection(dbname=None, with_trans=True): - """ Contextmanager for a psycopg2 session for the NOMAD-coe repository postgresdb """ - conn_str = "host='%s' port=%d dbname='%s' user='%s' password='%s'" % ( - config.repository_db.host, - config.repository_db.port, - config.repository_db.dbname if dbname is None else dbname, - config.repository_db.user, - config.repository_db.password) - - conn = psycopg2.connect(conn_str) - if not with_trans: - conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT) - try: - yield conn - except Exception as e: - logger.error('Unhandled exception within repository db connection.', exc_info=e) - conn.rollback() - conn.close() - raise e - - conn.commit() - conn.close() - - -def reset_repository_db(): - """ Drops the existing NOMAD-coe repository postgres schema and creates a new minimal one. """ - global repository_db - global repository_db_conn - - # invalidate and close all connections and sessions - if repository_db is not None: - repository_db.expunge_all() - repository_db.invalidate() - repository_db.close_all() - if repository_db_conn is not None: - repository_db_conn.close() - repository_db_conn.engine.dispose() - - # perform the reset - reset_repository_db_schema() - - # try tp repair existing db connections - if repository_db is not None: - new_connection, repository_db = setup_repository_db(exists=False) - repository_db.bind = new_connection - repository_db_conn = new_connection - - -def reset_repository_db_schema(**kwargs): - with repository_db_connection(with_trans=False, **kwargs) as conn: - with conn.cursor() as cur: - cur.execute("DROP SCHEMA IF EXISTS public CASCADE;") - - cur.execute( - "CREATE SCHEMA public;" - "GRANT ALL ON SCHEMA public TO postgres;" - "GRANT ALL ON SCHEMA public TO public;") - sql_file = os.path.join(os.path.dirname(__file__), 'empty_repository_db.sql') - cur.execute(open(sql_file, 'r').read()) - logger.info('(re-)created repository db postgres schema') - - -def reset_repository_db_content(): - tables = [ - 'metadata', - 'codeversions', - 'codefamilies', - 'ownerships', - 'coauthorships', - 'shareships', - 'metadata_citations', - 'citations', - 'spacegroups', - 'struct_ratios', - 'tags', - 'topics', - 'user_metadata', - 'doi_mapping', - 'calcsets', - 'calculations', - 'uploads' - ] - with repository_db_connection(with_trans=True) as conn: - with conn.cursor() as cur: - for table in tables: - cur.execute('DELETE FROM %s;' % table) - - logger.info('removed repository db content') - - def send_mail(name: str, email: str, message: str, subject: str): if not config.mail.enabled: return @@ -464,21 +329,3 @@ def send_mail(name: str, email: str, message: str, subject: str): logger.error('Could not send email', exc_info=e) 
server.quit() - - -if __name__ == '__main__': - import logging, time - - config.console_log_level = logging.DEBUG - setup_logging() - setup_keycloak() - - token = keycloak_oidc_client.token( - username='sheldon.cooper@nomad-coe.eu', password='password') - - while True: - print(keycloak_oidc_client.userinfo(token['access_token'])) - keycloak_user_id = keycloak_admin_client.get_user_id('sheldon.cooper@nomad-coe.eu') - print(keycloak_admin_client.get_user(keycloak_user_id)) - time.sleep(5) - diff --git a/nomad/migration.py b/nomad/migration.py deleted file mode 100644 index 518c4a99c2..0000000000 --- a/nomad/migration.py +++ /dev/null @@ -1,1749 +0,0 @@ -# Copyright 2018 Markus Scheidgen -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an"AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This module contains functions to read data from NOMAD coe, external sources, -other/older nomad@FAIRDI instances to mass upload it to a new nomad@FAIRDI instance. - -.. autoclass:: NomadCOEMigration -.. autoclass:: SourceCalc -""" - -from typing import Generator, Tuple, List, Iterable, Any, Dict -import multiprocessing -import multiprocessing.pool -import time -import os -import os.path -import sys -import tarfile -import math -from mongoengine import Document, IntField, StringField, DictField, BooleanField -import datetime -from bravado.exception import HTTPNotFound, HTTPBadRequest, HTTPGatewayTimeout -import os -import runstats -import io -import threading -from contextlib import contextmanager -import shutil -import random -import io - -from nomad import utils, infrastructure, files, config -from nomad.coe_repo import User, Calc, LoginException -from nomad.datamodel import CalcWithMetadata -from nomad.processing import FAILURE - - -if sys.version_info >= (3, 7): - import zipfile -else: - import zipfile37 as zipfile - -default_pid_prefix = 7000000 -""" The default pid prefix for new non migrated calculations """ - -gb = 1024 * 1024 * 1024 - -max_package_size = 32 * gb -""" The maximum size of a package that will be used as an upload on nomad@FAIRDI """ -use_stats_for_filestats_threshold = 1024 - -default_comment = 'entry with unknown provernance' -default_uploader = dict(id=1) - -protected_uploads = ['ftp_upload_for_uid_125', 'ftp_upload_for_uid_290'] -""" Uploads that we will not delete existing extracted files for """ - - -def iterable_to_stream(iterable, buffer_size=io.DEFAULT_BUFFER_SIZE): - """ - Lets you use an iterable (e.g. a generator) that yields bytestrings as a read-only - input stream. - - The stream implements Python 3's newer I/O API (available in Python 2's io module). - For efficiency, the stream is buffered. 
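For context, the removed iterable_to_stream helper wraps a bytes generator in a buffered, read-only stream; a short usage sketch:

    def chunks():
        yield b'first chunk\n'
        yield b'second chunk\n'

    stream = iterable_to_stream(chunks(), buffer_size=1024)
    print(stream.read())  # b'first chunk\nsecond chunk\n'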
- """ - class IterStream(io.RawIOBase): - def __init__(self): - self.leftover = None - self.iterator = iter(iterable) - - def readable(self): - return True - - def readinto(self, b): - requested_len = len(b) # We're supposed to return at most this much - while True: - try: - chunk = next(self.iterator) - except StopIteration: - if len(self.leftover) == 0: - return 0 # indicate EOF - chunk = self.leftover - output, self.leftover = chunk[:requested_len], chunk[requested_len:] - len_output = len(output) - if len_output == 0: - continue # do not prematurely indicate EOF - b[:len_output] = output - return len_output - - return io.BufferedReader(IterStream(), buffer_size=buffer_size) - - -Directory = Tuple[List[str], str, int] - - -def create_package_zip( - upload_id: str, upload_path: str, package_id: str, package_path: str, compress: bool, - package_filepaths: Iterable[str]) -> None: - logger = utils.get_logger( - __name__, source_upload_id=upload_id, - source_upload_path=upload_path, package_id=package_id) - - package_zip = zipfile.ZipFile( - package_path, 'w', - compression=zipfile.ZIP_DEFLATED if compress else zipfile.ZIP_STORED) - - try: - for filepath in package_filepaths: - package_zip.write(filepath, filepath[len(upload_path):]) - except Exception as e: - logger.error('could not write file to zip', filepath=filepath, exc_info=e) - finally: - package_zip.close() - - logger.info('package zip created') - - -def missing_calcs_data(start_pid: int = 0): - """ Produces data about missing calculations """ - results = utils.POPO( - no_package=[], - no_calcs=[], - not_migrated=[], - failed_packages=[], - missing_mainfile=[], - others=[]) - - # not not check these uploads - not_check_uploads = [ - 'fairdi', # these are links to uploads already processed with nomad@fairdi - 'nomad', # these are links to uploads already processed with nomad@fairdi - 'ftp_upload_for_uid_125', - 'ftp_upload_for_uid_290', - 'ftp_upload_for_uid_502_2011-09-06-15-33-33-333221', - 'ftp_upload_for_uid_502_2011-09-27-20-49-58-937390', - 'ftp_upload_for_uid_502_2011-10-01-20-48-22-561661', - 'ftp_upload_for_uid_502_2011-10-07-08-52-06-841358', - 'ftp_upload_for_uid_502_2011-10-07-08-57-17-804213', - 'ftp_upload_for_uid_502_2011-10-07-08-59-32-464608', - 'ftp_upload_for_uid_502_2011-10-07-19-04-54-725186', - 'ftp_upload_for_uid_502_2011-11-15-20-50-34-020718', - 'ftp_upload_for_uid_502_2011-11-15-20-56-28-015287', - 'ftp_upload_for_uid_502_2011-11-15-21-30-01-561680', - 'ftp_upload_for_uid_502_2011-11-15-21-33-26-574967', - 'ftp_upload_for_uid_502_2011-11-15-21-40-33-307359', - 'ftp_upload_for_uid_502_2011-11-26-23-17-19-882290', - 'ftp_upload_for_uid_502_2011-11-26-23-50-30-089143', - 'ftp_upload_for_uid_502_2011-12-01-00-14-18-140240', - 'ftp_upload_for_uid_502_2011-12-01-14-04-45-404271', - 'ftp_upload_for_uid_502_2011-12-01-23-09-09-854328', - 'ftp_upload_for_uid_502_2011-12-05-08-46-20-831174', - 'ftp_upload_for_uid_502_2011-12-05-10-46-30-923923', - 'ftp_upload_for_uid_502_2011-12-23-09-26-49-935721', - 'ftp_upload_for_uid_502_2011-12-23-10-39-22-459271', - 'ftp_upload_for_uid_502_2012-03-15-09-16-22-390174', - 'ftp_upload_for_uid_502_2012-03-23-19-18-02-789330', - 'ftp_upload_for_uid_502_2012-03-24-06-09-06-576223', - 'ftp_upload_for_uid_502_2012-03-26-08-53-28-847937', - 'ftp_upload_for_uid_502_2012-03-28-09-53-35-930264', - 'ftp_upload_for_uid_502_2012-04-25-17-12-51-662156', - 'ftp_upload_for_uid_502_2012-04-26-00-04-07-260381', - 'ftp_upload_for_uid_502_2012-04-26-09-31-29-421336', - 
'ftp_upload_for_uid_502_2012-04-27-07-15-28-871403', - 'ftp_upload_for_uid_502_2012-04-27-22-53-49-117894', - 'ftp_upload_for_uid_502_2012-05-16-13-36-29-938929', - 'ftp_upload_for_uid_502_2012-05-18-17-18-20-527193', - 'ftp_upload_for_uid_502_2012-05-19-19-51-50-814160', - 'ftp_upload_for_uid_502_2012-05-21-15-14-17-579123', - 'ftp_upload_for_uid_502_2012-05-25-13-52-49-651647', - 'ftp_upload_for_uid_502_2012-06-14-17-47-19-089204', - 'ftp_upload_for_uid_502_2012-06-21-21-34-07-966108', - 'ftp_upload_for_uid_502_2012-06-26-22-25-28-412879', - 'ftp_upload_for_uid_502_2012-07-02-10-35-45-887222', - 'ftp_upload_for_uid_502_2012-07-02-10-36-33-740348', - 'ftp_upload_for_uid_502_2012-07-09-10-03-15-368689', - 'ftp_upload_for_uid_502_2012-07-26-07-27-00-284225', - 'ftp_upload_for_uid_502_2012-07-26-07-29-11-627501', - 'ftp_upload_for_uid_502_2012-08-14-13-16-25-535995', - 'ftp_upload_for_uid_502_2012-08-16-15-04-45-599710', - 'ftp_upload_for_uid_502_2012-08-23-06-23-02-115869', - 'ftp_upload_for_uid_502_2012-08-23-16-36-49-087908', - 'ftp_upload_for_uid_502_2012-08-24-17-10-15-161628', - 'ftp_upload_for_uid_502_2012-08-26-05-04-25-027012', - 'ftp_upload_for_uid_502_2012-08-29-18-31-26-494251', - 'ftp_upload_for_uid_502_2012-08-30-07-01-07-502171', - 'ftp_upload_for_uid_502_2012-09-01-08-01-03-573873', - 'ftp_upload_for_uid_502_2012-09-06-13-54-56-201039', - 'ftp_upload_for_uid_502_2012-09-07-21-38-22-787875', - 'ftp_upload_for_uid_502_2012-09-09-07-32-31-653109', - 'ftp_upload_for_uid_502_2012-09-10-09-48-57-289279', - 'ftp_upload_for_uid_502_2012-09-11-07-04-32-036763', - 'ftp_upload_for_uid_502_2012-09-15-20-31-02-157060', - 'ftp_upload_for_uid_502_2012-09-20-06-29-02-132434', - 'ftp_upload_for_uid_502_2012-09-21-11-27-20-615773', - 'ftp_upload_for_uid_502_2012-09-21-17-31-17-335523', - 'ftp_upload_for_uid_502_2012-09-24-20-27-36-041292', - 'ftp_upload_for_uid_502_2012-09-25-16-21-09-043610', - 'ftp_upload_for_uid_502_2012-10-01-17-27-20-733800', - 'ftp_upload_for_uid_502_2012-10-02-17-02-03-194493', - 'ftp_upload_for_uid_502_2012-10-08-14-10-54-373136', - 'ftp_upload_for_uid_502_2012-10-12-12-40-36-780644', - 'ftp_upload_for_uid_502_2012-10-24-14-51-09-134377', - 'ftp_upload_for_uid_502_2012-10-29-11-01-45-431034', - 'ftp_upload_for_uid_502_2012-11-16-17-02-37-016199', - 'ftp_upload_for_uid_502_2012-11-19-09-16-47-377264', - 'ftp_upload_for_uid_502_2012-11-23-13-23-45-623620', - 'ftp_upload_for_uid_502_2012-11-26-14-56-17-339064', - 'ftp_upload_for_uid_502_2012-12-03-09-52-02-714224', - 'ftp_upload_for_uid_502_2012-12-10-20-09-30-463926', - 'ftp_upload_for_uid_502_2011-08-17-14-29-25-505869'] - - # aggregate missing calcs based on uploads - source_uploads = SourceCalc._get_collection().aggregate([ - {'$match': {'migration_version': -1, '_id': {'$gte': start_pid}, 'upload': {'$nin': not_check_uploads}}}, - {'$group': {'_id': '$upload', 'calcs': {'$push': {'mainfile': '$mainfile', 'pid': '$metadata.pid'}}}}]) - source_uploads = list(source_uploads) - - for source_upload in source_uploads: - source_upload['calcs'] = sorted(source_upload['calcs'], key=lambda a: a['mainfile']) - - source_uploads = [ - utils.POPO(source_upload_id=d['_id'], calcs=d['calcs']) - for d in source_uploads] - - source_uploads = sorted(source_uploads, key=lambda u: len(u.calcs)) - - # go through all problematic uploads - for source_upload in source_uploads: - logger = utils.get_logger(__name__, source_upload_id=source_upload.source_upload_id) - - def package_query(**kwargs): - return 
Package.objects(upload_id=source_upload.source_upload_id, **kwargs) - - def cause(upload, **kwargs): - cause = dict( - source_upload_id=upload.source_upload_id, calcs=len(upload.calcs), - example_mainfile=upload.calcs[0]['mainfile'], - example_pid=upload.calcs[0]['pid']) - cause.update(**kwargs) - - return cause - - try: - - # check if packages exist - if package_query().count() == 0: - results.no_package.append(cause(source_upload)) - continue - - logger.debug('package exists') - - # check if packages are not migrated - not_migrated_query = package_query(migration_version__lt=0) - if not_migrated_query.count() > 0: - results.not_migrated.append(cause( - source_upload, - packages=list(package.package_id for package in not_migrated_query))) - continue - logger.debug('packages are migrated') - - # check if packages all failed due to no calcs - no_calcs_query = package_query(report__total_calcs=0) - if no_calcs_query.count() == package_query().count(): - results.no_calcs.append(cause( - source_upload, - packages=list(package.package_id for package in no_calcs_query))) - continue - logger.debug('packages have calcs') - - # check if packages failed - failed_packages_query = package_query(report__failed_packages__ne=0) - if failed_packages_query.count() > 0: - results.failed_packages.append(cause( - source_upload, - packages=list(package.package_id for package in failed_packages_query))) - continue - logger.debug('packages are processed') - - # check if a mainfile does not exist in the package - checkall = True - if checkall: - all_files = {} - for package in package_query(): - with zipfile.ZipFile(package.package_path, 'r') as zf: - for path in zf.namelist(): - all_files[path] = path - exist, not_exist = 0, 0 - example_mainfile, example_exists_mainfile = '', '' - for calc in source_upload.calcs: - mainfile = calc['mainfile'] - if mainfile in all_files: - exist += 1 - example_exists_mainfile = mainfile - else: - not_exist += 1 - example_mainfile = mainfile - example_pid = calc['pid'] - if not_exist > 0: - results.missing_mainfile.append(cause( - source_upload, - missing=not_exist, - example_mainfile=example_mainfile, - example_pid=example_pid, - missing_but_exist=exist, - missing_but_exists_example=example_exists_mainfile)) - continue - else: - try: - for calc in source_upload.calcs: - mainfile = calc['mainfile'] - contained = False - for package in package_query(): - try: - with zipfile.ZipFile(package.package_path, 'r', allowZip64=True) as zf: - try: - if zf.getinfo(mainfile) is not None: - contained = True - break - except KeyError: - pass - except FileNotFoundError: - logger.info('cannot verify mainfile existence due to missing package data.') - - if not contained: - results.missing_mainfile.append(cause(source_upload, missing_mainfile=mainfile)) - raise KeyError - - # only check the first - break - except KeyError: - continue - logger.debug('mainfiles do exist') - - results.others.append(cause(source_upload)) - - except Exception as e: - logger.error('exception while checking upload', exc_info=e) - - summary = utils.POPO(overall_missing=0) - for key, values in results.items(): - summary[key] = 0 - for value in values: - summary[key] += value['calcs'] - summary['overall_missing'] += value['calcs'] - - results.summary = summary - - return results - - -class Package(Document): - """ - A Package represents split origin NOMAD CoE uploads. We use packages as uploads - in nomad@FAIRDI. Some of the uploads in nomad are very big (alfow lib) and need - to be split down to yield practical (i.e. 
for mirrors) upload sizes. Therefore, - uploads are split over multiple packages if one upload gets to large. A package - always contains full directories of files to preserve *mainfile* *aux* file relations. - Package have a package entry in mongo and a .zip file with the raw data. - """ - - package_id = StringField(primary_key=True) - """ A random UUID for the package. Could serve later its target upload id.""" - package_path = StringField(required=True) - """ The path to the package .zip file. """ - upload_path = StringField(required=True) - """ The absolute path of the source upload """ - upload_id = StringField(required=True) - """ The source upload_id. There might be multiple packages per upload (this is the point). """ - target_upload_id = StringField() - """ The current target upload id of the processed package """ - restricted = IntField(default=0) - """ The restricted in month, 0 for unrestricted. """ - size = IntField() - """ The sum of all file sizes. """ - files = IntField() - """ The number of files. """ - packages = IntField(default=-1) - """ The number of packages in the same upload. """ - - migration_version = IntField(default=-1) - """ The version of the last successful migration of this package """ - migration_id = StringField() - """ A random uuid that ids the migration run on this package """ - report = DictField() - """ The report of the last successful migration of this package """ - skip_migration = BooleanField() - """ Packages with known problems can be marked to be not migrated """ - skip_migration_reason = StringField() - """ Optional description of the reason for skipping migration """ - - migration_failure = StringField() - """ String that describe the cause for last failed migration attempt """ - migration_failure_type = StringField() - """ The type of migration failure: ``no_calcs``, ``processing``, ``publish``, ``exception``. """ - - next_offset = IntField(default=-1) - """ Only for packages created from large tars. Give the offset to continue to create the next package. """ - no_provernance = BooleanField(default=False) - """ Does this package have calculations in the CoE Repository postgres db """ - - meta = dict(indexes=['upload_id', 'migration_version']) - - @classmethod - def aggregate_reports(cls, migration_version: str = None) -> 'Report': - """ - Returns an aggregated report over all package reports of the given migration version, - or all packages. - """ - if migration_version is not None: - query = cls.objects(migration_version__gte=migration_version, report__exists=True) - else: - query = cls.objects(report__exists=True) - - report = Report() - for package in query: - report.add(Report(package.report)) - - return report - - @classmethod - def create_packages_from_tar( - cls, source_tar_path: str, offset: int = None, compress: bool = True, - forced_upload_id: str = None) -> None: - """ - Utility function for manually creating packages within a tar archive. - Assuming that the tarfile contains multiple extracted uploads. The first directory - hierarchy level is interpreted as upload_id. 
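A usage sketch for this removed classmethod, with a hypothetical tar of extracted uploads; offset and forced_upload_id keep their defaults:

    # split every upload contained in the tar into migration packages
    Package.create_packages_from_tar(
        source_tar_path='/data/extracted_uploads.tar',  # hypothetical path
        compress=True)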
- """ - logger = utils.get_logger(__name__) - - f = io.open(source_tar_path, 'rb', buffering=128 * 1024 * 1024) - - tf = tarfile.TarFile.open(fileobj=f, copybufsize=1024 * 1024) # type: ignore - if offset is not None: - tf.offset = offset # type: ignore - - try: - last_offset = 0 - - class PackageFile(): - def __init__(self, upload_id: str): - upload_directory = files.DirectoryObject( - config.fs.migration_packages, upload_id, create=True, prefix=True) - - self.package = Package(upload_id=upload_id, package_id=utils.create_uuid()) - self.package.package_path = upload_directory.join_file( - self.package.package_id + '.zip').os_path - self.package.upload_path = os.path.join(source_tar_path, upload_id) - - self.package_file = zipfile.ZipFile( - self.package.package_path, 'w', - compression=zipfile.ZIP_DEFLATED if compress else zipfile.ZIP_STORED, - allowZip64=True) - - self.package.size = 0 - self.package.files = 0 - self.package.restricted = 0 - self.offset = last_offset - - def add_file(self, tarinfo: tarfile.TarInfo, path: str = None) -> None: - if path is None: - path = tarinfo.name - - basepath = os.path.basename(path) - zip64 = True if tarinfo.size >= (2 * gb) else None - with self.package_file.open(path, 'w', force_zip64=zip64) as target: # type: ignore - source = tf.fileobj - source.seek(tarinfo.offset_data) # type: ignore - bufsize = tf.copybufsize # type: ignore - - tarfile.copyfileobj( # type: ignore - source, target, tarinfo.size, tarfile.ReadError, bufsize) - - if basepath.startswith('RESTRICTED'): - self.package.restricted = 36 - try: - self.package.restricted = min(36, int(basepath[len('RESTRICTED_'):])) - except Exception: - pass - - self.package.size += tarinfo.size - self.package.files += 1 - - def close(self, all_packages: bool = False) -> None: - self.package_file.close() - self.package.next_offset = last_offset - self.package.save() - - if all_packages: - packages = Package.objects(upload_id=self.package.upload_id).count() - Package._get_collection().update_many( - dict(upload_id=self.package.upload_id), {'$set': dict(packages=packages)}) - - current_upload = None - # the package of the current directory, might be reused for other directories, - # if not too big - current_package = None - # the last package is the last created package, and can be used if not too - # big, but the current one is too big - last_package = None - # used to keep packages of parent directories - directories: Dict[str, PackageFile] = {} - - next_info = tf.next() - while next_info is not None: - if next_info.isfile(): - if forced_upload_id is not None: - name = os.path.join(forced_upload_id, next_info.name) - else: - name = next_info.name - - segments = name.split('/') - - upload = segments[0] - if upload != current_upload: - # new upload - for package in directories.values(): - if package != current_package: - package.close() - - if current_package is not None: - current_package.close(True) - - current_upload = upload - logger.info('new upload', source_upload_id=current_upload) - - current_directory = None - current_package = PackageFile(current_upload) - last_package = current_package - directories = {} - - if len(segments) == 2: - directory = '' - else: - directory = os.path.join(segments[1], *segments[2:-1]) - - if current_directory is None: - current_directory = directory - - if current_directory == directory: - # same directory ... add files to the same package no matter what - pass - - elif directory.startswith(current_directory): - # sub-directory ... 
- # keep the parent package - # the child package might be a new one, if current is too big - directories[current_directory] = current_package - - if current_package.package.size > max_package_size: - if last_package == current_package: - last_package = None - current_package = last_package if last_package is not None else PackageFile(current_upload) - last_package = current_package - - elif current_directory.startswith(directory): - # super-directory ... - # remove the child package if kept - # restore the parent package - # add files to the parent package no matter what (its basically the same directory) - if current_directory in directories: - del(directories[current_directory]) - - current_package = None - while current_package is None: - current_package = directories.get(directory, None) - directory = os.path.dirname(directory) - if directory == '': - break - if current_package is None: - current_package = last_package - if current_package.package.size > max_package_size: - if last_package == current_package: - last_package = None - current_package = last_package if last_package is not None else PackageFile(current_upload) - last_package = current_package - - else: - # sibling ... - # remove old directories package if kept - # use current or new package, if current is too big - if current_directory in directories: - del(directories[current_directory]) - - if current_package.package.size > max_package_size: - # close current_package, if it is not used by any parent directory - if all(package != current_package for package in directories.values()): - current_package.close() - - if current_package == last_package: - last_package = None - - current_package = last_package if last_package is not None else PackageFile(current_upload) - last_package = current_package - - current_directory = directory - - current_package.add_file(next_info, name[len(current_upload) + 1:]) - - else: - pass - - last_offset = tf.offset # type: ignore - next_info = tf.next() - except Exception: - import traceback - traceback.print_exc() - print('Exception while processing tarfile. The entry at %d' % last_offset) - print('Deleting files of open packages.') - packages = {} - smallest_offset = last_offset - for package in list(directories.values()) + [current_package, last_package]: - if package is not None: - packages[package.package.package_id] = package.package.package_path - smallest_offset = min(package.offset, smallest_offset) - - for package_path in packages.values(): - try: - os.remove(package_path) - except Exception: - pass - - print('The smallest offset of an open package was %d' % smallest_offset) - else: - for package in list(directories.values()) + [current_package, last_package]: - if package is not None and package != current_package: - package.close() - if current_package is not None: - current_package.close(True) - finally: - try: - tf.close() - f.close() - except Exception: - pass - - @classmethod - def get_packages( - cls, upload_path: str, target_dir: str, create: bool = False, - compress: bool = False, parallel: int = 1) -> Iterable['Package']: - """ - Will get packages for the given upload_path. Creates the package zip files and - package index entries if ``create`` is True. But, either will only be created if - it does not already exist. Yields the Package objects. 
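A sketch of how this packaging entry point might be driven, assuming a hypothetical extracted upload directory and the migration package directory from nomad.config:

    from nomad import config

    upload_path = '/nomad/repository/data/extracted/example_upload'  # hypothetical
    packages = Package.get_packages(
        upload_path, target_dir=config.fs.migration_packages,
        create=True, compress=True, parallel=2)

    for package in packages:
        print(package.package_id, package.files, package.size)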
- """ - upload_id = os.path.basename(upload_path) - logger = utils.get_logger(__name__, source_upload_path=upload_path, source_upload_id=upload_id) - - # The packages number is written after all packages of an upload have been created. - # this should allow to abort mid upload packaging and continue later by removing - # all started packages first. - is_packaged = cls.objects(upload_id=upload_id, packages__ne=-1).count() != 0 - - async_results: List[multiprocessing.pool.AsyncResult] = [] - pool = multiprocessing.Pool(parallel) - pool.__enter__() - - if not is_packaged: - if not create: - return None - - if not os.path.isdir(upload_path): - logger.error('upload path is not a directory') - return [] - - upload_directory = files.DirectoryObject(target_dir, upload_id, create=True, prefix=True) - restricted = 0 - - cls.objects(upload_id=upload_id).delete() - - def create_package_entry(): - package_id = utils.create_uuid() - return Package( - package_id=package_id, - upload_path=upload_path, - upload_id=upload_id, - package_path=upload_directory.join_file('%s.zip' % package_id).os_path) - - def close_package(package_size: int, package_filepaths: List[str]): - package_entry_to_close = package_entry - - def save_package_entry(*args) -> None: - package_entry_to_close.size = package_size - package_entry_to_close.files = len(package_filepaths) - package_entry_to_close.save() - - logger.debug( - 'created package', - package_id=package_entry.package_id, size=package_size) - - def handle_package_error(*args) -> None: - logger.error( - 'could not create package zip due to unexpected exception', - exc_info=args[0]) - - while len(async_results) > parallel: - async_results[:] = [ - async_result for async_result in async_results - if not async_result.ready()] - time.sleep(0.1) - - async_result = pool.apply_async( - create_package_zip, - args=( - upload_id, upload_path, package_entry.package_id, - package_entry.package_path, compress, package_filepaths), - callback=save_package_entry, error_callback=handle_package_error) - - async_results.append(async_result) - - package_entry = create_package_entry() - package_size = 0 - package_filepaths = [] - with cls.upload_iterator(upload_path) as directory: - for filepaths, parent_directory, size in directory: - for filepath in filepaths: - basepath = os.path.basename(filepath) - if basepath.startswith('RESTRICTED'): - restricted = 36 - try: - restricted = min(36, int(basepath[len('RESTRICTED_'):])) - except Exception: - pass - - package_filepaths.append(os.path.join(parent_directory, filepath)) - - if size > max_package_size: - logger.warn( - 'directory exceeds max package size', - package_id=package_entry.package_id, size=package_size) - - package_size += size - if package_size > max_package_size: - close_package(package_size, package_filepaths) - package_size, package_filepaths = 0, [] - package_entry = create_package_entry() - - if len(package_filepaths) > 0: - close_package(package_size, package_filepaths) - - # wait for all zip processes to complete - while not all(async_result.ready() for async_result in async_results): - time.sleep(0.1) - - pool.__exit__(None, None, None) - - package_query = cls.objects(upload_id=upload_id) - package_query.update(restricted=restricted, packages=package_query.count()) - logger.debug( - 'packaged upload', source_upload_id=upload_id, source_upload_path=upload_path, - restricted=restricted) - - return list(package_query) # prevent timeout, there are only ~10k packages - else: - return list(cls.objects(upload_id=upload_id)) # 
prevent timeout, there are only ~10k packages - - @classmethod - @contextmanager - def upload_iterator(cls, upload_path: str) -> Generator[Generator[Directory, None, None], None, None]: - """ - A contextmanager that opens the given upload and provides a generator for - directories. Directories are tuple of an iterable of upload relative filepaths - and the directory size. - """ - potential_archive_path = os.path.join(upload_path, 'archive.tar.gz') - if os.path.isfile(potential_archive_path): - with cls.extracted_archive(potential_archive_path) as extracted_archive: - yield cls.iterate_upload_directory(extracted_archive) - else: - yield cls.iterate_upload_directory(upload_path) - - @classmethod - def iterate_upload_directory(cls, upload_path) -> Generator[Directory, None, None]: - """ - Interprets the given upload path as a directory. Files path are given as upload - path relative paths. - """ - stats = runstats.Statistics() - for root, _, files in os.walk(upload_path): - directory_filepaths: List[str] = [] - directory_size = 0 - - if len(files) == 0: - continue - - if len(files) < 20 and any(file.endswith('.tar.gz') for file in files): - # TODO the OQMD case, files are managed as bunch of .tar.gz files - for file in files: - archive_path = os.path.join(root, file) - prefix = os.path.dirname(archive_path)[len(upload_path) + 1:] - with cls.extracted_archive(archive_path) as extracted_archive: - for paths, _, size in cls.iterate_upload_directory(extracted_archive): - yield [os.path.join(prefix, path) for path in paths], upload_path, size - continue - - for file in files: - filepath = os.path.join(root, file) - filename = filepath[len(upload_path) + 1:] - directory_filepaths.append(filename) - # getting file stats is pretty expensive with gpfs - # if an upload has more then 1000 files, its pretty likely that - # size patterns repeat ... goood enough - if len(stats) < use_stats_for_filestats_threshold: - try: - filesize = os.path.getsize(filepath) - except Exception: - # if there are individual files that cannot be accessed, we fully ignore them - # they are most likely just broken links - pass - - stats.push(filesize) - else: - filesize = stats.mean() - directory_size += filesize - - yield directory_filepaths, upload_path, directory_size - - @classmethod - @contextmanager - def extracted_archive(cls, archive_path: str) -> Generator[str, None, None]: - """ - Temporarily extracts the given archive and returns directory with the extracted - data. 
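A small sketch of this context manager in use, with a hypothetical archive path; the extracted tree only exists inside the with block and is removed on exit:

    import os

    with Package.extracted_archive('/data/some_upload/archive.tar.gz') as tmp_dir:  # hypothetical
        for root, _, filenames in os.walk(tmp_dir):
            for name in filenames:
                print(os.path.join(root, name))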
- """ - tmp_directory = os.path.join(config.fs.local_tmp, utils.create_uuid()) - os.mkdir(tmp_directory) - - with tarfile.TarFile.open(archive_path) as tar_file: - tar_file.extractall(tmp_directory) - # try to fix permissions, do not care if command fails - os.system('chmod -Rf 0755 %s/*' % tmp_directory) - - yield tmp_directory - - shutil.rmtree(tmp_directory) - - def delete_files(self, extracted_site: str, uploaded_site: str) -> Tuple[bool, str]: - """ - Deletes the extracted files that this package was created from if - - there is an "uploaded" version - - the package is complete - - it is not a "protected upload" - """ - if self.packages < 1: - return False, 'packaging not completed' - - upload_file = os.path.join(uploaded_site, self.upload_id, 'archive.tar.gz') - if not os.path.exists(upload_file): - return False, 'uploaded archive does not exist' - - extracted_dir = os.path.join(extracted_site, self.upload_id) - if not os.path.isdir(extracted_dir): - return False, 'extracted upload does not exist' - - if any(str(self.upload_id).startswith(upload) for upload in protected_uploads): - return False, 'is a protected upload' - - try: - shutil.rmtree(extracted_dir) - return True, None - except Exception as e: - utils.get_logger( - __name__, package_id=self.package_id, - source_upload_id=self.upload_id, exc_info=e) - return False, 'exception while deleting' - - -class SourceCalc(Document): - """ - Mongo document used as a calculation, upload, and metadata db and index - build from a given source db. Each :class:`SourceCacl` entry relates - a pid, mainfile, upload "id" with each other for a corressponding calculation. - It might alos contain the user metadata. The uploads are "id"ed via the - specific path segment that identifies an upload on the CoE repo FS(s) without - any prefixes (e.g. $EXTRACTED, /data/upload, etc.) - """ - pid = IntField(primary_key=True) - mainfile = StringField() - upload = StringField() - metadata = DictField() - - migration_version = IntField(default=-1) - - extracted_prefix = '$EXTRACTED/' - sites = ['/data/nomad/extracted/', '/nomad/repository/extracted/'] - prefixes = [extracted_prefix] + sites - - meta = dict(indexes=['upload', 'mainfile', 'migration_version']) - - _dataset_cache: dict = {} - - @staticmethod - def index( - source, drop: bool = False, with_metadata: bool = True, per_query: int = 100, - start_pid: int = -1) -> Generator[Tuple['SourceCalc', int], None, None]: - """ - Creates a collection of :class:`SourceCalc` documents that represent source repo - db entries. - - Arguments: - source: The source db sql alchemy session - drop: True to drop and create a new collection, update the existing otherwise, - default is False. - with_metadata: True to also grab all metadata and store it, default is True. - per_query: The implementation tries to grab almost all data with a heavely joined - query on the CoE snoflake/star shaped schema. - The query cannot ask for the whole db at once: choose how many calculations - should be read at a time to optimize for your application. - start_pid: Only index calculations with PID greater equal the given value - - Returns: - yields tuples (:class:`SourceCalc`, #calcs_total[incl. 
datasets]) - """ - logger = utils.get_logger(__name__) - if drop: - SourceCalc.drop_collection() - - last_source_calc = SourceCalc.objects().order_by('-pid').first() - if start_pid is None or start_pid == -1: - start_pid = last_source_calc.pid if last_source_calc is not None else 0 - source_query = source.query(Calc) - total = source_query.count() - SourceCalc.objects.count() - - do_continue = True - while do_continue: - query_timer = utils.timer(logger, 'query source db') - query_timer.__enter__() # pylint: disable=E1101 - calcs: Iterable[Calc] = source_query \ - .filter(Calc.coe_calc_id > start_pid) \ - .order_by(Calc.coe_calc_id) \ - .limit(per_query) - - source_calcs = [] - do_continue = False - for calc in calcs: - query_timer.__exit__(None, None, None) # pylint: disable=E1101 - try: - filenames = calc.files - if filenames is None or len(filenames) == 0: - continue # dataset case - - filename = filenames[0] - if len(filenames) == 1 and (filename.endswith('.tgz') or filename.endswith('.tar.gz') or filename.endswith('.zip')): - continue # also a dataset, some datasets have a downloadable archive - - for prefix in SourceCalc.prefixes: - filename = filename.replace(prefix, '') - segments = [file.strip('\\') for file in filename.split('/')] - - source_calc = SourceCalc(pid=calc.pid) - source_calc.upload = segments[0] - source_calc.mainfile = os.path.join(*segments[1:]) - - # this is taken from metadata.location and has inconsistent directory prefix, - # but is more accurate than taking the first file as mainfile, which - # also is sometimes not the actual mainfile. - if calc.mainfile is not None: - calc_mainfile = os.path.basename(calc.mainfile) - if calc_mainfile != os.path.basename(source_calc.mainfile): - source_calc.mainfile = os.path.join( - os.path.dirname(source_calc.mainfile), calc_mainfile) - - if with_metadata: - source_calc.metadata = calc.to_calc_with_metadata().__dict__ - source_calcs.append(source_calc) - except Exception as e: - logger.error('could not index', pid=calc.pid, exc_info=e) - start_pid += 1 - else: - start_pid = source_calc.pid - yield source_calc, total - - do_continue = True - - if len(source_calcs) > 0: - with utils.timer(logger, 'write index'): - SourceCalc.objects.insert(source_calcs) - - -NO_PROCESSED_CALCS = 0 -FAILED_PROCESSING = 1 -FAILED_PUBLISH = 2 - - -class NomadCOEMigration: - """ - Drives a migration from the NOMAD coe repository db to nomad@FAIRDI. - - Arguments: - migration_version: The migration version. Only packages/calculations with - no migration version or a lower migration version are migrated. - package_directory: The directory that packages are/get stored in. - compress_packages: True to use compression on package creation. - threads: Number of threads to run migration in parallel. - quiet: Prints stats if not quiet - """ - - default_sites = [ - '/nomad/repository/data/uploads', - '/nomad/repository/data/extracted', - '/data/nomad/uploaded/', - '/data/nomad/extracted/'] - - default_pid_prefix = int(1e7) - - archive_filename = 'archive.tar.gz' - """ The standard name for tarred uploads in the CoE repository. 
""" - - def __init__( - self, - migration_version: int = 0, - package_directory: str = None, - compress_packages: bool = False, - threads: int = 1, quiet: bool = False) -> None: - self.logger = utils.get_logger(__name__, migration_version=migration_version) - - self.migration_version = migration_version - self.migration_id = utils.create_uuid() - self.package_directory = package_directory if package_directory is not None else config.fs.migration_packages - self.compress_packages = compress_packages - self._client = None - self._threads = threads - self._quiet = quiet - - self.source = infrastructure.repository_db - - @property - def client(self): - if self._client is None: - from nomad.cli.client import create_client - self._client = create_client() - - return self._client - - def report(self): - """ returns an aggregated report over all prior migrated packages """ - return Package.aggregate_reports(migration_version=self.migration_version) - - def copy_users(self): - """ Copy all users. """ - for source_user in self.source.query(User).all(): - if source_user.user_id <= 2: - # skip first two users to keep example users - # they probably are either already the example users, or [root, Evgeny] - continue - - create_user_payload = dict( - user_id=source_user.user_id, - email=source_user.email, - first_name=source_user.first_name, - last_name=source_user.last_name, - password=source_user.password, - created=source_user.created - ) - - try: - create_user_payload.update(token=source_user.token) - except LoginException: - pass - - if source_user.affiliation is not None: - create_user_payload.update(affiliation=dict( - name=source_user.affiliation.name, - address=source_user.affiliation.address)) - - try: - self.client.auth.create_user(payload=create_user_payload).response() - self.logger.info('copied user', user_id=source_user.user_id) - except HTTPBadRequest as e: - self.logger.error('could not create user due to bad data', exc_info=e, user_id=source_user.user_id) - - expected_differences = { - '0d': 'molecule / cluster', - '3d': 'bulk', - '2d': '2d / surface', - '+u': 'gga' - } - - def validate(self, repo_calc: dict, source_calc: CalcWithMetadata, logger) -> bool: - """ - Validates the given processed calculation, assuming that the data in the given - source_calc is correct. - - Returns: - False, if the calculation differs from the source calc. 
- """ - keys_to_validate = [ - 'atoms', 'basis_set', 'xc_functional', 'system', 'crystal_system', - 'spacegroup', 'code_name', 'code_version'] - - def to_comparable_list(list): - for item in list: - if isinstance(item, dict): - for key in item.keys(): - if key.endswith('id'): - yield item.get(key) - else: - yield item - - is_valid = True - for key, target_value in repo_calc.items(): - if key not in keys_to_validate: - continue - - source_value = getattr(source_calc, key, None) - - def check_mismatch() -> bool: - # some exceptions - if isinstance(source_value, str) and \ - source_value in NomadCOEMigration.expected_differences and \ - target_value == NomadCOEMigration.expected_differences.get(source_value): - return True - - logger.info( - 'source target missmatch', quantity=key, - source_value=source_value, target_value=target_value, - value_diff='%s->%s' % (str(source_value), str(target_value))) - return False - - if source_value is None and target_value is not None: - continue - - if target_value is None and source_value is not None: - is_valid &= check_mismatch() - - if isinstance(target_value, list): - source_list = list(to_comparable_list(source_value)) - target_list = list(to_comparable_list(target_value)) - if len(source_list) != len(target_list): - is_valid &= check_mismatch() - elif any(a != b for a, b in zip(source_list, target_list)): - is_valid &= check_mismatch() - continue - - if isinstance(source_value, str): - source_value = source_value.lower() - target_value = str(target_value).lower() - - if source_value != target_value: - is_valid &= check_mismatch() - - return is_valid - - def surrogate_metadata(self, source: CalcWithMetadata): - """ - Compute metadata from the given metadata that can be used for new calcs of the - same upload. - """ - return CalcWithMetadata( - uploader=source.uploader, - with_embargo=source.with_embargo, - upload_time=source.upload_time, - coauthors=source.coauthors, - shared_with=source.shared_with, - comment=source.comment, - references=source.references, - datasets=source.datasets) - - def set_pid_prefix(self, prefix: int = default_pid_prefix): - """ - Sets the repo db pid counter to the given values. Allows to create new calcs - without interfering with migration calcs with already existing PIDs. - """ - self.logger.info('set pid prefix', pid_prefix=prefix) - self.client.admin.exec_pidprefix_command(payload=dict(prefix=prefix)).response() - - def call_paginated_api(self, *args, **kwargs) -> List[Any]: - """ - Calls nomad via :func:`call_api` multiple times and yields all paginated results. Works - only for endpoints with pagination of course. - """ - all_results: List[Any] = [] - page = 1 - stop = False - kwargs.update(page=page) - while not stop: - response = self.call_api(*args, **kwargs) - for result in response.results: - all_results.append(result) - - pagination = response.pagination - if pagination.total <= pagination.per_page * pagination.page: - stop = True - - return all_results - - def call_api(self, operation: str, *args, **kwargs) -> Any: - """ - Calls nomad via the bravado client. It deals with a very busy nomad and catches, - backsoff, and retries on gateway timouts. It also circumvents bravados/jsonschemas - thread safety issues using a global lock on client usage. - - Arguments: - operation: Comma separated string of api, endpoint, operation, - e.g. 'uploads.get_upload'. 
- """ - op_path = operation.split('.') - op = self.client - for op_path_segment in op_path: - op = getattr(op, op_path_segment) - - sleep = utils.SleepTimeBackoff() - while True: - try: - NomadCOEMigration._client_lock.acquire(blocking=True) - return op(*args, **kwargs).response().result - except HTTPGatewayTimeout: - sleep() - except Exception as e: - raise e - finally: - NomadCOEMigration._client_lock.release() - - def migrate( - self, *args, delete_failed: str = '', - create_packages: bool = False, only_republish: bool = False, - wait: int = 0, republish: bool = False) -> utils.POPO: - """ - Migrate the given uploads. - - It takes paths to extracted uploads as arguments. - - Requires :class:`Package` instances for the given upload paths. Those will - be created, if they do not already exists. The packages determine the uploads - for the target infrastructure. - - Requires a build :func:`index` to look for existing data in the source db. This - will be used to add user (and other, PID, ...) metadata and validate calculations. - - Uses PIDs of identified old calculations. Will create new PIDs for previously - unknown uploads. See :func:`set_pid_prefix` on how to avoid conflicts. - - Arguments: - upload_path: A filepath to the upload directory. - delete_failed: String from ``N``, ``U``, ``P`` to determine that uploads with - no processed calcs (N), failed upload processing (U), or failed publish - operation (P) should be deleted after the migration attempt. - create_packages: If True, it will attempt to create upload packages if they - do not exists. - only_republish: If the package exists and is published, it will be republished. - Nothing else. Useful to reindex/recreate coe repo, etc. This will not - reapply the metadata (see parameter ``republish``). - republish: Normally packages that are already uploaded and published are not republished. - If true, already published packages are republished. This is different from - ``only_republish``, because the package metadata will be updated, calc diffs - recomputed, etc. - offset: Will add a random sleep before migrating each package between 0 and - ``wait`` seconds. - - Returns: Dictionary with statistics on the migration. 
- """ - - cv = threading.Condition() - overall_report = Report() - threads = [] - - def print_report(): - if not self._quiet: - print(overall_report) - - def migrate_package(package: Package): - logger = self.logger.bind( - package_id=package.package_id, source_upload_id=package.upload_id) - - if package.skip_migration: - self.logger.info( - 'package is marked to skip migration', - package_id=package.package_id, source_upload_id=package.upload_id) - overall_report.total_packages += 1 - overall_report.skipped_packages += 1 - - package_report = package.report - if package_report is None: - package_report = Report() - - elif package.migration_version is not None and package.migration_version >= self.migration_version: - if only_republish: - self.republish_package(package) - else: - self.logger.info( - 'package already migrated, skip it', - package_id=package.package_id, source_upload_id=package.upload_id) - - package_report = package.report - overall_report.skipped_packages += 1 - - else: - try: - if wait > 0: - self.logger.info('wait for a random amount of time') - time.sleep(random.randint(0, wait)) - - package_report = self.migrate_package(package, delete_failed=delete_failed, republish=republish) - - except Exception as e: - package_report = Report() - package_report.failed_packages = 1 - event = 'unexpected exception while migrating packages' - package.migration_failure = event + ': ' + str(e) - package.migration_failure_type = 'exception' - logger.error(event, exc_info=e) - finally: - package.report = package_report - package.migration_version = self.migration_version - - with cv: - package.migration_id = self.migration_id - package.save() - - try: - overall_report.add(package_report) - - migrated_all_packages = all( - p.migration_id == self.migration_id - for p in Package.objects(upload_id=package.upload_id)) - - if migrated_all_packages: - missing_calcs = SourceCalc.objects( - upload=package.upload_id, migration_version__ne=self.migration_version).count() - total_source_calcs = SourceCalc.objects(upload=package.upload_id).count() - - overall_report.missing_calcs += missing_calcs - overall_report.total_source_calcs += total_source_calcs - - logger.info('migrated upload') - - print_report() - except Exception as e: - logger.error('unexpected exception while migrating packages', exc_info=e) - - self._threads += 1 - cv.notify() - - for arg in args: - packages = Package.get_packages( - arg, self.package_directory, create=create_packages, - compress=self.compress_packages) - - if packages is None: - self.logger.error('there are no packages for upload', upload_source_path=arg) - continue - - for package in packages: - with cv: - cv.wait_for(lambda: self._threads > 0) - self._threads -= 1 - thread = threading.Thread(target=lambda: migrate_package(package)) - threads.append(thread) - thread.start() - - for thread in threads: - thread.join() - - return overall_report - - _client_lock = threading.Lock() - - def republish_package(self, package: Package) -> None: - - source_upload_id = package.upload_id - package_id = package.package_id - - logger = self.logger.bind(package_id=package_id, source_upload_id=source_upload_id) - - uploads = self.call_paginated_api('uploads.get_uploads', state='all', name=package_id) - if len(uploads) > 1: - self.logger.warning('upload name is not unique') - if len(uploads) == 0: - self.logger.info('upload does not exist') - return - - for upload in uploads: - if not upload.published: - self.logger.info('upload is not published, therefore cannot re-publish') - continue 
- - upload = self.call_api( - 'uploads.exec_upload_operation', upload_id=upload.upload_id, - payload=dict(operation='publish')) - - sleep = utils.SleepTimeBackoff() - while upload.process_running: - upload = self.call_api('uploads.get_upload', upload_id=upload.upload_id) - sleep() - - if upload.tasks_status == FAILURE: - event = 'could not re publish upload' - logger.error(event, process_errors=upload.errors) - else: - logger.info('republished upload') - - def retrive_migrated_calcs(self, upload_id: str): - """ - Yields all nomad search entries for the given ``upload_id``. The given upload - id is a target ``upload_id``, i.e. uuid. The upload name, however, corresponds to the - migration ``package_id``. - """ - - scroll_id = 'first' - while scroll_id is not None: - scroll_args: Dict[str, Any] = dict(scroll=True) - if scroll_id != 'first': - scroll_args['scroll_id'] = scroll_id - - search = self.call_api('repo.search', upload_id=upload_id, owner='admin', **scroll_args) - scroll_id = search.scroll.scroll_id - for calc in search.results: - yield calc - - def migrate_package(self, package: Package, delete_failed: str = '', republish: bool = False) -> 'Report': - """ Migrates the given package. For other params see :func:`migrate`. """ - - source_upload_id = package.upload_id - package_id = package.package_id - - logger = self.logger.bind(package_id=package_id, source_upload_id=source_upload_id) - logger.debug('start to process package') - - report = Report() - report.total_packages = 1 - package.migration_failure = None - package.migration_failure_type = None - - # check if the package is already uploaded - upload = None - try: - uploads = self.call_paginated_api('uploads.get_uploads', state='all', name=package_id) - if len(uploads) > 1: - event = 'duplicate upload name' - package.migration_failure = event - package.migration_failure_type = 'exception' - report.failed_packages += 1 - return report - elif len(uploads) == 1: - upload = uploads[0] - - except Exception as e: - event = 'could not verify if upload already exists' - logger.error(event, exc_info=e) - package.migration_failure = event - package.migration_failure_type = 'exception' - report.failed_packages += 1 - return report - - # upload and process the upload file - if upload is None: - with utils.timer(logger, 'upload completed'): - try: - upload = self.call_api( - 'uploads.upload', name=package_id, local_path=package.package_path) - except Exception as e: - event = 'could not upload package' - logger.error(event, exc_info=e) - package.migration_failure = event + ': ' + str(e) - package.migration_failure_type = 'processing' - report.failed_packages += 1 - return report - else: - self.logger.info('package was already uploaded') - # get more details than the get_uploads call provided - upload = self.call_api('uploads.get_upload', upload_id=upload.upload_id) - - package.target_upload_id = upload.upload_id - - logger = logger.bind( - source_upload_id=source_upload_id, upload_id=upload.upload_id) - - def delete_upload(reason: int): - delete = \ - (reason == NO_PROCESSED_CALCS and 'N' in delete_failed) or \ - (reason == FAILED_PROCESSING and 'U' in delete_failed) or \ - (reason == FAILED_PUBLISH and 'P' in delete_failed) - - upload_to_delete = upload - - if delete: - upload_to_delete = self.call_api( - 'uploads.delete_upload', upload_id=upload_to_delete.upload_id) - - sleep = utils.SleepTimeBackoff() - while upload_to_delete.process_running: - try: - upload_to_delete = self.call_api( - 'uploads.get_upload', 
upload_id=upload_to_delete.upload_id) - sleep() - except HTTPNotFound: - # the proc upload will be deleted by the delete operation - break - logger.info('deleted upload after migration failure') - else: - logger.warning( - 'will keep upload after migration failure for debugging', - reason=reason, delete_failed=delete_failed) - - # grab source calcs, while waiting for upload - source_calcs = dict() - surrogate_source_calc_with_metadata = None - with utils.timer(logger, 'loaded source metadata'): - with zipfile.ZipFile(package.package_path) as zf: - for filenames_chunk in utils.chunks(zf.namelist(), 1000): - for source_calc in SourceCalc.objects( - upload=source_upload_id, mainfile__in=filenames_chunk): - - source_calc_with_metadata = CalcWithMetadata(**source_calc.metadata) - source_calc_with_metadata.pid = source_calc.pid - source_calc_with_metadata.mainfile = source_calc.mainfile - source_calcs[source_calc.mainfile] = (source_calc, source_calc_with_metadata) - - # establish a surrogate for new calcs - if surrogate_source_calc_with_metadata is None: - surrogate_source_calc_with_metadata = \ - self.surrogate_metadata(source_calc_with_metadata) - - # try to find a surrogate outside the package, if necessary - if surrogate_source_calc_with_metadata is None: - source_calc = SourceCalc.objects(upload=source_upload_id).first() - if source_calc is not None: - source_calc_with_metadata = CalcWithMetadata(**source_calc.metadata) - surrogate_source_calc_with_metadata = \ - self.surrogate_metadata(source_calc_with_metadata) - - # wait for complete upload - with utils.timer(logger, 'upload processing completed'): - sleep = utils.SleepTimeBackoff() - while upload.tasks_running: - upload = self.call_api('uploads.get_upload', upload_id=upload.upload_id) - sleep() - - if upload.tasks_status == FAILURE: - event = 'failed to process upload' - logger.error(event, process_errors=upload.errors) - package.migration_failure = event + ': ' + str(upload.errors) - package.migration_failure_type = 'processing' - report.failed_packages += 1 - delete_upload(FAILED_PROCESSING) - return report - else: - report.total_calcs += upload.calcs.pagination.total - - calc_mainfiles = [] - upload_total_calcs = upload.calcs.pagination.total - - # check for processing errors - with utils.timer(logger, 'checked upload processing'): - per_page = 10000 - for page in range(1, math.ceil(upload_total_calcs / per_page) + 1): - upload = self.call_api( - 'uploads.get_upload', upload_id=upload.upload_id, per_page=per_page, - page=page) - - for calc_proc in upload.calcs.results: - calc_logger = logger.bind( - calc_id=calc_proc.calc_id, - mainfile=calc_proc.mainfile) - - calc_mainfiles.append(calc_proc.mainfile) - - if calc_proc.tasks_status == FAILURE: - report.failed_calcs += 1 - calc_logger.info( - 'could not process a calc', process_errors=calc_proc.errors) - continue - - # verify upload against source - calcs_in_search = 0 - with utils.timer(logger, 'verified upload against source calcs'): - for calc in self.retrive_migrated_calcs(upload.upload_id): - calcs_in_search += 1 - source_calc, source_calc_with_metadata = source_calcs.get( - calc['mainfile'], (None, None)) - - if source_calc is not None: - report.migrated_calcs += 1 - - calc_logger = logger.bind(calc_id=calc['calc_id'], mainfile=calc['mainfile']) - if calc.get('processed', False): - try: - if not self.validate( - calc, source_calc_with_metadata, calc_logger): - report.calcs_with_diffs += 1 - except Exception as e: - calc_logger.warning( - 'unexpected exception during validation', 
exc_info=e) - report.calcs_with_diffs += 1 - else: - calc_logger.info('processed a calc that has no source') - report.new_calcs += 1 - # guessing the metadata from other calcs in upload/package - if surrogate_source_calc_with_metadata is not None: - new_calc_with_metadata = CalcWithMetadata(**surrogate_source_calc_with_metadata.to_dict()) - new_calc_with_metadata.mainfile = calc['mainfile'] - else: - calc_logger.warning('could not determine any metadata for new calc') - create_time_epoch = os.path.getctime(package.upload_path) - new_calc_with_metadata = CalcWithMetadata( - upload_time=datetime.datetime.fromtimestamp(create_time_epoch), - with_embargo=package.restricted > 0, - comment=default_comment, - uploader=default_uploader, - mainfile=calc['mainfile']) - surrogate_source_calc_with_metadata = new_calc_with_metadata - - source_calcs[calc['mainfile']] = (None, new_calc_with_metadata) - - if len(calc_mainfiles) != calcs_in_search: - logger.error('missmatch between processed calcs and calcs found with search') - - # publish upload - if len(calc_mainfiles) > 0 and (republish or not upload.published): - with utils.timer(logger, 'upload published'): - upload_metadata = dict(with_embargo=(package.restricted > 0)) - upload_metadata['calculations'] = [ - self._to_api_metadata(source_calc_with_metadata) - for _, source_calc_with_metadata in source_calcs.values()] - - upload = self.call_api( - 'uploads.exec_upload_operation', upload_id=upload.upload_id, - payload=dict(operation='publish', metadata=upload_metadata)) - - sleep = utils.SleepTimeBackoff() - while upload.process_running: - upload = self.call_api('uploads.get_upload', upload_id=upload.upload_id) - sleep() - - if upload.tasks_status == FAILURE: - event = 'could not publish upload' - logger.error(event, process_errors=upload.errors) - report.failed_calcs = report.total_calcs - report.migrated_calcs = 0 - report.calcs_with_diffs = 0 - report.new_calcs = 0 - report.failed_packages += 1 - package.migration_failure = event + ': ' + str(upload.errors) - package.migration_failure_type = 'publish' - - if not upload.published: - # only do this if the upload was not publish with prior migration - delete_upload(FAILED_PUBLISH) - SourceCalc.objects(upload=source_upload_id, mainfile__in=calc_mainfiles) \ - .update(migration_version=-1) - else: - SourceCalc.objects(upload=source_upload_id, mainfile__in=calc_mainfiles) \ - .update(migration_version=self.migration_version) - else: - if upload.published: - logger.info('package upload already published, skip publish') - else: - delete_upload(NO_PROCESSED_CALCS) - report.failed_packages += 1 - package.migration_failure = 'no calculcations found' - package.migration_failure_type = 'no_calcs' - logger.info('no successful calcs, skip publish') - - logger.info('migrated package', **report) - return report - - def _to_api_metadata(self, calc_with_metadata: CalcWithMetadata) -> dict: - """ Transforms to a dict that fullfils the API's uploade metadata model. 
""" - - source_datasets = calc_with_metadata.datasets - datasets = [] - if source_datasets is not None: - for ds in source_datasets: - doi = ds.get('doi', None) - if doi is not None: - doi = doi['value'] - datasets.append(dict(id=ds['id'], _doi=doi, _name=ds.get('name', None))) - - return dict( - _upload_time=calc_with_metadata.upload_time, - _uploader=calc_with_metadata.uploader['id'], - _pid=calc_with_metadata.pid, - references=[ref['value'] for ref in calc_with_metadata.references], - datasets=datasets, - mainfile=calc_with_metadata.mainfile, - with_embargo=calc_with_metadata.with_embargo, - comment=calc_with_metadata.comment, - coauthors=list(int(user['id']) for user in calc_with_metadata.coauthors), - shared_with=list(int(user['id']) for user in calc_with_metadata.shared_with) - ) - - def source_calc_index(self, *args, **kwargs): - """ see :func:`SourceCalc.index` """ - return SourceCalc.index(self.source, *args, **kwargs) - - def package_index(self, upload_path, **kwargs) -> None: - """ - Creates Package objects and respective package zip files for the given uploads. - The given uploads are supposed to be path to the extracted upload directories. - If the upload is already in the index, it is not recreated. - """ - logger = utils.get_logger(__name__) - - try: - for package_entry in Package.get_packages( - upload_path, self.package_directory, create=True, - compress=self.compress_packages, **kwargs): - - logger.info( - 'package in index', - source_upload_path=upload_path, - source_upload_id=package_entry.upload_id, - package_id=package_entry.package_id) - except Exception as e: - logger.error( - 'could not create package from upload', - upload_path=upload_path, exc_info=e) - - -class Report(utils.POPO): - def __init__(self, *args, **kwargs): - self.total_packages = 0 - self.failed_packages = 0 - self.skipped_packages = 0 - self.total_calcs = 0 # the calcs that have been found by the target - self.total_source_calcs = 0 # the calcs in the source index - self.failed_calcs = 0 # calcs that have been migrated with failed processing - self.migrated_calcs = 0 # the calcs from the source, successfully added to the target - self.calcs_with_diffs = 0 # the calcs from the source, successfully added to the target with different metadata - self.new_calcs = 0 # the calcs successfully added to the target that were not found in the source - self.missing_calcs = 0 # the calcs in the source, that could not be added to the target due to failure or not founding the calc - - super().__init__(*args, **kwargs) - - def add(self, other: 'Report') -> None: - for key, value in other.items(): - self[key] = self.get(key, 0) + value - - def __str__(self): - return ( - 'packages: {:,}, skipped: {:,}, source calcs: {:,}, migrated: {:,}, ' - 'failed: {:,}, missing: {:,}, new: {:,}'.format( - self.total_packages, self.skipped_packages, - self.total_source_calcs, self.migrated_calcs, - self.failed_calcs, self.missing_calcs, - self.new_calcs)) diff --git a/nomad/processing/base.py b/nomad/processing/base.py index c474921d15..32c6a4b520 100644 --- a/nomad/processing/base.py +++ b/nomad/processing/base.py @@ -387,10 +387,9 @@ class NomadCeleryRequest(Request): args = self._payload[0] # this might be run in the worker main thread, which does not have a mongo # connection by default - if infrastructure.mongo_client is None: - infrastructure.setup_mongo() - if infrastructure.repository_db is None: - infrastructure.setup_repository_db() + infrastructure.setup_mongo() + infrastructure.setup_keycloak() + proc = 
unwarp_task(self.task, *args) proc.fail(event, **kwargs) proc.process_status = PROCESS_COMPLETED diff --git a/nomad/processing/data.py b/nomad/processing/data.py index 207bfaa65f..38484aa9cb 100644 --- a/nomad/processing/data.py +++ b/nomad/processing/data.py @@ -34,7 +34,7 @@ from datetime import datetime from pymongo import UpdateOne import hashlib -from nomad import utils, coe_repo, config, infrastructure, search, datamodel +from nomad import utils, config, infrastructure, search, datamodel from nomad.files import PathObject, UploadFiles, ExtractError, ArchiveBasedStagingUploadFiles, PublicUploadFiles, StagingUploadFiles from nomad.processing.base import Proc, process, task, PENDING, SUCCESS, FAILURE from nomad.parsing import parser_dict, match_parser, LocalBackend @@ -49,8 +49,8 @@ class Calc(Proc): It also contains the calculations processing and its state. - The attribute list, does not include the various repository properties generated - while parsing, including ``program_name``, ``program_version``, etc. + The attribute list, does not include the various metadata properties generated + while parsing, including ``code_name``, ``code_version``, etc. Attributes: calc_id: the calc_id of this calc @@ -197,7 +197,7 @@ class Calc(Proc): calc_hash=self.upload_files.calc_hash(self.mainfile), mainfile=self.mainfile) calc_with_metadata.published = False - calc_with_metadata.uploader = self.upload.uploader.to_popo() + calc_with_metadata.uploader = self.upload.user_id calc_with_metadata.upload_time = self.upload.upload_time calc_with_metadata.nomad_version = config.version calc_with_metadata.nomad_commit = config.commit @@ -419,7 +419,7 @@ class Upload(Proc): user_id: the id of the user that created this upload published: Boolean that indicates the publish status publish_time: Date when the upload was initially published - last_update: Date of the last (re-)publishing + last_update: Date of the last publishing/re-processing joined: Boolean indicates if the running processing has joined (:func:`check_join`) """ id_field = 'upload_id' @@ -476,13 +476,13 @@ class Upload(Proc): raise KeyError() @classmethod - def user_uploads(cls, user: coe_repo.User, **kwargs) -> List['Upload']: + def user_uploads(cls, user: datamodel.User, **kwargs) -> List['Upload']: """ Returns all uploads for the given user. Kwargs are passed to mongo query. """ return cls.objects(user_id=str(user.user_id), **kwargs) @property def uploader(self): - return coe_repo.User.from_user_id(self.user_id) + return datamodel.User.get(self.user_id) def get_logger(self, **kwargs): logger = super().get_logger() @@ -501,13 +501,15 @@ class Upload(Proc): The upload will be already saved to the database. Arguments: - user (coe_repo.User): The user that created the upload. + user: The user that created the upload. """ - user: coe_repo.User = kwargs['user'] + # use kwargs to keep compatibility with super method + user: datamodel.User = kwargs['user'] del(kwargs['user']) + if 'upload_id' not in kwargs: kwargs.update(upload_id=utils.create_uuid()) - kwargs.update(user_id=str(user.user_id)) + kwargs.update(user_id=user.user_id) self = super().create(**kwargs) self._continue_with('uploading') @@ -519,21 +521,14 @@ class Upload(Proc): Calc.objects(upload_id=self.upload_id).delete() super().delete() - def delete_upload_local(self, with_coe_repo: bool = False): + def delete_upload_local(self): """ - Deletes of the upload, including its processing state and + Deletes the upload, including its processing state and staging files. 
Local version without celery processing. """ logger = self.get_logger() with utils.lnr(logger, 'staged upload delete failed'): - - if with_coe_repo and self.published: - with utils.timer( - logger, 'upload deleted from repo db', step='repo', - upload_size=self.upload_files.size): - coe_repo.Upload.delete(self.upload_id) - with utils.timer( logger, 'upload deleted from index', step='index', upload_size=self.upload_files.size): @@ -543,59 +538,34 @@ class Upload(Proc): logger, 'staged upload deleted', step='files', upload_size=self.upload_files.size): self.upload_files.delete() - self.delete() + + self.delete() @process - def delete_upload(self, with_coe_repo: bool = False): + def delete_upload(self): """ Deletes of the upload, including its processing state and staging files. This starts the celery process of deleting the upload. """ - self.delete_upload_local(with_coe_repo=with_coe_repo) + self.delete_upload_local() return True # do not save the process status on the delete upload @process def publish_upload(self): """ - Moves the upload out of staging to add it to the coe repository. It will - pack the staging upload files in to public upload files, add entries to the - coe repository db and remove this instance and its calculation from the - processing state db. - - If the upload is already published (i.e. re-publish), it will update user metadata from - repository db, publish to repository db if not exists, update the search index. + Moves the upload out of staging to the public area. It will + pack the staging upload files in to public upload files. """ assert self.processed_calcs > 0 logger = self.get_logger() logger.info('started to publish') - with utils.lnr(logger, '(re-)publish failed'): + with utils.lnr(logger, 'publish failed'): upload_with_metadata = self.to_upload_with_metadata(self.metadata) calcs = upload_with_metadata.calcs - if config.repository_db.publish_enabled: - if config.repository_db.mode == 'coe' and isinstance(self.upload_files, StagingUploadFiles): - with utils.timer( - logger, 'coe extracted raw-file copy created', step='repo', - upload_size=self.upload_files.size): - - self.upload_files.create_extracted_copy() - - coe_upload = coe_repo.Upload.from_upload_id(upload_with_metadata.upload_id) - if coe_upload is None: - with utils.timer( - logger, 'upload added to repository', step='repo', - upload_size=self.upload_files.size): - coe_upload = coe_repo.Upload.publish(upload_with_metadata) - - with utils.timer( - logger, 'upload PIDs read from repository', step='repo', - upload_size=self.upload_files.size): - for calc, coe_calc in zip(calcs, coe_upload.calcs): - calc.pid = coe_calc.coe_calc_id - with utils.timer( logger, 'upload metadata updated', step='metadata', upload_size=self.upload_files.size): @@ -725,9 +695,8 @@ class Upload(Proc): @task def extracting(self): """ - The *task* performed before the actual parsing/normalizing. Extracting and bagging - the uploaded files, computing all keys, create an *upload* entry in the NOMAD-coe - repository db, etc. + The *task* performed before the actual parsing/normalizing: extracting + the uploaded files. 
""" # extract the uploaded file self._upload_files = ArchiveBasedStagingUploadFiles( @@ -1028,7 +997,7 @@ class Upload(Proc): result = UploadWithMetadata( upload_id=self.upload_id, - uploader=utils.POPO(id=int(self.user_id)), + uploader=self.user_id, upload_time=self.upload_time if user_upload_time is None else user_upload_time) result.calcs = [get_metadata(calc) for calc in Calc.objects(upload_id=self.upload_id)] diff --git a/nomad/search.py b/nomad/search.py index 5adc19d65e..e9b3c2e34c 100644 --- a/nomad/search.py +++ b/nomad/search.py @@ -24,7 +24,7 @@ import elasticsearch.helpers from elasticsearch.exceptions import NotFoundError from datetime import datetime -from nomad import config, datamodel, infrastructure, datamodel, coe_repo, utils +from nomad import config, datamodel, infrastructure, datamodel, utils path_analyzer = analyzer( @@ -32,13 +32,6 @@ path_analyzer = analyzer( tokenizer=tokenizer('path_tokenizer', 'pattern', pattern='/')) -user_cache: Dict[str, Any] = dict() -""" -A cache for user popos used in the index. We will not retrieve names all the time. -This cache should be cleared, before larger re-index operations. -""" - - class AlreadyExists(Exception): pass @@ -51,28 +44,9 @@ class ScrollIdNotFound(Exception): pass class User(InnerDoc): @classmethod - def from_user_popo(cls, user): - self = user_cache.get(user.id, None) - if self is None: - self = cls(user_id=user.id) - - if 'first_name' not in user: - user = coe_repo.User.from_user_id(user.id).to_popo() - - last_name = user['last_name'].strip() - first_name = user['first_name'].strip() - - if len(last_name) > 0 and len(first_name) > 0: - name = '%s, %s' % (user['last_name'], user['first_name']) - elif len(last_name) != 0: - name = last_name - elif len(first_name) != 0: - name = first_name - else: - name = 'unnamed user with id %d' % user.id - - self.name = name - user_cache[user.id] = self + def from_user_id(cls, user_id): + self = cls(user_id=user_id) + self.name = datamodel.User.get(user_id=user_id).name return self @@ -156,12 +130,12 @@ class Entry(Document, metaclass=WithDomain): else: self.files = source.files - self.uploader = User.from_user_popo(source.uploader) if source.uploader is not None else None + self.uploader = User.from_user_id(source.uploader) if source.uploader is not None else None self.with_embargo = source.with_embargo self.published = source.published - self.authors = [User.from_user_popo(user) for user in source.coauthors] - self.owners = [User.from_user_popo(user) for user in source.shared_with] + self.authors = [User.from_user_id(user_id) for user_id in source.coauthors] + self.owners = [User.from_user_id(user_id) for user_id in source.shared_with] if self.uploader is not None: if self.uploader not in self.authors: self.authors.append(self.uploader) diff --git a/ops/deployments/nomad.experiments.values.yaml b/ops/deployments/nomad.experiments.values.yaml index 5746dffcad..a9380ac3c7 100644 --- a/ops/deployments/nomad.experiments.values.yaml +++ b/ops/deployments/nomad.experiments.values.yaml @@ -12,9 +12,6 @@ worker: dbname: fairdi_nomad_experiments -postgres: - publish_enabled: false - uploadurl: 'https://labdev-nomad.rzg.mpg.de/fairdi/nomad/experiments/upload' volumes: diff --git a/ops/deployments/nomad.migration.values.yaml b/ops/deployments/nomad.migration.values.yaml index 6127d38a4a..50a6f4b2cf 100644 --- a/ops/deployments/nomad.migration.values.yaml +++ b/ops/deployments/nomad.migration.values.yaml @@ -17,9 +17,6 @@ worker: dbname: fairdi_nomad_migration -postgres: - publish_enabled: 
false - uploadurl: 'https://labdev-nomad.rzg.mpg.de/fairdi/nomad/migration/upload' volumes: diff --git a/ops/deployments/nomad.prod-1.values.yaml b/ops/deployments/nomad.prod-1.values.yaml index b90493fb95..84768fdf40 100644 --- a/ops/deployments/nomad.prod-1.values.yaml +++ b/ops/deployments/nomad.prod-1.values.yaml @@ -18,14 +18,6 @@ worker: dbname: fairdi_nomad_prod -postgres: - publish_enabled: true - mode: 'coe' - dbname: 'nomad_prod' - host: 'db-repository-nomad.esc' - user: 'nomadrep' - password_secret: 'nomad-production-repository-password' - uploadurl: 'https://repository.nomad-coe.eu/uploads/gui/upload' volumes: @@ -33,7 +25,6 @@ volumes: public: /nomad/fairdi/prod/fs/public staging: /nomad/fairdi/prod/fs/staging tmp: /nomad/fairdi/prod/fs/tmp - coe_extracted: /nomad/fairdi/prod/fs/extracted nomad: /nomad mail: diff --git a/ops/deployments/nomad.prod-2.values.yaml b/ops/deployments/nomad.prod-2.values.yaml index 73ac1f247e..a65b06ad17 100644 --- a/ops/deployments/nomad.prod-2.values.yaml +++ b/ops/deployments/nomad.prod-2.values.yaml @@ -18,14 +18,6 @@ worker: dbname: fairdi_nomad_prod -postgres: - publish_enabled: true - mode: 'coe' - dbname: 'nomad_prod' - host: 'db-repository-nomad.esc' - user: 'nomadrep' - password_secret: 'nomad-production-repository-password' - uploadurl: 'https://repository.nomad-coe.eu/uploads/gui/upload' volumes: @@ -33,7 +25,6 @@ volumes: public: /nomad/fairdi/prod/fs/public staging: /nomad/fairdi/prod/fs/staging tmp: /nomad/fairdi/prod/fs/tmp - coe_extracted: /nomad/fairdi/prod/fs/extracted nomad: /nomad mail: diff --git a/ops/deployments/nomad.prod-test.values.yaml b/ops/deployments/nomad.prod-test.values.yaml index 3951c87f72..b092573aac 100644 --- a/ops/deployments/nomad.prod-test.values.yaml +++ b/ops/deployments/nomad.prod-test.values.yaml @@ -17,10 +17,6 @@ worker: dbname: fairdi_nomad_prod -postgres: - publish_enabled: false - dbname: 'fairdi_nomad_migration' - uploadurl: 'https://labdev-nomad.rzg.mpg.de/fairdi/nomad/prod-test/upload' volumes: @@ -28,5 +24,4 @@ volumes: public: /nomad/fairdi/prod/fs/public staging: /nomad/fairdi/prod/fs/staging tmp: /nomad/fairdi/prod/fs/tmp - coe_extracted: /nomad/fairdi/prod/fs/extracted nomad: /nomad diff --git a/ops/deployments/nomad.reprocess.values.yaml b/ops/deployments/nomad.reprocess.values.yaml index 9bf9b52000..4e011c3835 100644 --- a/ops/deployments/nomad.reprocess.values.yaml +++ b/ops/deployments/nomad.reprocess.values.yaml @@ -16,10 +16,6 @@ worker: dbname: fairdi_nomad_prod -postgres: - publish_enabled: false - dbname: 'fairdi_nomad_migration' - uploadurl: 'https://labdev-nomad.rzg.mpg.de/fairdi/nomad/reprocess/upload' volumes: @@ -27,5 +23,4 @@ volumes: public: /nomad/fairdi/prod/fs/public staging: /scratch/fairdi/migration/fs/staging tmp: /nomad/fairdi/prod/fs/tmp - coe_extracted: /nomad/fairdi/prod/fs/extracted nomad: /nomad diff --git a/ops/deployments/nomad.staging.values.yaml b/ops/deployments/nomad.staging.values.yaml index a06ffffda9..b9ed04afb1 100644 --- a/ops/deployments/nomad.staging.values.yaml +++ b/ops/deployments/nomad.staging.values.yaml @@ -20,14 +20,6 @@ worker: dbname: fairdi_nomad_staging -postgres: - publish_enabled: true - mode: 'coe' - dbname: 'nomad_staging' - host: 'staging-nomad.esc' - user: 'nomad_fairdi' - password_secret: 'nomad-staging-repository-password' - uploadurl: 'https://staging-nomad.rzg.mpg.de/uploads/gui/upload' volumes: @@ -35,7 +27,6 @@ volumes: public: /nomad/fairdi/staging/fs/public staging: /nomad/fairdi/staging/fs/staging tmp: 
/nomad/fairdi/staging/fs/tmp - coe_extracted: /nomad/fairdi/staging/fs/extracted nomad: /nomad mail: diff --git a/ops/deployments/nomad.testing.values.yaml b/ops/deployments/nomad.testing.values.yaml index b891a71bd5..6fc231939c 100644 --- a/ops/deployments/nomad.testing.values.yaml +++ b/ops/deployments/nomad.testing.values.yaml @@ -16,9 +16,6 @@ worker: dbname: fairdi_nomad_testing -postgres: - publish_enabled: false - uploadurl: 'https://labdev-nomad.rzg.mpg.de/fairdi/nomad/testing/upload' volumes: diff --git a/ops/docker-compose/nomad/docker-compose.override.yml b/ops/docker-compose/nomad/docker-compose.override.yml index c0e4c804b8..c02e86d1d2 100644 --- a/ops/docker-compose/nomad/docker-compose.override.yml +++ b/ops/docker-compose/nomad/docker-compose.override.yml @@ -15,11 +15,6 @@ version: '3.4' services: - # postgres for NOMAD-coe repository API and GUI - postgres: - ports: - - 5432:5432 - # broker for celery rabbitmq: ports: diff --git a/ops/docker-compose/nomad/docker-compose.prod.yml b/ops/docker-compose/nomad/docker-compose.prod.yml index d317087e25..9a5581585c 100644 --- a/ops/docker-compose/nomad/docker-compose.prod.yml +++ b/ops/docker-compose/nomad/docker-compose.prod.yml @@ -20,11 +20,6 @@ services: volumes: - /nomad/fairdi/db/keycloak:/opt/jboss/keycloak/standalone - postgres: - ports: - - 5432:5432 - volumes: - - /nomad/fairdi/db/postgres:/var/lib/postgresql/data # the search engine elastic: ports: diff --git a/ops/docker-compose/nomad/docker-compose.yml b/ops/docker-compose/nomad/docker-compose.yml index 4c26aeb691..a1bf3be832 100644 --- a/ops/docker-compose/nomad/docker-compose.yml +++ b/ops/docker-compose/nomad/docker-compose.yml @@ -22,17 +22,6 @@ x-common-variables: &nomad_backend_env NOMAD_KEYCLOAK_HOST: keycloak services: - # postgres for NOMAD-coe repository API and GUI - postgres: - restart: always - image: postgres:9.4 - container_name: nomad_postgres - environment: - POSTGRES_PASSWORD: 'nomad' - POSTGRES_USER: 'postgres' - volumes: - - nomad_postgres:/var/lib/postgresql/data - # keycload for user management keycloak: restart: always @@ -90,7 +79,6 @@ services: NOMAD_SERVICE: nomad_worker links: - keycloak - - postgres - rabbitmq - elastic - mongo @@ -112,7 +100,6 @@ services: NOMAD_SERVICE: nomad_api links: - keycloak - - postgres - rabbitmq - elastic - mongo @@ -143,7 +130,6 @@ services: volumes: nomad_keycloak: - nomad_postgres: nomad_mongo: nomad_elastic: nomad_rabbitmq: diff --git a/ops/helm/nomad/templates/api-deployment.yaml b/ops/helm/nomad/templates/api-deployment.yaml index 62b6e34cd9..36fe3e6ee2 100644 --- a/ops/helm/nomad/templates/api-deployment.yaml +++ b/ops/helm/nomad/templates/api-deployment.yaml @@ -112,13 +112,6 @@ spec: value: "{{ .Values.api.console_loglevel }}" - name: NOMAD_LOGSTASH_LEVEL value: "{{ .Values.api.logstash_loglevel }}" - {{ if .Values.postgres.password_secret }} - - name: NOMAD_REPOSITORY_DB_PASSWORD - valueFrom: - secretKeyRef: - name: {{ .Values.postgres.password_secret }} - key: password - {{ end }} {{ if .Values.api.adminPasswordSecret }} - name: NOMAD_SERVICES_ADMIN_PASSWORD valueFrom diff --git a/ops/helm/nomad/templates/nomad-configmap.yml b/ops/helm/nomad/templates/nomad-configmap.yml index e5f86ae321..fdf37c5ecb 100644 --- a/ops/helm/nomad/templates/nomad-configmap.yml +++ b/ops/helm/nomad/templates/nomad-configmap.yml @@ -38,19 +38,6 @@ data: host: "{{ .Values.mongo.host }}" port: {{ .Values.mongo.port }} db_name: "{{ .Values.dbname }}" - repository_db: - host: "{{ .Values.postgres.host }}" - port: {{ 
.Values.postgres.port }} - dbname: {{ if .Values.postgres.dbname }} "{{ .Values.postgres.dbname }}" {{ else }} "{{ .Values.dbname }}" {{ end }} - sequential_publish: {{ .Values.postgres.sequential_publish }} - publish_enabled: {{ .Values.postgres.publish_enabled }} - mode: "{{ .Values.postgres.mode }}" - {{ if .Values.postgres.user }} - user: "{{ .Values.postgres.user }}" - {{ end }} - {{ if .Values.postgres.password }} - password: "{{ .Values.postgres.password }}" - {{ end }} mail: enabled: {{ .Values.mail.enabled }} host: "{{ .Values.mail.host }}" diff --git a/ops/helm/nomad/templates/worker-deployment.yaml b/ops/helm/nomad/templates/worker-deployment.yaml index e7c9d1c825..29126cf37f 100644 --- a/ops/helm/nomad/templates/worker-deployment.yaml +++ b/ops/helm/nomad/templates/worker-deployment.yaml @@ -43,10 +43,6 @@ spec: name: public-volume - mountPath: /app/.volumes/fs/staging name: staging-volume - {{ if .Values.volumes.coe_extracted }} - - mountPath: /app/.volumes/fs/extracted - name: extracted-volume - {{ end }} - mountPath: /nomad name: nomad-volume env: @@ -60,13 +56,6 @@ spec: valueFrom: fieldRef: fieldPath: spec.nodeName - {{ if .Values.postgres.password_secret }} - - name: NOMAD_REPOSITORY_DB_PASSWORD - valueFrom: - secretKeyRef: - name: {{ .Values.postgres.password_secret }} - key: password - {{ end }} {{ if .Values.api.adminPasswordSecret }} - name: NOMAD_SERVICES_ADMIN_PASSWORD valueFrom @@ -104,12 +93,6 @@ spec: hostPath: path: {{ .Values.volumes.public }} type: Directory - {{ if .Values.volumes.coe_extracted }} - - name: extracted-volume - hostPath: - path: {{ .Values.volumes.coe_extracted }} - type: Directory - {{ end }} - name: staging-volume {{ if (eq .Values.worker.storage "memory") }} emptyDir: diff --git a/ops/helm/nomad/values.yaml b/ops/helm/nomad/values.yaml index 80853aeb66..7f0a24025b 100644 --- a/ops/helm/nomad/values.yaml +++ b/ops/helm/nomad/values.yaml @@ -114,15 +114,6 @@ elastic: host: nomad-flink-01.esc port: 9200 -postgres: - sequential_publish: false - publish_enabled: true - host: nomad-flink-01.esc - port: 5432 - ## CoE repository mode, values are fairdi, coe. 
Fairdi stores raw-file path for - # new raw-file API; coe stores raw-file paths for extracted raw-file copy - mode: fairdi - logstash: port: 5000 host: nomad-flink-01.esc diff --git a/requirements.txt b/requirements.txt index dedfa36ac4..08f8631864 100644 --- a/requirements.txt +++ b/requirements.txt @@ -34,8 +34,6 @@ requests click zipstream bagit -psycopg2-binary -sqlalchemy bcrypt filelock ujson diff --git a/tests/conftest.py b/tests/conftest.py index 4bd59dfa58..fa90e0545e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -15,22 +15,19 @@ from typing import Tuple, List import pytest import logging -from sqlalchemy.orm import Session -from contextlib import contextmanager from collections import namedtuple from smtpd import SMTPServer from threading import Lock, Thread import asyncore import time -import pytest import shutil import os.path import datetime -import base64 from bravado.client import SwaggerClient -import basicauth +from flask import request, g -from nomad import config, infrastructure, parsing, processing, coe_repo, api +from nomad import config, infrastructure, parsing, processing, api, datamodel +from nomad.datamodel import User from tests import test_parsing, test_normalizing from tests.processing import test_data as test_processing @@ -62,14 +59,13 @@ def raw_files_infra(): config.fs.staging = '.volumes/test_fs/staging' config.fs.public = '.volumes/test_fs/public' config.fs.migration_packages = '.volumes/test_fs/migration_packages' - config.fs.coe_extracted = '.volumes/test_fs/extracted' config.fs.prefix_size = 2 @pytest.fixture(scope='function') def raw_files(raw_files_infra): """ Provides cleaned out files directory structure per function. Clears files after test. """ - directories = [config.fs.staging, config.fs.public, config.fs.migration_packages, config.fs.tmp, config.fs.coe_extracted] + directories = [config.fs.staging, config.fs.public, config.fs.migration_packages, config.fs.tmp] for directory in directories: if not os.path.exists(directory): os.makedirs(directory) @@ -119,22 +115,6 @@ def celery_inspect(purged_app): yield purged_app.control.inspect() -# The follwing workarround seems unnecessary. I'll leave it here for an incubation period -# @pytest.fixture() -# def patched_celery(monkeypatch): -# # There is a bug in celery, which prevents to use the celery_worker for multiple -# # tests: https://github.com/celery/celery/issues/4088 -# # The bug has a fix from Aug 2018, but it is not yet released (TODO). -# # We monkeypatch a similar solution here. -# def add_reader(self, fds, callback, *args): -# from kombu.utils.eventio import ERR, READ, poll -# if self.poller is None: -# self.poller = poll() -# return self.add(fds, callback, READ | ERR, args) -# monkeypatch.setattr('kombu.asynchronous.hub.Hub.add_reader', add_reader) -# yield - - # It might be necessary to make this a function scoped fixture, if old tasks keep # 'bleeding' into successive tests. 
@pytest.fixture(scope='function') @@ -171,14 +151,14 @@ def mongo(mongo_infra): @pytest.fixture(scope='session') def elastic_infra(): """ Provides elastic infrastructure to the session """ - config.elastic.index_name = 'test_nomad_fairdi_calcs' + config.elastic.index_name = 'test_nomad_fairdi_0_6' try: return infrastructure.setup_elastic() except Exception: # try to delete index, error might be caused by changed mapping from elasticsearch_dsl import connections connections.create_connection(hosts=['%s:%d' % (config.elastic.host, config.elastic.port)]) \ - .indices.delete(index='test_nomad_fairdi_calcs') + .indices.delete(index='test_nomad_fairdi_0_6') return infrastructure.setup_elastic() @@ -186,7 +166,7 @@ def clear_elastic(elastic): while True: try: elastic.delete_by_query( - index='test_nomad_fairdi_calcs', body=dict(query=dict(match_all={})), + index='test_nomad_fairdi_0_6', body=dict(query=dict(match_all={})), wait_for_completion=True, refresh=True) break except Exception: @@ -202,140 +182,82 @@ def elastic(elastic_infra): return elastic_infra -@pytest.fixture(scope='session') -def keycloak(): - infrastructure.setup_keycloak() - - return infrastructure.keycloak_oidc_client - - -@contextmanager -def create_postgres_infra(patch=None, **kwargs): - """ - A generator that sets up and tears down a test db and monkeypatches it to the - respective global infrastructure variables. - """ - db_args = dict(dbname='test_nomad_fairdi_repo_db') - db_args.update(**kwargs) - - old_config = config.repository_db - new_config = config.NomadConfig(**config.repository_db) - new_config.update(**db_args) - - if patch is not None: - patch.setattr('nomad.config.repository_db', new_config) - - connection, _ = infrastructure.sqlalchemy_repository_db(**db_args) - assert connection is not None - - # we use a transaction around the session to rollback anything that happens within - # test execution - trans = connection.begin() - db = Session(bind=connection, autocommit=True) - - old_connection, old_db = None, None - if patch is not None: - from nomad.infrastructure import repository_db_conn, repository_db - old_connection, old_db = repository_db_conn, repository_db - patch.setattr('nomad.infrastructure.repository_db_conn', connection) - patch.setattr('nomad.infrastructure.repository_db', db) +test_users = { + '0': dict(email='admin'), + '1': dict(email='sheldon.cooper@nomad-coe.eu', first_name='Sheldon', last_name='Cooper'), + '2': dict(email='leonard.hofstadter@nomad-coe.eu', first_name='Leonard', last_name='Hofstadter') +} - yield db - if patch is not None: - patch.setattr('nomad.infrastructure.repository_db_conn', old_connection) - patch.setattr('nomad.infrastructure.repository_db', old_db) - patch.setattr('nomad.config.repository_db', old_config) - - trans.rollback() - db.expunge_all() - db.invalidate() - db.close_all() - - connection.close() - connection.engine.dispose() - - -@pytest.fixture(scope='module') -def postgres_infra(monkeysession): - """ Provides a clean coe repository db per module """ - with create_postgres_infra(monkeysession, exists=False) as db: - yield db +class KeycloakMock: + def configure_flask(self, *args, **kwargs): + pass + def authorize_flask(self, *args, **kwargs): + if 'Authorization' in request.headers and request.headers['Authorization'].startswith('Mocked '): + user_id = request.headers['Authorization'].split(None, 1)[1].strip() + g.user = User(user_id=user_id, **test_users[user_id]) -@pytest.fixture(scope='function') -def proc_infra(worker, postgres, elastic, mongo, raw_files): - 
""" Combines all fixtures necessary for processing (postgres, elastic, worker, files, mongo) """ - return dict( - postgres=postgres, - elastic=elastic) + def get_user(self, user_id=None, email=None): + if user_id is not None: + return User(user_id=user_id, **test_users[user_id]) + elif email is not None: + for user_id, user_values in test_users.items(): + if user_values['email'] == email: + return User(user_id=user_id, **user_values) + assert False, 'only test user emails are recognized' + else: + assert False, 'no token based get_user during tests' -@pytest.fixture(scope='function') -def expandable_postgres(monkeysession, postgres_infra): - """ Provides a coe repository db that can be deleted during test """ - with create_postgres_infra(monkeysession, dbname='test_nomad_fairdi_expandable_repo_db', exists=False) as db: - yield db +@pytest.fixture(scope='session', autouse=True) +def keycloak(monkeysession): + monkeysession.setattr('nomad.infrastructure.keycloak', KeycloakMock()) @pytest.fixture(scope='function') -def postgres(postgres_infra): - """ Provides a clean coe repository db per function. Clears db before test. """ - # do not wonder, this will not setback the id counters - postgres_infra.execute('TRUNCATE uploads CASCADE;') - postgres_infra.execute('DELETE FROM sessions WHERE user_id >= 3;') - postgres_infra.execute('DELETE FROM users WHERE user_id >= 3;') - yield postgres_infra +def proc_infra(worker, elastic, mongo, raw_files): + """ Combines all fixtures necessary for processing (elastic, worker, files, mongo) """ + return dict(elastic=elastic) @pytest.fixture(scope='module') -def test_user(postgres_infra): - from nomad import coe_repo - return coe_repo.ensure_test_user(email='sheldon.cooper@nomad-fairdi.tests.de') +def test_user(keycloak): + return User(id='1', **test_users['1']) @pytest.fixture(scope='module') -def other_test_user(postgres_infra): - from nomad import coe_repo - return coe_repo.ensure_test_user(email='leonard.hofstadter@nomad-fairdi.tests.de') +def other_test_user(keycloak): + return User(id='2', **test_users['2']) @pytest.fixture(scope='module') -def admin_user(postgres_infra): - from nomad import coe_repo - return coe_repo.admin_user() +def admin_user(keycloak): + return User(id='0', **test_users['0']) -def create_auth_headers(user): - basic_auth_str = '%s:password' % user.email - basic_auth_bytes = basic_auth_str.encode('utf-8') - basic_auth_base64 = base64.b64encode(basic_auth_bytes).decode('utf-8') +def create_auth_headers(user: User): return { - 'Authorization': 'Basic %s' % basic_auth_base64 + 'Authorization': 'Mocked %s' % user.user_id } @pytest.fixture(scope='module') -def test_user_auth(test_user: coe_repo.User): - return dict(Authorization=basicauth.encode('sheldon.cooper@nomad-coe.eu', 'password')) - - -# @pytest.fixture(scope='module') -# def test_user_auth(test_user: coe_repo.User): -# return create_auth_headers(test_user) +def test_user_auth(test_user: User): + return create_auth_headers(test_user) @pytest.fixture(scope='module') -def other_test_user_auth(other_test_user: coe_repo.User): +def other_test_user_auth(other_test_user: User): return create_auth_headers(other_test_user) @pytest.fixture(scope='module') -def admin_user_auth(admin_user: coe_repo.User): +def admin_user_auth(admin_user: User): return create_auth_headers(admin_user) @pytest.fixture(scope='function') -def bravado(client, postgres, test_user_auth): +def bravado(client, test_user_auth): http_client = FlaskTestHttpClient(client, headers=test_user_auth) return 
SwaggerClient.from_url('/swagger.json', http_client=http_client) @@ -560,7 +482,7 @@ def non_empty_uploaded(non_empty_example_upload: str, raw_files) -> Tuple[str, s @pytest.mark.timeout(config.tests.default_timeout) @pytest.fixture(scope='function') -def processed(uploaded: Tuple[str, str], test_user: coe_repo.User, proc_infra) -> processing.Upload: +def processed(uploaded: Tuple[str, str], test_user: User, proc_infra) -> processing.Upload: """ Provides a processed upload. Upload was uploaded with test_user. """ @@ -569,7 +491,7 @@ def processed(uploaded: Tuple[str, str], test_user: coe_repo.User, proc_infra) - @pytest.mark.timeout(config.tests.default_timeout) @pytest.fixture(scope='function') -def processeds(non_empty_example_upload: str, test_user: coe_repo.User, proc_infra) -> List[processing.Upload]: +def processeds(non_empty_example_upload: str, test_user: User, proc_infra) -> List[processing.Upload]: result: List[processing.Upload] = [] for i in range(2): upload_id = '%s_%d' % (os.path.basename(non_empty_example_upload).replace('.zip', ''), i) @@ -581,7 +503,7 @@ def processeds(non_empty_example_upload: str, test_user: coe_repo.User, proc_inf @pytest.mark.timeout(config.tests.default_timeout) @pytest.fixture(scope='function') -def non_empty_processed(non_empty_uploaded: Tuple[str, str], test_user: coe_repo.User, proc_infra) -> processing.Upload: +def non_empty_processed(non_empty_uploaded: Tuple[str, str], test_user: User, proc_infra) -> processing.Upload: """ Provides a processed upload. Upload was uploaded with test_user. """ @@ -604,15 +526,6 @@ def published(non_empty_processed: processing.Upload, example_user_metadata) -> return non_empty_processed -@pytest.fixture(scope='function', params=[None, 'fairdi', 'coe']) -def with_publish_to_coe_repo(monkeypatch, request): - mode = request.param - if mode is not None: - monkeypatch.setattr('nomad.config.repository_db.publish_enabled', True) - monkeypatch.setattr('nomad.config.repository_db.mode', mode) - return request.param is not None - - @pytest.fixture def reset_config(): """ Fixture that resets the log-level after test. 
""" diff --git a/tests/data/migration/archive/upload/archive.tar.gz b/tests/data/migration/archive/upload/archive.tar.gz deleted file mode 100644 index a665e418544655927532e20d5c5b3d3c2250e573..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 825 zcmb2|=3tOo;v2)j{5Cw=|F(h1JLCO_P8qk{@SKy9e}K*DHuI$mEhY<3EXkX$+i7f_ zSkm#|ZaHu4qh01Mk{SQv?%eoy?&HoMSJ>tF%J_~KGTmptEO_3Hy>jzL)`oTMG2#aq zb{}XIc$~oS=dOIN!3tN`9X4KmKl-C|?yGO_b<=*brR>acbEb-ji+j#^xW0YgBx-$8 z@o%GTe&(Or^Ixa_sGj$jw_)?-%Njq<Kd#@;uA<{OSu54`%<Dr{?EC!G8F%En?^M+D z@Ov7R|GX}}wkD3}<=i_DOVYg09h>u{_|x>o=f$U`{hd~%*lHGSBYoRQzM(p#NS%dq z@=ee83>uv3k#5ExG<9F?{(M<@?i^{QAVKb^qgR*A$(ph<HsC||ouG@VHc=BVzmk?N z^LaOWt@&Qnx7xRN$-U6O=woE5eMv=3bp7O2&%UhBdj0q1Qmr?ESFd{7tm=N5KZ*0{ zobL3i^TP!<tzY<Jk$L>vkIyfiKiBtY^^ZqCnwJ{LRo%Z{y=!W=ug(#d6H!}Ib)qh< ziue-dGVA)LTc%f9(q<l8{K}kRpYH0Q1_P%r3|TTyZv660eHV3i%L-5JOy;lWmv~yg zym@xNdEX&}S&A8b3$xxFk14D=)D>v8?&q2Js!uhgPwh7FDULGv9yk5Qr+EQi4NgA` z(^3~b{diL6mZ(LU7cEclIZGsoAJS#&%Q?>5bfMd3ed&~Q{{B5@b_u@?`T6<j_bTIS zR&}e7%sytPY#tJAy*7DY0MFX~?%?^sZ|?-3j*03qmiqqQ#c9)$wcfYI4wOFHrqR1v z^{8m}>D)6fGlCf>U72e5`+BregV>aQ^^YBwj~!vo_&u?kS>^yom7vp!>5+o3EJas) zn5myEY7J~D6#3b>yG<nCiue3hK^KYkN!-fr*DPAh_%$c?i)8+}JWp7w`g4Qsimk_{ zoaDFbjEr1b`Kx*wleDzaq<#&p`~LRXQ7R?g)*_#drG2TqtW#~`GhdBMG1=%~+AY`9 z0r|T&pNzfx`kYBT|Nmcw|Ck)sZ~4zJbNT;%mJ8W$|JQ;TKL7K7)nEQ!p7`s(9h5!$ d-~BKDPdjIU#HA%5U=Qyf#`5B&9Sj-_3;?9qoooOA diff --git a/tests/data/migration/baseline/upload/1/template.json b/tests/data/migration/baseline/upload/1/template.json deleted file mode 100644 index b85b5bd174..0000000000 --- a/tests/data/migration/baseline/upload/1/template.json +++ /dev/null @@ -1,110 +0,0 @@ -{ - "section_run": [ - { - "_name": "section_run", - "_gIndex": 0, - "program_name": "VASP", - "program_version": "4.6.35 3Apr08 complex parallel LinuxIFC", - "program_basis_set_type": "plane waves", - "section_method": [ - { - "_name": "section_method", - "_gIndex": 0, - - "electronic_structure_method": "DFT", - "section_XC_functionals": [ - { - "_name": "section_XC_functionals", - "_gIndex": 0, - "XC_functional_name": "GGA_X_PBE" - } - ] - } - ], - "section_system": [ - { - "_name": "section_system", - "_gIndex": 0, - "simulation_cell": [ - [ - 5.76372622e-10, - 0.0, - 0.0 - ], - [ - 0.0, - 5.76372622e-10, - 0.0 - ], - [ - 0.0, - 0.0, - 4.0755698899999997e-10 - ] - ], - "configuration_periodic_dimensions": [ - true, - true, - true - ], - "atom_positions": [ - [ - 2.88186311e-10, - 0.0, - 2.0377849449999999e-10 - ], - [ - 0.0, - 2.88186311e-10, - 2.0377849449999999e-10 - ], - [ - 0.0, - 0.0, - 0.0 - ], - [ - 2.88186311e-10, - 2.88186311e-10, - 0.0 - ] - ], - "atom_labels": [ - "Br", - "K", - "Si", - "Si" - ] - } - ], - "section_single_configuration_calculation": [ - { - "_name": "section_single_configuration_calculation", - "_gIndex": 0, - "single_configuration_calculation_to_system_ref": 0, - "single_configuration_to_calculation_method_ref": 0, - "energy_free": -1.5936767191492225e-18, - "energy_total": -1.5935696296699573e-18, - "energy_total_T0": -3.2126683561907e-22 - } - ], - "section_sampling_method": [ - { - "_name": "section_sampling_method", - "_gIndex": 0, - "sampling_method": "geometry_optimization" - } - ], - "section_frame_sequence": [ - { - "_name": "section_frame_sequence", - "_gIndex": 0, - "frame_sequence_to_sampling_ref": 0, - "frame_sequence_local_frames_ref": [ - 0 - ] - } - ] - } - ] - } \ No newline at end of file diff --git a/tests/data/migration/baseline/upload/2/template.json b/tests/data/migration/baseline/upload/2/template.json 
deleted file mode 100644 index b85b5bd174..0000000000 --- a/tests/data/migration/baseline/upload/2/template.json +++ /dev/null @@ -1,110 +0,0 @@ -{ - "section_run": [ - { - "_name": "section_run", - "_gIndex": 0, - "program_name": "VASP", - "program_version": "4.6.35 3Apr08 complex parallel LinuxIFC", - "program_basis_set_type": "plane waves", - "section_method": [ - { - "_name": "section_method", - "_gIndex": 0, - - "electronic_structure_method": "DFT", - "section_XC_functionals": [ - { - "_name": "section_XC_functionals", - "_gIndex": 0, - "XC_functional_name": "GGA_X_PBE" - } - ] - } - ], - "section_system": [ - { - "_name": "section_system", - "_gIndex": 0, - "simulation_cell": [ - [ - 5.76372622e-10, - 0.0, - 0.0 - ], - [ - 0.0, - 5.76372622e-10, - 0.0 - ], - [ - 0.0, - 0.0, - 4.0755698899999997e-10 - ] - ], - "configuration_periodic_dimensions": [ - true, - true, - true - ], - "atom_positions": [ - [ - 2.88186311e-10, - 0.0, - 2.0377849449999999e-10 - ], - [ - 0.0, - 2.88186311e-10, - 2.0377849449999999e-10 - ], - [ - 0.0, - 0.0, - 0.0 - ], - [ - 2.88186311e-10, - 2.88186311e-10, - 0.0 - ] - ], - "atom_labels": [ - "Br", - "K", - "Si", - "Si" - ] - } - ], - "section_single_configuration_calculation": [ - { - "_name": "section_single_configuration_calculation", - "_gIndex": 0, - "single_configuration_calculation_to_system_ref": 0, - "single_configuration_to_calculation_method_ref": 0, - "energy_free": -1.5936767191492225e-18, - "energy_total": -1.5935696296699573e-18, - "energy_total_T0": -3.2126683561907e-22 - } - ], - "section_sampling_method": [ - { - "_name": "section_sampling_method", - "_gIndex": 0, - "sampling_method": "geometry_optimization" - } - ], - "section_frame_sequence": [ - { - "_name": "section_frame_sequence", - "_gIndex": 0, - "frame_sequence_to_sampling_ref": 0, - "frame_sequence_local_frames_ref": [ - 0 - ] - } - ] - } - ] - } \ No newline at end of file diff --git a/tests/data/migration/example.tar.gz b/tests/data/migration/example.tar.gz deleted file mode 100644 index df4bea96b4383c55a1cb25f5eda2d7fe00312108..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 454 zcmb2|=3o#PxDvy_{Pxy;ze5HhYz0E`DgFy`%Wt)1KRp!v<d`;-)fpv&&Amsa8{17< zpCD@1_K{!bwPxG<=@a5MI6Qp3WV!Zb&xvO)rxsoO?9P$6wa?Cdvs~+<uR8=!&N$(r zd`DTxs898)(%VA6t)YQ_MdnkFs?^7b+Q&cFn|-#fa_-sZD~{JEMLpI3`<E%|iNDm( z%_}~KyHDHi_D|;3pZ4b27eD+eGT9;Z{rBB{?Z54KE-L=aR`t~vy6gA2`o#WK|0AvZ z&z*k0>GSs=*7ZM4>sNC8ERJZI$G~#+#s9w5|B|oRw=cgS+_2!&5{=Ja*7<B+x}EpH z!@u^N*%$v8XFPlVTFjyQPiW7dNR!X`IgGKt@6HwbZC@^Xt)B1fpW3@jS;7BR0{+XC zeg3|gHRIy{=)}PPG8_Ke=kES{U+}8^yZ@UG{QJLUYNXk`W&C#i|8LLv?>XoHrxl;R zSNz$Z|F7+i)xx!h{@Y80{;an7ci+1H?Y!`5*M3SrIiEOt|Mti8ZGYMCJ{i9&?8D-J zC$r}K-(XQ)U8~vk`@-c0hi$LxfBd???tjW!m%qh3O24j8lB|+qKnE8NS%*%)v6LZz GfdK#+Nb?5( diff --git a/tests/data/migration/example_source_db.sql b/tests/data/migration/example_source_db.sql deleted file mode 100644 index 185d7307ef..0000000000 --- a/tests/data/migration/example_source_db.sql +++ /dev/null @@ -1,64 +0,0 @@ -SET statement_timeout = 0; -SET lock_timeout = 0; -SET client_encoding = 'UTF8'; -SET standard_conforming_strings = on; -SELECT pg_catalog.set_config('search_path', '', false); -SET check_function_bodies = false; -SET client_min_messages = warning; - -TRUNCATE TABLE public.users CASCADE; -INSERT INTO public.users VALUES (3, 'one', 'one', 'one', 'one', NULL, '$2y$12$jths1LQPsLofuBQ3evVIluhQeQ/BZfbdTSZHFcPGdcNmHz2WvDj.y', NULL); -INSERT INTO public.users VALUES (4, 'two', 'two', 'two', 'two', NULL, 
'$2y$12$jths1LQPsLofuBQ3evVIluhQeQ/BZfbdTSZHFcPGdcNmHz2WvDj.y', NULL); -INSERT INTO public.calculations VALUES (NULL, NULL, NULL, NULL, 0, false, 1, NULL); -INSERT INTO public.calculations VALUES (NULL, NULL, NULL, NULL, 0, false, 2, NULL); -INSERT INTO public.codefamilies VALUES (1, 'VASP'); -INSERT INTO public.codeversions VALUES (1, 1, '4.6.35'); --- topcis -INSERT INTO public.topics VALUES (1, 90, 'tetragonal'); -INSERT INTO public.topics VALUES (2, 220, 'VASP'); -INSERT INTO public.topics VALUES (3, 50, '3d'); -INSERT INTO public.topics VALUES (4, 75, 'GGA'); -INSERT INTO public.topics VALUES (5, 80, 'plane waves'); -INSERT INTO public.topics VALUES (6, 10, 'Br'); -INSERT INTO public.topics VALUES (7, 10, 'K'); -INSERT INTO public.topics VALUES (8, 10, 'Si'); --- mapping topics to calcs via tags -INSERT INTO public.tags VALUES(1, 1); -INSERT INTO public.tags VALUES(2, 1); -INSERT INTO public.tags VALUES(1, 2); -INSERT INTO public.tags VALUES(2, 2); -INSERT INTO public.tags VALUES(1, 3); -INSERT INTO public.tags VALUES(2, 3); -INSERT INTO public.tags VALUES(1, 4); -INSERT INTO public.tags VALUES(2, 4); -INSERT INTO public.tags VALUES(1, 5); -INSERT INTO public.tags VALUES(2, 5); -INSERT INTO public.tags VALUES(1, 6); -INSERT INTO public.tags VALUES(2, 6); -INSERT INTO public.tags VALUES(1, 7); -INSERT INTO public.tags VALUES(2, 7); -INSERT INTO public.tags VALUES(1, 8); -INSERT INTO public.tags VALUES(2, 8); - -INSERT INTO public.metadata VALUES (1, 'different/prefix/template.json', NULL, NULL, NULL, 'BrKSi2', '2019-01-01 12:00:00', NULL, decode('["$EXTRACTED/upload/1/template.json"]', 'escape'), 1, NULL); -INSERT INTO public.metadata VALUES (1, 'different/prefix/template.json', NULL, NULL, NULL, 'BrKSi2', '2015-01-01 13:00:00', NULL, decode('["$EXTRACTED/upload/2/wrong_mainfile"]', 'escape'), 2, NULL); -INSERT INTO public.spacegroups VALUES (1, 123); -INSERT INTO public.spacegroups VALUES (2, 123); -INSERT INTO public.user_metadata VALUES (1, 1, 'label1'); -INSERT INTO public.user_metadata VALUES (2, 0, 'label2'); -INSERT INTO public.ownerships VALUES (1, 3); -INSERT INTO public.ownerships VALUES (2, 4); -INSERT INTO public.coauthorships VALUES (1, 4); -INSERT INTO public.shareships VALUES (2, 3); - --- example dataset -INSERT INTO public.calculations VALUES (NULL, NULL, NULL, NULL, 1, false, 3, NULL); -INSERT INTO public.calcsets VALUES (3, 1); -INSERT INTO public.calcsets VALUES (3, 2); -INSERT INTO public.citations VALUES(1, 'internal_ref', 'INTERNAL'); -INSERT INTO public.metadata_citations VALUES (3, 1); -INSERT INTO public.metadata VALUES (NULL, NULL, NULL, NULL, NULL, 'test_dataset', '2019-01-01 12:00:00', NULL, NULL, 3, NULL); - --- example ref -INSERT INTO public.citations VALUES(2, 'external_ref', 'EXTERNAL'); -INSERT INTO public.metadata_citations VALUES (1, 2); \ No newline at end of file diff --git a/tests/data/migration/failed_calc/upload/1/template.json b/tests/data/migration/failed_calc/upload/1/template.json deleted file mode 100644 index b85b5bd174..0000000000 --- a/tests/data/migration/failed_calc/upload/1/template.json +++ /dev/null @@ -1,110 +0,0 @@ -{ - "section_run": [ - { - "_name": "section_run", - "_gIndex": 0, - "program_name": "VASP", - "program_version": "4.6.35 3Apr08 complex parallel LinuxIFC", - "program_basis_set_type": "plane waves", - "section_method": [ - { - "_name": "section_method", - "_gIndex": 0, - - "electronic_structure_method": "DFT", - "section_XC_functionals": [ - { - "_name": "section_XC_functionals", - "_gIndex": 0, - 
"XC_functional_name": "GGA_X_PBE" - } - ] - } - ], - "section_system": [ - { - "_name": "section_system", - "_gIndex": 0, - "simulation_cell": [ - [ - 5.76372622e-10, - 0.0, - 0.0 - ], - [ - 0.0, - 5.76372622e-10, - 0.0 - ], - [ - 0.0, - 0.0, - 4.0755698899999997e-10 - ] - ], - "configuration_periodic_dimensions": [ - true, - true, - true - ], - "atom_positions": [ - [ - 2.88186311e-10, - 0.0, - 2.0377849449999999e-10 - ], - [ - 0.0, - 2.88186311e-10, - 2.0377849449999999e-10 - ], - [ - 0.0, - 0.0, - 0.0 - ], - [ - 2.88186311e-10, - 2.88186311e-10, - 0.0 - ] - ], - "atom_labels": [ - "Br", - "K", - "Si", - "Si" - ] - } - ], - "section_single_configuration_calculation": [ - { - "_name": "section_single_configuration_calculation", - "_gIndex": 0, - "single_configuration_calculation_to_system_ref": 0, - "single_configuration_to_calculation_method_ref": 0, - "energy_free": -1.5936767191492225e-18, - "energy_total": -1.5935696296699573e-18, - "energy_total_T0": -3.2126683561907e-22 - } - ], - "section_sampling_method": [ - { - "_name": "section_sampling_method", - "_gIndex": 0, - "sampling_method": "geometry_optimization" - } - ], - "section_frame_sequence": [ - { - "_name": "section_frame_sequence", - "_gIndex": 0, - "frame_sequence_to_sampling_ref": 0, - "frame_sequence_local_frames_ref": [ - 0 - ] - } - ] - } - ] - } \ No newline at end of file diff --git a/tests/data/migration/failed_calc/upload/2/template.json b/tests/data/migration/failed_calc/upload/2/template.json deleted file mode 100644 index 108dd63a26..0000000000 --- a/tests/data/migration/failed_calc/upload/2/template.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "section_run": [ - { - not parsable \ No newline at end of file diff --git a/tests/data/migration/failed_upload/upload/1/template.json b/tests/data/migration/failed_upload/upload/1/template.json deleted file mode 100644 index b85b5bd174..0000000000 --- a/tests/data/migration/failed_upload/upload/1/template.json +++ /dev/null @@ -1,110 +0,0 @@ -{ - "section_run": [ - { - "_name": "section_run", - "_gIndex": 0, - "program_name": "VASP", - "program_version": "4.6.35 3Apr08 complex parallel LinuxIFC", - "program_basis_set_type": "plane waves", - "section_method": [ - { - "_name": "section_method", - "_gIndex": 0, - - "electronic_structure_method": "DFT", - "section_XC_functionals": [ - { - "_name": "section_XC_functionals", - "_gIndex": 0, - "XC_functional_name": "GGA_X_PBE" - } - ] - } - ], - "section_system": [ - { - "_name": "section_system", - "_gIndex": 0, - "simulation_cell": [ - [ - 5.76372622e-10, - 0.0, - 0.0 - ], - [ - 0.0, - 5.76372622e-10, - 0.0 - ], - [ - 0.0, - 0.0, - 4.0755698899999997e-10 - ] - ], - "configuration_periodic_dimensions": [ - true, - true, - true - ], - "atom_positions": [ - [ - 2.88186311e-10, - 0.0, - 2.0377849449999999e-10 - ], - [ - 0.0, - 2.88186311e-10, - 2.0377849449999999e-10 - ], - [ - 0.0, - 0.0, - 0.0 - ], - [ - 2.88186311e-10, - 2.88186311e-10, - 0.0 - ] - ], - "atom_labels": [ - "Br", - "K", - "Si", - "Si" - ] - } - ], - "section_single_configuration_calculation": [ - { - "_name": "section_single_configuration_calculation", - "_gIndex": 0, - "single_configuration_calculation_to_system_ref": 0, - "single_configuration_to_calculation_method_ref": 0, - "energy_free": -1.5936767191492225e-18, - "energy_total": -1.5935696296699573e-18, - "energy_total_T0": -3.2126683561907e-22 - } - ], - "section_sampling_method": [ - { - "_name": "section_sampling_method", - "_gIndex": 0, - "sampling_method": "geometry_optimization" - } - ], - 
"section_frame_sequence": [ - { - "_name": "section_frame_sequence", - "_gIndex": 0, - "frame_sequence_to_sampling_ref": 0, - "frame_sequence_local_frames_ref": [ - 0 - ] - } - ] - } - ] - } \ No newline at end of file diff --git a/tests/data/migration/failed_upload/upload/2/template.json b/tests/data/migration/failed_upload/upload/2/template.json deleted file mode 100644 index b85b5bd174..0000000000 --- a/tests/data/migration/failed_upload/upload/2/template.json +++ /dev/null @@ -1,110 +0,0 @@ -{ - "section_run": [ - { - "_name": "section_run", - "_gIndex": 0, - "program_name": "VASP", - "program_version": "4.6.35 3Apr08 complex parallel LinuxIFC", - "program_basis_set_type": "plane waves", - "section_method": [ - { - "_name": "section_method", - "_gIndex": 0, - - "electronic_structure_method": "DFT", - "section_XC_functionals": [ - { - "_name": "section_XC_functionals", - "_gIndex": 0, - "XC_functional_name": "GGA_X_PBE" - } - ] - } - ], - "section_system": [ - { - "_name": "section_system", - "_gIndex": 0, - "simulation_cell": [ - [ - 5.76372622e-10, - 0.0, - 0.0 - ], - [ - 0.0, - 5.76372622e-10, - 0.0 - ], - [ - 0.0, - 0.0, - 4.0755698899999997e-10 - ] - ], - "configuration_periodic_dimensions": [ - true, - true, - true - ], - "atom_positions": [ - [ - 2.88186311e-10, - 0.0, - 2.0377849449999999e-10 - ], - [ - 0.0, - 2.88186311e-10, - 2.0377849449999999e-10 - ], - [ - 0.0, - 0.0, - 0.0 - ], - [ - 2.88186311e-10, - 2.88186311e-10, - 0.0 - ] - ], - "atom_labels": [ - "Br", - "K", - "Si", - "Si" - ] - } - ], - "section_single_configuration_calculation": [ - { - "_name": "section_single_configuration_calculation", - "_gIndex": 0, - "single_configuration_calculation_to_system_ref": 0, - "single_configuration_to_calculation_method_ref": 0, - "energy_free": -1.5936767191492225e-18, - "energy_total": -1.5935696296699573e-18, - "energy_total_T0": -3.2126683561907e-22 - } - ], - "section_sampling_method": [ - { - "_name": "section_sampling_method", - "_gIndex": 0, - "sampling_method": "geometry_optimization" - } - ], - "section_frame_sequence": [ - { - "_name": "section_frame_sequence", - "_gIndex": 0, - "frame_sequence_to_sampling_ref": 0, - "frame_sequence_local_frames_ref": [ - 0 - ] - } - ] - } - ] - } \ No newline at end of file diff --git a/tests/data/migration/missing_calc/upload/1/template.json b/tests/data/migration/missing_calc/upload/1/template.json deleted file mode 100644 index b85b5bd174..0000000000 --- a/tests/data/migration/missing_calc/upload/1/template.json +++ /dev/null @@ -1,110 +0,0 @@ -{ - "section_run": [ - { - "_name": "section_run", - "_gIndex": 0, - "program_name": "VASP", - "program_version": "4.6.35 3Apr08 complex parallel LinuxIFC", - "program_basis_set_type": "plane waves", - "section_method": [ - { - "_name": "section_method", - "_gIndex": 0, - - "electronic_structure_method": "DFT", - "section_XC_functionals": [ - { - "_name": "section_XC_functionals", - "_gIndex": 0, - "XC_functional_name": "GGA_X_PBE" - } - ] - } - ], - "section_system": [ - { - "_name": "section_system", - "_gIndex": 0, - "simulation_cell": [ - [ - 5.76372622e-10, - 0.0, - 0.0 - ], - [ - 0.0, - 5.76372622e-10, - 0.0 - ], - [ - 0.0, - 0.0, - 4.0755698899999997e-10 - ] - ], - "configuration_periodic_dimensions": [ - true, - true, - true - ], - "atom_positions": [ - [ - 2.88186311e-10, - 0.0, - 2.0377849449999999e-10 - ], - [ - 0.0, - 2.88186311e-10, - 2.0377849449999999e-10 - ], - [ - 0.0, - 0.0, - 0.0 - ], - [ - 2.88186311e-10, - 2.88186311e-10, - 0.0 - ] - ], - "atom_labels": [ - "Br", - "K", - 
"Si", - "Si" - ] - } - ], - "section_single_configuration_calculation": [ - { - "_name": "section_single_configuration_calculation", - "_gIndex": 0, - "single_configuration_calculation_to_system_ref": 0, - "single_configuration_to_calculation_method_ref": 0, - "energy_free": -1.5936767191492225e-18, - "energy_total": -1.5935696296699573e-18, - "energy_total_T0": -3.2126683561907e-22 - } - ], - "section_sampling_method": [ - { - "_name": "section_sampling_method", - "_gIndex": 0, - "sampling_method": "geometry_optimization" - } - ], - "section_frame_sequence": [ - { - "_name": "section_frame_sequence", - "_gIndex": 0, - "frame_sequence_to_sampling_ref": 0, - "frame_sequence_local_frames_ref": [ - 0 - ] - } - ] - } - ] - } \ No newline at end of file diff --git a/tests/data/migration/missmatch/upload/1/template.json b/tests/data/migration/missmatch/upload/1/template.json deleted file mode 100644 index b85b5bd174..0000000000 --- a/tests/data/migration/missmatch/upload/1/template.json +++ /dev/null @@ -1,110 +0,0 @@ -{ - "section_run": [ - { - "_name": "section_run", - "_gIndex": 0, - "program_name": "VASP", - "program_version": "4.6.35 3Apr08 complex parallel LinuxIFC", - "program_basis_set_type": "plane waves", - "section_method": [ - { - "_name": "section_method", - "_gIndex": 0, - - "electronic_structure_method": "DFT", - "section_XC_functionals": [ - { - "_name": "section_XC_functionals", - "_gIndex": 0, - "XC_functional_name": "GGA_X_PBE" - } - ] - } - ], - "section_system": [ - { - "_name": "section_system", - "_gIndex": 0, - "simulation_cell": [ - [ - 5.76372622e-10, - 0.0, - 0.0 - ], - [ - 0.0, - 5.76372622e-10, - 0.0 - ], - [ - 0.0, - 0.0, - 4.0755698899999997e-10 - ] - ], - "configuration_periodic_dimensions": [ - true, - true, - true - ], - "atom_positions": [ - [ - 2.88186311e-10, - 0.0, - 2.0377849449999999e-10 - ], - [ - 0.0, - 2.88186311e-10, - 2.0377849449999999e-10 - ], - [ - 0.0, - 0.0, - 0.0 - ], - [ - 2.88186311e-10, - 2.88186311e-10, - 0.0 - ] - ], - "atom_labels": [ - "Br", - "K", - "Si", - "Si" - ] - } - ], - "section_single_configuration_calculation": [ - { - "_name": "section_single_configuration_calculation", - "_gIndex": 0, - "single_configuration_calculation_to_system_ref": 0, - "single_configuration_to_calculation_method_ref": 0, - "energy_free": -1.5936767191492225e-18, - "energy_total": -1.5935696296699573e-18, - "energy_total_T0": -3.2126683561907e-22 - } - ], - "section_sampling_method": [ - { - "_name": "section_sampling_method", - "_gIndex": 0, - "sampling_method": "geometry_optimization" - } - ], - "section_frame_sequence": [ - { - "_name": "section_frame_sequence", - "_gIndex": 0, - "frame_sequence_to_sampling_ref": 0, - "frame_sequence_local_frames_ref": [ - 0 - ] - } - ] - } - ] - } \ No newline at end of file diff --git a/tests/data/migration/missmatch/upload/2/template.json b/tests/data/migration/missmatch/upload/2/template.json deleted file mode 100644 index 4a2b1b571e..0000000000 --- a/tests/data/migration/missmatch/upload/2/template.json +++ /dev/null @@ -1,110 +0,0 @@ -{ - "section_run": [ - { - "_name": "section_run", - "_gIndex": 0, - "program_name": "VASP", - "program_version": "4.6.35 3Apr08 complex parallel LinuxIFC", - "program_basis_set_type": "plane waves", - "section_method": [ - { - "_name": "section_method", - "_gIndex": 0, - - "electronic_structure_method": "DFT", - "section_XC_functionals": [ - { - "_name": "section_XC_functionals", - "_gIndex": 0, - "XC_functional_name": "missmatch" - } - ] - } - ], - "section_system": [ - { - 
"_name": "section_system", - "_gIndex": 0, - "simulation_cell": [ - [ - 5.76372622e-10, - 0.0, - 0.0 - ], - [ - 0.0, - 5.76372622e-10, - 0.0 - ], - [ - 0.0, - 0.0, - 4.0755698899999997e-10 - ] - ], - "configuration_periodic_dimensions": [ - true, - true, - true - ], - "atom_positions": [ - [ - 2.88186311e-10, - 0.0, - 2.0377849449999999e-10 - ], - [ - 0.0, - 2.88186311e-10, - 2.0377849449999999e-10 - ], - [ - 0.0, - 0.0, - 0.0 - ], - [ - 2.88186311e-10, - 2.88186311e-10, - 0.0 - ] - ], - "atom_labels": [ - "Br", - "K", - "Si", - "Si" - ] - } - ], - "section_single_configuration_calculation": [ - { - "_name": "section_single_configuration_calculation", - "_gIndex": 0, - "single_configuration_calculation_to_system_ref": 0, - "single_configuration_to_calculation_method_ref": 0, - "energy_free": -1.5936767191492225e-18, - "energy_total": -1.5935696296699573e-18, - "energy_total_T0": -3.2126683561907e-22 - } - ], - "section_sampling_method": [ - { - "_name": "section_sampling_method", - "_gIndex": 0, - "sampling_method": "geometry_optimization" - } - ], - "section_frame_sequence": [ - { - "_name": "section_frame_sequence", - "_gIndex": 0, - "frame_sequence_to_sampling_ref": 0, - "frame_sequence_local_frames_ref": [ - 0 - ] - } - ] - } - ] - } \ No newline at end of file diff --git a/tests/data/migration/new_calc/upload/1/template.json b/tests/data/migration/new_calc/upload/1/template.json deleted file mode 100644 index b85b5bd174..0000000000 --- a/tests/data/migration/new_calc/upload/1/template.json +++ /dev/null @@ -1,110 +0,0 @@ -{ - "section_run": [ - { - "_name": "section_run", - "_gIndex": 0, - "program_name": "VASP", - "program_version": "4.6.35 3Apr08 complex parallel LinuxIFC", - "program_basis_set_type": "plane waves", - "section_method": [ - { - "_name": "section_method", - "_gIndex": 0, - - "electronic_structure_method": "DFT", - "section_XC_functionals": [ - { - "_name": "section_XC_functionals", - "_gIndex": 0, - "XC_functional_name": "GGA_X_PBE" - } - ] - } - ], - "section_system": [ - { - "_name": "section_system", - "_gIndex": 0, - "simulation_cell": [ - [ - 5.76372622e-10, - 0.0, - 0.0 - ], - [ - 0.0, - 5.76372622e-10, - 0.0 - ], - [ - 0.0, - 0.0, - 4.0755698899999997e-10 - ] - ], - "configuration_periodic_dimensions": [ - true, - true, - true - ], - "atom_positions": [ - [ - 2.88186311e-10, - 0.0, - 2.0377849449999999e-10 - ], - [ - 0.0, - 2.88186311e-10, - 2.0377849449999999e-10 - ], - [ - 0.0, - 0.0, - 0.0 - ], - [ - 2.88186311e-10, - 2.88186311e-10, - 0.0 - ] - ], - "atom_labels": [ - "Br", - "K", - "Si", - "Si" - ] - } - ], - "section_single_configuration_calculation": [ - { - "_name": "section_single_configuration_calculation", - "_gIndex": 0, - "single_configuration_calculation_to_system_ref": 0, - "single_configuration_to_calculation_method_ref": 0, - "energy_free": -1.5936767191492225e-18, - "energy_total": -1.5935696296699573e-18, - "energy_total_T0": -3.2126683561907e-22 - } - ], - "section_sampling_method": [ - { - "_name": "section_sampling_method", - "_gIndex": 0, - "sampling_method": "geometry_optimization" - } - ], - "section_frame_sequence": [ - { - "_name": "section_frame_sequence", - "_gIndex": 0, - "frame_sequence_to_sampling_ref": 0, - "frame_sequence_local_frames_ref": [ - 0 - ] - } - ] - } - ] - } \ No newline at end of file diff --git a/tests/data/migration/new_calc/upload/2/template.json b/tests/data/migration/new_calc/upload/2/template.json deleted file mode 100644 index b85b5bd174..0000000000 --- a/tests/data/migration/new_calc/upload/2/template.json 
+++ /dev/null @@ -1,110 +0,0 @@ -{ - "section_run": [ - { - "_name": "section_run", - "_gIndex": 0, - "program_name": "VASP", - "program_version": "4.6.35 3Apr08 complex parallel LinuxIFC", - "program_basis_set_type": "plane waves", - "section_method": [ - { - "_name": "section_method", - "_gIndex": 0, - - "electronic_structure_method": "DFT", - "section_XC_functionals": [ - { - "_name": "section_XC_functionals", - "_gIndex": 0, - "XC_functional_name": "GGA_X_PBE" - } - ] - } - ], - "section_system": [ - { - "_name": "section_system", - "_gIndex": 0, - "simulation_cell": [ - [ - 5.76372622e-10, - 0.0, - 0.0 - ], - [ - 0.0, - 5.76372622e-10, - 0.0 - ], - [ - 0.0, - 0.0, - 4.0755698899999997e-10 - ] - ], - "configuration_periodic_dimensions": [ - true, - true, - true - ], - "atom_positions": [ - [ - 2.88186311e-10, - 0.0, - 2.0377849449999999e-10 - ], - [ - 0.0, - 2.88186311e-10, - 2.0377849449999999e-10 - ], - [ - 0.0, - 0.0, - 0.0 - ], - [ - 2.88186311e-10, - 2.88186311e-10, - 0.0 - ] - ], - "atom_labels": [ - "Br", - "K", - "Si", - "Si" - ] - } - ], - "section_single_configuration_calculation": [ - { - "_name": "section_single_configuration_calculation", - "_gIndex": 0, - "single_configuration_calculation_to_system_ref": 0, - "single_configuration_to_calculation_method_ref": 0, - "energy_free": -1.5936767191492225e-18, - "energy_total": -1.5935696296699573e-18, - "energy_total_T0": -3.2126683561907e-22 - } - ], - "section_sampling_method": [ - { - "_name": "section_sampling_method", - "_gIndex": 0, - "sampling_method": "geometry_optimization" - } - ], - "section_frame_sequence": [ - { - "_name": "section_frame_sequence", - "_gIndex": 0, - "frame_sequence_to_sampling_ref": 0, - "frame_sequence_local_frames_ref": [ - 0 - ] - } - ] - } - ] - } \ No newline at end of file diff --git a/tests/data/migration/new_calc/upload/3/template.json b/tests/data/migration/new_calc/upload/3/template.json deleted file mode 100644 index b85b5bd174..0000000000 --- a/tests/data/migration/new_calc/upload/3/template.json +++ /dev/null @@ -1,110 +0,0 @@ -{ - "section_run": [ - { - "_name": "section_run", - "_gIndex": 0, - "program_name": "VASP", - "program_version": "4.6.35 3Apr08 complex parallel LinuxIFC", - "program_basis_set_type": "plane waves", - "section_method": [ - { - "_name": "section_method", - "_gIndex": 0, - - "electronic_structure_method": "DFT", - "section_XC_functionals": [ - { - "_name": "section_XC_functionals", - "_gIndex": 0, - "XC_functional_name": "GGA_X_PBE" - } - ] - } - ], - "section_system": [ - { - "_name": "section_system", - "_gIndex": 0, - "simulation_cell": [ - [ - 5.76372622e-10, - 0.0, - 0.0 - ], - [ - 0.0, - 5.76372622e-10, - 0.0 - ], - [ - 0.0, - 0.0, - 4.0755698899999997e-10 - ] - ], - "configuration_periodic_dimensions": [ - true, - true, - true - ], - "atom_positions": [ - [ - 2.88186311e-10, - 0.0, - 2.0377849449999999e-10 - ], - [ - 0.0, - 2.88186311e-10, - 2.0377849449999999e-10 - ], - [ - 0.0, - 0.0, - 0.0 - ], - [ - 2.88186311e-10, - 2.88186311e-10, - 0.0 - ] - ], - "atom_labels": [ - "Br", - "K", - "Si", - "Si" - ] - } - ], - "section_single_configuration_calculation": [ - { - "_name": "section_single_configuration_calculation", - "_gIndex": 0, - "single_configuration_calculation_to_system_ref": 0, - "single_configuration_to_calculation_method_ref": 0, - "energy_free": -1.5936767191492225e-18, - "energy_total": -1.5935696296699573e-18, - "energy_total_T0": -3.2126683561907e-22 - } - ], - "section_sampling_method": [ - { - "_name": "section_sampling_method", - 
"_gIndex": 0, - "sampling_method": "geometry_optimization" - } - ], - "section_frame_sequence": [ - { - "_name": "section_frame_sequence", - "_gIndex": 0, - "frame_sequence_to_sampling_ref": 0, - "frame_sequence_local_frames_ref": [ - 0 - ] - } - ] - } - ] - } \ No newline at end of file diff --git a/tests/data/migration/new_upload/new_upload/1/template.json b/tests/data/migration/new_upload/new_upload/1/template.json deleted file mode 100644 index b85b5bd174..0000000000 --- a/tests/data/migration/new_upload/new_upload/1/template.json +++ /dev/null @@ -1,110 +0,0 @@ -{ - "section_run": [ - { - "_name": "section_run", - "_gIndex": 0, - "program_name": "VASP", - "program_version": "4.6.35 3Apr08 complex parallel LinuxIFC", - "program_basis_set_type": "plane waves", - "section_method": [ - { - "_name": "section_method", - "_gIndex": 0, - - "electronic_structure_method": "DFT", - "section_XC_functionals": [ - { - "_name": "section_XC_functionals", - "_gIndex": 0, - "XC_functional_name": "GGA_X_PBE" - } - ] - } - ], - "section_system": [ - { - "_name": "section_system", - "_gIndex": 0, - "simulation_cell": [ - [ - 5.76372622e-10, - 0.0, - 0.0 - ], - [ - 0.0, - 5.76372622e-10, - 0.0 - ], - [ - 0.0, - 0.0, - 4.0755698899999997e-10 - ] - ], - "configuration_periodic_dimensions": [ - true, - true, - true - ], - "atom_positions": [ - [ - 2.88186311e-10, - 0.0, - 2.0377849449999999e-10 - ], - [ - 0.0, - 2.88186311e-10, - 2.0377849449999999e-10 - ], - [ - 0.0, - 0.0, - 0.0 - ], - [ - 2.88186311e-10, - 2.88186311e-10, - 0.0 - ] - ], - "atom_labels": [ - "Br", - "K", - "Si", - "Si" - ] - } - ], - "section_single_configuration_calculation": [ - { - "_name": "section_single_configuration_calculation", - "_gIndex": 0, - "single_configuration_calculation_to_system_ref": 0, - "single_configuration_to_calculation_method_ref": 0, - "energy_free": -1.5936767191492225e-18, - "energy_total": -1.5935696296699573e-18, - "energy_total_T0": -3.2126683561907e-22 - } - ], - "section_sampling_method": [ - { - "_name": "section_sampling_method", - "_gIndex": 0, - "sampling_method": "geometry_optimization" - } - ], - "section_frame_sequence": [ - { - "_name": "section_frame_sequence", - "_gIndex": 0, - "frame_sequence_to_sampling_ref": 0, - "frame_sequence_local_frames_ref": [ - 0 - ] - } - ] - } - ] - } \ No newline at end of file diff --git a/tests/data/migration/new_upload/new_upload/2/template.json b/tests/data/migration/new_upload/new_upload/2/template.json deleted file mode 100644 index b85b5bd174..0000000000 --- a/tests/data/migration/new_upload/new_upload/2/template.json +++ /dev/null @@ -1,110 +0,0 @@ -{ - "section_run": [ - { - "_name": "section_run", - "_gIndex": 0, - "program_name": "VASP", - "program_version": "4.6.35 3Apr08 complex parallel LinuxIFC", - "program_basis_set_type": "plane waves", - "section_method": [ - { - "_name": "section_method", - "_gIndex": 0, - - "electronic_structure_method": "DFT", - "section_XC_functionals": [ - { - "_name": "section_XC_functionals", - "_gIndex": 0, - "XC_functional_name": "GGA_X_PBE" - } - ] - } - ], - "section_system": [ - { - "_name": "section_system", - "_gIndex": 0, - "simulation_cell": [ - [ - 5.76372622e-10, - 0.0, - 0.0 - ], - [ - 0.0, - 5.76372622e-10, - 0.0 - ], - [ - 0.0, - 0.0, - 4.0755698899999997e-10 - ] - ], - "configuration_periodic_dimensions": [ - true, - true, - true - ], - "atom_positions": [ - [ - 2.88186311e-10, - 0.0, - 2.0377849449999999e-10 - ], - [ - 0.0, - 2.88186311e-10, - 2.0377849449999999e-10 - ], - [ - 0.0, - 0.0, - 0.0 - ], - [ - 
2.88186311e-10, - 2.88186311e-10, - 0.0 - ] - ], - "atom_labels": [ - "Br", - "K", - "Si", - "Si" - ] - } - ], - "section_single_configuration_calculation": [ - { - "_name": "section_single_configuration_calculation", - "_gIndex": 0, - "single_configuration_calculation_to_system_ref": 0, - "single_configuration_to_calculation_method_ref": 0, - "energy_free": -1.5936767191492225e-18, - "energy_total": -1.5935696296699573e-18, - "energy_total_T0": -3.2126683561907e-22 - } - ], - "section_sampling_method": [ - { - "_name": "section_sampling_method", - "_gIndex": 0, - "sampling_method": "geometry_optimization" - } - ], - "section_frame_sequence": [ - { - "_name": "section_frame_sequence", - "_gIndex": 0, - "frame_sequence_to_sampling_ref": 0, - "frame_sequence_local_frames_ref": [ - 0 - ] - } - ] - } - ] - } \ No newline at end of file diff --git a/tests/data/migration/packaging/baseline/1/file.txt b/tests/data/migration/packaging/baseline/1/file.txt deleted file mode 100644 index 4f6c4ee9d9..0000000000 --- a/tests/data/migration/packaging/baseline/1/file.txt +++ /dev/null @@ -1 +0,0 @@ -p \ No newline at end of file diff --git a/tests/data/migration/packaging/baseline/2/file.txt b/tests/data/migration/packaging/baseline/2/file.txt deleted file mode 100644 index 08b9811c98..0000000000 --- a/tests/data/migration/packaging/baseline/2/file.txt +++ /dev/null @@ -1 +0,0 @@ -m \ No newline at end of file diff --git a/tests/data/migration/packaging/baseline/RESTRICTED_99 b/tests/data/migration/packaging/baseline/RESTRICTED_99 deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/data/migration/packaging/restriction/1/file.txt b/tests/data/migration/packaging/restriction/1/file.txt deleted file mode 100644 index 4f6c4ee9d9..0000000000 --- a/tests/data/migration/packaging/restriction/1/file.txt +++ /dev/null @@ -1 +0,0 @@ -p \ No newline at end of file diff --git a/tests/data/migration/packaging/restriction/2/file.txt b/tests/data/migration/packaging/restriction/2/file.txt deleted file mode 100644 index 08b9811c98..0000000000 --- a/tests/data/migration/packaging/restriction/2/file.txt +++ /dev/null @@ -1 +0,0 @@ -m \ No newline at end of file diff --git a/tests/data/migration/packaging/restriction/RESTRICTED_24 b/tests/data/migration/packaging/restriction/RESTRICTED_24 deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/data/migration/packaging/too_big/1/file.txt b/tests/data/migration/packaging/too_big/1/file.txt deleted file mode 100644 index d3e5839c31..0000000000 --- a/tests/data/migration/packaging/too_big/1/file.txt +++ /dev/null @@ -1 +0,0 @@ -Y�231 \ No newline at end of file diff --git a/tests/data/migration/packaging/too_big/2/file.txt b/tests/data/migration/packaging/too_big/2/file.txt deleted file mode 100644 index 73ea5c1e0a..0000000000 --- a/tests/data/migration/packaging/too_big/2/file.txt +++ /dev/null @@ -1 +0,0 @@ -� \ No newline at end of file diff --git a/tests/data/migration/packaging_archived/baseline/archive.tar.gz b/tests/data/migration/packaging_archived/baseline/archive.tar.gz deleted file mode 100644 index 12933b2e50c0d61c780e041adcd317a1e38f9861..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 215 zcmb2|=3rR%b7~9&^V@5?xeghKusyKxns?w?(WFa});q4fyL7NA!?CK-Zr?$%)+W(* z6Vd<qmfjhDRVVyYj_eV3I&$oT`&MVst~RSjLBf)<lc#oZYv%g&NdI4Jc+E67aCg|M zMf*z5{w~dnj67G9ZgYI|KMu*M&tGR={<e0%?VbM{T#7$WpZqi1cUIxU^-HJQ&;PGm z!YnW)D*K=Glm9|jqCU@`@ZWodlR1N9>PyA*-)`I7m7RR#vfTXl|4shi{e1q>KV}9b QQ1hDadiuLa1`P%V0HK*^RR910 diff 
--git a/tests/data/migration/packaging_archived/restriction/archive.tar.gz b/tests/data/migration/packaging_archived/restriction/archive.tar.gz deleted file mode 100644 index 2240d5fc1430e2722cfb989592ec96ef87fc5b1c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 212 zcmb2|=3qGRb7~9&^V=)FT!#z<+8)ZNs&Du?a|?Tbc;3~$C5et(6rv73j$Wf7vSx=z z^v7DW&73Dyob6_>c+q1R$?+@h@-iuP-`We;x>@FXX3aIbwrr+`?4#|=t~`x-x_sAE zBm1}d&%eKZmFjwN<N3#O=YJ{Z1ij(A?OD3^(cHlO?pl{j>u=B6|NYTavyV|#KR^GJ zZ_i_BE=oT8-{If)gT1EzTmQUwwk%<mxKgw)N?-i)*P3mGLN9yH|KC6Nf83nM|Ct$J Q;QlVg1aaO!3>pj!0O#>(EdT%j diff --git a/tests/data/migration/packaging_archived/too_big/archive.tar.gz b/tests/data/migration/packaging_archived/too_big/archive.tar.gz deleted file mode 100644 index 6b19455db4bb2aa991d46eaf9346f40ff510a255..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 199 zcmb2|=3uz^b7~9&^V=)lT!#!qS|2t@pXAEPlRFzFQ<mU(#Z&&lvm>rM1hyw^bamXQ zd|B(?{U?4-#h19<ji$~qRBKz1Z~T7w#(nddb<h2Nu;~2JEage=>q|W3eOGSF>sr;l zNq<#t^nUy6vf8I7zUO(r<Gj3L<j$LGr=MNuS?>PbzUck&Z}RiMy^j%C>buK>zkc8U zt4~X|_&w`uoOvVv?SF^={s-$^|KFbe?|b9F`G+3alru5F!2$Ic#(x{RwHP!Q7ytk) BV157q diff --git a/tests/data/migration/packaging_oqmd/baseline/oqmd/one.tar.gz b/tests/data/migration/packaging_oqmd/baseline/oqmd/one.tar.gz deleted file mode 100644 index 6a4ebd33215c54859f952aa8f0b948411daa19fb..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 194 zcmb2|=3w}jb0CI+`R$dxT!#z<4t%unns-3$tozF~vN74^FB2R?Ce$_b=Wldfm{7{; z`se?&=)OCu$6ei(ES82mnp5Lq?;?6zv2TglHLJ-H>yuY*`PM0VsWsoXV_oLyUHMOg zt}S0zIsf|d(5q)GAKT9R=>B4b)ja;y->$W-zPP`Ads*JUDx>=3HG)B{*)N}-e_kKE ui&01Ge3|RMSn-=_^OrSd)!h64^8fwy`_un3G9bXdLiTApni3c^7#IN7I9@;i diff --git a/tests/data/migration/packaging_oqmd/baseline/oqmd/two.tar.gz b/tests/data/migration/packaging_oqmd/baseline/oqmd/two.tar.gz deleted file mode 100644 index a6b700ecfc39fe7cc81157d2d6b55916d0058ce7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 161 zcmb2|=3rpSJrKjd{PyBSu0skU#|q_-s%N;{zWV%Tk>iy-Gj`j$OKy%A^q4wd{NMdk zz{TEnpYhpFkG_ldjMr+$UB43{9+G@w)|~B+VoqJWabw>s!_zUpR)$Ibe!1t&`|nqC z4{s<w_pQ(UAIE*(<)=RetXq0l_N)D*SNHzhJ^jZyR#N+|mhIo4e~#yiGa!Qx8>J8W LZC=Zu!N33jk4#TI diff --git a/tests/data/migration/packaging_oqmd/restriction/oqmd/archive.tar.gz b/tests/data/migration/packaging_oqmd/restriction/oqmd/archive.tar.gz deleted file mode 100644 index 2240d5fc1430e2722cfb989592ec96ef87fc5b1c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 212 zcmb2|=3qGRb7~9&^V=)FT!#z<+8)ZNs&Du?a|?Tbc;3~$C5et(6rv73j$Wf7vSx=z z^v7DW&73Dyob6_>c+q1R$?+@h@-iuP-`We;x>@FXX3aIbwrr+`?4#|=t~`x-x_sAE zBm1}d&%eKZmFjwN<N3#O=YJ{Z1ij(A?OD3^(cHlO?pl{j>u=B6|NYTavyV|#KR^GJ zZ_i_BE=oT8-{If)gT1EzTmQUwwk%<mxKgw)N?-i)*P3mGLN9yH|KC6Nf83nM|Ct$J Q;QlVg1aaO!3>pj!0O#>(EdT%j diff --git a/tests/data/migration/packaging_oqmd/too_big/oqmd/archive.tar.gz b/tests/data/migration/packaging_oqmd/too_big/oqmd/archive.tar.gz deleted file mode 100644 index 6b19455db4bb2aa991d46eaf9346f40ff510a255..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 199 zcmb2|=3uz^b7~9&^V=)lT!#!qS|2t@pXAEPlRFzFQ<mU(#Z&&lvm>rM1hyw^bamXQ zd|B(?{U?4-#h19<ji$~qRBKz1Z~T7w#(nddb<h2Nu;~2JEage=>q|W3eOGSF>sr;l zNq<#t^nUy6vf8I7zUO(r<Gj3L<j$LGr=MNuS?>PbzUck&Z}RiMy^j%C>buK>zkc8U zt4~X|_&w`uoOvVv?SF^={s-$^|KFbe?|b9F`G+3alru5F!2$Ic#(x{RwHP!Q7ytk) BV157q diff --git a/tests/data/migration/too_big.tar.gz b/tests/data/migration/too_big.tar.gz 
deleted file mode 100644 index 8f47721fc3f88312c9c03fb3e24e95a30cf3840c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 243 zcmb2|=3tn}cO`~_`R&y}t|kWowuFV<Z#c7#&An^u?H@QrP}C{!!qY{{rLAULd6Zr{ z#xDBLEGaK0`&ah*^Pf2zBmF1I2~IS2n|d{DYS79o-;H7$b2MeTA~sKSnQ1q>%0jHJ zj$`ec=k0Gd{@A0Zb9&RX*H_MeTw{Oe`p@G2_yf~_Mk{VQw)FcSQ|14THlO?-KmTv_ z{~>$BpY{t+=I55J-e~{x-Fv}%?V_2d?@#@le~9yT^}k1R{6tFcpW1DIzP@DdobDva t-Mk%3ZvKCKzJBxn_Mhoz{tGNi{1<$7<820H(7<<x;s3*{vl%oP7yucwc?<vm diff --git a/tests/data/test_coe_uploads.txt b/tests/data/test_coe_uploads.txt deleted file mode 100755 index d8c6a82319..0000000000 --- a/tests/data/test_coe_uploads.txt +++ /dev/null @@ -1,17 +0,0 @@ -/nomad/nomadlab/raw-data/data/ReU/ReUaDKWQVZ55N8aJqgQr48Uiljy1z.zip -/nomad/nomadlab/raw-data/data/R--/R--24XOABhczATS4I5QIDg-0MXd_8.zip -/nomad/nomadlab/raw-data/data/R-1/R-173P-ju6WxRSCnbb1eAL3gO0BtF.zip -/nomad/nomadlab/raw-data/data/R2V/R2VndW9osqfkbNZXN0ETXe8Jo8WYj.zip -/nomad/nomadlab/raw-data/data/R6J/R6JYXAnfqhWvN329Pniz0Zg6OUTIm.zip - -# vasp -/nomad/repository/data/uploads/ftp_upload_for_uid_487_at_2017_06_27_20_45_44/archive.tar.gz -# aims -/nomad/repository/data/uploads/zGbAfPhne9SDmg7Vl207Gw68MxaU6FQtgTlFKAMU/archive.tar.gz - -# aims, extracted -/nomad/repository/data/extracted/0wFKM2jQ7qH4f3moVtW3_LI7bpL-smp9K2oXlQFI -# very large repo of very small vasp files -/nomad/repository/data/extracted/01ff7cb7276543e5ad72cd74d249a8ca -# small one calc aims -/nomad/repository/data/extracted/0p-_qU8NvoQ9L5A6INOe3-hi0GdkUOMm9o66pAFI \ No newline at end of file diff --git a/tests/processing/test_data.py b/tests/processing/test_data.py index ebec92222f..aa63125748 100644 --- a/tests/processing/test_data.py +++ b/tests/processing/test_data.py @@ -27,7 +27,6 @@ from nomad.processing.base import task as task_decorator, FAILURE, SUCCESS from tests.test_search import assert_search_upload from tests.test_files import assert_upload_files -from tests.test_coe_repo import assert_coe_upload def test_send_mail(mails, monkeypatch): @@ -119,13 +118,11 @@ def test_processing_with_large_dir(test_user, proc_infra): assert len(calc.warnings) == 1 -def test_publish(non_empty_processed: Upload, no_warn, example_user_metadata, with_publish_to_coe_repo, monkeypatch): +def test_publish(non_empty_processed: Upload, no_warn, example_user_metadata, monkeypatch): processed = non_empty_processed processed.compress_and_set_metadata(example_user_metadata) additional_keys = ['with_embargo'] - if with_publish_to_coe_repo: - additional_keys.append('pid') processed.publish_upload() try: @@ -134,25 +131,18 @@ def test_publish(non_empty_processed: Upload, no_warn, example_user_metadata, wi pass upload = processed.to_upload_with_metadata(example_user_metadata) - if with_publish_to_coe_repo: - assert_coe_upload(upload.upload_id, user_metadata=example_user_metadata) assert_upload_files(upload, PublicUploadFiles, published=True) assert_search_upload(upload, additional_keys, published=True) - if with_publish_to_coe_repo and config.repository_db.mode == 'coe': - assert(os.path.exists(os.path.join(config.fs.coe_extracted, upload.upload_id))) - assert_processing(Upload.get(upload.upload_id, include_published=True), published=True) -def test_republish(non_empty_processed: Upload, no_warn, example_user_metadata, monkeypatch, with_publish_to_coe_repo): +def test_republish(non_empty_processed: Upload, no_warn, example_user_metadata, monkeypatch): processed = non_empty_processed processed.compress_and_set_metadata(example_user_metadata) 
additional_keys = ['with_embargo'] - if with_publish_to_coe_repo: - additional_keys.append('pid') processed.publish_upload() processed.block_until_complete(interval=.01) @@ -162,42 +152,14 @@ def test_republish(non_empty_processed: Upload, no_warn, example_user_metadata, processed.block_until_complete(interval=.01) upload = processed.to_upload_with_metadata(example_user_metadata) - if with_publish_to_coe_repo: - assert_coe_upload(upload.upload_id, user_metadata=example_user_metadata) - - assert_upload_files(upload, PublicUploadFiles, published=True) - assert_search_upload(upload, additional_keys, published=True) - - -def test_republish_to_coe(non_empty_processed: Upload, no_warn, example_user_metadata, monkeypatch): - """ - Test the following scenario: initial processing + publish without coe repo, then - republishing with coe repo. - """ - monkeypatch.setattr('nomad.config.repository_db.publish_enabled', False) - - processed = non_empty_processed - processed.compress_and_set_metadata(example_user_metadata) - processed.publish_upload() - processed.block_until_complete(interval=.01) - assert Upload.get('examples_template') is not None - - monkeypatch.setattr('nomad.config.repository_db.publish_enabled', True) - - processed.publish_upload() - processed.block_until_complete(interval=.01) - - upload = processed.to_upload_with_metadata(example_user_metadata) - additional_keys = ['with_embargo', 'pid'] - assert_coe_upload(upload.upload_id, user_metadata=example_user_metadata) assert_upload_files(upload, PublicUploadFiles, published=True) assert_search_upload(upload, additional_keys, published=True) def test_publish_failed( non_empty_uploaded: Tuple[str, str], example_user_metadata, test_user, - monkeypatch, proc_infra, with_publish_to_coe_repo): + monkeypatch, proc_infra): mock_failure(Calc, 'parsing', monkeypatch) @@ -205,8 +167,6 @@ def test_publish_failed( processed.compress_and_set_metadata(example_user_metadata) additional_keys = ['with_embargo'] - if with_publish_to_coe_repo: - additional_keys.append('pid') processed.publish_upload() try: @@ -215,10 +175,7 @@ def test_publish_failed( pass upload = processed.to_upload_with_metadata(example_user_metadata) - if with_publish_to_coe_repo: - assert_coe_upload(upload.upload_id, user_metadata=example_user_metadata) - assert_upload_files(upload, PublicUploadFiles, published=True, no_archive=True) assert_search_upload(upload, additional_keys, published=True, processed=False) diff --git a/tests/test_api.py b/tests/test_api.py index b005311c49..bf6605b874 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -26,15 +26,14 @@ import os.path from urllib.parse import urlencode from nomad.api.app import rfc3339DateTime -from nomad import coe_repo, search, parsing, files, config, utils +from nomad import search, parsing, files, config, utils from nomad.files import UploadFiles, PublicUploadFiles from nomad.processing import Upload, Calc, SUCCESS -from nomad.datamodel import UploadWithMetadata, CalcWithMetadata +from nomad.datamodel import UploadWithMetadata, CalcWithMetadata, User from tests.conftest import create_auth_headers, clear_elastic from tests.test_files import example_file, example_file_mainfile, example_file_contents from tests.test_files import create_staging_upload, create_public_upload, assert_upload_files -from tests.test_coe_repo import assert_coe_upload from tests.test_search import assert_search_upload @@ -73,13 +72,13 @@ class TestInfo: class TestAdmin: @pytest.mark.timeout(config.tests.default_timeout) - def test_reset(self, client, 
admin_user_auth, expandable_postgres, monkeypatch): + def test_reset(self, client, admin_user_auth, monkeypatch): monkeypatch.setattr('nomad.config.services.disable_reset', False) rv = client.post('/admin/reset', headers=admin_user_auth) assert rv.status_code == 200 @pytest.mark.timeout(config.tests.default_timeout) - def test_remove(self, client, admin_user_auth, expandable_postgres, monkeypatch): + def test_remove(self, client, admin_user_auth, monkeypatch): monkeypatch.setattr('nomad.config.services.disable_reset', False) rv = client.post('/admin/remove', headers=admin_user_auth) assert rv.status_code == 200 @@ -92,7 +91,7 @@ class TestAdmin: rv = client.post('/admin/reset', headers=test_user_auth) assert rv.status_code == 401 - def test_disabled(self, client, admin_user_auth, expandable_postgres, monkeypatch): + def test_disabled(self, client, admin_user_auth, monkeypatch): monkeypatch.setattr('nomad.config.services.disable_reset', True) rv = client.post('/admin/reset', headers=admin_user_auth) assert rv.status_code == 400 @@ -107,14 +106,14 @@ class TestAuth: rv = client.get('/auth/', headers=test_user_auth) assert rv.status_code == 200 - def test_xtoken_auth(self, client, test_user: coe_repo.User, no_warn): + def test_xtoken_auth(self, client, test_user: User, no_warn): rv = client.get('/uploads/', headers={ 'X-Token': test_user.first_name.lower() # the test users have their firstname as tokens for convinience }) assert rv.status_code == 200 - def test_xtoken_auth_denied(self, client, no_warn, postgres): + def test_xtoken_auth_denied(self, client, no_warn): rv = client.get('/uploads/', headers={ 'X-Token': 'invalid' }) @@ -132,7 +131,7 @@ class TestAuth: }) assert rv.status_code == 401 - def test_get_user(self, client, test_user_auth, test_user: coe_repo.User, no_warn): + def test_get_user(self, client, test_user_auth, test_user: User, no_warn): rv = client.get('/auth/user', headers=test_user_auth) assert rv.status_code == 200 self.assert_user(client, json.loads(rv.data)) @@ -153,7 +152,7 @@ class TestAuth: @pytest.mark.parametrize('token, affiliation', [ ('test_token', dict(name='HU Berlin', address='Unter den Linden 6')), (None, None)]) - def test_put_user(self, client, postgres, admin_user_auth, token, affiliation): + def test_put_user(self, client, admin_user_auth, token, affiliation): data = dict( email='test@email.com', last_name='Tester', first_name='Testi', token=token, affiliation=affiliation, @@ -183,7 +182,7 @@ class TestAuth: email='test@email.com', password=bcrypt.encrypt('test_password', ident='2y')))) assert rv.status_code == 400 - def test_post_user(self, client, postgres, admin_user_auth): + def test_post_user(self, client, admin_user_auth): rv = client.put( '/auth/user', headers=admin_user_auth, content_type='application/json', data=json.dumps(dict( @@ -256,7 +255,7 @@ class TestUploads: assert_upload_files(upload_with_metadata, files.StagingUploadFiles) assert_search_upload(upload_with_metadata, additional_keys=['atoms', 'system']) - def assert_published(self, client, test_user_auth, upload_id, proc_infra, with_coe_repo=True, metadata={}, publish_with_metadata: bool = True): + def assert_published(self, client, test_user_auth, upload_id, proc_infra, metadata={}, publish_with_metadata: bool = True): rv = client.get('/uploads/%s' % upload_id, headers=test_user_auth) upload = self.assert_upload(rv.data) @@ -273,16 +272,12 @@ class TestUploads: assert upload['process_running'] additional_keys = ['with_embargo'] - if with_coe_repo: - additional_keys.append('pid') 
self.block_until_completed(client, upload_id, test_user_auth) upload_proc = Upload.objects(upload_id=upload_id).first() assert upload_proc is not None assert upload_proc.published is True - if with_coe_repo: - assert_coe_upload(upload_with_metadata.upload_id, user_metadata=metadata) assert_upload_files(upload_with_metadata, files.PublicUploadFiles, published=True) assert_search_upload(upload_with_metadata, additional_keys=additional_keys, published=True) @@ -385,11 +380,11 @@ class TestUploads: yield True monkeypatch.setattr('nomad.processing.data.Upload.cleanup', old_cleanup) - def test_delete_published(self, client, test_user_auth, proc_infra, no_warn, with_publish_to_coe_repo): + def test_delete_published(self, client, test_user_auth, proc_infra, no_warn): rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth) upload = self.assert_upload(rv.data) self.assert_processing(client, test_user_auth, upload['upload_id']) - self.assert_published(client, test_user_auth, upload['upload_id'], proc_infra, with_coe_repo=with_publish_to_coe_repo) + self.assert_published(client, test_user_auth, upload['upload_id'], proc_infra) rv = client.delete('/uploads/%s' % upload['upload_id'], headers=test_user_auth) assert rv.status_code == 400 @@ -412,12 +407,12 @@ class TestUploads: content_type='application/json') assert rv.status_code == 400 - def test_post(self, client, test_user_auth, non_empty_example_upload, proc_infra, no_warn, with_publish_to_coe_repo): + def test_post(self, client, test_user_auth, non_empty_example_upload, proc_infra, no_warn): rv = client.put('/uploads/?local_path=%s' % non_empty_example_upload, headers=test_user_auth) assert rv.status_code == 200 upload = self.assert_upload(rv.data) self.assert_processing(client, test_user_auth, upload['upload_id']) - self.assert_published(client, test_user_auth, upload['upload_id'], proc_infra, with_coe_repo=with_publish_to_coe_repo) + self.assert_published(client, test_user_auth, upload['upload_id'], proc_infra) # still visible assert client.get('/uploads/%s' % upload['upload_id'], headers=test_user_auth).status_code == 200 @@ -480,7 +475,7 @@ class TestUploads: assert self.block_until_completed(client, upload_id, test_user_auth) is not None # TODO validate metadata (or all input models in API for that matter) - # def test_post_bad_metadata(self, client, proc_infra, test_user_auth, postgres): + # def test_post_bad_metadata(self, client, proc_infra, test_user_auth): # rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth) # upload = self.assert_upload(rv.data) # self.assert_processing(client, test_user_auth, upload['upload_id']) @@ -499,7 +494,7 @@ class TestUploads: upload = self.assert_upload(rv.data) upload_id = upload['upload_id'] self.assert_processing(client, test_user_auth, upload_id) - self.assert_published(client, test_user_auth, upload_id, proc_infra, with_coe_repo=True) + self.assert_published(client, test_user_auth, upload_id, proc_infra) rv = client.get('/raw/%s/examples_potcar/POTCAR' % upload_id) assert rv.status_code == 401 rv = client.get('/raw/%s/examples_potcar/POTCAR' % upload_id, headers=test_user_auth) @@ -567,7 +562,7 @@ class UploadFilesBasedTests: return wrapper @pytest.fixture(scope='function') - def test_data(self, request, postgres, mongo, raw_files, no_warn, test_user, other_test_user): + def test_data(self, request, mongo, raw_files, no_warn, test_user, other_test_user): # delete potential old test files for _ in [0, 1]: upload_files = UploadFiles.get('test_upload') 
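Illustration (not part of this patch): with the KeycloakMock introduced in tests/conftest.py above, API tests authenticate by sending the mocked header that create_auth_headers builds instead of HTTP basic auth. A minimal sketch of such a test, assuming the existing client and test_user_auth fixtures (the test name itself is hypothetical), could look like this:

def test_get_uploads_with_mocked_auth(client, test_user_auth):
    # test_user_auth is {'Authorization': 'Mocked 1'}, produced by create_auth_headers;
    # KeycloakMock.authorize_flask() strips the 'Mocked ' prefix, looks the trailing id
    # up in the test_users dict and sets g.user, so the request runs as Sheldon Cooper.
    rv = client.get('/uploads/', headers=test_user_auth)
    assert rv.status_code == 200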
@@ -594,12 +589,6 @@ class UploadFilesBasedTests:
             _, upload_files = create_staging_upload('test_upload', calc_specs=calc_specs)
         else:
             _, upload_files = create_public_upload('test_upload', calc_specs=calc_specs)
-            postgres.begin()
-            coe_upload = coe_repo.Upload(
-                upload_name='test_upload',
-                user_id=test_user.user_id, is_processed=True)
-            postgres.add(coe_upload)
-            postgres.commit()

         yield 'test_upload', authorized, auth_headers

@@ -652,7 +641,7 @@ class TestRepo():
     @pytest.fixture(scope='class')
     def example_elastic_calcs(
             self, elastic_infra, normalized: parsing.LocalBackend,
-            test_user: coe_repo.User, other_test_user: coe_repo.User):
+            test_user: User, other_test_user: User):
         clear_elastic(elastic_infra)

         calc_with_metadata = CalcWithMetadata(upload_id=0, calc_id=0, upload_time=today)
diff --git a/tests/test_coe_repo.py b/tests/test_coe_repo.py
deleted file mode 100644
index fd55e9331f..0000000000
--- a/tests/test_coe_repo.py
+++ /dev/null
@@ -1,242 +0,0 @@
-# Copyright 2018 Markus Scheidgen
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import pytest
-from typing import cast, Tuple
-from passlib.hash import bcrypt
-from datetime import datetime
-
-from nomad.coe_repo import User, Calc, Upload, create_handle
-from nomad.coe_repo.user import Session
-from nomad.coe_repo.calc import PublishContext
-from nomad import processing, parsing, datamodel
-
-
-@pytest.fixture(scope='module')
-def example_user_metadata_with_dataset(example_user_metadata) -> dict:
-    result = dict(**example_user_metadata)
-    result.update(datasets=[dict(
-        id=23, _doi='test_doi', _name='test_dataset')])
-    return result
-
-
-def assert_user(user, reference):
-    assert user is not None
-    assert user.user_id == reference.user_id
-    assert user.email == reference.email
-
-
-def test_token_authorize(test_user):
-    user = User.verify_auth_token(test_user.first_name.lower())
-    assert_user(user, test_user)
-
-
-def test_password_authorize(test_user):
-    user = User.verify_user_password(test_user.email, 'password')
-    assert_user(user, test_user)
-
-
-def test_generate_token(postgres, test_user):
-    postgres.begin()
-    session = postgres.query(Session).filter_by(user_id=test_user.user_id).first()
-    if session is not None:
-        postgres.delete(session)
-    postgres.commit()
-
-    assert test_user.get_auth_token() is not None
-
-
-def assert_coe_upload(upload_id: str, upload: datamodel.UploadWithMetadata = None, user_metadata: dict = None):
-    coe_upload = Upload.from_upload_id(upload_id)
-
-    if upload is not None:
-        calcs = list(upload.calcs)
-    elif coe_upload is None:
-        calcs = []
-    else:
-        calcs = list(calc.to_calc_with_metadata() for calc in coe_upload.calcs)
-
-    if len(calcs) == 0:
-        assert coe_upload is None
-    else:
-        assert coe_upload is not None
-        assert len(coe_upload.calcs) == len(calcs)
-        for coe_calc, calc in zip(coe_upload.calcs, calcs):
-            if user_metadata is not None:
-                calc.apply_user_metadata(user_metadata)
-
-            assert_coe_calc(coe_calc, cast(datamodel.DFTCalcWithMetadata, calc), has_handle=True)
-
-        if upload is not None and upload.upload_time is not None:
-            assert coe_upload.created.isoformat()[:26] == upload.upload_time.isoformat()
-
-
-def assert_coe_calc(coe_calc: Calc, calc: datamodel.DFTCalcWithMetadata, has_handle: bool = False):
-    if calc.pid is not None:
-        assert coe_calc.pid == calc.pid
-    elif has_handle:
-        assert coe_calc.pid is not None
-        assert create_handle(coe_calc.pid) == coe_calc.handlepid
-
-    # calc data
-    assert len(coe_calc.files) == len(calc.files)
-    assert coe_calc.formula == calc.formula
-
-    # calc files
-    assert len(coe_calc.files) == len(calc.files)
-
-    # user meta data
-    assert coe_calc.comment == calc.comment
-    assert len(coe_calc.references) == len(calc.references)
-    assert coe_calc.uploader is not None
-    if calc.uploader is not None:
-        assert coe_calc.uploader.user_id == calc.uploader.id
-    assert sorted(user.user_id for user in coe_calc.coauthors) == sorted(user.id for user in calc.coauthors)
-    assert sorted(user.user_id for user in coe_calc.shared_with) == sorted(user.id for user in calc.shared_with)
-    if calc.with_embargo is not None:
-        assert coe_calc.with_embargo == calc.with_embargo
-    else:
-        # with out metadata, the default setting is no embargo
-        assert not coe_calc.with_embargo
-
-
-def test_add_normalized_calc(postgres, example_mainfile: Tuple[str, str], normalized: parsing.LocalBackend, test_user):
-    _, mainfile = example_mainfile
-    calc_with_metadata = datamodel.DFTCalcWithMetadata(mainfile=mainfile)
-    calc_with_metadata.apply_domain_metadata(normalized)
-    calc_with_metadata.uploader = test_user.to_popo()
-    calc_with_metadata.files = [calc_with_metadata.mainfile, '1', '2', '3', '4']
-    coe_calc = Calc()
-    coe_calc.apply_calc_with_metadata(calc_with_metadata, PublishContext())
-
-    assert_coe_calc(coe_calc, calc_with_metadata)
-
-
-def test_add_normalized_calc_with_metadata(
-        postgres, normalized: parsing.LocalBackend, example_user_metadata_with_dataset: dict):
-
-    calc_with_metadata = datamodel.DFTCalcWithMetadata()
-    calc_with_metadata.apply_domain_metadata(normalized)
-    calc_with_metadata.files = [calc_with_metadata.mainfile, '1', '2', '3', '4']
-    calc_with_metadata.apply_user_metadata(example_user_metadata_with_dataset)
-
-    coe_upload = Upload(
-        upload_name='test_upload',
-        created=datetime.utcnow(),
-        user_id=0,
-        is_processed=True)
-    coe_calc = Calc(coe_calc_id=calc_with_metadata.pid, upload=coe_upload)
-    coe_calc.apply_calc_with_metadata(calc_with_metadata, PublishContext())
-
-    assert_coe_calc(coe_calc, calc_with_metadata)
-
-
-def test_add_upload(processed: processing.Upload):
-    upload_with_metadata = processed.to_upload_with_metadata()
-    Upload.publish(upload_with_metadata)
-    assert_coe_upload(upload_with_metadata.upload_id, upload_with_metadata)
-
-
-def test_delete_upload(processed: processing.Upload, example_user_metadata_with_dataset, no_warn):
-    upload_with_metadata = processed.to_upload_with_metadata(example_user_metadata_with_dataset)
-    Upload.publish(upload_with_metadata)
-    assert_coe_upload(upload_with_metadata.upload_id, upload_with_metadata)
-
-    for calc in upload_with_metadata.calcs:
-        assert Calc.from_calc_id(calc.calc_id) is not None
-
-    Upload.delete(processed.upload_id)
-    assert Upload.from_upload_id(processed.upload_id) is None
-    for calc in upload_with_metadata.calcs:
-        assert Calc.from_calc_id(calc.calc_id) is None
-
-    Upload.delete(processed.upload_id)
-
-
-# def test_large_upload(processed: processing.Upload, example_user_metadata):
-#     processed.metadata = example_user_metadata
-#     upload_with_metadata = processed.to_upload_with_metadata()
-#     calcs = list(upload_with_metadata.calcs)

-#     if len(calcs) == 0:
-#         return

-#     def many_calcs():
-#         count = 0
-#         while True:
-#             for calc in calcs:
-#                 calc.pid = count + 10
-#                 yield calc
-#             count += 1
-#             if count > 1000:
-#                 return

-#     import time
-#     start = time.time()
-#     upload_with_metadata.calcs = many_calcs()
-#     Upload.publish(upload_with_metadata)
-#     print('########### %d' % (time.time() - start))
-
-
-def test_add_upload_with_metadata(processed, example_user_metadata_with_dataset):
-    upload_with_metadata = processed.to_upload_with_metadata(example_user_metadata_with_dataset)
-    Upload.publish(upload_with_metadata)
-    assert_coe_upload(upload_with_metadata.upload_id, upload_with_metadata)
-
-
-@pytest.mark.parametrize('crypted', [True, False])
-def test_create_user(postgres, crypted):
-    password = bcrypt.encrypt('test_password', ident='2y') if crypted else 'test_password'
-    data = dict(
-        email='test@email.com', last_name='Teser', first_name='testi', password=password)
-
-    user = User.create_user(**data, crypted=crypted)
-
-    authenticated_user = User.verify_user_password('test@email.com', 'test_password')
-    assert authenticated_user is not None
-    assert user.user_id == authenticated_user.user_id
-    assert user.get_auth_token() is not None
-
-
-class TestDataSets:
-
-    @pytest.fixture(scope='function')
-    def datasets(self, postgres):
-        postgres.begin()
-        one = Calc()
-        two = Calc()
-        three = Calc()
-        postgres.add(one)
-        postgres.add(two)
-        postgres.add(three)
-        one.children.append(two)
-        two.children.append(three)
-        postgres.commit()
-
-        return one, two, three
-
-    def assert_datasets(self, datasets, id_list):
-        assert sorted([ds.id for ds in datasets]) == sorted(id_list)
-
-    def test_all(self, datasets):
-        one, two, three = datasets
-        self.assert_datasets(one.all_datasets, [])
-        self.assert_datasets(two.all_datasets, [one.coe_calc_id])
-        self.assert_datasets(three.all_datasets, [one.coe_calc_id, two.coe_calc_id])
-
-    def test_direct(self, datasets):
-        one, two, three = datasets
-        self.assert_datasets(one.direct_datasets, [])
-        self.assert_datasets(two.direct_datasets, [one.coe_calc_id])
-        self.assert_datasets(three.direct_datasets, [two.coe_calc_id])
diff --git a/tests/test_datamodel.py b/tests/test_datamodel.py
index 0c799441d8..4a561edca1 100644
--- a/tests/test_datamodel.py
+++ b/tests/test_datamodel.py
@@ -30,7 +30,7 @@ number_of = 20
 random.seed(0)
 gen = DocumentGenerator()

-users = [(i + 1, names.get_first_name(), names.get_last_name(), gen.email()) for i in range(0, number_of)]
+users = [(str(i + 1), names.get_first_name(), names.get_last_name(), gen.email()) for i in range(0, number_of)]
 basis_sets = ['Numeric AOs', 'Gaussians', '(L)APW+lo', 'Plane waves']
 xc_functionals = ['LDA', 'GGA', 'hybrid', 'meta-GGA', 'GW', 'unknown']
 crystal_systems = ['triclinic', 'monoclinic', 'orthorombic', 'tetragonal', 'hexagonal', 'cubic']
@@ -49,8 +49,8 @@ low_numbers_for_geometries = [1, 2, 2, 3, 3, 4, 4]


 def _gen_user():
-    id, first, last, email = random.choice(users)
-    return utils.POPO(id=id, first_name=first, last_name=last, email=email)
+    user_id, first, last, email = random.choice(users)
+    return User(user_id=user_id, first_name=first, last_name=last, email=email)


 def _gen_dataset():
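The _gen_user change above shows up as only two diff lines, so here is a minimal, self-contained sketch of the resulting pattern: user ids become strings and a real User object replaces the generic POPO. The User dataclass below is a hypothetical stand-in for nomad.datamodel.User, only there to make the snippet runnable.

import random
from dataclasses import dataclass

@dataclass
class User:
    # stand-in for nomad.datamodel.User
    user_id: str
    first_name: str
    last_name: str
    email: str

# ids are now strings, mirroring the changed users list above
users = [(str(i + 1), 'First%d' % i, 'Last%d' % i, 'user%d@example.com' % i) for i in range(20)]

def _gen_user():
    user_id, first, last, email = random.choice(users)
    return User(user_id=user_id, first_name=first, last_name=last, email=email)

print(_gen_user())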
diff --git a/tests/test_files.py b/tests/test_files.py
index ce5c876dfc..4e898b13ad 100644
--- a/tests/test_files.py
+++ b/tests/test_files.py
@@ -347,13 +347,6 @@ class TestStagingUploadFiles(UploadFilesContract):
         upload_files.delete()
         assert not upload_files.exists()

-    def test_create_extracted_copy(self, test_upload: StagingUploadWithFiles):
-        upload, upload_files = test_upload
-        upload_files.create_extracted_copy()
-        for calc in upload.calcs:
-            assert os.path.exists(os.path.join(
-                config.fs.coe_extracted, upload_files.upload_id, calc.mainfile))
-

 class TestArchiveBasedStagingUploadFiles(UploadFilesFixtures):
     def test_create(self, test_upload_id):
diff --git a/tests/test_migration.py b/tests/test_migration.py
deleted file mode 100644
index f06d410569..0000000000
--- a/tests/test_migration.py
+++ /dev/null
@@ -1,361 +0,0 @@
-# Copyright 2018 Markus Scheidgen
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import pytest
-import os
-import os.path
-from bravado.client import SwaggerClient
-import json
-import glob
-import bravado.exception
-import zipfile
-
-from nomad import infrastructure, coe_repo, utils, files, processing, config
-
-from nomad.migration import NomadCOEMigration, SourceCalc, Package
-from nomad.infrastructure import repository_db_connection
-
-from tests.conftest import create_postgres_infra, create_auth_headers
-from tests.bravado_flask import FlaskTestHttpClient
-from tests.test_api import create_auth_headers
-import tests.utils as test_utils
-from tests.test_search import assert_search_upload
-from tests.test_files import assert_upload_files
-
-test_source_db_name = 'test_nomad_fairdi_migration_source'
-test_target_db_name = 'test_nomad_fairdi_migration_target'
-
-
-@pytest.fixture(scope='module')
-def source_repo(monkeysession, postgres_infra):
-    """
-    Fixture for an example migration source db with:
-    - two user
-    - two calculations (1 per user)
-    - one calculation with all metadata (dataset, ref, comment, coauther, sharewith)
-    """
-    try:
-        with repository_db_connection(dbname='postgres', with_trans=False) as con:
-            with con.cursor() as cursor:
-                cursor.execute("CREATE DATABASE %s ;" % test_source_db_name)
-    except Exception:
-        pass
-
-    with repository_db_connection(dbname=test_source_db_name, with_trans=False) as con:
-        with con.cursor() as cur:
-            cur.execute(
-                'DROP SCHEMA IF EXISTS public CASCADE;'
-                'CREATE SCHEMA IF NOT EXISTS public;'
-                'GRANT ALL ON SCHEMA public TO postgres;'
-                'GRANT ALL ON SCHEMA public TO public;')
-
-            schema_sql_file, example_data_sql_file = (
-                os.path.join(os.path.dirname(infrastructure.__file__), 'empty_repository_db.sql'),
-                os.path.join('tests', 'data', 'migration', 'example_source_db.sql'))
-
-            for sql_file in [schema_sql_file, example_data_sql_file]:
-                with open(sql_file, 'r') as f:
-                    cur.execute(f.read())
-
-    with create_postgres_infra(monkeysession, exists=True, readonly=True, dbname=test_source_db_name) as db:
-        yield db
-
-
-@pytest.fixture(scope='function')
-def target_repo(postgres):
-    with create_postgres_infra(readonly=False, exists=False, dbname=test_target_db_name) as db:
-        db.execute('DELETE FROM sessions WHERE user_id >= 3;')
-        db.execute('DELETE FROM users WHERE user_id >= 3;')
-        db.execute('DELETE FROM affiliations WHERE a_id >= 1;')
-        assert db.query(coe_repo.User).filter_by(email='admin').first() is not None
-        yield db
-        db.execute('TRUNCATE uploads CASCADE;')
-
-
-@pytest.fixture(scope='function')
-def migration(source_repo, target_repo, raw_files):
-    Package.objects().delete()  # the mongo fixture drops the db, but we still get old results, probably mongoengine caching
-    migration = NomadCOEMigration(quiet=True)
-    yield migration
-
-
-@pytest.fixture(scope='function')
-def source_package(mongo, migration):
-    migration.package(*glob.glob('tests/data/migration/*'))
-
-
-def assert_packages(restriction: int = 0, upload_id: str = None):
-    if upload_id is None:
-        packages = Package.objects()
-    else:
-        packages = Package.objects(upload_id=upload_id)
-    for package in packages:
-        assert os.path.exists(package.package_path)
-        assert package.size > 0
-        assert package.files > 0
-        assert package.restricted == restriction
-        with zipfile.ZipFile(package.package_path, 'r') as zf:
-            len(zf.filelist) == package.files
-
-    return packages.count()
-
-
-package_specs = [(1, 36, 'baseline'), (2, 0, 'too_big'), (1, 24, 'restriction')]
-
-
-@pytest.mark.parametrize('variant', ['', '_archived', '_oqmd'])
-@pytest.mark.parametrize('n_packages, restriction, upload', package_specs)
-def test_package(
-        mongo, migration: NomadCOEMigration, monkeypatch, n_packages, restriction, upload, variant):
-    monkeypatch.setattr('nomad.migration.max_package_size', 3)
-    upload = os.path.join('tests/data/migration/packaging%s' % variant, upload)
-
-    migration.package_index(upload)
-    assert assert_packages(restriction=restriction) == n_packages
-
-
-def test_tar_package(mongo, raw_files, monkeypatch):
-    Package.objects().delete()  # the mongo fixture drops the db, but we still get old results, probably mongoengine caching
-    monkeypatch.setattr('nomad.migration.max_package_size', 3)
-    example_tar_file = 'tests/data/migration/example.tar.gz'
-    assert os.path.isfile(example_tar_file)
-    Package.create_packages_from_tar(example_tar_file)
-
-    for n_packages, restriction, upload_id in package_specs:
-        assert assert_packages(restriction=restriction, upload_id=upload_id) == n_packages
-
-
-def perform_index(migration, has_indexed, with_metadata, **kwargs):
-    has_source_calc = False
-    for source_calc, total in SourceCalc.index(migration.source, with_metadata=with_metadata, **kwargs):
-        assert source_calc.pid is not None
-        assert source_calc.mainfile in ['1/template.json', '2/template.json']
-        assert source_calc.upload == 'upload'
-        has_source_calc = True
-        assert total == 3  # 2 calcs + 1 dataset
-
-    assert has_source_calc == has_indexed
-
-    test_calc = SourceCalc.objects(mainfile='1/template.json', upload='upload').first()
-    assert test_calc is not None
-
-    if with_metadata:
-        assert test_calc.metadata['uploader']['id'] == 3
-        assert test_calc.metadata['comment'] == 'label1'
-
-
-@pytest.mark.parametrize('with_metadata', [False, True])
-def test_create_index(migration, mongo, with_metadata: bool):
-    perform_index(migration, has_indexed=True, drop=True, with_metadata=with_metadata)
-
-
-@pytest.mark.parametrize('with_metadata', [True, False])
-def test_update_index(migration, mongo, with_metadata: bool):
-    perform_index(migration, has_indexed=True, drop=True, with_metadata=with_metadata)
-    perform_index(migration, has_indexed=False, drop=False, with_metadata=with_metadata)
-
-
-@pytest.fixture(scope='function')
-def migrate_infra(migration, target_repo, proc_infra, client, monkeypatch):
-    """
-    Parameters to test
-    - missing upload, extracted, archive, broken archive
-    - upload process failure
-    - upload with no parsable files
-    - calculations with process errors
-    - matching, non matching calculations
-    - to few calculations
-    - to many caclualtions
-    - not in the index
-
-    All with two calcs, two users (for coauthors)
-    """
-    # source repo is the infrastructure repo
-    indexed = list(migration.source_calc_index(drop=True, with_metadata=True))
-    assert len(indexed) == 2
-
-    # target repo is the infrastructure repo
-    monkeypatch.setattr('nomad.infrastructure.repository_db', target_repo)
-
-    infra = utils.POPO()
-
-    infra.admin_auth = create_auth_headers(coe_repo.User.from_user_id(0))
-
-    def create_client_for_user(auth):
-        http_client = FlaskTestHttpClient(client, headers=auth)
-        return SwaggerClient.from_url('/swagger.json', http_client=http_client)
-
-    def create_client():
-        return create_client_for_user(infra.admin_auth)
-
-    monkeypatch.setattr('nomad.cli.client.create_client', create_client)
-
-    def stream_upload_with_client(client, upload_f, name=None):
-        return client.uploads.upload(file=upload_f, name=name).response().result
-
-    monkeypatch.setattr('nomad.cli.client.stream_upload_with_client', stream_upload_with_client)
-
-    # source repo is the still the original infrastructure repo
-    migration.copy_users()
-    infra.one_auth = create_auth_headers(coe_repo.User.from_user_id(3))
-    infra.two_auth = create_auth_headers(coe_repo.User.from_user_id(4))
-
-    infra.migration = migration
-    infra.flask = client
-    infra.one_client = create_client_for_user(infra.one_auth)
-    infra.two_client = create_client_for_user(infra.one_auth)
-
-    yield infra
-
-
-def test_copy_users(migrate_infra, target_repo):
-    assert target_repo.query(coe_repo.User).filter_by(user_id=3).first().email == 'one'
-    assert target_repo.query(coe_repo.User).filter_by(user_id=4).first().email == 'two'
-
-
-mirgation_test_specs = [
-    ('baseline', 'baseline', dict(migrated=2, source=2)),
-    ('archive', 'baseline', dict(migrated=2, source=2)),
-    ('new_upload', 'new_upload', dict(new=2)),
-    ('new_calc', 'new_calc', dict(migrated=2, source=2, new=1)),
-    ('missing_calc', 'missing_calc', dict(migrated=1, source=2, missing=1)),
-    ('missmatch', 'missmatch', dict(migrated=2, source=2, diffs=1)),
-    ('failed_calc', 'failed_calc', dict(migrated=2, source=2, diffs=0, missing=0, failed=1)),
-    ('failed_upload', 'baseline', dict(migrated=0, source=2, missing=2, errors=1)),
-    ('failed_publish', 'baseline', dict(migrated=0, source=2, missing=2, errors=1, not_migrated=2))
-]
-
-
-@pytest.mark.filterwarnings("ignore:SAWarning")
-@pytest.mark.parametrize('name, test_directory, assertions', mirgation_test_specs)
-@pytest.mark.timeout(config.tests.default_timeout)
-def test_migrate(migrate_infra, name, test_directory, assertions, monkeypatch, caplog):
-    perform_migration_test(migrate_infra, name, test_directory, assertions, monkeypatch, caplog)
-
-
-def perform_migration_test(migrate_infra, name, test_directory, assertions, monkeypatch, caplog):
-
-    def with_error(*args, **kwargs):
-        raise Exception('test error')
-
-    if name == 'failed_upload':
-        monkeypatch.setattr('nomad.files.ArchiveBasedStagingUploadFiles.extract', with_error)
-
-    if name == 'failed_publish':
-        monkeypatch.setattr('nomad.processing.data.Upload.to_upload_with_metadata', with_error)
-
-    upload_path = os.path.join('tests/data/migration', test_directory)
-    upload_path = os.path.join(upload_path, os.listdir(upload_path)[0])
-
-    pid_prefix = 10
-    migrate_infra.migration.set_pid_prefix(pid_prefix)
-    report = migrate_infra.migration.migrate(upload_path, create_packages=True)
-
-    assert report.total_calcs == assertions.get('migrated', 0) + assertions.get('new', 0) + assertions.get('not_migrated', 0)
-
-    # assert if new, diffing, migrated calcs where detected correctly
-    assert report.total_source_calcs == assertions.get('source', 0)
-    assert report.migrated_calcs == assertions.get('migrated', 0)
-    assert report.calcs_with_diffs == assertions.get('diffs', 0)
-    assert report.new_calcs == assertions.get('new', 0)
-    assert report.missing_calcs == assertions.get('missing', 0)
-
-    # assert if migrated calcs have correct user metadata
-    repo_db = infrastructure.repository_db
-    if assertions.get('migrated', 0) > 0:
-        calc_1: coe_repo.Calc = repo_db.query(coe_repo.Calc).get(1)
-        assert calc_1 is not None
-        metadata = calc_1.to_calc_with_metadata()
-        assert metadata.pid <= 2
-        assert metadata.uploader['id'] == 3
-        assert metadata.upload_time.isoformat() == '2019-01-01T12:00:00+00:00'
-        assert len(metadata.datasets) == 1
-        assert metadata.datasets[0]['id'] == 3
-        assert metadata.datasets[0]['name'] == 'test_dataset'
-        assert metadata.datasets[0]['doi']['value'] == 'internal_ref'
-        assert metadata.comment == 'label1'
-        assert len(metadata.coauthors) == 1
-        assert metadata.coauthors[0]['id'] == 4
-        assert len(metadata.references) == 1
-        assert metadata.references[0]['value'] == 'external_ref'
-
-    if assertions.get('migrated', 0) > 1:
-        calc_2: coe_repo.Calc = repo_db.query(coe_repo.Calc).get(2)
-        assert calc_1 is not None
-        metadata = calc_2.to_calc_with_metadata()
-        assert len(metadata.shared_with) == 1
-        assert metadata.shared_with[0]['id'] == 3
-
-    # assert pid prefix of new calcs
-    if assertions.get('new', 0) > 0:
-        assert repo_db.query(coe_repo.Calc).get(pid_prefix) is not None
-
-    errors = 0
-    for record in caplog.get_records(when='call'):
-        if record.levelname in ['ERROR', 'CRITICAL']:
-            record_data = json.loads(record.getMessage())
-            if 'source_upload_id' in record_data:
-                errors += 1
-
-    assert errors == assertions.get('errors', 0)
-
-    if name == 'baseline':
-        result = migrate_infra.two_client.repo.search(per_page=2, order=1, order_by='pid').response().result
-        assert result.pagination.total == 2
-        calc_1 = result.results[0]
-        assert calc_1['pid'] == '1'
-        assert calc_1['with_embargo'] is False
-
-        calc_2 = result.results[1]
-        assert calc_2['pid'] == '2'
-        assert calc_2['with_embargo'] is True
-
-        # assert if with_embargo is passed through to files
-        with test_utils.assert_exception(bravado.exception.HTTPUnauthorized):
-            migrate_infra.one_client.archive.get_archive_calc(
-                upload_id=calc_2['upload_id'], calc_id=calc_2['calc_id']).response().result
-
-        with test_utils.assert_exception(bravado.exception.HTTPUnauthorized):
-            migrate_infra.one_client.raw.get(
-                upload_id=calc_2['upload_id'], path=calc_2['mainfile']).response().result
-
-        migrate_infra.two_client.archive.get_archive_calc(
-            upload_id=calc_1['upload_id'], calc_id=calc_1['calc_id']).response().result
-        migrate_infra.two_client.raw.get(
-            upload_id=calc_1['upload_id'], path=calc_1['mainfile']).response().result
-
-        upload_proc = processing.Upload.get(calc_1['upload_id'], include_published=True)
-        upload_with_metadata = upload_proc.to_upload_with_metadata()
-        assert_search_upload(
-            upload_with_metadata, additional_keys=['with_embargo', 'pid'], published=True)
-        assert_upload_files(
-            upload_with_metadata, files.PublicUploadFiles, published=True)
-
-
-def test_skip_on_same_version(migrate_infra, monkeypatch, caplog):
-    assertions = dict(migrated=2, source=2, skipped_packages=0)
-    perform_migration_test(migrate_infra, 'baseline', 'baseline', assertions, monkeypatch, caplog)
-
-    assertions = dict(migrated=2, source=2, skipped_packages=1)
-    perform_migration_test(migrate_infra, 'baseline', 'baseline', assertions, monkeypatch, caplog)
-
-
-def test_republish(migrate_infra, monkeypatch, caplog):
-    assertions = dict(migrated=2, source=2, skipped_packages=0)
-    perform_migration_test(migrate_infra, 'baseline', 'baseline', assertions, monkeypatch, caplog)
-
-    upload_path = os.path.join('tests/data/migration', 'baseline')
-    upload_path = os.path.join(upload_path, os.listdir(upload_path)[0])
-
-    migrate_infra.migration.migrate(upload_path, only_republish=True)
diff --git a/tests/test_search.py b/tests/test_search.py
index 03ce1cfc08..1e64d08b9f 100644
--- a/tests/test_search.py
+++ b/tests/test_search.py
@@ -15,7 +15,7 @@
 from typing import List

 from elasticsearch_dsl import Q

-from nomad import datamodel, search, processing, parsing, infrastructure, config, coe_repo
+from nomad import datamodel, search, processing, parsing, infrastructure, config
 from nomad.search import Entry, metrics_search, quantity_search, scroll_search, entry_search

@@ -110,13 +110,13 @@ def test_scroll_search(elastic, normalized: parsing.LocalBackend):
     assert 'scroll_id' not in results['scroll']


-def test_quantity_search(elastic, normalized: parsing.LocalBackend, test_user: coe_repo.User, other_test_user: coe_repo.User):
+def test_quantity_search(elastic, normalized: parsing.LocalBackend, test_user: datamodel.User, other_test_user: datamodel.User):
     calc_with_metadata = datamodel.CalcWithMetadata(upload_id='test upload id', calc_id='test id')
     calc_with_metadata.apply_domain_metadata(normalized)
-    calc_with_metadata.uploader = test_user.to_popo()
+    calc_with_metadata.uploader = test_user.user_id
     create_entry(calc_with_metadata)
     calc_with_metadata.calc_id = 'other test id'
-    calc_with_metadata.uploader = other_test_user.to_popo()
+    calc_with_metadata.uploader = other_test_user.user_id
     create_entry(calc_with_metadata)

     refresh_index()
@@ -131,7 +131,8 @@ def refresh_index():


 def create_entry(calc_with_metadata: datamodel.CalcWithMetadata):
-    search.Entry.from_calc_with_metadata(calc_with_metadata).save()
+    entry = search.Entry.from_calc_with_metadata(calc_with_metadata)
+    entry.save()
     assert_entry(calc_with_metadata.calc_id)
-- 
GitLab
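The test_search.py hunks above switch the indexed uploader from a POPO object to a plain user_id string and build the search entry in two explicit steps. The following is a minimal, runnable sketch of that pattern only; User, CalcWithMetadata, and Entry here are hypothetical stand-ins for the real nomad classes, not code from this patch.

from dataclasses import dataclass

@dataclass
class User:
    # stand-in for nomad.datamodel.User
    user_id: str
    email: str

@dataclass
class CalcWithMetadata:
    # stand-in for nomad.datamodel.CalcWithMetadata
    upload_id: str
    calc_id: str
    uploader: str = ''  # after this change, a user_id string rather than a POPO

@dataclass
class Entry:
    # stand-in for nomad.search.Entry
    calc_id: str
    uploader: str

    @staticmethod
    def from_calc_with_metadata(calc: 'CalcWithMetadata') -> 'Entry':
        return Entry(calc_id=calc.calc_id, uploader=calc.uploader)

    def save(self):
        # a real Entry would be indexed in Elasticsearch here
        print('indexed', self.calc_id, 'for uploader', self.uploader)

test_user = User(user_id='1', email='one@example.com')
calc = CalcWithMetadata(upload_id='test upload id', calc_id='test id')
calc.uploader = test_user.user_id

entry = Entry.from_calc_with_metadata(calc)
entry.save()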