Commit d63b17d1 authored by Markus Scheidgen
Browse files

Added POTCAR restriction and stripping #150.

parent cfc3fba7
......@@ -66,6 +66,15 @@ from nomad.datamodel import UploadWithMetadata
user_metadata_filename = 'user_metadata.pickle'
def always_restricted(path: str) -> bool:
    """
    Used to put general restrictions on files, e.g. due to licensing issues. Will be
    called during packing and while accessing public files.

    Arguments:
        path: The path of the file to check. Only its basename is considered.

    Returns:
        True if the file has to be treated as restricted regardless of any
        other access settings (currently only files named ``POTCAR``),
        False otherwise.
    """
    # Return an explicit bool for both outcomes instead of falling off the end
    # of the function (and thereby returning None) for unrestricted files.
    return os.path.basename(path) == 'POTCAR'
class PathObject:
"""
Object storage-like abstraction for paths in general.
......@@ -403,7 +412,8 @@ class StagingUploadFiles(UploadFiles):
mainfile = calc.mainfile
assert mainfile is not None
for filepath in self.calc_files(mainfile):
public_files[filepath] = None
if not always_restricted(filepath):
public_files[filepath] = None
# 1.2 remove the non public mainfiles that have been added as auxfiles of public mainfiles
for calc in upload.calcs:
if calc.with_embargo:
......@@ -569,9 +579,9 @@ class PublicUploadFiles(UploadFiles):
try:
zip_file = self.join_file('%s-%s.%s.zip' % (prefix, access, ext))
with ZipFile(zip_file.os_path) as zf:
f = zf.open(path, 'r', **kwargs)
if access == 'restricted' and not self._is_authorized():
if (access == 'restricted' or always_restricted(path)) and not self._is_authorized():
raise Restricted
f = zf.open(path, 'r', **kwargs)
if 't' in mode:
return io.TextIOWrapper(f)
else:
......
......@@ -498,6 +498,16 @@ class SourceCalc(Document):
source_calc = SourceCalc(pid=calc.pid)
source_calc.upload = segments[0]
source_calc.mainfile = os.path.join(*segments[1:])
# The mainfile name is taken from metadata.location. Its directory prefix is
# inconsistent, but its basename is more accurate than taking the first file
# as mainfile, which is sometimes not the actual mainfile either.
if calc.mainfile is not None:
calc_mainfile = os.path.basename(calc.mainfile)
if calc_mainfile != os.path.basename(source_calc.mainfile):
source_calc.mainfile = os.path.join(
os.path.dirname(source_calc.mainfile), calc_mainfile)
if with_metadata:
source_calc.metadata = calc.to_calc_with_metadata().__dict__
source_calcs.append(source_calc)
......@@ -773,7 +783,6 @@ class NomadCOEMigration:
logger = self.logger.bind(
package_id=package.package_id, source_upload_id=package.upload_id)
if package.migration_version is not None and package.migration_version >= self.migration_version:
if only_republish:
self.republish_package(package)
......
......@@ -32,6 +32,7 @@ from contextlib import contextmanager
import os.path
from datetime import datetime
from pymongo import UpdateOne
import hashlib
from nomad import utils, coe_repo, config, infrastructure, search, datamodel
from nomad.files import PathObject, UploadFiles, ExtractError, ArchiveBasedStagingUploadFiles, PublicUploadFiles, StagingUploadFiles
......@@ -163,6 +164,7 @@ class Calc(Proc):
calc_with_metadata.nomad_commit = config.commit
calc_with_metadata.last_processing = datetime.now()
calc_with_metadata.files = self.upload_files.calc_files(self.mainfile)
self.preprocess_files(calc_with_metadata.files)
self.metadata = calc_with_metadata.to_dict()
self.parsing()
......@@ -213,6 +215,36 @@ class Calc(Proc):
self.upload.reload()
self.upload.check_join()
def preprocess_files(self, filepaths):
    """
    Creates a stripped companion file for each POTCAR among the given files.

    For every path in ``filepaths`` whose basename is ``POTCAR``, a file
    ``<path>.stripped`` is written. Its first line states the sha224 checksum
    of the original file. The remainder is a copy of the original without the
    lines that follow a line containing ``END of PSCTR`` up to (but excluding)
    the next line containing ``End of Dataset``.

    Arguments:
        filepaths: Mutable list of raw file paths as accepted by
            ``self.upload_files.raw_file_object``. The paths of the stripped
            files are appended to this list in place.

    Returns:
        The same ``filepaths`` list that was passed in.
    """
    # Iterate over a snapshot, because stripped paths are appended to
    # filepaths itself while we go.
    for path in list(filepaths):
        if os.path.basename(path) != 'POTCAR':
            continue

        stripped_path = path + '.stripped'
        potcar_os_path = self.upload_files.raw_file_object(path).os_path
        stripped_os_path = self.upload_files.raw_file_object(stripped_path).os_path

        # create checksum; read in chunks to keep memory usage bounded
        checksum = hashlib.sha224()
        with open(potcar_os_path, 'rb') as potcar:
            for chunk in iter(lambda: potcar.read(65536), b''):
                checksum.update(chunk)

        # create stripped POTCAR; the filtering is done in Python rather than
        # by piping through a shell, so file names containing spaces or shell
        # meta characters are handled safely
        with open(potcar_os_path, 'rb') as potcar, open(stripped_os_path, 'wb') as stripped:
            header = 'Stripped POTCAR file. Checksum of original file (sha224): %s\n' % (
                checksum.hexdigest())
            stripped.write(header.encode('utf-8'))

            dump = True
            for line in potcar:
                # the order of the three checks matters: both marker lines
                # themselves are part of the output
                if b'End of Dataset' in line:
                    dump = True
                if dump:
                    stripped.write(line if line.endswith(b'\n') else line + b'\n')
                if b'END of PSCTR' in line:
                    dump = False

        # do not list the stripped file twice, e.g. if it already exists from
        # a previous processing
        if stripped_path not in filepaths:
            filepaths.append(stripped_path)

    return filepaths
@task
def parsing(self):
context = dict(parser=self.parser, step=self.parser)
......
......@@ -40,8 +40,8 @@ INSERT INTO public.tags VALUES(2, 7);
INSERT INTO public.tags VALUES(1, 8);
INSERT INTO public.tags VALUES(2, 8);
INSERT INTO public.metadata VALUES (1, NULL, NULL, NULL, NULL, 'BrKSi2', '2019-01-01 12:00:00', NULL, decode('["$EXTRACTED/upload/1/template.json"]', 'escape'), 1, NULL);
INSERT INTO public.metadata VALUES (1, NULL, NULL, NULL, NULL, 'BrKSi2', '2015-01-01 13:00:00', NULL, decode('["$EXTRACTED/upload/2/template.json"]', 'escape'), 2, NULL);
INSERT INTO public.metadata VALUES (1, 'different/prefix/template.json', NULL, NULL, NULL, 'BrKSi2', '2019-01-01 12:00:00', NULL, decode('["$EXTRACTED/upload/1/template.json"]', 'escape'), 1, NULL);
INSERT INTO public.metadata VALUES (1, 'different/prefix/template.json', NULL, NULL, NULL, 'BrKSi2', '2015-01-01 13:00:00', NULL, decode('["$EXTRACTED/upload/2/wrong_mainfile"]', 'escape'), 2, NULL);
INSERT INTO public.spacegroups VALUES (1, 123);
INSERT INTO public.spacegroups VALUES (2, 123);
INSERT INTO public.user_metadata VALUES (1, 0, 'label1');
......
......@@ -445,6 +445,21 @@ class TestUploads:
# content_type='application/json')
# assert rv.status_code == 400
def test_potcar(self, client, proc_infra, test_user_auth):
    """
    Uploads, processes and publishes the POTCAR example and checks raw file access:
    the original POTCAR requires authentication, the stripped version does not.
    """
    local_path = 'tests/data/proc/examples_potcar.zip'
    response = client.put('/uploads/?local_path=%s' % local_path, headers=test_user_auth)
    upload_id = self.assert_upload(response.data)['upload_id']

    self.assert_processing(client, test_user_auth, upload_id)
    self.assert_published(client, test_user_auth, upload_id, proc_infra, with_coe_repo=True)

    potcar_url = '/raw/%s/examples_potcar/POTCAR' % upload_id
    stripped_url = '/raw/%s/examples_potcar/POTCAR.stripped' % upload_id

    # the original POTCAR is restricted: rejected without, served with credentials
    assert client.get(potcar_url).status_code == 401
    assert client.get(potcar_url, headers=test_user_auth).status_code == 200

    # the stripped POTCAR is available without credentials
    assert client.get(stripped_url).status_code == 200
class UploadFilesBasedTests:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment