Commit e888ac47 authored by Markus Scheidgen
Browse files

Added generation of test data for search and files.

parent af312303
......@@ -18,7 +18,7 @@ from essential_generators import DocumentGenerator
import datetime
from ase.data import chemical_symbols
from nomad import datamodel, parsing, utils
from nomad import datamodel, parsing, utils, files
number_of = 20
......@@ -34,9 +34,10 @@ comments = [gen.sentence() for _ in range(0, number_of)]
# Pools of pre-generated random metadata values. generate_calc() seeds the
# random module with the pid and picks from these pools, so the same pid
# always yields the same calc metadata.
references = [(i + 1, gen.url()) for i in range(0, number_of)]
datasets = [(i + 1, gen.slug()) for i in range(0, number_of)]
# Parser names with their common prefix stripped (first 8 characters,
# presumably 'parsers/' — confirm against nomad.parsing.parser_dict keys).
codes = [parser[8:] for parser in parsing.parser_dict.keys()]
# Random relative file paths: the path component of a generated URL, i.e.
# everything after 'scheme://host/'. Named 'filepaths' (not 'files') so it
# does not shadow the imported nomad.files module.
filepaths = ['/'.join(gen.url().split('/')[3:]) for _ in range(0, number_of)]
# Skewed small-number distributions, drawn from via random.choice(...) to
# decide how many atoms/files/refs/datasets a generated calc gets.
low_numbers_for_atoms = [1, 1, 2, 2, 2, 2, 2, 3, 3, 4]
low_numbers_for_files = [1, 2, 2, 3, 3, 3, 3, 3, 4, 4]
low_numbers_for_refs_and_datasets = [0, 0, 0, 0, 1, 1, 1, 2]
......@@ -55,19 +56,19 @@ def _gen_ref():
return utils.POPO(id=id, value=value)
def generate_calc(pid: int = 0) -> datamodel.CalcWithMetadata:
def generate_calc(pid: int = 0, calc_id: str = None, upload_id: str = None) -> datamodel.CalcWithMetadata:
random.seed(pid)
self = datamodel.CalcWithMetadata()
self.upload_id = utils.create_uuid()
self.calc_id = utils.create_uuid()
self.upload_id = upload_id if upload_id is not None else utils.create_uuid()
self.calc_id = calc_id if calc_id is not None else utils.create_uuid()
self.upload_time = datetime.datetime.now()
self.calc_hash = utils.create_uuid()
self.pid = pid
self.mainfile = random.choice(files)
self.files = list([self.mainfile] + random.choices(files, k=random.choice(low_numbers_for_atoms)))
self.mainfile = random.choice(filepaths)
self.files = list([self.mainfile] + random.choices(filepaths, k=random.choice(low_numbers_for_files)))
self.uploader = _gen_user()
self.with_embargo = random.choice([True, False])
......@@ -96,11 +97,49 @@ def generate_calc(pid: int = 0) -> datamodel.CalcWithMetadata:
if __name__ == '__main__':
    # Generate random test calcs and add them to both the elastic search
    # index and the (staging -> public) file storage.
    #   argv[1]: total number of calcs (code runs) to generate
    #   argv[2]: number of uploads to spread those calcs over
    import sys
    import json
    from elasticsearch.helpers import bulk
    from nomad import infrastructure, search

    print('Generate test data and add it to search and files')
    print('first arg is number of calcs (code runs)')
    print('second arg is number uploads to spread calcs over')

    infrastructure.setup_logging()
    infrastructure.setup_elastic()

    n_calcs, n_uploads = int(sys.argv[1]), int(sys.argv[2])

    pid = 1
    # Chunk the calc range so each chunk becomes one upload.
    for calcs_per_upload in utils.chunks(range(0, n_calcs), int(n_calcs / n_uploads)):
        upload_id = utils.create_uuid()
        upload_files = files.StagingUploadFiles(
            upload_id=upload_id, create=True, is_authorized=lambda: True)

        search_entries = []
        for _ in calcs_per_upload:
            calc = generate_calc(pid, upload_id=upload_id)
            assert calc.upload_id == upload_id
            calc.published = True

            upload_files.metadata.insert(calc.to_dict())

            # Write placeholder contents for each raw file, plus one archive
            # file and one archive log file per calc.
            for filepath in calc.files:
                if len(filepath) > 0:
                    with upload_files.raw_file(filepath, 'wt') as f:
                        f.write('this is a generated test file')
            with upload_files.archive_file(calc.calc_id, 'wt') as f:
                f.write(json.dumps({'section_run': [{'test': 'this is a generated test files'}]}))
            with upload_files.archive_log_file(calc.calc_id, 'wt') as f:
                f.write('this is a generated test file')

            search_entries.append(search.Entry.from_calc_with_metadata(calc))
            pid += 1

        # Index the whole upload's entries in a single bulk request.
        bulk(
            infrastructure.elastic_client,
            [entry.to_dict(include_meta=True) for entry in search_entries])

        # Move the finished upload out of staging and drop the staging copy.
        upload_files.pack()
        upload_files.delete()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment