Creating archives from inside another archive's normalizer
Hey Nomad,
these days I am using a workaround for creating archives inside the normalizer of another archive. The use case is that I have a batch which contains samples, and I need to create the batch and automatically create the samples. I know the code is not pretty; it is just a placeholder for something better.
class HySprint_Batch(BatchWithID, EntryData):
    """Batch entry that can create sample archives from its own normalizer.

    When ``create_samples`` is ticked, ``number_of_new_substrates`` is
    positive and ``batch_id`` is set, ``normalize`` writes one
    ``<batch_id>_<idx>.archive.json`` raw file per new sample and stores a
    reference to each of them in ``samples``.
    """

    m_def = Section(
        a_eln=dict(hide=['lab_id', 'author', 'user']),
        a_template=dict(institute="HZB_Hysprint"))

    # Number of samples to create on the next normalization; reset to 0
    # once the sample files have been written.
    number_of_new_substrates = Quantity(
        type=np.dtype(np.int64),
        description='The number of new substrates to add to the batch.',
        default=0,
        a_eln=dict(component='NumberEditQuantity')
    )

    # Running total of samples created so far; also the first index used
    # for the next set of new samples.
    number_of_substrates = Quantity(
        type=np.dtype(np.int64),
        description='The number of substrates in the batch.',
        default=0
    )

    samples = Quantity(
        type=Reference(HySprint_Sample.m_def),
        shape=['*'],
        description='The samples in the batch.',
        # a_eln=dict(component='ReferenceEditQuantity')
    )

    # Trigger flag set from the ELN; cleared again after the samples
    # have been created.
    create_samples = Quantity(
        type=bool,
        default=False,
        a_eln=dict(component='BoolEditQuantity')
    )

    substrate = Quantity(
        type=Reference(HySprint_Substrate.m_def),
        a_eln=dict(component='ReferenceEditQuantity'))

    def normalize(self, archive, logger):
        """Run the base normalization and create requested sample archives.

        Args:
            archive: The archive being normalized; its ``m_context`` is
                used to write the new sample files into the upload.
            logger: Logger used to report the generated entry ids.
        """
        super().normalize(archive, logger)

        # Kept local, like the original snippet, so the block does not
        # depend on module-level imports that are not visible here.
        import json
        import os

        from nomad.utils import hash

        should_create = (
            self.number_of_new_substrates > 0
            and self.create_samples
            and self.batch_id)
        if not should_create:
            return

        upload_id = self.m_root().metadata.upload_id
        first_idx = self.number_of_substrates
        last_idx = first_idx + self.number_of_new_substrates

        sample_refs = []
        for idx in range(first_idx, last_idx):
            # For each sample number, create an instance of the sample.
            if self.batch_short_name:
                short_name = f'{self.batch_short_name}_{idx}'
            else:
                short_name = str(idx)
            sample = HySprint_Sample(
                institute=self.institute if self.institute else '',
                sample_owner=self.batch_owner if self.batch_owner else '',
                creation_datetime=(
                    self.creation_datetime if self.creation_datetime else ''),
                sample_short_name=short_name,
                substrate=self.substrate)
            sample_entry = sample.m_to_dict(with_root_def=True)

            file_name = f'{self.batch_id}_{idx}.archive.json'
            with archive.m_context.raw_file(file_name, 'w') as outfile:
                json.dump({"data": sample_entry}, outfile)

            # The entry id is derived from the upload id and the file name,
            # so the reference can be built before the file is processed.
            entry_id = hash(upload_id, os.path.basename(outfile.name))
            logger.debug(f'Created sample entry {entry_id} in {file_name}')
            sample_refs.append(
                f'../uploads/{upload_id}/archive/{entry_id}#data')

        self.number_of_substrates += self.number_of_new_substrates
        self.number_of_new_substrates = 0
        # NOTE(review): this replaces any references already stored in
        # `samples`, while `number_of_substrates` accumulates across runs.
        # Confirm whether earlier samples should be kept when adding more.
        self.samples = sample_refs
        self.create_samples = False
As you can see, I convert the HySprint_Sample instance to a dict, write it to a raw file, hash the upload ID together with the file name to get the entry ID, and append a reference to it.
If I reprocess the upload, the samples are picked up.
There are some aspects here:
- Are you planning on having something like this that is easier to use than this hack?
- Will it be possible to process the archive when created automatically?
- Is it related to the ability to create an empty archive?
Best Micha