Commit f48ff090 authored by Lauri Himanen's avatar Lauri Himanen
Browse files

Added phonon method processing through a new step before Upload cleanup.

parent 7766a086
Pipeline #74012 failed with stages
in 23 minutes and 24 seconds
......@@ -18,7 +18,7 @@ DFT specific metadata
import re
from nomad import config
from nomad import config, utils
from nomad.metainfo import MSection, Section, Quantity, MEnum, SubSection
from nomad.metainfo.search_extension import Search
......@@ -261,8 +261,18 @@ class DFTMetadata(MSection):
return parser.code_name
return config.services.unavailable_value
def update_group_hash(self):
    """Recompute ``group_hash`` from the entry's formula, structure, method
    and ownership metadata so that equivalent calculations share one hash.
    """
    entry = self.m_parent
    self.group_hash = utils.hash(
        entry.formula,
        self.spacegroup,
        self.basis_set,
        self.xc_functional,
        self.code_name,
        self.code_version,
        entry.with_embargo,
        entry.uploader)
def apply_domain_metadata(self, backend):
from nomad import utils
from nomad.normalizing.system import normalized_atom_labels
entry = self.m_parent
......@@ -315,15 +325,7 @@ class DFTMetadata(MSection):
get_optional_backend_value(backend, 'XC_functional_name', 'section_method', logger=logger))
# grouping
self.group_hash = utils.hash(
entry.formula,
self.spacegroup,
self.basis_set,
self.xc_functional,
self.code_name,
self.code_version,
entry.with_embargo,
entry.uploader)
self.update_group_hash()
# metrics and quantities
quantities = set()
......
......@@ -470,7 +470,7 @@ class Properties(MSection):
type=Reference(section_thermodynamical_properties.m_def),
shape=[],
description="""
Reference to a specific heat capacity.
Reference to a section containing thermodynamical properties.
"""
)
......
......@@ -27,12 +27,7 @@ from nomad.normalizing.encyclopedia.context import Context
from nomad.normalizing.encyclopedia.material import MaterialBulkNormalizer, Material2DNormalizer, Material1DNormalizer
from nomad.normalizing.encyclopedia.method import MethodDFTNormalizer, MethodGWNormalizer
from nomad.normalizing.encyclopedia.properties import PropertiesNormalizer
from nomad import config, files
from nomad.archive import query_archive
from nomad.datamodel import EntryArchive
# from nomad.datamodel.metainfo.public import section_run
J_to_Ry = 4.587425e+17
from nomad import config
class EncyclopediaNormalizer(Normalizer):
......@@ -137,40 +132,10 @@ class EncyclopediaNormalizer(Normalizer):
material.material_type = material_type
return system, material_type
def method_type(self, method: Method, run_type: str) -> tuple:
def method_type(self, method: Method) -> tuple:
repr_method = None
method_id = config.services.unavailable_value
# Special handling for phonon calculations. This simply assumes that
# the method information can be retrieved from the first referenced
# calculation. TODO: Once we have standardized approach to handling
# references between entries, this implementation should change.
calc_enums = Calculation.calculation_type.type
if run_type == calc_enums.phonon_calculation:
try:
# The reference is given as an absolute path in the host
# machine. We take the path that is relative to the upload
# root.
ref = self.section_run.section_single_configuration_calculation[0].section_calculation_to_calculation_refs[0].calculation_to_calculation_external_url
ref = ref.split("/", 6)[6]
# We get the referenced archive as an ArchiveReader object and get sec
upload_id = self.backend.entry_archive.section_metadata.upload_id
upload_files = files.StagingUploadFiles(upload_id, is_authorized=lambda: True)
calc_id = upload_files.calc_id(ref)
with upload_files.read_archive(calc_id) as archive:
arch = query_archive(archive, {calc_id: calc_id})[calc_id]
ref_archive = EntryArchive.m_from_dict(arch)
methods = ref_archive.section_run[0].section_method
except Exception:
self.logger.warn(
"Could not retrieve the method information from a "
"referenced archive within a phonon calculation."
)
methods = []
else:
methods = self.section_run.section_method
methods = self.section_run.section_method
n_methods = len(methods)
if n_methods == 1:
......@@ -281,7 +246,7 @@ class EncyclopediaNormalizer(Normalizer):
# Get the method type. For now, we allow unknown method type.
# Mostly to allow phonon calculations through.
representative_method, method_type = self.method_type(method, calc_type)
representative_method, method_type = self.method_type(method)
# Get representative scc
try:
......
......@@ -40,6 +40,12 @@ from nomad.files import PathObject, UploadFiles, ExtractError, ArchiveBasedStagi
from nomad.processing.base import Proc, process, task, PENDING, SUCCESS, FAILURE
from nomad.parsing import parser_dict, match_parser, Backend
from nomad.normalizing import normalizers
from nomad.normalizing.encyclopedia.method import MethodDFTNormalizer, MethodGWNormalizer
from nomad.normalizing.encyclopedia.encyclopedia import EncyclopediaNormalizer, Context
from nomad.metainfo.encyclopedia import Method
from nomad.datamodel import EntryArchive
from nomad.archive import query_archive
import phonopyparser
def _pack_log_event(logger, method_name, event_dict):
......@@ -382,6 +388,73 @@ class Calc(Proc):
error = self._parser_backend.status[1]
self.fail('parser failed', error=error, **context)
def process_phonon(self):
    """Function that is run for phonon calculations before cleanup.

    This task is run by the celery process that is calling the join for the
    upload.

    This function re-opens the archive for this calculation to add method
    information from another, referenced archive. It updates the method
    information in section_encyclopedia as well as the DFT domain metadata,
    then re-persists metadata, search index and archive.

    Failures are logged and swallowed deliberately: a phonon entry without
    referenced method information should not fail the whole upload join.
    """
    # Create the logger before entering the try-block: the except-handler
    # below uses it, and creating it inside the try would leave ``logger``
    # unbound (NameError) if ``get_logger`` itself raised.
    context = dict(parser=self.parser, step=self.parser)
    logger = self.get_logger(**context)

    try:
        # Re-create a backend based on the phonopy parser metainfo.
        metainfo = phonopyparser.metainfo.m_env
        backend = Backend(metainfo, logger=logger, domain="dft")

        # Open the archive of this phonon calculation.
        upload_files = StagingUploadFiles(self.upload_id, is_authorized=lambda: True)
        with upload_files.read_archive(self.calc_id) as archive:
            arch = query_archive(archive, {self.calc_id: self.calc_id})[self.calc_id]
            phonon_archive = EntryArchive.m_from_dict(arch)
        backend.entry_archive = phonon_archive

        # Read in the first referenced calculation. The reference is given as
        # an absolute path on the host machine which needs to be converted
        # into a path that is relative to the upload root.
        scc = backend.entry_archive.section_run[0].section_single_configuration_calculation[0]
        ref = scc.section_calculation_to_calculation_refs[0].calculation_to_calculation_external_url
        relative_ref = ref.split("/", 6)[-1]
        ref_id = upload_files.calc_id(relative_ref)
        with upload_files.read_archive(ref_id) as archive:
            arch = query_archive(archive, {ref_id: ref_id})[ref_id]
            ref_archive = EntryArchive.m_from_dict(arch)

        # Get encyclopedia method information directly from the referenced
        # calculation.
        ref_enc_method = ref_archive.section_encyclopedia.method
        backend.entry_archive.section_encyclopedia.method = ref_enc_method

        # Overwrite old entry with new data. The metadata is updated with a
        # new timestamp and method details taken from the referenced archive.
        self._parser_backend = backend
        self._entry_metadata = backend.entry_archive.section_metadata
        self._entry_metadata.last_processing = datetime.utcnow()
        self._entry_metadata.dft.xc_functional = ref_archive.section_metadata.dft.xc_functional
        self._entry_metadata.dft.basis_set = ref_archive.section_metadata.dft.basis_set
        # The group hash depends on xc_functional/basis_set, so it must be
        # recomputed after the updates above.
        self._entry_metadata.dft.update_group_hash()

        # Persist the calc metadata.
        with utils.timer(logger, 'saved calc metadata', step='metadata'):
            self.apply_entry_metadata(self._entry_metadata)

        # Index in search.
        with utils.timer(logger, 'indexed', step='index'):
            self._entry_metadata.a_elastic.index()

        # Persist the archive.
        with utils.timer(
                logger, 'archived', step='archive',
                input_size=self.mainfile_file.size) as log_data:
            archive_size = self.write_archive(self._parser_backend)
            log_data.update(archive_size=archive_size)
    except Exception as e:
        logger.error("Could not retrieve method information for phonon calculation.", exception=e)
@contextmanager
def use_parser_backend(self, processor_name):
self._parser_backend.reset_status()
......@@ -946,6 +1019,15 @@ class Upload(Proc):
{'$set': {'joined': True}})
if modified_upload is not None:
self.get_logger().debug('join')
# Before cleaning up, run an additional normalizer on phonon
# calculations. TODO: This should be replaced by a more
# extensive mechanism that supports more complex dependencies
# between calculations.
phonon_calculations = Calc.objects(upload_id=self.upload_id, parser="parsers/phonopy")
for calc in phonon_calculations:
calc.process_phonon()
self.cleanup()
else:
# the join was already done due to a prior call
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment