Commit 7ae2caa6 authored by Lauri Himanen's avatar Lauri Himanen

Merged the new nomad_utils folder into the new package structure under the utils subpackage.

parents a86eb472 797171f7
......@@ -6,7 +6,7 @@
Benjamin Regler - Apache 2.0 License
@license http://www.apache.org/licenses/LICENSE-2.0
@author Benjamin Regler
@version 1.0.0
@version 2.0.0
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
......@@ -31,11 +31,11 @@ import random
if sys.version_info.major > 2:
# For Python 3.0 and later
from urllib.parse import quote, unquote_plus
from urllib.parse import quote, unquote_plus, urlencode
from urllib.request import urlopen, Request
else:
# Fall back to Python 2's urllib2
from urllib import quote, unquote_plus
from urllib import quote, unquote_plus, urlencode
from urllib2 import urlopen, Request
......@@ -58,33 +58,66 @@ class NomadQueryResult(object):
(default: {1.0})
"""
self._uri = []
self._download_url = ''
self._query = query or {}
self._timestamp = int(time.time())
self._response = response.get('result', {})
# Load response information
self._load(response, version)
def _load(self, response, version):
"""Load response information
Arguments:
response {dict} -- Response of the Nomad Query API
version {float} -- Version of the Nomad Query data file
"""
# Set version of the Nomad Query data file
self._version = version
# Construct download path
path = response.get('path', '')
self._download_url = self._query.get('endpoint', '') + 'download/' + \
path.split('_')[-1] + '?file=' + quote(path.encode('utf-8')) + '.json'
# Initialize
if version == 1.0:
self._response = response.get('result', {})
# Get Nomad URIs
response = NomadQuery().request(self._download_url)
if response['status'] == 'success':
regex = re.compile(r'(?<=/[a-zA-Z0-9\-_]{3}/)[^\.]+')
paths = response['data'].get('result', [])
# Construct download path
path = response.get('path', '')
self._download_url = self._query.get('endpoint', '') + \
'download/' + path.split('_')[-1] + '?file=' + \
quote(path.encode('utf-8')) + '.json'
for path in paths:
match = regex.search(path)
if match:
# Substitute prefixes
groups = match.group(0).split('/')
groups[0] = 'N' + groups[0][1:] # Normalized
# Get Nomad URIs
response = NomadQuery.request(self._download_url)
if response['status'] == 'success':
regex = re.compile(r'(?<=/[a-zA-Z0-9\-_]{3}/)[^.]+')
paths = response['data'].get('result', [])
for path in paths:
match = regex.search(path)
if match:
# Substitute prefixes
groups = match.group(0).split('/')
groups[0] = 'N' + groups[0][1:] # Normalized
if len(groups) == 2:
groups[1] = 'C' + groups[1][1:] # Computed
self._uri.append('nmd://' + '/'.join(groups))
elif version == 2.0:
self._response = response.get('data', {})
# Construct and get Nomad URIs
for entry in self._response:
if not entry['type'].lower().endswith('calculation'):
continue
if len(groups) == 2:
groups[1] = 'C' + groups[1][1:] # Computed
# Get archive gid
context = entry['attributes']['metadata']['archive_context']
gid = context['archive_gid'][0]
self._uri.append('nmd://' + '/'.join(groups))
# Assemble Nomad Uri
uri = 'nmd://N' + gid[1:] + '/' + entry['id']
self._uri.append(uri)
def version(self):
"""Get the version of the Nomad Query data file.
......@@ -107,6 +140,10 @@ class NomadQueryResult(object):
Returns:
str -- The download URL of the query
Deprecated:
Since version 2.0.0, this method is no longer used by internal code
and not recommended.
"""
return self._download_url
......@@ -142,23 +179,25 @@ class NomadQuery(object):
"""
# Version of the Nomad Query API
__version__ = 1.0
# Nomad API endpoint
endpoint = os.environ.get('NOMAD_BASE_URI','https://analytics-toolkit.nomad-coe.eu') + '/api/'
__version__ = 2.0
# Private user path
user_path = '/data/private'
def __init__(self, username='', endpoint=''):
# Nomad API endpoints
endpoint = 'https://analytics-toolkit.nomad-coe.eu/api/'
query_endpoint = 'https://analytics-toolkit.nomad-coe.eu/archive/nql-api/'
def __init__(self, username='', endpoint='', query_endpoint=''):
"""Constructor.
Keyword Arguments:
username {str} -- Current username. Leave empty to auto-detect
username (default: {''})
endpoint {str} -- Endpoint of the Nomad API (default:
${NOMAD_BASE_URI}/api if set, otherwise
{'https://analytics-toolkit.nomad-coe.eu/api/'})
username {str} -- Current username. Leave empty to auto-detect
username (default: {''})
endpoint {str} -- Endpoint of the Nomad API (default:
{'https://analytics-toolkit.nomad-coe.eu/api/'})
query_endpoint {str} -- Endpoint of the Nomad Query API (default:
{'https://analytics-toolkit.nomad-coe.eu/nql-api/'})
"""
self._username = ''
self._base_path = ''
......@@ -170,11 +209,14 @@ class NomadQuery(object):
if len(paths) == 1 and paths[0].lower() != 'nomad':
username = paths[0]
# Set username and overwrite endpoint, if required
# Set username and overwrite endpoints, if required
self.username(username)
if endpoint:
self.endpoint = str(endpoint)
if query_endpoint:
self.query_endpoint = str(query_endpoint)
def username(self, username=''):
"""Get or set the username.
......@@ -303,7 +345,7 @@ class NomadQuery(object):
if not os.path.isdir(base_path):
return queries
# Get all stored queries
# Get all stored queries
for filename in os.listdir(base_path):
path = os.path.join(base_path, filename)
if os.path.isfile(path):
......@@ -322,17 +364,22 @@ class NomadQuery(object):
queries.sort(key=lambda x: -x['timestamp'])
return queries
def query(self, query, group_by='', context='', timeout=10):
def query(self, query, group_by='', timeout=10, **kwargs):
"""Query the Nomad Database.
Arguments:
query {str} -- The query string (see Nomad API reference)
Keyword Arguments:
group_by {str} -- Group-by field. (default: {''})
context {str} -- Query context. Leave empty to use
`single_configuration_calculation` (default: {''})
timeout {number} -- Timeout of the request in seconds (default: {10})
group_by {str} -- Group-by field. (default: {''})
num_results {int} -- Number of calculations to return
(default: {10000})
num_groups {int} -- Number of distinct calculation groups to return
(default: {10})
context {str} -- Deprecated: Query context. Leave empty to use
`single_configuration_calculation` (default: {''})
compat {bool} -- Compatibility mode (default: {True})
timeout {number} -- Timeout of the request in seconds (default: {10})
Returns:
NomadQueryResult -- The Nomad query result
......@@ -343,17 +390,27 @@ class NomadQuery(object):
RuntimeError -- Unknown error. Please inform the Nomad team to
solve this problem.
"""
# Set default context
if not context:
context = 'single_configuration_calculation'
# Construct URL
url = self.endpoint + ('queryGroup/' if group_by else 'query/') + context
url = self.query_endpoint + ('search_grouped' if group_by else 'search')
params = {
'source_fields': 'archive_gid',
'sort_field': 'calculation_gid',
'num_results': max(min(kwargs.get('num_results', 10000), 10000), 1),
'format': 'nested'
}
# Normalize query - compatibility fallback
if kwargs.get('compat', True):
query = self._normalize(query)
# Add query
url += '?filter=' + quote(query.strip())
params['query'] = query.strip()
if group_by:
url += quote(' GROUPBY ' + group_by.strip().lower())
params['group_by'] = group_by.strip().lower()
params['num_groups'] = max(kwargs.get('num_groups', 10), 1)
# Construct URL
url += '?' + urlencode(params).replace('+', '%20')
# Read URL
response = self.request(url, timeout=timeout)
......@@ -362,21 +419,18 @@ class NomadQuery(object):
# Check connection timeout
response = response['data']
if 'timed_out' in response['result'] and response['result']['timed_out']:
if response['meta'].get('is_timed_out', False) or \
response['meta'].get('is_terminated_early', False):
response['message'] = 'Connection timed out.'
# Check for additional error messages
if 'message' in response or 'msg' in response:
raise RuntimeError(response.get('message', response['msg']))
# Construct Nomad Query response
query = {
'context': context,
'endpoint': self.endpoint,
'filter': query.strip(),
'group_by': group_by.strip().lower(),
'endpoint': self.query_endpoint,
'query': params.get('query', ''),
'group_by': params.get('group_by', ''),
'url': url
}
return NomadQueryResult(query, response, self.__version__)
def fetch(self, name_or_index='', resolve=False, **params):
......@@ -531,6 +585,97 @@ class NomadQuery(object):
data['data'] = self._resolve(data['uri'], **params)
return data
@staticmethod
def request(url, timeout=10):
"""Request a URL
Arguments:
url {str} -- The URL of a web address
Keyword Arguments:
timeout {number} -- Timeout of the request in seconds (default: {10})
Returns:
dict -- A dictionary with success status, response data, or
error message
"""
# Default request response
result = {
'url': url,
'status': 'error',
'message': 'Unknown error. Please inform the Nomad team to '
'solve this problem.'
}
try:
# Get URL
response = urlopen(Request(url), timeout=timeout)
# Check response code
if response.code != 200:
raise RuntimeError(result['message'])
# Read response
data = json.loads(response.read().decode('utf-8'), 'utf-8')
# Populate result
result.pop('message')
result.update({
'status': 'success',
'data': data
})
except Exception as exc:
exc = sys.exc_info()[1]
response = result.copy()
# Get error message
message = exc
if sys.version_info <= (2, 5) and hasattr(exc, 'message'):
message = exc.message
elif hasattr(exc, 'reason'):
message = exc.reason
response['message'] = str(message)
# Fix error message
if response['message'].endswith('timed out'):
response['message'] = 'Connection timed out. The Nomad ' + \
'Analytics API Service is currently unavailable.'
# Return result
return result
def _normalize(self, query):
"""[Protected] Normalize query syntax
Arguments:
query {str} -- The query string (see Nomad API reference)
Returns:
str -- The normalized query string
"""
# Convert nomad query syntax v1 to v2
if re.search(r'(?<!\\):', query):
values = re.split('\sand\s', query, 0, re.I)
# Convert query
regex = re.compile(r'([^:]+):(.+)')
for i in range(len(values)):
match = regex.search(values[i])
if match:
# Make sure strings are properly escaped
value = map(str.strip, match.group(2).split(','))
value = ','.join((v if v.isdigit()
else '"' + v.strip('\'" ') + '"')
for v in value)
# Replace colons with equal symbols
values[i] = match.group(1) + ' = ' + value
# Rebuild query
query = ' AND '.join(values)
return query
def _resolve(self, paths, size=None, seed=None, **params):
"""[Protected] Resolve Nomad URIs.
......
......@@ -16,7 +16,7 @@
import os
import logging
from nomadcore.local_meta_info import InfoKindEl, loadJsonFile
from nomadcore.metainfo.local_meta_info import InfoKindEl, loadJsonFile
logger = logging.getLogger(__name__)
baseDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
......
......@@ -569,12 +569,17 @@ class CacheService(object):
"""Get the value identified by name. If the cachemode does not support
getting the value, an exception is raised.
returns:
Args:
name(string): The name of the cached object to return.
raises:
Returns:
The requested object from the cache
"""
cache_object = self.get_cache_object(name)
return cache_object.value
if cache_object is None:
return None
else:
return cache_object.value
def get_cache_object(self, name):
......@@ -586,8 +591,7 @@ class CacheService(object):
def __setitem__(self, name, value):
"""Used to set the value for an item. The CacheObject corresponding to
the name has to be first created by using the function
add_cache_object().
the name has to be first declared by using the function add().
"""
cache_object = self._cache[name]
cache_object.value = value
......
import ase.calculators.calculator
if 'potential_energy' not in calculator.all_properties:
calculator.all_properties += ['potential_energy', 'kinetic_energy']
import json
import numpy as np
import sys
import re
import requests
import ase.units as units
from ase import Atoms
from ase.atoms import symbols2numbers
from nomadcore.utils import nomad_json
from nomadcore.utils import nomad_ziptxt
from ase.data import chemical_symbols
from nomadcore.utils.singlepoint import SinglePointCalculator
if sys.version_info.major > 2:
# For Python 3.0 and later
from urllib.request import urlopen, Request
from urllib.parse import quote, unquote_plus
else:
# Fall back to Python 2's urllib2
from urllib import quote, unquote_plus
from urllib2 import urlopen, Request
# Base hosts: the labdev instance serves archive resolution and the NQL
# search API; the analytics-toolkit host serves the legacy query API.
nomad_api_url = 'https://labdev-nomad.esc.rzg.mpg.de'
nomad_query_url = 'https://analytics-toolkit.nomad-coe.eu'
# Resolve an archive hash into recursive JSON.
nomad_api_template = (nomad_api_url + '/api/resolve/{hash}?format=recursiveJson')
# NQL (Nomad Query Language) search endpoint.
nomad_nql_api_query_template = (nomad_api_url + '/dev/archive/nql-api/search?query={hash}')
# The next link for queries will be DEPRECATED from NOMAD!
nomad_api_query_template = (nomad_query_url + '/api/query/section_run?filter={hash}')
# Encyclopedia REST endpoints ('{}' is filled with the material id).
nomad_enc_url = 'https://encyclopedia.nomad-coe.eu/api/v1.0/materials'
nomad_enc_saml = 'https://encyclopedia.nomad-coe.eu/api/v1.0/saml/'
nomad_enc_calc_template = (nomad_enc_url + '/{}/calculations?pagination=off')
nomad_enc_sgrp_template = (nomad_enc_url + '/{}?property=space_group')
nomad_enc_cell_template = (nomad_enc_url + '/{}/cells')
nomad_enc_elmt_template = (nomad_enc_url + '/{}/elements')
def nmd2https(uri):
    """Map an 'nmd://' URI onto the corresponding HTTPS resolve URL."""
    prefix = 'nmd://'
    assert uri.startswith(prefix)
    return nomad_api_template.format(hash=uri[len(prefix):])
def nmd2dict(uri):
    """Download a Nomad URI and parse the JSON payload into NomadEntry
    dictionaries; on any failure, print the error and return an empty dict.
    """
    try:
        from urllib2 import urlopen
    except ImportError:
        from urllib.request import urlopen

    https_address = nmd2https(uri)
    try:
        reply = urlopen(https_address)
        decoded = reply.read().decode('utf8')
        return json.loads(decoded, object_hook=lambda dct: NomadEntry(dct))
    except Exception:
        error = sys.exc_info()[1]
        print('NOMAD Server ERROR: ' + str(error))
        return {}
def read(fd):
    """Parse an already-opened JSON file into NomadEntry dictionaries."""
    return json.load(fd, object_hook=lambda dct: NomadEntry(dct))
def download(uri, only_atoms=False, skip_errors=False):
    """Fetch a Nomad URI and wrap the payload in a NomadEntry.

    Might want to look/return sections also.
    """
    payload = nmd2dict(uri)
    return NomadEntry(payload, only_atoms=only_atoms, skip_errors=skip_errors)
def section_method2metadata(method, methods, metainfo=None):
    """Collect method metadata starting from a reference method.

    Reads XC functional names and the electronic structure method from
    `method`, then follows `section_method_to_method_refs` recursively
    through `methods`.

    Arguments:
        method: section_method dictionary to read from.
        methods: list of all section_method dictionaries of the run,
            indexed by `method_to_method_ref`.
        metainfo: optional dict that collected values are merged into.

    Returns:
        dict -- the collected `nomad_*` metadata key/value pairs.
    """
    # Collect all information starting from reference method
    if not metainfo:
        metainfo = {}
    xc_funcs = method.get('section_XC_functionals', [])
    if xc_funcs:
        xc_info = ','.join([
            xc_func['XC_functional_name'] for xc_func in xc_funcs])
        # Append to any previously collected functionals
        if 'nomad_XC_functionals' in metainfo:
            metainfo['nomad_XC_functionals'] = metainfo['nomad_XC_functionals'] + ',' + xc_info
        else:
            metainfo['nomad_XC_functionals'] = xc_info
    e_calc_method = method.get('electronic_structure_method', [])
    if e_calc_method:
        metainfo['nomad_electronic_structure_method'] = e_calc_method
    ref_methods = method.get('section_method_to_method_refs', [])
    if ref_methods:
        for ref_method in ref_methods:
            ref_id = ref_method.get('method_to_method_ref', [])
            # A reference index of 0 is valid: the old truthiness test
            # (`if ref_id:`) silently skipped references to the first
            # method in the list.
            if ref_id or ref_id == 0:
                metainfo.update(section_method2metadata(
                    methods[ref_id], methods, metainfo=metainfo))
    return metainfo
def add_nomad_metainfo(d, run, calc, system=None):
    """Collect `nomad_*` key/value metadata for one calculation.

    More nomad metainfo can be added to key_value_pairs and
    key_value_pairs can also be stored at ASE db.

    Arguments:
        d: full downloaded calculation dictionary (provides 'uri').
        run: section_run dictionary of the calculation.
        calc: section_single_configuration_calculation dictionary.
        system: optional section_system dictionary; when given, its URI
            and gIndex are recorded too. (The old mutable `[]` default
            was replaced by `None` -- both are falsy, so behavior and
            the caller-facing interface are unchanged.)

    Returns:
        dict -- metadata entries; energies are converted via ase units.
    """
    info = {}
    info['nomad_metadata_type'] = run['type']
    info['nomad_run_gIndex'] = run['gIndex']
    if system:
        info['nomad_uri'] = system['uri']
        info['nomad_system_gIndex'] = system['gIndex']
    info['nomad_calculation_uri'] = d['uri']
    if 'program_name' in run:
        info['nomad_program_name'] = run['program_name']
    if 'program_version' in run:
        # Collapse internal whitespace runs in the version string
        info['nomad_program_version'] = ' '.join(run['program_version'].split())
    if 'energy_total_T0' in calc:
        info['potential_energy'] = calc['energy_total_T0'] * units.J
    if 'energy_total' in calc:
        info['nomad_total_energy'] = calc['energy_total'] * units.J
        info['energy'] = calc['energy_total'] * units.J
    if 'energy_free' in calc:
        info['free_energy'] = calc['energy_free'] * units.J
    if 'single_configuration_calculation_converged' in calc:
        info['nomad_converged'] = calc['single_configuration_calculation_converged']

    # Checking the reference section_method for this calc,
    # section_single_configuration_calculation
    ref_method = calc.get('single_configuration_to_calculation_method_ref')
    methods = run.get('section_method', [])
    if methods:
        if ref_method is not None:
            try:
                info.update(section_method2metadata(
                    methods[ref_method],
                    methods))
            except IndexError:
                pass
        return info

    # ?? In case there is no reference to section_method,
    # ?? can we assume section_method(s) is(are) nested in
    # ?? section_single_configuration_calculation
    methods = calc.get('section_method', [])
    if methods:
        for method in methods:
            info.update(section_method2metadata(
                method,
                methods))
    return info
def dict2images(d, only_atoms=False, skip_errors=False):
# Generator over the calculations contained in a downloaded Nomad
# dictionary `d`.  Yields ase.Atoms objects when a system with atom
# positions is available, otherwise SinglePointCalculator objects.
# NOTE(review): the indentation of this listing has been flattened;
# the control-flow comments below are best-effort -- confirm against
# the original file before refactoring.
extracted_systems = []
if 'error' in d:
if not skip_errors:
# Check if server return with error or json file has error field.
assert 'error' not in d, 'Request return with following error: ' + d['error']
else:
runs = d.get('section_run', [])
assert 'section_run' in d, 'Missing section_run!'
single_confs = {}
for run in runs:
calculations = run.get('section_single_configuration_calculation', [])
systems = run.get('section_system', [])
if not only_atoms:
assert 'section_system' in run, 'No section_system in section_run!'
for nmd_calc in calculations:
# Reference from the calculation to its system; -1 marks a
# calculation without an attached system.
system_ref = nmd_calc.get('single_configuration_calculation_to_system_ref', -1)
# if single calculation w/o system, the system ref is -1
single_confs[run.get('gIndex'), system_ref] = nmd_calc
nmd_system = []
if systems and system_ref > -1:
# if the system is already written in the image list
# we can skip this system_ref link and just add calculation info
if system_ref not in extracted_systems:
try:
nmd_system = systems[system_ref]
if system_ref not in extracted_systems:
extracted_systems.append(system_ref)
except IndexError:
pass
# Collect nomad_* metadata and wrap it in a calculator object.
metainfo = add_nomad_metainfo(d, run, nmd_calc, nmd_system)
calc = SinglePointCalculator(**metainfo)
if not nmd_system: yield calc
atom_pos_true = None
try:
atom_pos_true = nmd_system['atom_positions']
except (TypeError, KeyError):
yield calc
if atom_pos_true is None:
yield calc
else:
atoms = section_system2atoms(nmd_system)
if atoms is None:
if not only_atoms:
yield calc
else:
if not only_atoms:
# Attach a copy of the structure to the calculator.
calc.atoms = atoms.copy()
yield calc
else:
# only_atoms mode: merge the collected metadata into the Atoms
# info dictionary and yield the structure itself.
info = atoms.info.get('key_value_pairs', {})
info.update(metainfo)
atoms.info['key_value_pairs'] = info
yield atoms
def calcs2atoms(dct):
    """Yield Atoms copies (with results attached) for every calculation
    in `dct` that carries a structure."""
    images = list(dict2images(dct,
                              only_atoms=dct.only_atoms,
                              skip_errors=dct.skip_errors))
    for image_calc in images:
        if image_calc.atoms is None:
            continue
        atoms_copy = image_calc.atoms.copy()
        atoms_copy.info['key_value_pairs'] = image_calc.results
        yield atoms_copy
class NomadEntry(dict):
    """Dictionary wrapper around one parsed Nomad calculation record."""

    def __init__(self, dct, only_atoms=False, skip_errors=False):
        #assert dct['type'] == 'nomad_calculation_2_0'
        #assert dct['name'] == 'calculation_context'
        # We could implement NomadEntries that represent sections.
        super(NomadEntry, self).__init__(dct)
        self.only_atoms = only_atoms
        self.skip_errors = skip_errors

    @property
    def hash(self):
        """The URI hash part. It is a string, so not __hash__."""
        prefix = 'nmd://'
        uri = self['uri']
        assert uri.startswith(prefix)
        return uri[len(prefix):]

    def toatoms(self):
        """Return the entry's structures, as a generator of calculators
        or (in only_atoms mode) a list of Atoms."""
        if self.only_atoms:
            return list(dict2images(self,
                                    only_atoms=self.only_atoms,
                                    skip_errors=self.skip_errors))
        return calcs2atoms(self)

    def iterimages(self):
        """Iterate over the images contained in this entry."""
        return dict2images(self,
                           only_atoms=self.only_atoms,
                           skip_errors=self.skip_errors)
def section_system2atoms(section):
#assert section['name'] == 'section_system'
numbers = None
if 'atom_species' in section:
numbers = section['atom_species']
numbers = np.array(numbers, int)
numbers[numbers < 0] = 0
numbers[numbers > len(chemical_symbols)] = 0
if 'atom_positions' not in section:
return None
else:
positions = section['atom_positions']['flatData']
positions = np.array(positions).reshape(-1, 3) * units.m
pbc = section.get('configuration_periodic_dimensions')
cell = section.get('lattice_vectors')
if numbers is not None:
atoms = Atoms(numbers, positions=positions)
else:
atoms = Atoms(positions=positions)
if pbc is not None:
assert len(pbc) == 1