diff --git a/nomadcore/archive/nomad_query.py b/nomadcore/archive/nomad_query.py index 052c753982d6d1f79ebdcd68d35e243ef2f14eb6..7e5ebcc9b8e604042c9ae8e6f418bc6c5313d633 100644 --- a/nomadcore/archive/nomad_query.py +++ b/nomadcore/archive/nomad_query.py @@ -6,7 +6,7 @@ Benjamin Regler - Apache 2.0 License @license http://www.apache.org/licenses/LICENSE-2.0 @author Benjamin Regler -@version 1.0.0 +@version 2.0.0 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -31,11 +31,11 @@ import random if sys.version_info.major > 2: # For Python 3.0 and later - from urllib.parse import quote, unquote_plus + from urllib.parse import quote, unquote_plus, urlencode from urllib.request import urlopen, Request else: # Fall back to Python 2's urllib2 - from urllib import quote, unquote_plus + from urllib import quote, unquote_plus, urlencode from urllib2 import urlopen, Request @@ -58,33 +58,66 @@ class NomadQueryResult(object): (default: {1.0}) """ self._uri = [] + self._download_url = '' self._query = query or {} self._timestamp = int(time.time()) - self._response = response.get('result', {}) + + # Load response information + self._load(response, version) + + def _load(self, response, version): + """Load response information + + Arguments: + response {dict} -- Response of the Nomad Query API + version {float} -- Version of the Nomad Query data file + """ + # Set version of the Nomad Query data file self._version = version - # Construct download path - path = response.get('path', '') - self._download_url = self._query.get('endpoint', '') + 'download/' + \ - path.split('_')[-1] + '?file=' + quote(path.encode('utf-8')) + '.json' + # Initialize + if version == 1.0: + self._response = response.get('result', {}) - # Get Nomad URIs - response = NomadQuery().request(self._download_url) - if response['status'] == 'success': - regex = re.compile(r'(?<=/[a-zA-Z0-9\-_]{3}/)[^\.]+') - paths = response['data'].get('result', []) + # Construct download path + path = response.get('path', '') + self._download_url = self._query.get('endpoint', '') + \ + 'download/' + path.split('_')[-1] + '?file=' + \ + quote(path.encode('utf-8')) + '.json' - for path in paths: - match = regex.search(path) - if match: - # Substitute prefixes - groups = match.group(0).split('/') - groups[0] = 'N' + groups[0][1:] # Normalized + # Get Nomad URIs + response = NomadQuery.request(self._download_url) + if response['status'] == 'success': + regex = re.compile(r'(?<=/[a-zA-Z0-9\-_]{3}/)[^.]+') + paths = response['data'].get('result', []) + + for path in paths: + match = regex.search(path) + if match: + # Substitute prefixes + groups = match.group(0).split('/') + groups[0] = 'N' + groups[0][1:] # Normalized + + if len(groups) == 2: + groups[1] = 'C' + groups[1][1:] # Computed + + self._uri.append('nmd://' + '/'.join(groups)) + + elif version == 2.0: + self._response = response.get('data', {}) + + # Construct and get Nomad URIs + for entry in self._response: + if not entry['type'].lower().endswith('calculation'): + continue - if len(groups) == 2: - groups[1] = 'C' + groups[1][1:] # Computed + # Get archive gid + context = entry['attributes']['metadata']['archive_context'] + gid = context['archive_gid'][0] - self._uri.append('nmd://' + '/'.join(groups)) + # Assemble Nomad Uri + uri = 'nmd://N' + gid[1:] + '/' + entry['id'] + self._uri.append(uri) def version(self): """Get the version of the Nomad Query data file. 
@@ -107,6 +140,10 @@ class NomadQueryResult(object): Returns: str -- The download URL of the query + + Deprecated: + Since version 2.0.0, this method is no longer used by internal code + and not recommended. """ return self._download_url @@ -142,23 +179,25 @@ class NomadQuery(object): """ # Version of the Nomad Query API - __version__ = 1.0 - - # Nomad API endpoint - endpoint = os.environ.get('NOMAD_BASE_URI','https://analytics-toolkit.nomad-coe.eu') + '/api/' + __version__ = 2.0 # Private user path user_path = '/data/private' - def __init__(self, username='', endpoint=''): + # Nomad API endpoints + endpoint = 'https://analytics-toolkit.nomad-coe.eu/api/' + query_endpoint = 'https://analytics-toolkit.nomad-coe.eu/archive/nql-api/' + + def __init__(self, username='', endpoint='', query_endpoint=''): """Constructor. Keyword Arguments: - username {str} -- Current username. Leave empty to auto-detect - username (default: {''}) - endpoint {str} -- Endpoint of the Nomad API (default: - ${NOMAD_BASE_URI}/api if set, otherwise - {'https://analytics-toolkit.nomad-coe.eu/api/'}) + username {str} -- Current username. Leave empty to auto-detect + username (default: {''}) + endpoint {str} -- Endpoint of the Nomad API (default: + {'https://analytics-toolkit.nomad-coe.eu/api/'}) + query_endpoint {str} -- Endpoint of the Nomad Query API (default: + {'https://analytics-toolkit.nomad-coe.eu/nql-api/'}) """ self._username = '' self._base_path = '' @@ -170,11 +209,14 @@ class NomadQuery(object): if len(paths) == 1 and paths[0].lower() != 'nomad': username = paths[0] - # Set username and overwrite endpoint, if required + # Set username and overwrite endpoints, if required self.username(username) if endpoint: self.endpoint = str(endpoint) + if query_endpoint: + self.query_endpoint = str(query_endpoint) + def username(self, username=''): """Get or set the username. @@ -303,7 +345,7 @@ class NomadQuery(object): if not os.path.isdir(base_path): return queries - # Get all stored queries + # Get all stored queries for filename in os.listdir(base_path): path = os.path.join(base_path, filename) if os.path.isfile(path): @@ -322,17 +364,22 @@ class NomadQuery(object): queries.sort(key=lambda x: -x['timestamp']) return queries - def query(self, query, group_by='', context='', timeout=10): + def query(self, query, group_by='', timeout=10, **kwargs): """Query the Nomad Database. Arguments: query {str} -- The query string (see Nomad API reference) Keyword Arguments: - group_by {str} -- Group-by field. (default: {''}) - context {str} -- Query context. Leave empty to use - `single_configuration_calculation` (default: {''}) - timeout {number} -- Timeout of the request in seconds (default: {10}) + group_by {str} -- Group-by field. (default: {''}) + num_results {int} -- Number of calculations to return + (default: {10000}) + num_groups {int} -- Number of distinct calculation groups to return + (default: {10}) + context {str} -- Deprecated: Query context. Leave empty to use + `single_configuration_calculation` (default: {''}) + compat {bool} -- Compatibility mode (default: {True}) + timeout {number} -- Timeout of the request in seconds (default: {10}) Returns: NomadQueryResult -- The Nomad query result @@ -343,17 +390,27 @@ class NomadQuery(object): RuntimeError -- Unknown error. Please inform the Nomad team to solve this problem. 
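
+
+        Example:
+            A v1-style filter is normalized automatically when compatibility
+            mode is enabled (the query fields here are illustrative):
+
+                result = NomadQuery().query('atom_symbols:Ga,As')
+
         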
""" - # Set default context - if not context: - context = 'single_configuration_calculation' - # Construct URL - url = self.endpoint + ('queryGroup/' if group_by else 'query/') + context + url = self.query_endpoint + ('search_grouped' if group_by else 'search') + params = { + 'source_fields': 'archive_gid', + 'sort_field': 'calculation_gid', + 'num_results': max(min(kwargs.get('num_results', 10000), 10000), 1), + 'format': 'nested' + } + + # Normalize query - compatibility fallback + if kwargs.get('compat', True): + query = self._normalize(query) # Add query - url += '?filter=' + quote(query.strip()) + params['query'] = query.strip() if group_by: - url += quote(' GROUPBY ' + group_by.strip().lower()) + params['group_by'] = group_by.strip().lower() + params['num_groups'] = max(kwargs.get('num_groups', 10), 1) + + # Construct URL + url += '?' + urlencode(params).replace('+', '%20') # Read URL response = self.request(url, timeout=timeout) @@ -362,21 +419,18 @@ class NomadQuery(object): # Check connection timeout response = response['data'] - if 'timed_out' in response['result'] and response['result']['timed_out']: + if response['meta'].get('is_timed_out', False) or \ + response['meta'].get('is_terminated_early', False): response['message'] = 'Connection timed out.' - # Check for additional error messages - if 'message' in response or 'msg' in response: - raise RuntimeError(response.get('message', response['msg'])) - # Construct Nomad Query response query = { - 'context': context, - 'endpoint': self.endpoint, - 'filter': query.strip(), - 'group_by': group_by.strip().lower(), + 'endpoint': self.query_endpoint, + 'query': params.get('query', ''), + 'group_by': params.get('group_by', ''), 'url': url } + return NomadQueryResult(query, response, self.__version__) def fetch(self, name_or_index='', resolve=False, **params): @@ -531,6 +585,97 @@ class NomadQuery(object): data['data'] = self._resolve(data['uri'], **params) return data + @staticmethod + def request(url, timeout=10): + """Request a URL + + Arguments: + url {str} -- The URL of a web address + + Keyword Arguments: + timeout {number} -- Timeout of the request in seconds (default: {10}) + + Returns: + dict -- A dictionary with success status, response data, or + error message + """ + # Default request response + result = { + 'url': url, + 'status': 'error', + 'message': 'Unknown error. Please inform the Nomad team to ' + 'solve this problem.' + } + + try: + # Get URL + response = urlopen(Request(url), timeout=timeout) + + # Check response code + if response.code != 200: + raise RuntimeError(result['message']) + + # Read response + data = json.loads(response.read().decode('utf-8'), 'utf-8') + + # Populate result + result.pop('message') + result.update({ + 'status': 'success', + 'data': data + }) + except Exception as exc: + exc = sys.exc_info()[1] + response = result.copy() + + # Get error message + message = exc + if sys.version_info <= (2, 5) and hasattr(exc, 'message'): + message = exc.message + elif hasattr(exc, 'reason'): + message = exc.reason + response['message'] = str(message) + + # Fix error message + if response['message'].endswith('timed out'): + response['message'] = 'Connection timed out. The Nomad ' + \ + 'Analytics API Service is currently unavailable.' 
+
+        # Return result
+        return result
+
+    def _normalize(self, query):
+        """[Protected] Normalize query syntax
+
+        Arguments:
+            query {str} -- The query string (see Nomad API reference)
+
+        Returns:
+            str -- The normalized query string
+        """
+        # Convert Nomad query syntax v1 to v2
+        if re.search(r'(?<!\\):', query):
+            values = re.split(r'\sand\s', query, 0, re.I)
+
+            # Convert query
+            regex = re.compile(r'([^:]+):(.+)')
+            for i in range(len(values)):
+                match = regex.search(values[i])
+                if match:
+                    # Make sure strings are properly escaped
+                    value = map(str.strip, match.group(2).split(','))
+                    value = ','.join((v if v.isdigit()
+                                      else '"' + v.strip('\'" ') + '"')
+                                     for v in value)
+
+                    # Replace colons with equal symbols
+                    values[i] = match.group(1) + ' = ' + value
+
+            # Rebuild query
+            query = ' AND '.join(values)
+
+        return query
+
     def _resolve(self, paths, size=None, seed=None, **params):
         """[Protected] Resolve Nomad URIs.
 
diff --git a/nomadcore/metainfo/basic_metainfo.py b/nomadcore/metainfo/basic_metainfo.py
index 271f02fcfa621c538bc89cca96007889c437effe..8d8222fd7044add8b3b57afe0a77fdc95528e8a1 100644
--- a/nomadcore/metainfo/basic_metainfo.py
+++ b/nomadcore/metainfo/basic_metainfo.py
@@ -16,7 +16,7 @@
 import os
 import logging
 
-from nomadcore.local_meta_info import InfoKindEl, loadJsonFile
+from nomadcore.metainfo.local_meta_info import InfoKindEl, loadJsonFile
 
 logger = logging.getLogger(__name__)
 baseDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
diff --git a/nomadcore/parsing/base_classes.py b/nomadcore/parsing/base_classes.py
index 88a8559764e45fdb4047eb0bcde44dff87573753..edb788e4bdbcefc76c2b36486038ba36cc5514d5 100644
--- a/nomadcore/parsing/base_classes.py
+++ b/nomadcore/parsing/base_classes.py
@@ -569,12 +569,17 @@ class CacheService(object):
         """Get the value identified by name. If the cachemode does not support
         getting the value, an exception is raised.
 
-        returns:
+        Args:
+            name(string): The name of the cached object to return.
 
-        raises:
+        Returns:
+            The requested object from the cache
         """
         cache_object = self.get_cache_object(name)
-        return cache_object.value
+        if cache_object is None:
+            return None
+        else:
+            return cache_object.value
 
     def get_cache_object(self, name):
 
@@ -586,8 +591,7 @@ class CacheService(object):
 
     def __setitem__(self, name, value):
         """Used to set the value for an item. The CacheObject corresponding to
-        the name has to be first created by using the function
-        add_cache_object().
+        the name has to be first declared by using the function add().
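+
+        A typical sequence (illustrative; the exact add() signature may
+        differ):
+
+            cache = CacheService()
+            cache.add('n_atoms')   # declare the cache object first
+            cache['n_atoms'] = 11  # then assign through __setitem__
+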
""" cache_object = self._cache[name] cache_object.value = value diff --git a/nomadcore/utils/calc_fix.py b/nomadcore/utils/calc_fix.py new file mode 100644 index 0000000000000000000000000000000000000000..58073be82e44379d256f38c8fcadf13caffb4b0a --- /dev/null +++ b/nomadcore/utils/calc_fix.py @@ -0,0 +1,4 @@ +import ase.calculators.calculator + +if 'potential_energy' not in calculator.all_properties: + calculator.all_properties += ['potential_energy', 'kinetic_energy'] diff --git a/nomadcore/utils/nomad_fetch.py b/nomadcore/utils/nomad_fetch.py new file mode 100644 index 0000000000000000000000000000000000000000..c8309c24fc4e248dfee5471e61e533ff14a4b017 --- /dev/null +++ b/nomadcore/utils/nomad_fetch.py @@ -0,0 +1,932 @@ +import json +import numpy as np +import sys +import re +import requests + +import ase.units as units +from ase import Atoms +from ase.atoms import symbols2numbers +from nomadcore.utils import nomad_json +from nomadcore.utils import nomad_ziptxt +from ase.data import chemical_symbols +from nomadcore.utils.singlepoint import SinglePointCalculator + +if sys.version_info.major > 2: + # For Python 3.0 and later + from urllib.request import urlopen, Request + from urllib.parse import quote, unquote_plus +else: + # Fall back to Python 2's urllib2 + from urllib import quote, unquote_plus + from urllib2 import urlopen, Request + +nomad_api_url = 'https://labdev-nomad.esc.rzg.mpg.de' +nomad_query_url = 'https://analytics-toolkit.nomad-coe.eu' +nomad_api_template = (nomad_api_url + '/api/resolve/{hash}?format=recursiveJson') +nomad_nql_api_query_template = (nomad_api_url + '/dev/archive/nql-api/search?query={hash}') +# The next link for queries will be DEPRECATED from NOMAD! +nomad_api_query_template = (nomad_query_url + '/api/query/section_run?filter={hash}') + +nomad_enc_url = 'https://encyclopedia.nomad-coe.eu/api/v1.0/materials' +nomad_enc_saml = 'https://encyclopedia.nomad-coe.eu/api/v1.0/saml/' +nomad_enc_calc_template = (nomad_enc_url + '/{}/calculations?pagination=off') +nomad_enc_sgrp_template = (nomad_enc_url + '/{}?property=space_group') +nomad_enc_cell_template = (nomad_enc_url + '/{}/cells') +nomad_enc_elmt_template = (nomad_enc_url + '/{}/elements') + + +def nmd2https(uri): + assert uri.startswith('nmd://') + return nomad_api_template.format(hash=uri[6:]) + + +def nmd2dict(uri): + try: + from urllib2 import urlopen + except ImportError: + from urllib.request import urlopen + + httpsuri = nmd2https(uri) + try: + response = urlopen(httpsuri) + txt = response.read().decode('utf8') + return json.loads(txt, object_hook=lambda dct: NomadEntry(dct)) + except Exception as exc: + exc = sys.exc_info()[1] + print('NOMAD Server ERROR: ' + str(exc)) + return dict() + + +def read(fd): + dct = json.load(fd, object_hook=lambda dct: NomadEntry(dct)) + return dct + + +def download(uri, only_atoms=False, skip_errors=False): + # Might want to look/return sections also + dct = nmd2dict(uri) + return NomadEntry(dct, only_atoms=only_atoms, skip_errors=skip_errors) + + +def section_method2metadata(method, methods, metainfo=None): + # Collect all information starting from reference method + if not metainfo: + metainfo = {} + xc_funcs = method.get('section_XC_functionals', []) + if xc_funcs: + xc_info = ','.join([ + xc_func['XC_functional_name'] for xc_func in xc_funcs]) + if 'nomad_XC_functionals' in metainfo: + metainfo['nomad_XC_functionals'] = metainfo['nomad_XC_functionals'] + ',' + xc_info + else: + metainfo['nomad_XC_functionals'] = xc_info + e_calc_method = 
method.get('electronic_structure_method', []) + if e_calc_method: + metainfo['nomad_electronic_structure_method'] = e_calc_method + ref_methods = method.get('section_method_to_method_refs', []) + if ref_methods: + for ref_method in ref_methods: + ref_id = ref_method.get('method_to_method_ref', []) + if ref_id: + metainfo.update(section_method2metadata( + methods[ref_id], methods, metainfo=metainfo)) + return metainfo + + +def add_nomad_metainfo(d, run, calc, system=[]): + # More nomad metainfo can be add to key_value_pairs and + # key_value_pairs can also be stored at ASE db. + info = {} + info['nomad_metadata_type'] = run['type'] + info['nomad_run_gIndex'] = run['gIndex'] + if system: + info['nomad_uri'] = system['uri'] + info['nomad_system_gIndex'] = system['gIndex'] + info['nomad_calculation_uri'] = d['uri'] + if 'program_name' in run: + info['nomad_program_name'] = run['program_name'] + if 'program_version' in run: + info['nomad_program_version'] = ' '.join(run['program_version'].split()) + if 'energy_total_T0' in calc: + info['potential_energy'] = calc['energy_total_T0'] * units.J + if 'energy_total' in calc: + info['nomad_total_energy'] = calc['energy_total'] * units.J + info['energy'] = calc['energy_total'] * units.J + if 'energy_free' in calc: + info['free_energy'] = calc['energy_free'] * units.J + if 'single_configuration_calculation_converged' in calc: + info['nomad_converged'] = calc['single_configuration_calculation_converged'] + # Checking the reference section_method for this calc, + # section_single_configuration_calculation + ref_method = calc.get('single_configuration_to_calculation_method_ref') + methods = run.get('section_method', []) + if methods: + if ref_method is not None: + try: + info.update(section_method2metadata( + methods[ref_method], + methods)) + except IndexError: + pass + return info + # ?? In case there is no reference to section_method, + # ?? can we assume section_method(s) is(are) nested in + # ?? section_single_configuration_calculation + methods = calc.get('section_method', []) + if methods: + for method in methods: + info.update(section_method2metadata( + method, + methods)) + return info + + +def dict2images(d, only_atoms=False, skip_errors=False): + extracted_systems = [] + if 'error' in d: + if not skip_errors: + # Check if server return with error or json file has error field. + assert 'error' not in d, 'Request return with following error: ' + d['error'] + else: + runs = d.get('section_run', []) + assert 'section_run' in d, 'Missing section_run!' + single_confs = {} + for run in runs: + calculations = run.get('section_single_configuration_calculation', []) + systems = run.get('section_system', []) + if not only_atoms: + assert 'section_system' in run, 'No section_system in section_run!' 
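+            # For each calculation, resolve the referenced system and yield
+            # either a bare SinglePointCalculator (no usable system) or an
+            # ase.Atoms object that carries the NOMAD metadata.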
+            for nmd_calc in calculations:
+                system_ref = nmd_calc.get('single_configuration_calculation_to_system_ref', -1)
+                # if single calculation w/o system, the system ref is -1
+                single_confs[run.get('gIndex'), system_ref] = nmd_calc
+                nmd_system = []
+                if systems and system_ref > -1:
+                    # if the system is already written in the image list
+                    # we can skip this system_ref link and just add calculation info
+                    if system_ref not in extracted_systems:
+                        try:
+                            nmd_system = systems[system_ref]
+                            extracted_systems.append(system_ref)
+                        except IndexError:
+                            pass
+                metainfo = add_nomad_metainfo(d, run, nmd_calc, nmd_system)
+                calc = SinglePointCalculator(**metainfo)
+                if not nmd_system:
+                    # No resolvable system: emit the bare calculator once.
+                    yield calc
+                    continue
+                try:
+                    atom_pos_true = nmd_system['atom_positions']
+                except (TypeError, KeyError):
+                    atom_pos_true = None
+                if atom_pos_true is None:
+                    yield calc
+                else:
+                    atoms = section_system2atoms(nmd_system)
+                    if atoms is None:
+                        if not only_atoms:
+                            yield calc
+                    else:
+                        if not only_atoms:
+                            calc.atoms = atoms.copy()
+                            yield calc
+                        else:
+                            info = atoms.info.get('key_value_pairs', {})
+                            info.update(metainfo)
+                            atoms.info['key_value_pairs'] = info
+                            yield atoms
+
+
+def calcs2atoms(dct):
+    for calc in list(dict2images(dct,
+                                 only_atoms=dct.only_atoms,
+                                 skip_errors=dct.skip_errors)):
+        if calc.atoms is not None:
+            atm = calc.atoms.copy()
+            atm.info['key_value_pairs'] = calc.results
+            yield atm
+
+
+class NomadEntry(dict):
+    def __init__(self, dct, only_atoms=False, skip_errors=False):
+        # assert dct['type'] == 'nomad_calculation_2_0'
+        # assert dct['name'] == 'calculation_context'
+        # We could implement NomadEntries that represent sections.
+        dict.__init__(self, dct)
+        self.only_atoms = only_atoms
+        self.skip_errors = skip_errors
+
+    @property
+    def hash(self):
+        # The hash is a string, so not __hash__
+        assert self['uri'].startswith('nmd://')
+        return self['uri'][6:]
+
+    def toatoms(self):
+        if not self.only_atoms:
+            return calcs2atoms(self)
+        else:
+            return list(dict2images(self,
+                                    only_atoms=self.only_atoms,
+                                    skip_errors=self.skip_errors))
+
+    def iterimages(self):
+        return dict2images(self,
+                           only_atoms=self.only_atoms,
+                           skip_errors=self.skip_errors)
+
+
+def section_system2atoms(section):
+    # assert section['name'] == 'section_system'
+    numbers = None
+    if 'atom_species' in section:
+        numbers = section['atom_species']
+        numbers = np.array(numbers, int)
+        numbers[numbers < 0] = 0
+        numbers[numbers > len(chemical_symbols)] = 0
+    if 'atom_positions' not in section:
+        return None
+    else:
+        positions = section['atom_positions']['flatData']
+        positions = np.array(positions).reshape(-1, 3) * units.m
+    pbc = section.get('configuration_periodic_dimensions')
+    cell = section.get('lattice_vectors')
+    if numbers is not None:
+        atoms = Atoms(numbers, positions=positions)
+    else:
+        atoms = Atoms(positions=positions)
+    if pbc is not None:
+        assert len(pbc) == 1
+        pbc = pbc[0]  # it's a list??
+        pbc = pbc['flatData']
+        assert len(pbc) == 3
+        atoms.pbc = pbc
+
+    # celldisp?
+    if cell is not None:
+        cell = cell['flatData']
+        cell = np.array(cell).reshape(3, 3) * units.m
+        atoms.cell = cell
+
+    return atoms
+
+
+def section_singleconfig2calc(dct, nmd_run, nmd_calc, nmd_system):
+    # Forces, total energy, ........
+    # We should be able to extract e.g. a band structure as well.
+    kwargs = add_nomad_metainfo(dct, nmd_run, nmd_calc, nmd_system)
+    calc = SinglePointCalculator(**kwargs)
+    return calc
+
+
+class NomadQuery(object):
+    """
+    NOMAD Query class.
+    Requests archive info from NOMAD servers.
+
+    Parameters:
+    ===========
+    atom_labels = String that includes atom element symbols separated by commas.
+    query = Raw query string for general usage
+    nomad_interface = 'a' or 'archive' (Default NOMAD archive with NQL API)
+                      'o' or 'old-archive' (Uses same NOMAD archive but with old API)
+                      'e' or 'encyclopedia' (Access to the NOMAD Encyclopedia via its API)
+    space_group = Integer. Supply with nomad_interface = e
+    program_name = String to specify the name of the ab-initio/MD program.
+    exclusive = True to get archives only with the specified atom symbols; set it to
+                False if you would like to get all archive data that includes these symbols.
+
+    Returns:
+    ========
+    NomadQuery object.
+
+    Methods:
+    ========
+    query() : To start a new query with given parameters.
+    download() : Download archive data for nmd:// sequences that are
+        retrieved from query().
+    save_db() : Save downloaded ASE.Atoms list to ASE.db file.
+
+    """
+    def __init__(self, atom_labels=None, query=None,
+                 nomad_interface='archive', nomad_token='',
+                 space_group=None, program_name='',
+                 exclusive=True, number_of_results=None):
+        self.response = None
+        if nomad_interface.startswith('e'):
+            self.nomad_interface = 'e'
+        elif nomad_interface.startswith('o'):
+            self.nomad_interface = 'o'
+        else:
+            self.nomad_interface = 'a'
+        if nomad_token != '':
+            self.auth = (nomad_token, '')
+        else:
+            if nomad_interface.startswith('e'):
+                try:
+                    response = requests.get(
+                        nomad_enc_saml + '/user/',
+                        verify=False
+                    )
+                    response = response.json()
+                except Exception as exc:
+                    response = self._handle_error(exc)
+                if 'status' in response:
+                    if 'Unauthenticated' in response['status']:
+                        print("Your NOMAD Encyclopedia session is not authenticated."
+                              " Type '" + nomad_enc_saml + "' in your browser.")
+                if "token" in response:
+                    nomad_token = response["token"]["data"]
+                    self.auth = (nomad_token, '')
+                else:
+                    self.auth = ('', '')
+            else:
+                self.auth = ('', '')
+        init_request = False
+        if isinstance(atom_labels, (tuple, list, np.ndarray)):
+            init_request = True
+        elif isinstance(atom_labels, str):
+            # A single symbol or a comma-separated list both trigger a request
+            init_request = True
+        else:
+            if query is not None:
+                if isinstance(query, str):
+                    init_request = True
+        if init_request:
+            self.request(atom_labels=atom_labels, query=query,
+                         nomad_interface=self.nomad_interface, nomad_token=self.auth[0],
+                         space_group=space_group, program_name=program_name,
+                         exclusive=exclusive, number_of_results=number_of_results)
+
+    def _reset_query(self):
+        self.query = None
+        self.number_of_results = None
+        self.response = {}
+        self.atom_labels = []
+
+    def request(self, atom_labels=None, query=None,
+                nomad_interface='a', nomad_token='',
+                space_group=None, program_name='',
+                exclusive=True, number_of_results=None):
+        assert (atom_labels is not None or
+                query is not None), 'One of atom_labels or query should be given for NOMAD request.'
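+        # A new request always starts from a clean slate; any state from an
+        # earlier query is discarded before the endpoints are chosen.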
+ + self._reset_query() + + if isinstance(atom_labels, (tuple, list, np.ndarray)): + self.atom_labels = atom_labels + elif isinstance(atom_labels, str): + if ',' in atom_labels: + self.atom_labels = [c for c in atom_labels.split(',')] + else: + self.atom_labels = [atom_labels] + + if nomad_interface.startswith('e'): + # encyclopedia + self.nomad_interface = 'e' + elif nomad_interface.startswith('o'): + # old API for archive + self.nomad_interface = 'o' + else: + # NQL API for archive + self.nomad_interface = 'a' + + self.exclusive = exclusive + + if nomad_token != '': + self.auth = (nomad_token, '') + else: + self.auth = ('', '') + + if isinstance(query, str): + if len(query) > 0: + self.query = query + else: + self.query = None + + if self.nomad_interface == 'o': + if self.query is None: + the_query = '' + if self.atom_labels: + the_query = the_query + 'atom_symbols:' + ','.join( + [str(at) for at in self.atom_labels]) + if program_name != '': + the_query = the_query + ' AND ' + 'program_name:' + str(program_name) + self.query = the_query + elif self.nomad_interface == 'e': + self.query = { + "search_by": { + "exclusive": "0" if self.exclusive is False else "1", + "element": ','.join([at for at in self.atom_labels]), + "pagination": "off" + }, + } + if space_group: + self.query["space_group"] = str(space_group) + else: + if self.query is None: + the_query = '' + if self.atom_labels: + the_query = the_query + 'all atom_species=' + ','.join([ + str(num) for num in symbols2numbers(self.atom_labels)]) + if program_name != '': + the_query = the_query + ' and ' + 'program_name:' + str(program_name) + if number_of_results is not None: + the_query = the_query + "&num_results="+str(int(number_of_results)) + self.query = the_query + '&sort_field=calculation_gid&format=nested' + + response = self._request() + + self.response = {} + if self.nomad_interface == 'o': + # Need LICENSE for this part ? 
+ # See accessing json file with URI list at + # https://analytics-toolkit.nomad-coe.eu/notebook-edit/data/shared/ + # tutorialsNew/nomad-query/nomad-query.bkr + download_path = response.get('path', '') + nmd_uri_list = [] + if download_path: + download_url = ''.join([ + nomad_query_url + '/api/download/', + download_path.split('_')[-1], + '?file=', quote(download_path.encode('utf-8')) + ]) + download_json = download_url + '.json' + print(download_json) + json_file_request = self._request(url=download_json) + if json_file_request['status'] == 'success': + regex = re.compile(r'(?<=/[a-zA-Z0-9\-_]{3}/)[^\.]+') + for uri_path in json_file_request['result']: + match = regex.search(uri_path) + if match: + # Substitute prefixes + groups = match.group(0).split('/') + groups[0] = 'N' + groups[0][1:] # Normalized + + if len(groups) == 2: + groups[1] = 'C' + groups[1][1:] # Computed + + nmd_uri_list.append('nmd://' + '/'.join(groups)) + self.response['data'] = response + self.response['info'] = response['info'] + self.response['nmd_uri_list'] = nmd_uri_list + elif self.nomad_interface == 'e': + if 'result' in response and response['status'] != 'error': + nomad_metarial_id = response["result"][0]['id'] + self.response['material_id'] = nomad_metarial_id + try: + nomadmatdata = requests.get( + url=nomad_enc_calc_template.format(nomad_metarial_id), + auth=self.auth + ) + nomadmatdata = nomadmatdata.json() + self.response['data'] = nomadmatdata['results'] + except Exception as exc: + nomadmatdata = self._handle_error(exc) + print(nomadmatdata) + try: + nomadsgrpdata = requests.get( + url=nomad_enc_sgrp_template.format(nomad_metarial_id), + auth=self.auth + ) + nomadsgrpdata = nomadsgrpdata.json() + self.response['space_group'] = nomadsgrpdata['space_group'] + except Exception as exc: + nomadsgrpdata = self._handle_error(exc) + print(nomadsgrpdata) + else: + print(response) + else: + nmd_uri_list = [] + data_list = response.get('result', []) + for query_data in data_list: + qdat = query_data["attributes"]["metadata"] + archive_gid = qdat["archive_context"]['archive_gid'][0].replace('R', 'N', 1) + for cgid in qdat["calculation_context"]['calculation_gid']: + nmd_uri_list.append('nmd://' + str(archive_gid) + '/' + str(cgid)) + self.response['data'] = response + self.response['info'] = response['info'] + self.response['nmd_uri_list'] = nmd_uri_list + print('nmd_uri_list', len(nmd_uri_list)) + + def _handle_error(self, exc): + # Need LICENSE for this part ? + # See handling of errors from NOMAD API at + # https://analytics-toolkit.nomad-coe.eu/notebook-edit/data/shared/ + # tutorialsNew/nomad-query/nomad-query.bkr + error = { + 'status': 'error', + } + if self.nomad_interface == 'a': + error['message'] = 'Unknown error for NOMAD Archive API.' + elif self.nomad_interface == 'o': + error['message'] = 'Unknown error for NOMAD Archive API.' + else: + error['message'] = 'Unknown error for NOMAD Encyclopedia API.' + exc = sys.exc_info()[1] + + # Get error message + message = exc + if sys.version_info <= (2, 5) and hasattr(exc, 'message'): + message = exc.message + elif hasattr(exc, 'reason'): + message = exc.reason + error['message'] = str(message) + + # Fix error message + if error['message'].endswith('timed out'): + error['message'] = 'Connection timed out for NOMAD ' + \ + 'API Service. Service may currently unavailable.' + return error + + def _handle_response(self, response, interface): + # Need LICENSE for this part ? 
+ # See handling of responses from NOMAD APIs at + # https://analytics-toolkit.nomad-coe.eu/notebook-edit/data/shared/ + # tutorialsNew/nomad-query/nomad-query.bkr + # and + # https://encyclopedia.nomad-coe.eu/doc/ + error = { + 'status': 'error', + } + + if interface == 'a': + error['message'] = 'Unknown error for NOMAD Archive NQL API.' + elif interface == 'o': + error['message'] = 'Unknown error for NOMAD Archive API.' + else: + error['message'] = 'Unknown error for NOMAD Encyclopedia API.' + + # Extracts the response and its code + if not isinstance(response, dict): + if response.code != 200: + response = error + else: + data = json.loads(response.read().decode('utf-8')) + if 'error' in data: + response = error.copy() + response['message'] = 'Error:' + data['error'] + '.' + elif 'errors' in data: + response = error.copy() + if isinstance(data['errors'], dict): + response['message'] = 'Error:' + '.'.join([ + str(k)+str(v) for k, v in data['errors'].items()]) + '.' + elif isinstance(data['errors'], list): + response['message'] = 'Error:' + '.'.join(['.'.join([ + str(k)+str(v) for k, v in errs.items() + ]) for errs in data['errors']]) + '.' + else: + response['message'] = 'Error:' + data['errors'] + '.' + elif 'msg' in data: + response = error.copy() + response['message'] = response['message'] + ' ' + data['msg'] + elif 'message' in data: + response = error.copy() + response['message'] = response['message'] + ' ' + data['message'] + else: + # Everthing was ok + message = '' + status = 'success' + + if interface == 'o': + # Get status from backend + if data['status'] != 'success': + status = data['status'] + message = data.get('message', '') + + # Check connection timeout + elif('timed_out' in data['result'] and + data['result']['timed_out']): + status = 'error' + message = 'Connection timed out.' + + # Construct response + response = { + 'status': status, + 'message': message, + 'result': (data['result'] if status == 'success' + else []), + 'info': (data['result']['aggregations'] if 'aggregations' in data['result'] + else {}), + 'path': data.get('path', '') + } + elif interface == 'e': + # Construct response + response = { + 'status': status, + 'message': message, + 'result': (data['results'] if status == 'success' + else []), + 'path': data.get('login_url', '') + } + else: + # Construct response + response = { + 'status': status, + 'message': message, + 'result': (data['data'] if status == 'success' + else []), + 'info': (data['meta'] if status == 'success' + else ''), + 'path': data.get('path', '') + } + else: + data = response + if 'error' in data or 'msg' in data or 'message' in data: + response = error.copy() + response['login_url'] = data.get('login_url', '') + if 'error' in data: + response['message'] = 'Error:' + data['error'] + '.' + elif 'errors' in data: + response = error.copy() + if isinstance(data['errors'], dict): + response['message'] = 'Error:' + '.'.join([ + str(k)+str(v) for k, v in data['errors'].items()]) + '.' + elif isinstance(data['errors'], list): + response['message'] = 'Error:' + '.'.join(['.'.join([ + str(k)+str(v) for k, v in errs.items() + ]) for errs in data['errors']]) + '.' + else: + response['message'] = 'Error:' + data['errors'] + '.' 
+                elif 'msg' in data:
+                    response['message'] = response['message'] + ' ' + data['msg']
+                elif 'message' in data:
+                    response['message'] = response['message'] + ' ' + data['message']
+            else:
+                # Everything was OK
+                message = ''
+                status = 'success'
+
+                # Construct response
+                if interface == 'e':
+                    response = {
+                        'status' : status,
+                        'message': message,
+                        'result' : data.get('results', []),
+                        'info'   : data.get('meta', ''),
+                        'path'   : data.get('url', '')
+                    }
+                else:
+                    response = {
+                        'status' : status,
+                        'message': message,
+                        'result' : data.get('data', []),
+                        'info'   : data.get('meta', ''),
+                        'path'   : data.get('path', '')
+                    }
+        return response
+
+    def _request(self, url=None, timeout=30):
+        response = {'status': 'error'}
+        # Request from the NOMAD Archive API
+        if url is not None:
+            if self.nomad_interface == 'o':
+                try:
+                    response = urlopen(Request(url), timeout=timeout)
+                except Exception as exc:
+                    response = self._handle_error(exc)
+                # print(response)
+                response = self._handle_response(response, interface='o')
+            else:
+                try:
+                    response = requests.get(url)
+                    response = response.json()
+                except Exception as exc:
+                    response = self._handle_error(exc)
+                    print(response)
+                response = self._handle_response(response, interface='a')
+        else:
+            if self.nomad_interface == 'o':
+                url = nomad_api_query_template.format(hash=self.query)
+                print(url)
+                # Sends the request and catches the response
+                try:
+                    response = urlopen(Request(url), timeout=timeout)
+                except Exception as exc:
+                    response = self._handle_error(exc)
+                    print(response)
+            elif self.nomad_interface == 'e':
+                if isinstance(self.query, dict):
+                    req_json = self.query
+                else:
+                    req_json = {}
+                # Sends the request and catches the response
+                try:
+                    response = requests.post(
+                        url=nomad_enc_url,
+                        json=req_json,
+                        auth=self.auth
+                    )
+                    response = response.json()
+                except Exception as exc:
+                    response = self._handle_error(exc)
+                    print(response)
+            else:
+                url = nomad_nql_api_query_template.format(hash=self.query)
+                # Sends the request and catches the response
+                try:
+                    response = requests.get(url)
+                    response = response.json()
+                except Exception as exc:
+                    response = self._handle_error(exc)
+                    print(response)
+            response = self._handle_response(response, interface=self.nomad_interface)
+        return response
+
+    def download(self, index=':', only_atoms=False,
+                 skip_errors=False, no_dublicate_positions=False,
+                 only_last_entries=False):
+        images = []
+        from ase.utils import basestring
+        from ase.io.formats import string2index
+        from math import pi
+        from ase.spacegroup import crystal
+        if isinstance(index, basestring):
+            index = string2index(index)
+        if(self.nomad_interface == 'a' or
+           self.nomad_interface == 'o'):
+            for nmd_link in self.response["nmd_uri_list"]:
+                ref_positions = None
+                if 'section_run' in nmd_link:
+                    nmd_uri = nmd_link.split('/section_run')
+                    nmduri = nmd_uri[0]
+                else:
+                    nmduri = nmd_link
+                print(nmduri)
+                try:
+                    entry = download(nmduri, only_atoms=only_atoms, skip_errors=skip_errors)
+                    nmd_images_download = entry.toatoms()
+                    nmd_images = list(nmd_images_download)
+                except (TypeError, AttributeError, AssertionError, IndexError):
+                    print(sys.exc_info()[1])
+                    nmd_images = []
+                if self.atom_labels:
+                    if self.exclusive:
+                        nmd_images = [img for img in nmd_images if np.all(np.in1d(
+                            img.get_chemical_symbols(), self.atom_labels
+                        ))]
+                if no_dublicate_positions:
+                    nmd_new_images = []
+                    if len(nmd_images) > 0:
+                        ref_positions = nmd_images[-1].positions
+                        nmd_new_images.append(nmd_images[-1])
+                    for img in reversed(nmd_images):
+                        if 
np.linalg.norm(img.positions - ref_positions) > 0.: + nmd_new_images.append(img) + ref_positions = img.positions + nmd_images = nmd_new_images + if only_last_entries: + formulas = list(set([','.join([str(i) for i in ni.numbers]) for ni in nmd_images])) + nmd_new_images = [] + for formula in formulas: + for img in reversed(nmd_images): + if formula == ','.join([str(i) for i in img.numbers]): + nmd_new_images.append(img) + break + nmd_images = nmd_new_images + #print(nmd_images) + if len(nmd_images) > 0: + print('Adding ' + str(len(nmd_images)) + ' structure(s) with ' + ','.join( + list(set([str(ni.get_chemical_formula('reduce')) for ni in nmd_images])))) + else: + print('No structures is retrieved from this NOMAD archive!') + images.extend(nmd_images) + else: + if 'data' in self.response: + for result in self.response["data"]: + wyckoff_basis = [] + nmd_elements = [] + for item in result['wyckoff_groups_json']: + if item['variables']: + wyckoff_basis.append([ + float(item['variables']['x']), + float(item['variables']['y']), + float(item['variables']['z'])]) + if item['element']: + nmd_elements.append(str(item['element'])) + lat_par = np.array([float(s) for s in result[ + "lattice_parameters"][1:-1].split(",")]) + nmd_cell = [] + nmd_cell.extend(lat_par[0:3] * units.m) + nmd_cell.extend(lat_par[3:6] * 180.0/pi) + atoms = crystal(nmd_elements, + basis=wyckoff_basis, + spacegroup=int(self.response['space_group']), + cellpar=nmd_cell) + images.append(atoms) + self.images = list(images)[index] + + def save_db(self, filename=None): + self.db_file = filename if isinstance( + filename, str) else None + if self.db_file is None: + self.db_file = 'nomad_asedb.db' + if self.db_file and self.images: + import ase.db + with ase.db.connect(self.db_file) as atoms_db: + for image in self.images: + if isinstance(image, Atoms): + atoms_db.write(image) + + def __repr__(self): + tokens = [] + + if self.response: + info = self.response.get('info', {}) + if info: + if 'total_calculations' in info: + tokens.append("{0} calculations".format( + info['total_calculations'])) + if 'total_unique_geometries' in info: + tokens.append("{0} unique geometries".format( + info['total_unique_geometries'])) + if 'total_single_config_calculations' in info: + tokens.append("{0} configurations".format( + info['total_single_config_calculations'])) + if 'num_single_configurations' in info: + tokens.append("{0} configurations".format( + info['num_single_configurations']['value'])) + else: + data = self.response.get('data', {}) + if 'status' in data: + if 'error' in data['status']: + if 'message' in data: + tokens.append("Status: {0}.".format(data.get('status', ''))) + tokens.append("Message: {0}.".format(data.get('message', ''))) + + return '{0}({1})'.format(self.__class__.__name__, ', '.join(tokens)) + + +def main(argv): + uri = "nmd://N9Jqc1y-Bzf7sI1R9qhyyyoIosJDs/C74RJltyQeM9_WFuJYO49AR4gKuJ2" + # TRY THESE ALSO: + # nmd://N9GXsYFhz9mUMxVWxlvwnn7mCz3j1/CtiXMrvFRdyQV4fOxJkh8NwoCZR6Z + # nmd://N9GXsYFhz9mUMxVWxlvwnn7mCz3j1/CHYLKLtXXU7w7VTzesEaWibL3_A7O + # nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C-1SH_T1kd13-U3MEB7Xz-_eToBHT + nmd_query = None + if len(argv) > 0: + uri = argv[0] + only_atoms = True if len(argv) > 1 else False + write_to_atomsdb = True if len(argv) > 2 else False + nmd_int = 'a' + if len(argv) > 3: + nmd_int = 'e' if argv[3] == 'e' else 'o' + nmd_auth = argv[4] if len(argv) > 4 else '' + nmd_sgroup = int(argv[5]) if len(argv) > 5 else None + if uri.startswith('nmd://'): + print(nmd2https(uri)) + entry = download(uri) + 
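# NOTE: toatoms() may yield both ase.Atoms objects and bare
+        # SinglePointCalculator entries; the code below handles either case.
+        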
nmd_images = entry.toatoms() + else: + if uri.endswith('.json'): + with open(uri) as fd: + nmd_images = nomad_json.read_nomad_json(fd, only_atoms=only_atoms) + elif uri.endswith('.zip'): + import zipfile + zipfilelist = [] + ziptxts = [] + nmd_images = [] + with zipfile.ZipFile(uri) as zin: + zipfilelist = zin.namelist() + for zfile_name in zipfilelist: + if zfile_name.startswith(uri.replace('.zip', '')) and '.txt' in zfile_name: + ziptxts.append(zfile_name) + with zipfile.ZipFile(uri) as zin: + for zfile in ziptxts: + print('Found NMD txt file: ', str(zfile)) + with zin.open(zfile, 'r') as fd: + nmd_txt_images = nomad_ziptxt.read_nomad_ziptxt(fd, + only_atoms=only_atoms, skip_errors=True) + nmd_images.extend(nmd_txt_images) + else: + nmd_query = NomadQuery(atom_labels=uri, + nomad_interface=nmd_int, + nomad_token=nmd_auth, + space_group=nmd_sgroup) + print(nmd_query) + nmd_query.download(skip_errors=True, + no_dublicate_positions=True, + only_last_entries=False) + if write_to_atomsdb: + if nmd_query: + nmd_query.save_db(argv[2]) + else: + from ase.visualize import view + nmd_atoms = [] + for image in nmd_images: + if isinstance(image, SinglePointCalculator): + print(image) + if image.atoms: + nmd_atoms.append(image.atoms) + else: + print(image) + if image.info: + print(image.info['key_value_pairs']) + nmd_atoms.append(image) + view(nmd_atoms) + +if __name__ == '__main__': + import sys + main(sys.argv[1:]) diff --git a/nomadcore/utils/nomad_formats.py b/nomadcore/utils/nomad_formats.py new file mode 100644 index 0000000000000000000000000000000000000000..04fb559db36374c6943041b58a917bdd08579526 --- /dev/null +++ b/nomadcore/utils/nomad_formats.py @@ -0,0 +1,485 @@ +"""File formats. + +This module implements the read(), iread() and write() functions in ase.io. +For each file format there is a namedtuple (IOFormat) that has the following +elements: + +* a read(filename, index, **kwargs) generator that will yield Atoms objects +* a write(filename, images) function +* a 'single' boolean (False if multiple configurations is supported) +* a 'acceptsfd' boolean (True if file-descriptors are accepted) + +There is a dict 'ioformats' that is filled with IOFormat objects as they are +needed. The 'initialize()' function will create the IOFormat object by +looking at the all_formats dict and by importing the correct read/write +functions from the correct module. The 'single' and 'acceptsfd' bools are +parsed from two-charcter string in the all_formats dict below. + + +Example +======= + +The xyz format is implemented in the ase/io/xyz.py file which has a +read_xyz() generator and a write_xyz() function. 
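+
+A small illustrative check (only filetype() from this module is used):
+
+    from nomadcore.utils import nomad_formats
+    nomad_formats.filetype('calc.nomad.json')  # -> 'nomad-json'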
+ +""" + +import collections +import functools +import inspect +import os +import sys + +from ase.atoms import Atoms +from ase.utils import import_module, basestring, PurePath +from ase.parallel import parallel_function, parallel_generator + + +class UnknownFileTypeError(Exception): + pass + + +IOFormat = collections.namedtuple('IOFormat', + 'read, write, single, acceptsfd, isbinary') +ioformats = {} # will be filled at run-time + +# 1=single, +=multiple, F=accepts a file-descriptor, S=needs a file-name str, +# B=like F, but opens in binary mode +all_formats = { + 'nomad-json': ('JSON from Nomad archive', '+F'), + 'nomad-ziptxt': ('ZIPPED TXT from Nomad archive', '+F'), +} + +# Special cases: +format2modulename = { +} + +extension2format = { +} + +netcdfconventions2format = { + 'http://www.etsf.eu/fileformats': 'etsf', + 'AMBER': 'netcdftrajectory' +} + + +def initialize(format): + """Import read and write functions.""" + if format in ioformats: + return # already done + + _format = format.replace('-', '_') + module_name = format2modulename.get(format, _format) + + try: + module = import_module('ase.io.' + module_name) + except ImportError as err: + raise ValueError('File format not recognized: %s. Error: %s' + % (format, err)) + + read = getattr(module, 'read_' + _format, None) + write = getattr(module, 'write_' + _format, None) + + if read and not inspect.isgeneratorfunction(read): + read = functools.partial(wrap_read_function, read) + if not read and not write: + raise ValueError('File format not recognized: ' + format) + code = all_formats[format][1] + single = code[0] == '1' + assert code[1] in 'BFS' + acceptsfd = code[1] != 'S' + isbinary = code[1] == 'B' + ioformats[format] = IOFormat(read, write, single, acceptsfd, isbinary) + + +def get_ioformat(format): + """Initialize and return IOFormat tuple.""" + initialize(format) + return ioformats[format] + + +def get_compression(filename): + """ + Parse any expected file compression from the extension of a filename. + Return the filename without the extension, and the extension. Recognises + ``.gz``, ``.bz2``, ``.xz``. + + >>> get_compression('H2O.pdb.gz') + ('H2O.pdb', 'gz') + >>> get_compression('crystal.cif') + ('crystal.cif', None) + + Parameters + ========== + filename: str + Full filename including extension. + + Returns + ======= + (root, extension): (str, str or None) + Filename split into root without extension, and the extension + indicating compression format. Will not split if compression + is not recognised. + """ + # Update if anything is added + valid_compression = ['gz', 'bz2', 'xz'] + + # Use stdlib as it handles most edge cases + root, compression = os.path.splitext(filename) + + # extension keeps the '.' so remember to remove it + if compression.strip('.') in valid_compression: + return root, compression.strip('.') + else: + return filename, None + + +def open_with_compression(filename, mode='r'): + """ + Wrapper around builtin `open` that will guess compression of a file + from the filename and open it for reading or writing as if it were + a standard file. + + Implemented for ``gz``(gzip), ``bz2``(bzip2) and ``xz``(lzma). Either + Python 3 or the ``backports.lzma`` module are required for ``xz``. + + Supported modes are: + * 'r', 'rt', 'w', 'wt' for text mode read and write. + * 'rb, 'wb' for binary read and write. + Depending on the Python version, you may get errors trying to write the + wrong string type to the file. 
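+
+    For example (illustrative)::
+
+        fd = open_with_compression('data.json.gz', 'r')
+        text = fd.read()
+        fd.close()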
+ + Parameters + ========== + filename: str + Path to the file to open, including any extensions that indicate + the compression used. + mode: str + Mode to open the file, same as for builtin ``open``, e.g 'r', 'w'. + + Returns + ======= + fd: file + File-like object open with the specified mode. + """ + + if sys.version_info[0] > 2: + # Compressed formats sometimes default to binary, so force + # text mode in Python 3. + if mode == 'r': + mode = 'rt' + elif mode == 'w': + mode = 'wt' + elif mode == 'a': + mode = 'at' + else: + # The version of gzip in Anaconda Python 2 on Windows forcibly + # adds a 'b', so strip any 't' and let the string conversions + # be carried out implicitly by Python. + mode = mode.strip('t') + + root, compression = get_compression(filename) + + if compression is None: + return open(filename, mode) + elif compression == 'gz': + import gzip + fd = gzip.open(filename, mode=mode) + elif compression == 'bz2': + import bz2 + if hasattr(bz2, 'open'): + # Python 3 only + fd = bz2.open(filename, mode=mode) + else: + # Python 2 + fd = bz2.BZ2File(filename, mode=mode) + elif compression == 'xz': + try: + from lzma import open as lzma_open + except ImportError: + from backports.lzma import open as lzma_open + fd = lzma_open(filename, mode) + else: + fd = open(filename, mode) + + return fd + + +def wrap_read_function(read, filename, index=None, **kwargs): + """Convert read-function to generator.""" + if index is None: + yield read(filename, **kwargs) + else: + for atoms in read(filename, index, **kwargs): + yield atoms + + +def write(filename, images, format=None, parallel=True, append=False, + **kwargs): + """Write Atoms object(s) to file. + + filename: str or file + Name of the file to write to or a file descriptor. The name '-' + means standard output. + images: Atoms object or list of Atoms objects + A single Atoms object or a list of Atoms objects. + format: str + Used to specify the file-format. If not given, the + file-format will be taken from suffix of the filename. + parallel: bool + Default is to write on master only. Use parallel=False to write + from all slaves. + append: bool + Default is to open files in 'w' or 'wb' mode, overwriting existing files. + In some cases opening the file in 'a' or 'ab' mode (appending) is usefull, + e.g. writing trajectories or saving multiple Atoms objects in one file. + WARNING: If the file format does not support multiple entries without + additional keywords/headers, files created using 'append=True' + might not be readable by any program! They will nevertheless be + written without error message. + + The use of additional keywords is format specific.""" + + if isinstance(filename, basestring): + filename = os.path.expanduser(filename) + fd = None + if filename == '-': + fd = sys.stdout + filename = None + elif format is None: + format = filetype(filename, read=False) + else: + fd = filename + filename = None + + format = format or 'json' # default is json + + io = get_ioformat(format) + + _write(filename, fd, format, io, images, parallel=parallel, append=append, **kwargs) + + +@parallel_function +def _write(filename, fd, format, io, images, parallel=None, append=False, **kwargs): + if isinstance(images, Atoms): + images = [images] + + if io.single: + if len(images) > 1: + raise ValueError('{}-format can only store 1 Atoms object.' 
+ .format(format)) + images = images[0] + + if io.write is None: + raise ValueError("Can't write to {}-format".format(format)) + + # Special case for json-format: + if format == 'json' and len(images) > 1: + if filename is not None: + io.write(filename, images, **kwargs) + return + raise ValueError("Can't write more than one image to file-descriptor" + 'using json-format.') + + if io.acceptsfd: + open_new = (fd is None) + if open_new: + mode = 'wb' if io.isbinary else 'w' + if append: + mode = mode.replace('w', 'a') + fd = open_with_compression(filename, mode) + io.write(fd, images, **kwargs) + if open_new: + fd.close() + else: + if fd is not None: + raise ValueError("Can't write {}-format to file-descriptor" + .format(format)) + if 'append' in io.write.__code__.co_varnames: + io.write(filename, images, append=append, **kwargs) + elif append: + raise ValueError("Cannot append to {}-format, write-function " + "does not support the append keyword.".format(format)) + else: + io.write(filename, images, **kwargs) + + +def read(filename, index=None, format=None, parallel=True, **kwargs): + """Read Atoms object(s) from file. + + filename: str or file + Name of the file to read from or a file descriptor. + index: int, slice or str + The last configuration will be returned by default. Examples: + + * ``index=0``: first configuration + * ``index=-2``: second to last + * ``index=':'`` or ``index=slice(None)``: all + * ``index='-3:`` or ``index=slice(-3, None)``: three last + * ``index='::2`` or ``index=slice(0, None, 2)``: even + * ``index='1::2`` or ``index=slice(1, None, 2)``: odd + format: str + Used to specify the file-format. If not given, the + file-format will be guessed by the *filetype* function. + parallel: bool + Default is to read on master and broadcast to slaves. Use + parallel=False to read on all slaves. + + Many formats allow on open file-like object to be passed instead + of ``filename``. In this case the format cannot be auto-decected, + so the ``format`` argument should be explicitly given.""" + + if isinstance(filename, PurePath): + filename = str(filename) + if isinstance(index, basestring): + try: + index = string2index(index) + except ValueError: + pass + + filename, index = parse_filename(filename, index) + if index is None: + index = -1 + format = format or filetype(filename) + io = get_ioformat(format) + if isinstance(index, (slice, basestring)): + return list(_iread(filename, index, format, io, parallel=parallel, + **kwargs)) + else: + return next(_iread(filename, slice(index, None), format, io, + parallel=parallel, **kwargs)) + + +def iread(filename, index=None, format=None, parallel=True, **kwargs): + """Iterator for reading Atoms objects from file. 
+ + Works as the `read` function, but yields one Atoms object at a time + instead of all at once.""" + + if isinstance(index, basestring): + index = string2index(index) + + filename, index = parse_filename(filename, index) + + if index is None or index == ':': + index = slice(None, None, None) + + if not isinstance(index, (slice, basestring)): + index = slice(index, (index + 1) or None) + + format = format or filetype(filename) + io = get_ioformat(format) + + for atoms in _iread(filename, index, format, io, parallel=parallel, + **kwargs): + yield atoms + + +@parallel_generator +def _iread(filename, index, format, io, parallel=None, full_output=False, + **kwargs): + if isinstance(filename, basestring): + filename = os.path.expanduser(filename) + + if not io.read: + raise ValueError("Can't read from {}-format".format(format)) + + if io.single: + start = index.start + assert start is None or start == 0 or start == -1 + args = () + else: + args = (index,) + + must_close_fd = False + if isinstance(filename, basestring): + if io.acceptsfd: + mode = 'rb' if io.isbinary else 'r' + fd = open_with_compression(filename, mode) + must_close_fd = True + else: + fd = filename + else: + assert io.acceptsfd + fd = filename + + # Make sure fd is closed in case loop doesn't finish: + try: + for dct in io.read(fd, *args, **kwargs): + if not isinstance(dct, dict): + dct = {'atoms': dct} + if full_output: + yield dct + else: + yield dct['atoms'] + finally: + if must_close_fd: + fd.close() + + +def parse_filename(filename, index=None): + if not isinstance(filename, basestring): + return filename, index + + extension = os.path.basename(filename) + if '@' not in extension: + return filename, index + + newindex = None + newfilename, newindex = filename.rsplit('@', 1) + + if isinstance(index, slice): + return newfilename, index + try: + newindex = string2index(newindex) + except ValueError: + pass + + return newfilename, newindex + + +def string2index(string): + if ':' not in string: + return int(string) + i = [] + for s in string.split(':'): + if s == '': + i.append(None) + else: + i.append(int(s)) + i += (3 - len(i)) * [None] + return slice(*i) + + +def filetype(filename, read=True, guess=True): + """Try to guess the type of the file. + + First, special signatures in the filename will be checked for. If that + does not identify the file type, then the first 2000 bytes of the file + will be read and analysed. Turn off this second part by using + read=False. + + Can be used from the command-line also:: + + $ ase info filename ... + """ + + ext = None + if isinstance(filename, basestring): + # strip any compression extensions that can be read + root, compression = get_compression(filename) + basename = os.path.basename(root) + + if basename.endswith('.nomad.json'): + return 'nomad-json' + + if basename.endswith('.nomad.zip'): + return 'nomad-ziptxt' + + format = extension2format.get(ext) + if format is None and guess: + format = ext + if format is None: + raise UnknownFileTypeError('Could not guess file type') + + return format diff --git a/nomadcore/utils/nomad_get.py b/nomadcore/utils/nomad_get.py new file mode 100644 index 0000000000000000000000000000000000000000..de3982f0cd4f07d510760fc70a2b64388675b95e --- /dev/null +++ b/nomadcore/utils/nomad_get.py @@ -0,0 +1,22 @@ +from __future__ import print_function +import json + + +class CLICommand: + short_description = 'Get calculations from NOMAD and write to JSON files.' 
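+    # ase-CLI style hooks: add_arguments() registers the accepted options
+    # and run() downloads each URI to an nmd.<hash>.nomad.json file.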
+
+    @staticmethod
+    def add_arguments(p):
+        p.add_argument('uri', nargs='+', metavar='nmd://<hash>',
+                       help='URIs to get')
+
+    @staticmethod
+    def run(args):
+        from nomadcore.utils.nomad_fetch import download
+        for uri in args.uri:
+            calculation = download(uri)
+            identifier = calculation.hash.replace('/', '.')
+            fname = 'nmd.{}.nomad.json'.format(identifier)
+            with open(fname, 'w') as fd:
+                json.dump(calculation, fd)
+            print(uri)
diff --git a/nomadcore/utils/nomad_json.py b/nomadcore/utils/nomad_json.py
new file mode 100644
index 0000000000000000000000000000000000000000..5563c6e650e1b56ad4a44ca8a5bfa628a8cafff7
--- /dev/null
+++ b/nomadcore/utils/nomad_json.py
@@ -0,0 +1,14 @@
+import json
+from ase.utils import basestring
+
+
+def read_nomad_json(fd, index=':', only_atoms=False):
+    # NOTE: passing index around as a string is awkward, but mirrors ase.io.
+    # Both imports are local to avoid a circular import with nomad_fetch.
+    from ase.io.formats import string2index
+    from nomadcore.utils.nomad_fetch import dict2images
+    if isinstance(index, basestring):
+        index = string2index(index)
+    d = json.load(fd)
+    images = dict2images(d, only_atoms=only_atoms)
+    return list(images)[index]
diff --git a/nomadcore/utils/nomad_ziptxt.py b/nomadcore/utils/nomad_ziptxt.py
new file mode 100644
index 0000000000000000000000000000000000000000..f6e9c85a387e10d78649be6b0b25cac9ba16079b
--- /dev/null
+++ b/nomadcore/utils/nomad_ziptxt.py
@@ -0,0 +1,27 @@
+from ase.utils import basestring
+
+
+def read_nomad_ziptxt(fd, index=':', only_atoms=False, skip_errors=False):
+    images = []
+    from ase.io.formats import string2index
+    # Imported lazily to avoid a circular import with nomad_fetch.
+    from nomadcore.utils import nomad_fetch
+    if isinstance(index, basestring):
+        index = string2index(index)
+    for bline in fd:
+        line = bline.decode("utf-8")
+        if line.startswith('#'):
+            continue
+        nmduri = line.split('/section_run')
+        print('Requesting NOMAD archive at ' + nmduri[0])
+        entry = nomad_fetch.download(nmduri[0], only_atoms=only_atoms,
+                                     skip_errors=skip_errors)
+        nmd_entry_images = entry.toatoms()
+        nmd_images = list(nmd_entry_images)
+        if len(nmd_images) > 0:
+            print('Adding ' + str(len(nmd_images)) + ' structure(s) with ' + ','.join(
+                list(set([str(ni.get_chemical_formula('reduce')) for ni in nmd_images]))))
+        else:
+            print('No structures retrieved from this NOMAD archive!')
+        images.extend(nmd_images)
+    return list(images)[index]
diff --git a/nomadcore/utils/singlepoint.py b/nomadcore/utils/singlepoint.py
new file mode 100644
index 0000000000000000000000000000000000000000..703e41bade4dd05cd706f96731198c367c7f3d48
--- /dev/null
+++ b/nomadcore/utils/singlepoint.py
@@ -0,0 +1,180 @@
+import numpy as np
+from ase.calculators.calculator import Calculator, all_properties
+if 'potential_energy' not in all_properties:
+    all_properties += ['potential_energy', 'kinetic_energy']
+from ase.calculators.calculator import PropertyNotImplementedError
+
+
+class SinglePointCalculator(Calculator):
+    """Special calculator for a single configuration.
+
+    Used to remember the energy, force and stress for a given
+    configuration. If the positions, atomic numbers, unit cell, or
+    boundary conditions are changed, then asking for
+    energy/forces/stress will raise an exception."""
+
+    name = 'unknown'
+
+    def __init__(self, atoms=None, **results):
+        """Save energy, forces, stress, ... 
for the current configuration.""" + Calculator.__init__(self) + self.results = {} + for property, value in results.items(): + if property.startswith('nomad_'): + pass + else: + assert property in all_properties + if value is None: + continue + if(property in ['energy', 'magmom', 'free_energy'] or + property.startswith('nomad_')): + self.results[property] = value + else: + self.results[property] = np.array(value, float) + if atoms: + self.atoms = atoms.copy() + + def __str__(self): + tokens = [] + for key, val in sorted(self.results.items()): + if np.isscalar(val): + txt = '{}={}'.format(key, val) + else: + txt = '{}=...'.format(key) + tokens.append(txt) + return '{}({})'.format(self.__class__.__name__, ', '.join(tokens)) + + def get_property(self, name, atoms=None, allow_calculation=True): + if name not in self.results or self.check_state(atoms): + if allow_calculation: + raise PropertyNotImplementedError( + 'The property "{0}" is not available.'.format(name)) + return None + + result = self.results[name] + if isinstance(result, np.ndarray): + result = result.copy() + return result + + +class SinglePointKPoint: + def __init__(self, weight, s, k, eps_n=[], f_n=[]): + self.weight = weight + self.s = s # spin index + self.k = k # k-point index + self.eps_n = eps_n + self.f_n = f_n + + +class SinglePointDFTCalculator(SinglePointCalculator): + def __init__(self, atoms, + efermi=None, bzkpts=None, ibzkpts=None, bz2ibz=None, + **results): + self.bz_kpts = bzkpts + self.ibz_kpts = ibzkpts + self.bz2ibz = bz2ibz + self.eFermi = efermi + + SinglePointCalculator.__init__(self, atoms, **results) + self.kpts = None + + def get_fermi_level(self): + """Return the Fermi-level(s).""" + return self.eFermi + + def get_bz_to_ibz_map(self): + return self.bz2ibz + + def get_bz_k_points(self): + """Return the k-points.""" + return self.bz_kpts + + def get_number_of_spins(self): + """Return the number of spins in the calculation. 
+ + Spin-paired calculations: 1, spin-polarized calculation: 2.""" + if self.kpts is not None: + nspin = set() + for kpt in self.kpts: + nspin.add(kpt.s) + return len(nspin) + return None + + def get_spin_polarized(self): + """Is it a spin-polarized calculation?""" + nos = self.get_number_of_spins() + if nos is not None: + return nos == 2 + return None + + def get_ibz_k_points(self): + """Return k-points in the irreducible part of the Brillouin zone.""" + return self.ibz_kpts + + def get_kpt(self, kpt=0, spin=0): + if self.kpts is not None: + counter = 0 + for kpoint in self.kpts: + if kpoint.s == spin: + if kpt == counter: + return kpoint + counter += 1 + return None + + def get_occupation_numbers(self, kpt=0, spin=0): + """Return occupation number array.""" + kpoint = self.get_kpt(kpt, spin) + if kpoint is not None: + return kpoint.f_n + return None + + def get_eigenvalues(self, kpt=0, spin=0): + """Return eigenvalue array.""" + kpoint = self.get_kpt(kpt, spin) + if kpoint is not None: + return kpoint.eps_n + return None + + def get_homo_lumo(self): + """Return HOMO and LUMO energies.""" + if self.kpts is None: + raise RuntimeError('No kpts') + eHs = [] + eLs = [] + for kpt in self.kpts: + eH, eL = self.get_homo_lumo_by_spin(kpt.s) + eHs.append(eH) + eLs.append(eL) + return np.array(eHs).max(), np.array(eLs).min() + + def get_homo_lumo_by_spin(self, spin=0): + """Return HOMO and LUMO energies for a given spin.""" + if self.kpts is None: + raise RuntimeError('No kpts') + for kpt in self.kpts: + if kpt.s == spin: + break + else: + raise RuntimeError('No k-point with spin {0}'.format(spin)) + if self.eFermi is None: + raise RuntimeError('Fermi level is not available') + eH = -1.e32 + eL = 1.e32 + for kpt in self.kpts: + if kpt.s == spin: + for e in kpt.eps_n: + if e <= self.eFermi: + eH = max(eH, e) + else: + eL = min(eL, e) + return eH, eL
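+
+
+# Minimal usage sketch (illustrative only; numbers are made up):
+#
+#     from ase import Atoms
+#     atoms = Atoms('H2', positions=[[0, 0, 0], [0, 0, 0.74]])
+#     spc = SinglePointCalculator(atoms, energy=-6.7,
+#                                 nomad_program_name='FHI-aims')
+#     atoms.set_calculator(spc)
+#     atoms.get_potential_energy()  # -> -6.7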