Commit 5dadf385 authored by Berk Onat's avatar Berk Onat

Initial commit of OpenKIM parser in nomad-lab

parents
MIT License
Copyright (c) 2018 Berk Onat
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# OpenKIM Parser
[NOMAD Laboratory CoE](http://nomad-coe.eu) parser for [OpenKIM](https://openkim.org)
## Version 0.0.1
This is the parser for OpenKIM queries at [OpenKIM](https://openkim.org).
The official version lives at:
git@gitlab.mpcdf.mpg.de:nomad-lab/parser-openkim.git
You can browse it at:
https://gitlab.rzg.mpg.de/nomad-lab/parser-openkim
It relies on having the nomad-meta-info and the python-common repositories one level higher.
The simplest way to have this is to check out nomad-lab-base recursively:
git clone --recursive git@gitlab.mpcdf.mpg.de:nomad-lab/nomad-lab-base.git
This parser will be in the directory parsers/openkim of this repository.
## Running and Testing the Parser
### Requirements
The required python packages can be installed with (see [python-common](https://gitlab.rzg.mpg.de/nomad-lab/python-common)):
pip install -r nomad-lab-base/python-common/requirements.txt
### Usage
The query output of OpenKIM simulation results can be parsed with:
python parser-openkim.py test_nomad_id openkim_query_data.json
### Test Files
Example output files of OpenKIM queries can be found in the directory test/examples.
More details about the calculations and files are given in the README file of test/examples.
import requests
from requests.exceptions import Timeout, TooManyRedirects, RequestException, HTTPError
from datetime import datetime
import re
import sys
import numpy as np
import json
from tinydb import TinyDB, Query
# TinyDB file in which this script records the OpenKIM entries it has
# already seen, together with their insertion time.
dbasefile = "openkim_query_database.json"
kimdb = TinyDB(dbasefile)
# Arguments passed to OPENKIM_query() by the script body further down.
atomlabel = 'Al'
structure = 'fcc'
# Disabled lookup table, kept for reference.  Judging by its name it maps
# the structure short names to space-group numbers -- TODO confirm.
#nomad_space_group_map={
# 'sc' : '221',
# "fcc" : '225',
# 'bcc' : '229',
# 'diamond': '227'
#}
def secondsFromEpoch(date):
    """Return the seconds elapsed between 1970-01-01 00:00:00 and ``date``.

    ``date`` is a naive ``datetime``; it is subtracted from the naive epoch
    ``datetime(1970, 1, 1)``, so no time-zone conversion takes place.

    returns:
    --------
        float : elapsed seconds, including the fractional (sub-second) part
    """
    epoch = datetime(1970, 1, 1)
    # timedelta.seconds only holds the seconds *within* the last day and
    # microseconds / 1000 would be milliseconds; total_seconds() accounts
    # for the days and converts the microseconds correctly.
    return (date - epoch).total_seconds()
def get_timestep(kim_date):
    """Turn an OpenKIM date string into the number from secondsFromEpoch().

    kim_date:
    ---------
        string : date formatted as "%Y-%m-%d %H:%M:%S.%f"; surrounding
                 whitespace is stripped before parsing
    returns:
    --------
        the value of secondsFromEpoch() for the parsed date, or None when
        ``kim_date`` is empty or None
    """
    if not kim_date:
        return None
    parsed = datetime.strptime(kim_date.strip(), "%Y-%m-%d %H:%M:%S.%f")
    return secondsFromEpoch(parsed)
def OPENKIM_query(atomlabel, structure, properties=None, timeout=60):
    """
    Query the OpenKIM web API for the test results of one crystal structure.

    atomlabel:
    ----------
        string : element symbol (not sent at the moment: the species
                 filter in the query below is disabled)
    structure:
    ----------
        string : fcc, bcc, sc, diamond
    properties:
    -----------
        string : lattice_energy, elastic_constants (currently unused)
    timeout:
    --------
        number : seconds handed to requests as the request timeout, so an
                 unresponsive server cannot block the script forever
    returns:
    --------
        decoded JSON answer of the OpenKIM query, or None when the request
        failed or the answer could not be decoded
    """
    request_data = {
        'flat'    : 'on',
        'database': 'data',
        'limit'   : '0',
        'fields'  : json.dumps({
            #"a.si-value": "1",
            #"cohesive-potential-energy.si-value" : "1",
            #"meta.subject.kimcode": "1"
        }),
        'query'   : json.dumps({
            "meta.type" : "tr",
            #"property-id" : {
            #    "$regex" : ":property/structure-cubic-crystal-npt"
            #},
            #"meta.runner.kimcode" : {
            #    "$regex" : "^LatticeConstantCubicEnergy"
            #},
            #"species.source-value": {
            #    "$all" : [atomlabel],
            #    "$not" : {
            #        "$elemMatch" : {
            #            "$nin" : [atomlabel]
            #        }
            #    }
            #},
            "short-name.source-value": structure
        })
    }
    openkim_query = None
    try:
        query = requests.post(
            url="https://query.openkim.org/api",
            data=request_data,
            timeout=timeout
        )
        query.raise_for_status()
        openkim_query = query.json()
    # ValueError covers a body that is not valid JSON on requests versions
    # whose decoding error is not a RequestException.
    except (ConnectionError, HTTPError, Timeout, TooManyRedirects,
            RequestException, ValueError):
        print("OpenKIM request exception: %s" % sys.exc_info()[1])
    return openkim_query
# Fetch the current OpenKIM results and keep only the entries that are new
# or that were re-inserted on the server since this script last saw them.
openkim_data = OPENKIM_query(atomlabel, structure)
updated_data = []
if openkim_data:
    kim_item = Query()
    for item in openkim_data:
        kim_instance_id = str(item["instance-id"])
        kim_unique_code = item["meta._id"] + '-' + kim_instance_id
        kim_result_code = item["meta.test-result-id"] + '-' + kim_instance_id
        kim_insert_date = get_timestep(item["inserted_on"])
        # TinyDB conditions have to be combined with `&`.  Python's `and`
        # just evaluates to its second operand, which silently dropped the
        # unique_id condition.
        same_entry = ((kim_item.unique_id == kim_unique_code) &
                      (kim_item.result_id == kim_result_code))
        db_entry = kimdb.search(same_entry)
        if db_entry:
            # Known entry: only report it again if OpenKIM re-inserted it.
            if db_entry[0]["insert_timestep"] < kim_insert_date:
                kimdb.update({
                    'insert_timestep': kim_insert_date,
                    },
                    same_entry)
                updated_data.append(item)
        else:
            kimdb.insert({
                'unique_id': kim_unique_code,
                # was kim_unique_code, which made the lookup above miss
                # every stored entry
                'result_id': kim_result_code,
                'insert_timestep': kim_insert_date,
                })
            updated_data.append(item)
if updated_data:
    openkim_query = {}
    openkim_query['OPENKIM_QUERY_OUTPUT'] = 'OPENKIM_QUERY_OUTPUT'
    openkim_query['QUERY'] = updated_data
    with open('data.json', 'w') as outfile:
        json.dump(openkim_query, outfile,
                  sort_keys = True, indent = 4,
                  ensure_ascii = True)
    # Read the file back so that a broken dump is noticed immediately; done
    # only after a write, so a missing data.json cannot raise here.
    with open('data.json', 'r') as databfile:
        qdata = json.load(databfile)
# coding=utf-8
from __future__ import division
from builtins import map
from builtins import range
from builtins import object
import logging, sys, bisect
import setup_paths
from datetime import datetime
import os, logging, re, traceback
from nomadcore.parser_backend import JsonParseEventsWriterBackend
from nomadcore.local_meta_info import loadJsonFile, InfoKindEl
import numpy as np
from nomadcore.unit_conversion.unit_conversion import convert_unit_function
from nomadcore.unit_conversion.unit_conversion import convert_unit
import ase.geometry
import ase.data
import ase.build
import json
from ase import Atoms
from ase.data import atomic_numbers, chemical_symbols
from math import pi
from contextlib import contextmanager
@contextmanager
def open_section(parser, backend, name):
    """Open the backend section ``name`` for the duration of a ``with`` block.

    The id returned by ``backend.openSection(name)`` is stored in
    ``parser.gidSections[name]`` and yielded to the caller.  When the block
    is left -- also when it raises -- the section is closed again with that
    same id.
    """
    gid = backend.openSection(name)
    parser.gidSections[name] = gid
    try:
        yield gid
    finally:
        # Without the finally an exception in the body left the section open.
        backend.closeSection(name, gid)
def metaNameConverter(keyName):
    """Return ``keyName`` lower-cased with punctuation and spaces removed.

    The characters dropped are: space - ( ) [ ] , . \\ / ' :
    Every other character, underscores included, is kept.
    """
    stripped = " -()[],.\\/':"
    converted = keyName.lower()
    for char in stripped:
        converted = converted.replace(char, "")
    return converted
def metaNameConverter_UnderscoreAll(keyName):
    """Lower-case ``keyName`` and join its parts with underscores.

    Runs of whitespace, as well as every "-" and ".", become a single "_"
    each; the result is then passed through metaNameConverter() to drop the
    remaining punctuation.
    """
    joined = "_".join(keyName.lower().split())
    joined = joined.replace("-", "_").replace(".", "_")
    return metaNameConverter(joined)
def metaNameConverter_OpenKIM(keyName):
    """Return the 'x_openkim_'-prefixed, underscore-joined form of ``keyName``."""
    return 'x_openkim_' + metaNameConverter_UnderscoreAll(keyName)
def secondsFromEpoch(date):
    """Return the seconds elapsed between 1970-01-01 00:00:00 and ``date``.

    ``date`` is a naive ``datetime``; it is subtracted from the naive epoch
    ``datetime(1970, 1, 1)``, so no time-zone conversion takes place.  The
    result is a float that keeps the sub-second part.
    """
    epoch = datetime(1970, 1, 1)
    # timedelta.seconds leaves out the whole days and microseconds / 1000
    # would be milliseconds; total_seconds() gets both right.
    return (date - epoch).total_seconds()
def KIMQueryReader(jsonfile):
    """Load the JSON file ``jsonfile`` and return its 'QUERY' entry."""
    with open(jsonfile, 'r') as handle:
        content = json.load(handle)
    return content['QUERY']
class OpenkimContext(object):
    """Per-query state plus the callbacks that write one OpenKIM entry.

    A parser calls reset() for every query entry and then the onEnd_*
    methods, each of which writes values for the section that is currently
    open on ``parser.backend``.
    """

    def __init__(self):
        self.parser = None
        self.weights = None
        self.reset()

    def reset(self):
        """Forget everything collected for the previous query entry."""
        self.lastSystemDescription = None
        self.labels = None
        self.singleConfCalcs = []
        self.KIM_TE = 0
        self.KIM_TD = None
        self.KIM_MO = None
        self.KIM_MD = None
        self.cell = None
        self.temperature = []
        self.cohesivepot = []
        self.cohesiveeng = []

    def startedParsing(self, parser):
        """Remember the parser that drives this context."""
        self.parser = parser

    @staticmethod
    def _frameValue(values, step):
        """Return the entry of ``values`` that belongs to frame ``step``.

        A scalar is returned unchanged and a one-element list supplies its
        only element to every frame.  None is returned when there is no
        value for the frame (empty list or index out of range).
        """
        if not isinstance(values, list):
            return values
        if len(values) == 1:
            return values[0]
        index = int(step)
        if 0 <= index < len(values):
            return values[index]
        return None

    @staticmethod
    def _addDictValues(backend, prefix, mapping):
        """Write every item of ``mapping`` under the name ``prefix + '_' + key``."""
        for key, val in mapping.items():
            name = prefix + '_' + key
            if isinstance(val, list):
                backend.addArrayValues(name, np.asarray(val))
            else:
                backend.addValue(name, val)

    def _addFrameSeries(self, backend, name, values):
        """Write ``values`` as the series ``name`` and its ``name + '_frames'``.

        Nothing is written for None or an empty list.  A one-element list is
        repeated once per collected single configuration calculation, a
        longer list is written as it is, and a scalar becomes a one-element
        array.
        """
        if values is None:
            return
        if isinstance(values, list):
            if not values:
                return
            if len(values) < 2:
                series = [values[0]] * len(self.singleConfCalcs)
            else:
                series = values
        else:
            # Testing `is None` rather than truthiness keeps a scalar 0
            # (e.g. a temperature of 0 K) from being dropped.
            series = [values]
        backend.addArrayValues(name + '_frames', np.array(self.singleConfCalcs))
        backend.addArrayValues(name, np.asarray(series))

    def onEnd_program(self, parser, querydict):
        """Write program name, version and creation date of one query entry."""
        backend = parser.backend
        backend.addValue("program_name", "OpenKIM")
        backend.addValue("program_version", querydict.get("meta.runner.short-id", ""))
        date = querydict.get("meta.created_on")
        if date:
            pdate = datetime.strptime(date.strip(), "%Y-%m-%d %H:%M:%S.%f")
            backend.addValue("program_compilation_datetime", secondsFromEpoch(pdate))

    def onEnd_openkim_data(self, parser, kimquery):
        """Write every key of ``kimquery`` under its x_openkim_* name.

        Lists of dictionaries and plain dictionaries are flattened to
        ``<converted key>_<inner key>``; other lists are written as arrays
        and everything else as a single value.
        """
        # The original relied on a module-level `backend` that only exists
        # when the file is run as a script.
        backend = parser.backend
        for k, v in kimquery.items():
            nomad_k = metaNameConverter_OpenKIM(k)
            if isinstance(v, (list, tuple)):
                if len(v) > 0 and isinstance(v[0], dict):
                    for item in v:
                        if isinstance(item, dict):
                            self._addDictValues(backend, nomad_k, item)
                else:
                    backend.addArrayValues(nomad_k, np.asarray(v))
            elif isinstance(v, dict):
                # was iterating a stale `item` instead of the dictionary `v`
                self._addDictValues(backend, nomad_k, v)
            else:
                backend.addValue(nomad_k, v)

    def onEnd_structure(self, parser, querydict, step=0):
        """Write cell, positions and labels of frame ``step`` to section_system.

        ``step`` is used as an index into 'a.si-value' when that entry is a
        list.  Nothing beyond the system reference is recorded when the
        query has no 'basis-atom-coordinates.source-value' or no
        'a.si-value', because both are needed to build the cell.
        """
        backend = parser.backend
        self.lastSystemDescription = parser.gidSections["section_system"]
        self.cell = None
        self.labels = None

        # The original condition mixed `and`/`or` without parentheses and
        # then indexed both keys unconditionally, raising KeyError.
        basis = querydict.get('basis-atom-coordinates.source-value')
        a_si_val = querydict.get('a.si-value')
        if basis is None or a_si_val is None:
            return

        # The crystal type is the second '_'-separated field of the runner
        # id; a missing id is treated like an id without such a field.
        celltype = None
        for idkey in ('meta.runner.kimid-prefix', 'meta.runner.extended-id'):
            fields = (querydict.get(idkey) or '').split('_')
            if len(fields) > 1 and fields[1]:
                celltype = fields[1]
                break
        if celltype:
            backend.addValue("x_openkim_cubic_crystal_type", celltype)

        lat_a = self._frameValue(a_si_val, step)
        if lat_a is None:
            return
        si_conv = convert_unit_function("m", "angstrom")
        lat_a = si_conv(float(lat_a))

        # NOTE(review): `basis` is handed over as Cartesian positions; check
        # against the OpenKIM property definition whether these are
        # fractional coordinates (which would call for scaled_positions).
        cellAtoms = Atoms(
            positions=basis,
            cell=[lat_a, lat_a, lat_a],
            pbc=True)
        cellAtoms.positions = ase.geometry.wrap_positions(
            cellAtoms.positions, cellAtoms.cell, pbc=True)

        species = querydict.get('species.source-value')
        if species:
            self.labels = species
            species_nums = [atomic_numbers[Z] for Z in self.labels]
            natoms = len(cellAtoms.numbers)
            if natoms == len(species_nums):
                cellAtoms.numbers = species_nums
            elif len(species_nums) < 2 and natoms > 1:
                cellAtoms.numbers = [species_nums[0]] * natoms
            if len(self.labels) < 2:
                # a single species labels every basis atom
                self.labels = [self.labels[0] for s in cellAtoms.positions]

        ang_conv = convert_unit_function("angstrom", "m")
        si_cell = np.array([[ang_conv(x) for x in i] for i in cellAtoms.cell])
        si_pos = np.array([[ang_conv(x) for x in i] for i in cellAtoms.positions])
        backend.addArrayValues("simulation_cell", si_cell)
        backend.addArrayValues("configuration_periodic_dimensions", np.ones(3, dtype=bool))
        backend.addArrayValues("atom_positions", si_pos)
        if self.labels is not None:
            backend.addArrayValues("atom_labels", np.asarray(self.labels))

    def onEnd_model(self, parser, kimquery):
        """Write the sampling method and the frame sequence of one entry."""
        backend = parser.backend
        with open_section(parser, backend, "section_sampling_method"):
            if self.KIM_TE == 0:
                sampling_method = "geometry_optimization"
            else:
                sampling_method = "molecular_dynamics"
            backend.addValue("sampling_method", sampling_method)
        with open_section(parser, backend, "section_frame_sequence"):
            self._addFrameSeries(
                backend, 'frame_sequence_temperature', self.temperature)
            self._addFrameSeries(
                backend, 'frame_sequence_potential_energy', self.cohesivepot)
            self._addFrameSeries(
                backend, 'x_openkim_frame_sequence_cohesive_energy', self.cohesiveeng)
            backend.addArrayValues('frame_sequence_time', np.zeros(len(self.singleConfCalcs)))
            backend.addValue("frame_sequence_to_sampling_ref", parser.gidSections["section_sampling_method"])
            backend.addArrayValues("frame_sequence_local_frames_ref", np.asarray(self.singleConfCalcs))

    def onEnd_calculation(self, parser, kimquery, step=0):
        """Write energies and stress of frame ``step``.

        ``step`` is used as an index when the cohesive potential energy of
        the query is a list.  The energy values are skipped when the query
        holds no energy for that frame.
        """
        backend = parser.backend
        backend.addValue("single_configuration_calculation_to_system_ref", self.lastSystemDescription)
        if 'temperature.si-value' in kimquery:
            self.temperature = kimquery['temperature.si-value']
        if 'cohesive-potential-energy.si-value' in kimquery:
            self.cohesivepot = kimquery['cohesive-potential-energy.si-value']

        # Only a scalar temperature decides about the T0 energy.  It is
        # compared against None and not tested for truth, because 0 -- the
        # very value looked for -- is falsy.
        zeroTemp = None
        if self.temperature is not None and not isinstance(self.temperature, list):
            zeroTemp = float(self.temperature) <= 0

        energy = self._frameValue(self.cohesivepot, step)
        if energy is not None:
            energy = float(energy)
            backend.addValue("energy_total", energy)
            backend.addValue("energy_potential", energy)
            if zeroTemp:
                backend.addValue("energy_total_T0", energy)

        if 'cauchy-stress.si-value' in kimquery:
            cstress = kimquery['cauchy-stress.si-value']
            # Six components in the order xx, yy, zz, yz, xz, xy describe a
            # symmetric tensor, so the lower triangle mirrors the upper one
            # instead of staying zero.
            xx, yy, zz, yz, xz, xy = [float(c) for c in cstress[:6]]
            stress = np.array([[xx, xy, xz],
                               [xy, yy, yz],
                               [xz, yz, zz]])
            backend.addArrayValues("stress_tensor", stress)
class KIMParser(object):
    """Feeds the entries of an OpenKIM query file to a parsing backend.

    The work of writing values is delegated to ``superContext``; this class
    opens and closes the sections and reports the overall parse status.
    """

    @staticmethod
    def maybeGet(el, meta, default = None):
        """Return ``el[meta]`` when ``meta`` is in ``el``, else ``default``."""
        return el[meta] if meta in el else default

    def __init__(self, parserInfo, superContext):
        self.fIn = None
        self.parserInfo = parserInfo
        self.superContext = superContext
        # maps a section name to the id of its most recently opened instance
        self.gidSections = {}

    def _parseFrame(self, qdict, step=0):
        """Write one single configuration calculation and its system.

        ``step`` is the index of the frame inside the list valued entries
        of ``qdict``.
        """
        backend = self.backend
        context = self.superContext
        with open_section(self, backend, 'section_single_configuration_calculation'):
            context.singleConfCalcs.append(self.gidSections[
                "section_single_configuration_calculation"])
            with open_section(self, backend, 'section_system'):
                context.onEnd_structure(self, qdict, step)
            context.onEnd_calculation(self, qdict, step)

    def parse(self, mainFileUri, fIn, backend):
        """Parse the query file ``fIn`` and emit its content to ``backend``.

        Every entry of the file becomes one section_run.  Any exception
        raised while reading or converting the file is logged and reported
        to the backend as "ParseFailure"; otherwise "ParseSuccess" is
        reported.
        """
        self.mainFileUri = mainFileUri
        self.fIn = fIn
        self.backend = backend
        # The original used a module-level `superContext`, which only
        # exists when the file is run as a script.
        context = self.superContext
        backend.startedParsingSession(
            mainFileUri = mainFileUri,
            parserInfo = self.parserInfo)
        context.startedParsing(self)
        try:
            # Reading happens inside the try so that an unreadable file is
            # reported as a parse failure as well.
            for qdict in KIMQueryReader(self.fIn):
                context.reset()
                with open_section(self, backend, 'section_run'):
                    context.onEnd_program(self, qdict)
                    with open_section(self, backend, 'x_openkim_section_metadata'):
                        context.onEnd_openkim_data(self, qdict)
                    if 'a.si-value' in qdict or 'a-host.si-value' in qdict:
                        # .get(): an entry may carry only 'a-host.si-value'
                        lattice = qdict.get('a.si-value')
                        if isinstance(lattice, list):
                            # Hand over the frame index; the lattice value
                            # itself truncated to index 0 for every frame.
                            for step in range(len(lattice)):
                                self._parseFrame(qdict, step)
                        else:
                            self._parseFrame(qdict)
                    context.onEnd_model(self, qdict)
        except Exception:
            logging.exception("failure when parsing %s", self.mainFileUri)
            backend.finishedParsingSession(
                parserStatus = "ParseFailure",
                parserErrors = ["exception: %s" % sys.exc_info()[1]]
            )
        else:
            backend.finishedParsingSession(
                parserStatus = "ParseSuccess",
                parserErrors = None
            )
# Short alias for the "value or default" lookup of KIMParser.
g = KIMParser.maybeGet

parserInfo = {"name": "parser_openkim", "version": "1.0"}

if __name__ == "__main__":
    # This code is modified from parser-vasprun to work for OpenKIM
    #
    # Command line: argv[1] is passed on as the main file URI and argv[2]
    # as the OpenKIM query JSON file; the events are written to stdout.
    parserDir = os.path.dirname(os.path.abspath(__file__))
    metaInfoPath = os.path.normpath(os.path.join(
        parserDir,
        "../../../../nomad-meta-info/meta_info/nomad_meta_info/openkim.nomadmetainfo.json"))
    metaInfoEnv, warnings = loadJsonFile(
        filePath = metaInfoPath,
        dependencyLoader = None,
        extraArgsHandling = InfoKindEl.ADD_EXTRA_ARGS,
        uri = None)
    superContext = OpenkimContext()
    parser = KIMParser(parserInfo, superContext)
    backend = JsonParseEventsWriterBackend(metaInfoEnv, sys.stdout)
    parser.parse(sys.argv[1], sys.argv[2], backend)
import sys
import os
import os.path
# Directory of this file; both search paths below are resolved against it.
basedir = os.path.dirname(os.path.abspath(__file__))
commondir = os.path.normpath(os.path.join(basedir,"../../../../python-common/common/python"))
parserdir = os.path.normpath(os.path.join(basedir, ".."))
# Put each directory right behind sys.path[0] unless it is listed already.
# parserdir is handled last, so it ends up ahead of commondir when both
# are inserted.
for _extra in (commondir, parserdir):
    if _extra not in sys.path:
        sys.path.insert(1, _extra)
package eu.nomad_lab.parsers
import eu.nomad_lab
import eu.nomad_lab.DefaultPythonInterpreter
import org.{ json4s => jn }
import eu.{ nomad_lab => lab }
import scala.collection.breakOut
// Descriptor of the OpenKIM parser: it tells the framework which files the
// parser claims and how to start the Python script parser-openkim.py.
object OpenkimParser extends SimpleExternalParserGenerator(
name = "OpenkimParser",
// JSON description of the parser: its name, an id built from the name and
// OpenkimVersionInfo.version, and the NomadCore / Openkim version maps with
// all values turned into strings.
parserInfo = jn.JObject(
("name" -> jn.JString("OpenkimParser")) ::
("parserId" -> jn.JString("OpenkimParser" + lab.OpenkimVersionInfo.version)) ::
("versionInfo" -> jn.JObject(
("nomadCoreVersion" -> jn.JObject(lab.NomadCoreVersionInfo.toMap.map {
case (k, v) => k -> jn.JString(v.toString)
}(breakOut): List[(String, jn.JString)])) ::
(lab.OpenkimVersionInfo.toMap.map {
case (key, value) =>
(key -> jn.JString(value.toString))
}(breakOut): List[(String, jn.JString)])
)) :: Nil
),
mainFileTypes = Seq("text/.*"),
// Marker string that the query script writes into its output file;
// presumably matched against the file content -- confirm in the framework.
mainFileRe = """\s*OPENKIM_QUERY_OUTPUT\s*""".r,
// NOTE(review): parser-openkim.py opens its second argument as the JSON
// query file.  If ${mainFilePath} already names that file, the second
// argument below expands to ".../data.json/data.json" -- confirm that
// ${mainFilePath} is a directory here, or pass the file path itself.
cmd = Seq(DefaultPythonInterpreter.pythonExe(), "${envDir}/parsers/openkim/parser/parser-openkim/parser-openkim.py",
"${mainFilePath}", "${mainFilePath}/data.json"),
// Files the parser needs; the first path component of each entry is a key
// of dirMap below.
resList = Seq(
"parser-openkim/parser-openkim.py",
"parser-openkim/setup_paths.py",
"nomad_meta_info/public.nomadmetainfo.json",
"nomad_meta_info/common.nomadmetainfo.json",
"nomad_meta_info/meta_types.nomadmetainfo.json",
"nomad_meta_info/openkim.nomadmetainfo.json"
) ++ DefaultPythonInterpreter.commonFiles(),
dirMap = Map(
"parser-openkim" -> "parsers/openkim/parser/parser-openkim",
"nomad_meta_info" -> "nomad-meta-info/meta_info/nomad_meta_info",
"python" -> "python-common/common/python/nomadcore"
) ++ DefaultPythonInterpreter.commonDirMapping(),
metaInfoEnv = Some(lab.meta.KnownMetaInfoEnvs.openkim)
)
package eu.nomad_lab.parsers
import org.specs2.mutable.Specification
object OpenkimParserTests extends Specification {
"OpenkimParserTest" >> {
"[OpenKIM Query with 3767 entries] test with json-events" >> {
ParserRun.parse(OpenkimParser, "parsers/openkim/test/examples/data.json", "json-events") must_== ParseResult.ParseSuccess
}