Skip to content
Snippets Groups Projects
Commit 918a814b authored by Daria M. Tomecka
Browse files

Adding normalizing script for classification according to Springer Materials

parent 90fc4e8a
No related branches found
No related tags found
No related merge requests found
##programname: classify4me [db_file] [formula]
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import setup_paths
from nomadcore.parser_backend import JsonParseEventsWriterBackend
import json
import sqlite3
import sys, time, os.path
from io import open
from nomadcore.local_meta_info import loadJsonFile, InfoKindEl
import logging
# Command line: <script> <db_file> <formula>
# Fail with a usage message instead of a bare IndexError when arguments are missing.
if len(sys.argv) < 3:
    sys.exit("usage: %s <db_file> <formula>" % os.path.basename(sys.argv[0]))
DB_FILE = sys.argv[1]
INPUT_FORMULA = sys.argv[2]

# Connecting to the database file.
# sqlite3.connect() creates a new, empty database when the path does not exist,
# which would only surface later as a "no such table" error; stop here instead.
DB = os.path.exists(DB_FILE)
if not DB:
    sys.exit("database file not found: " + DB_FILE)
conn = sqlite3.connect(DB_FILE)
cur = conn.cursor()

# The parse events are written to standard output.
fOut = sys.stdout

# Meta-info definitions are loaded from a path relative to this file.
metaInfoPath = os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../../../../nomad-meta-info/meta_info/nomad_meta_info/public.nomadmetainfo.json"))
metaInfoEnv, warnings = loadJsonFile(filePath = metaInfoPath, dependencyLoader = None, extraArgsHandling = InfoKindEl.ADD_EXTRA_ARGS, uri = None)
backend = JsonParseEventsWriterBackend(metaInfoEnv, fOut)
_SPRINGER_DOCS_URL = "http://materials.springer.com/isp/crystallographic/docs/"

# All statements take the formula as a bound parameter (``?``) rather than
# being assembled with string formatting, so quotes or backslashes in the
# formula can neither break nor alter the SQL.
_ENTRY_ID_SQL = """
select entry.space_group_number, entry.entry_id from entry
where entry.alphabetic_formula = ( ? ) group by entry.space_group_number, entry.entry_id;"""

_COMPOUND_CLASS_SQL = """
select entry.space_group_number, compound_classes.compound_class_name, count(*)
from
entry
join entry_compound_class on entry.entry_nr = entry_compound_class.entry_nr
join compound_classes on compound_classes.compound_class_nr = entry_compound_class.compound_class_nr
where
entry.alphabetic_formula = ( ? )
group by entry.space_group_number, compound_classes.compound_class_name
;"""

_CLASSIFICATION_SQL = """
select entry.space_group_number, classification.classification_name, count(*)
from
entry
join entry_classification on entry.entry_nr = entry_classification.entry_nr
join classification on classification.classification_nr = entry_classification.classification_nr
where
entry.alphabetic_formula = ( ? )
group by entry.space_group_number, classification.classification_name
;"""

_REFERENCE_SQL = """
select entry.space_group_number, reference.reference_name, entry.entry_id
from
entry
join entry_reference on entry.entry_nr = entry_reference.entry_nr
join reference on reference.reference_nr = entry_reference.reference_nr
where
entry.alphabetic_formula = ( ? )
group by entry.space_group_number, reference.reference_name
;"""


def _springer_url(springer_id):
    """Return the Springer Materials URL for an id starting with ``sd_``."""
    if not springer_id.startswith("sd_"):
        raise ValueError("does not know how to construct url for id " + springer_id)
    return _SPRINGER_DOCS_URL + springer_id


def _subsection_list(materials, group, section_name):
    """Return (creating it if needed) the list stored for section_name of a space group."""
    return materials.setdefault(group, {}).setdefault(section_name, [])


def _collect_materials(cursor, formula):
    """Run the four queries and return a dict keyed by space group number."""
    materials = {}

    cursor.execute(_ENTRY_ID_SQL, (formula,))
    for group, springer_id in cursor.fetchall():
        _subsection_list(materials, group, "section_springer_id").append({
            "springer_id": springer_id,
            "springer_url": _springer_url(springer_id),
        })

    cursor.execute(_COMPOUND_CLASS_SQL, (formula,))
    for group, compound_class, count in cursor.fetchall():
        _subsection_list(materials, group, "section_springer_compound_class").append({
            "springer_compound_class": compound_class,
            "springer_number_of_compound_class_reference_per_material": count,
        })

    cursor.execute(_CLASSIFICATION_SQL, (formula,))
    for group, classification, count in cursor.fetchall():
        _subsection_list(materials, group, "section_springer_classification").append({
            "springer_classification": classification,
            "springer_number_of_classification_reference_per_material": count,
        })

    # NOTE(review): as in the original (which marks this part "to be
    # corrected"), reference rows are appended to "section_springer_id" with
    # the reference nested inside the entry, in addition to the id entries
    # already added by the first query.
    cursor.execute(_REFERENCE_SQL, (formula,))
    for group, reference, springer_id in cursor.fetchall():
        _subsection_list(materials, group, "section_springer_id").append({
            "springer_id": springer_id,
            "springer_url": _springer_url(springer_id),
            "section_springer_references": [{
                "springer_reference": reference,
            }],
        })

    for group, material in materials.items():
        material["springer_formula"] = formula
        material["springer_space_group_number"] = int(group)
    return materials


def _write_subsections(out_backend, material, section_name, value_names):
    """Emit one section per item of material[section_name], adding the listed values if present."""
    for item in material.get(section_name, []):
        index = out_backend.openSection(section_name)
        for value_name in value_names:
            if value_name in item:
                out_backend.addValue(value_name, item[value_name])
        out_backend.closeSection(section_name, index)


def _write_material(out_backend, material):
    """Emit one section_springer_material with its sub-sections and values."""
    index = out_backend.openSection("section_springer_material")
    _write_subsections(out_backend, material, "section_springer_classification", (
        "springer_classification",
        "springer_number_of_classification_reference_per_material",
    ))
    _write_subsections(out_backend, material, "section_springer_compound_class", (
        "springer_compound_class",
        "springer_number_of_compound_class_reference_per_material",
    ))
    _write_subsections(out_backend, material, "section_springer_id", (
        "springer_id",
        "springer_url",
    ))
    # NOTE(review): _collect_materials never sets a material-level
    # "section_springer_references" key (references are nested under
    # "section_springer_id"), so this call currently emits nothing.
    _write_subsections(out_backend, material, "section_springer_references", (
        "springer_reference",
        "springer_id",
        "springer_url",
    ))
    for value_name in ("springer_formula", "springer_space_group_number"):
        if value_name in material:
            out_backend.addValue(value_name, material[value_name])
    out_backend.closeSection("section_springer_material", index)


def classify4me(formula=None, cursor=None, out_backend=None):
    """Look up a formula in the Springer database and emit its classification.

    For every space group number found for the formula, one
    ``section_springer_material`` is written to the backend, containing the
    classification, compound class and Springer id sub-sections followed by
    ``springer_formula`` and ``springer_space_group_number``.

    Args:
        formula: value compared against ``entry.alphabetic_formula``;
            defaults to the module-level ``INPUT_FORMULA``.
        cursor: DB-API cursor used for the queries; defaults to the
            module-level ``cur``.
        out_backend: object providing ``openSection``, ``addValue`` and
            ``closeSection``; defaults to the module-level ``backend``.

    Raises:
        ValueError: if an entry id does not start with ``sd_``.
    """
    if formula is None:
        formula = INPUT_FORMULA
    if cursor is None:
        cursor = cur
    if out_backend is None:
        out_backend = backend

    logging.info('(%r) classified according to Springer Materials as follow: \n \n', formula)
    for material in _collect_materials(cursor, formula).values():
        _write_material(out_backend, material)
# Run the classification and flush the emitted events; the cursor and the
# connection are released even when classify4me() raises.
try:
    classify4me()
    fOut.flush()
finally:
    cur.close()
    conn.close()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment