Commit 2bc9016a authored by Markus Scheidgen's avatar Markus Scheidgen

Merge branch 'encyclopedia-similarity' into 'v1.0.0'

Added authentication support to all encyclopedia API routes and links from the encyclopedia back to the individual entries.
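
The change applies one pattern throughout the API module: every route gains an @authenticate() decorator, and the shared entry filter consults flask.g.user to choose between 'visible' and 'public' ownership. A minimal sketch of that pattern, using only names that appear in the diff below (the decorator's internals are not part of this change):

    from flask import g
    from flask_restplus import Resource
    from .auth import authenticate

    class EncMaterialResource(Resource):
        @authenticate()  # resolves an optional access token into g.user
        def get(self, material_id):
            # get_enc_filter() returns entries visible to g.user, or only
            # public entries for anonymous requests (g.user is None)
            filters = get_enc_filter()
            ...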

See merge request !158
parents c6956c48 fe35db8e
Pipeline #81068 passed with stages in 47 minutes and 25 seconds
Subproject commit 669c75e414d128c8303291ce58b16a01d2d98633
Subproject commit da7db6108e125b390a4f44b789f02d8b3b09a81e
......@@ -20,16 +20,17 @@ import math
import numpy as np
from flask_restplus import Resource, abort, fields, marshal
from flask import request
from flask import request, g
from elasticsearch_dsl import Search, Q, A
from elasticsearch_dsl.utils import AttrDict
from nomad import config, files, infrastructure
from nomad import config, infrastructure, search
from nomad.files import UploadFiles
from nomad.units import ureg
from nomad.atomutils import get_hill_decomposition
from nomad.datamodel.datamodel import EntryArchive
from .api import api
from .auth import authenticate
from .auth import authenticate, create_authorization_predicate
ns = api.namespace("encyclopedia", description="Access encyclopedia metadata.")
re_formula = re.compile(r"([A-Z][a-z]?)(\d*)")
......@@ -83,12 +84,17 @@ def get_es_doc_values(es_doc, mapping, keys=None):
def get_enc_filter():
"""Returns a shared term filter that will leave out unpublished, embargoed
or invalid entries.
"""Returns a shared term filter that will leave out unpublished (of other
users), embargoed or invalid entries.
"""
# Handle authentication
s = search.SearchRequest()
if g.user is not None:
s.owner('visible', user_id=g.user.user_id)
else:
s.owner('public')
return [
Q("term", published=True),
Q("term", with_embargo=False),
s.q,
Q("term", encyclopedia__status="success"),
]
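# Note (usage grounded in this same file): the list returned by
# get_enc_filter() is consumed as the filter context of a bool query, e.g.
#
#     bool_query = Q("bool", filter=get_enc_filter())
#     s = s.query(bool_query)
#
# Filter context matches without scoring, so the visibility restriction
# applies uniformly to hits and aggregations.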
......@@ -129,6 +135,7 @@ class EncMaterialResource(Resource):
@api.doc("material/<material_id>")
@api.expect(material_query)
@api.marshal_with(material_result, skip_none=True)
@authenticate()
def get(self, material_id):
"""Used to retrieve basic information related to the specified
material.
......@@ -231,6 +238,7 @@ class EncMaterialsResource(Resource):
@api.expect(materials_query, validate=False)
@api.marshal_with(materials_result, skip_none=True)
@api.doc("materials")
@authenticate()
def post(self):
"""Used to query a list of materials with the given search options.
"""
......@@ -269,7 +277,8 @@ class EncMaterialsResource(Resource):
"bool",
filter=get_enc_filter(),
)
s = Search(index=config.elastic.index_name)
# s = Search(index=config.elastic.index_name)
s = s.query(bool_query)
s.aggs.bucket("materials", agg_parent)
buckets_path = {x: "{}._count".format(x) for x in requested_properties}
......@@ -555,6 +564,7 @@ class EncGroupsResource(Resource):
@api.response(200, "Metadata send", fields.Raw)
@api.marshal_with(groups_result)
@api.doc("enc_materials")
@authenticate()
def get(self, material_id):
"""Returns a summary of the calculation groups that were identified for
this material.
......@@ -633,6 +643,7 @@ class EncGroupResource(Resource):
@api.response(200, "Metadata send", fields.Raw)
@api.marshal_with(group_result)
@api.doc("enc_group")
@authenticate()
def get(self, material_id, group_type, group_id):
"""Used to query detailed information for a specific calculation group.
"""
......@@ -717,6 +728,7 @@ class EncSuggestionsResource(Resource):
@api.expect(suggestions_query, validate=False)
@api.marshal_with(suggestions_result, skip_none=True)
@api.doc("enc_suggestions")
@authenticate()
def get(self):
# Parse request arguments
......@@ -749,6 +761,7 @@ class EncSuggestionsResource(Resource):
calc_prop_map = {
"calc_id": "calc_id",
"upload_id": "upload_id",
"code_name": "dft.code_name",
"code_version": "dft.code_version",
"functional_type": "encyclopedia.method.functional_type",
......@@ -763,6 +776,7 @@ calc_prop_map = {
}
calculation_result = api.model("calculation_result", {
"calc_id": fields.String,
"upload_id": fields.String,
"code_name": fields.String,
"code_version": fields.String,
"functional_type": fields.String,
......@@ -795,6 +809,7 @@ class EncCalculationsResource(Resource):
@api.response(400, "Bad request")
@api.response(200, "Metadata send", fields.Raw)
@api.doc("enc_calculations")
@authenticate()
def get(self, material_id):
"""Used to return all calculations related to the given material. Also
returns a representative calculation for each property shown in the
......@@ -943,6 +958,7 @@ class EncStatisticsResource(Resource):
@api.expect(statistics_query, validate=False)
@api.marshal_with(statistics_result, skip_none=True)
@api.doc("enc_statistics")
@authenticate()
def post(self, material_id):
"""Used to return statistics related to the specified material and
calculations.
......@@ -1138,6 +1154,7 @@ class EncCalculationResource(Resource):
@api.expect(calculation_property_query, validate=False)
@api.marshal_with(calculation_property_result, skip_none=True)
@api.doc("enc_calculation")
@authenticate()
def post(self, material_id, calc_id):
"""Used to return calculation details. Some properties are not
available in the ES index and are instead read from the Archive
......@@ -1334,7 +1351,6 @@ class ReportsResource(Resource):
name="webmaster", email="lauri.himanen@gmail.com", message=mail, subject='Encyclopedia error report')
except Exception as e:
abort(500, message="Error sending error report email.")
print(mail)
return "", 204
......@@ -1350,8 +1366,10 @@ def read_archive(upload_id: str, calc_id: str) -> EntryArchive:
For each path, a dictionary containing the path as key and the returned
section as value.
"""
upload_files = files.PublicUploadFiles(upload_id)
with upload_files.read_archive(calc_id, access="public") as archive:
upload_files = UploadFiles.get(
upload_id, is_authorized=create_authorization_predicate(upload_id, calc_id))
with upload_files.read_archive(calc_id) as archive:
data = archive[calc_id]
root = EntryArchive.m_from_dict(data.to_dict())
......
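# Sketch of the authorization pattern used above: UploadFiles.get defers the
# access decision to a callable, so the same code path serves public and
# restricted archives. A simplified, hypothetical shape of that predicate
# (the real create_authorization_predicate lives in .auth and is not part of
# this diff):
#
#     def create_authorization_predicate(upload_id, calc_id=None):
#         def is_authorized() -> bool:
#             # hypothetical logic: public entries are always readable;
#             # otherwise the upload must belong to the authenticated g.user
#             ...
#         return is_authorized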
......@@ -17,20 +17,9 @@ import datetime
import elasticsearch_dsl
import elasticsearch
import sys
import io
import re
import uuid
import json
import threading
import numpy as np
import requests
import ase
import bs4
import matid
from nomad import processing as proc, search, datamodel, infrastructure, utils, config
from nomad import atomutils
from nomad.cli.cli import cli
......@@ -357,206 +346,18 @@ AllowEncodedSlashes On
'''.format(prefix, host, port)) # type: ignore
def write_prototype_data_file(aflow_prototypes: dict, filepath) -> None:
'''Writes the prototype data file in a compressed format to a python
module.
Args:
    aflow_prototypes: Dictionary containing the AFLOW prototype data.
    filepath: Path of the python module to write.
'''
class NoIndent(object):
def __init__(self, value):
self.value = value
class NoIndentEncoder(json.JSONEncoder):
'''A custom JSON encoder that can pretty-print objects wrapped in the
NoIndent class.
'''
def __init__(self, *args, **kwargs):
super(NoIndentEncoder, self).__init__(*args, **kwargs)
self.kwargs = dict(kwargs)
del self.kwargs['indent']
self._replacement_map = {}
def default(self, o): # pylint: disable=E0202
if isinstance(o, NoIndent):
key = uuid.uuid4().hex
self._replacement_map[key] = json.dumps(o.value, **self.kwargs)
return "@@%s@@" % (key,)
else:
return super(NoIndentEncoder, self).default(o)
def encode(self, o):
result = super(NoIndentEncoder, self).encode(o)
for k, v in self._replacement_map.items():
result = result.replace('"@@%s@@"' % (k,), v)
return result
prototype_dict = aflow_prototypes["prototypes_by_spacegroup"]
for prototypes in prototype_dict.values():
for prototype in prototypes:
# Save the information back in a prettified form
prototype["atom_positions"] = NoIndent(prototype["atom_positions"])
prototype["atom_labels"] = NoIndent(prototype["atom_labels"])
prototype["lattice_vectors"] = NoIndent(prototype["lattice_vectors"])
try:
prototype["normalized_wyckoff_matid"] = NoIndent(prototype["normalized_wyckoff_matid"])
except KeyError:
pass
# Save the updated data
with io.open(filepath, "w", encoding="utf8") as f:
json_dump = json.dumps(aflow_prototypes, ensure_ascii=False, indent=4, sort_keys=True, cls=NoIndentEncoder)
json_dump = re.sub(r"\"(-?\d+(?:[\.,]\d+)?)\"", r'\1', json_dump) # Removes quotes around numbers
f.write("# -*- coding: utf-8 -*-\naflow_prototypes = {}\n".format(json_dump))
@ops.command(help='Updates the AFLOW prototype information using the latest online version and writes the results to a python module in the given FILEPATH.')
@click.argument('FILEPATH', nargs=1, type=str)
@click.option('--matches-only', is_flag=True, help='Only update the match information that depends on the symmetry analysis settings. Will not perform an online update.')
@click.pass_context
def prototypes_update(ctx, filepath, matches_only):
from nomad.cli.admin import prototypes
prototypes.update_prototypes(ctx, filepath, matches_only)
if matches_only:
from nomad.aflow_prototypes import aflow_prototypes
else:
# The basic AFLOW prototype data is available in a Javascript file. Here we
# retrieve it and read only the prototype list from it.
prototypes_file_url = 'http://aflowlib.org/CrystalDatabase/js/table_sort.js'
r = requests.get(prototypes_file_url, allow_redirects=True)
datastring = r.content.decode("utf-8")
datastring = datastring.split('];')[0]
datastring = datastring.split('= [')[1]
data = json.loads('[' + datastring + ']')
newdictarray = []
n_prototypes = 0
n_missing = 0
for protodict in data:
n_prototypes += 1
newdict = {}
# Make prototype plaintext
prototype = bs4.BeautifulSoup(protodict["Prototype"], "html5lib").getText()
# Add to new dictionary
newdict['Notes'] = protodict['Notes']
newdict['Prototype'] = prototype
newdict['Space Group Symbol'] = protodict['Space Group Symbol']
newdict['Space Group Number'] = protodict['Space Group Number']
newdict['Pearsons Symbol'] = protodict['Pearson Symbol']
newdict['Strukturbericht Designation'] = protodict['Strukturbericht Designation']
newdict['aflow_prototype_id'] = protodict['AFLOW Prototype']
newdict['aflow_prototype_url'] = 'http://www.aflowlib.org/CrystalDatabase/' + protodict['href'][2:]
# Download the CIF or POSCAR file if possible and build an ASE Atoms object
# to obtain labels, positions, and cell
cifurl = 'http://www.aflowlib.org/CrystalDatabase/CIF/' + protodict['href'][2:-5] + '.cif'
r = requests.get(cifurl, allow_redirects=True)
cif_str = r.content.decode("utf-8")
cif_file = io.StringIO()
cif_file.write(cif_str)
cif_file.seek(0)
try:
atoms = ase.io.read(cif_file, format='cif')
except Exception:
print("Error in getting prototype structure from CIF: {}", format(cifurl))
# Then try to get structure from POSCAR
try:
poscarurl = 'http://www.aflowlib.org/CrystalDatabase/POSCAR/' + protodict['href'][2:-5] + '.poscar'
r = requests.get(poscarurl, allow_redirects=True)
poscar_str = r.content.decode("utf-8")
poscar_file = io.StringIO()
poscar_file.write(poscar_str)
poscar_file.seek(0)
atoms = ase.io.read(poscar_file, format='vasp')
except Exception:
print("Error in getting prototype structure from POSCAR: {}".format(poscarurl))
print("Could not read prototype structure from CIF or POSCAR file for prototype: {}, {}, ".format(prototype, newdict['aflow_prototype_url']))
n_missing += 1
continue
atom_positions = atoms.get_positions()
atom_labels = atoms.get_chemical_symbols()
cell = atoms.get_cell()
newdict['lattice_vectors'] = cell.tolist()
newdict['atom_positions'] = atom_positions.tolist()
newdict['atom_labels'] = atom_labels
newdictarray.append(newdict)
print("Processed: {}".format(len(newdictarray)))
# Group the prototype dictionaries by space group number
structure_types_by_spacegroup = {}
for i_sg in range(1, 231):
protos_sg = []
for newdict in newdictarray:
if newdict['Space Group Number'] == i_sg:
protos_sg.append(newdict)
structure_types_by_spacegroup[i_sg] = protos_sg
# Wrap in a dictionary that can hold other data, e.g. the symmetry tolerance parameter.
aflow_prototypes = {
"prototypes_by_spacegroup": structure_types_by_spacegroup
}
print(
"Extracted latest AFLOW prototypes online. Total number of "
"successfully fetched prototypes: {}, missing: {}"
.format(n_prototypes, n_missing)
)
# Update matches
n_prototypes = 0
n_failed = 0
n_unmatched = 0
prototype_dict = aflow_prototypes["prototypes_by_spacegroup"]
for aflow_spg_number, prototypes in prototype_dict.items():
n_prototypes += len(prototypes)
for prototype in prototypes:
# Read prototype structure
pos = np.array(prototype["atom_positions"])
labels = prototype["atom_labels"]
cell = np.array(prototype["lattice_vectors"])
atoms = ase.Atoms(
symbols=labels,
positions=pos,
cell=cell,
pbc=True
)
# First check whether the space group can be matched with the one in AFLOW
try:
symm = matid.SymmetryAnalyzer(atoms, config.normalize.prototype_symmetry_tolerance)
spg_number = symm.get_space_group_number()
wyckoff_matid = symm.get_wyckoff_letters_conventional()
norm_system = symm.get_conventional_system()
except Exception:
n_failed += 1
else:
# If the space group is matched, add the MatID normalized Wyckoff
# letters to the data.
if spg_number == aflow_spg_number:
atomic_numbers = norm_system.get_atomic_numbers()
normalized_wyckoff_matid = atomutils.get_normalized_wyckoff(atomic_numbers, wyckoff_matid)
prototype["normalized_wyckoff_matid"] = normalized_wyckoff_matid
else:
n_unmatched += 1
print(
"Updated matches in AFLOW prototype library. Total number of "
"prototypes: {}, unmatched: {}, failed: {}"
.format(n_prototypes, n_unmatched, n_failed)
)
# Write data file to the specified path
write_prototype_data_file(aflow_prototypes, filepath)
@admin.command(help='Updates the springer database in nomad.config.springer_msg_db_path.')
@ops.command(help='Updates the springer database in nomad.config.normalize.springer_db_path.')
@click.option('--max-n-query', default=10, type=int, help='Number of unsuccessful springer request before returning an error. Default is 10.')
@click.option('--retry-time', default=120, type=int, help='Time in seconds to retry after unsuccessful request. Default is 120.')
def springer_update(max_n_query, retry_time):
from nomad.cli.admin import springer
springer.update_springer_data(max_n_query, retry_time)
springer.update_springer(max_n_query, retry_time)
import io
import re
import uuid
import json
import numpy as np
import requests
import ase
import bs4
import matid
from nomad import atomutils, config
def write_prototype_data_file(aflow_prototypes: dict, filepath) -> None:
'''Writes the prototype data file in a compressed format to a python
module.
Args:
    aflow_prototypes: Dictionary containing the AFLOW prototype data.
    filepath: Path of the python module to write.
'''
class NoIndent(object):
def __init__(self, value):
self.value = value
class NoIndentEncoder(json.JSONEncoder):
'''A custom JSON encoder that can pretty-print objects wrapped in the
NoIndent class.
'''
def __init__(self, *args, **kwargs):
super(NoIndentEncoder, self).__init__(*args, **kwargs)
self.kwargs = dict(kwargs)
del self.kwargs['indent']
self._replacement_map = {}
def default(self, o): # pylint: disable=E0202
if isinstance(o, NoIndent):
key = uuid.uuid4().hex
self._replacement_map[key] = json.dumps(o.value, **self.kwargs)
return "@@%s@@" % (key,)
else:
return super(NoIndentEncoder, self).default(o)
def encode(self, o):
result = super(NoIndentEncoder, self).encode(o)
for k, v in self._replacement_map.items():
result = result.replace('"@@%s@@"' % (k,), v)
return result
prototype_dict = aflow_prototypes["prototypes_by_spacegroup"]
for prototypes in prototype_dict.values():
for prototype in prototypes:
# Save the information back in a prettified form
prototype["atom_positions"] = NoIndent(prototype["atom_positions"])
prototype["atom_labels"] = NoIndent(prototype["atom_labels"])
prototype["lattice_vectors"] = NoIndent(prototype["lattice_vectors"])
try:
prototype["normalized_wyckoff_matid"] = NoIndent(prototype["normalized_wyckoff_matid"])
except KeyError:
pass
# Save the updated data
with io.open(filepath, "w", encoding="utf8") as f:
json_dump = json.dumps(aflow_prototypes, ensure_ascii=False, indent=4, sort_keys=True, cls=NoIndentEncoder)
json_dump = re.sub(r"\"(-?\d+(?:[\.,]\d+)?)\"", r'\1', json_dump) # Removes quotes around numbers
f.write("# -*- coding: utf-8 -*-\naflow_prototypes = {}\n".format(json_dump))
def update_prototypes(ctx, filepath, matches_only):
if matches_only:
from nomad.aflow_prototypes import aflow_prototypes
else:
# The basic AFLOW prototype data is available in a Javascript file. Here we
# retrieve it and read only the prototype list from it.
prototypes_file_url = 'http://aflowlib.org/CrystalDatabase/js/table_sort.js'
r = requests.get(prototypes_file_url, allow_redirects=True)
datastring = r.content.decode("utf-8")
datastring = datastring.split('];')[0]
datastring = datastring.split('= [')[1]
data = json.loads('[' + datastring + ']')
newdictarray = []
n_prototypes = 0
n_missing = 0
for protodict in data:
n_prototypes += 1
newdict = {}
# Make prototype plaintext
prototype = bs4.BeautifulSoup(protodict["Prototype"], "html5lib").getText()
# Add to new dictionary
newdict['Notes'] = protodict['Notes']
newdict['Prototype'] = prototype
newdict['Space Group Symbol'] = protodict['Space Group Symbol']
newdict['Space Group Number'] = protodict['Space Group Number']
newdict['Pearsons Symbol'] = protodict['Pearson Symbol']
newdict['Strukturbericht Designation'] = protodict['Strukturbericht Designation']
newdict['aflow_prototype_id'] = protodict['AFLOW Prototype']
newdict['aflow_prototype_url'] = 'http://www.aflowlib.org/CrystalDatabase/' + protodict['href'][2:]
# Download the CIF or POSCAR file if possible and build an ASE Atoms object
# to obtain labels, positions, and cell
cifurl = 'http://www.aflowlib.org/CrystalDatabase/CIF/' + protodict['href'][2:-5] + '.cif'
r = requests.get(cifurl, allow_redirects=True)
cif_str = r.content.decode("utf-8")
cif_file = io.StringIO()
cif_file.write(cif_str)
cif_file.seek(0)
try:
atoms = ase.io.read(cif_file, format='cif')
except Exception:
print("Error in getting prototype structure from CIF: {}", format(cifurl))
# Then try to get structure from POSCAR
try:
poscarurl = 'http://www.aflowlib.org/CrystalDatabase/POSCAR/' + protodict['href'][2:-5] + '.poscar'
r = requests.get(poscarurl, allow_redirects=True)
poscar_str = r.content.decode("utf-8")
poscar_file = io.StringIO()
poscar_file.write(poscar_str)
poscar_file.seek(0)
atoms = ase.io.read(poscar_file, format='vasp')
except Exception:
print("Error in getting prototype structure from POSCAR: {}".format(poscarurl))
print("Could not read prototype structure from CIF or POSCAR file for prototype: {}, {}, ".format(prototype, newdict['aflow_prototype_url']))
n_missing += 1
continue
atom_positions = atoms.get_positions()
atom_labels = atoms.get_chemical_symbols()
cell = atoms.get_cell()
newdict['lattice_vectors'] = cell.tolist()
newdict['atom_positions'] = atom_positions.tolist()
newdict['atom_labels'] = atom_labels
newdictarray.append(newdict)
print("Processed: {}".format(len(newdictarray)))
# Group the prototype dictionaries by space group number
structure_types_by_spacegroup = {}
for i_sg in range(1, 231):
protos_sg = []
for newdict in newdictarray:
if newdict['Space Group Number'] == i_sg:
protos_sg.append(newdict)
structure_types_by_spacegroup[i_sg] = protos_sg
# Wrap in a dictionary that can hold other data, e.g. the symmetry tolerance parameter.
aflow_prototypes = {
"prototypes_by_spacegroup": structure_types_by_spacegroup
}
print(
"Extracted latest AFLOW prototypes online. Total number of "
"successfully fetched prototypes: {}, missing: {}"
.format(n_prototypes, n_missing)
)
# Update matches
n_prototypes = 0
n_failed = 0
n_unmatched = 0
prototype_dict = aflow_prototypes["prototypes_by_spacegroup"]
for aflow_spg_number, prototypes in prototype_dict.items():
n_prototypes += len(prototypes)
for prototype in prototypes:
# Read prototype structure
pos = np.array(prototype["atom_positions"])
labels = prototype["atom_labels"]
cell = np.array(prototype["lattice_vectors"])
atoms = ase.Atoms(
symbols=labels,
positions=pos,
cell=cell,
pbc=True
)
# First check whether the space group can be matched with the one in AFLOW
try:
symm = matid.SymmetryAnalyzer(atoms, config.normalize.prototype_symmetry_tolerance)
spg_number = symm.get_space_group_number()
wyckoff_matid = symm.get_wyckoff_letters_conventional()
norm_system = symm.get_conventional_system()
except Exception:
n_failed += 1
else:
# If the space group is matched, add the MatID normalized Wyckoff
# letters to the data.
if spg_number == aflow_spg_number:
atomic_numbers = norm_system.get_atomic_numbers()
normalized_wyckoff_matid = atomutils.get_normalized_wyckoff(atomic_numbers, wyckoff_matid)
prototype["normalized_wyckoff_matid"] = normalized_wyckoff_matid
else:
n_unmatched += 1
print(
"Updated matches in AFLOW prototype library. Total number of "
"prototypes: {}, unmatched: {}, failed: {}"
.format(n_prototypes, n_unmatched, n_failed)
)
# Write data file to the specified path
write_prototype_data_file(aflow_prototypes, filepath)
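# Hypothetical CLI invocation (the command name is assumed from the click
# registration in cli.py, with click's usual underscore-to-dash renaming):
#
#     nomad admin ops prototypes-update aflow_prototypes.py
#     nomad admin ops prototypes-update --matches-only aflow_prototypes.py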
......@@ -156,7 +156,7 @@ def _download(path: str, max_n_query: int = 10, retry_time: int = 120) -> str:
return response.text
def update_springer_data(max_n_query: int = 10, retry_time: int = 120):
def update_springer(max_n_query: int = 10, retry_time: int = 120):
'''
Downloads the Springer quantities related to a structure from Springer and
updates the database.
......