Commit 026d5f6a authored by Lauri Himanen's avatar Lauri Himanen
Browse files

Removed materials project script.

parent 8a7bb533
Pipeline #120960 passed with stages
in 28 minutes and 53 seconds
# Copyright The NOMAD Authors.
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os

import requests

# Base URL of the (new) Materials Project API.
# NOTE(review): this value was empty in the committed file — an empty base URL
# makes every request below fail. Filled in with the public MP API endpoint;
# confirm against the MP API swagger documentation.
base_url = 'https://api.materialsproject.org'
def create_nomad_yaml(filepath: str) -> None:
    '''Creates the NOMAD metadata file that contains the needed metainfo for
    an upload containing data from Materials Project.

    Note: despite the function name, the metadata is written as JSON
    (nomad.json), not YAML.

    Args:
        filepath: Path of the upload folder in which the individual entries
            are. The nomad.json file will be created here.

    Raises:
        ValueError: If filepath does not point to a directory.
    '''
    # Check filepath validity
    if not os.path.isdir(filepath):
        raise ValueError('The provided filepath does not point to a directory.')

    # Each folder in the directory that contains a materials.json will get its
    # own entry.
    entries = {}
    for dirname in next(os.walk(filepath))[1]:
        mainfile_path_os = os.path.join(filepath, dirname, 'materials.json')
        mainfile_path_upload = os.path.join(dirname, 'materials.json')
        if os.path.isfile(mainfile_path_os):
            entries[mainfile_path_upload] = {
                'references': [
                    # NOTE(review): the original reference list was lost in
                    # extraction; presumably it links the entry back to its
                    # Materials Project page (the folder name is the
                    # material_id) — confirm against upstream.
                    f'https://materialsproject.org/materials/{dirname}'
                ]
            }

    # Save the information in nomad.json
    with open(f'{filepath}/nomad.json', 'w') as f:
        nomad = {
            'comment': 'Materials Project workflow data',
            'external_db': 'Materials Project',
            'entries': entries
        }
        json.dump(nomad, f)
def create_workflow_data(filepath: str, api_key: str, limit: int = 1) -> None:
    '''Uses the Materials Project API to download information about physical
    properties that are calculated through their different workflows. The data
    is stored in a custom JSON format that is then parseable by the NOMAD
    materials project parser.

    Essentially we are creating archives that contain the MP workflow details
    and results. This data can only be extracted from the MP API as the full
    data behind their workflow is not available as raw files.

    See the API swagger documentation for more information:
    https://api.materialsproject.org/docs

    Args:
        filepath: Path of the folder into which the data will be saved.
        api_key: The Materials Project API key to use.
        limit: Maximum number of materials to download. Defaults to 1.

    Raises:
        ValueError: If filepath does not point to a directory.
    '''
    # Check filepath validity
    if not os.path.isdir(filepath):
        raise ValueError('The provided filepath does not point to a directory.')

    # Get a sorted list of material_ids up to the given limit.
    params = {
        'fields': ['material_id'],
        'sort_fields': ['material_id'],
        'deprecated': False,
        'limit': limit,
    }
    headers = {'x-api-key': api_key}
    response = requests.get(f"{base_url}/materials", params=params, headers=headers)  # type: ignore
    assert response.status_code == 200
    material_ids = [x["material_id"] for x in response.json()["data"]]

    # Start downloading the data for each material, one API route at a time.
    params = {'all_fields': True}
    for material_id in material_ids[:limit]:
        # If materials.json is already present, we skip the entry.
        dirname = f'{filepath}/{material_id}'
        if os.path.isfile(f'{dirname}/materials.json'):
            continue
        # NOTE(review): the route list was lost in extraction. 'materials'
        # must be present and should come first, since its failure aborts the
        # whole entry (see the break below) — confirm the full route list
        # against the MP API swagger documentation.
        routes = [
            'materials',
        ]
        for route in routes:
            response = requests.get(f"{base_url}/{route}/{material_id}", headers=headers, params=params)  # type: ignore
            if response.status_code != 200:
                # If the material itself is not found, skip the whole entry;
                # otherwise just skip this one route.
                if route == "materials":
                    break
                continue
            # Ensure the target directory exists before writing into it.
            os.makedirs(dirname, exist_ok=True)
            with open(f'{dirname}/{route}.json', 'w') as f:
                json.dump(response.json()["data"][0], f, indent=4)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment