Compare revisions: nomad-lab/analytics
Changes are shown as if the source revision was being merged into the target revision.
Showing 1372 additions and 301 deletions.
"""File to describe SixthPowerNode"""
import sisso.feature_creation.nodes.functions as fxn
import sympy
from sisso.feature_creation.nodes.operator_node import OperatorNode
class SixthPowerNode(OperatorNode):
"""Node to raise a feature to the sixth power
"""
def __init__(self, feat):
"""Node to raise a feature to the sixth power
Args:
feat (Node): Feature to add raise a feature to the sixth power
Raises:
ValueError: If feature would leave accepted range
"""
disallowed = ["cbrt", "sqrt"]
if feat.tag in disallowed:
raise ValueError("Invalid feature combination")
super(SixthPowerNode, self).__init__(fxn.six_pwr, "**6", feat)
@property
def expr(self):
"""The sympy.Expression for the resulting feature
Returns:
sympy.Expression: The algebraic representation of the new feature
"""
return sympy.expand(
sympy.trigsimp(sympy.powdenest(self.feat.fxn_in_expr ** 6, force=True))
)
# @property
def get_unit(self):
"""The sympy.Expression for the unit of the resulting feature
Returns:
sympy.Expression: The resulting unit of the feature
"""
return self.feat.unit ** 6.0
"""File to describe SquareNode"""
import sisso.feature_creation.nodes.functions as fxn
import sympy
from sisso.feature_creation.nodes.operator_node import OperatorNode
class SqNode(OperatorNode):
"""Node to add square operators to features
"""
def __init__(self, feat):
"""Node to add square operators to features
Args:
feat (Node): Feature to add the square operator to
Raises:
ValueError: If feature would leave accepted range
"""
if feat.tag == "sqrt":
raise ValueError("Invalid feature combination")
super(SqNode, self).__init__(fxn.sq, "**2", feat)
@property
def expr(self):
"""The sympy.Expression for the resulting feature
Returns:
sympy.Expression: The algebraic representation of the new feature
"""
return sympy.expand(
sympy.trigsimp(sympy.powdenest(self.feat.fxn_in_expr ** 2, force=True))
)
# @property
def get_unit(self):
"""The sympy.Expression for the unit of the resulting feature
Returns:
sympy.Expression: The resulting unit of the feature
"""
return self.feat.unit ** 2.0
"""File to describe SqrtNode"""
import numpy as np
import sisso.feature_creation.nodes.functions as fxn
import sympy
from scipy import stats
from sisso.feature_creation.nodes.operator_node import OperatorNode
from sisso.feature_creation.nodes.operator_nodes.sixth_power import SixthPowerNode
from sisso.feature_creation.nodes.operator_nodes.derivatives import get_deriv
class SqrtNode(OperatorNode):
"""Node to add square root operators to features
"""
def __init__(self, feat):
"""Node to add square root operators to features
Args:
feat (Node): Feature to add the square root operator to
Raises:
ValueError: If feat.value has a negative number
"""
disallowed = ["**2", "**6"]
if feat.tag in disallowed:
raise ValueError("Invalid feature combination")
super(SqrtNode, self).__init__(fxn.sqrt, "sqrt", feat)
self.bounds = (-1.0 * np.inf, np.inf)
def initial_params(self, prop, mat_inds):
"""Get an initial estimate of the parameters
Args:
prop (np.ndarray(float)): Property to fit to
mat_inds (np.ndarray(int)): Indexes to include in the fitting
Returns:
dict: The initial parameter guess based on the property and self.feat.fxn_in_value
"""
val = self.feat.fxn_in_value[mat_inds]
# Get and Transform the derivative of prop with respect to feat
x, prop_prim = get_deriv(val, prop[mat_inds], 1)
prop_trans = (prop_prim) ** (-2.0)
# Disregard any strongly non-linear trends due to noise/outliers
threshold = np.median(np.abs(np.diff(prop_trans))) * 1.1
inds = np.where(np.abs(np.diff(prop_trans)) <= threshold)[0]
# Get initial parameter guess
alpha, a = stats.linregress(x[inds], prop_trans[inds])[:2]
# Correct alpha and a
alpha = 4.0 / alpha
a *= alpha ** 2.0 / 4.0
# Get an estimate of the constant shift and scale factor
b, c = stats.linregress(np.sqrt(alpha * val + a), prop[mat_inds])[:2]
return {"alpha": alpha, "a": a, "b": b, "c": c}
@property
def fxn_in_value(self):
"""Calculate the value of the node by applying _func to feat_1 and feat_2 in that order
Returns:
np.ndarray: The result of applying _func to attached Nodes, with only alpha set the correct value
"""
if self._fxn_in_value is not None:
return self._fxn_in_value
return self._func(self.feat.fxn_in_value)
def set_fxn_in_value(self):
"""Sets the value array of the feature based off the function/feat.fxn_in_value"""
self._fxn_in_value = self._func(self.feat.fxn_in_value)
@property
def expr(self):
"""The sympy.Expression for the resulting feature
Returns:
sympy.Expression: The algebraic representation of the new feature
"""
return self.params["c"] + sympy.trigsimp(
sympy.powdenest(
sympy.sqrt(
self.params["alpha"] * self.feat.fxn_in_expr + self.params["a"]
),
force=True,
)
)
@property
def fxn_in_expr(self):
"""The sympy.Expression used to generate expressions for subsequent features that use this feature
Returns:
sympy.Expression: The algebraic representation of the new feature with only alpha used from the params
"""
return sympy.trigsimp(
sympy.powdenest(sympy.sqrt(self.feat.fxn_in_expr), force=True)
)
# @property
def get_unit(self):
"""The sympy.Expression for the unit of the resulting feature
Returns:
sympy.Expression: The resulting unit of the feature
"""
return self.feat.unit ** 0.5
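The linearization that `initial_params` relies on can be checked numerically. Below is a minimal sketch, assuming the model form `prop = b * sqrt(alpha * x + a) + c` noted in the comments above; it uses `np.gradient` in place of the package's `get_deriv` helper, so it is illustrative rather than a test of the actual code path:

```python
import numpy as np

# Synthetic data following prop = b * sqrt(alpha * x + a) + c
alpha, a, b, c = 2.0, 1.0, 3.0, -0.5
x = np.linspace(0.1, 5.0, 200)
prop = b * np.sqrt(alpha * x + a) + c

# dprop/dx = b * alpha / (2 * sqrt(alpha * x + a)), so
# (dprop/dx)**-2 = 4 * (alpha * x + a) / (b**2 * alpha**2) is linear in x
deriv = np.gradient(prop, x)
prop_trans = deriv ** -2.0

slope = np.polyfit(x, prop_trans, 1)[0]
print(np.isclose(slope, 4.0 / (b ** 2 * alpha), rtol=1e-2))  # True
```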
"""File to describe SubNode"""
import sisso.feature_creation.nodes.functions as fxn
import sympy
from sisso.feature_creation.nodes.operator_node import OperatorNode
class SubNode(OperatorNode):
"""Node to add subtraction operators to the features
"""
def __init__(self, feat_1, feat_2):
"""Node to add subtraction operators to the features
Args:
feat_1 (Node): Feature on the left of the subtraction
feat_2 (Node): Feature on the right of the subtraction
Raises:
ValueError: If the unit for feat_1 and feat_2 are not the same, the resulting feature would be a constant or go outside accepted range
# """
if feat_1.unit != feat_2.unit:
raise ValueError("When subtracting both features must have the same units")
if feat_1 == feat_2:
raise ValueError("When subtracting both features must be different")
func = lambda x: fxn.sub(x, self._n_els)
func.name = "sub"
func.default_params = {}
super(SubNode, self).__init__(func, "-", [feat_1, feat_2])
@property
def expr(self):
"""The sympy.Expression for the resulting feature
Returns:
sympy.Expression: The algebraic representation of the new feature
"""
new_expr = (self.feats[0].fxn_in_expr) - (self.feats[1].fxn_in_expr)
return sympy.trigsimp(new_expr)
# @property
def get_unit(self):
"""The sympy.Expression for the unit of the resulting feature
Returns:
sympy.Expression: The resulting unit of the feature
"""
return self.feats[0].unit
def reset_pivot(self, pvt):
"""reset the divider for self._func"""
self._func = lambda x: fxn.sub(x, pvt)
self._func.name = "sub"
self._func.default_params = {}
"""Defines the unit dictionary for comparisons"""
import re
import numpy as np
class Unit(dict):
def __init__(self, dct=None):
if dct is not None:
for key, val in dct.items():
self[key] = val
@classmethod
def from_str(cls, string):
string = string.replace(" ", "")
unit_comps = re.split(r"/|\*", string.replace("**", "^"))
mult_ops = [m.start() for m in re.finditer(r"\*", string)]
div_ops = [m.start() for m in re.finditer("/", string)]
ops = np.hstack((np.array(mult_ops), -1.0 * np.array(div_ops)))
ops = np.hstack((np.ones(1), np.sign(ops[np.abs(ops).argsort()])))
dct = dict()
for comp, op in zip(unit_comps, ops):
type_exp = comp.split("^")
if len(type_exp) == 1:
dct[type_exp[0]] = float(op)
elif len(type_exp) == 2:
dct[type_exp[0]] = int(type_exp[1]) * float(op)
else:
raise ValueError("Invalid unit")
return cls(dct)
def __repr__(self):
string = ""
for key, val in self.items():
string += " * " + key + "^" + str(val)
return string[3:]
def __mul__(self, unit_2):
dct = self.copy()
for key, val in unit_2.items():
dct[key] = dct.get(key, 0) + val
if dct[key] == 0:
del dct[key]
return Unit(dct)
def __truediv__(self, unit_2):
dct = self.copy()
for key, val in unit_2.items():
dct[key] = dct.get(key, 0) - val
if dct[key] == 0:
del dct[key]
return Unit(dct)
def __pow__(self, power):
dct = self.copy()
for key, val in dct.items():
dct[key] = val * power
return Unit(dct)
def inv(self):
dct = self.copy()
for key, val in dct.items():
dct[key] = val * -1.0
return Unit(dct)
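A minimal usage sketch of the `Unit` class defined above (assuming it is in scope); the printed forms follow the `__repr__` defined on the class:

```python
u = Unit.from_str("kg * m / s**2")  # parsed into an exponent per base unit
print(u)                            # kg^1.0 * m^1.0 * s^-2.0
print(u * Unit.from_str("s"))       # kg^1.0 * m^1.0 * s^-1.0
print(u ** 2)                       # kg^2.0 * m^2.0 * s^-4.0
print(u.inv())                      # kg^-1.0 * m^-1.0 * s^2.0
```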
import warnings
from argparse import ArgumentParser as argpars
from time import time

import dill

from sisso.descriptor_identifcation.SISSO_regressor import SISSO_Regressor, print_models
from sisso.feature_creation.feature_space import FeatureSpace
from sisso.feature_creation.nodes.operator_nodes import op_map
from sisso.utils.mpi_interface import my_rank
from sisso.validator.validator import KFoldValidator, LeaveOutValidator

warnings.filterwarnings("ignore")


def main():
    parser = argpars(description="Run a SISSO regression")
    parser.add_argument(
        "-d",
        "--data_file",
        type=str,
        nargs="?",
        help="file where all primary feature data is stored",
        default="data.csv",
    )
    parser.add_argument(
        "-p", "--prop_key", type=str, nargs="?", help="column key for the property"
    )
    parser.add_argument(
        "-o",
        "--ops",
        type=str,
        nargs="*",
        help="which operators to use for creating the feature set. If 'all' is passed then use all available operators",
        default="all",
    )
    parser.add_argument(
        "-c",
        "--cols",
        type=str,
        nargs="*",
        help="which columns to use as primary features from data_file. If 'all' is passed then take all non-property columns",
        default="all",
    )
    parser.add_argument(
        "-m",
        "--max_phi",
        nargs="?",
        type=int,
        help="Maximum number of operators to apply to a set of primary features",
        default=1,
    )
    parser.add_argument(
        "-r",
        "--res_save",
        nargs="?",
        type=int,
        help="Number of residuals to save in the model",
        default=1,
    )
    parser.add_argument(
        "-cc",
        "--cross_corr_threshold",
        nargs="?",
        type=float,
        help="Cross correlation threshold to discard features during SIS",
        default=0.95,
    )
    parser.add_argument(
        "-s",
        "--n_sis_select",
        type=int,
        nargs="?",
        default=1,
        help="Maximum number of features to select for each SIS call",
    )
    parser.add_argument(
        "-n",
        "--n_dim",
        type=int,
        nargs="?",
        help="Maximum dimension model to calculate",
        default=1,
    )
    parser.add_argument(
        "-l",
        "--leave_out",
        type=float,
        nargs="?",
        help="Leave-p-out validation (if < 1.0, fractional)",
        default=0.0,
    )
    parser.add_argument(
        "-i",
        "--leave_out_iter",
        type=int,
        nargs="?",
        help="maximum number of iterations for leave-p-out validation",
        default=None,
    )
    parser.add_argument(
        "-k", "--k_fold", type=int, nargs="?", help="k-fold validation", default=0
    )
    parser.add_argument(
        "--disable_all_l0_combinations",
        action="store_true",
        help="Do not combine SIS selection rounds when calculating descriptors with L0",
    )
    parser.add_argument(
        "--load",
        type=str,
        nargs="?",
        help="use dill to load a stored sisso object",
        default=None,
    )
    parser.add_argument(
        "-lt",
        "--learn_type",
        type=str,
        nargs="?",
        help="Objective function to learn on (log, correlation, classification)",
        default="correlation",
    )
    parser.add_argument(
        "-cw",
        "--class_width",
        type=float,
        nargs="?",
        help="Width of the boundary for classification",
        default=1e-6,
    )
    param_parser = parser.add_mutually_exclusive_group(required=False)
    param_parser.add_argument(
        "--no-parameterize",
        dest="parameterize",
        help="do not parameterize the nodes",
        action="store_false",
    )
    param_parser.add_argument(
        "--parameterize",
        dest="parameterize",
        help="parameterize the nodes",
        action="store_true",
    )
    parser.set_defaults(parameterize=True)
    fix_intercept = parser.add_mutually_exclusive_group(required=False)
    fix_intercept.add_argument(
        "--no_fix_intercept",
        dest="fix_intercept",
        help="Do not fix the intercept to 0",
        action="store_false",
    )
    fix_intercept.add_argument(
        "--fix_intercept",
        dest="fix_intercept",
        help="Fix the intercept to 0",
        action="store_true",
    )
    parser.set_defaults(fix_intercept=False)
    standardize = parser.add_mutually_exclusive_group(required=False)
    standardize.add_argument(
        "--no_standardize",
        dest="standardize",
        help="Do not standardize the data",
        action="store_false",
    )
    standardize.add_argument(
        "--standardize",
        dest="standardize",
        help="Standardize the data",
        action="store_true",
    )
    parser.set_defaults(standardize=False)
    log_learn = parser.add_mutually_exclusive_group(required=False)
    log_learn.add_argument(
        "--no_log_learn",
        dest="log_learn",
        help="Do not learn on the log of the property",
        action="store_false",
    )
    log_learn.add_argument(
        "--log_learn",
        dest="log_learn",
        help="Learn on the log of the property",
        action="store_true",
    )
    parser.set_defaults(log_learn=False)
    args = parser.parse_args()

    if args.load:
        sisso = dill.load(open(args.load, "rb"))
    else:
        allowed_ops = []
        if args.ops == "all":
            allowed_ops = list(op_map.values())
        else:
            for op in args.ops:
                allowed_ops.append(op_map[op])
        t0 = time()
        phi = FeatureSpace.from_df(
            args.data_file,
            args.prop_key,
            allowed_ops,
            args.cols,
            args.max_phi,
            args.n_sis_select,
            parameterize=args.parameterize,
            fix_c_0=args.fix_intercept,
            learn_type=args.learn_type,
            class_width=args.class_width,
        )
        # print(f"Time Feature Creation: {time()-t0}")
        t0 = time()
        sisso = SISSO_Regressor(
            phi,
            args.n_dim,
            not args.disable_all_l0_combinations,
            fix_c_0=args.fix_intercept,
            n_res_save=args.res_save,
            learn_type=args.learn_type,
        )
        # print(f"Time SISSO Regressor: {time()-t0}")
        t0 = time()
        sisso.fit()
        # print(f"Time SISSO fit: {time()-t0}")
        models = sisso.models
        if my_rank == 0:
            print("Training Results")
            print_models(models)
        for feat in sisso.feature_set.phi:
            feat._value = None
            feat._fxn_in_value = None
        with open("sisso_regressor.pick", "wb") as pickle_file:
            dill.dump(sisso, pickle_file)

    if (args.leave_out > 0) and (args.k_fold > 0):
        raise ValueError(
            "Cannot do both k-fold and leave-p-out validation; please specify one and load the saved sisso object"
        )
    elif args.leave_out > 0:
        if args.leave_out < 1.0:
            leave_out = None
            frac = args.leave_out
        else:
            leave_out = int(round(args.leave_out))
            frac = None
        validator = LeaveOutValidator(sisso, args.leave_out_iter, leave_out, frac)
    elif args.k_fold > 0:
        validator = KFoldValidator(sisso, args.k_fold)
    else:
        return 0

    validator.validate()
    with open("validator.pick", "wb") as pickle_file:
        dill.dump(validator, pickle_file)
    if my_rank == 0:
        av_rmse, av_max_ae = validator.summarize_error()
        print("Average prediction rmse")
        for dim, rmse in enumerate(av_rmse):
            print(f"{dim+1}D: {rmse}")
        print("Average prediction max ae")
        for dim, max_ae in enumerate(av_max_ae):
            print(f"{dim+1}D: {max_ae}")


if __name__ == "__main__":
    main()
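A minimal invocation sketch for the CLI above; `run_sisso.py` is a hypothetical file name for this script, and `test.csv` refers to the file written by the data-generation script further below. The flags mirror the argparse definitions:

```python
import subprocess

# Hypothetical entry point name; the flags map onto the parser defined above
subprocess.run(
    [
        "python", "run_sisso.py",
        "-d", "test.csv",  # --data_file: primary feature data
        "-p", "prop_3",    # --prop_key: property column to fit
        "-m", "2",         # --max_phi: apply up to two rungs of operators
        "-n", "1",         # --n_dim: calculate up to 1D models
        "-k", "5",         # --k_fold: 5-fold cross validation
    ],
    check=True,
)
```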
import dill

try:
    from mpi4py import MPI
except ImportError:
    mpi_size = 1
    my_rank = 0
else:
    comm = MPI.COMM_WORLD
    mpi_size = comm.Get_size()
    my_rank = comm.Get_rank()


def get_mpi_start_end_from_list(len_list, start_pt=0):
    """Split len_list elements into contiguous, nearly equal blocks across ranks"""
    els_per_rank = len_list // mpi_size
    remainder = len_list % mpi_size
    start_el = start_pt + els_per_rank * my_rank + min(my_rank, remainder)
    end_el = start_pt + els_per_rank * (my_rank + 1) + min(my_rank + 1, remainder)
    return start_el, end_el


def allgather_object(obj, all2all=False):
    """Gather arbitrary picklable objects from all ranks and distribute the result"""
    if mpi_size > 1:
        serialized_obj = dill.dumps(obj)
        if all2all:
            all_serialized_obj = comm.allgather(serialized_obj)
        else:
            all_serialized_obj = comm.gather(serialized_obj, root=0)
            all_serialized_obj = comm.bcast(all_serialized_obj, root=0)
        all_obj = [dill.loads(oo) for oo in all_serialized_obj]
        return all_obj
    return [obj]
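A minimal sketch of how these helpers divide and collect work, assuming the script is launched with 4 MPI ranks (e.g. `mpirun -n 4 python demo.py`); with 10 items the per-rank blocks are (0, 3), (3, 6), (6, 8), (8, 10), since the remainder is spread over the low ranks:

```python
from sisso.utils.mpi_interface import allgather_object, get_mpi_start_end_from_list, my_rank

start, end = get_mpi_start_end_from_list(10)
local_results = [ii ** 2 for ii in range(start, end)]        # this rank's share of the work
all_results = allgather_object(local_results, all2all=True)  # every rank receives every share
if my_rank == 0:
    print(all_results)  # with 4 ranks: [[0, 1, 4], [9, 16, 25], [36, 49], [64, 81]]
```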
"""Cross Validation Class"""
import numpy as np
import math
from itertools import combinations, islice
class Validator(object):
"""Base validator class
Attributes:
sisso_reg (SISSO_regressor.SISSO_Regressor): The SISSO Regression trained on all data
feature_set (feature_space.FeatureSpace): The calculated feature space for the calculations
num_mats (int): number of materials in the full data set
shuffled_mat_inds (list of int): The indexes for the materials shuffled
train_ind_lists (list of lists of ints): List of validation training sets
test_ind_lists (list of lists of ints): List of validation testing sets
models_list (list of SISSO_regressor.Model): All models trained from train_ind_list sets
prediction_rmse (list of floats): list of the Prediction RMSE of each model
prediction_max_ae (list of floats): list of prediction max absolute error for each model
"""
def __init__(self, sisso):
"""Constructor
Args:
sisso(SISSO_regressor.SISSO_Regressor): The SISSO Regression trained on all data
"""
self.sisso_reg = sisso
self.feature_set = sisso.feature_set
self.num_mats = len(self.feature_set.phi_0[0].value)
self.shuffled_mat_inds = np.arange(self.num_mats, dtype=np.int64)
np.random.shuffle(self.shuffled_mat_inds)
self.train_ind_lists = []
self.test_ind_lists = []
self.models_list = []
self.prediction_rmse = []
self.prediction_max_ae = []
def populate_mat_ind_lsts(self):
"""Dummy class to populate training/testing sets
Not defined since no strategy passed
"""
raise NotImplementedError(
"populate_mat_ind_lsts is not defined for a generic validator"
)
def validate(self):
"""Validate the models
Use the training/testing sets to validate a model
"""
for train_inds, test_inds in zip(self.train_ind_lists, self.test_ind_lists):
self.sisso_reg.reset(train_inds)
self.sisso_reg.fit()
models = self.sisso_reg.models.copy()
self.models_list.append(models)
prediction = [model[0].predict(test_inds) for model in models]
predict_err = [
(pp - self.feature_set.prop[test_inds].flatten()) for pp in prediction
]
self.prediction_rmse.append(
[np.sqrt(np.sum(pp ** 2.0) / len(pp)) for pp in predict_err]
)
self.prediction_max_ae.append([np.max(np.abs(pp)) for pp in predict_err])
self.prediction_rmse = np.array(self.prediction_rmse)
self.prediction_max_ae = np.array(self.prediction_max_ae)
def summarize_error(self):
"""Get the summary of the error
Returns:
float: mean of the RMSE for each model
float: mean of the max absolute error for each model
"""
return (
np.mean(self.prediction_rmse, axis=0),
np.mean(self.prediction_max_ae, axis=0),
)
class LeaveOutValidator(Validator):
"""Leave p validator class
Attributes:
max_iter (int): Maximum possible iteration value
num_iter (int): Number of iterations of leave-p out validation to perform
num_out (int): Number of materials to leave out for each validation step
"""
def __init__(self, sisso, num_iter=None, num_out=None, frac=None):
"""Constructor
Args:
sisso(SISSO_regressor.SISSO_Regressor): The SISSO Regression trained on all data
num_iter (int): Number of iterations of leave-p out validation to perform
num_out (int): Number of materials to leave out for each validation step
frac (float): Fraction of materials to leave out
"""
if (num_iter is None) and ((frac is None) == (num_out is None)):
raise ValueError(
"leave out validation needs to have either frac or num_out defined."
)
super(LeaveOutValidator, self).__init__(sisso)
if num_out is None:
self.num_out = int(round(frac * self.num_mats))
else:
self.num_out = num_out
if num_iter is None:
num_iter = int(100)
f = math.factorial
self.max_iter = int(
round(
f(self.num_mats) / (f(self.num_out) * f(self.num_mats - self.num_out))
)
)
self.num_iter = min(num_iter, self.max_iter)
self.populate_mat_ind_lsts()
def populate_mat_ind_lsts(self):
"""Set up Training/Testing sets for leave-p out cross validation"""
self.train_ind_lists = []
self.test_ind_lists = []
if self.max_iter < 1e4:
ind_comb = combinations(
np.arange(len(self.shuffled_mat_inds), dtype=np.int64), self.num_out
)
inds = np.sort(
np.random.choice(np.arange(self.max_iter), self.num_iter, False)
)
inds[1:] -= inds[0:-1] + 1
for ind in inds:
leave_out = np.array(list(islice(ind_comb, ind, ind + 1))[0])
self.test_ind_lists.append(list(self.shuffled_mat_inds[leave_out]))
self.train_ind_lists.append(
list(np.delete(self.shuffled_mat_inds, leave_out))
)
else:
leave_out = np.sort(
np.random.choice(self.num_mats, self.num_out, replace=False)
)
test_inds = self.shuffled_mat_inds[leave_out]
self.test_ind_lists.append(test_inds)
self.train_ind_lists.append(
list(np.delete(self.shuffled_mat_inds, leave_out))
)
while len(self.test_ind_lists) < self.num_iter:
leave_out = np.sort(
np.random.choice(self.num_mats, self.num_out, replace=False)
)
test_inds = self.shuffled_mat_inds[leave_out]
if (
np.min(
np.sum(
np.abs(np.array(self.test_ind_lists) - test_inds), axis=1
)
)
> 1e-5
):
self.test_ind_lists.append(list(self.shuffled_mat_inds[leave_out]))
self.train_ind_lists.append(
list(np.delete(self.shuffled_mat_inds, leave_out))
)
class KFoldValidator(Validator):
"""K-Fold validator class
Attributes:
k_fold (int): Number of divisions to make
"""
def __init__(self, sisso, k_fold=None):
"""Constructor
Args:
sisso(SISSO_regressor.SISSO_Regressor): The SISSO Regression trained on all data
k_fold (int): Number of divisions to make
"""
if k_fold is None:
raise ValueError("k-fold validation needs to have n_fold defined")
if k_fold <= 1:
raise ValueError("k for k_fold must be greater than 1")
super(KFoldValidator, self).__init__(sisso)
self.k_fold = k_fold
self.populate_mat_ind_lsts()
def populate_mat_ind_lsts(self):
"""Set up Training/Testing sets for k-fold out cross validation"""
self.train_ind_lists = []
self.test_ind_lists = []
for kk in range(self.k_fold):
test_list = self.shuffled_mat_inds[kk :: self.k_fold]
self.test_ind_lists.append(list(test_list))
self.train_ind_lists.append(
list(np.delete(self.shuffled_mat_inds, test_list))
)
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy import interpolate
from scipy.signal import savgol_filter

# import statsmodels.api as sm
from statsmodels.nonparametric.kernel_regression import KernelReg

rng = np.random.RandomState(13)

anions = np.array([6, 14, 32, 7, 15, 33, 51, 8, 16, 34, 52, 9, 17, 35, 53])
cations = np.delete(np.arange(54, dtype=np.int64), anions)
n_pts = 100
Z1 = rng.choice(cations, n_pts)
Z2 = rng.choice(anions, n_pts)
th = rng.rand(n_pts) * np.pi * 2.0 - np.pi
prop_1 = np.exp(-th ** 2.0 / 2.0) / 2.0 + 2.0
prop_2 = prop_1 + rng.normal(size=n_pts, scale=1.0)
# Successive test definitions of prop_3; only the last assignment takes effect
prop_3 = -853.0 * np.sqrt(25.0 * np.pi - 5.458752 * th) + 100.0
prop_3 = 1.0 / (0.56 * th ** 2.0 + 4.548420) - 15800 + rng.normal(size=n_pts, scale=0.025)
prop_3 = -np.exp(-th)
# prop_3 = Z1 * Z2 / np.exp(th / 2.0)  # * (197.0 / (137.0 * 4.0))
cols = ["prop_1", "prop_2", "prop_3", "Z1 (C)", "Z2 (C)", "th (Unitless)"]
data = np.vstack((prop_1, prop_2, prop_3, Z1, Z2, th))
pd.DataFrame(data.T, columns=cols).to_csv("test.csv", index_label="sample")

inds = th.argsort()
x = th[inds]
y = prop_2[inds]
# plt.plot(x, y, 'b.')
plt.plot(x, prop_3[inds], 'b.')
# wind = n_pts // 5
# wind += (wind + 1) % 2
# for i in range(1):
#     y = savgol_filter(y, wind, 5)
# plt.plot(x, y)
spl = interpolate.UnivariateSpline(th[inds], prop_3[inds], s=1000.0, k=5)
plt.plot(x, spl(x))
# kr = KernelReg(prop_2[inds], th[inds], 'c')
# x_pred = np.linspace(x[0], x[-1], 100)
# y_pred, y_std = kr.fit(x_pred)
# plt.plot(x_pred, y_pred)
plt.show()
import warnings

import numpy as np

from sisso.descriptor_identifcation.SISSO_regressor import SISSO_Regressor, print_models
from sisso.feature_creation.feature_space import FeatureSpace
from sisso.feature_creation.node.allowed_operator_nodes import op_map
from sisso.utils.mpi_interface import my_rank
from sisso.validator.validator import KFoldValidator, LeaveOutValidator

warnings.filterwarnings("ignore")

cols = [
    "Z1 (C)",
    "Z2 (C)",
    "th (Unitless)",
]
# allowed_ops = [
#     op_map["sqrt"],
#     # op_map["sq"],
# ]
allowed_ops = list(op_map.values())
phi = FeatureSpace.from_df(
    "test.csv",
    "prop_3",
    allowed_ops,
    cols,
    2,
    1,
)
sisso = SISSO_Regressor(
    phi,
    1,
    True,
)
sisso.fit()
models = sisso.models
if my_rank == 0:
    print("Training Results")
    print_models(models)
sample,prop_1,prop_2,prop_3,Z1 (C),Z2 (C),th (Unitless)
0,2.465193047112261,2.569373534661501,-1.4621138087504126,26.0,15.0,-0.3798832028603818
1,2.0273610029353146,1.0159305346633953,-0.08976167590662842,18.0,14.0,2.410597166408027
2,2.024100478726681,2.465280605287646,-0.08520698372800417,24.0,6.0,2.4626718798675657
3,2.401791177916339,4.274658377971557,-1.9373582921543708,50.0,6.0,-0.6613253400373109
4,2.496735103641117,1.9993373974254611,-0.8918424748066547,46.0,8.0,0.11446575974773365
5,2.213989402627042,2.593513974280932,-0.2717619102743732,37.0,53.0,1.3028289258846684
6,2.016077723690479,1.6009006167556086,-13.7618126826963,47.0,8.0,-2.6218975594898857
7,2.0338716287873844,2.310004378338961,-0.09823842184236464,38.0,34.0,2.3203578790205146
8,2.2756577776818636,1.5779927761114751,-0.33578582149382674,2.0,51.0,1.0912817582979333
9,2.2209174495912416,2.4139374611058004,-0.2785553521037371,49.0,52.0,1.2781384885418081
10,2.425811957748035,1.7928821797019445,-1.7625527804240353,21.0,35.0,-0.5667632015957822
11,2.135741090066919,2.2359670214832974,-5.027098044510345,18.0,53.0,-1.6148428881029682
12,2.013904474993403,0.8307612484574995,-0.06878886310336116,13.0,14.0,2.6767134206434253
13,2.003845361129069,2.159452060130503,-0.04414951852560637,19.0,53.0,3.120173257413912
14,2.0338068767676014,3.3837753958903956,-0.09815745580075226,2.0,32.0,2.3211823978156234
15,2.340251783621266,1.332990691722062,-0.41585951095279017,44.0,16.0,0.8774077898149004
16,2.200014966808943,0.4562068909145396,-3.871621637507877,27.0,51.0,-1.3536734470538807
17,2.0167995183929506,3.0665560733189015,-0.07389617630169253,29.0,51.0,2.605094193886342
18,2.0617799663264216,2.4328989613968193,-7.729220734097287,27.0,33.0,-2.045008046922166
19,2.0065642254860143,1.7239533541182286,-0.05266532118744189,19.0,32.0,2.9437980820235
20,2.138513704103439,2.8658966639674617,-0.2014381977853859,30.0,14.0,1.6022726553832607
21,2.0087270632492813,-0.5341151851383255,-0.0581104335396102,19.0,53.0,2.845410052251637
22,2.2763916755721203,2.893052121831554,-0.3366058496143372,1.0,52.0,1.088842618843052
23,2.2126215500069555,2.2947897357253675,-0.2704300595913585,26.0,33.0,1.3077417738641302
24,2.0072435224843477,1.358824425694374,-18.3596404154381,36.0,51.0,-2.910154799784762
25,2.2355138384370408,3.0040502609966424,-0.29315310995099314,40.0,16.0,1.2270602468920666
26,2.3446885666021373,0.61317147866244,-2.3691135334003603,19.0,35.0,-0.8625158486379929
27,2.0936368788511923,1.3774347410487648,-0.16034926557752383,20.0,53.0,1.8304009329749222
28,2.0480609553427374,3.6208755068880727,-0.11482812590264227,30.0,15.0,2.164318825952126
29,2.173410691995628,1.9517992185985686,-0.2333309426952121,46.0,33.0,1.4552974782512766
30,2.0145329864426476,2.5723276708006346,-0.06993804330826447,47.0,17.0,2.660145524457387
31,2.0674701676185943,1.4240258025086252,-7.39985752226473,5.0,51.0,-2.001460746276759
32,2.0139284697143944,2.0903860754390617,-0.06883319283178632,36.0,17.0,2.676069196435945
33,2.040102867011589,2.1043379470326906,-9.453657344318897,29.0,51.0,-2.246401687195357
34,2.257625149803967,2.6555952010436115,-3.163280821278439,36.0,9.0,-1.15160972358784
35,2.074340626647492,0.9383115018381281,-0.1419317420307118,11.0,16.0,1.9524090268738794
36,2.186447219591422,3.363963728194187,-0.24546359342620638,11.0,14.0,1.4046066383607112
37,2.011486530700132,2.410950501005991,-15.598207149863512,10.0,7.0,-2.7471559813601587
38,2.1940304275191855,-0.9592810226612123,-0.25260430243098003,23.0,9.0,1.3759310366257633
39,2.004109810978604,1.5569152939631026,-22.17097442132624,30.0,6.0,-3.0987839749098107
40,2.108931134353001,1.9251071420665258,-5.730429510739103,50.0,16.0,-1.745790486161591
41,2.0787183592548995,1.5721729227885755,-6.840623821619246,27.0,32.0,-1.9228789294669282
42,2.2307856322476165,2.0578322161193525,-0.2883794764007134,18.0,7.0,1.2434780397627039
43,2.1895997114169146,2.2736249669358335,-4.025376388385303,36.0,52.0,-1.3926184191408035
44,2.015461211809678,0.8294655653292167,-13.967990981111438,30.0,7.0,-2.636768353414867
45,2.4218536291802146,1.9420517005193076,-1.791420553564314,48.0,17.0,-0.583008910457643
46,2.0165092868986507,3.410849993705004,-0.07340411581532794,42.0,34.0,2.611775271155433
47,2.481522659402743,3.22435038371,-1.3157751720648563,27.0,6.0,-0.2744259764119805
48,2.266287474880764,2.8055822372542,-0.32545684611420955,5.0,14.0,1.1225254033447403
49,2.1429102753166074,1.170644695259943,-4.867840349088464,0.0,17.0,-1.5826503785908623
50,2.2698451424140367,1.8320135343957364,-0.3293483344754087,29.0,9.0,1.1106393209132053
51,2.4751531336515167,3.243157711106658,-0.7266695479364104,37.0,16.0,0.31928344682486687
52,2.2578504795336323,2.7976641230706223,-0.31636764435570935,47.0,53.0,1.1508503101517773
53,2.0753504092602513,0.2517020939482413,-6.997034656949864,2.0,6.0,-1.9454864388670945
54,2.0581217885103382,1.6289342695100402,-7.9616871168521355,25.0,33.0,-2.074640926751484
55,2.4989789488579874,2.5555989118250806,-0.9380608646028438,40.0,6.0,0.06394044444473002
56,2.192613871353365,3.4055788922066306,-3.9798583748190848,12.0,33.0,-1.3812462344469028
57,2.0048223200797946,0.3853143585111587,-0.04751293283053153,49.0,15.0,3.04675333488387
58,2.0241367933942014,1.0889507705064805,-11.728953084575707,21.0,17.0,-2.462060407580452
59,2.0043612809162252,0.5225055957212346,-0.04597952200159264,5.0,16.0,3.079559155490772
60,2.005333959485182,1.7570875639240824,-0.04912071221532253,37.0,17.0,3.013474495750758
61,2.4998053170649976,2.5148293975632043,-0.9724773616052392,1.0,17.0,0.027908482315509353
62,2.33964475751984,2.264394541240279,-2.40955135335727,24.0,6.0,-0.8794405697398915
63,2.014541995026371,1.8025460814233565,-0.06995433794821282,38.0,35.0,2.659912564808324
64,2.3376184917964093,1.500673794894695,-2.425938455245215,11.0,51.0,-0.8862184413811094
65,2.457770222428665,3.420386961325001,-0.6569830999208244,38.0,14.0,0.42009698393213935
66,2.0850774064127355,0.8946018297914258,-6.56692014924806,48.0,52.0,-1.8820449477391519
67,2.036051773031691,2.9618512025265575,-0.10093109009747232,47.0,34.0,2.293317271259898
68,2.48337736045334,3.8640498920305175,-0.7710205533184561,30.0,53.0,0.2600402477735342
69,2.242481332572665,4.494435123443452,-3.3303105549386323,45.0,35.0,-1.2030655593852642
70,2.4482966695762434,2.223767346217671,-1.5955715514593423,3.0,9.0,-0.4672320115214945
71,2.026251051692348,1.1022284011828607,-11.332965932296858,47.0,9.0,-2.427715817684073
72,2.005494216192898,2.7284751839954775,-20.158684427554544,28.0,15.0,-3.0036351845041502
73,2.0345131487704275,3.0639944216399533,-0.09903740703328553,48.0,35.0,2.31225765139337
74,2.2092200493676923,2.0967153015581736,-3.743482442073076,4.0,14.0,-1.3200163124340016
75,2.481493666335202,2.3523874343769084,-1.316063788524989,39.0,8.0,-0.2746453032519671
76,2.0090550078867424,1.710643808217539,-16.9864501791876,10.0,9.0,-2.8324159779612654
77,2.227078644943877,0.9808984239267389,-3.5128679991093947,27.0,14.0,-1.2564327976017564
78,2.0796821613320557,2.2241982687132453,-0.14711513344919463,3.0,16.0,1.9165397780178886
79,2.3825347484313286,2.895927863668369,-0.48102722655312485,37.0,34.0,0.7318314064177747
80,2.1270178479323425,1.7777805815987853,-0.1910034142562072,24.0,16.0,1.6554639754083267
81,2.496378148071153,0.5651245104404725,-1.1281537837604634,48.0,17.0,-0.12058247690433
82,2.4535004978925765,2.1376237923344164,-1.5555683725050216,41.0,16.0,-0.44184099171260893
83,2.0400961755695928,2.2839544875637237,-0.10577130986862256,37.0,14.0,2.246475969596493
84,2.215823334152881,2.6153440730853914,-3.6556073886687486,13.0,51.0,-1.296262259605668
85,2.0499527866186678,0.4187532283014821,-0.11690353062519399,2.0,52.0,2.1464062088627207
86,2.0760403360456325,2.0967193282634518,-6.964290923016863,26.0,16.0,-1.9407957963086333
87,2.0039462902502794,1.4482817154922396,-22.462756092740825,13.0,51.0,-3.1118586529457817
88,2.2663767438880345,4.090832673379053,-0.32555405300138923,40.0,51.0,1.1222267696719888
89,2.3081169755245114,2.803476987089165,-0.37381262619360145,36.0,16.0,0.9840006065921276
90,2.3294779674510813,1.6360183326909516,-2.492643965009495,18.0,9.0,-0.9133439804674963
91,2.036114692596269,1.994971935704235,-0.1010078754461115,40.0,51.0,2.2925567904667723
92,2.237020043904856,1.8168621325474468,-0.2946833682253918,39.0,9.0,1.2218538271208894
93,2.3889280427796176,1.7241654241857953,-0.4922240857990232,20.0,35.0,0.7088212072386653
94,2.262848151577783,2.4738759090397515,-3.1082108409220894,3.0,16.0,-1.1340472683390193
95,2.1645602088494704,1.1083354424147591,-0.22517894644688846,4.0,6.0,1.490859875333399
96,2.493601742701712,2.0013134266725627,-0.8517232882661091,31.0,32.0,0.1604935839353412
97,2.4938448597750438,4.559129095008126,-0.8543660473924344,36.0,53.0,0.1573955501046207
98,2.014639468182631,1.6061442172993834,-14.259164541558194,21.0,16.0,-2.6573998257213187
99,2.493558354490546,3.1110421147441,-1.1747323916053498,39.0,8.0,-0.1610403698315026
Subproject commit f538cd9fe89b418bbed7ff0798c357b9b66335fe
Subproject commit b7d59496c970679531a4a22c9c6dc6968aeb69ed
Subproject commit 8de6f9f72c5f0987eb948379c210d9381fb793fc
# ================================================================================
# Dockerfile -- old revision
# ================================================================================
FROM jupyter/tensorflow-notebook:2ce7c06a61a1
LABEL maintainer="Luca Ghiringhelli <ghiringhelli@fhi-berlin.mpg.de>"

# ================================================================================
# Linux applications and libraries
# ================================================================================
# RUN apt-get update \
#     && apt-get install --yes --quiet --no-install-recommends \
#     gcc \
#     gfortran \
#     liblapack-dev \
#     libblas-dev \
#     libnetcdf-dev \
#     netcdf-bin \
#     libxpm-dev \
#     libgsl-dev \
#     lsof \
#     vim \
#     git-lfs \
#     openjdk-8-jdk \
#     xvfb \
#     cmake \
#     openssh-client \
#     graphviz \
#     dvipng \
#     libboost-system-dev \
#     libboost-filesystem-dev \
#     libboost-mpi-dev \
#     libboost-serialization-dev \
#     && rm -rf /var/lib/apt/lists/*
USER root
RUN apt-get update \
    && apt-get install -y -q --no-install-recommends \
    gcc \
    gfortran \
    liblapack-dev \
    libblas-dev \
    libnetcdf-dev \
    netcdf-bin \
    libxpm-dev \
    libgsl-dev \
    lsof \
    vim \
    git-lfs \
    openjdk-8-jdk \
    xvfb \
    cmake \
    mpi \
    openmpi-bin \
    openmpi-common \
    openssh-server \
    openssh-client \
    libopenmpi-dev \
    graphviz \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
USER $NB_UID
# Dependencies:
# - quippy: gcc gfortran liblapack-dev libblas-dev libnetcdf-dev netcdf-bin libxpm-dev libgsl-dev

# ================================================================================
# Python environment
# ================================================================================
# This part contains the installation of all the common/basic python packages.
# All the other packages will be installed by the tutorials' own setup scripts.
# Add any additional packages you want available for use in a Python 3 notebook
# to the first line here (e.g., nglview, jupyter_contrib_nbextensions, etc.)
# https://github.com/ipython-contrib/jupyter_contrib_nbextensions
RUN conda install --quiet --yes \
    'pytorch::pytorch-cpu==1.1.0' \
    'pytorch::torchvision-cpu==0.3.0' \
    'nglview==2.7.0' \
    'jupyter_contrib_nbextensions==0.5.1' \
    'jupyter_nbextensions_configurator==0.4.1' \
    'ase' \
    'asap3' \
    'dscribe' \
    'pytest' \
    'mpi4py' \
    'orjson' \
    'hdbscan' \
    && conda install -c plotly plotly-orca \
    && conda install -c conda-forge umap-learn \
    && conda clean -tipsy \
    && jupyter nbextensions_configurator enable --user \
    && jupyter nbextension install nglview --py --sys-prefix \
    && jupyter nbextension enable nglview --py --sys-prefix \
    && jupyter nbextension enable execute_time/ExecuteTime \
    && jupyter nbextension enable init_cell/main \
    && jupyter nbextension enable collapsible_headings/main \
    && fix-permissions $CONDA_DIR \
    && fix-permissions /home/$NB_USER
# Dependencies:
# - ase: many
# - nglview: many
# - asap3: grain-boundaries

# ================================================================================
# QMMLPACK
# ================================================================================
# for the test TMPDIR has to be defined
ENV TMPDIR "/tmp/"
WORKDIR /opt/qmmlpack
COPY 3rdparty/qmmlpack .
USER root
RUN ./make --debug --verbose cpp --include-path /usr/include/gsl/ --library-path /usr/lib/
RUN ./make --debug --verbose python --include-path /usr/include/gsl/ --library-path /usr/lib/
RUN chown -R $NB_USER:$NB_GID $PWD
USER $NB_USER
#RUN make -v install
ENV PYTHONPATH "/opt/qmmlpack/python":$PYTHONPATH
# The CML_PLUGINS environment variable tells cmlkit to register the cscribe plugin, which provides the dscribe interface.
ENV CML_PLUGINS "cscribe"
ENV CML_DATASET_PATH $HOME/tutorials/data/cmlkit/
# Dependencies:
# - ase: many
# - nglview: many
# - asap3: grain-boundaries

# ================================================================================
# QUIP + GAP + quippy
# ================================================================================
# All the QUIPs go here; added to path in the end.
WORKDIR /opt/quip
# QUIP for general use is the OpenMP version.
ENV QUIP_ARCH linux_x86_64_gfortran_openmp
ENV QUIP_INSTALLDIR /opt/quip/bin
COPY 3rdparty/quip .
COPY 3rdparty/gap src/GAP
COPY files/Makefile.inc build/$QUIP_ARCH/
COPY files/GIT_VERSION .
COPY files/GAP_VERSION src/GAP/
USER root
RUN chown -R $NB_USER:$NB_GID $PWD
USER $NB_USER
# Installs with no suffix, e.g. quip
RUN make \
    && make install
# Installs quippy
RUN pip install src/f90wrap \
    && make install-quippy
# Make the quippy executable available from the terminal
ENV PATH /opt/quip/bin:$PATH

# ================================================================================
# SISSO++
# ================================================================================
WORKDIR /opt/cpp_sisso
COPY 3rdparty/cpp_sisso .
WORKDIR /opt/cpp_sisso/build
USER root
RUN cmake -C ../cmake/toolchains/gnu_py.cmake -DEXTERNAL_BOOST=OFF ../ \
    && make install

# ================================================================================
# ATOMIC FEATURES
# ================================================================================
WORKDIR /opt/atomic_features
COPY 3rdparty/atomic-features-package ./atomic-features-package
USER root
RUN pip install ./atomic-features-package

# ================================================================================
# KERAS-VIS
# ================================================================================
WORKDIR /opt/keras-vis
COPY 3rdparty/keras-vis .
RUN pip install .

# ================================================================================
# Install all of the package dependencies of the tutorials
# ================================================================================
WORKDIR /opt/tutorials
COPY tutorials/ .
RUN pip install ./analytics-compressed-sensing
RUN pip install ./analytics-convolutional-nn
RUN pip install ./analytics-gap-si-surface
RUN pip install ./analytics-grain-boundaries
RUN pip install ./analytics-kaggle-competition
RUN pip install ./analytics-soap-atomic-charges
RUN pip install ./analytics-tcmi
RUN pip install ./analytics-query-nomad-archive
RUN pip install ./analytics-descriptor-role
RUN pip install ./analytics-error-estimates
RUN pip install ./analytics-cmlkit
RUN pip install ./analytics-tetradymite-PRM2020
RUN pip install ./analytics-krr4mat
RUN pip install ./analytics-decision-tree
RUN pip install ./analytics-clustering-tutorial
RUN pip install ./analytics-arise \
    && pip install 'git+https://github.com/AndreasLeitherer/ARISE.git'
RUN pip install ./analytics-exploratory-analysis
RUN pip install nomad-lab
USER root
RUN fix-permissions /opt/tutorials \
    && fix-permissions $CONDA_DIR
ARG TUTORIALS_HOME=$HOME/tutorials
# Linking all the notebooks of the tutorials
WORKDIR $TUTORIALS_HOME
RUN ln -s /opt/tutorials/*/*.ipynb . \
    && jupyter-trust -y *.ipynb
# Linking images or other assets that may be required by the tutorials
WORKDIR $TUTORIALS_HOME/assets
RUN ln -s /opt/tutorials/*/assets/* .
# Linking data that may be required by the tutorials
WORKDIR $TUTORIALS_HOME/data
RUN mkdir data && ln -s /opt/tutorials/*/data/* .
RUN fix-permissions $TUTORIALS_HOME \
    && fix-permissions $HOME/.local/share/jupyter

# ================================================================================
# Switch back to jovyan to avoid accidental container runs as root

# ================================================================================
# Dockerfile -- new revision
# ================================================================================
ARG BASE_IMAGE=jupyter/tensorflow-notebook:2022-06-27
ARG BUILDER_BASE_IMAGE=ubuntu:focal

FROM $BASE_IMAGE AS builder

# ================================================================================
# Linux applications and libraries
# ================================================================================

# ================================================================================
# KERAS-VIS
# ================================================================================
WORKDIR /opt/keras-vis
COPY 3rdparty/keras-vis .
RUN pip install .

# ================================================================================
# ATOMIC FEATURES
# ================================================================================
# fixing pip installation issue of nomad-lab dependency: orjson
RUN pip install nomad-lab --extra-index-url https://gitlab.mpcdf.mpg.de/api/v4/projects/2187/packages/pypi/simple
WORKDIR /opt/atomic_features
COPY 3rdparty/atomic-features-package ./atomic-features-package
USER root
RUN pip install ./atomic-features-package

# ================================================================================
# CMLKIT
# ================================================================================
WORKDIR /opt/cmlkit
COPY 3rdparty/cmlkit .
USER root
RUN pip install .

# ================================================================================
# Final image
# ================================================================================
FROM $BASE_IMAGE

# # Customize jupyter
# WORKDIR $HOME
# COPY congifs/custom.css congifs/logo.png congifs/Titillium .jupyter/custom/

# ================================================================================
# Linux applications and libraries
# ================================================================================
USER root
RUN apt-get update \
    && apt-get install --yes --quiet --no-install-recommends \
    vim \
    openjdk-8-jdk \
    openmpi-bin \
    netcdf-bin \
    && rm -rf /var/lib/apt/lists/*

# ================================================================================
# Python environment
# ================================================================================
# This part contains the installation of all the common/basic python packages.
# All the other packages will be installed by the tutorials' own setup scripts.
# Add any additional packages you want available for use in a Python 3 notebook
# to the first line here (e.g., nglview, jupyter_contrib_nbextensions, etc.)
# https://github.com/ipython-contrib/jupyter_contrib_nbextensions
# All the dependencies will be fixed in e.g. requirements.txt
# # Pytorch does not support Python 3.10 yet.
# RUN mamba install -c pytorch -c conda-forge --quiet --yes \
#     'pytorch' \
#     'torchvision' \
#     'cpuonly' \
#     # Cmlkit depends on a few old packages...
#     && pip install --no-cache-dir 'cmlkit' \
RUN mamba install --quiet --yes \
    'nomad-lab' \
    'nglview' \
    'jupyter_contrib_nbextensions' \
    'ase' \
    'asap3' \
    'dscribe' \
    'orjson' \
    'hdbscan' \
    'pyyaml' \
    'numba' \
    'scikit-learn' \
    'pandas' \
    'urllib3' \
    'nest-asyncio' \
    'seaborn' \
    'pynndescent' \
    'mpi4py' \
    'umap-learn' \
    && mamba clean --all -f -y \
    && jupyter nbextension enable execute_time/ExecuteTime \
    && jupyter nbextension enable init_cell/main \
    && jupyter nbextension enable collapsible_headings/main \
    && fix-permissions "${CONDA_DIR}" \
    && fix-permissions "/home/${NB_USER}"
# fixing pip installation issue of nomad-lab dependency: orjson

# Copy all the notebooks of the tutorials
ARG TUTORIALS_HOME=$HOME/tutorials
WORKDIR $TUTORIALS_HOME
COPY tutorials/*/*.ipynb ./
# Copy images or other assets that may be required by the tutorials
COPY tutorials/*/assets/* ./
# Copy data that may be required by the tutorials
COPY tutorials/*/data/* ./
# RUN jupyter-trust -y *.ipynb
# Fix permissions
RUN fix-permissions $TUTORIALS_HOME

# ================================================================================
# Install all of the package dependencies of the tutorials
# ================================================================================
RUN pip install ./analytics-arise \
    && pip install 'git+https://github.com/AndreasLeitherer/ARISE.git'
RUN pip install ./analytics-atomic-features
RUN pip install ./analytics-clustering-tutorial
RUN pip install ./analytics-cmlkit
RUN pip install ./analytics-co2-sgd-tutorial
RUN pip install ./analytics-compressed-sensing
RUN pip install ./analytics-convolutional-nn
RUN pip install ./analytics-decision-tree
RUN pip install ./analytics-descriptor-role
RUN pip install ./analytics-domain-of-applicability
RUN pip install ./analytics-dos-similarity-search
RUN pip install ./analytics-error-estimates
RUN pip install ./analytics-exploratory-analysis
RUN pip install ./analytics-gap-si-surface
RUN pip install ./analytics-grain-boundaries
RUN pip install ./analytics-kaggle-competition
RUN pip install ./analytics-krr4mat
RUN pip install ./analytics-nn-regression
RUN pip install ./analytics-perovskites-tolerance-factor
RUN pip install ./analytics-query-nomad-archive
RUN pip install ./analytics-sgd-alloys-oxygen-reduction-evolution
RUN pip install ./analytics-sgd-propylene-oxidation-hte
RUN pip install ./analytics-soap-atomic-charges
RUN pip install ./analytics-tcmi
RUN pip install ./analytics-tetradymite-PRM2020

# ================================================================================
# Switch back to jovyan to avoid accidental container runs as root
ARG BUILDER_BASE_IMAGE=jupyter/tensorflow-notebook:2022-06-27
FROM $BUILDER_BASE_IMAGE
# Read more: https://gitlab.com/qmml/qmmlpack
# ================================================================================
# Linux applications and libraries
# ================================================================================
USER root
RUN apt-get update \
&& apt-get install --yes --quiet --no-install-recommends \
gcc \
libgsl-dev \
lsof \
&& rm -rf /var/lib/apt/lists/*
# RUN pip install --no-cache-dir pytest
RUN mamba install --quiet --yes \
'pytest' \
&& mamba clean --all -f -y \
&& fix-permissions "${CONDA_DIR}" \
&& fix-permissions "/home/${NB_USER}"
# ================================================================================
# QMMLPACK
# ================================================================================
WORKDIR /opt/qmmlpack
COPY 3rdparty/qmmlpack .
# for the test TMPDIR has to be defined
ENV TMPDIR "/tmp/"
RUN ./make --debug --verbose cpp --include-path /usr/include/gsl/ --library-path /usr/lib/
RUN ./make --debug --verbose python --include-path /usr/include/gsl/ --library-path /usr/lib/
# RUN ./make --verbose install
ENV PYTHONPATH "/opt/qmmlpack/python:$PYTHONPATH"
# ================================================================================
# Testing (it also runs during the build process)
# ================================================================================
# RUN ./build/cpptests \
# && pytest ./python/tests/
ARG BUILDER_BASE_IMAGE=jupyter/tensorflow-notebook:2022-06-27
FROM $BUILDER_BASE_IMAGE
# Read more: https://github.com/libAtoms/QUIP/
# ================================================================================
# Linux applications and libraries
# ================================================================================
USER root
RUN apt-get update \
&& apt-get install --yes --quiet --no-install-recommends \
gfortran \
libblas-dev \
liblapack-dev \
openmpi-bin \
libopenmpi-dev \
libscalapack-openmpi-dev \
netcdf-bin \
libnetcdf-dev \
libhdf5-serial-dev \
&& rm -rf /var/lib/apt/lists/*
# ================================================================================
# QUIP + GAP + quippy
# ================================================================================
# All the QUIPs go here; added to path in the end.
WORKDIR /opt/quip
# QUIP for general use is the OpenMP version.
ENV QUIP_ARCH linux_x86_64_gfortran_openmp
ENV QUIP_ROOT /opt/quip
ENV QUIP_INSTALLDIR /opt/quip/bin
COPY 3rdparty/quip .
COPY configs/Makefile.inc build/${QUIP_ARCH}/
COPY configs/GIT_VERSION .
COPY configs/GAP_VERSION src/GAP/
RUN make \
&& make quippy \
&& make install-quippy
# ================================================================================
# Testing (it also runs during the build process)
# ================================================================================
# RUN ulimit -n 256 \
# && make test
ARG BUILDER_BASE_IMAGE=jupyter/tensorflow-notebook:2022-06-27
FROM $BUILDER_BASE_IMAGE
# Read more: https://sissopp_developers.gitlab.io/sissopp/quick_start/Installation.html
# ================================================================================
# Linux applications and libraries
# ================================================================================
USER root
RUN apt-get update \
&& apt-get install --yes --quiet --no-install-recommends \
build-essential g++ gfortran cmake git \
liblapack-dev libblas-dev \
zlib1g-dev \
libboost-mpi-dev libboost-filesystem-dev libboost-system-dev libboost-serialization-dev \
openssh-client \
dvipng \
&& rm -rf /var/lib/apt/lists/*
RUN mamba install --quiet --yes \
'numpy' \
'pandas' \
'scipy' \
'seaborn' \
'scikit-learn' \
'toml' \
'pytest' \
&& mamba clean --all -f -y \
&& fix-permissions "${CONDA_DIR}" \
&& fix-permissions "/home/${NB_USER}"
# ================================================================================
# SISSO++
# ================================================================================
WORKDIR /opt/sissopp
COPY 3rdparty/sissopp .
RUN mkdir build && cd build \
&& cmake -C ../cmake/toolchains/gnu_param_py.cmake -DEXTERNAL_BOOST=ON ../ \
&& make \
&& make install
# ================================================================================
# Testing
# ================================================================================
RUN pytest tests/pytest
# RUN cd build \
# && cmake test
**README.md (old revision)**

# Single-user Jupyter notebook image for analytics

This notebook folder contains the Dockerfile for a single-user notebook image which is based on the Jupyter docker stack.
In addition to the base notebook it contains QUIP with GAP and all of the necessary packages for the tools.
The Docker image is suitable for running/testing locally and for deploying it by the JupyterHub on a Kubernetes cluster.

## Short description

- Based on the jupyter/minimal-notebook docker image
- conda: python 2 and 3 environments
- compiled quip with gap

## Tutorials

Each tutorial can have its own repository. For practical reasons please use "analytics-" as a prefix for your tutorial.
You can add a new repository to the tutorials folder like:

```bash
cd tutorials
git submodule add https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-grain-boundaries.git grain-boundaries
```

Links for the old tutorials:
- https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-lab-base/tree/master/analysis-tools
- https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-toolkit-tutorials

## Building/using your own docker image

### Cloning the repository

```bash
git clone --recursive https://gitlab.mpcdf.mpg.de/nomad-lab/analytics.git
```

Alternatively you can clone it without the submodules:

```bash
git clone https://gitlab.mpcdf.mpg.de/nomad-lab/analytics.git
```

And you can initialise the submodules when needed:

```bash
git submodule update --init --recursive
```

### Building the docker image locally

```bash
docker build -t analytics-notebook:latest .
```

### Testing/developing the notebooks locally

- Use the following command to run the docker image locally with password-less sudo access:

```bash
docker run -d --rm \
    -p 8888:8888 \
    -v $PWD/tutorials:/home/jovyan/tutorials \
    -e GRANT_SUDO=yes \
    --user root \
    --name analytics-notebook \
    analytics-notebook:latest
```

- To get a bash shell in the container use the following command:

```bash
docker exec -it analytics-notebook start.sh
```

More info: https://jupyter-docker-stacks.readthedocs.io/en/latest/using/common.html?highlight=root#alternative-commands

### Useful tricks for Linux

- Avoiding tokens for development:

```bash
docker run --rm \
    -p 8889:8888 \
    -v $PWD/tutorials:/home/jovyan/tutorials \
    --name analytics-notebook \
    analytics-notebook:latest \
    start-notebook.sh \
    --NotebookApp.token=''
```

- You may consider using the '--rm' option, which deletes the container when it stops. Be careful: this way you will lose everything that was not stored in the mounted folder.

```bash
docker run \
    --rm \
    -p 8888:8888 \
    -v $PWD/tutorials:/home/jovyan/tutorials \
    --name analytics-notebook \
    analytics-notebook:latest
```

- For mounting a folder, you may need to use an absolute path or other tricks if the PWD environment variable is not accessible in your shell:

```bash
docker run \
    --rm \
    -p 8888:8888 \
    -v /path/for/the/tutorials:/home/jovyan/tutorials \
    --name analytics-notebook \
    analytics-notebook:latest
```

- You may need to change the user id in the container - by adding the "-e NB_UID=1001" and "--user root" options to your command - to have access to the mounted folders:

```bash
docker run \
    --rm \
    -p 8888:8888 \
    -v $PWD/tutorials:/home/jovyan/tutorials \
    --user root \
    -e NB_UID=1001 \
    --name analytics-notebook \
    analytics-notebook:latest
```

More information about the command line options: https://jupyter-docker-stacks.readthedocs.io/en/latest/using/common.html#notebook-options

## Update the list of tutorials

Run the following command to update the list of tutorials:

```bash
python generate_tutorials_json.py
```

## Continuous integration

Each commit triggers a build process on the GitLab Runner. Besides the latest tag there will be a unique tag (the same as the value of the git commit) available for explicitly tracking the version of the notebook for a cluster deployment.

### Using the docker image from the registry

1. Install docker on your machine
2. Log in to the image repository:
```bash
docker login gitlab-registry.mpcdf.mpg.de
```
3. Pull the image:
```bash
docker pull gitlab-registry.mpcdf.mpg.de/nomad-lab/analytics:latest
```
4. Create a container:
```bash
docker run -p 8888:8888 \
    -v $PWD/tutorials:/home/jovyan/tutorials \
    gitlab-registry.mpcdf.mpg.de/nomad-lab/analytics:latest
```

Note: the latest tag can be found on the following page:
https://gitlab.mpcdf.mpg.de/nomad-lab/analytics/container_registry

**README.md (new revision)**

# Single-user Jupyter notebook image for analytics

This notebook folder contains the Dockerfile for a single-user notebook image which is based on the Jupyter docker stack.
The Docker image is suitable for running/testing locally and for deploying it by the JupyterHub on a Kubernetes cluster.

**Please note that this documentation is just a draft. Some of the instructions may be incomplete.**

## Building/using your own Docker image

### Cloning the repository and checking out the develop branch

```bash
git clone --recursive https://gitlab.mpcdf.mpg.de/nomad-lab/analytics.git
cd analytics
git checkout develop
git submodule update --init
```

### Building the Docker image

```bash
docker build -t analytics:develop .
```

### Deploying the Docker image locally

```bash
docker run --rm -p 8888:8888 analytics:develop
```

### Create new tutorial

To create a new tutorial, you can use the template [./analytics-tutorial-template](https://gitlab.mpcdf.mpg.de/nomad-lab/analytics/-/tree/develop/analytics-tutorial-template). Substitute 'tutorial template' with a short name that identifies the tutorial.

Please observe the following:
- keep the underscore/hyphen convention used in all files, also within the directory;
- use lowercase letters for file names;
- clear the cell outputs before saving;
- update the 'setup.py' file with all Python packages that need to be installed with pip;
- update the 'metainfo.json' file with all metadata.

### Test new tutorial

If you move the new notebook directory into the 'tutorials' folder and then build the image, you can test the notebook in the NOMAD AI Toolkit environment.
To install functions that are defined in analytics-tutorial-template into the NOMAD AI Toolkit environment, add the command `RUN pip install analytics-tutorial-template` to the Dockerfile. Then:

```bash
mv analytics-tutorial-template tutorials
docker build -t analytics:develop .
docker run --rm -p 8888:8888 analytics:develop
```

The 'tutorial_template.ipynb' notebook will then appear in the tutorials list.

### Getting published

If you wish to publish your notebook in the NOMAD AI Toolkit, please contact us (ghiringhelli@fhi-berlin.mpg.de).

### Updating quip/quippy

```
cd 3rdparty/quip
./bin/gitversion > ../../configs/GIT_VERSION
./src/GAP/gapversion > ../../configs/GAP_VERSION
```