Compare revisions: nomad-lab/analytics
Changes are shown as if the source revision was being merged into the target revision.
Showing 1372 additions and 301 deletions.
"""File to describe SixthPowerNode"""
import sisso.feature_creation.nodes.functions as fxn
import sympy
from sisso.feature_creation.nodes.operator_node import OperatorNode
class SixthPowerNode(OperatorNode):
"""Node to raise a feature to the sixth power
"""
def __init__(self, feat):
"""Node to raise a feature to the sixth power
Args:
feat (Node): Feature to add raise a feature to the sixth power
Raises:
ValueError: If feature would leave accepted range
"""
disallowed = ["cbrt", "sqrt"]
if feat.tag in disallowed:
raise ValueError("Invalid feature combination")
super(SixthPowerNode, self).__init__(fxn.six_pwr, "**6", feat)
@property
def expr(self):
"""The sympy.Expression for the resulting feature
Returns:
sympy.Expression: The algebraic representation of the new feature
"""
return sympy.expand(
sympy.trigsimp(sympy.powdenest(self.feat.fxn_in_expr ** 6, force=True))
)
# @property
def get_unit(self):
"""The sympy.Expression for the unit of the resulting feature
Returns:
sympy.Expression: The resulting unit of the feature
"""
return self.feat.unit ** 6.0
"""File to describe SquareNode"""
import sisso.feature_creation.nodes.functions as fxn
import sympy
from sisso.feature_creation.nodes.operator_node import OperatorNode
class SqNode(OperatorNode):
"""Node to add square operators to features
"""
def __init__(self, feat):
"""Node to add square operators to features
Args:
feat (Node): Feature to add the square operator to
Raises:
ValueError: If feature would leave accepted range
"""
if feat.tag == "sqrt":
raise ValueError("Invalid feature combination")
super(SqNode, self).__init__(fxn.sq, "**2", feat)
@property
def expr(self):
"""The sympy.Expression for the resulting feature
Returns:
sympy.Expression: The algebraic representation of the new feature
"""
return sympy.expand(
sympy.trigsimp(sympy.powdenest(self.feat.fxn_in_expr ** 2, force=True))
)
# @property
def get_unit(self):
"""The sympy.Expression for the unit of the resulting feature
Returns:
sympy.Expression: The resulting unit of the feature
"""
return self.feat.unit ** 2.0
"""File to describe SqrtNode"""
import numpy as np
import sisso.feature_creation.nodes.functions as fxn
import sympy
from scipy import stats
from sisso.feature_creation.nodes.operator_node import OperatorNode
from sisso.feature_creation.nodes.operator_nodes.sixth_power import SixthPowerNode
from sisso.feature_creation.nodes.operator_nodes.derivatives import get_deriv
class SqrtNode(OperatorNode):
"""Node to add square root operators to features
"""
def __init__(self, feat):
"""Node to add square root operators to features
Args:
feat (Node): Feature to add the square root operator to
Raises:
ValueError: If feat.value has a negative number
"""
disallowed = ["**2", "**6"]
if feat.tag in disallowed:
raise ValueError("Invalid feature combination")
super(SqrtNode, self).__init__(fxn.sqrt, "sqrt", feat)
self.bounds = (-1.0 * np.inf, np.inf)
def initial_params(self, prop, mat_inds):
"""Get an initial estimate of the parameters
Args:
prop (np.ndarray(float)): Property to fit to
mat_inds (np.ndarray(int)): Indexes to include in the fitting
Returns:
dict: The initial parameter guess based on the property and self.feat.fxn_in_value
"""
val = self.feat.fxn_in_value[mat_inds]
# Get and Transform the derivative of prop with respect to feat
x, prop_prim = get_deriv(val, prop[mat_inds], 1)
prop_trans = (prop_prim) ** (-2.0)
# Disregard any strongly non-linear trends due to noise/outliers
threshold = np.median(np.abs(np.diff(prop_trans))) * 1.1
inds = np.where(np.abs(np.diff(prop_trans)) <= threshold)[0]
# Get initial parameter guess
alpha, a = stats.linregress(x[inds], prop_trans[inds])[:2]
# Correct alpha and a
alpha = 4.0 / alpha
a *= alpha ** 2.0 / 4.0
# Get an estimate of the constant shift and scale factor
b, c = stats.linregress(np.sqrt(alpha * val + a), prop[mat_inds])[:2]
return {"alpha": alpha, "a": a, "b": b, "c": c}
@property
def fxn_in_value(self):
"""Calculate the value of the node by applying _func to feat_1 and feat_2 in that order
Returns:
np.ndarray: The result of applying _func to attached Nodes, with only alpha set the correct value
"""
if self._fxn_in_value is not None:
return self._fxn_in_value
return self._func(self.feat.fxn_in_value)
def set_fxn_in_value(self):
"""Sets the value array of the feature based off the function/feat.fxn_in_value"""
self._fxn_in_value = self._func(self.feat.fxn_in_value)
@property
def expr(self):
"""The sympy.Expression for the resulting feature
Returns:
sympy.Expression: The algebraic representation of the new feature
"""
return self.params["c"] + sympy.trigsimp(
sympy.powdenest(
sympy.sqrt(
self.params["alpha"] * self.feat.fxn_in_expr + self.params["a"]
),
force=True,
)
)
@property
def fxn_in_expr(self):
"""The sympy.Expression used to generate expressions for subsequent features that use this feature
Returns:
sympy.Expression: The algebraic representation of the new feature with only alpha used from the params
"""
return sympy.trigsimp(
sympy.powdenest(sympy.sqrt(self.feat.fxn_in_expr), force=True)
)
# @property
def get_unit(self):
"""The sympy.Expression for the unit of the resulting feature
Returns:
sympy.Expression: The resulting unit of the feature
"""
return self.feat.unit ** 0.5
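The linearization that `initial_params` relies on can be checked numerically. Below is a minimal sketch, assuming the model form `prop = b * sqrt(alpha * x + a) + c` noted in the comments above; it uses `np.gradient` in place of the package's `get_deriv` helper, so it is illustrative rather than a test of the actual code path:

```python
import numpy as np

# Synthetic data following prop = b * sqrt(alpha * x + a) + c
alpha, a, b, c = 2.0, 1.0, 3.0, -0.5
x = np.linspace(0.1, 5.0, 200)
prop = b * np.sqrt(alpha * x + a) + c

# dprop/dx = b * alpha / (2 * sqrt(alpha * x + a)), so
# (dprop/dx)**-2 = 4 * (alpha * x + a) / (b**2 * alpha**2) is linear in x
deriv = np.gradient(prop, x)
prop_trans = deriv ** -2.0

slope = np.polyfit(x, prop_trans, 1)[0]
print(np.isclose(slope, 4.0 / (b ** 2 * alpha), rtol=1e-2))  # True
```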
"""File to describe SubNode"""
import sisso.feature_creation.nodes.functions as fxn
import sympy
from sisso.feature_creation.nodes.operator_node import OperatorNode
class SubNode(OperatorNode):
"""Node to add subtraction operators to the features
"""
def __init__(self, feat_1, feat_2):
"""Node to add subtraction operators to the features
Args:
feat_1 (Node): Feature on the left of the subtraction
feat_2 (Node): Feature on the right of the subtraction
Raises:
ValueError: If the unit for feat_1 and feat_2 are not the same, the resulting feature would be a constant or go outside accepted range
# """
if feat_1.unit != feat_2.unit:
raise ValueError("When subtracting both features must have the same units")
if feat_1 == feat_2:
raise ValueError("When subtracting both features must be different")
func = lambda x: fxn.sub(x, self._n_els)
func.name = "sub"
func.default_params = {}
super(SubNode, self).__init__(func, "-", [feat_1, feat_2])
@property
def expr(self):
"""The sympy.Expression for the resulting feature
Returns:
sympy.Expression: The algebraic representation of the new feature
"""
new_expr = (self.feats[0].fxn_in_expr) - (self.feats[1].fxn_in_expr)
return sympy.trigsimp(new_expr)
# @property
def get_unit(self):
"""The sympy.Expression for the unit of the resulting feature
Returns:
sympy.Expression: The resulting unit of the feature
"""
return self.feats[0].unit
def reset_pivot(self, pvt):
"""reset the divider for self._func"""
self._func = lambda x: fxn.sub(x, pvt)
self._func.name = "sub"
self._func.default_params = {}
"""Defines the unit dictionary for comparisons"""
import re
import numpy as np
class Unit(dict):
def __init__(self, dct=None):
if dct is not None:
for key, val in dct.items():
self[key] = val
@classmethod
def from_str(cls, string):
string = string.replace(" ", "")
unit_comps = re.split(r"/|\*", string.replace("**", "^"))
mult_ops = [m.start() for m in re.finditer(r"\*", string)]
div_ops = [m.start() for m in re.finditer("/", string)]
ops = np.hstack((np.array(mult_ops), -1.0 * np.array(div_ops)))
ops = np.hstack((np.ones(1), np.sign(ops[np.abs(ops).argsort()])))
dct = dict()
for comp, op in zip(unit_comps, ops):
type_exp = comp.split("^")
if len(type_exp) == 1:
dct[type_exp[0]] = float(op)
elif len(type_exp) == 2:
dct[type_exp[0]] = int(type_exp[1]) * float(op)
else:
raise ValueError("Invalid unit")
return cls(dct)
def __repr__(self):
string = ""
for key, val in self.items():
string += " * " + key + "^" + str(val)
return string[3:]
def __mul__(self, unit_2):
dct = self.copy()
for key, val in unit_2.items():
dct[key] = dct.get(key, 0) + val
if dct[key] == 0:
del dct[key]
return Unit(dct)
def __truediv__(self, unit_2):
dct = self.copy()
for key, val in unit_2.items():
dct[key] = dct.get(key, 0) - val
if dct[key] == 0:
del dct[key]
return Unit(dct)
def __pow__(self, power):
dct = self.copy()
for key, val in dct.items():
dct[key] = val * power
return Unit(dct)
def inv(self):
dct = self.copy()
for key, val in dct.items():
dct[key] = val * -1.0
return Unit(dct)
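A minimal usage sketch of the `Unit` class defined above (assuming it is in scope); the printed forms follow the `__repr__` defined on the class:

```python
u = Unit.from_str("kg * m / s**2")  # parsed into an exponent per base unit
print(u)                            # kg^1.0 * m^1.0 * s^-2.0
print(u * Unit.from_str("s"))       # kg^1.0 * m^1.0 * s^-1.0
print(u ** 2)                       # kg^2.0 * m^2.0 * s^-4.0
print(u.inv())                      # kg^-1.0 * m^-1.0 * s^2.0
```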
import warnings
from argparse import ArgumentParser as argpars
from time import time

import dill

from sisso.descriptor_identifcation.SISSO_regressor import SISSO_Regressor, print_models
from sisso.feature_creation.feature_space import FeatureSpace
from sisso.feature_creation.nodes.operator_nodes import op_map
from sisso.utils.mpi_interface import my_rank
from sisso.validator.validator import KFoldValidator, LeaveOutValidator

warnings.filterwarnings("ignore")


def main():
    parser = argpars(description="Run a SISSO regression")
    parser.add_argument(
        "-d",
        "--data_file",
        type=str,
        nargs="?",
        help="file where all primary feature data is stored",
        default="data.csv",
    )
    parser.add_argument(
        "-p", "--prop_key", type=str, nargs="?", help="column key for the property"
    )
    parser.add_argument(
        "-o",
        "--ops",
        type=str,
        nargs="*",
        help="which operators to use for creating the feature set. If 'all' is passed then use all available operators",
        default="all",
    )
    parser.add_argument(
        "-c",
        "--cols",
        type=str,
        nargs="*",
        help="which columns to use as primary features from data_file. If 'all' is passed then take all non-property columns",
        default="all",
    )
    parser.add_argument(
        "-m",
        "--max_phi",
        nargs="?",
        type=int,
        help="Maximum number of operators to apply to a set of primary features",
        default=1,
    )
    parser.add_argument(
        "-r",
        "--res_save",
        nargs="?",
        type=int,
        help="Number of residuals to save in the model",
        default=1,
    )
    parser.add_argument(
        "-cc",
        "--cross_corr_threshold",
        nargs="?",
        type=float,
        help="Cross correlation threshold to discard features during SIS",
        default=0.95,
    )
    parser.add_argument(
        "-s",
        "--n_sis_select",
        type=int,
        nargs="?",
        default=1,
        help="Maximum number of features to select for each SIS call",
    )
    parser.add_argument(
        "-n",
        "--n_dim",
        type=int,
        nargs="?",
        help="Maximum dimension model to calculate",
        default=1,
    )
    parser.add_argument(
        "-l",
        "--leave_out",
        type=float,
        nargs="?",
        help="Leave-p-out validation (if < 1.0, fractional)",
        default=0.0,
    )
    parser.add_argument(
        "-i",
        "--leave_out_iter",
        type=int,
        nargs="?",
        help="maximum number of iterations for leave-p-out validation",
        default=None,
    )
    parser.add_argument(
        "-k", "--k_fold", type=int, nargs="?", help="k-fold validation", default=0
    )
    parser.add_argument(
        "--disable_all_l0_combinations",
        action="store_true",
        help="Do not combine SIS selection rounds when calculating descriptors with L0",
    )
    parser.add_argument(
        "--load",
        type=str,
        nargs="?",
        help="use dill to load a stored sisso object",
        default=None,
    )
    parser.add_argument(
        "-lt",
        "--learn_type",
        type=str,
        nargs="?",
        help="Objective function to learn on (log, correlation, classification)",
        default="correlation",
    )
    parser.add_argument(
        "-cw",
        "--class_width",
        type=float,
        nargs="?",
        help="Width of the boundary for classification",
        default=1e-6,
    )
    param_parser = parser.add_mutually_exclusive_group(required=False)
    param_parser.add_argument(
        "--no-parameterize",
        dest="parameterize",
        help="do not parameterize the nodes",
        action="store_false",
    )
    param_parser.add_argument(
        "--parameterize",
        dest="parameterize",
        help="parameterize the nodes",
        action="store_true",
    )
    parser.set_defaults(parameterize=True)
    fix_intercept = parser.add_mutually_exclusive_group(required=False)
    fix_intercept.add_argument(
        "--no_fix_intercept",
        dest="fix_intercept",
        help="Do not fix the intercept to 0",
        action="store_false",
    )
    fix_intercept.add_argument(
        "--fix_intercept",
        dest="fix_intercept",
        help="Fix the intercept to 0",
        action="store_true",
    )
    parser.set_defaults(fix_intercept=False)
    standardize = parser.add_mutually_exclusive_group(required=False)
    standardize.add_argument(
        "--no_standardize",
        dest="standardize",
        help="Do not standardize the data",
        action="store_false",
    )
    standardize.add_argument(
        "--standardize",
        dest="standardize",
        help="Standardize the data",
        action="store_true",
    )
    parser.set_defaults(standardize=False)
    log_learn = parser.add_mutually_exclusive_group(required=False)
    log_learn.add_argument(
        "--no_log_learn",
        dest="log_learn",
        help="Do not learn on the log of the property",
        action="store_false",
    )
    log_learn.add_argument(
        "--log_learn",
        dest="log_learn",
        help="Learn on the log of the property",
        action="store_true",
    )
    parser.set_defaults(log_learn=False)
    args = parser.parse_args()

    if args.load:
        sisso = dill.load(open(args.load, "rb"))
    else:
        allowed_ops = []
        if args.ops == "all":
            allowed_ops = list(op_map.values())
        else:
            for op in args.ops:
                allowed_ops.append(op_map[op])
        t0 = time()
        phi = FeatureSpace.from_df(
            args.data_file,
            args.prop_key,
            allowed_ops,
            args.cols,
            args.max_phi,
            args.n_sis_select,
            parameterize=args.parameterize,
            fix_c_0=args.fix_intercept,
            learn_type=args.learn_type,
            class_width=args.class_width,
        )
        # print(f"Time Feature Creation: {time()-t0}")
        t0 = time()
        sisso = SISSO_Regressor(
            phi,
            args.n_dim,
            not args.disable_all_l0_combinations,
            fix_c_0=args.fix_intercept,
            n_res_save=args.res_save,
            learn_type=args.learn_type,
        )
        # print(f"Time SISSO Regressor: {time()-t0}")
        t0 = time()
        sisso.fit()
        # print(f"Time SISSO fit: {time()-t0}")
        models = sisso.models
        if my_rank == 0:
            print("Training Results")
            print_models(models)
        for feat in sisso.feature_set.phi:
            feat._value = None
            feat._fxn_in_value = None
        with open("sisso_regressor.pick", "wb") as pickle_file:
            dill.dump(sisso, pickle_file)

    if (args.leave_out > 0) and (args.k_fold > 0):
        raise ValueError(
            "Cannot do both k-fold and leave-p-out validation; please specify one and load the saved sisso object"
        )
    elif args.leave_out > 0:
        if args.leave_out < 1.0:
            leave_out = None
            frac = args.leave_out
        else:
            leave_out = int(round(args.leave_out))
            frac = None
        validator = LeaveOutValidator(sisso, args.leave_out_iter, leave_out, frac)
    elif args.k_fold > 0:
        validator = KFoldValidator(sisso, args.k_fold)
    else:
        return 0

    validator.validate()
    with open("validator.pick", "wb") as pickle_file:
        dill.dump(validator, pickle_file)
    if my_rank == 0:
        av_rmse, av_max_ae = validator.summarize_error()
        print("Average prediction rmse")
        for dim, rmse in enumerate(av_rmse):
            print(f"{dim+1}D: {rmse}")
        print("Average prediction max ae")
        for dim, max_ae in enumerate(av_max_ae):
            print(f"{dim+1}D: {max_ae}")


if __name__ == "__main__":
    main()
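A minimal invocation sketch for the CLI above; `run_sisso.py` is a hypothetical file name for this script, and `test.csv` refers to the file written by the data-generation script further below. The flags mirror the argparse definitions:

```python
import subprocess

# Hypothetical entry point name; the flags map onto the parser defined above
subprocess.run(
    [
        "python", "run_sisso.py",
        "-d", "test.csv",  # --data_file: primary feature data
        "-p", "prop_3",    # --prop_key: property column to fit
        "-m", "2",         # --max_phi: apply up to two rungs of operators
        "-n", "1",         # --n_dim: calculate up to 1D models
        "-k", "5",         # --k_fold: 5-fold cross validation
    ],
    check=True,
)
```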
import dill

try:
    from mpi4py import MPI
except ImportError:
    mpi_size = 1
    my_rank = 0
else:
    comm = MPI.COMM_WORLD
    mpi_size = comm.Get_size()
    my_rank = comm.Get_rank()


def get_mpi_start_end_from_list(len_list, start_pt=0):
    """Split len_list elements into contiguous, nearly equal blocks across ranks"""
    els_per_rank = len_list // mpi_size
    remainder = len_list % mpi_size
    start_el = start_pt + els_per_rank * my_rank + min(my_rank, remainder)
    end_el = start_pt + els_per_rank * (my_rank + 1) + min(my_rank + 1, remainder)
    return start_el, end_el


def allgather_object(obj, all2all=False):
    """Gather arbitrary picklable objects from all ranks and distribute the result"""
    if mpi_size > 1:
        serialized_obj = dill.dumps(obj)
        if all2all:
            all_serialized_obj = comm.allgather(serialized_obj)
        else:
            all_serialized_obj = comm.gather(serialized_obj, root=0)
            all_serialized_obj = comm.bcast(all_serialized_obj, root=0)
        all_obj = [dill.loads(oo) for oo in all_serialized_obj]
        return all_obj
    return [obj]
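A minimal sketch of how these helpers divide and collect work, assuming the script is launched with 4 MPI ranks (e.g. `mpirun -n 4 python demo.py`); with 10 items the per-rank blocks are (0, 3), (3, 6), (6, 8), (8, 10), since the remainder is spread over the low ranks:

```python
from sisso.utils.mpi_interface import allgather_object, get_mpi_start_end_from_list, my_rank

start, end = get_mpi_start_end_from_list(10)
local_results = [ii ** 2 for ii in range(start, end)]        # this rank's share of the work
all_results = allgather_object(local_results, all2all=True)  # every rank receives every share
if my_rank == 0:
    print(all_results)  # with 4 ranks: [[0, 1, 4], [9, 16, 25], [36, 49], [64, 81]]
```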
"""Cross Validation Class"""
import numpy as np
import math
from itertools import combinations, islice
class Validator(object):
"""Base validator class
Attributes:
sisso_reg (SISSO_regressor.SISSO_Regressor): The SISSO Regression trained on all data
feature_set (feature_space.FeatureSpace): The calculated feature space for the calculations
num_mats (int): number of materials in the full data set
shuffled_mat_inds (list of int): The indexes for the materials shuffled
train_ind_lists (list of lists of ints): List of validation training sets
test_ind_lists (list of lists of ints): List of validation testing sets
models_list (list of SISSO_regressor.Model): All models trained from train_ind_list sets
prediction_rmse (list of floats): list of the Prediction RMSE of each model
prediction_max_ae (list of floats): list of prediction max absolute error for each model
"""
def __init__(self, sisso):
"""Constructor
Args:
sisso(SISSO_regressor.SISSO_Regressor): The SISSO Regression trained on all data
"""
self.sisso_reg = sisso
self.feature_set = sisso.feature_set
self.num_mats = len(self.feature_set.phi_0[0].value)
self.shuffled_mat_inds = np.arange(self.num_mats, dtype=np.int64)
np.random.shuffle(self.shuffled_mat_inds)
self.train_ind_lists = []
self.test_ind_lists = []
self.models_list = []
self.prediction_rmse = []
self.prediction_max_ae = []
def populate_mat_ind_lsts(self):
"""Dummy class to populate training/testing sets
Not defined since no strategy passed
"""
raise NotImplementedError(
"populate_mat_ind_lsts is not defined for a generic validator"
)
def validate(self):
"""Validate the models
Use the training/testing sets to validate a model
"""
for train_inds, test_inds in zip(self.train_ind_lists, self.test_ind_lists):
self.sisso_reg.reset(train_inds)
self.sisso_reg.fit()
models = self.sisso_reg.models.copy()
self.models_list.append(models)
prediction = [model[0].predict(test_inds) for model in models]
predict_err = [
(pp - self.feature_set.prop[test_inds].flatten()) for pp in prediction
]
self.prediction_rmse.append(
[np.sqrt(np.sum(pp ** 2.0) / len(pp)) for pp in predict_err]
)
self.prediction_max_ae.append([np.max(np.abs(pp)) for pp in predict_err])
self.prediction_rmse = np.array(self.prediction_rmse)
self.prediction_max_ae = np.array(self.prediction_max_ae)
def summarize_error(self):
"""Get the summary of the error
Returns:
float: mean of the RMSE for each model
float: mean of the max absolute error for each model
"""
return (
np.mean(self.prediction_rmse, axis=0),
np.mean(self.prediction_max_ae, axis=0),
)
class LeaveOutValidator(Validator):
"""Leave p validator class
Attributes:
max_iter (int): Maximum possible iteration value
num_iter (int): Number of iterations of leave-p out validation to perform
num_out (int): Number of materials to leave out for each validation step
"""
def __init__(self, sisso, num_iter=None, num_out=None, frac=None):
"""Constructor
Args:
sisso(SISSO_regressor.SISSO_Regressor): The SISSO Regression trained on all data
num_iter (int): Number of iterations of leave-p out validation to perform
num_out (int): Number of materials to leave out for each validation step
frac (float): Fraction of materials to leave out
"""
if (num_iter is None) and ((frac is None) == (num_out is None)):
raise ValueError(
"leave out validation needs to have either frac or num_out defined."
)
super(LeaveOutValidator, self).__init__(sisso)
if num_out is None:
self.num_out = int(round(frac * self.num_mats))
else:
self.num_out = num_out
if num_iter is None:
num_iter = int(100)
f = math.factorial
self.max_iter = int(
round(
f(self.num_mats) / (f(self.num_out) * f(self.num_mats - self.num_out))
)
)
self.num_iter = min(num_iter, self.max_iter)
self.populate_mat_ind_lsts()
def populate_mat_ind_lsts(self):
"""Set up Training/Testing sets for leave-p out cross validation"""
self.train_ind_lists = []
self.test_ind_lists = []
if self.max_iter < 1e4:
ind_comb = combinations(
np.arange(len(self.shuffled_mat_inds), dtype=np.int64), self.num_out
)
inds = np.sort(
np.random.choice(np.arange(self.max_iter), self.num_iter, False)
)
inds[1:] -= inds[0:-1] + 1
for ind in inds:
leave_out = np.array(list(islice(ind_comb, ind, ind + 1))[0])
self.test_ind_lists.append(list(self.shuffled_mat_inds[leave_out]))
self.train_ind_lists.append(
list(np.delete(self.shuffled_mat_inds, leave_out))
)
else:
leave_out = np.sort(
np.random.choice(self.num_mats, self.num_out, replace=False)
)
test_inds = self.shuffled_mat_inds[leave_out]
self.test_ind_lists.append(test_inds)
self.train_ind_lists.append(
list(np.delete(self.shuffled_mat_inds, leave_out))
)
while len(self.test_ind_lists) < self.num_iter:
leave_out = np.sort(
np.random.choice(self.num_mats, self.num_out, replace=False)
)
test_inds = self.shuffled_mat_inds[leave_out]
if (
np.min(
np.sum(
np.abs(np.array(self.test_ind_lists) - test_inds), axis=1
)
)
> 1e-5
):
self.test_ind_lists.append(list(self.shuffled_mat_inds[leave_out]))
self.train_ind_lists.append(
list(np.delete(self.shuffled_mat_inds, leave_out))
)
class KFoldValidator(Validator):
"""K-Fold validator class
Attributes:
k_fold (int): Number of divisions to make
"""
def __init__(self, sisso, k_fold=None):
"""Constructor
Args:
sisso(SISSO_regressor.SISSO_Regressor): The SISSO Regression trained on all data
k_fold (int): Number of divisions to make
"""
if k_fold is None:
raise ValueError("k-fold validation needs to have n_fold defined")
if k_fold <= 1:
raise ValueError("k for k_fold must be greater than 1")
super(KFoldValidator, self).__init__(sisso)
self.k_fold = k_fold
self.populate_mat_ind_lsts()
def populate_mat_ind_lsts(self):
"""Set up Training/Testing sets for k-fold out cross validation"""
self.train_ind_lists = []
self.test_ind_lists = []
for kk in range(self.k_fold):
test_list = self.shuffled_mat_inds[kk :: self.k_fold]
self.test_ind_lists.append(list(test_list))
self.train_ind_lists.append(
list(np.delete(self.shuffled_mat_inds, test_list))
)
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy import interpolate
from scipy.signal import savgol_filter

# import statsmodels.api as sm
from statsmodels.nonparametric.kernel_regression import KernelReg

rng = np.random.RandomState(13)

anions = np.array([6, 14, 32, 7, 15, 33, 51, 8, 16, 34, 52, 9, 17, 35, 53])
cations = np.delete(np.arange(54, dtype=np.int64), anions)
n_pts = 100
Z1 = rng.choice(cations, n_pts)
Z2 = rng.choice(anions, n_pts)
th = rng.rand(n_pts) * np.pi * 2.0 - np.pi
prop_1 = np.exp(-th ** 2.0 / 2.0) / 2.0 + 2.0
prop_2 = prop_1 + rng.normal(size=n_pts, scale=1.0)
# Successive test definitions of prop_3; only the last assignment takes effect
prop_3 = -853.0 * np.sqrt(25.0 * np.pi - 5.458752 * th) + 100.0
prop_3 = 1.0 / (0.56 * th ** 2.0 + 4.548420) - 15800 + rng.normal(size=n_pts, scale=0.025)
prop_3 = -np.exp(-th)
# prop_3 = Z1 * Z2 / np.exp(th / 2.0)  # * (197.0 / (137.0 * 4.0))
cols = ["prop_1", "prop_2", "prop_3", "Z1 (C)", "Z2 (C)", "th (Unitless)"]
data = np.vstack((prop_1, prop_2, prop_3, Z1, Z2, th))
pd.DataFrame(data.T, columns=cols).to_csv("test.csv", index_label="sample")

inds = th.argsort()
x = th[inds]
y = prop_2[inds]
# plt.plot(x, y, 'b.')
plt.plot(x, prop_3[inds], 'b.')
# wind = n_pts // 5
# wind += (wind + 1) % 2
# for i in range(1):
#     y = savgol_filter(y, wind, 5)
# plt.plot(x, y)
spl = interpolate.UnivariateSpline(th[inds], prop_3[inds], s=1000.0, k=5)
plt.plot(x, spl(x))
# kr = KernelReg(prop_2[inds], th[inds], 'c')
# x_pred = np.linspace(x[0], x[-1], 100)
# y_pred, y_std = kr.fit(x_pred)
# plt.plot(x_pred, y_pred)
plt.show()
import warnings

import numpy as np

from sisso.descriptor_identifcation.SISSO_regressor import SISSO_Regressor, print_models
from sisso.feature_creation.feature_space import FeatureSpace
from sisso.feature_creation.node.allowed_operator_nodes import op_map
from sisso.utils.mpi_interface import my_rank
from sisso.validator.validator import KFoldValidator, LeaveOutValidator

warnings.filterwarnings("ignore")

cols = [
    "Z1 (C)",
    "Z2 (C)",
    "th (Unitless)",
]
# allowed_ops = [
#     op_map["sqrt"],
#     # op_map["sq"],
# ]
allowed_ops = list(op_map.values())
phi = FeatureSpace.from_df(
    "test.csv",
    "prop_3",
    allowed_ops,
    cols,
    2,
    1,
)
sisso = SISSO_Regressor(
    phi,
    1,
    True,
)
sisso.fit()
models = sisso.models
if my_rank == 0:
    print("Training Results")
    print_models(models)
sample,prop_1,prop_2,prop_3,Z1 (C),Z2 (C),th (Unitless)
0,2.465193047112261,2.569373534661501,-1.4621138087504126,26.0,15.0,-0.3798832028603818
1,2.0273610029353146,1.0159305346633953,-0.08976167590662842,18.0,14.0,2.410597166408027
2,2.024100478726681,2.465280605287646,-0.08520698372800417,24.0,6.0,2.4626718798675657
3,2.401791177916339,4.274658377971557,-1.9373582921543708,50.0,6.0,-0.6613253400373109
4,2.496735103641117,1.9993373974254611,-0.8918424748066547,46.0,8.0,0.11446575974773365
5,2.213989402627042,2.593513974280932,-0.2717619102743732,37.0,53.0,1.3028289258846684
6,2.016077723690479,1.6009006167556086,-13.7618126826963,47.0,8.0,-2.6218975594898857
7,2.0338716287873844,2.310004378338961,-0.09823842184236464,38.0,34.0,2.3203578790205146
8,2.2756577776818636,1.5779927761114751,-0.33578582149382674,2.0,51.0,1.0912817582979333
9,2.2209174495912416,2.4139374611058004,-0.2785553521037371,49.0,52.0,1.2781384885418081
10,2.425811957748035,1.7928821797019445,-1.7625527804240353,21.0,35.0,-0.5667632015957822
11,2.135741090066919,2.2359670214832974,-5.027098044510345,18.0,53.0,-1.6148428881029682
12,2.013904474993403,0.8307612484574995,-0.06878886310336116,13.0,14.0,2.6767134206434253
13,2.003845361129069,2.159452060130503,-0.04414951852560637,19.0,53.0,3.120173257413912
14,2.0338068767676014,3.3837753958903956,-0.09815745580075226,2.0,32.0,2.3211823978156234
15,2.340251783621266,1.332990691722062,-0.41585951095279017,44.0,16.0,0.8774077898149004
16,2.200014966808943,0.4562068909145396,-3.871621637507877,27.0,51.0,-1.3536734470538807
17,2.0167995183929506,3.0665560733189015,-0.07389617630169253,29.0,51.0,2.605094193886342
18,2.0617799663264216,2.4328989613968193,-7.729220734097287,27.0,33.0,-2.045008046922166
19,2.0065642254860143,1.7239533541182286,-0.05266532118744189,19.0,32.0,2.9437980820235
20,2.138513704103439,2.8658966639674617,-0.2014381977853859,30.0,14.0,1.6022726553832607
21,2.0087270632492813,-0.5341151851383255,-0.0581104335396102,19.0,53.0,2.845410052251637
22,2.2763916755721203,2.893052121831554,-0.3366058496143372,1.0,52.0,1.088842618843052
23,2.2126215500069555,2.2947897357253675,-0.2704300595913585,26.0,33.0,1.3077417738641302
24,2.0072435224843477,1.358824425694374,-18.3596404154381,36.0,51.0,-2.910154799784762
25,2.2355138384370408,3.0040502609966424,-0.29315310995099314,40.0,16.0,1.2270602468920666
26,2.3446885666021373,0.61317147866244,-2.3691135334003603,19.0,35.0,-0.8625158486379929
27,2.0936368788511923,1.3774347410487648,-0.16034926557752383,20.0,53.0,1.8304009329749222
28,2.0480609553427374,3.6208755068880727,-0.11482812590264227,30.0,15.0,2.164318825952126
29,2.173410691995628,1.9517992185985686,-0.2333309426952121,46.0,33.0,1.4552974782512766
30,2.0145329864426476,2.5723276708006346,-0.06993804330826447,47.0,17.0,2.660145524457387
31,2.0674701676185943,1.4240258025086252,-7.39985752226473,5.0,51.0,-2.001460746276759
32,2.0139284697143944,2.0903860754390617,-0.06883319283178632,36.0,17.0,2.676069196435945
33,2.040102867011589,2.1043379470326906,-9.453657344318897,29.0,51.0,-2.246401687195357
34,2.257625149803967,2.6555952010436115,-3.163280821278439,36.0,9.0,-1.15160972358784
35,2.074340626647492,0.9383115018381281,-0.1419317420307118,11.0,16.0,1.9524090268738794
36,2.186447219591422,3.363963728194187,-0.24546359342620638,11.0,14.0,1.4046066383607112
37,2.011486530700132,2.410950501005991,-15.598207149863512,10.0,7.0,-2.7471559813601587
38,2.1940304275191855,-0.9592810226612123,-0.25260430243098003,23.0,9.0,1.3759310366257633
39,2.004109810978604,1.5569152939631026,-22.17097442132624,30.0,6.0,-3.0987839749098107
40,2.108931134353001,1.9251071420665258,-5.730429510739103,50.0,16.0,-1.745790486161591
41,2.0787183592548995,1.5721729227885755,-6.840623821619246,27.0,32.0,-1.9228789294669282
42,2.2307856322476165,2.0578322161193525,-0.2883794764007134,18.0,7.0,1.2434780397627039
43,2.1895997114169146,2.2736249669358335,-4.025376388385303,36.0,52.0,-1.3926184191408035
44,2.015461211809678,0.8294655653292167,-13.967990981111438,30.0,7.0,-2.636768353414867
45,2.4218536291802146,1.9420517005193076,-1.791420553564314,48.0,17.0,-0.583008910457643
46,2.0165092868986507,3.410849993705004,-0.07340411581532794,42.0,34.0,2.611775271155433
47,2.481522659402743,3.22435038371,-1.3157751720648563,27.0,6.0,-0.2744259764119805
48,2.266287474880764,2.8055822372542,-0.32545684611420955,5.0,14.0,1.1225254033447403
49,2.1429102753166074,1.170644695259943,-4.867840349088464,0.0,17.0,-1.5826503785908623
50,2.2698451424140367,1.8320135343957364,-0.3293483344754087,29.0,9.0,1.1106393209132053
51,2.4751531336515167,3.243157711106658,-0.7266695479364104,37.0,16.0,0.31928344682486687
52,2.2578504795336323,2.7976641230706223,-0.31636764435570935,47.0,53.0,1.1508503101517773
53,2.0753504092602513,0.2517020939482413,-6.997034656949864,2.0,6.0,-1.9454864388670945
54,2.0581217885103382,1.6289342695100402,-7.9616871168521355,25.0,33.0,-2.074640926751484
55,2.4989789488579874,2.5555989118250806,-0.9380608646028438,40.0,6.0,0.06394044444473002
56,2.192613871353365,3.4055788922066306,-3.9798583748190848,12.0,33.0,-1.3812462344469028
57,2.0048223200797946,0.3853143585111587,-0.04751293283053153,49.0,15.0,3.04675333488387
58,2.0241367933942014,1.0889507705064805,-11.728953084575707,21.0,17.0,-2.462060407580452
59,2.0043612809162252,0.5225055957212346,-0.04597952200159264,5.0,16.0,3.079559155490772
60,2.005333959485182,1.7570875639240824,-0.04912071221532253,37.0,17.0,3.013474495750758
61,2.4998053170649976,2.5148293975632043,-0.9724773616052392,1.0,17.0,0.027908482315509353
62,2.33964475751984,2.264394541240279,-2.40955135335727,24.0,6.0,-0.8794405697398915
63,2.014541995026371,1.8025460814233565,-0.06995433794821282,38.0,35.0,2.659912564808324
64,2.3376184917964093,1.500673794894695,-2.425938455245215,11.0,51.0,-0.8862184413811094
65,2.457770222428665,3.420386961325001,-0.6569830999208244,38.0,14.0,0.42009698393213935
66,2.0850774064127355,0.8946018297914258,-6.56692014924806,48.0,52.0,-1.8820449477391519
67,2.036051773031691,2.9618512025265575,-0.10093109009747232,47.0,34.0,2.293317271259898
68,2.48337736045334,3.8640498920305175,-0.7710205533184561,30.0,53.0,0.2600402477735342
69,2.242481332572665,4.494435123443452,-3.3303105549386323,45.0,35.0,-1.2030655593852642
70,2.4482966695762434,2.223767346217671,-1.5955715514593423,3.0,9.0,-0.4672320115214945
71,2.026251051692348,1.1022284011828607,-11.332965932296858,47.0,9.0,-2.427715817684073
72,2.005494216192898,2.7284751839954775,-20.158684427554544,28.0,15.0,-3.0036351845041502
73,2.0345131487704275,3.0639944216399533,-0.09903740703328553,48.0,35.0,2.31225765139337
74,2.2092200493676923,2.0967153015581736,-3.743482442073076,4.0,14.0,-1.3200163124340016
75,2.481493666335202,2.3523874343769084,-1.316063788524989,39.0,8.0,-0.2746453032519671
76,2.0090550078867424,1.710643808217539,-16.9864501791876,10.0,9.0,-2.8324159779612654
77,2.227078644943877,0.9808984239267389,-3.5128679991093947,27.0,14.0,-1.2564327976017564
78,2.0796821613320557,2.2241982687132453,-0.14711513344919463,3.0,16.0,1.9165397780178886
79,2.3825347484313286,2.895927863668369,-0.48102722655312485,37.0,34.0,0.7318314064177747
80,2.1270178479323425,1.7777805815987853,-0.1910034142562072,24.0,16.0,1.6554639754083267
81,2.496378148071153,0.5651245104404725,-1.1281537837604634,48.0,17.0,-0.12058247690433
82,2.4535004978925765,2.1376237923344164,-1.5555683725050216,41.0,16.0,-0.44184099171260893
83,2.0400961755695928,2.2839544875637237,-0.10577130986862256,37.0,14.0,2.246475969596493
84,2.215823334152881,2.6153440730853914,-3.6556073886687486,13.0,51.0,-1.296262259605668
85,2.0499527866186678,0.4187532283014821,-0.11690353062519399,2.0,52.0,2.1464062088627207
86,2.0760403360456325,2.0967193282634518,-6.964290923016863,26.0,16.0,-1.9407957963086333
87,2.0039462902502794,1.4482817154922396,-22.462756092740825,13.0,51.0,-3.1118586529457817
88,2.2663767438880345,4.090832673379053,-0.32555405300138923,40.0,51.0,1.1222267696719888
89,2.3081169755245114,2.803476987089165,-0.37381262619360145,36.0,16.0,0.9840006065921276
90,2.3294779674510813,1.6360183326909516,-2.492643965009495,18.0,9.0,-0.9133439804674963
91,2.036114692596269,1.994971935704235,-0.1010078754461115,40.0,51.0,2.2925567904667723
92,2.237020043904856,1.8168621325474468,-0.2946833682253918,39.0,9.0,1.2218538271208894
93,2.3889280427796176,1.7241654241857953,-0.4922240857990232,20.0,35.0,0.7088212072386653
94,2.262848151577783,2.4738759090397515,-3.1082108409220894,3.0,16.0,-1.1340472683390193
95,2.1645602088494704,1.1083354424147591,-0.22517894644688846,4.0,6.0,1.490859875333399
96,2.493601742701712,2.0013134266725627,-0.8517232882661091,31.0,32.0,0.1604935839353412
97,2.4938448597750438,4.559129095008126,-0.8543660473924344,36.0,53.0,0.1573955501046207
98,2.014639468182631,1.6061442172993834,-14.259164541558194,21.0,16.0,-2.6573998257213187
99,2.493558354490546,3.1110421147441,-1.1747323916053498,39.0,8.0,-0.1610403698315026
Subproject commit f538cd9fe89b418bbed7ff0798c357b9b66335fe
Subproject commit b7d59496c970679531a4a22c9c6dc6968aeb69ed
Subproject commit 8de6f9f72c5f0987eb948379c210d9381fb793fc
# ================================================================================
# Dockerfile -- old revision
# ================================================================================
FROM jupyter/tensorflow-notebook:2ce7c06a61a1
LABEL maintainer="Luca Ghiringhelli <ghiringhelli@fhi-berlin.mpg.de>"

# ================================================================================
# Linux applications and libraries
# ================================================================================
# RUN apt-get update \
#     && apt-get install --yes --quiet --no-install-recommends \
#     gcc \
#     gfortran \
#     liblapack-dev \
#     libblas-dev \
#     libnetcdf-dev \
#     netcdf-bin \
#     libxpm-dev \
#     libgsl-dev \
#     lsof \
#     vim \
#     git-lfs \
#     openjdk-8-jdk \
#     xvfb \
#     cmake \
#     openssh-client \
#     graphviz \
#     dvipng \
#     libboost-system-dev \
#     libboost-filesystem-dev \
#     libboost-mpi-dev \
#     libboost-serialization-dev \
#     && rm -rf /var/lib/apt/lists/*
USER root
RUN apt-get update \
    && apt-get install -y -q --no-install-recommends \
    gcc \
    gfortran \
    liblapack-dev \
    libblas-dev \
    libnetcdf-dev \
    netcdf-bin \
    libxpm-dev \
    libgsl-dev \
    lsof \
    vim \
    git-lfs \
    openjdk-8-jdk \
    xvfb \
    cmake \
    mpi \
    openmpi-bin \
    openmpi-common \
    openssh-server \
    openssh-client \
    libopenmpi-dev \
    graphviz \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
USER $NB_UID
# Dependencies:
# - quippy: gcc gfortran liblapack-dev libblas-dev libnetcdf-dev netcdf-bin libxpm-dev libgsl-dev

# ================================================================================
# Python environment
# ================================================================================
# This part contains the installation of all the common/basic python packages.
# All the other packages will be installed by the tutorials' own setup scripts.
# Add any additional packages you want available for use in a Python 3 notebook
# to the first line here (e.g., nglview, jupyter_contrib_nbextensions, etc.)
# https://github.com/ipython-contrib/jupyter_contrib_nbextensions
RUN conda install --quiet --yes \
    'pytorch::pytorch-cpu==1.1.0' \
    'pytorch::torchvision-cpu==0.3.0' \
    'nglview==2.7.0' \
    'jupyter_contrib_nbextensions==0.5.1' \
    'jupyter_nbextensions_configurator==0.4.1' \
    'ase' \
    'asap3' \
    'dscribe' \
    'pytest' \
    'mpi4py' \
    'orjson' \
    'hdbscan' \
    && conda install -c plotly plotly-orca \
    && conda install -c conda-forge umap-learn \
    && conda clean -tipsy \
    && jupyter nbextensions_configurator enable --user \
    && jupyter nbextension install nglview --py --sys-prefix \
    && jupyter nbextension enable nglview --py --sys-prefix \
    && jupyter nbextension enable execute_time/ExecuteTime \
    && jupyter nbextension enable init_cell/main \
    && jupyter nbextension enable collapsible_headings/main \
    && fix-permissions $CONDA_DIR \
    && fix-permissions /home/$NB_USER
# Dependencies:
# - ase: many
# - nglview: many
# - asap3: grain-boundaries

# ================================================================================
# QMMLPACK
# ================================================================================
# for the test TMPDIR has to be defined
ENV TMPDIR "/tmp/"
WORKDIR /opt/qmmlpack
COPY 3rdparty/qmmlpack .
USER root
RUN ./make --debug --verbose cpp --include-path /usr/include/gsl/ --library-path /usr/lib/
RUN ./make --debug --verbose python --include-path /usr/include/gsl/ --library-path /usr/lib/
RUN chown -R $NB_USER:$NB_GID $PWD
USER $NB_USER
#RUN make -v install
ENV PYTHONPATH "/opt/qmmlpack/python":$PYTHONPATH
# The CML_PLUGINS environment variable tells cmlkit to register the cscribe plugin, which provides the dscribe interface.
ENV CML_PLUGINS "cscribe"
ENV CML_DATASET_PATH $HOME/tutorials/data/cmlkit/
# Dependencies:
# - ase: many
# - nglview: many
# - asap3: grain-boundaries

# ================================================================================
# QUIP + GAP + quippy
# ================================================================================
# All the QUIPs go here; added to path in the end.
WORKDIR /opt/quip
# QUIP for general use is the OpenMP version.
ENV QUIP_ARCH linux_x86_64_gfortran_openmp
ENV QUIP_INSTALLDIR /opt/quip/bin
COPY 3rdparty/quip .
COPY 3rdparty/gap src/GAP
COPY files/Makefile.inc build/$QUIP_ARCH/
COPY files/GIT_VERSION .
COPY files/GAP_VERSION src/GAP/
USER root
RUN chown -R $NB_USER:$NB_GID $PWD
USER $NB_USER
# Installs with no suffix, e.g. quip
RUN make \
    && make install
# Installs quippy
RUN pip install src/f90wrap \
    && make install-quippy
# Make the quippy executable available from the terminal
ENV PATH /opt/quip/bin:$PATH

# ================================================================================
# SISSO++
# ================================================================================
WORKDIR /opt/cpp_sisso
COPY 3rdparty/cpp_sisso .
WORKDIR /opt/cpp_sisso/build
USER root
RUN cmake -C ../cmake/toolchains/gnu_py.cmake -DEXTERNAL_BOOST=OFF ../ \
    && make install

# ================================================================================
# ATOMIC FEATURES
# ================================================================================
WORKDIR /opt/atomic_features
COPY 3rdparty/atomic-features-package ./atomic-features-package
USER root
RUN pip install ./atomic-features-package

# ================================================================================
# KERAS-VIS
# ================================================================================
WORKDIR /opt/keras-vis
COPY 3rdparty/keras-vis .
RUN pip install .

# ================================================================================
# Install all of the package dependencies of the tutorials
# ================================================================================
WORKDIR /opt/tutorials
COPY tutorials/ .
RUN pip install ./analytics-compressed-sensing
RUN pip install ./analytics-convolutional-nn
RUN pip install ./analytics-gap-si-surface
RUN pip install ./analytics-grain-boundaries
RUN pip install ./analytics-kaggle-competition
RUN pip install ./analytics-soap-atomic-charges
RUN pip install ./analytics-tcmi
RUN pip install ./analytics-query-nomad-archive
RUN pip install ./analytics-descriptor-role
RUN pip install ./analytics-error-estimates
RUN pip install ./analytics-cmlkit
RUN pip install ./analytics-tetradymite-PRM2020
RUN pip install ./analytics-krr4mat
RUN pip install ./analytics-decision-tree
RUN pip install ./analytics-clustering-tutorial
RUN pip install ./analytics-arise \
    && pip install 'git+https://github.com/AndreasLeitherer/ARISE.git'
RUN pip install ./analytics-exploratory-analysis
RUN pip install nomad-lab
USER root
RUN fix-permissions /opt/tutorials \
    && fix-permissions $CONDA_DIR
ARG TUTORIALS_HOME=$HOME/tutorials
# Linking all the notebooks of the tutorials
WORKDIR $TUTORIALS_HOME
RUN ln -s /opt/tutorials/*/*.ipynb . \
    && jupyter-trust -y *.ipynb
# Linking images or other assets that may be required by the tutorials
WORKDIR $TUTORIALS_HOME/assets
RUN ln -s /opt/tutorials/*/assets/* .
# Linking data that may be required by the tutorials
WORKDIR $TUTORIALS_HOME/data
RUN mkdir data && ln -s /opt/tutorials/*/data/* .
RUN fix-permissions $TUTORIALS_HOME \
    && fix-permissions $HOME/.local/share/jupyter

# ================================================================================
# Switch back to jovyan to avoid accidental container runs as root

# ================================================================================
# Dockerfile -- new revision
# ================================================================================
ARG BASE_IMAGE=jupyter/tensorflow-notebook:2022-06-27
ARG BUILDER_BASE_IMAGE=ubuntu:focal

FROM $BASE_IMAGE AS builder

# ================================================================================
# Linux applications and libraries
# ================================================================================

# ================================================================================
# KERAS-VIS
# ================================================================================
WORKDIR /opt/keras-vis
COPY 3rdparty/keras-vis .
RUN pip install .

# ================================================================================
# ATOMIC FEATURES
# ================================================================================
# fixing pip installation issue of nomad-lab dependency: orjson
RUN pip install nomad-lab --extra-index-url https://gitlab.mpcdf.mpg.de/api/v4/projects/2187/packages/pypi/simple
WORKDIR /opt/atomic_features
COPY 3rdparty/atomic-features-package ./atomic-features-package
USER root
RUN pip install ./atomic-features-package

# ================================================================================
# CMLKIT
# ================================================================================
WORKDIR /opt/cmlkit
COPY 3rdparty/cmlkit .
USER root
RUN pip install .

# ================================================================================
# Final image
# ================================================================================
FROM $BASE_IMAGE

# # Customize jupyter
# WORKDIR $HOME
# COPY congifs/custom.css congifs/logo.png congifs/Titillium .jupyter/custom/

# ================================================================================
# Linux applications and libraries
# ================================================================================
USER root
RUN apt-get update \
    && apt-get install --yes --quiet --no-install-recommends \
    vim \
    openjdk-8-jdk \
    openmpi-bin \
    netcdf-bin \
    && rm -rf /var/lib/apt/lists/*

# ================================================================================
# Python environment
# ================================================================================
# This part contains the installation of all the common/basic python packages.
# All the other packages will be installed by the tutorials' own setup scripts.
# Add any additional packages you want available for use in a Python 3 notebook
# to the first line here (e.g., nglview, jupyter_contrib_nbextensions, etc.)
# https://github.com/ipython-contrib/jupyter_contrib_nbextensions
# All the dependencies will be fixed in e.g. requirements.txt
# # Pytorch does not support Python 3.10 yet.
# RUN mamba install -c pytorch -c conda-forge --quiet --yes \
#     'pytorch' \
#     'torchvision' \
#     'cpuonly' \
#     # Cmlkit depends on a few old packages...
#     && pip install --no-cache-dir 'cmlkit' \
RUN mamba install --quiet --yes \
    'nomad-lab' \
    'nglview' \
    'jupyter_contrib_nbextensions' \
    'ase' \
    'asap3' \
    'dscribe' \
    'orjson' \
    'hdbscan' \
    'pyyaml' \
    'numba' \
    'scikit-learn' \
    'pandas' \
    'urllib3' \
    'nest-asyncio' \
    'seaborn' \
    'pynndescent' \
    'mpi4py' \
    'umap-learn' \
    && mamba clean --all -f -y \
    && jupyter nbextension enable execute_time/ExecuteTime \
    && jupyter nbextension enable init_cell/main \
    && jupyter nbextension enable collapsible_headings/main \
    && fix-permissions "${CONDA_DIR}" \
    && fix-permissions "/home/${NB_USER}"
# fixing pip installation issue of nomad-lab dependency: orjson

# Copy all the notebooks of the tutorials
ARG TUTORIALS_HOME=$HOME/tutorials
WORKDIR $TUTORIALS_HOME
COPY tutorials/*/*.ipynb ./
# Copy images or other assets that may be required by the tutorials
COPY tutorials/*/assets/* ./
# Copy data that may be required by the tutorials
COPY tutorials/*/data/* ./
# RUN jupyter-trust -y *.ipynb
# Fix permissions
RUN fix-permissions $TUTORIALS_HOME

# ================================================================================
# Install all of the package dependencies of the tutorials
# ================================================================================
RUN pip install ./analytics-arise \
    && pip install 'git+https://github.com/AndreasLeitherer/ARISE.git'
RUN pip install ./analytics-atomic-features
RUN pip install ./analytics-clustering-tutorial
RUN pip install ./analytics-cmlkit
RUN pip install ./analytics-co2-sgd-tutorial
RUN pip install ./analytics-compressed-sensing
RUN pip install ./analytics-convolutional-nn
RUN pip install ./analytics-decision-tree
RUN pip install ./analytics-descriptor-role
RUN pip install ./analytics-domain-of-applicability
RUN pip install ./analytics-dos-similarity-search
RUN pip install ./analytics-error-estimates
RUN pip install ./analytics-exploratory-analysis
RUN pip install ./analytics-gap-si-surface
RUN pip install ./analytics-grain-boundaries
RUN pip install ./analytics-kaggle-competition
RUN pip install ./analytics-krr4mat
RUN pip install ./analytics-nn-regression
RUN pip install ./analytics-perovskites-tolerance-factor
RUN pip install ./analytics-query-nomad-archive
RUN pip install ./analytics-sgd-alloys-oxygen-reduction-evolution
RUN pip install ./analytics-sgd-propylene-oxidation-hte
RUN pip install ./analytics-soap-atomic-charges
RUN pip install ./analytics-tcmi
RUN pip install ./analytics-tetradymite-PRM2020

# ================================================================================
# Switch back to jovyan to avoid accidental container runs as root
ARG BUILDER_BASE_IMAGE=jupyter/tensorflow-notebook:2022-06-27
FROM $BUILDER_BASE_IMAGE
# Read more: https://gitlab.com/qmml/qmmlpack
# ================================================================================
# Linux applications and libraries
# ================================================================================
USER root
RUN apt-get update \
&& apt-get install --yes --quiet --no-install-recommends \
gcc \
libgsl-dev \
lsof \
&& rm -rf /var/lib/apt/lists/*
# RUN pip install --no-cache-dir pytest
RUN mamba install --quiet --yes \
'pytest' \
&& mamba clean --all -f -y \
&& fix-permissions "${CONDA_DIR}" \
&& fix-permissions "/home/${NB_USER}"
# ================================================================================
# QMMLPACK
# ================================================================================
WORKDIR /opt/qmmlpack
COPY 3rdparty/qmmlpack .
# for the test TMPDIR has to be defined
ENV TMPDIR "/tmp/"
RUN ./make --debug --verbose cpp --include-path /usr/include/gsl/ --library-path /usr/lib/
RUN ./make --debug --verbose python --include-path /usr/include/gsl/ --library-path /usr/lib/
# RUN ./make --verbose install
ENV PYTHONPATH "/opt/qmmlpack/python:$PYTHONPATH"
# ================================================================================
# Testing (it also runs during the build process)
# ================================================================================
# RUN ./build/cpptests \
# && pytest ./python/tests/
ARG BUILDER_BASE_IMAGE=jupyter/tensorflow-notebook:2022-06-27
FROM $BUILDER_BASE_IMAGE
# Read more: https://github.com/libAtoms/QUIP/
# ================================================================================
# Linux applications and libraries
# ================================================================================
USER root
RUN apt-get update \
&& apt-get install --yes --quiet --no-install-recommends \
gfortran \
libblas-dev \
liblapack-dev \
openmpi-bin \
libopenmpi-dev \
libscalapack-openmpi-dev \
netcdf-bin \
libnetcdf-dev \
libhdf5-serial-dev \
&& rm -rf /var/lib/apt/lists/*
# ================================================================================
# QUIP + GAP + quippy
# ================================================================================
# All the QUIPs go here; added to path in the end.
WORKDIR /opt/quip
# QUIP for general use is the OpenMP version.
ENV QUIP_ARCH linux_x86_64_gfortran_openmp
ENV QUIP_ROOT /opt/quip
ENV QUIP_INSTALLDIR /opt/quip/bin
COPY 3rdparty/quip .
COPY configs/Makefile.inc build/${QUIP_ARCH}/
COPY configs/GIT_VERSION .
COPY configs/GAP_VERSION src/GAP/
RUN make \
&& make quippy \
&& make install-quippy
# ================================================================================
# Testing (it also runs during the build process)
# ================================================================================
# RUN ulimit -n 256 \
# && make test
ARG BUILDER_BASE_IMAGE=jupyter/tensorflow-notebook:2022-06-27
FROM $BUILDER_BASE_IMAGE
# Read more: https://sissopp_developers.gitlab.io/sissopp/quick_start/Installation.html
# ================================================================================
# Linux applications and libraries
# ================================================================================
USER root
RUN apt-get update \
&& apt-get install --yes --quiet --no-install-recommends \
build-essential g++ gfortran cmake git \
liblapack-dev libblas-dev \
zlib1g-dev \
libboost-mpi-dev libboost-filesystem-dev libboost-system-dev libboost-serialization-dev \
openssh-client \
dvipng \
&& rm -rf /var/lib/apt/lists/*
RUN mamba install --quiet --yes \
'numpy' \
'pandas' \
'scipy' \
'seaborn' \
'scikit-learn' \
'toml' \
'pytest' \
&& mamba clean --all -f -y \
&& fix-permissions "${CONDA_DIR}" \
&& fix-permissions "/home/${NB_USER}"
# ================================================================================
# SISSO++
# ================================================================================
WORKDIR /opt/sissopp
COPY 3rdparty/sissopp .
RUN mkdir build && cd build \
&& cmake -C ../cmake/toolchains/gnu_param_py.cmake -DEXTERNAL_BOOST=ON ../ \
&& make \
&& make install
# ================================================================================
# Testing
# ================================================================================
RUN pytest tests/pytest
# RUN cd build \
# && cmake test
**README.md (old revision)**

# Single-user Jupyter notebook image for analytics

This notebook folder contains the Dockerfile for a single-user notebook image which is based on the Jupyter docker stack.
In addition to the base notebook it contains QUIP with GAP and all of the necessary packages for the tools.
The Docker image is suitable for running/testing locally and for deploying it by the JupyterHub on a Kubernetes cluster.

## Short description

- Based on the jupyter/minimal-notebook docker image
- conda: python 2 and 3 environments
- compiled quip with gap

## Tutorials

Each tutorial can have its own repository. For practical reasons please use "analytics-" as a prefix for your tutorial.
You can add a new repository to the tutorials folder like:

```bash
cd tutorials
git submodule add https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-grain-boundaries.git grain-boundaries
```

Links for the old tutorials:
- https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-lab-base/tree/master/analysis-tools
- https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-toolkit-tutorials

## Building/using your own docker image

### Cloning the repository

```bash
git clone --recursive https://gitlab.mpcdf.mpg.de/nomad-lab/analytics.git
```

Alternatively you can clone it without the submodules:

```bash
git clone https://gitlab.mpcdf.mpg.de/nomad-lab/analytics.git
```

And you can initialise the submodules when needed:

```bash
git submodule update --init --recursive
```

### Building the docker image locally

```bash
docker build -t analytics-notebook:latest .
```

### Testing/developing the notebooks locally

- Use the following command to run the docker image locally with password-less sudo access:

```bash
docker run -d --rm \
    -p 8888:8888 \
    -v $PWD/tutorials:/home/jovyan/tutorials \
    -e GRANT_SUDO=yes \
    --user root \
    --name analytics-notebook \
    analytics-notebook:latest
```

- To get a bash shell in the container use the following command:

```bash
docker exec -it analytics-notebook start.sh
```

More info: https://jupyter-docker-stacks.readthedocs.io/en/latest/using/common.html?highlight=root#alternative-commands

### Useful tricks for Linux

- Avoiding tokens for development:

```bash
docker run --rm \
    -p 8889:8888 \
    -v $PWD/tutorials:/home/jovyan/tutorials \
    --name analytics-notebook \
    analytics-notebook:latest \
    start-notebook.sh \
    --NotebookApp.token=''
```

- You may consider using the '--rm' option, which deletes the container when it stops. Be careful: this way you will lose everything that was not stored in the mounted folder.

```bash
docker run \
    --rm \
    -p 8888:8888 \
    -v $PWD/tutorials:/home/jovyan/tutorials \
    --name analytics-notebook \
    analytics-notebook:latest
```

- For mounting a folder, you may need to use an absolute path or other tricks if the PWD environment variable is not accessible in your shell:

```bash
docker run \
    --rm \
    -p 8888:8888 \
    -v /path/for/the/tutorials:/home/jovyan/tutorials \
    --name analytics-notebook \
    analytics-notebook:latest
```

- You may need to change the user id in the container - by adding the "-e NB_UID=1001" and "--user root" options to your command - to have access to the mounted folders:

```bash
docker run \
    --rm \
    -p 8888:8888 \
    -v $PWD/tutorials:/home/jovyan/tutorials \
    --user root \
    -e NB_UID=1001 \
    --name analytics-notebook \
    analytics-notebook:latest
```

More information about the command line options: https://jupyter-docker-stacks.readthedocs.io/en/latest/using/common.html#notebook-options

## Update the list of tutorials

Run the following command to update the list of tutorials:

```bash
python generate_tutorials_json.py
```

## Continuous integration

Each commit triggers a build process on the GitLab Runner. Besides the latest tag there will be a unique tag (the same as the value of the git commit) available for explicitly tracking the version of the notebook for a cluster deployment.

### Using the docker image from the registry

1. Install docker on your machine
2. Log in to the image repository:
```bash
docker login gitlab-registry.mpcdf.mpg.de
```
3. Pull the image:
```bash
docker pull gitlab-registry.mpcdf.mpg.de/nomad-lab/analytics:latest
```
4. Create a container:
```bash
docker run -p 8888:8888 \
    -v $PWD/tutorials:/home/jovyan/tutorials \
    gitlab-registry.mpcdf.mpg.de/nomad-lab/analytics:latest
```

Note: the latest tag can be found on the following page:
https://gitlab.mpcdf.mpg.de/nomad-lab/analytics/container_registry

**README.md (new revision)**

# Single-user Jupyter notebook image for analytics

This notebook folder contains the Dockerfile for a single-user notebook image which is based on the Jupyter docker stack.
The Docker image is suitable for running/testing locally and for deploying it by the JupyterHub on a Kubernetes cluster.

**Please note that this documentation is just a draft. Some of the instructions may be incomplete.**

## Building/using your own Docker image

### Cloning the repository and checking out the develop branch

```bash
git clone --recursive https://gitlab.mpcdf.mpg.de/nomad-lab/analytics.git
cd analytics
git checkout develop
git submodule update --init
```

### Building the Docker image

```bash
docker build -t analytics:develop .
```

### Deploying the Docker image locally

```bash
docker run --rm -p 8888:8888 analytics:develop
```

### Create new tutorial

To create a new tutorial, you can use the template [./analytics-tutorial-template](https://gitlab.mpcdf.mpg.de/nomad-lab/analytics/-/tree/develop/analytics-tutorial-template). Substitute 'tutorial template' with a short name that identifies the tutorial.

Please observe the following:
- keep the underscore/hyphen convention used in all files, also within the directory;
- use lowercase letters for file names;
- clear the cell outputs before saving;
- update the 'setup.py' file with all Python packages that need to be installed with pip;
- update the 'metainfo.json' file with all metadata.

### Test new tutorial

If you move the new notebook directory into the 'tutorials' folder and then build the image, you can test the notebook in the NOMAD AI Toolkit environment.
To install functions that are defined in analytics-tutorial-template into the NOMAD AI Toolkit environment, add the command `RUN pip install analytics-tutorial-template` to the Dockerfile. Then:

```bash
mv analytics-tutorial-template tutorials
docker build -t analytics:develop .
docker run --rm -p 8888:8888 analytics:develop
```

The 'tutorial_template.ipynb' notebook will then appear in the tutorials list.

### Getting published

If you wish to publish your notebook in the NOMAD AI Toolkit, please contact us (ghiringhelli@fhi-berlin.mpg.de).

### Updating quip/quippy

```
cd 3rdparty/quip
./bin/gitversion > ../../configs/GIT_VERSION
./src/GAP/gapversion > ../../configs/GAP_VERSION
```