Commit 597dc961 authored by Thomas Purcell's avatar Thomas Purcell
Browse files

Merge branch 'classification' into 'master'

Classification

See merge request tpurcell/cpp_sisso!12
parents cead6f05 b873362b
Pipeline #83117 passed with stage
in 31 minutes and 32 seconds
......@@ -7,7 +7,8 @@ stages:
build-intel:
stage: build
script:
- module load intel impi mkl cmake anaconda/3/2019.03 mpi4py
- module load intel impi/2018.4 mkl cmake anaconda/3/2019.03 mpi4py git
- module load svn
- mkdir build_intel/
- cd build_intel/
- conda create -p sisso/ python=3.7 mkl numpy scipy pandas pytest seaborn
......@@ -19,7 +20,7 @@ build-intel:
- cd ../
- pytest tests
- cd tests/exec_test/
- ../../bin/sisso++
- mpiexec -n 2 ../../bin/sisso++
- python check_model.py
tags:
- docker
......@@ -27,7 +28,7 @@ build-intel:
build-gnu:
stage: build
script:
- module load gcc impi mkl cmake anaconda/3/2019.03 mpi4py
- module load gcc impi/2018.4 intel mkl cmake anaconda/3/2019.03 mpi4py git
- mkdir build_gcc/
- cd build_gcc/
- conda create -p sisso/ python=3.7 mkl numpy scipy pandas pytest seaborn
......
cmake_minimum_required(VERSION 3.10)
# Include External Project options
include( ExternalProject )
# set the project name
project(sisso++ VERSION 0.1 LANGUAGES CXX)
......@@ -18,7 +21,7 @@ set(CMAKE_CXX_EXTENSIONS OFF)
# compiler options
option(EXTERNAL_BOOST "Use an external boost library" OFF)
option(USE_PYTHON "Whether to compile with python binding support" OFF)
option(USE_PYTHON "Whether to compile with python binding support" ON)
if(USE_PYTHON)
message(STATUS "USE PYTHON True")
......@@ -136,6 +139,7 @@ else(EXTERNAL_BOOST)
set(Boost_INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/boost)
set(Boost_INCLUDE_DIRS ${Boost_INSTALL_DIR}/include)
set(Boost_LIBRARY_DIRS ${Boost_INSTALL_DIR}/lib)
set(ENV{BOOST_ROOT} ${Boost_INSTALL_DIR})
string(COMPARE EQUAL ${CMAKE_CXX_COMPILER_ID} "AppleClang" AP_CLANG_COMP)
string(COMPARE EQUAL ${CMAKE_CXX_COMPILER_ID} "Clang" CLANG_COMP)
......@@ -268,6 +272,49 @@ set(MPI_LIBRARIES, ${MPI_CXX_LIBRARIES})
list(GET MPI_CXX_LIBRARIES 0 MPI_LIBRARY)
get_filename_component(MPI_DIR ${MPI_LIBRARY} DIRECTORY)
# Coin-Clp for linear programming
#
# Clp is fetched from COIN_CLP_URL and configured/built/installed at build time
# through ExternalProject. Two imported targets (Clp, CoinUtils) and the
# COIN_CLP_* variables expose the installed headers and shared libraries to the
# rest of the build.
set(COIN_CLP_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/coin-Clp/build/")
set(COIN_CLP_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/coin-Clp/bin/")
set(COIN_CLP_INCLUDE_DIRS "${COIN_CLP_INSTALL_DIR}/include/;${COIN_CLP_INSTALL_DIR}/include/coin")
set(COIN_CLP_LIBRARY_DIRS "${COIN_CLP_INSTALL_DIR}/lib")
set(COIN_CLP_URL "https://www.coin-or.org/download/source/Clp/Clp-1.17.6.tgz")

# Assemble a single "-L<dir> -l<name> ..." string from LAPACK_LIBRARIES; it is
# handed to the configure wrapper script as one argument.
set(COIN_CLP_BLAS_LAPACK_LIBS "-L${LAPACK_DIR}")
foreach(LAPACK_LIB_FILE IN LISTS LAPACK_LIBRARIES)
    get_filename_component(LAPACK_LIB "${LAPACK_LIB_FILE}" NAME_WE)
    # Only the leading "lib" prefix is turned into "-l"; a "lib" substring
    # elsewhere in the library name must be left alone.
    string(REGEX REPLACE "^lib" "-l" LAPACK_LIB "${LAPACK_LIB}")
    string(APPEND COIN_CLP_BLAS_LAPACK_LIBS " ${LAPACK_LIB}")
endforeach()
message(STATUS "COIN_CLP_BLAS_LAPACK_LIBS = ${COIN_CLP_BLAS_LAPACK_LIBS}")

# Arguments to clp_configure.sh: <install prefix> <blas/lapack flags> <C++ compiler>
set(COIN_CLP_CONFIGURE_COMMAND bash ${CMAKE_CURRENT_LIST_DIR}/cmake/coin-Clp/clp_configure.sh ${COIN_CLP_INSTALL_DIR} ${COIN_CLP_BLAS_LAPACK_LIBS} ${CMAKE_CXX_COMPILER})

ExternalProject_Add(
    external_Clp
    PREFIX "external/coin-Clp"
    URL ${COIN_CLP_URL}
    CONFIGURE_COMMAND "${COIN_CLP_CONFIGURE_COMMAND}"
    BUILD_COMMAND make -j ${BOOST_BUILD_N_PROCS}
    INSTALL_COMMAND make install
    BINARY_DIR "${COIN_CLP_BUILD_DIR}"
    INSTALL_DIR "${COIN_CLP_INSTALL_DIR}"
)

# The include directories only appear once external_Clp has been installed.
# Create them up front so that imported targets referring to them do not point
# at non-existent paths when the build system is generated.
file(MAKE_DIRECTORY "${COIN_CLP_INSTALL_DIR}/include/coin")

add_library( Clp SHARED IMPORTED )
set_property( TARGET Clp PROPERTY IMPORTED_LOCATION ${COIN_CLP_LIBRARY_DIRS}/libClp.so )
set_property( TARGET Clp PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${COIN_CLP_INCLUDE_DIRS} )
# Anything that depends on Clp must wait for the external project to be built.
add_dependencies(Clp external_Clp)

add_library( CoinUtils SHARED IMPORTED )
set_property( TARGET CoinUtils PROPERTY IMPORTED_LOCATION ${COIN_CLP_LIBRARY_DIRS}/libCoinUtils.so )
set_property( TARGET CoinUtils PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${COIN_CLP_INCLUDE_DIRS} )
# CoinUtils is produced by the same external build as Clp.
add_dependencies(CoinUtils external_Clp)

set(COIN_CLP_LIBRARIES "${COIN_CLP_LIBRARY_DIRS}/libClp.so;${COIN_CLP_LIBRARY_DIRS}/libCoinUtils.so")
include_directories(${COIN_CLP_INCLUDE_DIRS})
include_directories(${CMAKE_CURRENT_LIST_DIR}/src)
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/src)
......@@ -72,7 +72,7 @@ Once installed you should have access to the python interface via `import cpp_si
### Input files
To see a sample of the input files look in `~/sisso++/main directory/test/`
To see a sample of the input files, look in the `tests/exec_test` directory inside the main `sisso++` directory.
To use the code two files are necessary: `sisso.json` and `data.csv`.
`data.csv` stores all the data for the calculation in a `csv` file.
......@@ -98,6 +98,10 @@ The expression of the column where the task identification is stored. (Default:
The set of operators to use to combine the features during feature creation. (If empty, use all available operators)
#### `calc_type`
The type of calculation to run: either regression or classification.
#### `desc_dim`
The maximum dimension of the model to be created
......@@ -144,7 +148,9 @@ Fraction (in decimal form) of the data to use as a test set (Default: 0.0 if `le
#### `fix_intercept`
If true set the intercept to 0.0 for all models (Default: false)
If true, set the intercept to 0.0 for all regression models (Default: false).
This option does not work for classification.
#### `max_feat_cross_correlation`
......@@ -187,3 +193,4 @@ With this file the model can be perfectly recreated using the python binding.
### Using the Python Library
To see how the python interface can be used look at `examples/python_interface_demo.ipynb`
If you get an error about not being able to load MKL libraries, you may have to run `conda install numpy` to get proper linking.
#! /usr/bin/bash
# Run the Coin-Clp configure script for an out-of-source build.
#
# Usage: clp_configure.sh <install-prefix> <blas-lapack-flags> <cxx-compiler>
#   $1  forwarded to configure as --prefix
#   $2  forwarded as both --with-lapack-lib and --with-blas-lib
#   $3  forwarded as CXX
#
# The configure script is located relative to the current working directory
# (../src/external_Clp/configure), so this wrapper must be invoked from a
# directory that is a sibling of that src/ tree.
#
# Every positional parameter is quoted so that paths or flag strings containing
# spaces reach configure as a single argument.
../src/external_Clp/configure -C CXX="$3" --prefix="$1" --with-lapack-lib="$2" --with-blas-lib="$2"
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from cpp_sisso import generate_fs_sr_from_csv, Model\n",
"\n",
"feat_sapce, sisso = generate_fs_sr_from_csv(\n",
" df=\"data.csv\",\n",
" prop_key=\"energy_diff\",\n",
" allowed_ops=\"all\",\n",
" cols=\"all\",\n",
" max_phi=2,\n",
" n_sis_select=100,\n",
" max_dim=3,\n",
" n_residuals=1,\n",
" task_key=None,\n",
" leave_out_frac=0.10,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"sisso.fit()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0.21840220709133837, 0.1932239811232758, 0.1484061987754675]\n"
]
},
{
"data": {
"text/plain": [
"[<matplotlib.lines.Line2D at 0x148ef1a968d0>]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD4CAYAAADiry33AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAm+ElEQVR4nO3deXxV9bnv8c+TiSnMCWMIGUQQmYQwBhlq7QGHBmegULAohOHaHmtPba/tsbfTscdre3plFEQUhTow1TrUWgFJGBJQGQUzMARQwjwTkvzuH9mepjmB7ECStbPzfb9eeZm11m/t/WS5/Lr2en57b3POISIiwSvE6wJERKR6KehFRIKcgl5EJMgp6EVEgpyCXkQkyIV5XUB5oqKiXFxcnNdliIjUGps3bz7qnIsub1tABn1cXByZmZlelyEiUmuY2b4rbdOtGxGRIOdX0JvZCDPbbWZZZvZkOdu/Y2ZbfT/pZtbTt76DmX1kZrvMbIeZfb+q/wAREbm6Cm/dmFkoMBO4HcgDMsxslXNuZ6lhucBQ59wJMxsJzAP6A4XAD51zW8ysMbDZzD4os6+IiFQjf67o+wFZzrkc51wBsBRIKT3AOZfunDvhW9wAxPjWH3bObfH9fgbYBbSvquJFRKRi/gR9e+BAqeU8rh7Wk4B3y640szjgFmBjeTuZ2WQzyzSzzPz8fD/KEhERf/gT9FbOunI/Cc3MhlMS9D8usz4SeAv4gXPudHn7OufmOeeSnHNJ0dHlzhASEZFr4M/0yjygQ6nlGOBQ2UFm1gOYD4x0zh0rtT6ckpB/1Tm37PrKFRGRyvLnij4D6GRm8WYWAYwGVpUeYGaxwDJgvHNuT6n1BiwAdjnnnqu6ssv3xw+/ID3rKProZRGRf6gw6J1zhcAM4H1KmqmvO+d2mFmqmaX6hv0caAnMMrNPzezrdzslA+OBb/jWf2pmd1T9nwFnLl5m8YZ9jJ2/kVEz03hv+2GKixX4IiIWiFe/SUlJ7lreGXvxchHLthxk7tps9h07T0J0I6YMSWDULe2pFxZaDZWKiAQGM9vsnEsqd1swBf3Xiood724/zOzV2ew4dJrWTeoxaXA8Y/t3JLJeQH7qg4jIdalzQf815xzrso4ye3U26dnHaFI/jO8OjGNichxRkfWqoFIRkcBQZ4O+tM8OnGTOmmze2/ElEaEhPJjUgUdvTSC2ZcMqfR4RES8o6EvJzj/LC2tzeGtLHkXFjrt6tCN1aCJd2zWplucTEakJCvpyfHX6Ii+uy2Xxhn2cKyhi6I3RTB2WSP/4FpTMChURqT0U9Fdx6vxlFm/cx8K0XI6eLaBXh2ZMHZbI7Te1JiREgS8itYOC3g8XLxfxxuY8Xlibw/7j50mMbsSUoYmM6tWeiDB9bL+IBDYFfSUUFhXzzvYvmb06m12HT9OmSX0euTWe0f1iNTVTRAKWgv4aOOdY+8VRZq/OYkPOcZo2COe7AzsycVAcLTU1U0QCjIL+On2y/wRz1mTz/o6vqB8ewkNJHXjk1gQ6tNDUTBEJDAr6KpJ15Axz1+Sw4tODFDu4u0dbUocl0qWNpmaKiLcU9FXs8KkLLPg4l9c27ed8QRHDO0czddgN9I1rrqmZIuIJBX01OXm+gFfW72Nh+l6OnyugT8fmpA5N5LYurTQ1U0RqlIK+ml0oKOKNzQeYtzaHvBMX6NQqkilDE0np1Y7wUE3NFJHqp6CvIYVFxfxlW8mnZn7+5RnaNa3PI7cmMLpfBxpGaGqmiFQfBX0Nc86xenc+s9dksyn3OM0ahjNhYBwTBsXRolGE1+WJSBBS0Hto876SqZkf7PyKBuGhPNS3A4/cGk9Mc03NFJGqo6APAF98dYa5a3NY8clBAL7dsx1ThibSuU1jjysTkWCgoA8gh05eYP7HuSzZtJ8Ll4u4rUsrpg5LJCmuhdeliUgtdrWg92tKiJmNMLPdZpZlZk+Ws/07ZrbV95NuZj1LbXvRzI6Y2f
Zr/xOCR7tmDfj53V1Jf/Ib/Os3b2TL/hPcP2c9989O58NdX+kLzUWkylV4RW9mocAe4HYgD8gAxjjndpYaMwjY5Zw7YWYjgaedc/1924YAZ4GXnXPd/CkqmK/oyzpfUMjrGQd44eNcDp68QOfWjZkyNIG7e2pqpoj473qv6PsBWc65HOdcAbAUSCk9wDmX7pw74VvcAMSU2rYWOH5NldcBDSPCmJgcz+ofDeO5B3vicDz++mcM+8/VvJSWy4WCIq9LFJFazp+gbw8cKLWc51t3JZOAdytbiJlNNrNMM8vMz8+v7O61XnhoCPf2juG97w9hwYQk2jWrz9N/3knyM3/nv/72BSfOFXhdoojUUv68i6e89/KXe7/HzIZTEvSDK1uIc24eMA9Kbt1Udv9gERJi3HZTa267qTUZe48zZ3U2v//bHuauzWZ031geuTWeds0aeF2miNQi/gR9HtCh1HIMcKjsIDPrAcwHRjrnjlVNeXVb37gW9J3Ygt1fnmHummwWrd/Ly+v3ktKrPalDE+jUWlMzRaRi/ty6yQA6mVm8mUUAo4FVpQeYWSywDBjvnNtT9WXWbZ3bNOa5h3qx5kfDGDegI3/Zdojbf7+WR1/OZPO+ExU/gIjUaX7NozezO4A/AKHAi865X5tZKoBzbo6ZzQfuA/b5din8uvtrZkuAYUAU8BXw7865BVd7vro06+ZaHD9XwKL0vSxav5eT5y/TL74FU4cmMqxztD4mWaSO0humgtS5S4X8KeMAL3ycw+FTF+nSpjFThyVyZ/e2hGlqpkidoqAPcgWFxaz67BBz1mSTdeQsMc0bMHlIAg/06UCDiFCvyxORGqCgryOKix0ffn6E2auz2LL/JC0aRfDwoDi+OzCOpg3DvS5PRKqRgr6Occ6RsfcEs1dn8dHufBpFhDKmXyyTbo2nbVNNzRQJRgr6OmzX4dPMXZPNn7ceJsRgVK/2TBmayA2tIr0uTUSqkIJeOHD8PPM/zmFpxgEKior5VtfWpA5N5JbY5l6XJiJVQEEv/+3o2UssSt/Ly+v3cerCZQYktGDqsBsY0ilKUzNFajEFvfwPZy8VsnTTfuZ/nMuXpy/StW0TUoclcke3NpqaKVILKejligoKi1nx6UHmrskmO/8csS0a8uiQBB7oE0P9cE3NFKktFPRSoeJixwe7vmL26mw+PXCSqMgIHk6OZ9yAjjRtoKmZIoFOQS9+c86xMfc4s1dns2ZPPpH1whjbP5ZJg+Np3aS+1+WJyBUo6OWa7Dh0irlrcnh76yHCQkK4t3d7Jg9JICFaUzNFAo2CXq7L/mPnmfdxNm9k5lFQVMyIm9uQOjSRnh2aeV2aiPgo6KVK5J+5xEvpuby8fh9nLhYyKLElU4clMvgGTc0U8ZqCXqrUmYuXWeKbmnnkzCVubteEqcMSGdmtLaEhCnwRLyjopVpcKixixScHmbsmh5yj5+jYsiGThyRwX29NzRSpaQp6qVZFxY4Pdn7J7NXZfJZ3iqjIenxvcBzjBnSkSX1NzRSpCQp6qRHOOdZnH2P2mmw+/uIojeuFMXZALJOS42mlqZki1UpBLzVu+8FTzFmTzTvbDhMWEsJ9fWKYMiSBuKhGXpcmEpQU9OKZvUfPMe/jHN7cnEdhUTEju7UldWgi3WOael2aSFC5WtD79elVZjbCzHabWZaZPVnO9u+Y2VbfT7qZ9fR3XwlucVGN+M093Vn34+FMGZrI2j353P38OsbN30ha1lEC8UJDJNhUeEVvZqHAHuB2IA/IAMY453aWGjMI2OWcO2FmI4GnnXP9/dm3PLqiD16nL17mtY37WbAul/wzl+gR05TUoYn8y81tNDVT5Dpc7xV9PyDLOZfjnCsAlgIppQc459Kdcyd8ixuAGH/3lbqlSf1wUocm8vG/Dec393Tn9IXLTHt1C998bg1LNu3nUmGR1yWKBB1/gr49cKDUcp5v3ZVMAt6t7L5mNtnMMs0sMz8/34+ypDarHx7K2P6xfP
jDYcwc25vIemH8ZNk2bn3mI+auyebMxctelygSNPwJ+vJeT5d7v8fMhlMS9D+u7L7OuXnOuSTnXFJ0dLQfZUkwCA0x7uzRllUzklk8qT+dWkfy23c/Z9B//J3fvfc5+WcueV2iSK0X5seYPKBDqeUY4FDZQWbWA5gPjHTOHavMviJmxuBOUQzuFMXWvJPMWZPN7DXZzF+XywN9Ypg8JIGOLTU1U+Ra+NOMDaOkoXobcJCShupY59yOUmNigb8D33XOpVdm3/KoGSsAOflneeHjHN7afJDC4mLu6F4yNbNbe03NFCnruufRm9kdwB+AUOBF59yvzSwVwDk3x8zmA/cB+3y7FH79hOXtW9HzKeiltCOnL7IgLZdXN+zn7KVCbu0UxdRhiQxMaKlPzRTx0RumJCicunCZVzfu48V1uRw9W0DPmKZMHZbIt7q2IURTM6WOU9BLULl4uYg3N+cxb20O+4+fJyG6EalDEkm5pR31wvSpmVI3KeglKBUWFfPu9pJPzdx5+DStm9TjkcEJjOkfS2Q9f+YZiAQPBb0ENeccH39xlNmrs1mfc4wm9cP47sA4JibHERVZz+vyRGqEgl7qjE8PnGTO6mze3/klEaEhPJjUgclDEujQoqHXpYlUKwW91DnZ+WeZtyaHZZ/kUezgTt/UzK7tmnhdmki1UNBLnfXlqYu8mJbLqxv2ca6giGGdo0kdmkj/+BaamilBRUEvdd6p85d5ZcNeFqbt5di5Am6JbUbq0ERuv6m1pmZKUFDQi/hcvFzEG5kHmPdxDgeOXyAxuhGpQxNJ6dWeiDC/vp5BJCAp6EXKKCwq5i/bDjN7dTaff3mGtk3rM2lwPGP6xdJIUzOlFlLQi1yBc47Ve/KZszqbjbnHadognAkDOzJhUBwtNTVTahEFvYgftuw/wZzV2fx151fUDw/hoaQOPHKrpmZK7aCgF6mErCNnmLMmhxWfHMQBd/doS+qwRLq00dRMCVwKepFrcOjkBRasy2XJpv2cLyhieOdofvitzvqYZAlI1/udsSJ1UrtmDfjZXV1Jf/IbPH77jXyWd4p7Z6ez/JM8r0sTqRQFvUgFmjWM4LHbOvHh40PpHduMf/3TZzzz3ucUFwfeq2GR8ijoRfzUvFEEr0zqz9j+scxenc2UxZs5d6nQ67JEKqSgF6mE8NAQfj2qG0/f3ZUPd33FfbPTyTtx3uuyRK5KQS9SSWbGxOR4Xnq4HwdPXmDUzDQ27zvudVkiV6SgF7lGQ26MZvm0ZCLrhTFm3kbe2qwmrQQmv4LezEaY2W4zyzKzJ8vZ3sXM1pvZJTN7osy275vZdjPbYWY/qKK6RQLCDa0iWTE9maS45vzwjc/47bu7KFKTVgJMhUFvZqHATGAk0BUYY2Zdyww7DjwGPFtm327Ao0A/oCdwl5l1qoK6RQJGs4YRLPpeP8YNiGXumhymvJLJWTVpJYD4c0XfD8hyzuU45wqApUBK6QHOuSPOuQzgcpl9bwI2OOfOO+cKgTXAPVVQt0hACQ8N4VejuvPLlJv5aHc+981K58BxNWklMPgT9O2BA6WW83zr/LEdGGJmLc2sIXAH0KG8gWY22cwyzSwzPz/fz4cXCSzjB8ax6OF+HD51gZSZaWTsVZNWvOdP0Jf3rQx+3YR0zu0CngE+AN4DPgPKfU3rnJvnnEtyziVFR0f78/AiAWlwpyhWTE+mWYNwxr6wgTcyD1S8k0g18ifo8/jnq/AY4JC/T+CcW+Cc6+2cG0LJvfwvKleiSO2TEB3J8mnJ9I9vyY/e3Mqv/7JTTVrxjD9BnwF0MrN4M4sARgOr/H0CM2vl+2cscC+w5FoKFaltmjYM56WH+zJhYEde+DiXR1/O5MzFsm0skepXYdD7mqgzgPeBXcDrzrkdZpZqZqkAZtbGzPKAx4GnzCzPzL7+TNe3zGwn8GdgunPuRLX8JSIBKCw0hF+kdONXo7qxZk8+981OZ/8xNWmlZuljikVqSHrWUaa+uoUQgz
nj+tA/oaXXJUkQ0ccUiwSAQTeUNGmbN4pg3IKN/Cljv9clSR2hoBepQfFRjVg+LZkBCS358Vvb+OXbatJK9VPQi9Swpg3CWTixLxMHxbFgXS7feymD02rSSjVS0It4ICw0hKe/fTO/uac7aVlHuXdWOvuOnfO6LAlSCnoRD43tH8srk/pz9OwlUmamsT77mNclSRBS0It4bGBiS1ZMSyYqsh7jF2zktY1q0krVUtCLBIC4qEYsmzaIwZ2i+OnybTy9ageFRcVelyVBQkEvEiCa1A9nwYS+TBocz0vpe/neokxOXVCTVq6fgl4kgISGGD+7qyvP3Ned9dlHuWdWGrlH1aSV66OgFwlAD/WNZfGk/pw4V8ComWmkZx31uiSpxRT0IgGqf0JLVk4fTKvG9Rj/4iYWb9jndUlSSynoRQJYbMuGLJs2iCGdonhqxXb+feV2NWml0hT0IgGucf1w5k/oy6O3xrNo/T4mLszg1Hk1acV/CnqRWiA0xPjfd3bld/f3YGPuMe6ZlUZO/lmvy5JaQkEvUos8mNSB1x4dwMkLlxk1M411X6hJKxVT0IvUMn3jWrByejJtmzZgwsJNvLx+r9clSYBT0IvUQh1aNOStaYMY3jman6/cwVMrtnFZTVq5AgW9SC0VWS+MueOTmDI0gcUb9jNx4SZOni/wuiwJQAp6kVosNMT4ycibePaBnmTknmDUzDSyjqhJK/9MQS8SBO7vE8OSyf05e6mQe2alsXZPvtclSQDxK+jNbISZ7TazLDN7spztXcxsvZldMrMnymz7VzPbYWbbzWyJmdWvquJF5B/6dGzBiunJtG/WgIkLN7EwLRfn9DWF4kfQm1koMBMYCXQFxphZ1zLDjgOPAc+W2be9b32Sc64bEAqMroK6RaQcMc0b8tbUQdx2U2t+8eed/HT5djVpxa8r+n5AlnMuxzlXACwFUkoPcM4dcc5lAOW9XS8MaGBmYUBD4NB11iwiV9GoXhhzx/Vh6rBElmzaz/gFGzlxTk3ausyfoG8PHCi1nOdbVyHn3EFKrvL3A4eBU865v5Y31swmm1mmmWXm5+v+osj1CAkxfjyiC79/qCdb9p9k1Kw0so6c8bos8Yg/QW/lrPPrxp+ZNafk6j8eaAc0MrNx5Y11zs1zziU555Kio6P9eXgRqcA9t8SwdPIAzl0q4p6Z6azefcTrksQD/gR9HtCh1HIM/t9++SaQ65zLd85dBpYBgypXoohcj96xzVk5I5mYFg353ksZLFinJm1d40/QZwCdzCzezCIoaaau8vPx9wMDzKyhmRlwG7Dr2koVkWvVvlkD3kwdyO1dW/PLt3fyk2XbKChUk7auqDDonXOFwAzgfUpC+nXn3A4zSzWzVAAza2NmecDjwFNmlmdmTZxzG4E3gS3ANt/zzaumv0VErqJRvTBmf6cPM4bfwNKMA4xfsJHjatLWCRaIL+GSkpJcZmam12WIBK2Vnx7kR29upU2T+syfkMSNrRt7XZJcJzPb7JxLKm+b3hkrUgel9GrPnyYP4MLlIu6dlc7fP//K65KkGinoReqoW2Kbs2pGMnFRDZm0KJMX1uaoSRukFPQidVjbpg14fcpARnZrw6/f2cW/vbmVS4VFXpclVUxBL1LHNYwI4/kxvXnsGzfwxuY8xs3fyLGzl7wuS6qQgl5ECAkxHv9WZ/7fmFvYmneKlJlp7P5S76QNFgp6Eflvd/dsx+tTBlJQWMy9s9L4cJeatMFAQS8i/6Rnh2asmjGYhOhIHnk5k7lrstWkreUU9CLyP7RpWp/Xpwzkju5t+e27n/PEG2rS1mZhXhcgIoGpQUQoz4+5hU6tIvnD375g37FzzBnfh6jIel6XJpWkK3oRuSIz4wffvJGZY3uz/dApUp5PY9fh016XJZWkoBeRCt3Zoy1vTBlEYXEx981O5687vvS6JKkEBb2I+KV7TFNWzRhMp1aRTFm8mVmrs9SkrSUU9CLit9ZN6vOnKQO5q0c7fvfebn
74+mdcvKwmbaBTM1ZEKqV+eCh/HN2LTq0iee6DPeQeO8fc8X1o1bi+16XJFeiKXkQqzcx47LZOzP5Obz4/fIZRz6ex49Apr8uSK1DQi8g1G9m9LW+kDsQB989ez3vb1aQNRAp6Ebku3do3ZeX0ZDq3aUzq4s3M/EhN2kCjoBeR69aqSX2WTh5ASq92/Of7u/nBnz5VkzaAqBkrIlWifngof3ioFze2bsx/vr+bvcfO88L4PrRqoiat1/y6ojezEWa228yyzOzJcrZ3MbP1ZnbJzJ4otb6zmX1a6ue0mf2gCusXkQBiZkwffgNzxvVhz5dnSJmZxvaDatJ6rcKgN7NQYCYwEugKjDGzrmWGHQceA54tvdI5t9s518s51wvoA5wHlldB3SISwEZ0a8ObUwdiwANz1vPutsNel1Sn+XNF3w/Ics7lOOcKgKVASukBzrkjzrkM4PJVHuc2INs5t++aqxWRWuPmdk1ZOWMwN7VtzNRXt/DHD79Qk9Yj/gR9e+BAqeU837rKGg0sudJGM5tsZplmlpmfn38NDy8igSa6cT1ee3QA997Snuc+2MNjS9Wk9YI/QW/lrKvU/5bNLAL4NvDGlcY45+Y555Kcc0nR0dGVeXgRCWD1w0P5vw/25McjuvD21kM8NHc9X52+6HVZdYo/QZ8HdCi1HAMcquTzjAS2OOf0vWQidZCZMXVYInPH9eGLI2f59vPr2Jp30uuy6gx/gj4D6GRm8b4r89HAqko+zxiucttGROqGb93chremDiIsJIQH567n7a2VvWaUa1Fh0DvnCoEZwPvALuB159wOM0s1s1QAM2tjZnnA48BTZpZnZk182xoCtwPLquuPEJHa46a2TVg5I5lu7Zoy47VP+MPf9qhJW80sEA9wUlKSy8zM9LoMEalGlwqL+Omy7by1JY87e7Tl2ft70iAi1Ouyai0z2+ycSypvm94ZKyKeqBcWyrMP9ODG1pH8x3ufs//YeV74bhJtmuqdtFVNn3UjIp4xM6YMTeSF8Unk5Jc0aT87cNLrsoKOgl5EPPfNrq15a9ogIsJKmrSrPlOTtiop6EUkIHRp04SV05PpGdOMx5Z8wnN/3U1xceD1EGsjBb2IBIyWkfVY/Eh/HkyK4Y9/z2L6a1s4X1DodVm1noJeRAJKRFgIz9zXg6fuvIn3dnzJA3PWc/jUBa/LqtUU9CIScMyMR25N4MUJfdl37Dzffj6NT/af8LqsWktBLyIBa3iXViybNoj64SE8NG8DKz896HVJtZKCXkQC2o2tG7Ny+mB6dWjG95d+yrPvq0lbWQp6EQl4LRpFsHhSf0b37cDzH2Ux9dXNnLukJq2/FPQiUitEhIXw23u787O7uvLBzq+4f856Dp5Uk9YfCnoRqTXMjEmD41kwsS95x8+T8nwam/epSVsRBb2I1DrDO7di+fRBNKoXyph5G1i2Jc/rkgKagl5EaqUbWjVmxbRkendsxuOvf8Yz732uJu0VKOhFpNZq3iiCVyb1Z2z/WGavzmbKYjVpy6OgF5FaLTw0hF+P6sbTd3flw11fcd/sdPJOnPe6rICioBeRWs/MmJgcz0sP9+PgyQukPJ9G5t7jXpcVMBT0IhI0htwYzfJpyTSuH8bYFzby5mY1aUFBLyJB5oZWkayYnkxSXHOeeOMzfvvOLorqeJPWr6A3sxFmttvMsszsyXK2dzGz9WZ2ycyeKLOtmZm9aWafm9kuMxtYVcWLiJSnWcMIFn2vH+MGxDJ3bQ5TXsnkbB1u0lYY9GYWCswERgJdgTFm1rXMsOPAY8Cz5TzEfwHvOee6AD2BXddVsYiIH8JDQ/jVqO78MuVmPtqdz32z0jlwvG42af25ou8HZDnncpxzBcBSIKX0AOfcEedcBnC59HozawIMARb4xhU4505WReEiIv4YPzCORQ/34/CpC6TMTCOjDjZp/Qn69sCBUst5vnX+SADygYVm9omZzTezRuUNNLPJZpZpZpn5+fl+PryISMUGd4
pixfRkmjUIZ+wLG3g980DFOwURf4Leylnnb2cjDOgNzHbO3QKcA/7HPX4A59w851yScy4pOjraz4cXEfFPQnQky6cl0z++Jf/25lZ+9fbOOtOk9Sfo84AOpZZjAH+/oj0PyHPObfQtv0lJ8IuI1LimDcN56eG+TBjYkfnrcnlkUQZnLl6ueMdazp+gzwA6mVm8mUUAo4FV/jy4c+5L4ICZdfatug3YeU2ViohUgbDQEH6R0o1fjerG2i+Oct/sdPYfC+4mbYVB75wrBGYA71MyY+Z159wOM0s1s1QAM2tjZnnA48BTZpbna8QC/C/gVTPbCvQCflMNf4eISKWMG9CRV77Xj69OXyJl5jo25BzzuqRqY84F3j2qpKQkl5mZ6XUZIlIH5B49x6RFGew/dp5fjerG6H6xXpd0Tcxss3MuqbxtemesiNRp8VGNWD4tmYGJLXly2Tb+z593UlhU7HVZVUpBLyJ1XtMG4Syc2JeJg+J4MS2XSYsyOR1ETVoFvYgIJU3ap799M7+5pztpWUe5d1Y6+46d87qsKqGgFxEpZWz/WF6Z1J+jZy+RMjON9dm1v0mroBcRKWNgYktWTEsmKrIe4xds5LWN+70u6boo6EVEyhEX1Yhl0wYxuFMUP12+jadX7ai1TVoFvYjIFTSpH86CCX2ZNDiel9L38vBLGZy6UPuatAp6EZGrCA0xfnZXV565rzsbco5xz6w0co/Wriatgl5ExA8P9Y1l8aT+nDhXwKiZaaRnHfW6JL8p6EVE/NQ/oSUrpw+mVeN6jH9xE69s2Od1SX5R0IuIVEJsy4YsmzaIIZ2i+NmK7fx85faAb9Iq6EVEKqlx/XDmT+jLo7fG8/L6fUxcmMGp84HbpFXQi4hcg9AQ43/f2ZXf3d+DjbklTdqc/LNel1UuBb2IyHV4MKkDrz06gJMXLjNqZhrrvgi8Jq2CXkTkOvWNa8HK6cm0bdqACQs38fL6vV6X9E8U9CIiVaBDi4a8NW0QwztH8/OVO3hqxTYuB0iTVkEvIlJFIuuFMXd8ElOGJrB4w34mvLiJk+cLvC5LQS8iUpVCQ4yfjLyJZx/oSebeE4yamUbWEW+btAp6EZFqcH+fGJZM7s/ZS4XcMyuNtXvyPavFr6A3sxFmttvMsszsyXK2dzGz9WZ2ycyeKLNtr5ltM7NPzUxfBCsidUafji1YMT2Z9s0aMHHhJham5eLF93RXGPRmFgrMBEYCXYExZta1zLDjwGPAs1d4mOHOuV5X+uJaEZFgFdO8IW9NHcRtN7XmF3/eyU+Xb6/xJq0/V/T9gCznXI5zrgBYCqSUHuCcO+KcywAC961hIiIeaVQvjLnj+jB1WCJLNu1n/IKNnDhXc01af4K+PXCg1HKeb52/HPBXM9tsZpOvNMjMJptZppll5ud7dy9LRKQ6hIQYPx7Rhd8/1JMt+0+SMjONL746UzPP7ccYK2ddZW4yJTvnelNy62e6mQ0pb5Bzbp5zLsk5lxQdHV2JhxcRqT3uuSWGpZMHcL6giHtnpfPR7iPV/pz+BH0e0KHUcgxwyN8ncM4d8v3zCLCckltBIiJ1Vu/Y5qyckUxMi4ZMeimDBeuqt0nrT9BnAJ3MLN7MIoDRwCp/HtzMGplZ469/B74FbL/WYkVEgkX7Zg14M3Ugt3dtzS/f3slPlm2joLB6mrRhFQ1wzhWa2QzgfSAUeNE5t8PMUn3b55hZGyATaAIUm9kPKJmhEwUsN7Ovn+s159x71fKXiIjUMo3qhTH7O3147oM9PP9RFjlHz7FwYl8a1aswmivFr0dzzr0DvFNm3ZxSv39JyS2dsk4DPa+nQBGRYBYSYjzxL53p1DqStKyjNIwIrfLnqNr/bYiIyDVJ6dWelF6VmdDoP30EgohIkFPQi4gEOQW9iEiQU9CLiAQ5Bb2ISJBT0IuIBDkFvYhIkFPQi4gEOfPi204qYmb5wL5r3D0KOFqF5VQV1VU5qqtyVFflBGNdHZ1z5X70b0AG/f
Uws8xA/CYr1VU5qqtyVFfl1LW6dOtGRCTIKehFRIJcMAb9PK8LuALVVTmqq3JUV+XUqbqC7h69iIj8s2C8ohcRkVIU9CIiQa7WBL2ZvWhmR8ys3O+ctRJ/NLMsM9tqZr1LbRthZrt9256s4bq+46tnq5mlm1nPUtv2mtk2M/vUzDJruK5hZnbK99yfmtnPS23z8nj9qFRN282syMxa+LZV5/HqYGYfmdkuM9thZt8vZ0yNn2N+1lXj55ifddX4OeZnXTV+jplZfTPbZGaf+er6RTljqu/8cs7Vih9gCNAb2H6F7XcA7wIGDAA2+taHAtlAAhABfAZ0rcG6BgHNfb+P/Lou3/JeIMqj4zUMeLuc9Z4erzJj7wb+XkPHqy3Q2/d7Y2BP2b/bi3PMz7pq/Bzzs64aP8f8qcuLc8x3zkT6fg8HNgIDaur8qjVX9M65tcDxqwxJAV52JTYAzcysLdAPyHLO5TjnCoClvrE1UpdzLt05d8K3uIHyv1u3yvlxvK7E0+NVxhhgSVU999U45w4757b4fj8D7ALKfq9bjZ9j/tTlxTnm5/G6Ek+PVxk1co75zpmzvsVw30/ZmTDVdn7VmqD3Q3vgQKnlPN+6K633wiRK/o/9NQf81cw2m9lkD+oZ6Hsp+a6Z3exbFxDHy8waAiOAt0qtrpHjZWZxwC2UXHWV5uk5dpW6Sqvxc6yCujw7xyo6XjV9jplZqJl9ChwBPnDO1dj5FUxfDm7lrHNXWV+jzGw4Jf8RDi61Otk5d8jMWgEfmNnnvivemrCFks/GOGtmdwArgE4EyPGi5CV1mnOu9NV/tR8vM4uk5D/8HzjnTpfdXM4uNXKOVVDX12Nq/ByroC7PzjF/jhc1fI4554qAXmbWDFhuZt2cc6V7VdV2fgXTFX0e0KHUcgxw6Crra4yZ9QDmAynOuWNfr3fOHfL98wiwnJKXaDXCOXf665eSzrl3gHAziyIAjpfPaMq8pK7u42Vm4ZSEw6vOuWXlDPHkHPOjLk/OsYrq8uoc8+d4+dT4OeZ77JPAakpeTZRWfedXVTUbauIHiOPKzcU7+edGxibf+jAgB4jnH42Mm2uwrlggCxhUZn0joHGp39OBETVYVxv+8Ya5fsB+37Hz9Hj5tjel5D5+o5o6Xr6//WXgD1cZU+PnmJ911fg55mddNX6O+VOXF+cYEA008/3eAPgYuKumzq9ac+vGzJZQ0sWPMrM84N8paWjgnJsDvENJ1zoLOA887NtWaGYzgPcp6V6/6JzbUYN1/RxoCcwyM4BCV/LpdK0pefkGJf8iX3POvVeDdd0PTDWzQuACMNqVnFVeHy+Ae4C/OufOldq1Wo8XkAyMB7b57qMC/JSSEPXyHPOnLi/OMX/q8uIc86cuqPlzrC2wyMxCKbmT8rpz7m0zSy1VV7WdX/oIBBGRIBdM9+hFRKQcCnoRkSCnoBcRCXIKehGRIKegFxEJcgp6EZEgp6AXEQly/x94Tjq4ooxUiAAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"print([models[0].test_rmse for models in sisso.models])\n",
"plt.plot([1, 2, 3], [models[0].test_rmse for models in sisso.models])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
%% Cell type:code id: tags:
``` python
from cpp_sisso import generate_fs_sr_from_csv, Model
feat_sapce, sisso = generate_fs_sr_from_csv(
df="data.csv",
prop_key="energy_diff",
allowed_ops="all",
cols="all",
max_phi=2,
n_sis_select=100,
max_dim=3,
n_residuals=1,
task_key=None,
leave_out_frac=0.10,
)
```
%% Cell type:code id: tags:
``` python
sisso.fit()
```
%% Cell type:code id: tags:
``` python
import matplotlib.pyplot as plt
print([models[0].test_rmse for models in sisso.models])
plt.plot([1, 2, 3], [models[0].test_rmse for models in sisso.models])
```
%%%% Output: stream
[0.21840220709133837, 0.1932239811232758, 0.1484061987754675]
%%%% Output: execute_result
[<matplotlib.lines.Line2D at 0x148ef1a968d0>]
%%%% Output: display_data
%% Cell type:code id: tags:
``` python
```
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from cpp_sisso import SISSOClassifier, generate_phi_0_from_csv, FeatureSpace\n",
"\n",
"phi_0, prop_unit, prop, prop_test, task_sizes_train, task_sizes_test, leave_out_inds = generate_phi_0_from_csv(\n",
" \"data.csv\", \"class\", cols=\"all\", task_key=None, leave_out_frac=0.0, leave_out_inds=None\n",
")\n",
"\n",
"# Load a feature space from a predefined text file\n",
"feat_space = FeatureSpace(\n",
" \"phi.txt\", \n",
" phi_0, \n",
" task_sizes_train, \n",
" \"classification\",\n",
" 100, \n",
" 1.0\n",
")\n",
"\n",
"sisso = SISSOClassifier(\n",
" feat_space,\n",
" prop_unit,\n",
" prop.astype(float),\n",
" prop_test.astype(float),\n",
" task_sizes_train,\n",
" task_sizes_test,\n",
" leave_out_inds,\n",
" 2,\n",
" 2,\n",
" 2\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sisso.fit()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"\n",
"model = sisso.models[1][1]\n",
"\n",
"x_div = np.linspace(model.feats[1].value.min(), model.feats[1].value.max(), 101)\n",
"y_div = -1.0 * (model.coefs[0][1] * x_div + model.coefs[0][-1]) / model.coefs[0][0]\n",
"\n",
"x_range = model.feats[1].value.max() - model.feats[1].value.min()\n",
"y_range = model.feats[0].value.max() - model.feats[0].value.min()\n",
"\n",
"plt.plot(model.feats[1].value[:41], model.feats[0].value[:41], \".\")\n",
"plt.plot(model.feats[1].value[41:], model.feats[0].value[41:], \".\")\n",
"plt.plot(x_div, y_div)\n",
"plt.xlim([model.feats[1].value.min() - 0.05 * x_range, model.feats[1].value.max() + 0.05 * x_range])\n",
"plt.ylim([model.feats[0].value.min() - 0.05 * y_range, model.feats[0].value.max() + 0.05 * y_range])\n",
"plt.xlabel(\"D1\")\n",
"plt.ylabel(\"D2\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
%% Cell type:code id: tags:
``` python
from cpp_sisso import SISSOClassifier, generate_phi_0_from_csv, FeatureSpace
phi_0, prop_unit, prop, prop_test, task_sizes_train, task_sizes_test, leave_out_inds = generate_phi_0_from_csv(
"data.csv", "class", cols="all", task_key=None, leave_out_frac=0.0, leave_out_inds=None
)
# Load a feature space from a predefined text file
feat_space = FeatureSpace(
"phi.txt",
phi_0,
task_sizes_train,
"classification",
100,
1.0
)
sisso = SISSOClassifier(
feat_space,
prop_unit,
prop.astype(float),
prop_test.astype(float),
task_sizes_train,
task_sizes_test,
leave_out_inds,
2,
2,
2
)
```
%% Cell type:code id: tags:
``` python
sisso.fit()
```
%% Cell type:code id: tags:
``` python
# Plot the two features of one stored model against each other, together with
# the straight line defined by that model's coefficients.
import matplotlib.pyplot as plt
import numpy as np

# Entry [1][1] of the stored models.
# NOTE(review): presumably the second stored two-feature model — confirm the
# index order of sisso.models.
model = sisso.models[1][1]

# feats[1] is drawn on the horizontal axis, feats[0] on the vertical axis.
x_vals = model.feats[1].value
y_vals = model.feats[0].value

# Line where coefs[0]*feats[0] + coefs[1]*feats[1] + coefs[-1] == 0, solved for
# feats[0] and sampled at 101 points across the range of feats[1].
line_coefs = model.coefs[0]
x_div = np.linspace(x_vals.min(), x_vals.max(), 101)
y_div = -1.0 * (line_coefs[1] * x_div + line_coefs[-1]) / line_coefs[0]

x_range = x_vals.max() - x_vals.min()
y_range = y_vals.max() - y_vals.min()

# The CSV shown in this change lists 41 rows of class -1 followed by 41 rows of
# class 1; the split below assumes the feature values keep that row order
# (TODO confirm). Each group is drawn as its own series of point markers.
n_first_group = 41
for rows in (slice(None, n_first_group), slice(n_first_group, None)):
    plt.plot(x_vals[rows], y_vals[rows], ".")
plt.plot(x_div, y_div)

# Pad both axes by 5% of the data range on each side.
x_pad = 0.05 * x_range
y_pad = 0.05 * y_range
plt.xlim([x_vals.min() - x_pad, x_vals.max() + x_pad])
plt.ylim([y_vals.min() - y_pad, y_vals.max() + y_pad])

# NOTE(review): the x axis shows feats[1] but is labelled "D1", and the y axis
# shows feats[0] but is labelled "D2" — confirm the labels are not swapped.
plt.xlabel("D1")
plt.ylabel("D2")
plt.show()
```
%% Cell type:code id: tags:
``` python
```
,purcell,theobook151,27.09.2020 10:48,file:///home/purcell/.config/libreoffice/4;
\ No newline at end of file
Material,class,Z_A (nuc_charge),Z_B (nuc_charge),period_A (Unitless),period_B (Unitless),IP_A (eV_IP),IP_B (eV_IP),EA_A (eV_IP),EA_B (eV_IP),E_HOMO_A (eV),E_HOMO_B (eV),E_LUMO_A (eV),E_LUMO_B (eV),rs_A,rs_B,rp_A,rp_B,rd_A,rd_B
AgBr,-1,47,35,5,4,-8.0580997467,-12.649600029,-1.66659998894,-3.73930001259,-4.71000003815,-8.00100040436,-0.479000002146,0.708000004292,1.32000005245,0.75,1.87999999523,0.879999995232,2.97000002861,1.87000000477
AgCl,-1,47,17,5,3,-8.0580997467,-13.9018001556,-1.66659998894,-3.97079992294,-4.71000003815,-8.69999980927,-0.479000002146,0.574000000954,1.32000005245,0.680000007153,1.87999999523,0.759999990463,2.97000002861,1.66999995708
AgF,-1,47,9,5,2,-8.0580997467,-19.4043006897,-1.66659998894,-4.27349996567,-4.71000003815,-11.2939996719,-0.479000002146,1.25100004673,1.32000005245,0.409999996424,1.87999999523,0.370000004768,2.97000002861,1.42999994755
BaO,-1,56,8,6,2,-5.51569986343,-16.4332008362,0.277999997139,-3.00589990616,-3.34599995613,-9.19699954987,-2.1289999485,2.54099988937,2.15000009537,0.460000008345,2.63000011444,0.430000007153,1.35000002384,2.22000002861
BaS,-1,56,16,6,3,-5.51569986343,-11.7951002121,0.277999997139,-2.84489989281,-3.34599995613,-7.10599994659,-2.1289999485,0.64200001955,2.15000009537,0.740000009537,2.63000011444,0.850000023842,1.35000002384,2.36999988556
BaSe,-1,56,34,6,4,-5.51569986343,-10.9460000992,0.277999997139,-2.75099992752,-3.34599995613,-6.65399980545,-2.1289999485,1.31599998474,2.15000009537,0.800000011921,2.63000011444,0.949999988079,1.35000002384,2.18000006676
BaTe,-1,56,52,6,5,-5.51569986343,-9.86670017242,0.277999997139,-2.66599988937,-3.34599995613,-6.10900020599,-2.1289999485,0.0989999994636,2.15000009537,0.939999997616,2.63000011444,1.13999998569,1.35000002384,1.83000004292
CaO,-1,20,8,4,2,-6.4279999733,-16.4332008362,0.303900003433,-3.00589990616,-3.86400008202,-9.19699954987,-2.132999897,2.54099988937,1.75999999046,0.460000008345,2.31999993324,0.430000007153,0.680000007153,2.22000002861
CaS,-1,20,16,4,3,-6.4279999733,-11.7951002121,0.303900003433,-2.84489989281,-3.86400008202,-7.10599994659,-2.132999897,0.64200001955,1.75999999046,0.740000009537,2.31999993324,0.850000023842,0.680000007153,2.36999988556
CaSe,-1,20,34,4,4,-6.4279999733,-10.9460000992,0.303900003433,-2.75099992752,-3.86400008202,-6.65399980545,-2.132999897,1.31599998474,1.75999999046,0.800000011921,2.31999993324,0.949999988079,0.680000007153,2.18000006676
CaTe,-1,20,52,4,5,-6.4279999733,-9.86670017242,0.303900003433,-2.66599988937,-3.86400008202,-6.10900020599,-2.132999897,0.0989999994636,1.75999999046,0.939999997616,2.31999993324,1.13999998569,0.680000007153,1.83000004292
CdO,-1,48,8,5,2,-9.5813999176,-16.4332008362,0.838699996471,-3.00589990616,-5.95200014114,-9.19699954987,-1.30900001526,2.54099988937,1.23000001907,0.460000008345,1.74000000954,0.430000007153,2.59999990463,2.22000002861
BrCs,-1,55,35,6,4,-4.00619983673,-12.649600029,-0.569599986076,-3.73930001259,-2.22000002861,-8.00100040436,-0.547999978065,0.708000004292,2.46000003815,0.75,3.16000008583,0.879999995232,1.97000002861,1.87000000477
ClCs,-1,55,17,6,3,-4.00619983673,-13.9018001556,-0.569599986076,-3.97079992294,-2.22000002861,-8.69999980927,-0.547999978065,0.574000000954,2.46000003815,0.680000007153,3.16000008583,0.759999990463,1.97000002861,1.66999995708
CsF,-1,55,9,6,2,-4.00619983673,-19.4043006897,-0.569599986076,-4.27349996567,-2.22000002861,-11.2939996719,-0.547999978065,1.25100004673,2.46000003815,0.409999996424,3.16000008583,0.370000004768,1.97000002861,1.42999994755
CsI,-1,55,53,6,5,-4.00619983673,-11.2571001053,-0.569599986076,-3.5134999752,-2.22000002861,-7.23600006104,-0.547999978065,0.212999999523,2.46000003815,0.899999976158,3.16000008583,1.07000005245,1.97000002861,1.72000002861
CuF,-1,29,9,4,2,-8.38879966736,-19.4043006897,-1.6384999752,-4.27349996567,-4.85599994659,-11.2939996719,-0.64099997282,1.25100004673,1.20000004768,0.409999996424,1.67999994755,0.370000004768,2.57999992371,1.42999994755
BrK,-1,19,35,4,4,-4.43319988251,-12.649600029,-0.621299982071,-3.73930001259,-2.42600011826,-8.00100040436,-0.697000026703,0.708000004292,2.13000011444,0.75,2.44000005722,0.879999995232,1.78999996185,1.87000000477
ClK,-1,19,17,4,3,-4.43319988251,-13.9018001556,-0.621299982071,-3.97079992294,-2.42600011826,-8.69999980927,-0.697000026703,0.574000000954,2.13000011444,0.680000007153,2.44000005722,0.759999990463,1.78999996185,1.66999995708
FK,-1,19,9,4,2,-4.43319988251,-19.4043006897,-0.621299982071,-4.27349996567,-2.42600011826,-11.2939996719,-0.697000026703,1.25100004673,2.13000011444,0.409999996424,2.44000005722,0.370000004768,1.78999996185,1.42999994755
IK,-1,19,53,4,5,-4.43319988251,-11.2571001053,-0.621299982071,-3.5134999752,-2.42600011826,-7.23600006104,-0.697000026703,0.212999999523,2.13000011444,0.899999976158,2.44000005722,1.07000005245,1.78999996185,1.72000002861
BrLi,-1,3,35,2,4,-5.32910013199,-12.649600029,-0.698099970818,-3.73930001259,-2.87400007248,-8.00100040436,-0.977999985218,0.708000004292,1.64999997616,0.75,2,0.879999995232,6.92999982834,1.87000000477
ClLi,-1,3,17,2,3,-5.32910013199,-13.9018001556,-0.698099970818,-3.97079992294,-2.87400007248,-8.69999980927,-0.977999985218,0.574000000954,1.64999997616,0.680000007153,2,0.759999990463,6.92999982834,1.66999995708
FLi,-1,3,9,2,2,-5.32910013199,-19.4043006897,-0.698099970818,-4.27349996567,-2.87400007248,-11.2939996719,-0.977999985218,1.25100004673,1.64999997616,0.409999996424,2,0.370000004768,6.92999982834,1.42999994755
ILi,-1,3,53,2,5,-5.32910013199,-11.2571001053,-0.698099970818,-3.5134999752,-2.87400007248,-7.23600006104,-0.977999985218,0.212999999523,1.64999997616,0.899999976158,2,1.07000005245,6.92999982834,1.72000002861
MgO,-1,12,8,3,2,-8.03709983826,-16.4332008362,0.692499995232,-3.00589990616,-4.78200006485,-9.19699954987,-1.35800004005,2.54099988937,1.33000004292,0.460000008345,1.89999997616,0.430000007153,3.17000007629,2.22000002861
MgS,-1,12,16,3,3,-8.03709983826,-11.7951002121,0.692499995232,-2.84489989281,-4.78200006485,-7.10599994659,-1.35800004005,0.64200001955,1.33000004292,0.740000009537,1.89999997616,0.850000023842,3.17000007629,2.36999988556
MgSe,-1,12,34,3,4,-8.03709983826,-10.9460000992,0.692499995232,-2.75099992752,-4.78200006485,-6.65399980545,-1.35800004005,1.31599998474,1.33000004292,0.800000011921,1.89999997616,0.949999988079,3.17000007629,2.18000006676
MgTe,-1,12,52,3,5,-8.03709983826,-9.86670017242,0.692499995232,-2.66599988937,-4.78200006485,-6.10900020599,-1.35800004005,0.0989999994636,1.33000004292,0.939999997616,1.89999997616,1.13999998569,3.17000007629,1.83000004292
BrNa,-1,11,35,3,4,-5.22310018539,-12.649600029,-0.715699970722,-3.73930001259,-2.81900000572,-8.00100040436,-0.717999994755,0.708000004292,1.71000003815,0.75,2.59999990463,0.879999995232,6.57000017166,1.87000000477
ClNa,-1,11,17,3,3,-5.22310018539,-13.9018001556,-0.715699970722,-3.97079992294,-2.81900000572,-8.69999980927,-0.717999994755,0.574000000954,1.71000003815,0.680000007153,2.59999990463,0.759999990463,6.57000017166,1.66999995708
FNa,-1,11,9,3,2,-5.22310018539,-19.4043006897,-0.715699970722,-4.27349996567,-2.81900000572,-11.2939996719,-0.717999994755,1.25100004673,1.71000003815,0.409999996424,2.59999990463,0.370000004768,6.57000017166,1.42999994755
INa,-1,11,53,3,5,-5.22310018539,-11.2571001053,-0.715699970722,-3.5134999752,-2.81900000572,-7.23600006104,-0.717999994755,0.212999999523,1.71000003815,0.899999976158,2.59999990463,1.07000005245,6.57000017166,1.72000002861
BrRb,-1,37,35,5,4,-4.28889989853,-12.649600029,-0.590399980545,-3.73930001259,-2.3599998951,-8.00100040436,-0.704999983311,0.708000004292,2.24000000954,0.75,3.20000004768,0.879999995232,1.96000003815,1.87000000477
ClRb,-1,37,17,5,3,-4.28889989853,-13.9018001556,-0.590399980545,-3.97079992294,-2.3599998951,-8.69999980927,-0.704999983311,0.574000000954,2.24000000954,0.680000007153,3.20000004768,0.759999990463,1.96000003815,1.66999995708
FRb,-1,37,9,5,2,-4.28889989853,-19.4043006897,-0.590399980545,-4.27349996567,-2.3599998951,-11.2939996719,-0.704999983311,1.25100004673,2.24000000954,0.409999996424,3.20000004768,0.370000004768,1.96000003815,1.42999994755
IRb,-1,37,53,5,5,-4.28889989853,-11.2571001053,-0.590399980545,-3.5134999752,-2.3599998951,-7.23600006104,-0.704999983311,0.212999999523,2.24000000954,0.899999976158,3.20000004768,1.07000005245,1.96000003815,1.72000002861
OSr,-1,38,8,5,2,-6.03159999847,-16.4332008362,0.343100011349,-3.00589990616,-3.64100003242,-9.19699954987,-1.3789999485,2.54099988937,1.90999996662,0.460000008345,2.54999995232,0.430000007153,1.20000004768,2.22000002861
SSr,-1,38,16,5,3,-6.03159999847,-11.7951002121,0.343100011349,-2.84489989281,-3.64100003242,-7.10599994659,-1.3789999485,0.64200001955,1.90999996662,0.740000009537,2.54999995232,0.850000023842,1.20000004768,2.36999988556
SeSr,-1,38,34,5,4,-6.03159999847,-10.9460000992,0.343100011349,-2.75099992752,-3.64100003242,-6.65399980545,-1.3789999485,1.31599998474,1.90999996662,0.800000011921,2.54999995232,0.949999988079,1.20000004768,2.18000006676
SrTe,-1,38,52,5,5,-6.03159999847,-9.86670017242,0.343100011349,-2.66599988937,-3.64100003242,-6.10900020599,-1.3789999485,0.0989999994636,1.90999996662,0.939999997616,2.54999995232,1.13999998569,1.20000004768,1.83000004292
AgI,1,47,53,5,5,-8.0580997467,-11.2571001053,-1.66659998894,-3.5134999752,-4.71000003815,-7.23600006104,-0.479000002146,0.212999999523,1.32000005245,0.899999976158,1.87999999523,1.07000005245,2.97000002861,1.72000002861
AlAs,1,13,33,3,4,-5.78049993515,-9.26189994812,-0.3125,-1.83920001984,-2.78399991989,-5.34100008011,0.694999992847,0.0640000030398,1.09000003338,0.850000023842,1.38999998569,1.03999996185,1.94000005722,2.01999998093
AlN,1,13,7,3,2,-5.78049993515,-13.5852003098,-0.3125,-1.86749994755,-2.78399991989,-7.2389998436,0.694999992847,3.0569999218,1.09000003338,0.540000021458,1.38999998569,0.509999990463,1.94000005722,1.53999996185
AlP,1,13,15,3,3,-5.78049993515,-9.75059986115,-0.3125,-1.91999995708,-2.78399991989,-5.59600019455,0.694999992847,0.182999998331,1.09000003338,0.829999983311,1.38999998569,0.97000002861,1.94000005722,1.76999998093
AlSb,1,13,51,3,5,-5.78049993515,-8.46829986572,-0.3125,-1.84669995308,-2.78399991989,-4.99100017548,0.694999992847,0.104999996722,1.09000003338,1,1.38999998569,1.23000001907,1.94000005722,2.05999994278
AsGa,1,31,33,4,4,-5.81820011139,-9.26189994812,-0.108099997044,-1.83920001984,-2.73200011253,-5.34100008011,0.129999995232,0.0640000030398,0.990000009537,0.850000023842,1.33000004292,1.03999996185,2.16000008583,2.01999998093
AsB,1,5,33,2,4,-8.18999958038,-9.26189994812,-0.107400000095,-1.83920001984,-3.71499991417,-5.34100008011,2.24799990654,0.0640000030398,0.810000002384,0.850000023842,0.829999983311,1.03999996185,1.95000004768,2.01999998093
BN,1,5,7,2,2,-8.18999958038,-13.5852003098,-0.107400000095,-1.86749994755,-3.71499991417,-7.2389998436,2.24799990654,3.0569999218,0.810000002384,0.540000021458,0.829999983311,0.509999990463,1.95000004768,1.53999996185
BP,1,5,15,2,3,-8.18999958038,-9.75059986115,-0.107400000095,-1.91999995708,-3.71499991417,-5.59600019455,2.24799990654,0.182999998331,0.810000002384,0.829999983311,0.829999983311,0.97000002861,1.95000004768,1.76999998093
BSb,1,5,51,2,5,-8.18999958038,-8.46829986572,-0.107400000095,-1.84669995308,-3.71499991417,-4.99100017548,2.24799990654,0.104999996722,0.810000002384,1,0.829999983311,1.23000001907,1.95000004768,2.05999994278
BeO,1,4,8,2,2,-9.459400177,-16.4332008362,0.630500018597,-3.00589990616,-5.59999990463,-9.19699954987,-2.09800004959,2.54099988937,1.08000004292,0.460000008345,1.21000003815,0.430000007153,2.88000011444,2.22000002861
BeS,1,4,16,2,3,-9.459400177,-11.7951002121,0.630500018597,-2.84489989281,-5.59999990463,-7.10599994659,-2.09800004959,0.64200001955,1.08000004292,0.740000009537,1.21000003815,0.850000023842,2.88000011444,2.36999988556
BeSe,1,4,34,2,4,-9.459400177,-10.9460000992,0.630500018597,-2.75099992752,-5.59999990463,-6.65399980545,-2.09800004959,1.31599998474,1.08000004292,0.800000011921,1.21000003815,0.949999988079,2.88000011444,2.18000006676
BeTe,1,4,52,2,5,-9.459400177,-9.86670017242,0.630500018597,-2.66599988937,-5.59999990463,-6.10900020599,-2.09800004959,0.0989999994636,1.08000004292,0.939999997616,1.21000003815,1.13999998569,2.88000011444,1.83000004292
C2,1,6,6,2,2,-10.8516998291,-10.8516998291,-0.87239998579,-0.87239998579,-5.41599988937,-5.41599988937,1.99199998379,1.99199998379,0.639999985695,0.639999985695,0.629999995232,0.629999995232,1.62999999523,1.62999999523
CdS,1,48,16,5,3,-9.5813999176,-11.7951002121,0.838699996471,-2.84489989281,-5.95200014114,-7.10599994659,-1.30900001526,0.64200001955,1.23000001907,0.740000009537,1.74000000954,0.850000023842,2.59999990463,2.36999988556
CdSe,1,48,34,5,4,-9.5813999176,-10.9460000992,0.838699996471,-2.75099992752,-5.95200014114,-6.65399980545,-1.30900001526,1.31599998474,1.23000001907,0.800000011921,1.74000000954,0.949999988079,2.59999990463,2.18000006676
CdTe,1,48,52,5,5,-9.5813999176,-9.86670017242,0.838699996471,-2.66599988937,-5.95200014114,-6.10900020599,-1.30900001526,0.0989999994636,1.23000001907,0.939999997616,1.74000000954,1.13999998569,2.59999990463,1.83000004292
BrCu,1,29,35,4,4,-8.38879966736,-12.649600029,-1.6384999752,-3.73930001259,-4.85599994659,-8.00100040436,-0.64099997282,0.708000004292,1.20000004768,0.75,1.67999994755,0.879999995232,2.57999992371,1.87000000477
ClCu,1,29,17,4,3,-8.38879966736,-13.9018001556,-1.6384999752,-3.97079992294,-4.85599994659,-8.69999980927,-0.64099997282,0.574000000954,1.20000004768,0.680000007153,1.67999994755,0.759999990463,2.57999992371,1.66999995708
CuI,1,29,53,4,5,-8.38879966736,-11.2571001053,-1.6384999752,-3.5134999752,-4.85599994659,-7.23600006104,-0.64099997282,0.212999999523,1.20000004768,0.899999976158,1.67999994755,1.07000005245,2.57999992371,1.72000002861
GaN,1,31,7,4,2,-5.81820011139,-13.5852003098,-0.108099997044,-1.86749994755,-2.73200011253,-7.2389998436,0.129999995232,3.0569999218,0.990000009537,0.540000021458,1.33000004292,0.509999990463,2.16000008583,1.53999996185
GaP,1,31,15,4,3,-5.81820011139,-9.75059986115,-0.108099997044,-1.91999995708,-2.73200011253,-5.59600019455,0.129999995232,0.182999998331,0.990000009537,0.829999983311,1.33000004292,0.97000002861,2.16000008583,1.76999998093
GaSb,1,31,51,4,5,-5.81820011139,-8.46829986572,-0.108099997044,-1.84669995308,-2.73200011253,-4.99100017548,0.129999995232,0.104999996722,0.990000009537,1,1.33000004292,1.23000001907,2.16000008583,2.05999994278
Ge2,1,32,32,4,4,-7.56699991226,-7.56699991226,-0.949000000954,-0.949000000954,-4.04600000381,-4.04600000381,2.17499995232,2.17499995232,0.920000016689,0.920000016689,1.15999996662,1.15999996662,2.36999988556,2.36999988556
CGe,1,32,6,4,2,-7.56699991226,-10.8516998291,-0.949000000954,-0.87239998579,-4.04600000381,-5.41599988937,2.17499995232,1.99199998379,0.920000016689,0.639999985695,1.15999996662,0.629999995232,2.36999988556,1.62999999523
GeSi,1,32,14,4,3,-7.56699991226,-7.75769996643,-0.949000000954,-0.992999970913,-4.04600000381,-4.16300010681,2.17499995232,0.439999997616,0.920000016689,0.939999997616,1.15999996662,1.12999999523,2.36999988556,1.88999998569
AsIn,1,49,33,5,4,-5.53739976883,-9.26189994812,-0.256300002337,-1.83920001984,-2.6970000267,-5.34100008011,0.368000000715,0.0640000030398,1.12999999523,0.850000023842,1.5,1.03999996185,3.1099998951,2.01999998093
InN,1,49,7,5,2,-5.53739976883,-13.5852003098,-0.256300002337,-1.86749994755,-2.6970000267,-7.2389998436,0.368000000715,3.0569999218,1.12999999523,0.540000021458,1.5,0.509999990463,3.1099998951,1.53999996185
InP,1,49,15,5,3,-5.53739976883,-9.75059986115,-0.256300002337,-1.91999995708,-2.6970000267,-5.59600019455,0.368000000715,0.182999998331,1.12999999523,0.829999983311,1.5,0.97000002861,3.1099998951,1.76999998093
InSb,1,49,51,5,5,-5.53739976883,-8.46829986572,-0.256300002337,-1.84669995308,-2.6970000267,-4.99100017548,0.368000000715,0.104999996722,1.12999999523,1,1.5,1.23000001907,3.1099998951,2.05999994278
Si2,1,14,14,3,3,-7.75769996643,-7.75769996643,-0.992999970913,-0.992999970913,-4.16300010681,-4.16300010681,0.439999997616,0.439999997616,0.939999997616,0.939999997616,1.12999999523,1.12999999523,1.88999998569,1.88999998569
CSi,1,14,6,3,2,-7.75769996643,-10.8516998291,-0.992999970913,-0.87239998579,-4.16300010681,-5.41599988937,0.439999997616,1.99199998379,0.939999997616,0.639999985695,1.12999999523,0.629999995232,1.88999998569,1.62999999523
Sn2,1,50,50,5,5,-7.04279994965,-7.04279994965,-1.03919994831,-1.03919994831,-3.86599993706,-3.86599993706,0.00800000037998,0.00800000037998,1.05999994278,1.05999994278,1.34000003338,1.34000003338,2.02999997139,2.02999997139
CSn,1,50,6,5,2,-7.04279994965,-10.8516998291,-1.03919994831,-0.87239998579,-3.86599993706,-5.41599988937,0.00800000037998,1.99199998379,1.05999994278,0.639999985695,1.34000003338,0.629999995232,2.02999997139,1.62999999523
GeSn,1,50,32,5,4,-7.04279994965,-7.56699991226,-1.03919994831,-0.949000000954,-3.86599993706,-4.04600000381,0.00800000037998,2.17499995232,1.05999994278,0.920000016689,1.34000003338,1.15999996662,2.02999997139,2.36999988556
SiSn,1,50,14,5,3,-7.04279994965,-7.75769996643,-1.03919994831,-0.992999970913,-3.86599993706,-4.16300010681,0.00800000037998,0.439999997616,1.05999994278,0.939999997616,1.34000003338,1.12999999523,2.02999997139,1.88999998569
OZn,1,30,8,4,2,-10.1354999542,-16.4332008362,1.08070003986,-3.00589990616,-6.21700000763,-9.19699954987,-1.19400000572,2.54099988937,1.10000002384,0.460000008345,1.54999995232,0.430000007153,2.25,2.22000002861
SZn,1,30,16,4,3,-10.1354999542,-11.7951002121,1.08070003986,-2.84489989281,-6.21700000763,-7.10599994659,-1.19400000572,0.64200001955,1.10000002384,0.740000009537,1.54999995232,0.850000023842,2.25,2.36999988556
SeZn,1,30,34,4,4,-10.1354999542,-10.9460000992,1.08070003986,-2.75099992752,-6.21700000763,-6.65399980545,-1.19400000572,1.31599998474,1.10000002384,0.800000011921,1.54999995232,0.949999988079,2.25,2.18000006676
TeZn,1,30,52,4,5,-10.1354999542,-9.86670017242,1.08070003986,-2.66599988937,-6.21700000763,-6.10900020599,-1.19400000572,0.0989999994636,1.10000002384,0.939999997616,1.54999995232,1.13999998569,2.25,1.83000004292
This diff is collapsed.
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from cpp_sisso import SISSOClassifier, generate_phi_0_from_csv, FeatureSpace\n",
"\n",
"phi_0, prop_unit, prop, prop_test, task_sizes_train, task_sizes_test, leave_out_inds = generate_phi_0_from_csv(\n",
" \"data.csv\", \"class\", cols=\"all\", task_key=None, leave_out_frac=0.0, leave_out_inds=None\n",
")\n",
"\n",
"# Load a feature space from a predefined text file\n",
"feat_space = FeatureSpace(\n",
" \"phi.txt\", \n",
" phi_0, \n",
" task_sizes_train, \n",
" \"classification\",\n",
" 100, \n",
" 1.0\n",
")\n",
"\n",
"sisso = SISSOClassifier(\n",
" feat_space,\n",
" prop_unit,\n",
" prop.astype(float),\n",
" prop_test.astype(float),\n",
" task_sizes_train,\n",
" task_sizes_test,\n",
" leave_out_inds,\n",
" 2,\n",
" 2,\n",
" 2\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sisso.fit()"
]
},
{