Commit de2ee8fa authored by Thomas Purcell's avatar Thomas Purcell
Browse files

Add functionality to output the feature space _phi as a text file

parent 101956ca
......@@ -173,3 +173,38 @@ After the calculations are finished we can then run the same analysis as we did
>>> plot_validation_rmse(models, "cv_50_error.pdf").show()
```
It is important to note here that, while it is not necessary to setup separate directories when using the python bindings, if you don't all output files will be overwritten reducing the reproducibility of the code.
## Using the Python Interface to Reproduce Previous Calculations
The final goal of this tutorial will be to discuss how to use the python interface to reproduce previous calculations easily.
As previous examples illustrated how the Model output files can be used to interact and extract information from the models generated by `SISSO++`, but these alone can not be used to recreate a calculation.
To increase the reproducibility of the code `SISSO++` can construct a `FeatureSpace` using a text file, which can be generated using .
Depending on what we want to study there are two ways of constructing a feature space form either the `phi.out` file or `selected_features.txt` As show below for using `selected_features.txt`
```python
from sissopp import phi_selected_from_file, read_csv, FeatureSpace
inputs = read_csv("data.csv", prop_key="E_RS - E_ZB", max_rung=0, leave_out_frac=0.0)
phi_sel = phi_selected_from_file("feature_space/selected_features.txt", inputs.phi_0)
inputs.phi_0 = phi_sel
feature_space = FeatureSpace(inputs)
```
and for using `phi.out`
```python
from sissopp import phi_selected_from_file, read_csv, FeatureSpace
inputs = read_csv("data.csv", prop_key="E_RS - E_ZB", max_rung=0, leave_out_frac=0.0)
feature_space = FeatureSpace(
"phi.txt",
inputs.phi_0,
inputs.prop_train,
[82],
project_type='regression',
cross_corr_max=1.0,
n_sis_select=100
)
```
From here calculations can continue as was done in the earlier examples.
......@@ -71,6 +71,7 @@ FeatureSpace::FeatureSpace(InputParser inputs):
_project_type(inputs.calc_type()),
_feature_space_file("feature_space/selected_features.txt"),
_feature_space_summary_file("feature_space/SIS_summary.txt"),
_phi_out_file(inputs.phi_out_file()),
_mpi_comm(inputs.mpi_comm()),
_cross_cor_max(inputs.cross_cor_max()),
_l_bound(inputs.l_bound()),
......@@ -782,6 +783,11 @@ void FeatureSpace::generate_feature_space(
{
_n_feat = feat_set.size();
}
if(_phi_out_file.size() > 0)
{
output_phi();
}
}
void FeatureSpace::generate_and_project(std::shared_ptr<LossFunction> loss, std::vector<node_ptr>& phi_sel, std::vector<double>& scores_sel)
......@@ -1061,11 +1067,11 @@ void FeatureSpace::sis(std::shared_ptr<LossFunction> loss)
// Generate features on the fly and get the best features for this rank
if(_n_rung_generate > 0)
{
#ifdef PARAMETERIZE
#ifdef PARAMETERIZE
_phi.resize(_n_feat);
_phi.insert(_phi.end(), _phi_reparam.begin() + _start_rung_reparam.back(), _phi_reparam.end());
_scores.resize(_phi.size());
#endif
#endif
start_time = omp_get_wtime();
phi_sel.resize(cur_feat_local);
scores_sel.resize(cur_feat_local);
......@@ -1208,3 +1214,21 @@ void FeatureSpace::remove_feature(const int ind)
_phi.erase(_phi.begin() + ind);
}
void FeatureSpace::output_phi()
{
boost::filesystem::path p(_phi_out_file.c_str());
boost::filesystem::create_directories(p.remove_filename());
std::ofstream out_file_stream = std::ofstream();
out_file_stream.open(_phi_out_file);
out_file_stream << "# Number of Features: " << _n_feat << std::endl;
out_file_stream << "# Maximum Rung of the Calculation: " << _max_rung << std::endl;
for(auto& feat : _phi)
{
out_file_stream << feat->postfix_expr() << std::endl;
}
out_file_stream.close();
}
......@@ -76,6 +76,7 @@ class FeatureSpace
const std::string _project_type; //!< The type of LossFunction to use when projecting the features onto a property
const std::string _feature_space_file; //!< File to output the computer readable representation of the selected features to
const std::string _feature_space_summary_file; //!< File to output the human readable representation of the selected features to
const std::string _phi_out_file; //!< Filename of the file to output the feature set to
std::function<bool(const double*, const int, const double, const std::vector<double>&, const double, const int, const int)> _is_valid; //!< Function used to determine of a feature is too correlated to previously selected features
std::function<bool(const double*, const int, const double, const std::vector<node_ptr>&, const std::vector<double>&, const double)> _is_valid_feat_list; //!< Function used to determine of a feature is too correlated to previously selected features within a given list
......@@ -238,6 +239,12 @@ public:
const double u_bound=1e50
);
// DocString: feat_space_output_phi
/**
* @brief Output the feature set to a file of a passed filename
*/
void output_phi();
#ifdef PARAMETERIZE
/**
* @brief Generate a new set of parameterized features from a single feature
......
......@@ -29,6 +29,7 @@ InputParser::InputParser() :
_prop_label("prop"),
_task_key("Task"),
_calc_type("regression"),
_phi_out_file(""),
_mpi_comm(mpi_setup::comm),
_cross_cor_max(1.0),
_l_bound(1e-50),
......@@ -65,6 +66,7 @@ InputParser::InputParser(pt::ptree ip, std::string fn, std::shared_ptr<MPI_Inter
_prop_key(ip.get<std::string>("property_key", "prop")),
_task_key(ip.get<std::string>("task_key", "task")),
_calc_type(ip.get<std::string>("calc_type", "regression")),
_phi_out_file(ip.get<std::string>("phi_out_file", "")),
_leave_out_inds(as_vector<int>(ip, "leave_out_inds")),
_mpi_comm(comm),
_cross_cor_max(ip.get<double>("max_feat_cross_correlation", 1.0)),
......
......@@ -83,6 +83,7 @@ private:
std::string _prop_label; //!< The label of the property
std::string _task_key; //!< Key used to find the task column in the data file
std::string _calc_type; //!< The type of LossFunction to use when projecting the features onto a property
std::string _phi_out_file; //!< Filename of the file to output the feature set to
std::shared_ptr<MPI_Interface> _mpi_comm; //!< The MPI communicator for the calculation
......@@ -509,6 +510,18 @@ public:
*/
inline void set_filename(const std::string filename) {_filename = filename;}
// DocString: inputs_get_phi_out_file
/**
* @brief Filename of the file to output the feature set to
*/
inline std::string phi_out_file() const {return _phi_out_file;}
// DocString: inputs_set_phi_out_file
/**
* @brief Set tilename of the file to output the feature set to
*/
inline void set_phi_out_file(const std::string phi_out_file) {_phi_out_file = phi_out_file;}
// DocString: inputs_get_data_file
/**
* @brief Name of the data file
......
......@@ -10,5 +10,6 @@
"leave_out_frac": 0.05,
"n_models_store": 1,
"leave_out_inds": [0, 1, 2, 60, 61],
"fix_intercept": false
"fix_intercept": false,
"phi_out_file": "feature_space/phi.out"
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment