Skip to content
Snippets Groups Projects
Commit 6ec76bd3 authored by Thomas Purcell's avatar Thomas Purcell
Browse files

Add basic error analysis functions

basic analysis scripts completed
parent 3041b3ab
Branches
Tags v1.0.2
No related merge requests found
......@@ -123,7 +123,6 @@ Model::Model(std::string train_file, std::string test_file)
std::vector<std::string> Model::populate_model(std::string filename, bool train)
{
std::ifstream file_stream;
file_stream.open(filename, std::ios::in);
......@@ -149,7 +148,6 @@ std::vector<std::string> Model::populate_model(std::string filename, bool train)
int n_task = 0;
int _n_dim = 0;
std::getline(file_stream, line);
do
{
++n_task;
......@@ -183,6 +181,7 @@ std::vector<std::string> Model::populate_model(std::string filename, bool train)
else
_task_sizes_test.push_back(std::stoi(split_line[1]));
}
if(train)
{
_n_samp_train = n_samp;
......@@ -197,10 +196,12 @@ std::vector<std::string> Model::populate_model(std::string filename, bool train)
_prop_test_est.resize(n_samp);
_test_error.resize(n_samp);
}
std::getline(file_stream, line);
std::getline(file_stream, line);
if(!train)
std::getline(file_stream, line);
std::vector<std::vector<double>> feat_vals(_n_dim, std::vector<double>(n_samp, 0.0));
for(int ns = 0; ns < n_samp; ++ns)
{
......@@ -224,6 +225,7 @@ std::vector<std::string> Model::populate_model(std::string filename, bool train)
feat_vals[nf][ns] = std::stod(split_line[2 + nf]);
}
}
if(train)
{
_D_train.resize(_n_dim * n_samp);
......@@ -236,6 +238,7 @@ std::vector<std::string> Model::populate_model(std::string filename, bool train)
for(int nf = 0; nf < _n_dim; ++nf)
std::copy_n(feat_vals[nf].data(), n_samp, &_D_test[nf * n_samp]);
}
return feature_expr;
}
......@@ -334,3 +337,35 @@ void Model::to_file(std::string filename, bool train, std::vector<int> test_inds
}
out_file_stream.close();
}
/**
 * @brief Collect the magnitudes of the training errors in ascending order
 * @return A new vector holding the absolute value of every entry of _train_error, sorted from smallest to largest
 */
std::vector<double> Model::sorted_error()
{
    std::vector<double> abs_err(_train_error.size(), 0.0);

    // Write |e| for each training error straight into the output buffer
    std::transform(
        _train_error.data(),
        _train_error.data() + _train_error.size(),
        abs_err.begin(),
        [](double err){return std::abs(err);}
    );

    std::sort(abs_err.begin(), abs_err.end());
    return abs_err;
}
/**
 * @brief Collect the magnitudes of the test errors in ascending order
 * @return A new vector holding the absolute value of every entry of _test_error, sorted from smallest to largest
 */
std::vector<double> Model::sorted_test_error()
{
    std::vector<double> abs_err(_test_error.size(), 0.0);

    // Write |e| for each test error straight into the output buffer
    std::transform(
        _test_error.data(),
        _test_error.data() + _test_error.size(),
        abs_err.begin(),
        [](double err){return std::abs(err);}
    );

    std::sort(abs_err.begin(), abs_err.end());
    return abs_err;
}
/**
 * @brief Mean absolute percent error over the training data
 * @return util_funcs::mean of |e / p|, pairing each entry e of _train_error with the matching entry p of _prop_train
 */
double Model::mape()
{
    std::vector<double> rel_err(_train_error.size(), 0.0);

    // Element-wise |error / property|; the ratio is not scaled by 100
    auto abs_ratio = [](double err, double prop){return std::abs(err / prop);};
    std::transform(
        _train_error.begin(),
        _train_error.end(),
        _prop_train.begin(),
        rel_err.begin(),
        abs_ratio
    );

    return util_funcs::mean(rel_err);
}
double Model::test_mape()
{
std::vector<double> percent_error(_test_error.size(), 0.0);
std::transform(_test_error.begin(), _test_error.end(), _prop_test.begin(), percent_error.begin(), [](double e, double p){return std::abs(e / p);});
return util_funcs::mean(percent_error);
}
......@@ -169,6 +169,103 @@ public:
return std::abs(*std::max_element(_test_error.data(), _test_error.data() + _n_samp_test, [](double d1, double d2){return std::abs(d1) < std::abs(d2);}));
}
// DocString: model_mae
/**
 * @brief The mean absolute error of the model on the training data
 * @return The sum of the absolute training errors divided by _n_samp_train
 */
inline double mae()
{
    const double abs_sum = std::accumulate(
        _train_error.begin(),
        _train_error.end(),
        0.0,
        [](double running, double err){return running + std::abs(err);}
    );
    return abs_sum / _n_samp_train;
}
// DocString: model_test_mae
/**
 * @brief The mean absolute error of the model on the test data
 * @return The sum of the absolute test errors divided by _n_samp_test
 */
inline double test_mae()
{
    const double abs_sum = std::accumulate(
        _test_error.begin(),
        _test_error.end(),
        0.0,
        [](double running, double err){return running + std::abs(err);}
    );
    return abs_sum / _n_samp_test;
}
// DocString: model_mape
/**
 * @brief The mean absolute percent error of the model on the training data
 * @return The mean of |e / p| over the training data, where e is a training error and p the matching entry of _prop_train (the ratio is not multiplied by 100)
 */
double mape();
// DocString: model_test_mape
/**
 * @brief The mean absolute percent error of the model on the test data
 * @return The mean of |e / p| over the test data, where e is a test error and p the matching entry of _prop_test (the ratio is not multiplied by 100)
 */
double test_mape();
/**
 * @brief Get the absolute values of the training errors sorted by magnitude
 * @return A new vector with |e| for every entry of the training error, in ascending order
 */
std::vector<double> sorted_error();
/**
 * @brief Get the absolute values of the test errors sorted by magnitude
 * @return A new vector with |e| for every entry of the test error, in ascending order
 */
std::vector<double> sorted_test_error();
// DocString: model_percentile_25_ae
/**
 * @brief The 25th percentile of the absolute error of the training data
 * @return The element at index floor(0.25 * _n_samp_train) of the ascending-sorted absolute training errors
 * @throws std::out_of_range if that index lies outside the sorted error vector (e.g. there is no training data)
 */
inline double percentile_25_ae()
{
    // .at() is bounds-checked, so an empty error vector raises instead of reading out of bounds
    return sorted_error().at(static_cast<int>(floor(_n_samp_train * 0.25)));
}
// NOTE(review): the Python binding registers this property with a doc placeholder ending in
// "model_test_percentile_25_test_ae", which differs from the marker name below - confirm it resolves.
// DocString: model_test_percentile_25_ae
/**
 * @brief The 25th percentile of the absolute error of the test data
 * @return The element at index floor(0.25 * _n_samp_test) of the ascending-sorted absolute test errors
 * @throws std::out_of_range if that index lies outside the sorted error vector (e.g. there is no test data)
 */
inline double percentile_25_test_ae()
{
    // .at() is bounds-checked, so an empty error vector raises instead of reading out of bounds
    return sorted_test_error().at(static_cast<int>(floor(_n_samp_test * 0.25)));
}
// DocString: model_percentile_50_ae
/**
 * @brief The 50th percentile of the absolute error of the training data
 * @return The element at index floor(0.50 * _n_samp_train) of the ascending-sorted absolute training errors
 * @throws std::out_of_range if that index lies outside the sorted error vector (e.g. there is no training data)
 */
inline double percentile_50_ae()
{
    // .at() is bounds-checked, so an empty error vector raises instead of reading out of bounds
    return sorted_error().at(static_cast<int>(floor(_n_samp_train * 0.50)));
}
// NOTE(review): the Python binding registers this property with a doc placeholder ending in
// "model_test_percentile_50_test_ae", which differs from the marker name below - confirm it resolves.
// DocString: model_test_percentile_50_ae
/**
 * @brief The 50th percentile of the absolute error of the test data
 * @return The element at index floor(0.50 * _n_samp_test) of the ascending-sorted absolute test errors
 * @throws std::out_of_range if that index lies outside the sorted error vector (e.g. there is no test data)
 */
inline double percentile_50_test_ae()
{
    // .at() is bounds-checked, so an empty error vector raises instead of reading out of bounds
    return sorted_test_error().at(static_cast<int>(floor(_n_samp_test * 0.50)));
}
// DocString: model_percentile_75_ae
/**
 * @brief The 75th percentile of the absolute error of the training data
 * @return The element at index floor(0.75 * _n_samp_train) of the ascending-sorted absolute training errors
 * @throws std::out_of_range if that index lies outside the sorted error vector (e.g. there is no training data)
 */
inline double percentile_75_ae()
{
    // .at() is bounds-checked, so an empty error vector raises instead of reading out of bounds
    return sorted_error().at(static_cast<int>(floor(_n_samp_train * 0.75)));
}
// NOTE(review): the Python binding registers this property with a doc placeholder ending in
// "model_test_percentile_75_test_ae", which differs from the marker name below - confirm it resolves.
// DocString: model_test_percentile_75_ae
/**
 * @brief The 75th percentile of the absolute error of the test data
 * @return The element at index floor(0.75 * _n_samp_test) of the ascending-sorted absolute test errors
 * @throws std::out_of_range if that index lies outside the sorted error vector (e.g. there is no test data)
 */
inline double percentile_75_test_ae()
{
    // .at() is bounds-checked, so an empty error vector raises instead of reading out of bounds
    return sorted_test_error().at(static_cast<int>(floor(_n_samp_test * 0.75)));
}
// DocString: model_percentile_95_ae
/**
 * @brief The 95th percentile of the absolute error of the training data
 * @return The element at index floor(0.95 * _n_samp_train) of the ascending-sorted absolute training errors
 * @throws std::out_of_range if that index lies outside the sorted error vector (e.g. there is no training data)
 */
inline double percentile_95_ae()
{
    // .at() is bounds-checked, so an empty error vector raises instead of reading out of bounds
    return sorted_error().at(static_cast<int>(floor(_n_samp_train * 0.95)));
}
// NOTE(review): the Python binding registers this property with a doc placeholder ending in
// "model_test_percentile_95_test_ae", which differs from the marker name below - confirm it resolves.
// DocString: model_test_percentile_95_ae
/**
 * @brief The 95th percentile of the absolute error of the test data
 * @return The element at index floor(0.95 * _n_samp_test) of the ascending-sorted absolute test errors
 * @throws std::out_of_range if that index lies outside the sorted error vector (e.g. there is no test data)
 */
inline double percentile_95_test_ae()
{
    // .at() is bounds-checked, so an empty error vector raises instead of reading out of bounds
    return sorted_test_error().at(static_cast<int>(floor(_n_samp_test * 0.95)));
}
/**
* @brief Convert the Model into an output file
*
......
"""Selected feature space analysis"""
import numpy as np
from sisso import phi_selected_from_file
def get_prevelance_of_primary_features(sisso_file, phi_0):
    """Compute how often each primary feature occurs in the selected feature space.

    Args:
        sisso_file (str): The selected feature file
        phi_0 (list): the primary feature list

    Returns:
        dict (str, float): maps str(feat) for every feat in phi_0 to the fraction of
            selected features whose primary_feat_decomp contains that primary feature
    """
    selected = phi_selected_from_file(sisso_file, phi_0)

    # Every primary feature starts at zero so features that never appear are still reported
    prevalence = {str(prim): 0.0 for prim in phi_0}

    for sel_feat in selected:
        for ind in sel_feat.primary_feat_decomp.keys():
            # Each selected feature containing the primary feature adds an equal share
            prevalence[str(phi_0[ind])] += 1.0 / len(selected)

    return prevalence
......@@ -352,7 +352,20 @@ void sisso::descriptor_identifier::registerModel()
.add_property("rmse", &Model::rmse, "@DocString_model_rmse@")
.add_property("test_rmse", &Model::test_rmse, "@DocString_model_test_rmse@")
.add_property("max_ae", &Model::max_ae, "@DocString_model_max_ae@")
.add_property("test_max_ae", &Model::test_max_ae, "@DocString_model_test_max_ae@");
.add_property("test_max_ae", &Model::test_max_ae, "@DocString_model_test_max_ae@")
.add_property("mae", &Model::mae, "@DocString_model_mae@")
.add_property("test_mae", &Model::test_mae, "@DocString_model_test_mae@")
.add_property("mape", &Model::mape, "@DocString_model_mape@")
.add_property("test_mape", &Model::test_mape, "@DocString_model_test_mape@")
.add_property("percentile_25_ae", &Model::percentile_25_ae, "@DocString_model_percentile_25_ae@")
.add_property("percentile_25_test_ae", &Model::percentile_25_test_ae, "@DocString_model_test_percentile_25_test_ae@")
.add_property("percentile_50_ae", &Model::percentile_50_ae, "@DocString_model_percentile_50_ae@")
.add_property("percentile_50_test_ae", &Model::percentile_50_test_ae, "@DocString_model_test_percentile_50_test_ae@")
.add_property("percentile_75_ae", &Model::percentile_75_ae, "@DocString_model_percentile_75_ae@")
.add_property("percentile_75_test_ae", &Model::percentile_75_test_ae, "@DocString_model_test_percentile_75_test_ae@")
.add_property("percentile_95_ae", &Model::percentile_95_ae, "@DocString_model_percentile_95_ae@")
.add_property("percentile_95_test_ae", &Model::percentile_95_test_ae, "@DocString_model_test_percentile_95_test_ae@")
;
}
void sisso::descriptor_identifier::registerSISSORegressor()
......
......@@ -29,8 +29,8 @@ namespace python_conv_utils
template<typename T>
std::vector<T> from_list(py::list lst)
{
    // Copy each element of the Python list, in order, into a std::vector<T> of the same length
    std::vector<T> vec(py::len(lst));

    // Cache the length as an int so the loop index and its bound share one signedness
    const int n_elem = static_cast<int>(vec.size());
    for(int ll = 0; ll < n_elem; ++ll)
        vec[ll] = py::extract<T>(lst[ll]);

    return vec;
}
......@@ -66,9 +66,9 @@ namespace python_conv_utils
template<typename T_ptr, typename T_base>
std::vector<std::shared_ptr<T_ptr>> shared_ptr_vec_from_list(py::list lst)
{
    // Build one shared_ptr per list element, each owning a T_base extracted from that element
    std::vector<std::shared_ptr<T_ptr>> vec(py::len(lst));

    // Elements are read by index rather than with lst.pop(), so no items are removed from the list.
    // The length is cached as an int so the loop index and its bound share one signedness.
    const int n_elem = static_cast<int>(vec.size());
    for(int ll = 0; ll < n_elem; ++ll)
        vec[ll] = std::make_shared<T_base>(py::extract<T_base>(lst[ll]));

    return vec;
}
......
......@@ -3,7 +3,6 @@
py::list str2node::phi_selected_from_file_py(std::string filename, py::list phi_0)
{
std::vector<node_ptr> phi_selected = phi_selected_from_file(filename, python_conv_utils::shared_ptr_vec_from_list<Node, FeatureNode>(phi_0));
py::list feat_lst;
for(auto& feat : phi_selected)
feat_lst.append<ModelNode>(ModelNode(feat->d_mat_ind(), feat->rung(), feat->expr(), feat->postfix_expr(), feat->value(), feat->test_value(), feat->unit()));
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment