Commit 63884b07 authored by Thomas Purcell

Standardize all documentation

Similar concepts get the same definition
parent 50657732
/** @file feature_creation/domain/Domain.hpp
 * @brief A representation of the domain for each feature
*
 * A dictionary representation of the domain for a feature (key = domain, value = power)
*
......
/** @file descriptor_identifier/ModelLogRegressorFitC/ModelLogRegressorFitC.hpp
 * @brief Object to store the models generated from SISSO
*
* Creates a ModelLogRegressorFitC generated from SISSO and the corresponding output file.
* It also has functionality to read in an output file to regenerate the model.
......@@ -123,9 +123,9 @@ public:
*/
inline std::vector<std::array<double, 2>> c_coefs(){return _c_coefs;}
/**
* @brief Copy the error into a new array
* @brief Copy the training error into a different vector
*
* @param res pointer to the beginning of the vector to store the residual
* @param res Pointer to the head of the new vector to store the residual
*/
inline void copy_error(double* res){std::copy_n(_train_error.data(), _n_samp_train, res);}
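A minimal caller-side sketch (illustrative only; it assumes an n_samp_train() accessor like the one documented for Model further down): the caller owns the destination buffer, which must hold at least _n_samp_train values.

// `model` is assumed to be a fitted ModelLogRegressorFitC
std::vector<double> residual(model.n_samp_train());  // assumed accessor returning _n_samp_train
model.copy_error(residual.data());                   // copies the training errors into residual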
......@@ -153,4 +153,4 @@ public:
*/
std::ostream& operator<< (std::ostream& outStream, const ModelLogRegressorFitC& model);
#endif
\ No newline at end of file
#endif
/** @file descriptor_identifier/SISSOLogRegressorFitC.hpp
 * @brief Perform SISSO on a previously generated Feature Space
*
* Takes in a feature space and performs SISSO on it while storing the selected features in _models
*
......@@ -41,7 +41,7 @@ public:
* @param leave_out_inds List of indexes from the initial data file in the test set
* @param n_dim Maximum dimensionality of the generated models
* @param n_residual Number of residuals to pass to the next SIS operation
* @param n_models_store Number of features to store in files
* @param n_models_store The number of models to output to files
 * @param fix_intercept If true, fix the intercept to 0
*/
SISSOLogRegressorFitC(std::shared_ptr<FeatureSpace> feat_space, Unit prop_unit, std::vector<double> prop, std::vector<double> prop_test, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test, std::vector<int> leave_out_inds, int n_dim, int n_residual, int n_models_store, bool fix_intercept=false);
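A minimal construction sketch against this signature (illustrative values only; feat_space, the property vectors, the task-size vectors, and leave_out_inds are assumed to have been prepared elsewhere, and Unit is assumed to accept a string label):

SISSOLogRegressorFitC sisso(
    feat_space,                          // previously generated FeatureSpace
    Unit("eV"),                          // unit of the property (assumed string constructor)
    prop_train, prop_test,               // property values for the training and test samples
    task_sizes_train, task_sizes_test,   // number of samples per task
    leave_out_inds,                      // indexes of the test-set samples
    2,                                   // n_dim: generate models up to two dimensions
    10,                                  // n_residual: residuals passed to the next SIS step
    5                                    // n_models_store: models written to output files
);                                       // fix_intercept defaults to false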
......@@ -76,7 +76,7 @@ public:
* @param prop The property to fit
* @param n_dim the dimensionality of the model
*/
void l0_norm(std::vector<double>& prop, int n_dim);
void l0_regularization(std::vector<double>& prop, int n_dim);
/**
 * @brief Accessor function for models
......@@ -97,7 +97,7 @@ public:
* @param leave_out_inds List of indexes from the initial data file in the test set
* @param n_dim Maximum dimensionality of the generated models
* @param n_residual Number of residuals to pass to the next SIS operation
* @param n_models_store Number of features to store in files
* @param n_models_store The number of models to output to files
 * @param fix_intercept If true, fix the intercept to 0
*/
SISSOLogRegressorFitC(
......@@ -127,7 +127,7 @@ public:
* @param leave_out_inds List of indexes from the initial data file in the test set
* @param n_dim Maximum dimensionality of the generated models
* @param n_residual Number of residuals to pass to the next SIS operation
* @param n_models_store Number of features to store in files
* @param n_models_store The number of models to output to files
 * @param fix_intercept If true, fix the intercept to 0
*/
SISSOLogRegressorFitC(
......
......@@ -11,13 +11,12 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/** @file classificationutils.hpp
* @brief A set of functions to create convex hulls, and compare overlap/distance
/** @file classification/ConvexHull1D.hpp
* @brief A set of functions to create convex hulls, and compare overlap/distance
*
* @author Thomas A. R. Purcell (tpurcell)
* @bug No known bugs.
*/
#ifndef CONVEX_UTILS
#define CONVEX_UTILS
......@@ -29,13 +28,13 @@
class ConvexHull1D
{
protected :
std::vector<double> _sorted_value; //!< The value sorted based on the property
std::vector<double> _sorted_value; //!< The value of the input vector sorted based on the property vector
std::vector<double> _cls_max; //!< The maximum value of the property in each class
std::vector<double> _cls_min; //!< The minimum value of the property in each class
std::vector<int> _sorted_prop_inds; //!< The value sorted based on the property
std::vector<int> _cls_start; //!< The number of values in each class
std::vector<int> _cls_sz; //!< The number of values in each class
std::vector<int> _sorted_prop_inds; //!< The indexes of where each element of an input vector is in _sorted_value
std::vector<int> _cls_start; //!< The element where each new class starts
std::vector<int> _cls_sz; //!< The number of elements in each class
std::vector<double> _task_scores; //!< The scores for each task
......@@ -46,20 +45,20 @@ public:
/**
* @brief Constructor
*
* @param sizes The size of the tasks
* @param sizes The size of each task
* @param prop The pointer to the property vector
*/
ConvexHull1D(const std::vector<int> sizes, const double* prop);
/**
* @brief Initialize the projection objects
* @brief Default constructor
*/
ConvexHull1D();
/**
* @brief Initialize the projection objects
*
* @param sizes The size of the tasks
* @param sizes The size of each task
* @param prop The pointer to the property vector
*/
void initialize_prop(const std::vector<int>& sizes, const double* prop);
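A minimal setup sketch for the two entry points above (the sample values are invented for illustration; prop holds the class label of each sample):

std::vector<int> task_sizes = {4, 3};                 // 4 samples in task 0, 3 in task 1
std::vector<double> prop = {0, 0, 1, 1, 0, 1, 1};     // class label of each sample
ConvexHull1D hull(task_sizes, prop.data());           // set up at construction
ConvexHull1D hull_late;                               // or default-construct ...
hull_late.initialize_prop(task_sizes, prop.data());   // ... and initialize afterwards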
......@@ -67,7 +66,7 @@ public:
/**
* @brief Calculate the projection scores of a set of features to a vector via Pearson correlation
*
* @param value The pointer to the value of the data
* @param value The pointer to the input data
* @param width The buffer used for calculating the overlap
*
* @returns The projection score for the particular feature
......
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
/** @file classification/LPWrapper.hpp
 * @brief A class used to wrap Coin-Clp into a single class
*
* @author Thomas A. R. Purcell (tpurcell)
* @bug No known bugs.
......@@ -30,9 +30,9 @@
class LPWrapper
{
protected:
ClpSimplex _simplex; //!< Model used to find if a point is inside the convex hull
ClpSimplex _simplex; //!< LP algorithm used to determine if a point is inside the convex hull
std::vector<double> _elements; //!< The elements of the A matrix (descriptor matrix) for the linear optimization problem
std::vector<double> _elements; //!< The elements of the A matrix (descriptor matrix) for the linear programming problem
std::vector<double> _row_lower; //!< The lower bound for the row constraints (value of the data point to check)
std::vector<double> _row_upper; //!< The upper bound for the row constraints (value of the data point to check)
std::vector<double> _row_lower_test; //!< The lower bound for the row constraints for the test set (value of the data point to check)
......@@ -42,20 +42,20 @@ protected:
std::vector<double> _column_upper; //!< The upper bound for the column constraints (all infinity)
std::vector<int> _column_start; //!< Vector len(n_samples) of the starting column for the A matrix
std::vector<int> _row; //!< Vector saying what row each data point is in
std::vector<int> _row_test; //!< Vector saying what row each data point is in for the test set
std::vector<int> _row; //!< Vector storing what row each data point is in
std::vector<int> _row_test; //!< Vector storing what row each data point is in for the test set
std::vector<int> _n_row_per_class; //!< Number of rows per class
std::vector<int> _n_row_per_class_test; //!< Number of rows per class in the test set
std::vector<int> _n_row_per_class; //!< A vector storing the number of rows in each class/task combination for the training set [task * N_TASK * N_CLASS + class]
std::vector<int> _n_row_per_class_test; //!< A vector storing the number of rows in each class/task combination for the test set [task * N_TASK * N_CLASS + class]
double _tol; //!< tolerance value (\epsilon) for the final row constraint
double _tol; //!< The tolerance value (\epsilon) for the final row constraint; the upper and lower bounds are 1 +/- _tol
int _n_dim; //!< The number of dimensions for the SVM problem
int _n_samp; //!< The number of samples per feature
int _n_samp_test; //!< The number of samples per feature in the test set
int _n_samp; //!< The number of samples in the training data per feature
int _n_samp_test; //!< The number of samples in the test data per feature
int _n_class; //!< The number of classes in the data set
int _n_row; //!< The number of rows in the A matrix
int _task_num; //!< The task number the LPWrapper is used for
int _task_num; //!< The current task number the LPWrapper is used for
int _n_col; //!< The number of columns in the A matrix
int _n_overlap; //!< The number of misclassified data points in the training set
int _n_overlap_test; //!< The number of misclassified data points in the test set
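Taken together, these members appear to encode the standard linear-programming feasibility test for convex-hull membership (this reading is inferred from the member documentation above, not stated explicitly in the file): with the training points x_i of one class as the columns of the A matrix and x the point being tested,

\text{find } \lambda_i \ge 0 \quad \text{s.t.} \quad \sum_i \lambda_i x_{i,d} = x_d \;\; (d = 1,\dots,n_{\mathrm{dim}}), \qquad 1 - \mathtt{\_tol} \le \sum_i \lambda_i \le 1 + \mathtt{\_tol},

and the point is counted as inside (or within the tolerance band of) the hull exactly when this LP is feasible.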
......@@ -66,14 +66,14 @@ public:
/**
* @brief The constructor for the LPWrapper
*
* @param samp_per_class number of samples per class
* @param task_num The task ID number to perform the calculation
* @param samp_per_class A vector storing the number of rows in each class/task combination for the training set [task * N_TASK * N_CLASS + class]
* @param task_num The task ID number used to perform the calculation
* @param n_class Number of classes in the dataset
* @param n_dim Number of dimensions of the problem
* @param n_samp Number of samples in the dataset
* @param n_samp_test Number of samples in the dataset for the test set
* @param samp_per_class number of samples per class for the test set
* @param tol The tolerance used to have a fuzzy border around the convex hull
* @param samp_per_class A vector storing number of test samples per class
 * @param n_samp_test A vector storing the number of rows in each class/task combination for the test set [task * N_TASK * N_CLASS + class]
*/
LPWrapper(
std::vector<int> samp_per_class,
......@@ -122,75 +122,69 @@ public:
/**
* @brief Copies the data from a set of feature indexes (sorted_dmatrix) into the x_space
*
* @param cls class the number of the class to copy over
* @param inds list of indexes to pull data for
* @param cls Index defining what class to copy
* @param inds A vector storing the _d_mat_ind of the features to pull data from the Descriptor Matrix storage array
*/
void copy_data(const int cls, const std::vector<int> inds);
/**
 * @brief Copies the data from a set of data pointers
*
* @param cls class the number of the class to copy over
* @param val_ptrs The pointers to the feature's data
* @param cls Index defining what class to copy
* @param val_ptrs A vector storing pointers to the features training data
* @param test_val_ptrs A vector storing pointers to the features test data
*/
void copy_data(const int cls, const std::vector<double*> val_ptrs, const std::vector<double*> test_val_ptrs);
/**
* @brief Copy the data from a set of feature indexes (sorted_dmatrix) into the x_space and train the SVM model
* @brief Calculate the number of points in the overlap region of the convex hulls for the training set
*
* @param inds list of indexes to pull data for
* @param inds A vector storing the _d_mat_ind of the features to pull data from the Descriptor Matrix storage array
*/
int get_n_overlap(const std::vector<int> inds);
/**
* @brief Copy tthe data from a set data pointers and train the SVM model
* @brief Calculate the number of points in the overlap region of the convex hulls for the training and test sets and set the error vectors
*
* @param val_ptrs The pointers to the feature's data
* @param test_val_ptrs The pointers to the feature's data (Test set)
* @param error Pointer to the error vector
* @param test_error Pointer to the test_error
* @param val_ptrs A vector storing pointers to the features training data
* @param test_val_ptrs A vector storing pointers to the features test data
* @param error Pointer to the head of the error vector
* @param test_error Pointer to the head of the test error vector
*/
void set_n_overlap(const std::vector<double*> val_ptrs, const std::vector<double*> test_val_ptrs, double* error, double* test_error);
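A minimal usage sketch for the two entry points above (lp, feat_ind, and the feature data pointers are assumed to exist; feat_ind must already refer to a column of the sorted descriptor matrix):

std::vector<int> inds = {feat_ind};               // _d_mat_ind of the candidate feature
int n_over = lp.get_n_overlap(inds);              // overlap count for the training set

std::vector<double> error(lp.n_samp());           // training error vector
std::vector<double> test_error(lp.n_samp_test()); // test error vector
lp.set_n_overlap({feat_val_ptr}, {feat_test_val_ptr}, error.data(), test_error.data());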
/**
* @brief The number of classes in the training set
* @return The number of classes in the training set
*/
inline int n_class() const {return _n_class;}
/**
* @brief The task id number
* @return The task id number
* @brief The index of the current task that is stored in the LPWrapper
*/
inline int task_num() const {return _task_num;}
/**
* @brief The number of dimensions of the Convex Hulls
* @return The number of dimensions of the Convex Hulls
*/
inline int n_dim() const {return _n_dim;}
/**
* @brief The number of samples in the training set
* @return The number of samples in the training set
*/
inline int n_samp() const {return _n_samp;}
/**
* @brief The number of samples in the test set
* @return The number of samples in the test set
*/
inline int n_samp_test() const {return _n_samp_test;}
/**
* @brief Number of misclassified samples in the data set
* @return Number of misclassified samples in the data set
*/
inline int n_overlap() const {return _n_overlap;}
/**
* @brief Number of misclassified samples in the test set
* @return Number of misclassified samples in the test set
*/
inline int n_overlap_test() const {return _n_overlap_test;}
};
......
......@@ -250,19 +250,19 @@ void SVMWrapper::train(const std::vector<double*> val_ptrs, const bool remap_coe
train(remap_coefs);
}
std::vector<double> SVMWrapper::predict(const int n_test_samp, const std::vector<double*> val_ptrs)
std::vector<double> SVMWrapper::predict(const int n_samp_test, const std::vector<double*> val_ptrs)
{
if(val_ptrs.size() > _n_dim)
{
throw std::logic_error("Size of the val_ptrs vector is larger than _n_dim");
}
std::vector<double> y_est_test(n_test_samp, 0.0);
std::vector<double> y_est_test(n_samp_test, 0.0);
std::vector<svm_node>x_space_test(n_test_samp * (val_ptrs.size() + 1));
std::vector<svm_node*>x_test(n_test_samp);
std::vector<svm_node>x_space_test(n_samp_test * (val_ptrs.size() + 1));
std::vector<svm_node*>x_test(n_samp_test);
for(int ss = 0; ss < n_test_samp; ++ss)
for(int ss = 0; ss < n_samp_test; ++ss)
{
for(int dd = 0; dd < val_ptrs.size(); ++dd)
{
......
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
/** @file classification/SVMWrapper.hpp
 * @brief A class used to wrap libsvm in a more C++-oriented data structure
*
* @author Thomas A. R. Purcell (tpurcell)
* @bug No known bugs.
......@@ -47,7 +47,7 @@ protected:
const double _C; //!< The C parameter for the SVM calculation
const int _n_dim; //!< The number of dimensions for the SVM problem
const int _n_samp; //!< The number of samples per feature
const int _n_samp; //!< The number of training samples per feature
const int _n_class; //!< The number of classes in the data set
int _n_misclassified; //!< The number of misclassified data points in the training set
......@@ -57,7 +57,7 @@ public:
*
* @param n_class Number of classes in the dataset
* @param n_dim Number of dimensions of the problem
* @param n_samp Number of samples in the dataset
* @param n_samp Number of training samples in the dataset
* @param prop pointer to the start of the property vector
*/
SVMWrapper(const int n_class, const int n_dim, const int n_samp, const double* prop);
......@@ -77,7 +77,7 @@ public:
* @param C The C value for the SVM calculation
* @param n_class Number of classes in the dataset
* @param n_dim Number of dimensions of the problem
* @param n_samp Number of samples in the dataset
* @param n_samp Number of training samples in the dataset
* @param prop pointer to the start of the property vector
*/
SVMWrapper(const double C, const int n_class, const int n_dim, const int n_samp, const double* prop);
......@@ -128,7 +128,7 @@ public:
/**
* @brief Copies the data from a set of feature indexes (sorted_dmatrix) into the x_space
*
* @param inds list of indexes to pull data for
* @param inds A vector storing the _d_mat_ind of the features to pull data from the Descriptor Matrix storage array
* @param task the task number for the calculation
*/
void copy_data(const std::vector<int> inds, const int task);
......@@ -150,16 +150,16 @@ public:
/**
* @brief Copy the data from a set of feature indexes (sorted_dmatrix) into the x_space and train the SVM model
*
* @param inds list of indexes to pull data for
* @param inds A vector storing the _d_mat_ind of the features to pull data from the Descriptor Matrix storage array
* @param task the task number for the calculation
* @param remap_coefs If true remap the final coefficients back to the unscaled feature space
*/
void train(const std::vector<int> inds, const int task, const bool remap_coefs=true);
/**
* @brief Copy tthe data from a set data pointers and train the SVM model
 * @brief Copy the data from a set of data pointers and train the SVM model
*
* @param val_ptrs The pointers to the feature's data
* @param val_ptrs A vector storing pointers to the features training data
* @param remap_coefs If true remap the final coefficients back to the unscaled feature space
*/
void train(const std::vector<double*> val_ptrs, const bool remap_coefs=true);
......@@ -167,58 +167,50 @@ public:
/**
* @brief Predict the class of a set of data from the SVM model
*
* @param n_test_samp the number of test samples to predict the class off
* @param val_ptrs vector of the data pointers for the feature's test set
 * @param n_samp_test The number of test samples to predict the class of
* @param val_ptrs A vector storing pointers to the features test data
*
* @return The predicted class of the test samples
*/
std::vector<double> predict(const int n_test_samp, const std::vector<double*> val_ptrs);
std::vector<double> predict(const int n_samp_test, const std::vector<double*> val_ptrs);
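A minimal end-to-end sketch for the pointer-based interface (n_class, n_samp, n_samp_test, prop, and the feature data pointers are assumed to be defined elsewhere):

SVMWrapper svm(1.0, n_class, 1, n_samp, prop.data());   // C = 1.0, one-dimensional model
svm.train({feat_val_ptr});                              // remap_coefs defaults to true
std::vector<double> y_pred = svm.predict(n_samp_test, {feat_test_val_ptr});
int n_wrong = svm.n_misclassified();                    // misclassified training samples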
/**
* @brief the coefficients of the decision planes for each of the one against one SVM models
* @return A list of all coefficients for the decision planes
*/
inline std::vector<std::vector<double>> coefs(){return _coefs;}
/**
* @brief the list of the bias terms in all the Svm models
* @return The bias term for each SVM generated decision function
* @brief the list of the bias terms in all the SVM models
*/
inline std::vector<double> intercept() const {return _intercept;}
/**
* @brief The number of classes in the training set
* @return The number of classes in the training set
*/
inline int n_class() const {return _n_class;}
/**
* @brief The number of dimensions of the SVM model
* @return The number of dimensions of the SVM model
*/
inline int n_dim() const {return _n_dim;}
/**
* @brief The number of samples in the training set
* @return The number of samples in the training set
*/
inline int n_samp() const {return _n_samp;}
/**
* @brief The predicted class for each sample in the data set
* @return The predicted class for each sample in the data set
*/
inline std::vector<double> y_estimate() const {return _y_est;}
/**
* @brief The actual class for each sample in the data set
* @return The actual class for each sample in the data set
*/
inline std::vector<double> y_actual() const {return _y;}
/**
* @brief Number of misclassified samples in the data set
* @return Number of misclassified samples in the data set
*/
inline int n_misclassified() const {return _n_misclassified;}
......
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
/** @file classification/prop_sorted_d_mat.hpp
 * @brief Central storage area for a sorted descriptor matrix based on the task/class of a sample
* @author Thomas A. R. Purcell (tpurcell)
* @bug No known bugs.
......@@ -27,10 +27,10 @@
namespace prop_sorted_d_mat
{
extern std::vector<double> SORTED_D_MATRIX; //!< The descriptor matrix
extern std::vector<double> SORTED_D_MATRIX; //!< The descriptor matrix with the feature's input data sorted by the class index
extern std::vector<int> CLASS_START; //!< The starting point for each class
extern std::vector<int> N_SAMPLES_PER_CLASS; //!< number of features in each class/task combination [task * N_TASK * N_CLASS + class]
extern std::vector<int> N_SAMPLES_PER_CLASS; //!< The number of samples in each class/task combination [task * N_TASK * N_CLASS + class]
extern int N_TASK; //!< Number of tasks for the calculation
extern int N_CLASS; //!< Number of classes per task for the calculation
......@@ -44,20 +44,19 @@ namespace prop_sorted_d_mat
* @param n_feat The number of features
* @param n_task The number of tasks
* @param n_class The number of classes
* @param n_samples_per_class The number of samples in each class
* @param n_samples_per_class The number of samples in each class/task combination [task * N_TASK * N_CLASS + class]
*/
void initialize_sroted_d_matrix_arr(int n_feat, int n_task, int n_class, std::vector<int> n_samples_per_class);
/**
* @brief Resize the descriptor matrix
* @details Using the total number of features
* @brief Resize the descriptor matrix to match the current number of features
*
* @param n_feats Number of features to select
*/
void resize_sroted_d_matrix_arr(int n_feats);
/**
* @brief Get the size of a particular class
* @brief Get the number of samples in a particular task/class combination
*
* @param task The task number
* @param cls the class number
......@@ -78,11 +77,13 @@ namespace prop_sorted_d_mat
{
return SORTED_D_MATRIX[feat_ind * N_SAMPLES + samp_ind];
}
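The accessor above implies a feature-major layout: feature ff occupies the contiguous block [ff * N_SAMPLES, (ff + 1) * N_SAMPLES) of SORTED_D_MATRIX. A minimal caller-side sketch (assuming N_SAMPLES is the extern total sample count used in the line above, and ff is a valid feature index):

double* feat = prop_sorted_d_mat::access_sorted_d_matrix(ff);  // start of feature ff
double sum = 0.0;
for(int ss = 0; ss < prop_sorted_d_mat::N_SAMPLES; ++ss)
{
    sum += feat[ss];  // same element as access_sorted_d_matrix(ff, ss)
}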
/**
* @brief Access the sorted descriptor matrix by feature ind
*
* @param feat_ind feature index
* @return pointer to the start of the selected feature
* @param feat_ind The feature index
*
* @return Pointer to the start of the selected feature
*/
inline double* access_sorted_d_matrix(int feat_ind)
{
......@@ -90,11 +91,11 @@ namespace prop_sorted_d_mat
}
/**
* @brief Access the sorted descriptor matrix by the class and task number
* @brief Get a pointer to the head of the sorted description matrix for a given task/class combination
*
* @param task The task number
* @param cls The class number
* @return pointer to the start of a given class and task for feature 0
* @return Pointer to the start of a given class and task for all features
*/
inline double* access_sorted_d_matrix(int task, int cls)
{
......@@ -107,7 +108,7 @@ namespace prop_sorted_d_mat
* @param feat_ind feature index
* @param task The task number
* @param cls The class number
* @return pointer to the start of the selected feature for a given class and task
* @return pointer to the start of the selected feature for a given feature, class, and task
*/
inline double* access_sorted_d_matrix(int feat_ind, int task, int cls)
{
......
......@@ -21,7 +21,7 @@ Model::Model(
const std::vector<int> leave_out_inds
) :
_n_samp_train(feats[0]->n_samp()),
_n_samp_test(feats[0]->n_test_samp()),
_n_samp_test(feats[0]->n_samp_test()),
_n_dim(feats.size()),
_feats(feats),
_leave_out_inds(leave_out_inds),
......
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
/** @file descriptor_identifier/Model/Model.hpp
 * @brief Object to store the models generated from SISSO
*
* Creates a Model generated from SISSO and the corresponding output file.
* It also has functionality to read in an output file to regenerate the model.
......@@ -45,14 +45,14 @@ class Model
protected:
int _n_samp_train; //!< The number of samples per feature
int _n_samp_test; //!< The number of test samples per feature
int _n_dim; //!< Dimension of the model
int _n_dim; //!< The number of dimensions of the model
std::vector<model_node_ptr> _feats; //!< List of features in the model
std::vector<std::vector<double>> _coefs; //!< Coefficients for the features
std::vector<int> _leave_out_inds; //!< The indexes used as the test set
std::shared_ptr<LossFunction> _loss;
std::string _prop_label; //!< label for the model
std::shared_ptr<LossFunction> _loss; //!< The LossFunction used to evaluate the model
std::string _prop_label; //!< The property label for the model
Unit _prop_unit; //!< The Unit for the property
int _task_eval; //!< Which set of coefficients to use for evaluating the model for a new data set
......@@ -111,15 +111,14 @@ public:
Model& operator= (Model&& o) = default;
/**
* @brief Copy the error into a new array
* @brief Copy the training error into a different vector
*
* @param res pointer to the beginning of the vector to store the residual
* @param res Pointer to the head of the new vector to store the residual
*/
virtual void copy_error(double* res) const = 0;
/**
* @brief Read an output file and extract all relevant information
* @details Takes in an output file and extracts all data needed to recreate the model
* @brief Read an output file and extract all relevant information from it
*
* @param train_filename Name of the training output file
* @param test_filename Name of the test output file
......@@ -129,10 +128,10 @@ public:
/**
* @brief Create the LossFunction for the Model when constructed from a file
*
* @param prop_train Vector storing all data to train the SISSO models with
* @param prop_test Vector storing all data to test the SISSO models with
* @param task_sizes_train Number of training samples per task
* @param task_sizes_test Number of testing samples per task
* @param prop_train The property vector for all of the training samples
* @param prop_test The property vector for all of the test samples
* @param task_sizes_train The number of training samples per task
* @param task_sizes_test The number of test samples per task
*/
virtual void make_loss(
std::vector<double>& prop_train,
......@@ -142,7 +141,7 @@ public:
) = 0;
/**
* @brief Based off of the model line in a model file determine if the intercept is fixed
* @brief Based off of the model line in a model file determine if the bias term is 0.0
*
* @param model_line the model line from the file
* @return True if the intercept should be fixed
......@@ -244,13 +243,13 @@ public:
// DocString: model_n_samp_train
/**
* @brief Total number of samples being trained on
* @brief The number of samples in the training data per feature