FeatureSpace.hpp 3.91 KB
Newer Older
Thomas Purcell's avatar
Thomas Purcell committed
1
2
3
#ifndef FEATURE_SPACE
#define FEATURE_SPACE

Thomas Purcell's avatar
Thomas Purcell committed
4
#include <mpi_interface/MPI_Interface.hpp>
Thomas Purcell's avatar
Thomas Purcell committed
5
6
7
#include <feature_creation/node/FeatureNode.hpp>
#include <feature_creation/node/operator_nodes/allowed_ops.hpp>

Thomas Purcell's avatar
Thomas Purcell committed
8
9
#include <boost/serialization/shared_ptr.hpp>

Thomas Purcell's avatar
Thomas Purcell committed
10
11
#include <iostream>

Thomas Purcell's avatar
Thomas Purcell committed
12
// namespace mpi = boost::mpi;
13
14
15
16
17
/**
 * @brief Feature Space for SISSO calculations
 * @details Stores and performs all feature calculations for SIS
 *
 */
Thomas Purcell's avatar
Thomas Purcell committed
18
19
class FeatureSpace
{
20
21
22
23
24
    std::shared_ptr<MPI_Interface> _mpi_comm; //!< MPi communicator
    int _max_phi; //!< Maximum rung for the feature creation
    int _n_sis_select; //!< Number of features to select for each dimensions
    int _n_samp; //!< Number of samples
    int _n_feat; //!< Total number of features
Thomas Purcell's avatar
Thomas Purcell committed
25

26
    double _max_abs_feat_val; //!< Maximum absolute value for any feature
Thomas Purcell's avatar
Thomas Purcell committed
27

28
    std::vector<int> _start_gen; //!< list of starting index for each generation
Thomas Purcell's avatar
Thomas Purcell committed
29

30
31
32
    std::vector<double> _prop; //!< property to learn
    std::vector<double> _scores; //!< projection scores for each feature
    std::vector<double> _D; //!< matrix of slected features
Thomas Purcell's avatar
Thomas Purcell committed
33

34
35
36
37
    std::vector<std::string> _allowed_ops; //!< list of all allowed operators strings
    std::vector<un_op_node_gen> _un_operators; //!< list of all unary operators
    std::vector<bin_op_node_gen> _bin_operators; //!< list of all binary operators
    std::vector<bin_op_node_gen> _com_bin_operators; //!< list of all commutable binary operators
Thomas Purcell's avatar
Thomas Purcell committed
38

39
40
41
    std::vector<node_ptr> _phi_selected; //!< selected features
    std::vector<node_ptr> _phi; //!< all features
    std::vector<node_ptr> _phi_0; //!< initial feature space
Thomas Purcell's avatar
Thomas Purcell committed
42
43

public:
44
45
46
47
48
49
50
51
52
53
    /**
     * @brief Constructor for the feature space
     * @details constructs the feature space from an initial set of features and a list of allowed operatiors
     *
     * @param mpi_comm MPI communicator for the calculations
     * @param allowed_ops list of allowed operators
     * @param max_phi highest rung value for the calculation
     * @param n_sis_select number of features to select during each SIS step
     * @param max_abs_feat_val maximum absolute feature value
     */
Thomas Purcell's avatar
Thomas Purcell committed
54
    FeatureSpace(
Thomas Purcell's avatar
Thomas Purcell committed
55
        std::shared_ptr<MPI_Interface> mpi_comm,
Thomas Purcell's avatar
Thomas Purcell committed
56
57
58
59
60
61
62
        std::vector<node_ptr> phi_0,
        std::vector<std::string> allowed_ops,
        int max_phi=1,
        int n_sis_select=1,
        double max_abs_feat_val=1e27
    );

63
64
65
66
    /**
     * @brief Generate the full feature set from the allowed operators and initial feature set
     * @details populates phi with all features from an initial set and the allowed operators
     */
Thomas Purcell's avatar
Thomas Purcell committed
67
68
    void generate_feature_space();

69
70
71
    /**
     * @brief Accessor function for _phi_selected
     */
Thomas Purcell's avatar
Thomas Purcell committed
72
    inline std::vector<node_ptr> phi_selected(){return _phi_selected;};
73
74
75
76

    /**
     * @brief Accessor function for _phi
     */
Thomas Purcell's avatar
Thomas Purcell committed
77
    inline std::vector<node_ptr> phi(){return _phi;};
78
79
80
81

    /**
     * @brief Accessor function for _phi_0
     */
Thomas Purcell's avatar
Thomas Purcell committed
82
    inline std::vector<node_ptr> phi0(){return _phi_0;};
83
84
85
86

    /**
     * @brief Accessor function for _scores
     */
Thomas Purcell's avatar
Thomas Purcell committed
87
    inline std::vector<double>& scores(){return _scores;};
88
89
90
91

    /**
     * @brief Accessor function for _mpi_comm
     */
Thomas Purcell's avatar
Thomas Purcell committed
92
    inline std::shared_ptr<MPI_Interface> mpi_comm(){return _mpi_comm;}
93
94
95
96
97
98
99

    /**
     * @brief Access the value of a selected feature
     * @details Given a feature index get the selected values
     *
     * @param ind index of the selected feature
     */
Thomas Purcell's avatar
Thomas Purcell committed
100
    inline double* D(int ind){return &_D[ind * _n_samp];}
Thomas Purcell's avatar
Thomas Purcell committed
101

102
103
104
105
106
107
    /**
     * @brief calculate the projection scores for all features for a given property
     * @details Calculate the projection score based on the Pearson correlation
     *
     * @param prop [description]
     */
108
    void project_r(double* prop);
Thomas Purcell's avatar
Thomas Purcell committed
109

110
111
112
113
114
115
    /**
     * @brief Perform SIS on a feature set with a specified property
     * @details Perform sure-independence screening with either the correct property
     *
     * @param prop The property to calculate SIS from
     */
Thomas Purcell's avatar
Thomas Purcell committed
116
117
118
119
    void sis(std::vector<double>& prop);
};

#endif