Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
nomad-lab
cpp_sisso
Commits
016b7a34
Commit
016b7a34
authored
Jun 12, 2020
by
Thomas Purcell
Browse files
Multi-task SISSO implemented
Works on initial test cases
parent
1f1553cc
Changes
14
Hide whitespace changes
Inline
Side-by-side
src/Makefile.am
View file @
016b7a34
...
...
@@ -29,6 +29,7 @@ __top_builddir__sisso_cpp_SOURCES = \
feature_creation/node/operator_nodes/allowed_operator_nodes/sin.cpp
\
feature_creation/node/operator_nodes/allowed_operator_nodes/cos.cpp
\
feature_creation/node/operator_nodes/allowed_ops.cpp
\
utils/project.cpp
\
feature_creation/feature_space/FeatureSpace.cpp
\
inputs/InputParser.cpp
\
descriptor_identifier/Model/Model.cpp
\
...
...
src/descriptor_identifier/Model/Model.cpp
View file @
016b7a34
#include
<descriptor_identifier/Model/Model.hpp>
Model
::
Model
(
std
::
vector
<
double
>
prop_train
,
std
::
vector
<
double
>
prop_test
,
std
::
vector
<
std
::
shared_ptr
<
FeatureNode
>>
feats
)
:
Model
::
Model
(
std
::
vector
<
double
>
prop_train
,
std
::
vector
<
double
>
prop_test
,
std
::
vector
<
std
::
shared_ptr
<
FeatureNode
>>
feats
,
std
::
vector
<
int
>
task_sizes_train
,
std
::
vector
<
int
>
task_sizes_test
)
:
_n_samp_train
(
feats
[
0
]
->
n_samp
()),
_n_samp_test
(
feats
[
0
]
->
n_test_samp
()),
_n_dim
(
feats
.
size
()
+
1
),
_feats
(
feats
),
_coefs
(
_n_dim
),
_prop_train
(
prop_train
),
_prop_test
(
prop_test
),
_train_error
(
_n_samp_train
),
...
...
@@ -13,47 +12,67 @@ Model::Model(std::vector<double> prop_train, std::vector<double> prop_test, std:
_D_train
(
_n_samp_train
*
_n_dim
),
_D_test
(
_n_samp_test
*
_n_dim
),
_prop_train_est
(
_n_samp_train
,
0.0
),
_prop_test_est
(
_n_samp_test
,
0.0
)
_prop_test_est
(
_n_samp_test
,
0.0
),
_task_sizes_train
(
task_sizes_train
),
_task_sizes_test
(
task_sizes_test
)
{
_prop_train_est
.
reserve
(
_n_samp_train
);
_prop_test_est
.
reserve
(
_n_samp_test
);
std
::
vector
<
double
>
a
(
_n_samp_train
*
_n_dim
,
1.0
);
for
(
int
ff
=
0
;
ff
<
feats
.
size
();
++
ff
)
{
std
::
copy_n
(
feats
[
ff
]
->
value_ptr
(),
_n_samp_train
,
_D_train
.
data
()
+
ff
*
_n_samp_train
);
std
::
copy_n
(
feats
[
ff
]
->
value_ptr
(),
_n_samp_train
,
a
.
data
()
+
ff
*
_n_samp_train
);
if
(
_n_samp_test
>
0
)
std
::
copy_n
(
feats
[
ff
]
->
test_value
().
data
(),
_n_samp_test
,
_D_test
.
data
()
+
ff
*
_n_samp_test
);
}
std
::
copy_n
(
a
.
data
()
+
feats
.
size
()
*
_n_samp_train
,
_n_samp_train
,
_D_train
.
data
()
+
feats
.
size
()
*
_n_samp_train
);
std
::
vector
<
double
>
s
(
_n_dim
,
0.0
);
std
::
vector
<
double
>
work
(
_n_dim
*
_n_samp_train
,
0.0
);
int
rank
=
0
;
int
info
=
0
;
int
start
=
0
;
dgelss_
(
_n_samp_train
,
_n_dim
,
1
,
a
.
data
(),
_n_samp_train
,
prop_train
.
data
(),
_n_samp_train
,
s
.
data
(),
1e-13
,
&
rank
,
work
.
data
(),
work
.
size
(),
&
info
);
std
::
copy_n
(
prop_train
.
begin
(),
_n_dim
,
_coefs
.
data
());
dgemv_
(
'N'
,
_n_samp_train
,
_n_dim
,
1.0
,
_D_train
.
data
(),
_n_samp_train
,
_coefs
.
data
(),
1
,
0.0
,
_prop_train_est
.
data
(),
1
);
std
::
transform
(
_prop_train_est
.
begin
(),
_prop_train_est
.
end
(),
_prop_train
.
data
(),
_train_error
.
data
(),
std
::
minus
<
double
>
());
if
(
_n_samp_test
>
0
)
for
(
auto
&
sz
:
_task_sizes_train
)
{
std
::
copy_n
(
std
::
vector
<
double
>
(
_n_samp_test
,
1.0
).
data
(),
_n_samp_test
,
_D_test
.
data
()
+
feats
.
size
()
*
_n_samp_test
);
dgemv_
(
'N'
,
_n_samp_test
,
_n_dim
,
1.0
,
_D_test
.
data
(),
_n_samp_test
,
_coefs
.
data
(),
1
,
0.0
,
_prop_test_est
.
data
(),
1
);
std
::
transform
(
_prop_test_est
.
begin
(),
_prop_test_est
.
end
(),
_prop_test
.
data
(),
_test_error
.
data
(),
std
::
minus
<
double
>
());
std
::
fill_n
(
a
.
data
()
+
feats
.
size
()
*
sz
,
sz
,
1.0
);
std
::
fill_n
(
_D_train
.
data
()
+
feats
.
size
()
*
sz
,
sz
,
1.0
);
for
(
int
ff
=
0
;
ff
<
feats
.
size
();
++
ff
)
{
std
::
copy_n
(
feats
[
ff
]
->
value_ptr
()
+
start
,
sz
,
_D_train
.
data
()
+
ff
*
sz
);
std
::
copy_n
(
feats
[
ff
]
->
value_ptr
()
+
start
,
sz
,
a
.
data
()
+
ff
*
sz
);
}
dgelss_
(
sz
,
_n_dim
,
1
,
a
.
data
(),
sz
,
prop_train
.
data
()
+
start
,
sz
,
s
.
data
(),
1e-13
,
&
rank
,
work
.
data
(),
work
.
size
(),
&
info
);
_coefs
.
push_back
(
std
::
vector
<
double
>
(
_n_dim
,
0.0
));
std
::
copy_n
(
prop_train
.
begin
()
+
start
,
_n_dim
,
_coefs
.
back
().
data
());
dgemv_
(
'N'
,
sz
,
_n_dim
,
1.0
,
_D_train
.
data
(),
sz
,
_coefs
.
back
().
data
(),
1
,
0.0
,
_prop_train_est
.
data
()
+
start
,
1
);
std
::
transform
(
_prop_train_est
.
begin
()
+
start
,
_prop_train_est
.
begin
()
+
start
+
sz
,
_prop_train
.
data
()
+
start
,
_train_error
.
data
()
+
start
,
std
::
minus
<
double
>
());
start
+=
sz
;
}
start
=
0
;
int
ii
=
0
;
for
(
auto
&
sz
:
_task_sizes_test
)
{
if
(
sz
>
0
)
{
for
(
int
ff
=
0
;
ff
<
feats
.
size
();
++
ff
)
std
::
copy_n
(
feats
[
ff
]
->
test_value
().
data
()
+
start
,
sz
,
_D_test
.
data
()
+
ff
*
sz
);
std
::
fill_n
(
_D_test
.
data
()
+
feats
.
size
()
*
sz
,
sz
,
1.0
);
dgemv_
(
'N'
,
sz
,
_n_dim
,
1.0
,
_D_test
.
data
(),
sz
,
_coefs
[
ii
].
data
(),
1
,
0.0
,
_prop_test_est
.
data
()
+
start
,
1
);
std
::
transform
(
_prop_test_est
.
begin
()
+
start
,
_prop_test_est
.
begin
()
+
start
+
sz
,
_prop_test
.
data
()
+
start
,
_test_error
.
data
()
+
start
,
std
::
minus
<
double
>
());
}
++
ii
;
start
+=
sz
;
}
}
std
::
string
Model
::
toString
()
const
{
std
::
stringstream
unit_rep
;
unit_rep
<<
_coefs
[
_n_dim
-
1
]
;
unit_rep
<<
"c0"
;
for
(
int
ff
=
0
;
ff
<
_feats
.
size
();
++
ff
)
unit_rep
<<
" +
(
"
<<
_coefs
[
ff
]
<<
"
)
* "
<<
_feats
[
ff
]
->
expr
();
unit_rep
<<
" +
a
"
<<
std
::
to_string
(
ff
)
<<
" * "
<<
_feats
[
ff
]
->
expr
();
return
unit_rep
.
str
();
}
...
...
@@ -73,19 +92,31 @@ void Model::train_to_file(std::string filename)
out_file_stream
<<
"# "
<<
toString
()
<<
std
::
endl
;
out_file_stream
<<
"# RMSE: "
<<
rmse
()
<<
"; Max AE: "
<<
max_ae
()
<<
std
::
endl
;
out_file_stream
<<
"# coeffs:"
;
for
(
auto
&
coef
:
_coefs
)
out_file_stream
<<
" "
<<
std
::
setw
(
24
)
<<
std
::
setprecision
(
18
)
<<
coef
<<
";"
;
out_file_stream
<<
"
\n
# "
<<
std
::
setw
(
23
)
<<
"Property Value,"
<<
std
::
setw
(
24
)
<<
"Property Value (EST),"
;
out_file_stream
<<
"# Coefficients"
<<
std
::
endl
;
out_file_stream
<<
std
::
setw
(
10
)
<<
std
::
left
<<
"# Task,"
;
for
(
int
cc
=
0
;
cc
<
_coefs
[
0
].
size
()
-
1
;
++
cc
)
out_file_stream
<<
std
::
setw
(
24
)
<<
"a"
+
std
::
to_string
(
cc
);
out_file_stream
<<
std
::
setw
(
24
)
<<
"c0"
<<
std
::
endl
;
for
(
int
cc
=
0
;
cc
<
_coefs
.
size
();
++
cc
)
{
out_file_stream
<<
std
::
setw
(
10
)
<<
std
::
left
<<
"# "
+
std
::
to_string
(
cc
);
for
(
auto
&
coeff
:
_coefs
[
cc
])
out_file_stream
<<
std
::
setw
(
24
)
<<
std
::
setprecision
(
18
)
<<
coeff
;
out_file_stream
<<
"
\n
"
;
}
out_file_stream
<<
"
\n
"
<<
std
::
setw
(
24
)
<<
std
::
left
<<
"# Property Value"
<<
std
::
setw
(
24
)
<<
"Property Value (EST)"
;
for
(
int
ff
=
0
;
ff
<
_feats
.
size
();
++
ff
)
out_file_stream
<<
" Feature "
<<
ff
<<
" Value
,
"
;
out_file_stream
<<
std
::
setw
(
24
)
<<
"Feature "
+
std
::
to_string
(
ff
)
+
" Value"
;
out_file_stream
<<
std
::
endl
;
for
(
int
ss
=
0
;
ss
<
_n_samp_train
;
++
ss
)
{
out_file_stream
<<
std
::
setw
(
24
)
<<
std
::
setprecision
(
18
)
<<
_prop_train
[
ss
]
<<
std
::
setw
(
24
)
<<
std
::
setprecision
(
18
)
<<
_prop_train_est
[
ss
];
for
(
int
ff
=
0
;
ff
<
_n_dim
-
1
;
++
ff
)
out_file_stream
<<
std
::
setw
(
24
)
<<
std
::
setprecision
(
18
)
<<
_
D_train
[
ss
+
ff
*
_n_samp_train
];
out_file_stream
<<
std
::
setw
(
24
)
<<
std
::
setprecision
(
18
)
<<
_
feats
[
ff
]
->
value
()[
ss
];
out_file_stream
<<
std
::
endl
;
}
out_file_stream
.
close
();
...
...
@@ -100,23 +131,36 @@ void Model::test_to_file(std::string filename, std::vector<int> test_inds)
out_file_stream
.
open
(
filename
);
out_file_stream
<<
"# "
<<
toString
()
<<
std
::
endl
;
out_file_stream
<<
"# Testing Indexes: ["
<<
test_inds
[
0
];
for
(
int
ss
=
1
;
ss
<
_n_samp_test
;
++
ss
)
out_file_stream
<<
", "
<<
test_inds
[
ss
];
out_file_stream
<<
"]"
<<
std
::
endl
;
out_file_stream
<<
"# RMSE: "
<<
test_rmse
()
<<
"; Max AE: "
<<
test_max_ae
()
<<
std
::
endl
;
out_file_stream
<<
"# coeffs:"
;
for
(
auto
&
coef
:
_coefs
)
out_file_stream
<<
" "
<<
std
::
setw
(
24
)
<<
std
::
setprecision
(
18
)
<<
coef
<<
";"
;
out_file_stream
<<
"
\n
# "
<<
std
::
setw
(
23
)
<<
"Property Value,"
<<
std
::
setw
(
24
)
<<
"Property Value (EST),"
;
out_file_stream
<<
"# RMSE: "
<<
rmse
()
<<
"; Max AE: "
<<
max_ae
()
<<
std
::
endl
;
out_file_stream
<<
"# Coefficients"
<<
std
::
endl
;
out_file_stream
<<
std
::
setw
(
10
)
<<
std
::
left
<<
"# Task"
;
for
(
int
cc
=
0
;
cc
<
_coefs
[
0
].
size
()
-
1
;
++
cc
)
out_file_stream
<<
std
::
setw
(
24
)
<<
"a"
+
std
::
to_string
(
cc
);
out_file_stream
<<
std
::
setw
(
24
)
<<
"c0"
<<
std
::
endl
;
for
(
int
cc
=
0
;
cc
<
_coefs
.
size
();
++
cc
)
{
out_file_stream
<<
std
::
setw
(
10
)
<<
std
::
left
<<
"# "
+
std
::
to_string
(
cc
);
for
(
auto
&
coeff
:
_coefs
[
cc
])
out_file_stream
<<
std
::
setw
(
24
)
<<
std
::
setprecision
(
18
)
<<
coeff
;
out_file_stream
<<
"
\n
"
;
}
out_file_stream
<<
"#Test Indexes: [ "
<<
test_inds
[
0
];
for
(
int
ii
=
1
;
ii
<
test_inds
.
size
();
++
ii
)
out_file_stream
<<
", "
<<
test_inds
[
ii
];
out_file_stream
<<
" ]"
<<
std
::
endl
;
out_file_stream
<<
"
\n
"
<<
std
::
setw
(
24
)
<<
std
::
left
<<
"# Property Value"
<<
std
::
setw
(
24
)
<<
"Property Value (EST)"
;
for
(
int
ff
=
0
;
ff
<
_feats
.
size
();
++
ff
)
out_file_stream
<<
" Feature "
<<
ff
<<
" Value
,
"
;
out_file_stream
<<
std
::
setw
(
24
)
<<
"Feature "
+
std
::
to_string
(
ff
)
+
" Value"
;
out_file_stream
<<
std
::
endl
;
for
(
int
ss
=
0
;
ss
<
_n_samp_test
;
++
ss
)
{
out_file_stream
<<
std
::
setw
(
24
)
<<
std
::
setprecision
(
18
)
<<
_prop_test
[
ss
]
<<
std
::
setw
(
24
)
<<
std
::
setprecision
(
18
)
<<
_prop_test_est
[
ss
];
for
(
int
ff
=
0
;
ff
<
_
feats
.
size
()
;
++
ff
)
for
(
int
ff
=
0
;
ff
<
_
n_dim
-
1
;
++
ff
)
out_file_stream
<<
std
::
setw
(
24
)
<<
std
::
setprecision
(
18
)
<<
_feats
[
ff
]
->
test_value
()[
ss
];
out_file_stream
<<
std
::
endl
;
}
...
...
src/descriptor_identifier/Model/Model.hpp
View file @
016b7a34
...
...
@@ -21,7 +21,7 @@ class Model
std
::
vector
<
std
::
shared_ptr
<
FeatureNode
>>
_feats
;
//!< List of features in the model
std
::
vector
<
double
>
_coefs
;
//!< Coefficients for the features
std
::
vector
<
std
::
vector
<
double
>
>
_coefs
;
//!< Coefficients for the features
std
::
vector
<
double
>
_prop_train
;
//!< The property to be modeled
std
::
vector
<
double
>
_prop_test
;
//!< The property to be modeled
std
::
vector
<
double
>
_train_error
;
//!< The error of the model
...
...
@@ -32,6 +32,8 @@ class Model
std
::
vector
<
double
>
_prop_train_est
;
//!< The estimated Property
std
::
vector
<
double
>
_prop_test_est
;
//!< The estimated Property
std
::
vector
<
int
>
_task_sizes_train
;
//!< Number of samples in each task
std
::
vector
<
int
>
_task_sizes_test
;
//!< Number of samples in each task
public:
/**
* @brief Constructor for the model
...
...
@@ -39,7 +41,7 @@ public:
* @param prop The property
* @param feats The features for the model
*/
Model
(
std
::
vector
<
double
>
prop_train
,
std
::
vector
<
double
>
prop_test
,
std
::
vector
<
std
::
shared_ptr
<
FeatureNode
>>
feats
);
Model
(
std
::
vector
<
double
>
prop_train
,
std
::
vector
<
double
>
prop_test
,
std
::
vector
<
std
::
shared_ptr
<
FeatureNode
>>
feats
,
std
::
vector
<
int
>
task_sizes_train
,
std
::
vector
<
int
>
task_sizes_test
);
/**
...
...
src/descriptor_identifier/SISSORegressor.cpp
View file @
016b7a34
#include
<descriptor_identifier/SISSORegressor.hpp>
SISSORegressor
::
SISSORegressor
(
std
::
shared_ptr
<
FeatureSpace
>
feat_space
,
std
::
vector
<
double
>
prop
,
std
::
vector
<
double
>
prop_test
,
int
n_dim
,
int
n_residual
)
:
SISSORegressor
::
SISSORegressor
(
std
::
shared_ptr
<
FeatureSpace
>
feat_space
,
std
::
vector
<
double
>
prop
,
std
::
vector
<
double
>
prop_test
,
std
::
vector
<
int
>
task_sizes_train
,
std
::
vector
<
int
>
task_sizes_test
,
int
n_dim
,
int
n_residual
)
:
_prop
(
prop
),
_prop_test
(
prop_test
),
_a
(
new
double
[(
n_dim
+
1
)
*
prop
.
size
()]),
...
...
@@ -9,6 +9,8 @@ SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::ve
_error
(
new
double
[
prop
.
size
()]),
_work
(
nullptr
),
_s
(
new
double
[
n_dim
+
1
]),
_task_sizes_train
(
task_sizes_train
),
_task_sizes_test
(
task_sizes_test
),
_feat_space
(
feat_space
),
_mpi_comm
(
feat_space
->
mpi_comm
()),
_n_samp
(
prop
.
size
()),
...
...
@@ -33,22 +35,22 @@ SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::ve
_work
=
std
::
unique_ptr
<
double
[]
>
(
new
double
[
_lwork
]);
}
void
SISSORegressor
::
set_a
(
std
::
vector
<
int
>&
inds
)
void
SISSORegressor
::
set_a
(
std
::
vector
<
int
>&
inds
,
int
start
,
int
n_samp
)
{
for
(
int
ii
=
0
;
ii
<
inds
.
size
();
++
ii
)
std
::
copy_n
(
node_value_arrs
::
get_d_matrix_ptr
(
inds
[
ii
]),
_
n_samp
,
_a
.
get
()
+
ii
*
_
n_samp
);
std
::
copy_n
(
_ones
.
get
(),
_
n_samp
,
_a
.
get
()
+
inds
.
size
()
*
_
n_samp
);
std
::
copy_n
(
node_value_arrs
::
get_d_matrix_ptr
(
inds
[
ii
])
+
start
,
n_samp
,
_a
.
get
()
+
ii
*
n_samp
);
std
::
copy_n
(
_ones
.
get
(),
n_samp
,
_a
.
get
()
+
inds
.
size
()
*
n_samp
);
}
void
SISSORegressor
::
least_squares
(
std
::
vector
<
int
>&
inds
,
double
*
coeffs
)
void
SISSORegressor
::
least_squares
(
std
::
vector
<
int
>&
inds
,
double
*
coeffs
,
int
start
,
int
n_samp
)
{
int
info
;
int
n_dim
=
inds
.
size
()
+
1
;
set_a
(
inds
);
std
::
copy_n
(
_prop
.
data
(),
_
n_samp
,
_b
.
get
());
set_a
(
inds
,
start
,
n_samp
);
std
::
copy_n
(
_prop
.
data
()
+
start
,
n_samp
,
_b
.
get
());
dgelss_
(
_
n_samp
,
n_dim
,
1
,
_a
.
get
(),
_
n_samp
,
_b
.
get
(),
_
n_samp
,
_s
.
get
(),
1e-13
,
&
_rank
,
_work
.
get
(),
_lwork
,
&
info
);
dgelss_
(
n_samp
,
n_dim
,
1
,
_a
.
get
(),
n_samp
,
_b
.
get
(),
n_samp
,
_s
.
get
(),
1e-13
,
&
_rank
,
_work
.
get
(),
_lwork
,
&
info
);
if
(
info
==
0
)
std
::
copy_n
(
_b
.
get
(),
n_dim
,
coeffs
);
...
...
@@ -69,11 +71,11 @@ int SISSORegressor::get_opt_lwork(int n_dim)
throw
std
::
logic_error
(
"Failed to get lwork."
);
}
void
SISSORegressor
::
set_error
(
std
::
vector
<
int
>&
inds
,
double
*
coeffs
)
void
SISSORegressor
::
set_error
(
std
::
vector
<
int
>&
inds
,
double
*
coeffs
,
int
start
,
int
n_samp
)
{
set_a
(
inds
);
dgemv_
(
'N'
,
_
n_samp
,
inds
.
size
()
+
1
,
1.0
,
_a
.
get
(),
_
n_samp
,
coeffs
,
1
,
1e-13
,
_b
.
get
(),
1
);
std
::
transform
(
_prop
.
begin
()
,
_prop
.
end
()
,
_b
.
get
(),
_error
.
get
(),
std
::
minus
<
double
>
());
set_a
(
inds
,
start
,
n_samp
);
dgemv_
(
'N'
,
n_samp
,
inds
.
size
()
+
1
,
1.0
,
_a
.
get
(),
n_samp
,
coeffs
,
1
,
1e-13
,
_b
.
get
(),
1
);
std
::
transform
(
_prop
.
begin
()
+
start
,
_prop
.
begin
()
+
start
+
n_samp
,
_b
.
get
(),
_error
.
get
()
+
start
,
std
::
minus
<
double
>
());
}
void
SISSORegressor
::
fit
()
...
...
@@ -95,7 +97,7 @@ void SISSORegressor::fit()
std
::
vector
<
Model
>
models
;
for
(
int
rr
=
0
;
rr
<
_n_residual
;
++
rr
)
{
models
.
push_back
(
Model
(
_prop
,
_prop_test
,
{
_feat_space
->
phi_selected
()[
rr
]}));
models
.
push_back
(
Model
(
_prop
,
_prop_test
,
{
_feat_space
->
phi_selected
()[
rr
]}
,
_task_sizes_train
,
_task_sizes_test
));
models
.
back
().
copy_error
(
&
residual
[
rr
*
_n_samp
]);
}
_models
.
push_back
(
models
);
...
...
@@ -142,9 +144,16 @@ void SISSORegressor::l0_norm(std::vector<double>& prop, int n_dim)
util_funcs
::
iterate
(
inds
,
inds
.
size
(),
_mpi_comm
->
rank
());
do
{
least_squares
(
inds
,
coefs
.
data
());
set_error
(
inds
,
coefs
.
data
());
double
error
=
util_funcs
::
norm
(
_error
.
get
(),
_n_samp
);
int
start
=
0
;
double
error
=
0.0
;
for
(
auto
&
sz
:
_task_sizes_train
)
{
least_squares
(
inds
,
coefs
.
data
(),
start
,
sz
);
set_error
(
inds
,
coefs
.
data
(),
start
,
sz
);
error
+=
std
::
pow
(
util_funcs
::
norm
(
_error
.
get
()
+
start
,
sz
),
2.0
)
/
sz
;
start
+=
sz
;
}
error
=
std
::
sqrt
(
error
/
_task_sizes_train
.
size
());
if
(
error
<
min_errors
.
back
())
{
int
rr
=
0
;
...
...
@@ -174,7 +183,7 @@ void SISSORegressor::l0_norm(std::vector<double>& prop, int n_dim)
{
for
(
int
ii
=
0
;
ii
<
n_dim
;
++
ii
)
min_nodes
[
ii
]
=
_feat_space
->
phi_selected
()[
all_inds_min
[
inds
[
rr
]
*
n_dim
+
ii
]];
models
.
push_back
(
Model
(
_prop
,
_prop_test
,
min_nodes
));
models
.
push_back
(
Model
(
_prop
,
_prop_test
,
min_nodes
,
_task_sizes_train
,
_task_sizes_test
));
}
_models
.
push_back
(
models
);
...
...
src/descriptor_identifier/SISSORegressor.hpp
View file @
016b7a34
...
...
@@ -24,6 +24,8 @@ protected:
std
::
unique_ptr
<
double
[]
>
_work
;
//!< The work array for least squares problems
std
::
unique_ptr
<
double
[]
>
_s
;
//!< The S array for least squares problems
std
::
vector
<
int
>
_task_sizes_train
;
std
::
vector
<
int
>
_task_sizes_test
;
std
::
shared_ptr
<
FeatureSpace
>
_feat_space
;
//!< Feature Space for the problem
std
::
shared_ptr
<
MPI_Interface
>
_mpi_comm
;
//!< MPI Communicator
...
...
@@ -42,7 +44,7 @@ public:
* @param prop Property to model
* @param n_dim Maximum dimension of the model
*/
SISSORegressor
(
std
::
shared_ptr
<
FeatureSpace
>
feat_space
,
std
::
vector
<
double
>
prop
,
std
::
vector
<
double
>
prop_test
,
int
n_dim
,
int
n_residual
);
SISSORegressor
(
std
::
shared_ptr
<
FeatureSpace
>
feat_space
,
std
::
vector
<
double
>
prop
,
std
::
vector
<
double
>
prop_test
,
std
::
vector
<
int
>
task_sizes_train
,
std
::
vector
<
int
>
task_sizes_test
,
int
n_dim
,
int
n_residual
);
/**
* @brief Get the optimal size of the working array
...
...
@@ -58,7 +60,7 @@ public:
* @param inds Feature indexes to get the model of
* @param coeffs Coefficients for the model
*/
void
least_squares
(
std
::
vector
<
int
>&
inds
,
double
*
coeffs
);
void
least_squares
(
std
::
vector
<
int
>&
inds
,
double
*
coeffs
,
int
start
,
int
n_samp
);
/**
* @brief Set the residual for the next step
...
...
@@ -66,14 +68,14 @@ public:
* @param inds indexes of the selected features
* @param coeffs Coefficients of the model
*/
void
set_error
(
std
::
vector
<
int
>&
inds
,
double
*
coeffs
);
void
set_error
(
std
::
vector
<
int
>&
inds
,
double
*
coeffs
,
int
start
,
int
n_samp
);
/**
* @brief Set the A matrix for the least squares problem
*
* @param inds indexes of the selected features
*/
void
set_a
(
std
::
vector
<
int
>&
inds
);
void
set_a
(
std
::
vector
<
int
>&
inds
,
int
start
,
int
n_samp
);
/**
* @brief Fit the models
...
...
src/feature_creation/feature_space/FeatureSpace.cpp
View file @
016b7a34
...
...
@@ -22,6 +22,7 @@ FeatureSpace::FeatureSpace(
std
::
shared_ptr
<
MPI_Interface
>
mpi_comm
,
std
::
vector
<
node_ptr
>
phi_0
,
std
::
vector
<
std
::
string
>
allowed_ops
,
std
::
vector
<
int
>
task_sizes
,
int
max_phi
,
int
n_sis_select
,
int
max_store_rung
,
...
...
@@ -33,6 +34,7 @@ FeatureSpace::FeatureSpace(
_phi_0
(
phi_0
),
_allowed_ops
(
allowed_ops
),
_scores
(
phi_0
.
size
(),
0.0
),
_task_sizes
(
task_sizes
),
_start_gen
(
1
,
0
),
_mpi_comm
(
mpi_comm
),
_l_bound
(
min_abs_feat_val
),
...
...
@@ -44,6 +46,7 @@ FeatureSpace::FeatureSpace(
_n_rung_store
(
max_store_rung
),
_n_rung_generate
(
n_rung_generate
)
{
_project
=
project_funcs
::
project_r
;
if
(
_n_rung_generate
>
1
)
throw
std
::
logic_error
(
"A maximum of one rung can be generated on the fly."
);
else
if
(
_max_phi
-
_n_rung_generate
<
_n_rung_store
)
...
...
@@ -289,48 +292,9 @@ void FeatureSpace::generate_feature_space()
}
}
}
for
(
int
ii
=
0
;
ii
<
_mpi_comm
->
size
();
++
ii
)
{
_mpi_comm
->
barrier
();
if
(
_mpi_comm
->
rank
()
==
ii
)
for
(
auto
&
feat
:
_phi
)
std
::
cout
<<
feat
->
expr
()
<<
std
::
endl
;
_mpi_comm
->
barrier
();
}
_n_feat
=
_phi
.
size
();
}
void
FeatureSpace
::
project_r
(
double
*
prop
,
int
size
)
{
std
::
vector
<
double
>
scores
(
_phi
.
size
(),
0.0
);
for
(
int
ff
=
0
;
ff
<
_phi
.
size
();
++
ff
)
_scores
[
ff
]
=
-
1.0
*
std
::
abs
(
util_funcs
::
r
(
&
prop
[
0
],
_phi
[
ff
]
->
value_ptr
(),
_n_samp
));
for
(
int
pp
=
1
;
pp
<
size
/
_n_samp
;
++
pp
)
{
for
(
int
ff
=
0
;
ff
<
_phi
.
size
();
++
ff
)
scores
[
ff
]
=
-
1.0
*
std
::
abs
(
util_funcs
::
r
(
&
prop
[
_n_samp
*
pp
],
_phi
[
ff
]
->
value_ptr
(),
_n_samp
));
std
::
transform
(
scores
.
begin
(),
scores
.
end
(),
_scores
.
begin
(),
_scores
.
begin
(),
[](
double
s1
,
double
s2
){
return
std
::
min
(
s1
,
s2
);});
}
}
std
::
vector
<
double
>
FeatureSpace
::
project_r
(
double
*
prop
,
int
size
,
std
::
vector
<
node_ptr
>&
phi
)
{
std
::
vector
<
double
>
scores
(
phi
.
size
(),
0.0
);
std
::
vector
<
double
>
scores_temp
(
phi
.
size
(),
0.0
);
for
(
int
ff
=
0
;
ff
<
phi
.
size
();
++
ff
)
scores
[
ff
]
=
-
1.0
*
std
::
abs
(
util_funcs
::
r
(
&
prop
[
0
],
phi
[
ff
]
->
value_ptr
(),
_n_samp
));
for
(
int
pp
=
1
;
pp
<
size
/
_n_samp
;
++
pp
)
{
for
(
int
ff
=
0
;
ff
<
phi
.
size
();
++
ff
)
scores_temp
[
ff
]
=
-
1.0
*
std
::
abs
(
util_funcs
::
r
(
&
prop
[
_n_samp
*
pp
],
phi
[
ff
]
->
value_ptr
(),
_n_samp
));
std
::
transform
(
scores_temp
.
begin
(),
scores_temp
.
end
(),
scores
.
begin
(),
scores
.
begin
(),
[](
double
s1
,
double
s2
){
return
std
::
min
(
s1
,
s2
);});
}
return
scores
;
}
void
FeatureSpace
::
project_generated
(
double
*
prop
,
int
size
,
std
::
vector
<
std
::
shared_ptr
<
FeatureNode
>>&
phi_sel
,
std
::
vector
<
double
>&
scores_sel
,
std
::
vector
<
double
>&
scores_comp
)
{
for
(
auto
feat
=
_phi
.
begin
()
+
_start_gen
.
back
()
+
_mpi_comm
->
rank
();
feat
<
_phi
.
end
();
feat
+=
_mpi_comm
->
size
())
...
...
@@ -342,7 +306,9 @@ void FeatureSpace::project_generated(double* prop, int size, std::vector<std::sh
std
::
vector
<
node_ptr
>
generated_phi
;
generate_new_feats
(
feat
,
generated_phi
,
feat_ind
,
_l_bound
,
_u_bound
);
std
::
vector
<
double
>
scores
=
project_r
(
prop
,
size
,
generated_phi
);
std
::
vector
<
double
>
scores
(
generated_phi
.
size
(),
0.0
);
_project
(
prop
,
scores
.
data
(),
generated_phi
,
_task_sizes
,
size
/
_n_samp
);
std
::
vector
<
int
>
inds
=
util_funcs
::
argsort
(
scores
);
int
ii
=
0
;
...
...
@@ -434,7 +400,8 @@ void FeatureSpace::sis(std::vector<double>& prop)
node_value_arrs
::
resize_d_matrix_arr
(
_n_sis_select
);
_phi_selected
.
reserve
(
_phi_selected
.
size
()
+
_n_sis_select
);
project_r
(
prop
.
data
(),
prop
.
size
());
_project
(
prop
.
data
(),
_scores
.
data
(),
_phi
,
_task_sizes
,
prop
.
size
()
/
_n_samp
);
std
::
vector
<
int
>
inds
=
util_funcs
::
argsort
(
_scores
);
int
ii
=
0
;
...
...
@@ -459,7 +426,6 @@ void FeatureSpace::sis(std::vector<double>& prop)
if
(
_n_rung_generate
>
0
)
{
for
(
auto
&
feat
:
phi_sel
)
phi_sel
.
resize
(
cur_feat_local
);
scores_sel
.
resize
(
cur_feat_local
);
project_generated
(
prop
.
data
(),
prop
.
size
(),
phi_sel
,
scores_sel
,
scores_comp
);
...
...
@@ -586,20 +552,20 @@ void FeatureSpace::sis(std::vector<double>& prop)
for
(
int
ii
=
_phi_selected
.
size
()
-
_n_sis_select
;
ii
<
_phi_selected
.
size
();
++
ii
)
{
_phi_selected
[
ii
]
->
set_value
();
_phi_selected
[
ii
]
->
set_test_value
();
++
cur_feat
;
}
}
else
{
// cur_feat +=
cur_feat_local;
cur_feat_local
=
0
;
for
(
auto
&
feat
:
phi_sel
)
{
std
::
cout
<<
scores_sel
[
cur_feat_local
]
<<
'\t'
<<
phi_sel
[
cur_feat_local
]
->
expr
()
<<
std
::
endl
;
_phi_selected
.
push_back
(
feat
);
_phi_selected
.
back
()
->
reindex
(
cur_feat
);
_phi_selected
.
back
()
->
set_value
();
_phi_selected
.
back
()
->
set_test_value
();
++
cur_feat
;
++
cur_feat_local
;
}
}
if
(
cur_feat
!=
node_value_arrs
::
N_SELECTED
)
...
...
src/feature_creation/feature_space/FeatureSpace.hpp
View file @
016b7a34
...
...
@@ -5,6 +5,7 @@
#include
<feature_creation/node/FeatureNode.hpp>
#include
<feature_creation/node/operator_nodes/allowed_ops.hpp>
#include
<feature_creation/node/value_storage/nodes_value_containers.hpp>
#include
<utils/project.hpp>
#include
<boost/serialization/shared_ptr.hpp>
...
...
@@ -29,10 +30,12 @@ class FeatureSpace
std
::
vector
<
bin_op_node_gen
>
_bin_operators
;
//!< list of all binary operators
std
::
vector
<
double
>
_scores
;
//!< projection scores for each feature
std
::
vector
<
double
>
_prop
;
//!< property to learn
std
::
vector
<
int
>
_task_sizes
;
//!< The number of elements in each task
std
::
vector
<
int
>
_start_gen
;
//!< list of starting index for each generation
std
::
function
<
void
(
double
*
,
double
*
,
std
::
vector
<
node_ptr
>&
,
std
::
vector
<
int
>&
,
int
)
>
_project
;
//!< Function used for projection onto SIS
std
::
shared_ptr
<
MPI_Interface
>
_mpi_comm
;
//!< MPI communicator
double
_l_bound
;
//!< lower bound for absolute value of the features
...
...
@@ -46,6 +49,7 @@ class FeatureSpace
int
_n_rung_generate
;
//!< Total number of rungs to generate on the fly
public:
/**
* @brief Constructor for the feature space
* @details constructs the feature space from an initial set of features and a list of allowed operators
...
...
@@ -60,6 +64,7 @@ public:
std
::
shared_ptr
<
MPI_Interface
>
mpi_comm
,
std
::
vector
<
node_ptr
>
phi_0
,
std
::
vector
<
std
::
string
>
allowed_ops
,
std
::
vector
<
int
>
task_sizes
,
int
max_phi
=
1
,
int
n_sis_select
=
1
,
int
max_store_rung
=
2
,
...
...
@@ -99,15 +104,7 @@ public:
*/
inline
std
::
shared_ptr
<
MPI_Interface
>
mpi_comm
(){
return
_mpi_comm
;}
/**
* @brief calculate the projection scores for all features for a given property
* @details Calculate the projection score based on the Pearson correlation
*
* @param prop [description]
*/
void
project_r
(
double
*
prop
,
int
size
);
std
::
vector
<
double
>
project_r
(
double
*
prop
,
int
size
,
std
::
vector
<
node_ptr
>&
phi
);
inline
std
::
vector
<
int
>
task_sizes
(){
return
_task_sizes
;}
void
generate_new_feats
(
std
::
vector
<
node_ptr
>::
iterator
&
feat
,
std
::
vector
<
node_ptr
>&
feat_set
,
int
&
feat_ind
,
double
l_bound
=
1e-50
,
double
u_bound
=
1e50
);
...
...
src/inputs/InputParser.cpp
View file @
016b7a34
...
...
@@ -2,10 +2,10 @@
InputParser
::
InputParser
(
boost
::
property_tree
::
ptree
IP
,
std
::
string
fn
,
std
::
shared_ptr
<
MPI_Interface
>
comm
)
:
_opset
(
as_vector
<
std
::
string
>
(
IP
,
"opset"
)),
_leave_out_inds
(
as_vector
<
int
>
(
IP
,
"leave_out_inds"
)),
_filename
(
fn
),
_data_file
(
IP
.
get
<
std
::
string
>
(
"data_file"
,
"data.csv"
)),
_prop_key
(
IP
.
get
<
std
::
string
>
(
"property_key"
,
"prop"
)),
_leave_out_inds
(
as_vector
<
int
>
(
IP
,
"leave_out_inds"
)),
_l_bound
(
IP
.
get
<
double
>
(
"min_abs_feat_val"
,
1e-50
)),
_u_bound
(
IP
.
get
<
double
>
(
"max_abs_feat_val"
,
1e50
)),
_n_dim
(
IP
.
get
<
int
>
(
"desc_dim"
)),
...
...
@@ -13,38 +13,118 @@ InputParser::InputParser(boost::property_tree::ptree IP, std::string fn, std::sh
_max_rung
(
IP
.
get
<
int
>
(
"max_rung"
)),
_max_store_rung
(
IP
.
get
<
int
>
(
"n_rung_store"
,
_max_rung
-
1
)),
_n_rung_generate
(
IP
.
get
<
int
>
(
"n_rung_generate"
,
0
)),
_n_samp
(
-
1
),
_n_samp
(
0
),
_n_residuals
(
IP
.
get
<
int
>
(
"n_residual"
,
1
))