Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
nomad-lab
cpp_sisso
Commits
a8f3e7db
Commit
a8f3e7db
authored
Sep 18, 2021
by
Thomas Purcell
Browse files
Move del inds to pre-broadcast
Hopefully will remove the data overwrite issue
parent
e1924308
Changes
3
Hide whitespace changes
Inline
Side-by-side
src/feature_creation/feature_space/FeatureSpace.cpp
View file @
a8f3e7db
...
...
@@ -573,6 +573,92 @@ void FeatureSpace::generate_non_param_feats(
}
}
void
FeatureSpace
::
remove_duplicate_features
(
std
::
vector
<
node_ptr
>&
feat_set
,
int
start
)
{
std
::
vector
<
double
>
scores
(
feat_set
.
size
(),
0.0
);
project_funcs
::
project_r
(
_prop_train
.
data
(),
scores
.
data
(),
feat_set
,
_task_sizes_train
,
1
);
scores
.
erase
(
scores
.
begin
(),
scores
.
begin
()
+
start
);
std
::
vector
<
int
>
inds
=
util_funcs
::
argsort
<
double
>
(
scores
);
std
::
vector
<
int
>
del_inds
;
node_value_arrs
::
clear_temp_reg
();
for
(
int
sc
=
0
;
sc
<
scores
.
size
()
-
1
;
++
sc
)
{
#ifdef PARAMETERIZE
if
(
feat_set
[
inds
[
sc
]
+
start
]
->
n_params
()
>
0
)
{
continue
;
}
#endif
double
*
val_ptr
=
feat_set
[
start
+
inds
[
sc
]]
->
stand_value_ptr
();
double
base_val
=
std
::
abs
(
std
::
inner_product
(
val_ptr
,
val_ptr
+
_n_samp_train
,
val_ptr
,
0.0
)
);
if
(
scores
[
inds
[
sc
]]
>
-
1e-7
)
{
// If score is 0.0 then check against all other 0.0 values
for
(
int
sc2
=
0
;
sc2
<
sc
;
++
sc2
)
{
double
comp
=
1.0
/
static_cast
<
double
>
(
_n_samp_train
)
*
std
::
abs
(
base_val
-
std
::
abs
(
std
::
inner_product
(
val_ptr
,
val_ptr
+
_n_samp_train
,
feat_set
[
start
+
inds
[
sc2
]]
->
stand_value_ptr
(
true
),
0.0
)
)
);
if
(
comp
<
1e-10
)
{
del_inds
.
push_back
(
-
1
*
(
inds
[
sc
]
+
start
));
break
;
}
}
}
else
if
(
scores
[
inds
[
sc
+
1
]]
-
scores
[
inds
[
sc
]]
<
1e-7
)
{
// Otherwise just compare against the closest neighbor
double
comp
=
1.0
/
static_cast
<
double
>
(
_n_samp_train
)
*
std
::
abs
(
base_val
-
std
::
abs
(
std
::
inner_product
(
val_ptr
,
val_ptr
+
_n_samp_train
,
feat_set
[
start
+
inds
[
sc
+
1
]]
->
stand_value_ptr
(
true
),
0.0
)
)
);
if
(
comp
<
1e-10
)
{
del_inds
.
push_back
(
-
1
*
(
inds
[
sc
]
+
start
));
}
}
}
inds
=
util_funcs
::
argsort
<
int
>
(
del_inds
);
for
(
int
ii
=
0
;
ii
<
inds
.
size
();
++
ii
)
{
feat_set
.
erase
(
feat_set
.
begin
()
-
del_inds
[
inds
[
ii
]]);
}
// Reindex
for
(
int
ff
=
start
;
ff
<
feat_set
.
size
();
++
ff
)
{
feat_set
[
ff
]
->
reindex
(
ff
);
}
}
void
FeatureSpace
::
generate_feature_space
(
std
::
vector
<
node_ptr
>&
feat_set
,
std
::
vector
<
int
>&
start_rung
,
...
...
@@ -669,7 +755,6 @@ void FeatureSpace::generate_feature_space(
if
((
nn
<
_max_rung
)
||
(
nn
<=
_n_rung_store
)
||
(
_mpi_comm
->
size
()
==
1
))
{
int
new_phi_size
;
int
phi_size_start
=
feat_set
.
size
();
if
(
_mpi_comm
->
rank
()
==
0
)
{
std
::
vector
<
std
::
vector
<
node_ptr
>>
next_phi_gathered
;
...
...
@@ -679,7 +764,6 @@ void FeatureSpace::generate_feature_space(
{
feat_set
.
insert
(
feat_set
.
end
(),
next_phi_vec
.
begin
(),
next_phi_vec
.
end
());
}
new_phi_size
=
feat_set
.
size
();
// Sort the features to ensure consistent feature spaces for all MPI/OpenMP configurations
std
::
sort
(
...
...
@@ -694,118 +778,27 @@ void FeatureSpace::generate_feature_space(
feat_set
.
end
(),
[
&
feat_ind
](
node_ptr
n
){
n
->
reindex
(
feat_ind
);
++
feat_ind
;}
);
mpi
::
broadcast
(
*
_mpi_comm
,
new_phi_size
,
0
);
for
(
int
bb
=
0
;
bb
<=
(
new_phi_size
-
phi_size_start
)
/
10000
;
++
bb
)
if
(
nn
<
_max_rung
)
{
mpi
::
broadcast
(
*
_mpi_comm
,
&
feat_set
[
phi_size_start
+
bb
*
10000
],
std
::
min
(
10000
,
new_phi_size
-
phi_size_start
-
bb
*
10000
),
0
);
remove_duplicate_features
(
feat_set
,
start_rung
.
back
()
);
}
new_phi_size
=
feat_set
.
size
();
mpi
::
broadcast
(
*
_mpi_comm
,
new_phi_size
,
0
);
mpi
::
broadcast
(
*
_mpi_comm
,
&
feat_set
[
start_rung
.
back
()],
new_phi_size
-
start_rung
.
back
(),
0
);
}
else
{
mpi
::
gather
(
*
_mpi_comm
,
next_phi
,
0
);
mpi
::
broadcast
(
*
_mpi_comm
,
new_phi_size
,
0
);
feat_set
.
resize
(
new_phi_size
);
for
(
int
bb
=
0
;
bb
<=
(
new_phi_size
-
phi_size_start
)
/
10000
;
++
bb
)
{
mpi
::
broadcast
(
*
_mpi_comm
,
&
feat_set
[
phi_size_start
+
bb
*
10000
],
std
::
min
(
10000
,
new_phi_size
-
phi_size_start
-
bb
*
10000
),
0
);
}
feat_set
.
resize
(
new_phi_size
);
mpi
::
broadcast
(
*
_mpi_comm
,
&
feat_set
[
start_rung
.
back
()],
new_phi_size
-
start_rung
.
back
(),
0
);
}
if
(
phi_size_start
==
new_phi_size
)
if
(
start_rung
.
back
()
==
feat_set
.
size
())
{
throw
std
::
logic_error
(
"No features created during this rung ("
+
std
::
to_string
(
nn
)
+
")"
);
}
node_value_arrs
::
clear_temp_reg
();
if
(
nn
<
_max_rung
)
{
// Remove identical features
_scores
.
resize
(
feat_set
.
size
());
_mpi_comm
->
barrier
();
project_funcs
::
project_r
(
_prop_train
.
data
(),
_scores
.
data
(),
feat_set
,
_task_sizes_train
,
1
);
_scores
.
erase
(
_scores
.
begin
(),
_scores
.
begin
()
+
start_rung
[
start_rung
.
size
()
-
1
]);
inds
=
util_funcs
::
argsort
<
double
>
(
_scores
);
std
::
vector
<
int
>
del_inds
;
_mpi_comm
->
barrier
();
node_value_arrs
::
clear_temp_reg
();
for
(
int
sc
=
0
;
sc
<
_scores
.
size
()
-
1
;
++
sc
)
{
#ifdef PARAMETERIZE
if
(
feat_set
[
inds
[
sc
]
+
start_rung
.
back
()]
->
n_params
()
>
0
)
{
continue
;
}
#endif
if
(
_scores
[
inds
[
sc
]]
>
-
1e-10
)
{
double
base_val
=
std
::
abs
(
util_funcs
::
r
(
feat_set
[
start_rung
.
back
()
+
inds
[
sc
]]
->
value_ptr
(),
feat_set
[
start_rung
.
back
()
+
inds
[
sc
]]
->
value_ptr
(),
_n_samp_train
)
);
for
(
int
sc2
=
sc
+
1
;
sc2
<
_scores
.
size
();
++
sc2
)
{
double
comp
=
std
::
abs
(
base_val
-
std
::
abs
(
util_funcs
::
r
(
feat_set
[
start_rung
.
back
()
+
inds
[
sc
]]
->
value_ptr
(),
feat_set
[
start_rung
.
back
()
+
inds
[
sc2
]]
->
value_ptr
(
0
,
true
),
_n_samp_train
)
)
);
if
(
comp
<
1e-10
)
{
del_inds
.
push_back
(
-
1
*
(
inds
[
sc
]
+
start_rung
.
back
()));
break
;
}
}
}
else
if
(
_scores
[
inds
[
sc
+
1
]]
-
_scores
[
inds
[
sc
]]
<
1e-10
)
{
double
base_val
=
std
::
abs
(
util_funcs
::
r
(
feat_set
[
start_rung
.
back
()
+
inds
[
sc
]]
->
value_ptr
(),
feat_set
[
start_rung
.
back
()
+
inds
[
sc
]]
->
value_ptr
(),
_n_samp_train
)
);
double
comp
=
std
::
abs
(
base_val
-
std
::
abs
(
util_funcs
::
r
(
feat_set
[
start_rung
.
back
()
+
inds
[
sc
]]
->
value_ptr
(),
feat_set
[
start_rung
.
back
()
+
inds
[
sc
+
1
]]
->
value_ptr
(
0
,
true
),
_n_samp_train
)
)
);
if
(
comp
<
1e-10
)
{
del_inds
.
push_back
(
-
1
*
(
inds
[
sc
]
+
start_rung
.
back
()));
}
}
}
inds
=
util_funcs
::
argsort
<
int
>
(
del_inds
);
for
(
int
ii
=
0
;
ii
<
inds
.
size
();
++
ii
)
{
feat_set
.
erase
(
feat_set
.
begin
()
-
del_inds
[
inds
[
ii
]]);
}
// Reindex
for
(
int
ff
=
start_rung
.
back
();
ff
<
feat_set
.
size
();
++
ff
)
{
feat_set
[
ff
]
->
reindex
(
ff
);
}
}
node_value_arrs
::
clear_temp_reg
();
if
(
!
reparam
)
{
...
...
@@ -1046,12 +1039,13 @@ void FeatureSpace::generate_and_project(std::shared_ptr<LossFunction> loss, std:
}
#endif
auto
start
=
_phi
.
begin
()
+
_start_rung
.
back
()
+
_mpi_comm
->
rank
();
#ifdef OMP45
#pragma omp for schedule(monotonic: dynamic)
#else
#pragma omp for schedule(dynamic)
#endif
for
(
auto
feat
=
_phi
.
begin
()
+
_start_rung
.
back
()
+
_mpi_comm
->
rank
()
;
feat
<
_phi
.
end
();
feat
+=
_mpi_comm
->
size
())
for
(
auto
feat
=
start
;
feat
<
_phi
.
end
();
feat
+=
_mpi_comm
->
size
())
{
unsigned
long
int
feat_ind
=
_phi
.
size
()
+
2
*
_n_sis_select
*
(
omp_get_num_threads
()
+
_mpi_comm
->
size
());
...
...
src/feature_creation/feature_space/FeatureSpace.hpp
View file @
a8f3e7db
...
...
@@ -146,6 +146,14 @@ public:
*/
void
initialize_fs_output_files
()
const
;
/**
* @brief Remove duplicate features from the feature space
*
* Only the features at positions >= start are candidates for removal. After the
* duplicates are erased, the features from start onward are reindexed to their
* new positions in feat_set.
*
* @param feat_set Feature space to remove the duplicates from (modified in place)
* @param start Index of the first feature in feat_set that is checked for duplicates
*/
void
remove_duplicate_features
(
std
::
vector
<
node_ptr
>&
feat_set
,
int
start
);
/**
* @brief Populate _phi using _phi_0 and the allowed operators up to (_max_rung - _n_rung_generate)^th rung
*/
...
...
src/mpi_interface/MPI_Ops.cpp
View file @
a8f3e7db
...
...
@@ -59,6 +59,18 @@ void mpi_reduce_op::set_op(std::string project_type, double cross_cor_max, int n
std
::
vector
<
node_sc_pair
>
mpi_reduce_op
::
select_top_feats
(
std
::
vector
<
node_sc_pair
>
in_vec_1
,
std
::
vector
<
node_sc_pair
>
in_vec_2
)
{
for
(
int
ff
=
0
;
ff
<
in_vec_1
.
size
();
++
ff
)
{
std
::
cout
<<
ff
<<
'\t'
<<
in_vec_1
[
ff
].
_feat
->
feat
(
0
)
->
arr_ind
()
<<
'\t'
<<
in_vec_1
[
ff
].
_feat
->
expr
()
<<
std
::
endl
;
}
std
::
cout
<<
std
::
endl
;
for
(
int
ff
=
0
;
ff
<
in_vec_2
.
size
();
++
ff
)
{
std
::
cout
<<
ff
<<
'\t'
<<
in_vec_2
[
ff
].
_feat
->
feat
(
0
)
->
arr_ind
()
<<
'\t'
<<
in_vec_2
[
ff
].
_feat
->
expr
()
<<
std
::
endl
;
}
std
::
cout
<<
std
::
endl
;
// Set up an output vector
std
::
vector
<
node_sc_pair
>
out_vec
;
out_vec
.
reserve
(
N_SIS_SELECT
);
...
...
@@ -79,5 +91,12 @@ std::vector<node_sc_pair> mpi_reduce_op::select_top_feats(std::vector<node_sc_pa
}
++
ff
;
}
for
(
int
ff
=
0
;
ff
<
out_vec
.
size
();
++
ff
)
{
std
::
cout
<<
ff
<<
'\t'
<<
out_vec
[
ff
].
_feat
->
feat
(
0
)
->
arr_ind
()
<<
'\t'
<<
out_vec
[
ff
].
_feat
->
expr
()
<<
std
::
endl
;
}
std
::
cout
<<
std
::
endl
;
return
out_vec
;
}
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment