/* precision_macros.h */
/*
 * Precision-dependent name mapping.
 *
 * Each M_* macro expands to the "double" flavour of a name when
 * DOUBLE_PRECISION_REAL is defined, and to the "single" flavour otherwise.
 *
 * Every macro is #undef'd before it is (re)defined, independent of which
 * branch is taken, so this header may be included several times with
 * different settings of DOUBLE_PRECISION_REAL without triggering macro
 * redefinition diagnostics.  For the same reason it deliberately has no
 * include guard.
 *
 * NOTE(review): the M_CONST_* literals carry _rk8/_rk4 kind suffixes, which
 * are not valid C; presumably this header is consumed by preprocessed
 * Fortran sources -- confirm before adding C-only constructs here.
 */

/* ---- drop any definitions left over from a previous inclusion ---- */

#undef M_elpa_transpose_vectors_real_PRECISION
#undef M_elpa_reduce_add_vectors_real_PRECISION

#undef M_bandred_real_PRECISION
#undef M_trans_ev_band_to_full_real_PRECISION
#undef M_tridiag_band_real_PRECISION
#undef M_trans_ev_tridi_to_band_real_PRECISION
#undef M_band_band_real_PRECISION
#undef M_tridiag_real_PRECISION
#undef M_trans_ev_real_PRECISION
#undef M_solve_tridi_PRECISION
#undef M_solve_tridi_col_PRECISION
#undef M_solve_tridi_single_problem_PRECISION

#undef M_qr_pdgeqrf_2dcomm_PRECISION
#undef M_hh_transform_real_PRECISION
#undef M_symm_matrix_allreduce_PRECISION
#undef M_redist_band_real_PRECISION
#undef M_unpack_row_real_cpu_PRECISION
#undef M_unpack_row_real_cpu_openmp_PRECISION
#undef M_unpack_and_prepare_row_group_real_gpu_PRECISION
#undef M_extract_hh_tau_real_gpu_PRECISION
#undef M_compute_hh_dot_products_real_gpu_PRECISION
#undef M_compute_hh_trafo_real_cpu_openmp_PRECISION
#undef M_compute_hh_trafo_real_cpu_PRECISION
#undef M_pack_row_group_real_gpu_PRECISION
#undef M_pack_row_real_cpu_openmp_PRECISION
#undef M_pack_row_real_cpu_PRECISION
#undef M_wy_gen_PRECISION
#undef M_wy_right_PRECISION
#undef M_wy_left_PRECISION
#undef M_wy_symm_PRECISION
#undef M_merge_recursive_PRECISION
#undef M_merge_systems_PRECISION
#undef M_distribute_global_column_PRECISION
#undef M_check_monotony_PRECISION
#undef M_global_gather_PRECISION
#undef M_resort_ev_PRECISION
#undef M_transform_columns_PRECISION
#undef M_solve_secular_equation_PRECISION
#undef M_global_product_PRECISION
#undef M_add_tmp_PRECISION
#undef M_v_add_s_PRECISION

#undef M_PRECISION_SYRK
#undef M_PRECISION_TRMV
#undef M_PRECISION_GEMM
#undef M_PRECISION_GEMV
#undef M_PRECISION_TRMM
#undef M_PRECISION_SYMV
#undef M_PRECISION_SYMM
#undef M_PRECISION_SYR2
#undef M_PRECISION_SYR2K
#undef M_PRECISION_GEQRF
#undef M_PRECISION_STEDC
#undef M_PRECISION_STEQR
#undef M_PRECISION_LAMRG
#undef M_PRECISION_LAMCH
#undef M_PRECISION_LAPY2
#undef M_PRECISION_LAED4
#undef M_PRECISION_LAED5

#undef M_cublas_PRECISION_gemm
#undef M_cublas_PRECISION_trmm
#undef M_cublas_PRECISION_gemv

#undef M_PRECISION_SUFFIX

#undef M_CONST_0_0
#undef M_CONST_0_5
#undef M_CONST_1_0
#undef M_CONST_2_0
#undef M_CONST_8_0

#undef M_size_of_PRECISION_real
#undef M_MPI_REAL_PRECISION

#ifdef DOUBLE_PRECISION_REAL

/* ---- double precision: names carry the _double suffix ---- */

#define M_elpa_transpose_vectors_real_PRECISION elpa_transpose_vectors_real_double
#define M_elpa_reduce_add_vectors_real_PRECISION elpa_reduce_add_vectors_real_double

#define M_bandred_real_PRECISION bandred_real_double
#define M_trans_ev_band_to_full_real_PRECISION trans_ev_band_to_full_real_double
#define M_tridiag_band_real_PRECISION tridiag_band_real_double
#define M_trans_ev_tridi_to_band_real_PRECISION trans_ev_tridi_to_band_real_double
#define M_band_band_real_PRECISION band_band_real_double
#define M_tridiag_real_PRECISION tridiag_real_double
#define M_trans_ev_real_PRECISION trans_ev_real_double
#define M_solve_tridi_PRECISION solve_tridi_double
#define M_solve_tridi_col_PRECISION solve_tridi_col_double
#define M_solve_tridi_single_problem_PRECISION solve_tridi_single_problem_double

#define M_qr_pdgeqrf_2dcomm_PRECISION qr_pdgeqrf_2dcomm_double
#define M_hh_transform_real_PRECISION hh_transform_real_double
#define M_symm_matrix_allreduce_PRECISION symm_matrix_allreduce_double
#define M_redist_band_real_PRECISION redist_band_real_double
#define M_unpack_row_real_cpu_PRECISION unpack_row_real_cpu_double
#define M_unpack_row_real_cpu_openmp_PRECISION unpack_row_real_cpu_openmp_double
#define M_unpack_and_prepare_row_group_real_gpu_PRECISION unpack_and_prepare_row_group_real_gpu_double
#define M_extract_hh_tau_real_gpu_PRECISION extract_hh_tau_real_gpu_double
#define M_compute_hh_dot_products_real_gpu_PRECISION compute_hh_dot_products_real_gpu_double
#define M_compute_hh_trafo_real_cpu_openmp_PRECISION compute_hh_trafo_real_cpu_openmp_double
#define M_compute_hh_trafo_real_cpu_PRECISION compute_hh_trafo_real_cpu_double
#define M_pack_row_group_real_gpu_PRECISION pack_row_group_real_gpu_double
#define M_pack_row_real_cpu_openmp_PRECISION pack_row_real_cpu_openmp_double
#define M_pack_row_real_cpu_PRECISION pack_row_real_cpu_double
#define M_wy_gen_PRECISION wy_gen_double
#define M_wy_right_PRECISION wy_right_double
#define M_wy_left_PRECISION wy_left_double
#define M_wy_symm_PRECISION wy_symm_double
#define M_merge_recursive_PRECISION merge_recursive_double
#define M_merge_systems_PRECISION merge_systems_double
#define M_distribute_global_column_PRECISION distribute_global_column_double
#define M_check_monotony_PRECISION check_monotony_double
#define M_global_gather_PRECISION global_gather_double
#define M_resort_ev_PRECISION resort_ev_double
#define M_transform_columns_PRECISION transform_columns_double
#define M_solve_secular_equation_PRECISION solve_secular_equation_double
#define M_global_product_PRECISION global_product_double
#define M_add_tmp_PRECISION add_tmp_double
#define M_v_add_s_PRECISION v_add_s_double

/* Routine names with the D precision prefix (BLAS/LAPACK-style naming). */
#define M_PRECISION_SYRK DSYRK
#define M_PRECISION_TRMV DTRMV
#define M_PRECISION_GEMM DGEMM
#define M_PRECISION_GEMV DGEMV
#define M_PRECISION_TRMM DTRMM
#define M_PRECISION_SYMV DSYMV
#define M_PRECISION_SYMM DSYMM
#define M_PRECISION_SYR2 DSYR2
#define M_PRECISION_SYR2K DSYR2K
#define M_PRECISION_GEQRF dgeqrf
#define M_PRECISION_STEDC dstedc
#define M_PRECISION_STEQR dsteqr
#define M_PRECISION_LAMRG DLAMRG
#define M_PRECISION_LAMCH DLAMCH
#define M_PRECISION_LAPY2 DLAPY2
#define M_PRECISION_LAED4 DLAED4
#define M_PRECISION_LAED5 DLAED5

#define M_cublas_PRECISION_gemm cublas_dgemm
#define M_cublas_PRECISION_trmm cublas_dtrmm
#define M_cublas_PRECISION_gemv cublas_dgemv

/* String form of the suffix used by the names above. */
#define M_PRECISION_SUFFIX "_double"

/* Literal constants with the rk8 kind suffix. */
#define M_CONST_0_0 0.0_rk8
#define M_CONST_0_5 0.5_rk8
#define M_CONST_1_0 1.0_rk8
#define M_CONST_2_0 2.0_rk8
#define M_CONST_8_0 8.0_rk8

#define M_size_of_PRECISION_real size_of_double_real_datatype
#define M_MPI_REAL_PRECISION MPI_REAL8

#else

/* ---- single precision: names carry the _single suffix ---- */

#define M_elpa_transpose_vectors_real_PRECISION elpa_transpose_vectors_real_single
#define M_elpa_reduce_add_vectors_real_PRECISION elpa_reduce_add_vectors_real_single

#define M_bandred_real_PRECISION bandred_real_single
#define M_trans_ev_band_to_full_real_PRECISION trans_ev_band_to_full_real_single
#define M_tridiag_band_real_PRECISION tridiag_band_real_single
#define M_trans_ev_tridi_to_band_real_PRECISION trans_ev_tridi_to_band_real_single
#define M_band_band_real_PRECISION band_band_real_single
#define M_tridiag_real_PRECISION tridiag_real_single
#define M_trans_ev_real_PRECISION trans_ev_real_single
#define M_solve_tridi_PRECISION solve_tridi_single
#define M_solve_tridi_col_PRECISION solve_tridi_col_single
#define M_solve_tridi_single_problem_PRECISION solve_tridi_single_problem_single

#define M_qr_pdgeqrf_2dcomm_PRECISION qr_pdgeqrf_2dcomm_single
#define M_hh_transform_real_PRECISION hh_transform_real_single
#define M_symm_matrix_allreduce_PRECISION symm_matrix_allreduce_single
#define M_redist_band_real_PRECISION redist_band_real_single
#define M_unpack_row_real_cpu_PRECISION unpack_row_real_cpu_single
#define M_unpack_row_real_cpu_openmp_PRECISION unpack_row_real_cpu_openmp_single
#define M_unpack_and_prepare_row_group_real_gpu_PRECISION unpack_and_prepare_row_group_real_gpu_single
#define M_extract_hh_tau_real_gpu_PRECISION extract_hh_tau_real_gpu_single
#define M_compute_hh_dot_products_real_gpu_PRECISION compute_hh_dot_products_real_gpu_single
#define M_compute_hh_trafo_real_cpu_openmp_PRECISION compute_hh_trafo_real_cpu_openmp_single
#define M_compute_hh_trafo_real_cpu_PRECISION compute_hh_trafo_real_cpu_single
#define M_pack_row_group_real_gpu_PRECISION pack_row_group_real_gpu_single
#define M_pack_row_real_cpu_openmp_PRECISION pack_row_real_cpu_openmp_single
#define M_pack_row_real_cpu_PRECISION pack_row_real_cpu_single
#define M_wy_gen_PRECISION wy_gen_single
#define M_wy_right_PRECISION wy_right_single
#define M_wy_left_PRECISION wy_left_single
#define M_wy_symm_PRECISION wy_symm_single
#define M_merge_recursive_PRECISION merge_recursive_single
#define M_merge_systems_PRECISION merge_systems_single
#define M_distribute_global_column_PRECISION distribute_global_column_single
#define M_check_monotony_PRECISION check_monotony_single
#define M_global_gather_PRECISION global_gather_single
#define M_resort_ev_PRECISION resort_ev_single
#define M_transform_columns_PRECISION transform_columns_single
#define M_solve_secular_equation_PRECISION solve_secular_equation_single
#define M_global_product_PRECISION global_product_single
#define M_add_tmp_PRECISION add_tmp_single
#define M_v_add_s_PRECISION v_add_s_single

/* Routine names with the S precision prefix (BLAS/LAPACK-style naming). */
#define M_PRECISION_SYRK SSYRK
#define M_PRECISION_TRMV STRMV
#define M_PRECISION_GEMM SGEMM
#define M_PRECISION_GEMV SGEMV
#define M_PRECISION_TRMM STRMM
#define M_PRECISION_SYMV SSYMV
#define M_PRECISION_SYMM SSYMM
#define M_PRECISION_SYR2 SSYR2
#define M_PRECISION_SYR2K SSYR2K
#define M_PRECISION_GEQRF sgeqrf
#define M_PRECISION_STEDC sstedc
#define M_PRECISION_STEQR ssteqr
#define M_PRECISION_LAMRG SLAMRG
#define M_PRECISION_LAMCH SLAMCH
#define M_PRECISION_LAPY2 SLAPY2
#define M_PRECISION_LAED4 SLAED4
#define M_PRECISION_LAED5 SLAED5

#define M_cublas_PRECISION_gemm cublas_sgemm
#define M_cublas_PRECISION_trmm cublas_strmm
#define M_cublas_PRECISION_gemv cublas_sgemv

/* String form of the suffix used by the names above. */
#define M_PRECISION_SUFFIX "_single"

/* Literal constants with the rk4 kind suffix. */
#define M_CONST_0_0 0.0_rk4
#define M_CONST_0_5 0.5_rk4
#define M_CONST_1_0 1.0_rk4
#define M_CONST_2_0 2.0_rk4
#define M_CONST_8_0 8.0_rk4

#define M_size_of_PRECISION_real size_of_single_real_datatype
#define M_MPI_REAL_PRECISION MPI_REAL4

#endif