    gsl_matrix *gm_sa, double *lut, int32_t *lut_dims, int32_t *min_cost_loc,
    oe_info_str *oe_info )
  int32_t nx, ny, ix, iy, idim, nitr, nitr_max, conv_flag, mqstep;
  int32_t tmp_attempt_ctr, new_k_init;
  double j_cost_m, j_cost_a, j_cost, j_old, djconv, mq, mq0;
  double yres[8], xres[3], xpdx[3];
  double j_old_t1, j_old_t2;
  static int32_t f_init = 0;
  static double *y_init, *k_init;
  static gsl_vector *gv_yres, *gv_xres, *gv_inv_sy_yres, *gv_inv_sa_xres;
  static gsl_vector *gv_xpdx, *gv_x_fg, *gv_xa, *gv_r_minus_xa, *gv_jp_t2;
  static gsl_vector *gv_y_init_rho, *gv_jp_t1, *gv_jp_t1_f2;
  static gsl_vector *gv_jp, *gv_dx, *gv_nx, *gv_ny;
  static gsl_matrix *gm_sa_inv, *gm_sy_inv_k_init, *gm_ident;
  static gsl_matrix *gm_jpp_t1, *gm_k_init, *gm_jpp, *gm_scl_ident;
  static gsl_matrix *gm_jpp_ident, *gm_jpp_aux, *gm_jpp_ident_inv;
  static gsl_matrix *gm_sx, *gm_sx_t2_f1, *gm_sx_t2, *gm_sx_uinv;
  static gsl_matrix *gm_gain, *gm_av_kernel, *gm_sy_inv, *gm_g_pt1;
  static gsl_matrix *gm_g_pt2, *gm_g_pt3, *gm_g_pt4;
  y_init = (double *) malloc( ny * sizeof(double) );
  k_init = (double *) malloc( ny * nx * sizeof(double) );
  gm_jpp_t1 = gsl_matrix_alloc( nx, nx );
  gv_x_fg = gsl_vector_alloc( nx );
  gm_ident = gsl_matrix_alloc( nx, nx );
  gv_yres = gsl_vector_alloc( ny );
  gv_xres = gsl_vector_alloc( nx );
  gv_xpdx = gsl_vector_alloc( nx );
  gv_inv_sa_xres = gsl_vector_alloc( nx );
  gv_inv_sy_yres = gsl_vector_alloc( ny );
  gm_sy_inv_k_init = gsl_matrix_alloc( ny, nx );
  gm_k_init = gsl_matrix_alloc( nx, ny );
  gv_jp_t1_f2 = gsl_vector_alloc( ny );
  gm_jpp = gsl_matrix_alloc( nx, nx );
  gv_xa = gsl_vector_alloc( nx );
  gv_r_minus_xa = gsl_vector_alloc( nx );
  gv_jp_t2 = gsl_vector_alloc( nx );
  gv_y_init_rho = gsl_vector_alloc( ny );
  gv_jp_t1 = gsl_vector_alloc( nx );
  gv_jp = gsl_vector_alloc( nx );
  gm_scl_ident = gsl_matrix_alloc( nx, nx );
  gm_jpp_ident = gsl_matrix_alloc( nx, nx );
  gm_jpp_aux = gsl_matrix_alloc( nx, nx );
  gv_dx = gsl_vector_alloc( nx );
  gv_nx = gsl_vector_alloc( nx );
  gv_ny = gsl_vector_alloc( ny );
  gm_sx = gsl_matrix_alloc( nx, nx );
  gm_sx_t2_f1 = gsl_matrix_alloc( ny, nx );
  gm_sx_t2 = gsl_matrix_alloc( nx, nx );
  gm_sx_uinv = gsl_matrix_alloc( nx, nx );
  gm_av_kernel = gsl_matrix_alloc( nx, nx );
  gm_g_pt1 = gsl_matrix_alloc( nx, ny );
  gm_g_pt2 = gsl_matrix_alloc( nx, nx );
  gm_g_pt3 = gsl_matrix_alloc( nx, nx );
  gm_g_pt4 = gsl_matrix_alloc( nx, nx );
  gm_gain = gsl_matrix_alloc( nx, ny );
  oe_info->x_prd_state = (float *) malloc( nx * sizeof(float) );
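
  /* record the LUT grid dimensions */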
  for( idim = 0; idim < 3; idim++ )
    snax[idim] = lut_dims[idim];
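
  /* first guess: start the state vector at the minimum-cost LUT location */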
  for( ix = 0; ix < nx; ix++ )
    gsl_vector_set( gv_x_fg, ix, min_cost_loc[ix] );
  gsl_matrix_set_identity( gm_ident );
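
  /* residuals: forward-modelled reflectance minus measurement (y_init - rhot)
     and state minus a priori (x - xa) */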
  for( idim = 0; idim < ny; idim++ )
    yres[idim] = y_init[idim] - rhot[idim];
  for( idim = 0; idim < ny; idim++ )
    gsl_vector_set( gv_yres, idim, yres[idim] );

  for( idim = 0; idim < nx; idim++ )
    xres[idim] = gv_x_fg->data[idim] - xa[idim];
  for( idim = 0; idim < nx; idim++ )
    gsl_vector_set( gv_xres, idim, xres[idim] );
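
  /* initial cost: J = yres^T Sy^-1 yres + xres^T Sa^-1 xres */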
  gsl_blas_dgemv( CblasNoTrans, 1., gm_sy_inv, gv_yres, 0., gv_inv_sy_yres );
  gsl_blas_ddot( gv_yres, gv_inv_sy_yres, &j_old_t1 );

  gsl_blas_dgemv( CblasNoTrans, 1., gm_sa_inv, gv_xres, 0., gv_inv_sa_xres );
  gsl_blas_ddot( gv_xres, gv_inv_sa_xres, &j_old_t2 );

  j_old = j_old_t1 + j_old_t2;
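
  /* copy the Jacobian array k_init into gm_k_init (nx rows by ny columns) */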
  for( iy = 0; iy < ny; iy++ )
    for( ix = 0; ix < nx; ix++ )
      gsl_matrix_set( gm_k_init, ix, iy, *( k_init + ix + nx * iy ) );
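
  /* curvature (second-derivative) term: J'' = Sa^-1 + K^T Sy^-1 K,
     with gm_k_init holding K^T */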
  gsl_blas_dgemm( CblasNoTrans, CblasTrans, 1., gm_sy_inv, gm_k_init, 0., gm_sy_inv_k_init );
  gsl_blas_dgemm( CblasNoTrans, CblasNoTrans, 1., gm_k_init, gm_sy_inv_k_init, 0., gm_jpp_t1 );

  gsl_matrix_memcpy( gm_jpp, gm_sa_inv );
  gsl_matrix_add( gm_jpp, gm_jpp_t1 );
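
  /* trace of J'' (sum of its diagonal elements) */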
  for( ix = 0; ix < nx; ix++ )
    tsum += gsl_matrix_get( gm_jpp, ix, ix );
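
  /* main retrieval loop: iterate until converged or nitr_max is reached */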
  while( ( conv_flag == 0 ) && ( nitr <= nitr_max ) )
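
  /* step-acceptance loop: retry (adjusting the damping parameter mq) until the
     trial cost drops below the previous cost or the attempt limit is hit */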
  while( ( j_cost >= j_old ) && ( tmp_attempt_ctr <= 100 ) )
  for( iy = 0; iy < ny; iy++ )
    for( ix = 0; ix < nx; ix++ )
      gsl_matrix_set( gm_k_init, ix, iy, *( k_init + ix + nx * iy ) );
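
  /* gradient term 2: Sa^-1 ( x - xa ) */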
  for( ix = 0; ix < nx; ix++ )
    gsl_vector_set( gv_xa, ix, xa[ix] );

  gsl_vector_memcpy( gv_r_minus_xa, gv_x_fg );
  gsl_vector_sub( gv_r_minus_xa, gv_xa );
  gsl_blas_dgemv( CblasNoTrans, 1., gm_sa_inv, gv_r_minus_xa, 0., gv_jp_t2 );
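
  /* gradient term 1: K^T Sy^-1 ( y_init - rhot ); total gradient J' = term 1 + term 2 */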
  for( iy = 0; iy < ny; iy++ )
    gsl_vector_set( gv_y_init_rho, iy, y_init[iy] - rhot[iy] );

  gsl_blas_dgemv( CblasNoTrans, 1., gm_sy_inv, gv_y_init_rho, 0., gv_jp_t1_f2 );
  gsl_blas_dgemv( CblasNoTrans, 1., gm_k_init, gv_jp_t1_f2, 0., gv_jp_t1 );

  gsl_vector_memcpy( gv_jp, gv_jp_t1 );
  gsl_vector_add( gv_jp, gv_jp_t2 );
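
  /* if the Jacobian has been refreshed, rebuild J'' = Sa^-1 + K^T Sy^-1 K */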
  if( new_k_init == 1 )
  for( iy = 0; iy < ny; iy++ )
    for( ix = 0; ix < nx; ix++ )
      gsl_matrix_set( gm_k_init, ix, iy, *( k_init + ix + nx * iy ) );

  gsl_blas_dgemm( CblasNoTrans, CblasTrans, 1., gm_sy_inv, gm_k_init,
    0., gm_sy_inv_k_init );
  gsl_blas_dgemm( CblasNoTrans, CblasNoTrans, 1., gm_k_init,
    gm_sy_inv_k_init, 0., gm_jpp_t1 );

  gsl_matrix_memcpy( gm_jpp, gm_sa_inv );
  gsl_matrix_add( gm_jpp, gm_jpp_t1 );
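
  /* Levenberg-Marquardt damping: form J'' + mq * I */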
  gsl_matrix_memcpy( gm_scl_ident, gm_ident );
  gsl_matrix_scale( gm_scl_ident, mq );

  gsl_matrix_memcpy( gm_jpp_ident, gm_scl_ident );
  gsl_matrix_memcpy( gm_jpp_aux, gm_jpp );
  gsl_matrix_add( gm_jpp_ident, gm_jpp_aux );
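
  /* state increment: dx = -( J'' + mq I )^-1 J' */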
  gsl_blas_dgemv( CblasNoTrans, -1., gm_jpp_ident_inv, gv_jp, 0., gv_dx );
  gsl_matrix_free( gm_jpp_ident_inv );
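
  /* guard against NaNs in the step: zero dx and abandon the retry loop */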
  for( ix = 0; ix < nx; ix++ )
  {
    if( isnan( gv_dx->data[ix] ) )
    {
      gsl_vector_set_zero( gv_dx );
      tmp_attempt_ctr = 1000;
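
  /* trial state x + dx, clamped to the valid LUT index range */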
  for( ix = 0; ix < nx; ix++ )
  {
    xpdx[ix] = gv_x_fg->data[ix] + gv_dx->data[ix];
    xpdx[ix] = ( xpdx[ix] <= 1.e-5 ) ? 1.e-5 : xpdx[ix];
    xpdx[ix] = ( xpdx[ix] > lut_dims[ix] - 1 ) ? lut_dims[ix] - 1 : xpdx[ix];
  }

  for( ix = 0; ix < nx; ix++ )
    gsl_vector_set( gv_xpdx, ix, xpdx[ix] );
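
  /* refresh the Jacobian copy and recompute the residuals and trial cost */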
  for( iy = 0; iy < ny; iy++ )
    for( ix = 0; ix < nx; ix++ )
      gsl_matrix_set( gm_k_init, ix, iy, *( k_init + ix + nx * iy ) );
  for( idim = 0; idim < ny; idim++ )
  {
    yres[idim] = y_init[idim] - rhot[idim];
    gsl_vector_set( gv_yres, idim, yres[idim] );
  }
  for( idim = 0; idim < nx; idim++ )
  {
    xres[idim] = gv_x_fg->data[idim] - xa[idim];
    gsl_vector_set( gv_xres, idim, xres[idim] );
  }

  for( idim = 0; idim < ny; idim++ )
    gsl_vector_set( gv_yres, idim, yres[idim] );
  gsl_blas_dgemv( CblasNoTrans, 1., gm_sy_inv, gv_yres, 0., gv_ny );
  gsl_blas_ddot( gv_yres, gv_ny, &j_cost_m );

  gsl_blas_dgemv( CblasNoTrans, 1., gm_sa_inv, gv_xres, 0., gv_nx );
  gsl_blas_ddot( gv_xres, gv_nx, &j_cost_a );

  j_cost = j_cost_m + j_cost_a;
  if( isnan( j_cost ) )
  {
    printf( "%s, %d, E: Meaningless cost found\n", __FILE__, __LINE__ );
  gsl_vector_memcpy( gv_x_fg, gv_xpdx );
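
  /* convergence: small change in total cost, or measurement cost below the threshold */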
  if( ( fabs( j_cost - j_old ) < ( djconv * ny ) ) ||
      ( j_cost_m < djconv * ny ) ) conv_flag = 1;
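
  /* retrieval error covariance: Sx = ( Sa^-1 + K^T Sy^-1 K )^-1 */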
  gsl_blas_dgemm( CblasNoTrans, CblasTrans, 1., gm_sy_inv, gm_k_init, 0., gm_sx_t2_f1 );
  gsl_blas_dgemm( CblasNoTrans, CblasNoTrans, 1., gm_k_init, gm_sx_t2_f1, 0., gm_sx_t2 );

  gsl_matrix_memcpy( gm_sx_uinv, gm_sa_inv );
  gsl_matrix_add( gm_sx_uinv, gm_sx_t2 );

  gsl_matrix_memcpy( gm_sx, mat_xfr );
  gsl_matrix_free( mat_xfr );
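
  /* gain matrix: G = ( Sa^-1 + K^T Sy^-1 K )^-1 K^T Sy^-1 */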
  gsl_blas_dgemm( CblasNoTrans, CblasNoTrans, 1., gm_k_init, gm_sy_inv, 0.,
    gm_g_pt1 );
  gsl_blas_dgemm( CblasNoTrans, CblasTrans, 1., gm_g_pt1, gm_k_init, 0., gm_g_pt2 );

  gsl_matrix_memcpy( gm_g_pt3, gm_sa_inv );
  gsl_matrix_add( gm_g_pt3, gm_g_pt2 );

  gsl_matrix_memcpy( gm_g_pt4, mat_xfr );
  gsl_matrix_free( mat_xfr );

  gsl_blas_dgemm( CblasNoTrans, CblasNoTrans, 1., gm_g_pt4, gm_g_pt1, 0., gm_gain );
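
  /* averaging kernel: A = G K */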
  gsl_blas_dgemm( CblasNoTrans, CblasTrans, 1., gm_gain, gm_k_init,
    0., gm_av_kernel );
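
  /* fill the output structure with the retrieved state and diagnostics */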
  for( ix = 0; ix < nx; ix++ )
    oe_info->x_prd_state[ix] = gsl_vector_get( gv_x_fg, ix );
  oe_info->gm_sx = gm_sx;
  oe_info->conv_flag = conv_flag;
  oe_info->cost = j_cost;
  oe_info->acost = j_cost_a;
  oe_info->gm_ak = gm_av_kernel;
  oe_info->gm_gain = gm_gain;
  oe_info->nitr = nitr;
  gsl_matrix_free( gm_sa_inv );
  gsl_matrix_free( gm_sy_inv );
    int32_t ny, double *y_init, double *k_init )
  int32_t ibnd, icor, nx, iprd;
  float ar_wt[8], ar_wtu[8], orig_wt[3];
  double yl, yu, dx, dlut;
  int64_t ar_off[8], ar_offu[8], tmp_off;
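
  /* forward model: interpolate the LUT at the current state using the
     8-corner offsets and weights from iint_3d, accumulating y_init per band */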
  iint_3d( snax, gv_x_fg->data, ar_off, ar_wt, orig_wt );
  for( ibnd = 0; ibnd < ny; ibnd++ )
  {
    for( icor = 0; icor < 8; icor++ )
    {
      tmp_off = snax[0] * snax[1] * snax[2];
      y_init[ibnd] += *( lut + tmp_off * ibnd + ar_off[icor] ) * ar_wt[icor];
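
  /* Jacobian by finite differences: perturb each state element by dlut on
     either side (clamped to the grid) and difference the interpolated
     reflectances */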
  for( iprd = 0; iprd < nx; iprd++ )
  {
    xl[iprd] = gv_x_fg->data[iprd];
    xu[iprd] = gv_x_fg->data[iprd];
  }
  for( iprd = 0; iprd < nx; iprd++ )
  {
    xl[iprd] = gv_x_fg->data[iprd] - dlut;
    if( xl[iprd] < 0. ) xl[iprd] = 0.;
    xu[iprd] = gv_x_fg->data[iprd] + dlut;
    if( xu[iprd] > snax[iprd] - 1 ) xu[iprd] = snax[iprd] - 1;

    dx = xu[iprd] - xl[iprd];
    iint_3d( snax, xl, ar_off, ar_wt, orig_wt );
    iint_3d( snax, xu, ar_offu, ar_wtu, orig_wt );
    for( ibnd = 0; ibnd < ny; ibnd++ )
    {
      yl = 0.;
      yu = 0.;
      for( icor = 0; icor < 8; icor++ )
      {
        tmp_off = snax[0] * snax[1] * snax[2];
        yl += *( lut + tmp_off * ibnd + ar_off[icor] ) * ar_wt[icor];
        yu += *( lut + tmp_off * ibnd + ar_offu[icor] ) * ar_wtu[icor];
      }
      *( k_init + iprd + 3 * ibnd ) = ( yu - yl ) / dx;