#include "getfem/getfem_generic_assembly_compile_and_exec.h"
#include "getfem/getfem_generic_assembly_functions_and_operators.h"

#define GA_DEBUG_INFO(a)
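
// Performance-oriented helpers: copy or accumulate v1 scaled by a into v2,
// with the main loop unrolled four or eight times and a scalar tail loop
// for the remaining entries.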
template <class VEC1, class VEC2>
inline void copy_scaled_4(const VEC1 &v1, const scalar_type a, VEC2 &v2) {
  auto it1 = v1.begin();
  auto it2 = v2.begin(), it2e = v2.end();
  size_type nd = v1.size() >> 2;
  for (size_type i = 0; i < nd; ++i) {
    *it2++ = (*it1++) * a;
    *it2++ = (*it1++) * a;
    *it2++ = (*it1++) * a;
    *it2++ = (*it1++) * a;
  }
  while (it2 != it2e)                 // remaining (size % 4) entries
    *it2++ = (*it1++) * a;
}
template <class VEC1, class VEC2>
inline void add_scaled_4(const VEC1 &v1, const scalar_type a, VEC2 &v2) {
  auto it1 = v1.begin();
  auto it2 = v2.begin(), it2e = v2.end();
  size_type nd = v1.size() >> 2;
  for (size_type i = 0; i < nd; ++i) {
    *it2++ += (*it1++) * a;
    *it2++ += (*it1++) * a;
    *it2++ += (*it1++) * a;
    *it2++ += (*it1++) * a;
  }
  while (it2 != it2e)                 // remaining (size % 4) entries
    *it2++ += (*it1++) * a;
}
template <class VEC1, class VEC2>
inline void copy_scaled_8(const VEC1 &v1, const scalar_type a, VEC2 &v2) {
  auto it1 = v1.begin();
  auto it2 = v2.begin(), it2e = v2.end();
  size_type nd = v1.size() >> 3;
  for (size_type i = 0; i < nd; ++i) {
    *it2++ = (*it1++) * a;
    *it2++ = (*it1++) * a;
    *it2++ = (*it1++) * a;
    *it2++ = (*it1++) * a;
    *it2++ = (*it1++) * a;
    *it2++ = (*it1++) * a;
    *it2++ = (*it1++) * a;
    *it2++ = (*it1++) * a;
  }
  while (it2 != it2e)                 // remaining (size % 8) entries
    *it2++ = (*it1++) * a;
}
template <class VEC1, class VEC2>
inline void add_scaled_8(const VEC1 &v1, const scalar_type a, VEC2 &v2) {
  auto it1 = v1.begin();
  auto it2 = v2.begin(), it2e = v2.end();
  size_type nd = v1.size() >> 3;
  for (size_type i = 0; i < nd; ++i) {
    *it2++ += (*it1++) * a;
    *it2++ += (*it1++) * a;
    *it2++ += (*it1++) * a;
    *it2++ += (*it1++) * a;
    *it2++ += (*it1++) * a;
    *it2++ += (*it1++) * a;
    *it2++ += (*it1++) * a;
    *it2++ += (*it1++) * a;
  }
  while (it2 != it2e)                 // remaining (size % 8) entries
    *it2++ += (*it1++) * a;
}
// Lexicographic comparison, used to store gauss_pt_corresp objects in maps.
bool operator <(const gauss_pt_corresp &gpc1,
                const gauss_pt_corresp &gpc2) {
  if (gpc1.pai != gpc2.pai)
    return (gpc1.pai < gpc2.pai);
  if (gpc1.nodes.size() != gpc2.nodes.size())
    return (gpc1.nodes.size() < gpc2.nodes.size());
  for (size_type i = 0; i < gpc1.nodes.size(); ++i)
    if (gpc1.nodes[i] != gpc2.nodes[i])
      return (gpc1.nodes[i] < gpc2.nodes[i]);
  if (gpc1.pgt1 != gpc2.pgt1)
    return (gpc1.pgt1 < gpc2.pgt1);
  if (gpc1.pgt2 != gpc2.pgt2)
    return (gpc1.pgt2 < gpc2.pgt2);
  return false;
}
bool operator <(const ga_instruction_set::region_mim &rm1,
                const ga_instruction_set::region_mim &rm2) {
  if (rm1.mim() != rm2.mim()) return (rm1.mim() < rm2.mim());
  if (rm1.region() != rm2.region()) return (rm1.region() < rm2.region());
  return (rm1.psd() < rm2.psd());
}
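
// Assembly instructions. Each ga_instruction below implements exec(), which
// is invoked at every integration point while a compiled assembly program
// is executed; a return value of 0 means success.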
struct ga_instruction_extract_local_im_data : public ga_instruction {
  base_tensor &t;
  const im_data &imd;
  papprox_integration &pai;
  const base_vector &U;
  const fem_interpolation_context &ctx;
  const size_type qdim;
  size_type cv_old;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: extract local im data");
    size_type cv = ctx.convex_num();
    if (cv != cv_old) {          // check the integration method once per element
      cv_old = cv;
      GMM_ASSERT1(imd.linked_mesh_im().int_method_of_element(cv)
                  ->approx_method() == pai,
                  "Im data have to be used only "
                  "on their original integration method.");
    }
    size_type ipt = imd.filtered_index_of_point(cv, ctx.ii());
    GMM_ASSERT1(ipt != size_type(-1),
                "Im data with no data on the current integration point.");
    auto it = U.begin()+ipt*qdim;
    std::copy(it, it+qdim, t.begin());
    return 0;
  }
  ga_instruction_extract_local_im_data
  (base_tensor &t_, const im_data &imd_, const base_vector &U_,
   papprox_integration &pai_, const fem_interpolation_context &ctx_,
   size_type qdim_)
    : t(t_), imd(imd_), pai(pai_), U(U_), ctx(ctx_), qdim(qdim_),
      cv_old(-1) {}
};
struct ga_instruction_slice_local_dofs : public ga_instruction {
  const mesh_fem &mf;
  const base_vector &U;
  const fem_interpolation_context &ctx;
  base_vector &coeff;
  size_type qmult1, qmult2;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: Slice local dofs");
    GMM_ASSERT1(qmult1 != 0 && qmult2 != 0, "Internal error");
    slice_vector_on_basic_dof_of_element(mf, U, ctx.convex_num(),
                                         coeff, qmult1, qmult2);
    return 0;
  }
  ga_instruction_slice_local_dofs(const mesh_fem &mf_, const base_vector &U_,
                                  const fem_interpolation_context &ctx_,
                                  base_vector &coeff_,
                                  size_type qmult1_, size_type qmult2_)
    : mf(mf_), U(U_), ctx(ctx_), coeff(coeff_),
      qmult1(qmult1_), qmult2(qmult2_) {}
};
struct ga_instruction_update_pfp : public ga_instruction {
  const mesh_fem &mf;
  const fem_interpolation_context &ctx;
  fem_precomp_pool &fp_pool;
  pfem_precomp &pfp;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: Pfp update");
    if (ctx.have_pgp()) {
      size_type cv = ctx.is_convex_num_valid()
                   ? ctx.convex_num() : mf.convex_index().first_true();
      pfem pf = mf.fem_of_element(cv);
      if (!pfp || pf != pfp->get_pfem() ||
          ctx.pgp()->get_ppoint_tab() != pfp->get_ppoint_tab()) {
        pfp = fp_pool(pf, ctx.pgp()->get_ppoint_tab());
      }
    } else {
      pfp = 0;
    }
    return 0;
  }
  ga_instruction_update_pfp(const mesh_fem &mf_, pfem_precomp &pfp_,
                            const fem_interpolation_context &ctx_,
                            fem_precomp_pool &fp_pool_)
    : mf(mf_), ctx(ctx_), fp_pool(fp_pool_), pfp(pfp_) {}
};
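
// Instructions adapting the first (and/or second) dimension of a result
// tensor to the number of local degrees of freedom of the current element,
// for terms involving test functions.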
struct ga_instruction_first_ind_tensor : public ga_instruction {
  base_tensor &t;
  const fem_interpolation_context &ctx;
  size_type qdim;
  const mesh_fem *mfn, **mfg;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: adapt first index of tensor");
    const mesh_fem &mf = *(mfg ? *mfg : mfn);
    GA_DEBUG_ASSERT(mfg ? *mfg : mfn, "Internal error");
    size_type cv_1 = ctx.is_convex_num_valid()
                   ? ctx.convex_num() : mf.convex_index().first_true();
    pfem pf = mf.fem_of_element(cv_1);
    GMM_ASSERT1(pf, "An element without finite element method defined");
    size_type Qmult = qdim / pf->target_dim();
    size_type s = pf->nb_dof(cv_1) * Qmult;
    if (t.sizes()[0] != s)
      { bgeot::multi_index mi = t.sizes(); mi[0] = s; t.adjust_sizes(mi); }
    return 0;
  }
  ga_instruction_first_ind_tensor(base_tensor &t_,
                                  const fem_interpolation_context &ctx_,
                                  size_type qdim_, const mesh_fem *mfn_,
                                  const mesh_fem **mfg_)
    : t(t_), ctx(ctx_), qdim(qdim_), mfn(mfn_), mfg(mfg_) {}
};
struct ga_instruction_second_ind_tensor
  : public ga_instruction_first_ind_tensor {
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: adapt second index of tensor");
    const mesh_fem &mf = *(mfg ? *mfg : mfn);
    size_type cv_1 = ctx.is_convex_num_valid()
                   ? ctx.convex_num() : mf.convex_index().first_true();
    pfem pf = mf.fem_of_element(cv_1);
    GMM_ASSERT1(pf, "An element without finite element method defined");
    size_type Qmult = qdim / pf->target_dim();
    size_type s = pf->nb_dof(cv_1) * Qmult;
    if (t.sizes()[1] != s)
      { bgeot::multi_index mi = t.sizes(); mi[1] = s; t.adjust_sizes(mi); }
    return 0;
  }
  ga_instruction_second_ind_tensor(base_tensor &t_,
                                   fem_interpolation_context &ctx_,
                                   size_type qdim_, const mesh_fem *mfn_,
                                   const mesh_fem **mfg_)
    : ga_instruction_first_ind_tensor(t_, ctx_, qdim_, mfn_, mfg_) {}
};
struct ga_instruction_two_first_ind_tensor : public ga_instruction {
  base_tensor &t;
  const fem_interpolation_context &ctx1, &ctx2;
  size_type qdim1;
  const mesh_fem *mfn1, **mfg1;
  size_type qdim2;
  const mesh_fem *mfn2, **mfg2;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: adapt two first indices of tensor");
    const mesh_fem &mf1 = *(mfg1 ? *mfg1 : mfn1);
    const mesh_fem &mf2 = *(mfg2 ? *mfg2 : mfn2);
    size_type cv_1 = ctx1.is_convex_num_valid()
                   ? ctx1.convex_num() : mf1.convex_index().first_true();
    size_type cv_2 = ctx2.is_convex_num_valid()
                   ? ctx2.convex_num() : mf2.convex_index().first_true();
    pfem pf1 = mf1.fem_of_element(cv_1);
    GMM_ASSERT1(pf1, "An element without finite element method defined");
    pfem pf2 = mf2.fem_of_element(cv_2);
    GMM_ASSERT1(pf2, "An element without finite element method defined");
    size_type Qmult1 = qdim1 / pf1->target_dim();
    size_type s1 = pf1->nb_dof(cv_1) * Qmult1;
    size_type Qmult2 = qdim2 / pf2->target_dim();
    size_type s2 = pf2->nb_dof(cv_2) * Qmult2;
    GMM_ASSERT1(s1 > 0 && s2 > 0, "Element without degrees of freedom");
    if (t.sizes()[0] != s1 || t.sizes()[1] != s2) {
      bgeot::multi_index mi = t.sizes();
      mi[0] = s1; mi[1] = s2;
      t.adjust_sizes(mi);
    }
    return 0;
  }
  ga_instruction_two_first_ind_tensor
  (base_tensor &t_, const fem_interpolation_context &ctx1_,
   const fem_interpolation_context &ctx2_,
   size_type qdim1_, const mesh_fem *mfn1_, const mesh_fem **mfg1_,
   size_type qdim2_, const mesh_fem *mfn2_, const mesh_fem **mfg2_)
    : t(t_), ctx1(ctx1_), ctx2(ctx2_), qdim1(qdim1_), mfn1(mfn1_),
      mfg1(mfg1_), qdim2(qdim2_), mfn2(mfn2_), mfg2(mfg2_) {}
};
struct ga_instruction_X_component : public ga_instruction {
  scalar_type &t;
  const fem_interpolation_context &ctx;
  size_type n;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: X component");
    t = ctx.xreal()[n];
    return 0;
  }
  ga_instruction_X_component
  (scalar_type &t_, const fem_interpolation_context &ctx_, size_type n_)
    : t(t_), ctx(ctx_), n(n_) {}
};
struct ga_instruction_X : public ga_instruction {
  base_tensor &t;
  const fem_interpolation_context &ctx;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: X");
    GA_DEBUG_ASSERT(t.size() == ctx.xreal().size(), "dimensions mismatch");
    gmm::copy(ctx.xreal(), t.as_vector());
    return 0;
  }
  ga_instruction_X(base_tensor &t_, const fem_interpolation_context &ctx_)
    : t(t_), ctx(ctx_) {}
};
struct ga_instruction_copy_small_vect : public ga_instruction {
  base_tensor &t;
  const base_small_vector &vec;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: copy small vector");
    GMM_ASSERT1(t.size() == vec.size(), "Invalid vector size.");
    gmm::copy(vec, t.as_vector());
    return 0;
  }
  ga_instruction_copy_small_vect(base_tensor &t_,
                                 const base_small_vector &vec_)
    : t(t_), vec(vec_) {}
};
struct ga_instruction_copy_Normal : public ga_instruction_copy_small_vect {
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: unit normal vector");
    GMM_ASSERT1(t.size() == vec.size(), "Invalid outward unit normal "
                "vector. Possible reasons: not on boundary or "
                "transformation failed.");
    gmm::copy(vec, t.as_vector());
    return 0;
  }
  ga_instruction_copy_Normal(base_tensor &t_,
                             const base_small_vector &Normal_)
    : ga_instruction_copy_small_vect(t_, Normal_) {}
};
struct ga_instruction_level_set_normal_vector : public ga_instruction {
  base_tensor &t;
  const mesh_im_level_set *mimls;
  const fem_interpolation_context &ctx;
  base_small_vector vec;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: unit normal vector to a level-set");
    mimls->compute_normal_vector(ctx, vec);
    GMM_ASSERT1(t.size() == vec.size(), "Invalid outward unit normal "
                "vector. Possible reasons: not on boundary or "
                "transformation failed.");
    gmm::copy(vec, t.as_vector());
    return 0;
  }
  ga_instruction_level_set_normal_vector
  (base_tensor &t_, const mesh_im_level_set *mimls_,
   const fem_interpolation_context &ctx_)
    : t(t_), mimls(mimls_), ctx(ctx_), vec(t.size()) {}
};
struct ga_instruction_element_size : public ga_instruction {
  base_tensor &t;
  scalar_type &es;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: element_size");
    GMM_ASSERT1(t.size() == 1, "Invalid element size.");
    t[0] = es;
    return 0;
  }
  ga_instruction_element_size(base_tensor &t_, scalar_type &es_)
    : t(t_), es(es_) {}
};
struct ga_instruction_element_K : public ga_instruction {
  base_tensor &t;
  const fem_interpolation_context &ctx;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: element_K");
    GMM_ASSERT1(t.size() == (ctx.K()).size(), "Invalid tensor size.");
    gmm::copy(ctx.K().as_vector(), t.as_vector());
    return 0;
  }
  ga_instruction_element_K(base_tensor &t_,
                           const fem_interpolation_context &ct)
    : t(t_), ctx(ct) {}
};
struct ga_instruction_element_B : public ga_instruction {
  base_tensor &t;
  const fem_interpolation_context &ctx;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: element_B");
    GMM_ASSERT1(t.size() == (ctx.B()).size(), "Invalid tensor size.");
    gmm::copy(ctx.B().as_vector(), t.as_vector());
    return 0;
  }
  ga_instruction_element_B(base_tensor &t_,
                           const fem_interpolation_context &ct)
    : t(t_), ctx(ct) {}
};
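
// Instructions evaluating the values, gradients and Hessians of the base
// (shape) functions, including the xfem_plus/xfem_minus variants which
// evaluate on one side of a level-set discontinuity.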
struct ga_instruction_val_base : public ga_instruction {
  base_tensor &t;
  fem_interpolation_context &ctx;
  const mesh_fem &mf;
  const pfem_precomp &pfp;
  virtual int exec() { // --> t(ndof,target_dim)
    GA_DEBUG_INFO("Instruction: compute value of base functions");
    if (ctx.have_pgp()) ctx.pfp_base_value(t, pfp);
    else {
      ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
      GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
      ctx.base_value(t);
    }
    return 0;
  }
  ga_instruction_val_base(base_tensor &tt, fem_interpolation_context &ct,
                          const mesh_fem &mf_, const pfem_precomp &pfp_)
    : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
};
struct ga_instruction_xfem_plus_val_base : public ga_instruction {
  base_tensor &t;
  fem_interpolation_context &ctx;
  const mesh_fem &mf;
  pfem_precomp &pfp;
  virtual int exec() { // --> t(ndof,target_dim)
    GA_DEBUG_INFO("Instruction: compute value of base functions");
    if (ctx.have_pgp()) ctx.set_pfp(pfp);
    else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
    GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
    int old_xfem_side = ctx.xfem_side();
    ctx.set_xfem_side(1);
    ctx.base_value(t);
    ctx.set_xfem_side(old_xfem_side);
    return 0;
  }
  ga_instruction_xfem_plus_val_base(base_tensor &tt,
                                    fem_interpolation_context &ct,
                                    const mesh_fem &mf_, pfem_precomp &pfp_)
    : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
};
struct ga_instruction_xfem_minus_val_base : public ga_instruction {
  base_tensor &t;
  fem_interpolation_context &ctx;
  const mesh_fem &mf;
  pfem_precomp &pfp;
  virtual int exec() { // --> t(ndof,target_dim)
    GA_DEBUG_INFO("Instruction: compute value of base functions");
    if (ctx.have_pgp()) ctx.set_pfp(pfp);
    else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
    GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
    int old_xfem_side = ctx.xfem_side();
    ctx.set_xfem_side(-1);
    ctx.base_value(t);
    ctx.set_xfem_side(old_xfem_side);
    return 0;
  }
  ga_instruction_xfem_minus_val_base
  (base_tensor &tt, fem_interpolation_context &ct,
   const mesh_fem &mf_, pfem_precomp &pfp_)
    : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
};
struct ga_instruction_grad_base : public ga_instruction_val_base {
  virtual int exec() { // --> t(ndof,target_dim,N)
    GA_DEBUG_INFO("Instruction: compute gradient of base functions");
    if (ctx.have_pgp()) ctx.pfp_grad_base_value(t, pfp);
    else {
      ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
      GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
      ctx.grad_base_value(t);
    }
    return 0;
  }
  ga_instruction_grad_base(base_tensor &tt, fem_interpolation_context &ct,
                           const mesh_fem &mf_, pfem_precomp &pfp_)
    : ga_instruction_val_base(tt, ct, mf_, pfp_) {}
};
struct ga_instruction_xfem_plus_grad_base : public ga_instruction_val_base {
  virtual int exec() { // --> t(ndof,target_dim,N)
    GA_DEBUG_INFO("Instruction: compute gradient of base functions");
    if (ctx.have_pgp()) ctx.set_pfp(pfp);
    else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
    GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
    int old_xfem_side = ctx.xfem_side();
    ctx.set_xfem_side(1);
    ctx.grad_base_value(t);
    ctx.set_xfem_side(old_xfem_side);
    return 0;
  }
  ga_instruction_xfem_plus_grad_base
  (base_tensor &tt, fem_interpolation_context &ct,
   const mesh_fem &mf_, pfem_precomp &pfp_)
    : ga_instruction_val_base(tt, ct, mf_, pfp_) {}
};
struct ga_instruction_xfem_minus_grad_base : public ga_instruction_val_base {
  virtual int exec() { // --> t(ndof,target_dim,N)
    GA_DEBUG_INFO("Instruction: compute gradient of base functions");
    if (ctx.have_pgp()) ctx.set_pfp(pfp);
    else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
    GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
    int old_xfem_side = ctx.xfem_side();
    ctx.set_xfem_side(-1);
    ctx.grad_base_value(t);
    ctx.set_xfem_side(old_xfem_side);
    return 0;
  }
  ga_instruction_xfem_minus_grad_base
  (base_tensor &tt, fem_interpolation_context &ct,
   const mesh_fem &mf_, pfem_precomp &pfp_)
    : ga_instruction_val_base(tt, ct, mf_, pfp_) {}
};
struct ga_instruction_hess_base : public ga_instruction_val_base {
  virtual int exec() { // --> t(ndof,target_dim,N,N)
    GA_DEBUG_INFO("Instruction: compute Hessian of base functions");
    if (ctx.have_pgp()) ctx.set_pfp(pfp);
    else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
    GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
    ctx.hess_base_value(t);
    return 0;
  }
  ga_instruction_hess_base(base_tensor &tt, fem_interpolation_context &ct,
                           const mesh_fem &mf_, pfem_precomp &pfp_)
    : ga_instruction_val_base(tt, ct, mf_, pfp_) {}
};
struct ga_instruction_xfem_plus_hess_base : public ga_instruction_val_base {
  virtual int exec() { // --> t(ndof,target_dim,N,N)
    GA_DEBUG_INFO("Instruction: compute Hessian of base functions");
    if (ctx.have_pgp()) ctx.set_pfp(pfp);
    else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
    GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
    int old_xfem_side = ctx.xfem_side();
    ctx.set_xfem_side(1);
    ctx.hess_base_value(t);
    ctx.set_xfem_side(old_xfem_side);
    return 0;
  }
  ga_instruction_xfem_plus_hess_base
  (base_tensor &tt, fem_interpolation_context &ct,
   const mesh_fem &mf_, pfem_precomp &pfp_)
    : ga_instruction_val_base(tt, ct, mf_, pfp_) {}
};
struct ga_instruction_xfem_minus_hess_base : public ga_instruction_val_base {
  virtual int exec() { // --> t(ndof,target_dim,N,N)
    GA_DEBUG_INFO("Instruction: compute Hessian of base functions");
    if (ctx.have_pgp()) ctx.set_pfp(pfp);
    else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
    GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
    int old_xfem_side = ctx.xfem_side();
    ctx.set_xfem_side(-1);
    ctx.hess_base_value(t);
    ctx.set_xfem_side(old_xfem_side);
    return 0;
  }
  ga_instruction_xfem_minus_hess_base
  (base_tensor &tt, fem_interpolation_context &ct,
   const mesh_fem &mf_, pfem_precomp &pfp_)
    : ga_instruction_val_base(tt, ct, mf_, pfp_) {}
};
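
// Instructions computing the value, gradient, Hessian and divergence of a
// variable at the current point, i.e. the contraction of the local
// coefficient vector `coeff` with the base function tensor Z.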
struct ga_instruction_val : public ga_instruction {
  scalar_type &a;
  base_tensor &t;
  const base_tensor &Z;
  const base_vector &coeff;
  const size_type qdim;
  // Z(ndof,target_dim), coeff(Qmult,ndof) --> t(target_dim*Qmult)
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: variable value");
    size_type ndof = Z.sizes()[0];
    if (!ndof) { gmm::clear(t.as_vector()); return 0; }
    GA_DEBUG_ASSERT(t.size() == qdim, "dimensions mismatch");

    if (qdim == 1) { // scalar variable
      GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof,
                      "Wrong size for coeff vector");
      auto itc = coeff.begin();
      auto itZ = Z.begin();
      a = (*itc++) * (*itZ++);
      while (itc != coeff.end()) a += (*itc++) * (*itZ++);
    } else {
      size_type target_dim = Z.sizes()[1];
      if (target_dim == 1) { // vectorized scalar fem
        GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*qdim,
                        "Wrong size for coeff vector");
        auto itc = coeff.begin();
        auto itZ = Z.begin();
        for (auto it = t.begin(); it != t.end(); ++it)
          *it = (*itc++) * (*itZ);
        ++itZ;
        for (size_type j = 1; j < ndof; ++j, ++itZ) {
          for (auto it = t.begin(); it != t.end(); ++it)
            *it += (*itc++) * (*itZ);
        }
      } else { // general intrinsically vectorial fem
        size_type Qmult = qdim / target_dim;
        GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
                        "Wrong size for coeff vector");
        gmm::clear(t.as_vector());
        auto itc = coeff.begin();
        for (size_type j = 0; j < ndof; ++j) {
          auto it = t.begin();
          for (size_type q = 0; q < Qmult; ++q, ++itc) {
            for (size_type r = 0; r < target_dim; ++r)
              *it++ += (*itc) * Z[j + r*ndof];
          }
        }
      }
    }
    return 0;
  }
  ga_instruction_val(base_tensor &tt, const base_tensor &Z_,
                     const base_vector &co, size_type q)
    : a(tt[0]), t(tt), Z(Z_), coeff(co), qdim(q) {}
};
struct ga_instruction_grad : public ga_instruction_val {
  // Z(ndof,target_dim,N), coeff(Qmult,ndof) --> t(target_dim*Qmult,N)
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: gradient");
    size_type ndof = Z.sizes()[0];
    if (!ndof) { gmm::clear(t.as_vector()); return 0; }
    size_type N = Z.sizes()[2];
    if (qdim == 1) {
      GA_DEBUG_ASSERT(t.size() == N, "dimensions mismatch");
      GA_DEBUG_ASSERT(coeff.size() == ndof, "Wrong size for coeff vector");
      auto itZ = Z.begin();
      for (auto it = t.begin(); it != t.end(); ++it) {
        auto itc = coeff.begin();
        *it = (*itc++) * (*itZ++);
        while (itc != coeff.end()) *it += (*itc++) * (*itZ++);
      }
    } else {
      size_type target_dim = Z.sizes()[1];
      if (target_dim == 1) {
        GA_DEBUG_ASSERT(t.size() == N*qdim, "dimensions mismatch");
        GA_DEBUG_ASSERT(coeff.size() == ndof*qdim,
                        "Wrong size for coeff vector");
        for (size_type q = 0; q < qdim; ++q) {
          auto itZ = Z.begin();
          auto it = t.begin() + q;
          for (size_type k = 0; k < N; ++k) {
            if (k) it += qdim;
            auto itc = coeff.begin() + q;
            *it = (*itc) * (*itZ++);
            for (size_type j = 1; j < ndof; ++j)
              { itc += qdim; *it += (*itc) * (*itZ++); }
          }
        }
      } else {
        size_type Qmult = qdim / target_dim;
        GA_DEBUG_ASSERT(t.size() == N*qdim, "dimensions mismatch");
        GA_DEBUG_ASSERT(coeff.size() == ndof*Qmult,
                        "Wrong size for coeff vector");
        gmm::clear(t.as_vector());
        for (size_type q = 0; q < Qmult; ++q) {
          auto itZ = Z.begin();
          for (size_type k = 0; k < N; ++k)
            for (size_type r = 0; r < target_dim; ++r)
              for (size_type j = 0; j < ndof; ++j)
                t[r + q*target_dim + k*qdim] += coeff[j*Qmult+q] * (*itZ++);
        }
      }
    }
    return 0;
  }
  ga_instruction_grad(base_tensor &tt, const base_tensor &Z_,
                      const base_vector &co, size_type q)
    : ga_instruction_val(tt, Z_, co, q) {}
};
struct ga_instruction_hess : public ga_instruction_val {
  // Z(ndof,target_dim,N*N), coeff(Qmult,ndof) --> t(target_dim*Qmult,N,N)
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: Hessian");
    size_type ndof = Z.sizes()[0];
    if (!ndof) { gmm::clear(t.as_vector()); return 0; }
    size_type NN = gmm::sqr(t.sizes().back());
    GA_DEBUG_ASSERT(NN == Z.sizes()[2], "Internal error");
    if (qdim == 1) {
      GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof,
                      "Wrong size for coeff vector");
      auto it = Z.begin();
      auto itt = t.begin();
      for (size_type kl = 0; kl < NN; ++kl, ++itt) {
        *itt = scalar_type(0);
        for (auto itc = coeff.begin(); itc != coeff.end(); ++itc, ++it)
          *itt += (*itc) * (*it);
      }
      GMM_ASSERT1(itt == t.end(), "dimensions mismatch");
    } else {
      size_type target_dim = Z.sizes()[1];
      if (target_dim == 1) {
        GA_DEBUG_ASSERT(t.size() == NN*qdim, "dimensions mismatch");
        GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*qdim,
                        "Wrong size for coeff vector");
        gmm::clear(t.as_vector());
        for (size_type q = 0; q < qdim; ++q) {
          base_tensor::const_iterator it = Z.begin();
          for (size_type kl = 0; kl < NN; ++kl)
            for (size_type j = 0; j < ndof; ++j, ++it)
              t[q + kl*qdim] += coeff[j*qdim+q] * (*it);
        }
      } else {
        size_type Qmult = qdim / target_dim;
        GA_DEBUG_ASSERT(t.size() == NN*qdim, "dimensions mismatch");
        GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
                        "Wrong size for coeff vector");
        gmm::clear(t.as_vector());
        for (size_type q = 0; q < Qmult; ++q) {
          base_tensor::const_iterator it = Z.begin();
          for (size_type kl = 0; kl < NN; ++kl)
            for (size_type r = 0; r < target_dim; ++r)
              for (size_type j = 0; j < ndof; ++j, ++it)
                t[r + q*target_dim + kl*qdim] += coeff[j*Qmult+q] * (*it);
        }
      }
    }
    return 0;
  }
  ga_instruction_hess(base_tensor &tt, const base_tensor &Z_,
                      const base_vector &co, size_type q)
    : ga_instruction_val(tt, Z_, co, q) {}
};
struct ga_instruction_diverg : public ga_instruction_val {
  // Z(ndof,target_dim,N), coeff(Qmult,ndof) --> t(1)
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: divergence");
    size_type ndof = Z.sizes()[0];
    if (!ndof) { gmm::clear(t.as_vector()); return 0; }
    size_type target_dim = Z.sizes()[1];
    size_type N = Z.sizes()[2];
    size_type Qmult = qdim / target_dim;
    GA_DEBUG_ASSERT(Qmult*target_dim == N && (Qmult == 1 || target_dim == 1),
                    "Dimensions mismatch for divergence operator");
    GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
                    "Wrong size for coeff vector");

    t[0] = scalar_type(0);
    base_tensor::const_iterator it = Z.begin();
    if (Qmult == 1) // intrinsically vectorial fem: sum the diagonal of Z
      for (size_type k = 0; k < N; ++k) {
        if (k) it += (N*ndof + 1);
        for (size_type j = 0; j < ndof; ++j) {
          if (j) ++it;
          t[0] += coeff[j] * (*it);
        }
      }
    else // target_dim == 1: vectorized scalar fem
      for (size_type k = 0; k < N; ++k) {
        if (k) ++it;
        for (size_type j = 0; j < ndof; ++j) {
          if (j) ++it;
          t[0] += coeff[j*N+k] * (*it);
        }
      }
    return 0;
  }
  ga_instruction_diverg(base_tensor &tt, const base_tensor &Z_,
                        const base_vector &co, size_type q)
    : ga_instruction_val(tt, Z_, co, q) {}
};
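
// Instructions copying (and, for vector-valued variables, vectorizing) the
// base function tensor Z into the test-function tensor t, performing
// t(i*Qmult+j, k*Qmult+j, ...) = Z(i,k,...) for the Qmult copies.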
struct ga_instruction_copy_val_base : public ga_instruction {
  base_tensor &t;
  const base_tensor &Z;
  size_type qdim;
  // Z(ndof,target_dim) --> t(Qmult*ndof,Qmult*target_dim)
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: value of test functions");
    if (qdim == 1) {
      GA_DEBUG_ASSERT(t.size() == Z.size(), "Wrong size for base vector");
      std::copy(Z.begin(), Z.end(), t.begin());
    } else {
      size_type target_dim = Z.sizes()[1];
      size_type Qmult = qdim / target_dim;
      if (Qmult == 1) {
        std::copy(Z.begin(), Z.end(), t.begin());
      } else {
        if (target_dim == 1) {
          size_type ndof = Z.sizes()[0];
          GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
                          "Wrong size for base vector");
          std::fill(t.begin(), t.end(), scalar_type(0));
          auto itZ = Z.begin();
          size_type s = t.sizes()[0], sss = s+1;

          // Performs t(i*Qmult+j, j) = Z(i)
          auto it = t.begin();
          for (size_type i = 0; i < ndof; ++i, ++itZ) {
            if (i) it += Qmult;
            auto it2 = it;
            *it2 = *itZ;
            for (size_type j = 1; j < Qmult; ++j) { it2 += sss; *it2 = *itZ; }
          }
        } else {
          size_type ndof = Z.sizes()[0];
          GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
                          "Wrong size for base vector");
          std::fill(t.begin(), t.end(), scalar_type(0));
          auto itZ = Z.begin();
          size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1;

          // Performs t(i*Qmult+j, k*Qmult+j) = Z(i,k)
          for (size_type k = 0; k < target_dim; ++k) {
            auto it = t.begin() + (ss * k);
            for (size_type i = 0; i < ndof; ++i, ++itZ) {
              if (i) it += Qmult;
              auto it2 = it;
              *it2 = *itZ;
              for (size_type j = 1; j < Qmult; ++j)
                { it2 += sss; *it2 = *itZ; }
            }
          }
        }
      }
    }
    return 0;
  }
  ga_instruction_copy_val_base(base_tensor &tt, const base_tensor &Z_,
                               size_type q) : t(tt), Z(Z_), qdim(q) {}
};
struct ga_instruction_copy_grad_base : public ga_instruction_copy_val_base {
  // Z(ndof,target_dim,N) --> t(Qmult*ndof,Qmult*target_dim,N)
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: gradient of test functions");
    if (qdim == 1) {
      std::copy(Z.begin(), Z.end(), t.begin());
    } else {
      size_type target_dim = Z.sizes()[1];
      size_type Qmult = qdim / target_dim;
      if (Qmult == 1) {
        std::copy(Z.begin(), Z.end(), t.begin());
      } else {
        if (target_dim == 1) {
          size_type ndof = Z.sizes()[0];
          size_type N = Z.sizes()[2];
          GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
                          "Wrong size for gradient vector");
          std::fill(t.begin(), t.end(), scalar_type(0));
          base_tensor::const_iterator itZ = Z.begin();
          size_type s = t.sizes()[0], sss = s+1, ssss = s*target_dim*Qmult;

          // Performs t(i*Qmult+j, j, l) = Z(i,l)
          for (size_type l = 0; l < N; ++l) {
            base_tensor::iterator it = t.begin() + (ssss*l);
            for (size_type i = 0; i < ndof; ++i, ++itZ) {
              if (i) it += Qmult;
              base_tensor::iterator it2 = it;
              *it2 = *itZ;
              for (size_type j = 1; j < Qmult; ++j) { it2+=sss; *it2=*itZ; }
            }
          }
        } else {
          size_type ndof = Z.sizes()[0];
          size_type N = Z.sizes()[2];
          GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
                          "Wrong size for gradient vector");
          std::fill(t.begin(), t.end(), scalar_type(0));
          base_tensor::const_iterator itZ = Z.begin();
          size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1;
          size_type ssss = ss*target_dim;

          // Performs t(i*Qmult+j, k*Qmult+j, l) = Z(i,k,l)
          for (size_type l = 0; l < N; ++l)
            for (size_type k = 0; k < target_dim; ++k) {
              base_tensor::iterator it = t.begin() + (ss * k + ssss*l);
              for (size_type i = 0; i < ndof; ++i, ++itZ) {
                if (i) it += Qmult;
                base_tensor::iterator it2 = it;
                *it2 = *itZ;
                for (size_type j = 1; j < Qmult; ++j) { it2+=sss; *it2=*itZ; }
              }
            }
        }
      }
    }
    return 0;
  }
  ga_instruction_copy_grad_base(base_tensor &tt, const base_tensor &Z_,
                                size_type q)
    : ga_instruction_copy_val_base(tt,Z_,q) {}
};
struct ga_instruction_copy_vect_val_base : public ga_instruction {
  base_tensor &t;
  const base_tensor &Z;
  size_type qdim;
  // Z(ndof) --> t(qdim*ndof,qdim)
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: vectorized value of test functions");
    size_type ndof = Z.sizes()[0];
    GA_DEBUG_ASSERT(t.size() == Z.size() * qdim * qdim,
                    "Wrong size for base vector");
    std::fill(t.begin(), t.end(), scalar_type(0));
    auto itZ = Z.begin();
    size_type s = t.sizes()[0], sss = s+1;

    // Performs t(i*qdim+j, j) = Z(i)
    auto it = t.begin();
    for (size_type i = 0; i < ndof; ++i, ++itZ) {
      if (i) it += qdim;
      auto it2 = it;
      *it2 = *itZ;
      for (size_type j = 1; j < qdim; ++j) { it2 += sss; *it2 = *itZ; }
    }
    return 0;
  }
  ga_instruction_copy_vect_val_base(base_tensor &tt, const base_tensor &Z_,
                                    size_type q) : t(tt), Z(Z_), qdim(q) {}
};
struct ga_instruction_copy_vect_grad_base
  : public ga_instruction_copy_vect_val_base {
  // Z(ndof,N) --> t(qdim*ndof,qdim,N)
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: vectorized gradient of test functions");
    size_type ndof = Z.sizes()[0];
    size_type N = Z.sizes()[1];
    GA_DEBUG_ASSERT(t.size() == Z.size() * qdim * qdim,
                    "Wrong size for gradient vector");
    std::fill(t.begin(), t.end(), scalar_type(0));
    base_tensor::const_iterator itZ = Z.begin();
    size_type s = t.sizes()[0], sss = s+1, ssss = s*qdim;

    // Performs t(i*qdim+j, j, l) = Z(i,l)
    for (size_type l = 0; l < N; ++l) {
      base_tensor::iterator it = t.begin() + (ssss*l);
      for (size_type i = 0; i < ndof; ++i, ++itZ) {
        if (i) it += qdim;
        base_tensor::iterator it2 = it;
        *it2 = *itZ;
        for (size_type j = 1; j < qdim; ++j) { it2+=sss; *it2=*itZ; }
      }
    }
    return 0;
  }
  ga_instruction_copy_vect_grad_base(base_tensor &tt, const base_tensor &Z_,
                                     size_type q)
    : ga_instruction_copy_vect_val_base(tt,Z_,q) {}
};
struct ga_instruction_copy_hess_base : public ga_instruction_copy_val_base {
  // Z(ndof,target_dim,N*N) --> t(Qmult*ndof,Qmult*target_dim,N,N)
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: Hessian of test functions");
    size_type target_dim = Z.sizes()[1];
    size_type Qmult = qdim / target_dim;
    if (Qmult == 1) {
      gmm::copy(Z.as_vector(), t.as_vector());
    } else {
      size_type ndof = Z.sizes()[0];
      GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
                      "Wrong size for Hessian vector");
      gmm::clear(t.as_vector());
      base_tensor::const_iterator itZ = Z.begin();
      size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1;

      // Performs t(i*Qmult+j, k*Qmult+j, l, m) = Z(i,k,l*N+m)
      size_type NNdim = Z.sizes()[2]*target_dim;
      for (size_type klm = 0; klm < NNdim; ++klm) {
        base_tensor::iterator it = t.begin() + (ss * klm);
        for (size_type i = 0; i < ndof; ++i, ++itZ) {
          if (i) it += Qmult;
          base_tensor::iterator it2 = it;
          *it2 = *itZ;
          for (size_type j = 1; j < Qmult; ++j) { it2 += sss; *it2 = *itZ; }
        }
      }
    }
    return 0;
  }
  ga_instruction_copy_hess_base(base_tensor &tt, const base_tensor &Z_,
                                size_type q)
    : ga_instruction_copy_val_base(tt, Z_, q) {}
};
struct ga_instruction_copy_diverg_base : public ga_instruction_copy_val_base {
  // Z(ndof,target_dim,N) --> t(Qmult*ndof)
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: divergence of test functions");
    size_type ndof = Z.sizes()[0];
    size_type target_dim = Z.sizes()[1];
    size_type N = Z.sizes()[2];
    size_type Qmult = qdim / target_dim;
    GA_DEBUG_ASSERT(Qmult*target_dim == N && (Qmult == 1 || target_dim == 1),
                    "Dimensions mismatch for divergence operator");
    GA_DEBUG_ASSERT(t.size() == ndof * Qmult,
                    "Wrong size for divergence vector");
    gmm::clear(t.as_vector());
    base_tensor::const_iterator itZ = Z.begin();
    if (Qmult == 1) { // intrinsically vectorial fem: t(i) = Trace(Z(i,:,:))
      for (size_type l = 0; l < N; ++l) {
        base_tensor::iterator it = t.begin();
        if (l) itZ += target_dim*ndof+1;
        for (size_type i = 0; i < ndof; ++i) {
          if (i) { ++it; ++itZ; }
          *it += *itZ;
        }
      }
    } else { // vectorized scalar fem: t(i*Qmult+j) = Z(i,1,j)
      for (size_type j = 0; j < N; ++j) {
        base_tensor::iterator it = t.begin() + j;
        if (j) ++itZ;
        for (size_type i = 0; i < ndof; ++i) {
          if (i) { it += Qmult; ++itZ; }
          *it += *itZ;
        }
      }
    }
    return 0;
  }
  ga_instruction_copy_diverg_base(base_tensor &tt, const base_tensor &Z_,
                                  size_type q)
    : ga_instruction_copy_val_base(tt, Z_, q) {}
};
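
// Elementary transformations: the transformation matrix M is cached per
// element (icv) and applied to the local coefficient vector before the
// standard val/grad/hess/diverg instruction runs.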
struct ga_instruction_elementary_trans {
  const base_vector &coeff_in;
  base_vector coeff_out;
  pelementary_transformation elemtrans;
  const mesh_fem &mf1, &mf2;
  const fem_interpolation_context &ctx;
  base_matrix &M;
  size_type &icv;
  void do_transformation(size_type n, size_type m) {
    if (icv != ctx.convex_num() || M.size() == 0) {
      M.base_resize(m, n);
      icv = ctx.convex_num();
      elemtrans->give_transformation(mf1, mf2, icv, M);
    }
    coeff_out.resize(gmm::mat_nrows(M));
    gmm::mult(M, coeff_in, coeff_out);
  }
  ga_instruction_elementary_trans
  (const base_vector &co, pelementary_transformation e,
   const mesh_fem &mf1_, const mesh_fem &mf2_,
   const fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
    : coeff_in(co), elemtrans(e), mf1(mf1_), mf2(mf2_), ctx(ctx_),
      M(M_), icv(icv_) {}
  ~ga_instruction_elementary_trans() {}
};
struct ga_instruction_elementary_trans_val
  : public ga_instruction_val, ga_instruction_elementary_trans {
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: variable value with elementary "
                  "transformation");
    size_type ndof = Z.sizes()[0];
    size_type Qmult = qdim / Z.sizes()[1];
    do_transformation(coeff_in.size(), ndof*Qmult);
    return ga_instruction_val::exec();
  }
  ga_instruction_elementary_trans_val
  (base_tensor &tt, const base_tensor &Z_, const base_vector &co,
   size_type q, pelementary_transformation e,
   const mesh_fem &mf1_, const mesh_fem &mf2_,
   fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
    : ga_instruction_val(tt, Z_, coeff_out, q),
      ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
};
struct ga_instruction_elementary_trans_grad
  : public ga_instruction_grad, ga_instruction_elementary_trans {
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: gradient with elementary transformation");
    size_type ndof = Z.sizes()[0];
    size_type Qmult = qdim / Z.sizes()[1];
    do_transformation(coeff_in.size(), ndof*Qmult);
    return ga_instruction_grad::exec();
  }
  ga_instruction_elementary_trans_grad
  (base_tensor &tt, const base_tensor &Z_, const base_vector &co,
   size_type q, pelementary_transformation e,
   const mesh_fem &mf1_, const mesh_fem &mf2_,
   fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
    : ga_instruction_grad(tt, Z_, coeff_out, q),
      ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
};
struct ga_instruction_elementary_trans_hess
  : public ga_instruction_hess, ga_instruction_elementary_trans {
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: Hessian with elementary transformation");
    size_type ndof = Z.sizes()[0];
    size_type Qmult = qdim / Z.sizes()[1];
    do_transformation(coeff_in.size(), ndof*Qmult);
    return ga_instruction_hess::exec();
  }
  ga_instruction_elementary_trans_hess
  (base_tensor &tt, const base_tensor &Z_, const base_vector &co,
   size_type q, pelementary_transformation e,
   const mesh_fem &mf1_, const mesh_fem &mf2_,
   fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
    : ga_instruction_hess(tt, Z_, coeff_out, q),
      ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
};
struct ga_instruction_elementary_trans_diverg
  : public ga_instruction_diverg, ga_instruction_elementary_trans {
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: divergence with elementary transformation");
    size_type ndof = Z.sizes()[0];
    size_type Qmult = qdim / Z.sizes()[1];
    do_transformation(coeff_in.size(), ndof*Qmult);
    return ga_instruction_diverg::exec();
  }
  ga_instruction_elementary_trans_diverg
  (base_tensor &tt, const base_tensor &Z_, const base_vector &co,
   size_type q, pelementary_transformation e,
   const mesh_fem &mf1_, const mesh_fem &mf2_,
   fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
    : ga_instruction_diverg(tt, Z_, coeff_out, q),
      ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
};
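
// Instructions supporting variable groups and interpolate transformations:
// the group info (mesh_fem, dof interval, unreduced vector, multiplier
// alpha) is refreshed whenever the mesh under the transformation changes.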
struct ga_instruction_update_group_info : public ga_instruction {
  const ga_workspace &workspace;
  const ga_instruction_set &gis;
  const ga_instruction_set::interpolate_info &inin;
  const std::string gname;
  ga_instruction_set::variable_group_info &vgi;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: Update group info for "+gname);
    if (vgi.cached_mesh && vgi.cached_mesh == inin.m)
      return 0;

    vgi.cached_mesh = inin.m;
    const std::string &varname
      = inin.m ? workspace.variable_in_group(gname, *(inin.m))
               : workspace.first_variable_of_group(gname);
    vgi.varname = &varname;
    vgi.mf = workspace.associated_mf(varname);
    GA_DEBUG_ASSERT(vgi.mf, "Group variable should always have a mesh_fem");
    vgi.reduced_mf = vgi.mf->is_reduced();
    if (vgi.reduced_mf) {
      const auto it = gis.really_extended_vars.find(varname);
      GA_DEBUG_ASSERT(it != gis.really_extended_vars.end(),
                      "Variable " << varname << " not in extended variables");
      vgi.U = &(it->second);
      vgi.I = &(workspace.temporary_interval_of_variable(varname));
    } else {
      vgi.U = &(workspace.value(varname));
      vgi.I = &(workspace.interval_of_variable(varname));
    }
    vgi.alpha = workspace.factor_of_variable(varname);
    return 0;
  }
  ga_instruction_update_group_info
  (const ga_workspace &workspace_, const ga_instruction_set &gis_,
   const ga_instruction_set::interpolate_info &inin_,
   const std::string &gname_, ga_instruction_set::variable_group_info &vgi_)
    : workspace(workspace_), gis(gis_), inin(inin_), gname(gname_),
      vgi(vgi_) {}
};
struct ga_instruction_interpolate_filter : public ga_instruction {
  base_tensor &t;
  const ga_instruction_set::interpolate_info &inin;
  const size_type pt_type;
  const int nb;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: interpolated filter");
    if ((pt_type == size_type(-1) && inin.pt_type) ||
        (pt_type != size_type(-1) && inin.pt_type == pt_type)) {
      GA_DEBUG_INFO("Instruction: interpolated filter: pass");
      return 0;
    } else {
      GA_DEBUG_INFO("Instruction: interpolated filter: filtered");
      gmm::clear(t.as_vector());
      return nb;
    }
  }
  ga_instruction_interpolate_filter
  (base_tensor &t_, const ga_instruction_set::interpolate_info &inin_,
   size_type ind_, int nb_)
    : t(t_), inin(inin_), pt_type(ind_), nb(nb_) {}
};
struct ga_instruction_copy_interpolated_small_vect : public ga_instruction {
  base_tensor &t;
  const base_small_vector &vec;
  const ga_instruction_set::interpolate_info &inin;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: copy small vector");
    GMM_ASSERT1(!(inin.has_ctx) || inin.ctx.is_convex_num_valid(),
                "Invalid element, probably transformation failed");
    GMM_ASSERT1(t.size() == vec.size(), "Invalid vector size.");
    gmm::copy(vec, t.as_vector());
    return 0;
  }
  ga_instruction_copy_interpolated_small_vect
  (base_tensor &t_, const base_small_vector &vec_,
   const ga_instruction_set::interpolate_info &inin_)
    : t(t_), vec(vec_), inin(inin_) {}
};
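
// Instructions evaluating a variable under an interpolate transformation:
// the common exec() slices the local coefficients and sets up the fem
// precomputation on the target element before the derived class
// interpolates the value, gradient, Hessian or divergence.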
struct ga_instruction_interpolate : public ga_instruction {
  base_tensor &t;
  const mesh **m;
  const mesh_fem *mfn, **mfg;
  const base_vector *Un, **Ug;
  fem_interpolation_context &ctx;
  base_vector coeff;
  size_type qdim;
  const size_type &ipt;
  fem_precomp_pool &fp_pool;
  ga_instruction_set::interpolate_info &inin;
  virtual int exec() {
    GMM_ASSERT1(ctx.is_convex_num_valid(), "No valid element for the "
                "transformation. Probably transformation failed");
    const mesh_fem &mf = *(mfg ? *mfg : mfn);
    const base_vector &U = *(Ug ? *Ug : Un);
    GMM_ASSERT1(&(mf.linked_mesh()) == *m, "Interpolation of a variable "
                "on another mesh than the one it is defined on");
    slice_vector_on_basic_dof_of_element(mf, U, ctx.convex_num(), coeff);
    pfem pf = mf.fem_of_element(ctx.convex_num());
    GMM_ASSERT1(pf, "Undefined finite element method");
    if (ctx.have_pgp()) {
      if (ipt == 0)
        inin.pfps[&mf] = fp_pool(pf, ctx.pgp()->get_ppoint_tab());
      ctx.set_pfp(inin.pfps[&mf]);
    } else {
      ctx.set_pf(pf);
    }
    return 0;
  }
  ga_instruction_interpolate
  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
   const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
   fem_interpolation_context &ctx_, size_type q, const size_type &ipt_,
   fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
    : t(tt), m(m_), mfn(mfn_), mfg(mfg_), Un(Un_), Ug(Ug_),
      ctx(ctx_), qdim(q), ipt(ipt_), fp_pool(fp_pool_), inin(inin_) {}
};
struct ga_instruction_interpolate_val : public ga_instruction_interpolate {
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: interpolated variable value");
    ga_instruction_interpolate::exec();
    ctx.pf()->interpolation(ctx, coeff, t.as_vector(), dim_type(qdim));
    return 0;
  }
  ga_instruction_interpolate_val
  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
   const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
   fem_interpolation_context &ctx_, size_type q, const size_type &ipt_,
   fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
    : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
                                 fp_pool_, inin_) {}
};
struct ga_instruction_interpolate_grad : public ga_instruction_interpolate {
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: interpolated variable grad");
    ga_instruction_interpolate::exec();
    base_matrix v(qdim, ctx.N());
    ctx.pf()->interpolation_grad(ctx, coeff, v, dim_type(qdim));
    gmm::copy(v.as_vector(), t.as_vector());
    return 0;
  }
  ga_instruction_interpolate_grad
  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
   const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
   fem_interpolation_context &ctx_, size_type q, const size_type &ipt_,
   fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
    : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
                                 fp_pool_, inin_) {}
};
struct ga_instruction_interpolate_hess : public ga_instruction_interpolate {
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: interpolated variable hessian");
    ga_instruction_interpolate::exec();
    base_matrix v(qdim, ctx.N()*ctx.N());
    ctx.pf()->interpolation_hess(ctx, coeff, v, dim_type(qdim));
    gmm::copy(v.as_vector(), t.as_vector());
    return 0;
  }
  ga_instruction_interpolate_hess
  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
   const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
   fem_interpolation_context &ctx_, size_type q, const size_type &ipt_,
   fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
    : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
                                 fp_pool_, inin_) {}
};
struct ga_instruction_interpolate_diverg : public ga_instruction_interpolate {
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: interpolated variable divergence");
    ga_instruction_interpolate::exec();
    ctx.pf()->interpolation_diverg(ctx, coeff, t[0]);
    return 0;
  }
  ga_instruction_interpolate_diverg
  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
   const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
   fem_interpolation_context &ctx_, size_type q, const size_type &ipt_,
   fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
    : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
                                 fp_pool_, inin_) {}
};
struct ga_instruction_interpolate_base {
  const mesh **m;
  const mesh_fem *mfn, **mfg;
  const size_type &ipt;
  ga_instruction_set::interpolate_info &inin;
  fem_precomp_pool &fp_pool;
  virtual int exec() {
    GMM_ASSERT1(inin.ctx.is_convex_num_valid(), "No valid element for "
                "the transformation. Probably transformation failed");
    const mesh_fem &mf = *(mfg ? *mfg : mfn);
    GMM_ASSERT1(&(mf.linked_mesh()) == *m, "Interpolation of a variable "
                "on another mesh than the one it is defined on");
    pfem pf = mf.fem_of_element(inin.ctx.convex_num());
    GMM_ASSERT1(pf, "Undefined finite element method");
    if (inin.ctx.have_pgp()) {
      if (ipt == 0)
        inin.pfps[&mf] = fp_pool(pf, inin.ctx.pgp()->get_ppoint_tab());
      inin.ctx.set_pfp(inin.pfps[&mf]);
    } else {
      inin.ctx.set_pf(pf);
    }
    return 0;
  }
  ga_instruction_interpolate_base
  (const mesh **m_, const mesh_fem *mfn_, const mesh_fem **mfg_,
   const size_type &ipt_, ga_instruction_set::interpolate_info &inin_,
   fem_precomp_pool &fp_pool_)
    : m(m_), mfn(mfn_), mfg(mfg_), ipt(ipt_), inin(inin_),
      fp_pool(fp_pool_) {}
};
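
// Interpolated test functions: the base tensor ZZ is evaluated on the
// element reached by the transformation and then expanded by the
// corresponding copy_*_base instruction.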
struct ga_instruction_interpolate_val_base
  : public ga_instruction_copy_val_base, ga_instruction_interpolate_base {
  base_tensor ZZ;
  virtual int exec() { // --> t(ndof,target_dim)
    GA_DEBUG_INFO("Instruction: interpolated base value");
    ga_instruction_interpolate_base::exec();
    inin.ctx.pf()->real_base_value(inin.ctx, ZZ); // remember that Z == ZZ
    return ga_instruction_copy_val_base::exec();
  }
  ga_instruction_interpolate_val_base
  (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
   const mesh_fem **mfg_, const size_type &ipt_, size_type q,
   ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
    : ga_instruction_copy_val_base(t_, ZZ, q),
      ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
                                      inin_, fp_pool_) {}
};
struct ga_instruction_interpolate_grad_base
  : public ga_instruction_copy_grad_base, ga_instruction_interpolate_base {
  base_tensor ZZ;
  virtual int exec() { // --> t(ndof,target_dim,N)
    GA_DEBUG_INFO("Instruction: interpolated base grad");
    ga_instruction_interpolate_base::exec();
    inin.ctx.pf()->real_grad_base_value(inin.ctx, ZZ); // Z == ZZ
    return ga_instruction_copy_grad_base::exec();
  }
  ga_instruction_interpolate_grad_base
  (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
   const mesh_fem **mfg_, const size_type &ipt_, size_type q,
   ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
    : ga_instruction_copy_grad_base(t_, ZZ, q),
      ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
                                      inin_, fp_pool_) {}
};
struct ga_instruction_interpolate_hess_base
  : public ga_instruction_copy_hess_base, ga_instruction_interpolate_base {
  base_tensor ZZ;
  virtual int exec() { // --> t(ndof,target_dim,N,N)
    GA_DEBUG_INFO("Instruction: interpolated base hessian");
    ga_instruction_interpolate_base::exec();
    inin.ctx.pf()->real_hess_base_value(inin.ctx, ZZ); // Z == ZZ
    return ga_instruction_copy_hess_base::exec();
  }
  ga_instruction_interpolate_hess_base
  (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
   const mesh_fem **mfg_, const size_type &ipt_, size_type q,
   ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
    : ga_instruction_copy_hess_base(t_, ZZ, q),
      ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
                                      inin_, fp_pool_) {}
};
struct ga_instruction_interpolate_diverg_base
  : public ga_instruction_copy_diverg_base, ga_instruction_interpolate_base {
  base_tensor ZZ;
  virtual int exec() { // --> t(ndof)
    GA_DEBUG_INFO("Instruction: interpolated base divergence");
    ga_instruction_interpolate_base::exec();
    inin.ctx.pf()->real_grad_base_value(inin.ctx, ZZ); // Z == ZZ
    return ga_instruction_copy_diverg_base::exec();
  }
  ga_instruction_interpolate_diverg_base
  (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
   const mesh_fem **mfg_, const size_type &ipt_, size_type q,
   ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
    : ga_instruction_copy_diverg_base(t_, ZZ, q),
      ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
                                      inin_, fp_pool_) {}
};
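
// Elementary transformations for test functions: the untransformed base
// tensor is built in t_in and reduced against the cached matrix M into
// t_out.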
struct ga_instruction_elementary_trans_base {
  base_tensor t_in;
  base_tensor &t_out;
  pelementary_transformation elemtrans;
  const mesh_fem &mf1, &mf2;
  const fem_interpolation_context &ctx;
  base_matrix &M;
  size_type &icv;
  void do_transformation(size_type n, size_type m) {
    if (icv != ctx.convex_num() || M.size() == 0) {
      M.base_resize(m, n);
      icv = ctx.convex_num();
      elemtrans->give_transformation(mf1, mf2, icv, M);
    }
    t_out.mat_reduction(t_in, M, 0);
  }
  ga_instruction_elementary_trans_base
  (base_tensor &t_, pelementary_transformation e, const mesh_fem &mf1_,
   const mesh_fem &mf2_,
   const fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
    : t_out(t_), elemtrans(e), mf1(mf1_), mf2(mf2_), ctx(ctx_),
      M(M_), icv(icv_) {}
};
struct ga_instruction_elementary_trans_val_base
  : public ga_instruction_copy_val_base,
    ga_instruction_elementary_trans_base {
  virtual int exec() { // --> t_out(Qmult*ndof,Qmult*target_dim)
    GA_DEBUG_INFO("Instruction: value of test functions with elementary "
                  "transformation");
    size_type ndof = Z.sizes()[0];
    size_type Qmult = qdim / Z.sizes()[1];
    t_in.adjust_sizes(Qmult*ndof, Qmult*Z.sizes()[1]);
    ga_instruction_copy_val_base::exec();
    do_transformation(t_out.sizes()[0], ndof*Qmult);
    return 0;
  }
  ga_instruction_elementary_trans_val_base
  (base_tensor &t_, const base_tensor &Z_, size_type q,
   pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
   fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
    : ga_instruction_copy_val_base(t_in, Z_, q),
      ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
                                           M_, icv_) {}
};
struct ga_instruction_elementary_trans_grad_base
  : public ga_instruction_copy_grad_base,
    ga_instruction_elementary_trans_base {
  virtual int exec() { // --> t_out(Qmult*ndof,Qmult*target_dim,N)
    GA_DEBUG_INFO("Instruction: gradient of test functions with elementary "
                  "transformation");
    size_type ndof = Z.sizes()[0];
    size_type Qmult = qdim / Z.sizes()[1];
    t_in.adjust_sizes(Qmult*ndof, Qmult*Z.sizes()[1], Z.sizes()[2]);
    ga_instruction_copy_grad_base::exec();
    do_transformation(t_out.sizes()[0], ndof*Qmult);
    return 0;
  }
  ga_instruction_elementary_trans_grad_base
  (base_tensor &t_, const base_tensor &Z_, size_type q,
   pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
   fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
    : ga_instruction_copy_grad_base(t_in, Z_, q),
      ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
                                           M_, icv_) {}
};
struct ga_instruction_elementary_trans_hess_base
  : public ga_instruction_copy_hess_base,
    ga_instruction_elementary_trans_base {
  virtual int exec() { // --> t_out(Qmult*ndof,Qmult*target_dim,N,N)
    GA_DEBUG_INFO("Instruction: Hessian of test functions with elementary "
                  "transformation");
    size_type ndof = Z.sizes()[0];
    size_type Qmult = qdim / Z.sizes()[1];
    t_in.adjust_sizes(Qmult*ndof, Qmult*Z.sizes()[1], Z.sizes()[2]);
    ga_instruction_copy_hess_base::exec();
    do_transformation(t_out.sizes()[0], ndof*Qmult);
    return 0;
  }
  ga_instruction_elementary_trans_hess_base
  (base_tensor &t_, const base_tensor &Z_, size_type q,
   pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
   fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
    : ga_instruction_copy_hess_base(t_in, Z_, q),
      ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
                                           M_, icv_) {}
};
struct ga_instruction_elementary_trans_diverg_base
  : public ga_instruction_copy_diverg_base,
    ga_instruction_elementary_trans_base {
  virtual int exec() { // --> t_out(Qmult*ndof)
    GA_DEBUG_INFO("Instruction: divergence of test functions with elementary "
                  "transformation");
    size_type ndof = Z.sizes()[0];
    size_type Qmult = qdim / Z.sizes()[1];
    t_in.adjust_sizes(Qmult*ndof);
    ga_instruction_copy_diverg_base::exec();
    do_transformation(t_out.sizes()[0], ndof*Qmult);
    return 0;
  }
  ga_instruction_elementary_trans_diverg_base
  (base_tensor &t_, const base_tensor &Z_, size_type q,
   pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
   fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
    : ga_instruction_copy_diverg_base(t_in, Z_, q),
      ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
                                           M_, icv_) {}
};
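
// Elementary tensor arithmetic: addition, subtraction, scaling, copies and
// sign change, operating on the whole tensor seen as a vector.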
struct ga_instruction_add : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1, &tc2;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: addition");
    GA_DEBUG_ASSERT(t.size() == tc1.size(),
                    "internal error " << t.size() << " != " << tc1.size());
    GA_DEBUG_ASSERT(t.size() == tc2.size(),
                    "internal error " << t.size() << " != " << tc2.size());
    gmm::add(tc1.as_vector(), tc2.as_vector(), t.as_vector());
    return 0;
  }
  ga_instruction_add(base_tensor &t_,
                     const base_tensor &tc1_, const base_tensor &tc2_)
    : t(t_), tc1(tc1_), tc2(tc2_) {}
};
struct ga_instruction_add_to : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: addition");
    GA_DEBUG_ASSERT(t.size() == tc1.size(), "internal error " << t.size()
                    << " incompatible with " << tc1.size());
    gmm::add(tc1.as_vector(), t.as_vector());
    return 0;
  }
  ga_instruction_add_to(base_tensor &t_, const base_tensor &tc1_)
    : t(t_), tc1(tc1_) {}
};
struct ga_instruction_add_to_coeff : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1;
  scalar_type &coeff;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: addition with scale");
    GA_DEBUG_ASSERT(t.size() == tc1.size(), "internal error " << t.size()
                    << " incompatible with " << tc1.size());
    gmm::add(gmm::scaled(tc1.as_vector(), coeff), t.as_vector());
    return 0;
  }
  ga_instruction_add_to_coeff(base_tensor &t_, const base_tensor &tc1_,
                              scalar_type &coeff_)
    : t(t_), tc1(tc1_), coeff(coeff_) {}
};
struct ga_instruction_sub : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1, &tc2;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: subtraction");
    GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
                    "internal error");
    gmm::add(tc1.as_vector(), gmm::scaled(tc2.as_vector(), scalar_type(-1)),
             t.as_vector());
    return 0;
  }
  ga_instruction_sub(base_tensor &t_,
                     const base_tensor &tc1_, const base_tensor &tc2_)
    : t(t_), tc1(tc1_), tc2(tc2_) {}
};
struct ga_instruction_opposite : public ga_instruction {
  base_tensor &t;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: multiplication with -1");
    gmm::scale(t.as_vector(), scalar_type(-1));
    return 0;
  }
  ga_instruction_opposite(base_tensor &t_) : t(t_) {}
};
struct ga_instruction_print_tensor : public ga_instruction {
  base_tensor &t;
  pga_tree_node pnode;
  const fem_interpolation_context &ctx;
  const size_type &nbpt, &ipt;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: tensor print");
    cout << "Print term "; ga_print_node(pnode, cout);
    cout << " on Gauss point " << ipt << "/" << nbpt << " of element "
         << ctx.convex_num() << ": " << t << endl;
    return 0;
  }
  ga_instruction_print_tensor(base_tensor &t_, pga_tree_node pnode_,
                              const fem_interpolation_context &ctx_,
                              const size_type &nbpt_, const size_type &ipt_)
    : t(t_), pnode(pnode_), ctx(ctx_), nbpt(nbpt_), ipt(ipt_) {}
};
struct ga_instruction_copy_tensor : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: tensor copy");
    std::copy(tc1.begin(), tc1.end(), t.begin());
    return 0;
  }
  ga_instruction_copy_tensor(base_tensor &t_, const base_tensor &tc1_)
    : t(t_), tc1(tc1_) {}
};
struct ga_instruction_clear_tensor : public ga_instruction {
  base_tensor &t;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: clear tensor");
    std::fill(t.begin(), t.end(), scalar_type(0));
    return 0;
  }
  ga_instruction_clear_tensor(base_tensor &t_) : t(t_) {}
};
struct ga_instruction_copy_tensor_possibly_void : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: tensor copy possibly void");
    if (tc1.size())
      gmm::copy(tc1.as_vector(), t.as_vector());
    else
      gmm::clear(t.as_vector());
    return 0;
  }
  ga_instruction_copy_tensor_possibly_void(base_tensor &t_,
                                           const base_tensor &tc1_)
    : t(t_), tc1(tc1_) {}
};
struct ga_instruction_copy_scalar : public ga_instruction {
  scalar_type &t;
  const scalar_type &t1;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: scalar copy");
    t = t1;
    return 0;
  }
  ga_instruction_copy_scalar(scalar_type &t_, const scalar_type &t1_)
    : t(t_), t1(t1_) {}
};
struct ga_instruction_copy_vect : public ga_instruction {
  base_vector &t;
  const base_vector &t1;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: fixed size tensor copy");
    gmm::copy(t1, t);
    return 0;
  }
  ga_instruction_copy_vect(base_vector &t_, const base_vector &t1_)
    : t(t_), t1(t1_) {}
};
struct ga_instruction_trace : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1;
  size_type n;
  // tc1(:,i,i) --> t(:)
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: Trace");
    GA_DEBUG_ASSERT(t.size()*n*n == tc1.size(), "Wrong sizes");
    size_type s = t.size() * (n+1); // stride between diagonal entries
    auto it = t.begin();
    auto it1 = tc1.begin();
    for (; it != t.end(); ++it, ++it1) {
      auto it2 = it1;
      *it = *it2;
      for (size_type i = 1; i < n; ++i) { it2 += s; *it += *it2; }
    }
    return 0;
  }
  ga_instruction_trace(base_tensor &t_, const base_tensor &tc1_,
                       size_type n_)
    : t(t_), tc1(tc1_), n(n_) {}
};
struct ga_instruction_deviator : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1;
  size_type n;
  // t = tc1 - Trace(tc1)/n * Id on the last two indices
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: Deviator");
    GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");

    gmm::copy(tc1.as_vector(), t.as_vector());

    size_type nb = t.size()/(n*n);
    size_type s = nb * (n+1), j = 0; // stride between diagonal entries
    base_tensor::iterator it = t.begin();
    base_tensor::const_iterator it1 = tc1.begin();
    for (; j < nb; ++it, ++it1, ++j) {
      scalar_type tr(0);
      base_tensor::const_iterator it2 = it1;
      tr += *it2;
      for (size_type i = 1; i < n; ++i) { it2 += s; tr += *it2; }
      tr /= scalar_type(n);

      base_tensor::iterator it3 = it;
      *it3 -= tr;
      for (size_type i = 1; i < n; ++i) { it3 += s; *it3 -= tr; }
    }
    return 0;
  }
  ga_instruction_deviator(base_tensor &t_, const base_tensor &tc1_,
                          size_type n_)
    : t(t_), tc1(tc1_), n(n_) {}
};
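
// Index permutation instructions: each one streams through the result
// tensor in storage order and gathers the matching entries of tc1 at the
// permuted linear index.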
struct ga_instruction_transpose : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1;
  size_type n1, n2, nn;
  // swaps two index blocks of sizes n1 and n2, with inner block n0 and
  // outer block nn
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: transpose");
    GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");

    size_type n0 = t.size() / (n1*n2*nn);
    auto it = t.begin();
    for (size_type i = 0; i < nn; ++i)
      for (size_type j = 0; j < n1; ++j)
        for (size_type k = 0; k < n2; ++k) {
          size_type s = n0*(j + n1*(k + n2*i));
          for (size_type l = 0; l < n0; ++l, ++it)
            *it = tc1[s+l];
        }
    GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
    return 0;
  }
  ga_instruction_transpose(base_tensor &t_, const base_tensor &tc1_,
                           size_type n1_, size_type n2_, size_type nn_)
    : t(t_), tc1(tc1_), n1(n1_), n2(n2_), nn(nn_) {}
};
struct ga_instruction_swap_indices : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1;
  size_type nn1, nn2, ii2, ii3;
  // swaps indices of sizes nn1 and nn2, separated by blocks ii1, ii2, ii3
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: swap indices");
    GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
    size_type ii1 = t.size() / (nn1*nn2*ii2*ii3);

    auto it = t.begin();
    for (size_type i = 0; i < ii3; ++i)
      for (size_type j = 0; j < nn1; ++j)
        for (size_type k = 0; k < ii2; ++k)
          for (size_type l = 0; l < nn2; ++l) {
            size_type ind = j*ii1+k*ii1*nn1+l*ii1*nn1*ii2+i*ii1*nn1*ii2*nn2;
            for (size_type m = 0; m < ii1; ++m, ++it)
              *it = tc1[m+ind];
          }
    GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
    return 0;
  }
  ga_instruction_swap_indices(base_tensor &t_, const base_tensor &tc1_,
                              size_type n1_, size_type n2_,
                              size_type i2_, size_type i3_)
    : t(t_), tc1(tc1_), nn1(n1_), nn2(n2_), ii2(i2_), ii3(i3_) {}
};
struct ga_instruction_index_move_last : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1;
  size_type nn, ii2;
  // moves the index of size nn to the last position
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: move index to last position");
    GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
    size_type ii1 = t.size() / (nn*ii2);

    auto it = t.begin();
    for (size_type i = 0; i < nn; ++i)
      for (size_type j = 0; j < ii2; ++j) {
        size_type ind = ii1*(i + nn*j);
        for (size_type k = 0; k < ii1; ++k, ++it)
          *it = tc1[k+ind];
      }
    GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
    return 0;
  }
  ga_instruction_index_move_last(base_tensor &t_, const base_tensor &tc1_,
                                 size_type n_, size_type i2_)
    : t(t_), tc1(tc1_), nn(n_), ii2(i2_) {}
};
struct ga_instruction_transpose_no_test : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1;
  size_type n1, n2, nn;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: transpose");
    GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");

    auto it = t.begin();
    for (size_type i = 0; i < nn; ++i) {
      size_type s1 = i*n1*n2;
      for (size_type j = 0; j < n1; ++j) {
        size_type s2 = s1 + j;
        for (size_type k = 0; k < n2; ++k, ++it)
          *it = tc1[s2 + k*n1];
      }
    }
    GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
    return 0;
  }
  ga_instruction_transpose_no_test(base_tensor &t_, const base_tensor &tc1_,
                                   size_type n1_, size_type n2_,
                                   size_type nn_)
    : t(t_), tc1(tc1_), n1(n1_), n2(n2_), nn(nn_) {}
};
struct ga_instruction_transpose_test : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: copy tensor and transpose test functions");
    GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
    GA_DEBUG_ASSERT(t.sizes().size() >= 2, "Wrong sizes");

    size_type s1 = t.sizes()[0], s2 = t.sizes()[1], s3 = s1*s2;
    size_type nn = t.size() / s3;
    base_tensor::iterator it = t.begin();
    for (size_type k = 0; k < nn; ++k)
      for (size_type j = 0; j < s2; ++j)
        for (size_type i = 0; i < s1; ++i, ++it)
          *it = tc1[j+s2*i+k*s3];
    return 0;
  }
  ga_instruction_transpose_test(base_tensor &t_, const base_tensor &tc1_)
    : t(t_), tc1(tc1_) {}
};
struct ga_instruction_sym : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: symmetric part");
    GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
    size_type order = t.sizes().size();
    size_type s1 = t.sizes()[order-2], s2 = t.sizes()[order-1];
    size_type s = t.size() / (s1*s2);
    for (size_type i = 0; i < s1; ++i)
      for (size_type j = 0; j < s2; ++j) {
        base_tensor::iterator it = t.begin() + s*(i + s1*j);
        base_tensor::const_iterator it1 = tc1.begin() + s*(i + s1*j),
                                    it1T = tc1.begin() + s*(j + s2*i);
        for (size_type k = 0; k < s; ++k) *it++ = 0.5*(*it1++ + *it1T++);
      }
    return 0;
  }
  ga_instruction_sym(base_tensor &t_, const base_tensor &tc1_)
    : t(t_), tc1(tc1_) {}
};
struct ga_instruction_skew : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: skew-symmetric part");
    GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
    size_type order = t.sizes().size();
    size_type s1 = t.sizes()[order-2], s2 = t.sizes()[order-1];
    size_type s = t.size() / (s1*s2);
    for (size_type i = 0; i < s1; ++i)
      for (size_type j = 0; j < s2; ++j) {
        base_tensor::iterator it = t.begin() + s*(i + s1*j);
        base_tensor::const_iterator it1 = tc1.begin() + s*(i + s1*j),
                                    it1T = tc1.begin() + s*(j + s2*i);
        for (size_type k = 0; k < s; ++k) *it++ = 0.5*(*it1++ - *it1T++);
      }
    return 0;
  }
  ga_instruction_skew(base_tensor &t_, const base_tensor &tc1_)
    : t(t_), tc1(tc1_) {}
};
struct ga_instruction_scalar_add : public ga_instruction {
  scalar_type &t;
  const scalar_type &c, &d;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: scalar addition");
    t = c + d;
    return 0;
  }
  ga_instruction_scalar_add(scalar_type &t_, const scalar_type &c_,
                            const scalar_type &d_)
    : t(t_), c(c_), d(d_) {}
};
struct ga_instruction_scalar_sub : public ga_instruction {
  scalar_type &t;
  const scalar_type &c, &d;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: scalar subtraction");
    t = c - d;
    return 0;
  }
  ga_instruction_scalar_sub(scalar_type &t_, const scalar_type &c_,
                            const scalar_type &d_)
    : t(t_), c(c_), d(d_) {}
};
struct ga_instruction_scalar_scalar_mult : public ga_instruction {
  scalar_type &t;
  const scalar_type &c, &d;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: scalar multiplication");
    t = c * d;
    return 0;
  }
  ga_instruction_scalar_scalar_mult(scalar_type &t_, const scalar_type &c_,
                                    const scalar_type &d_)
    : t(t_), c(c_), d(d_) {}
};
struct ga_instruction_scalar_scalar_div : public ga_instruction {
  scalar_type &t;
  const scalar_type &c, &d;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: scalar division");
    t = c / d;
    return 0;
  }
  ga_instruction_scalar_scalar_div(scalar_type &t_, const scalar_type &c_,
                                   const scalar_type &d_)
    : t(t_), c(c_), d(d_) {}
};
struct ga_instruction_scalar_mult : public ga_instruction {
  base_tensor &t, &tc1;
  const scalar_type &c;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: multiplication of a tensor by a scalar " << c);
    gmm::copy(gmm::scaled(tc1.as_vector(), c), t.as_vector());
    return 0;
  }
  ga_instruction_scalar_mult(base_tensor &t_, base_tensor &tc1_,
                             const scalar_type &c_)
    : t(t_), tc1(tc1_), c(c_) {}
};
struct ga_instruction_scalar_div : public ga_instruction {
  base_tensor &t, &tc1;
  const scalar_type &c;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: division of a tensor by a scalar");
    GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");

    base_tensor::iterator it = t.begin(), it1 = tc1.begin();
    for (; it != t.end(); ++it, ++it1) *it = *it1/c;
    return 0;
  }
  ga_instruction_scalar_div(base_tensor &t_, base_tensor &tc1_,
                            const scalar_type &c_)
    : t(t_), tc1(tc1_), c(c_) {}
};
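
// Cross products in dimension 3; the _tf variant combines two test
// function tensors (with blocks of sizes n1 and n2) and the inv flag
// negates the result.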
struct ga_instruction_cross_product_tf : public ga_instruction {
  base_tensor &t, &tc1, &tc2;
  bool inv;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: Cross product with test functions");

    size_type n1 = tc1.size() / 3, n2 = tc2.size() / 3, nn=n1*n2;
    GA_DEBUG_ASSERT(t.size() == nn*3, "Bad tensor size for cross product");
    size_type mm=2*nn, n1_2 = 2*n1, n2_2 = 2*n2;
    base_tensor::iterator it = t.begin(), it2 = tc2.begin();
    if (inv)
      for (size_type i = 0; i < n2; ++i, ++it2) {
        base_tensor::iterator it1 = tc1.begin();
        for (size_type j = 0; j < n1; ++j, ++it, ++it1) {
          *it    = - it1[n1]  *it2[n2_2] + it1[n1_2]*it2[n2];
          it[nn] = - it1[n1_2]*it2[0]    + it1[0]   *it2[n2_2];
          it[mm] = - it1[0]   *it2[n2]   + it1[n1]  *it2[0];
        }
      }
    else
      for (size_type i = 0; i < n2; ++i, ++it2) {
        base_tensor::iterator it1 = tc1.begin();
        for (size_type j = 0; j < n1; ++j, ++it, ++it1) {
          *it    = it1[n1]  *it2[n2_2] - it1[n1_2]*it2[n2];
          it[nn] = it1[n1_2]*it2[0]    - it1[0]   *it2[n2_2];
          it[mm] = it1[0]   *it2[n2]   - it1[n1]  *it2[0];
        }
      }
    return 0;
  }
  ga_instruction_cross_product_tf(base_tensor &t_, base_tensor &tc1_,
                                  base_tensor &tc2_, bool inv_)
    : t(t_), tc1(tc1_), tc2(tc2_), inv(inv_) {}
};
struct ga_instruction_cross_product : public ga_instruction {
  base_tensor &t, &tc1, &tc2;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: Cross product");
    GA_DEBUG_ASSERT(t.size() == 3 && tc1.size() == 3 && tc2.size() == 3,
                    "Bad tensor size for cross product");
    t[0] = tc1[1]*tc2[2] - tc1[2]*tc2[1];
    t[1] = tc1[2]*tc2[0] - tc1[0]*tc2[2];
    t[2] = tc1[0]*tc2[1] - tc1[1]*tc2[0];
    return 0;
  }
  ga_instruction_cross_product(base_tensor &t_, base_tensor &tc1_,
                               base_tensor &tc2_)
    : t(t_), tc1(tc1_), tc2(tc2_) {}
};
2180 struct ga_instruction_dotmult :
public ga_instruction {
2181 base_tensor &t, &tc1, &tc2;
2182 virtual int exec() {
2183 GA_DEBUG_INFO(
"Instruction: componentwise multiplication");
2184 size_type s2 = tc2.size(), s1_1 = tc1.size() / s2;
2185 GA_DEBUG_ASSERT(t.size() == s1_1*s2,
"Wrong sizes");
2187 base_tensor::iterator it = t.begin();
2189 for (
size_type m = 0; m < s1_1; ++m, ++it)
2190 *it = tc1[m+s1_1*i] * tc2[i];
2193 ga_instruction_dotmult(base_tensor &t_, base_tensor &tc1_,
2195 : t(t_), tc1(tc1_), tc2(tc2_) {}
  struct ga_instruction_dotdiv : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: componentwise division");
      size_type s2 = tc2.size(), s1_1 = tc1.size() / s2;
      GA_DEBUG_ASSERT(t.size() == s1_1*s2, "Wrong sizes");

      base_tensor::iterator it = t.begin();
      for (size_type i = 0; i < s2; ++i)
        for (size_type m = 0; m < s1_1; ++m, ++it)
          *it = tc1[m+s1_1*i] / tc2[i];
      return 0;
    }
    ga_instruction_dotdiv(base_tensor &t_, base_tensor &tc1_,
                          base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };
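  // The componentwise operations broadcast tc2 over the leading (test
  // function) dimensions of tc1: tc1 is viewed as an s1_1 x s2 array whose
  // trailing index matches tc2. E.g. (sketch) for a plain Hadamard product
  // of two value tensors, s1_1 = 1 and t[i] = tc1[i]*tc2[i] entrywise; with
  // one test-function dimension of size s1_1, each of the s2 components of
  // tc2 scales a whole block of s1_1 test values.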
  struct ga_instruction_dotmult_spec : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: specific componentwise multiplication");
      size_type s2_1 = tc2.sizes()[0], s2_2 = tc2.size() / s2_1;
      size_type s1_1 = tc1.size() / s2_2;

      base_tensor::iterator it = t.begin();
      for (size_type i = 0; i < s2_2; ++i)
        for (size_type n = 0; n < s2_1; ++n)
          for (size_type m = 0; m < s1_1; ++m, ++it)
            *it = tc1[m+s1_1*i] * tc2[n+s2_1*i];
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_dotmult_spec(base_tensor &t_, base_tensor &tc1_,
                                base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };

  // Single contraction of one tensor over two of its indices (partial
  // trace).
  struct ga_instruction_contract_1_1 : public ga_instruction {
    base_tensor &t, &tc1;
    size_type nn, ii2, ii3;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: single contraction on a single tensor");

      size_type ii1 = tc1.size() / (nn*nn*ii2*ii3);

      base_tensor::iterator it = t.begin();
      for (size_type i = 0; i < ii3; ++i)
        for (size_type j = 0; j < ii2; ++j)
          for (size_type k = 0; k < ii1; ++k, ++it) {
            *it = scalar_type(0);
            size_type pre_ind = k+j*ii1*nn+i*ii1*nn*ii2*nn;
            for (size_type n = 0; n < nn; ++n)
              *it += tc1[pre_ind+n*ii1+n*ii1*nn*ii2];
          }

      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_contract_1_1(base_tensor &t_, base_tensor &tc1_,
                                size_type n_, size_type i2_, size_type i3_)
      : t(t_), tc1(tc1_), nn(n_), ii2(i2_), ii3(i3_) {}
  };
  // Single contraction of a pair of tensors.
  struct ga_instruction_contract_2_1 : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type nn, ii1, ii2, ii3, ii4;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: single contraction on two tensors");

      size_type ift1 = tc1.size() / (nn*ii1*ii2);
      size_type ift2 = tc2.size() / (nn*ii3*ii4);

      base_tensor::iterator it = t.begin();
      for (size_type i = 0; i < ii4; ++i)
        for (size_type j = 0; j < ii3; ++j)
          for (size_type p = 0; p < ift2; ++p)
            for (size_type k = 0; k < ii2; ++k)
              for (size_type l = 0; l < ii1; ++l)
                for (size_type q = 0; q < ift1; ++q, ++it) {
                  *it = scalar_type(0);
                  size_type ind1 = q+l*ift1+k*ift1*ii1*nn;
                  size_type ind2 = p+j*ift2+i*ift2*ii3*nn;
                  for (size_type n = 0; n < nn; ++n)
                    *it += tc1[ind1+n*ift1*ii1] * tc2[ind2+n*ift2*ii3];
                }

      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_contract_2_1(base_tensor &t_, base_tensor &tc1_,
                                base_tensor &tc2_,
                                size_type n_, size_type i1_, size_type i2_,
                                size_type i3_, size_type i4_)
      : t(t_), tc1(tc1_), tc2(tc2_), nn(n_),
        ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_) {}
  };

  // Single contraction of a pair of tensors, result indices in reverse
  // order (second tensor index fastest).
  struct ga_instruction_contract_2_1_rev : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type nn, ii1, ii2, ii3, ii4;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: single contraction on two tensors");

      size_type ift1 = tc1.size() / (nn*ii1*ii2);
      size_type ift2 = tc2.size() / (nn*ii3*ii4);

      base_tensor::iterator it = t.begin();
      for (size_type i = 0; i < ii4; ++i)
        for (size_type j = 0; j < ii3; ++j)
          for (size_type k = 0; k < ii2; ++k)
            for (size_type l = 0; l < ii1; ++l)
              for (size_type q = 0; q < ift1; ++q)
                for (size_type p = 0; p < ift2; ++p, ++it) {
                  *it = scalar_type(0);
                  size_type ind1 = q+l*ift1+k*ift1*ii1*nn;
                  size_type ind2 = p+j*ift2+i*ift2*ii3*nn;
                  for (size_type n = 0; n < nn; ++n)
                    *it += tc1[ind1+n*ift1*ii1] * tc2[ind2+n*ift2*ii3];
                }

      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_contract_2_1_rev(base_tensor &t_, base_tensor &tc1_,
                                    base_tensor &tc2_,
                                    size_type n_, size_type i1_,
                                    size_type i2_, size_type i3_,
                                    size_type i4_)
      : t(t_), tc1(tc1_), tc2(tc2_), nn(n_),
        ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_) {}
  };
  // Double contraction of a pair of tensors.
  struct ga_instruction_contract_2_2 : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6;
    bool inv_tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: double contraction on two tensors");

      size_type ift1 = tc1.size() / (nn1*nn2*ii1*ii2*ii3);
      size_type ift2 = tc2.size() / (nn1*nn2*ii3*ii4*ii5);

      size_type sn1 = ift2*ii4, sn2 = ift2*ii4*nn1*ii5;
      if (inv_tc2) std::swap(sn1, sn2);

      base_tensor::iterator it = t.begin();
      for (size_type i = 0; i < ii6; ++i)
        for (size_type j = 0; j < ii5; ++j)
          for (size_type k = 0; k < ii4; ++k)
            for (size_type r = 0; r < ift2; ++r)
              for (size_type l = 0; l < ii3; ++l)
                for (size_type p = 0; p < ii2; ++p)
                  for (size_type q = 0; q < ii1; ++q)
                    for (size_type s = 0; s < ift1; ++s, ++it) {
                      *it = scalar_type(0);
                      size_type ind1
                        = s+q*ift1+p*ift1*ii1*nn1+l*ift1*ii1*nn1*ii2*nn2;
                      size_type ind2
                        = r+k*ift2+j*ift2*ii4*nn1+i*ift2*ii4*nn1*ii5*nn2;
                      for (size_type n1 = 0; n1 < nn1; ++n1)
                        for (size_type n2 = 0; n2 < nn2; ++n2)
                          *it += tc1[ind1+n1*ift1*ii1+n2*ift1*ii1*nn1*ii2]
                               * tc2[ind2+n1*sn1+n2*sn2];
                    }

      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_contract_2_2(base_tensor &t_, base_tensor &tc1_,
                                base_tensor &tc2_,
                                size_type n1_, size_type n2_,
                                size_type i1_, size_type i2_, size_type i3_,
                                size_type i4_, size_type i5_, size_type i6_,
                                bool inv_tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_), nn1(n1_), nn2(n2_),
        ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_), ii5(i5_), ii6(i6_),
        inv_tc2(inv_tc2_) {}
  };

  // Double contraction of a pair of tensors, result indices in reverse
  // order (second tensor index fastest).
  struct ga_instruction_contract_2_2_rev : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6;
    bool inv_tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: double contraction on two tensors");

      size_type ift1 = tc1.size() / (nn1*nn2*ii1*ii2*ii3);
      size_type ift2 = tc2.size() / (nn1*nn2*ii3*ii4*ii5);

      size_type sn1 = ift2*ii4, sn2 = ift2*ii4*nn1*ii5;
      if (inv_tc2) std::swap(sn1, sn2);

      base_tensor::iterator it = t.begin();
      for (size_type i = 0; i < ii6; ++i)
        for (size_type j = 0; j < ii5; ++j)
          for (size_type k = 0; k < ii4; ++k)
            for (size_type l = 0; l < ii3; ++l)
              for (size_type p = 0; p < ii2; ++p)
                for (size_type q = 0; q < ii1; ++q)
                  for (size_type s = 0; s < ift1; ++s)
                    for (size_type r = 0; r < ift2; ++r, ++it) {
                      *it = scalar_type(0);
                      size_type ind1
                        = s+q*ift1+p*ift1*ii1*nn1+l*ift1*ii1*nn1*ii2*nn2;
                      size_type ind2
                        = r+k*ift2+j*ift2*ii4*nn1+i*ift2*ii4*nn1*ii5*nn2;
                      for (size_type n1 = 0; n1 < nn1; ++n1)
                        for (size_type n2 = 0; n2 < nn2; ++n2)
                          *it += tc1[ind1+n1*ift1*ii1+n2*ift1*ii1*nn1*ii2]
                               * tc2[ind2+n1*sn1+n2*sn2];
                    }

      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_contract_2_2_rev(base_tensor &t_, base_tensor &tc1_,
                                    base_tensor &tc2_,
                                    size_type n1_, size_type n2_,
                                    size_type i1_, size_type i2_,
                                    size_type i3_, size_type i4_,
                                    size_type i5_, size_type i6_,
                                    bool inv_tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_), nn1(n1_), nn2(n2_),
        ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_), ii5(i5_), ii6(i6_),
        inv_tc2(inv_tc2_) {}
  };
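  // All four "contract" instructions above follow the same addressing
  // scheme: operands are stored first-index-fastest, so a component with
  // multi-index (a, i, n, j) in a tensor of dimensions (ift, ii, nn, jj)
  // sits at linear position a + i*ift + n*ift*ii + j*ift*ii*nn. A
  // contracted index n then contributes a fixed stride (ift*ii here), which
  // is why the innermost accumulation loops advance by precomputed products
  // such as n*ift1*ii1. A sketch for a single contraction
  // C(i,j) = sum_n A(i,n) B(j,n) in this convention (illustration only):
  //
  //   for (size_type j = 0; j < sj; ++j)
  //     for (size_type i = 0; i < si; ++i, ++it) {
  //       *it = scalar_type(0);
  //       for (size_type n = 0; n < nn; ++n)
  //         *it += A[i + n*si] * B[j + n*sj];
  //     }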
  // Order one contraction: dot product or matrix multiplication.
  struct ga_instruction_matrix_mult : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type n;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: order one contraction "
                    "(dot product or matrix multiplication)");
      size_type s1 = tc1.size() / n;
      size_type s2 = tc2.size() / n;

      base_tensor::iterator it = t.begin();
      for (size_type k = 0; k < s2; ++k)
        for (size_type i = 0; i < s1; ++i, ++it) {
          *it = scalar_type(0);
          for (size_type j = 0; j < n; ++j)
            *it += tc1[i+j*s1] * tc2[j+k*n];
        }
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_matrix_mult(base_tensor &t_, base_tensor &tc1_,
                               base_tensor &tc2_, size_type n_)
      : t(t_), tc1(tc1_), tc2(tc2_), n(n_) {}
  };

  struct ga_instruction_matrix_mult_spec : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type n, m, p; // tc1 of size q*m*n, tc2 of size l*n*p
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: specific order one contraction "
                    "(dot product or matrix multiplication)");
      size_type q = tc1.size() / (m*n), l = tc2.size() / (p*n);

      base_tensor::iterator it = t.begin();
      for (size_type r = 0; r < p; ++r)
        for (size_type j = 0; j < l; ++j)
          for (size_type k = 0; k < m; ++k)
            for (size_type i = 0; i < q; ++i, ++it) {
              *it = scalar_type(0);
              for (size_type s = 0; s < n; ++s)
                *it += tc1[i+k*q+s*q*m] * tc2[j+s*l+r*l*n];
            }
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_matrix_mult_spec(base_tensor &t_, base_tensor &tc1_,
                                    base_tensor &tc2_, size_type n_,
                                    size_type m_, size_type p_)
      : t(t_), tc1(tc1_), tc2(tc2_), n(n_), m(m_), p(p_) {}
  };

  struct ga_instruction_matrix_mult_spec2 : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type n, m, p; // tc1 of size q*m*n, tc2 of size l*n*p
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: specific order one contraction "
                    "(dot product or matrix multiplication)");
      size_type q = tc1.size() / (m*n), l = tc2.size() / (p*n);

      base_tensor::iterator it = t.begin();
      for (size_type r = 0; r < p; ++r)
        for (size_type k = 0; k < m; ++k)
          for (size_type i = 0; i < q; ++i)
            for (size_type j = 0; j < l; ++j, ++it) {
              *it = scalar_type(0);
              for (size_type s = 0; s < n; ++s)
                *it += tc1[i+k*q+s*q*m] * tc2[j+s*l+r*l*n];
            }
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_matrix_mult_spec2(base_tensor &t_, base_tensor &tc1_,
                                     base_tensor &tc2_, size_type n_,
                                     size_type m_, size_type p_)
      : t(t_), tc1(tc1_), tc2(tc2_), n(n_), m(m_), p(p_) {}
  };
  // Generic contraction of two tensors over their trailing index.
  struct ga_instruction_contraction : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type nn;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: contraction operation of size " << nn);
#if defined(GA_USES_BLAS)
      long m = int(tc1.size()/nn), k = int(nn), n = int(tc2.size()/nn);
      long lda = m, ldb = n, ldc = m;
      char T = 'T', N = 'N';
      scalar_type alpha(1), beta(0);
      gmm::dgemm_(&N, &T, &m, &n, &k, &alpha, &(tc1[0]), &lda, &(tc2[0]),
                  &ldb, &beta, &(t[0]), &ldc);
#else
      size_type s1 = tc1.size()/nn, s2 = tc2.size()/nn;
      GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");

      auto it1=tc1.begin(), it2=tc2.begin(), it2end=it2 + s2;
      for (auto it = t.begin(); it != t.end(); ++it) {
        auto it11 = it1, it22 = it2;
        scalar_type a = (*it11) * (*it22);
        for (size_type i = 1; i < nn; ++i)
          { it11 += s1; it22 += s2; a += (*it11) * (*it22); }
        *it = a;
        ++it2; if (it2 == it2end) { it2 = tc2.begin(), ++it1; }
      }
#endif
      return 0;
    }
    ga_instruction_contraction(base_tensor &t_, base_tensor &tc1_,
                               base_tensor &tc2_, size_type n_)
      : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
  };
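  // The fallback path above computes t(i,j) = sum_k tc1[i+k*s1]*tc2[j+k*s2]
  // while streaming through t linearly; the BLAS path delegates the same
  // reduction to dgemm. A minimal reference version, kept here as an
  // illustration in the spirit of the unoptimized variants retained
  // elsewhere in this file (sketch, identical result to the loop above):
  //
  //   size_type s1 = tc1.size()/nn, s2 = tc2.size()/nn;
  //   auto it = t.begin();
  //   for (size_type i = 0; i < s1; ++i)        // tc1 index
  //     for (size_type j = 0; j < s2; ++j, ++it) { // tc2 index, fastest
  //       *it = scalar_type(0);
  //       for (size_type k = 0; k < nn; ++k)
  //         *it += tc1[i+k*s1] * tc2[j+k*s2];
  //     }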
  // Contraction optimized for a dense second tensor with vectorization of
  // type 2 (u*Id). Only the nonzero diagonal entries of tc2 are visited.
  struct ga_instruction_contraction_opt0_2 : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type n, q;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: contraction operation of size " << n*q <<
                    " optimized for vectorized second tensor of type 2");
      size_type nn = n*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn,
        s2_q = s2/q;
      size_type s1_qq = s1*q, s2_qq = s2*q;
      GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");

      auto it = t.begin(), it1 = tc1.begin();
      for (size_type i = 0; i < s1; ++i, ++it1) {
        auto it2 = tc2.begin();
        for (size_type j = 0; j < s2_q; ++j, it2 += q) {
          auto itt1 = it1, itt2 = it2;
          for (size_type l = 0; l < q; ++l, ++it) {
            if (l) { itt1 += s1; itt2 += s2+1; }
            auto ittt1 = itt1, ittt2 = itt2;
            *it = *ittt1 * (*ittt2);
            for (size_type m = 1; m < n; ++m) {
              ittt1 += s1_qq; ittt2 += s2_qq;
              *it += *ittt1 * (*ittt2);
            }
          }
        }
      }
      return 0;
    }
    ga_instruction_contraction_opt0_2(base_tensor &t_, base_tensor &tc1_,
                                      base_tensor &tc2_,
                                      size_type n_, size_type q_)
      : t(t_), tc1(tc1_), tc2(tc2_), n(n_), q(q_) {}
  };

  // Unrolled version of the above (contraction size N fixed at compile
  // time).
  template <int N>
  struct ga_instruction_contraction_opt0_2_unrolled
    : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type q;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: unrolled contraction operation of size "
                    << N*q
                    << " optimized for vectorized second tensor of type 2");
      size_type nn = N*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn,
        s2_q = s2/q;
      size_type s1_qq = s1*q, s2_qq = s2*q;
      GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");

      auto it = t.begin(), it1 = tc1.begin();
      for (size_type i = 0; i < s1; ++i, ++it1) {
        auto it2 = tc2.begin();
        for (size_type j = 0; j < s2_q; ++j, it2 += q) {
          auto itt1 = it1, itt2 = it2;
          for (size_type l = 0; l < q; ++l, ++it) {
            if (l) { itt1 += s1; itt2 += s2+1; }
            auto ittt1 = itt1, ittt2 = itt2;
            *it = *ittt1 * (*ittt2);
            for (int m = 1; m < N; ++m) {
              ittt1 += s1_qq; ittt2 += s2_qq;
              *it += *ittt1 * (*ittt2);
            }
          }
        }
      }
      return 0;
    }
    ga_instruction_contraction_opt0_2_unrolled
    (base_tensor &t_, base_tensor &tc1_, base_tensor &tc2_, size_type q_)
      : t(t_), tc1(tc1_), tc2(tc2_), q(q_) {}
  };

  // Doubly unrolled version (contraction size N and component count Q both
  // fixed at compile time).
  template <int N, int Q>
  struct ga_instruction_contraction_opt0_2_dunrolled
    : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: unrolled contraction operation of size "
                    << N*Q
                    << " optimized for vectorized second tensor of type 2");
      size_type s1 = tc1.size()/(N*Q), s2 = tc2.size()/(N*Q), s2_q = s2/Q;
      size_type s1_qq = s1*Q, s2_qq = s2*Q;
      GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");

      auto it = t.begin(), it1 = tc1.begin();
      for (size_type i = 0; i < s1; ++i, ++it1) {
        auto it2 = tc2.begin();
        for (size_type j = 0; j < s2_q; ++j, it2 += Q) {
          auto itt1 = it1, itt2 = it2;
          for (int l = 0; l < Q; ++l, ++it) {
            if (l) { itt1 += s1; itt2 += s2+1; }
            auto ittt1 = itt1, ittt2 = itt2;
            *it = *ittt1 * (*ittt2);
            for (int m = 1; m < N; ++m) {
              ittt1 += s1_qq; ittt2 += s2_qq;
              *it += *ittt1 * (*ittt2);
            }
          }
        }
      }
      return 0;
    }
    ga_instruction_contraction_opt0_2_dunrolled
    (base_tensor &t_, base_tensor &tc1_, base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };
  // Contraction optimized for a dense first tensor with vectorization of
  // type 2 (u*Id).
  struct ga_instruction_contraction_opt2_0 : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type n, q;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: contraction operation of size " << n*q <<
                    " optimized for vectorized first tensor of type 2");
      size_type nn = n*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn;
      size_type s1_q = s1/q, s1_qq = s1*q, s2_qq = s2*q;
      GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");

      auto it = t.begin();
      for (size_type i = 0; i < s1_q; ++i) {
        auto it1 = tc1.begin() + i*q;
        for (size_type l = 0; l < q; ++l) {
          if (l) it1 += s1+1;
          auto it2 = tc2.begin() + l*s2;
          for (size_type j = 0; j < s2; ++j, ++it, ++it2) {
            auto itt1 = it1, itt2 = it2;
            *it = *itt1 * (*itt2);
            for (size_type m = 1; m < n; ++m) {
              itt1 += s1_qq; itt2 += s2_qq;
              *it += *itt1 * (*itt2);
            }
          }
        }
      }
      return 0;
    }
    ga_instruction_contraction_opt2_0(base_tensor &t_, base_tensor &tc1_,
                                      base_tensor &tc2_,
                                      size_type n_, size_type q_)
      : t(t_), tc1(tc1_), tc2(tc2_), n(n_), q(q_) { }
  };

  // Unrolled version of the above (contraction size N fixed at compile
  // time).
  template <int N>
  struct ga_instruction_contraction_opt2_0_unrolled
    : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type q;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: unrolled contraction operation of size "
                    << N*q
                    << " optimized for vectorized first tensor of type 2");
      size_type nn = N*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn;
      size_type s1_q = s1/q, s1_qq = s1*q, s2_qq = s2*q;
      GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");

      auto it = t.begin(), it1 = tc1.begin();
      for (size_type i = 0; i < s1_q; ++i, it1 += q) {
        for (size_type l = 0; l < q; ++l) {
          auto it2 = tc2.begin() + l*s2;
          for (size_type j = 0; j < s2; ++j, ++it, ++it2) {
            auto itt1 = it1 + l*(s1+1), itt2 = it2;
            *it = *itt1 * (*itt2);
            for (int m = 1; m < N; ++m) {
              itt1 += s1_qq; itt2 += s2_qq;
              *it += *itt1 * (*itt2);
            }
          }
        }
      }
      return 0;
    }
    ga_instruction_contraction_opt2_0_unrolled
    (base_tensor &t_, base_tensor &tc1_, base_tensor &tc2_, size_type q_)
      : t(t_), tc1(tc1_), tc2(tc2_), q(q_) {}
  };

  // Doubly unrolled version (contraction size N and component count Q both
  // fixed at compile time).
  template <int N, int Q>
  struct ga_instruction_contraction_opt2_0_dunrolled
    : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: unrolled contraction operation of size "
                    << N*Q
                    << " optimized for vectorized first tensor of type 2");
      size_type s1 = tc1.size()/(N*Q), s2 = tc2.size()/(N*Q);
      size_type s1_q = s1/Q, s1_qq = s1*Q, s2_qq = s2*Q;
      GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");

      auto it = t.begin(), it1 = tc1.begin();
      for (size_type i = 0; i < s1_q; ++i, it1 += Q) {
        for (int l = 0; l < Q; ++l) {
          auto it2 = tc2.begin() + l*s2;
          for (size_type j = 0; j < s2; ++j, ++it, ++it2) {
            auto itt1 = it1 + l*(s1+1), itt2 = it2;
            *it = *itt1 * (*itt2);
            for (int m = 1; m < N; ++m) {
              itt1 += s1_qq; itt2 += s2_qq;
              *it += *itt1 * (*itt2);
            }
          }
        }
      }
      return 0;
    }
    ga_instruction_contraction_opt2_0_dunrolled
    (base_tensor &t_, base_tensor &tc1_, base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };
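  // The opt*_2 variants above exploit "type 2" sparsity: tensors of the
  // form u*Id produced by the vectorization of a scalar FEM. For a
  // q-component field only the entries whose two component indices coincide
  // are nonzero, so the loops enumerate the nonzero diagonal directly
  // (offset l*(s+1) for component l, stride s*q along the contracted index)
  // instead of filtering zeros at run time. Layout sketch for q = 2, one
  // scalar value v per node (illustration only):
  //
  //   component block of node 0:  [ v  0 ]
  //                               [ 0  v ]   -> nonzeros at 0 and s+1
  //
  // The unrolled and doubly unrolled versions additionally fix the
  // contraction size N (and the component count Q) at compile time so the
  // innermost accumulation is fully expanded by the compiler.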
  // Contraction optimized for a second tensor with vectorization of type 1
  // (vectorized base functions): the same stored scalar multiplies all nn
  // components.
  struct ga_instruction_contraction_opt0_1 : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type nn;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: contraction operation of size " << nn <<
                    " optimized for vectorized second tensor of type 1");
      size_type ss1=tc1.size(), s1 = ss1/nn, s2=tc2.size()/nn, s2_n=s2/nn;

      auto it = t.begin(), it1 = tc1.begin();
      for (size_type i = 0; i < s1; ++i, ++it1) {
        auto it2 = tc2.begin();
        for (size_type j = 0; j < s2_n; ++j) {
          if (j) it2 += nn;
          auto itt1 = it1;
          *it++ = (*itt1) * (*it2);
          for (size_type k = 1; k < nn; ++k)
            { itt1 += s1; *it++ = (*itt1) * (*it2); }
        }
      }
      return 0;
    }
    ga_instruction_contraction_opt0_1(base_tensor &t_, base_tensor &tc1_,
                                      base_tensor &tc2_, size_type n_)
      : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
  };

  template<int N> inline void reduc_elem_unrolled_opt1_
  (const base_vector::iterator &it, const base_vector::iterator &it1,
   const scalar_type &a, const size_type &s1) {
    it[N-1] = it1[(N-1)*s1] * a;
    reduc_elem_unrolled_opt1_<N-1>(it, it1, a, s1);
  }
  template<> inline void reduc_elem_unrolled_opt1_<1>
  (const base_vector::iterator &it, const base_vector::iterator &it1,
   const scalar_type &a, const size_type &)
  { *it = (*it1) * a; }

  // Unrolled version of the above, with compile-time contraction size N.
  template <int N>
  struct ga_instruction_contraction_opt0_1_unrolled
    : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: unrolled contraction operation of size "
                    << N
                    << " optimized for vectorized second tensor of type 1");
      size_type s1 = tc1.size()/N, s2 = tc2.size()/N;
      auto it = t.begin(), it1 = tc1.begin();
      for (size_type i = 0; i < s1; ++i, ++it1) {
        auto it2 = tc2.begin(), it2e = it2 + s2;
        for (; it2 != it2e; it2 += N, it += N)
          reduc_elem_unrolled_opt1_<N>(it, it1, *it2, s1);
      }
      return 0;
    }
    ga_instruction_contraction_opt0_1_unrolled
    (base_tensor &t_, base_tensor &tc1_, base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };
  // Contraction optimized for both tensors with vectorization of type 1.
  // Only the diagonal blocks of the result are written; the remaining
  // entries are cleared beforehand through the to_clear mechanism of the
  // dispatch functions below.
  struct ga_instruction_contraction_opt1_1 : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type nn;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: contraction operation of size " << nn <<
                    " optimized for both vectorized tensors of type 1");
      size_type s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_1 = s2+1;
      GA_DEBUG_ASSERT(t.size() == s2*s1, "Internal error");
      size_type ss1 = s1/nn, ss2 = s2/nn;

      auto it2 = tc2.begin();
      for (size_type j = 0; j < ss2; ++j) {
        if (j) it2 += nn;
        auto it1 = tc1.begin(), it = t.begin() + j*nn;
        for (size_type i = 0; i < ss1; ++i) {
          if (i) { it1 += nn, it += s2*nn; }
          scalar_type a = (*it1) * (*it2);
          auto itt = it;
          *itt = a; itt += s2_1; *itt = a;
          for (size_type k = 2; k < nn; ++k) { itt += s2_1; *itt = a; }
        }
      }
      return 0;
    }
    ga_instruction_contraction_opt1_1(base_tensor &t_, base_tensor &tc1_,
                                      base_tensor &tc2_, size_type n_)
      : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
  };
  template<int N> inline scalar_type reduc_elem_unrolled__
  (base_tensor::iterator &it1, base_tensor::iterator &it2,
   const size_type &s1, const size_type &s2) {
    return (it1[(N-1)*s1])*(it2[(N-1)*s2])
           + reduc_elem_unrolled__<N-1>(it1, it2, s1, s2);
  }
  template<> inline scalar_type reduc_elem_unrolled__<1>
  (base_tensor::iterator &it1, base_tensor::iterator &it2,
   const size_type &, const size_type &)
  { return (*it1)*(*it2); }
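  // The recursion above is a compile-time unrolling device: instantiating
  // reduc_elem_unrolled__<N> expands to N multiply-adds with constant
  // offsets and no loop control. E.g. reduc_elem_unrolled__<3>(it1, it2,
  // s1, s2) expands (conceptually) to:
  //
  //   it1[2*s1]*it2[2*s2] + it1[s1]*it2[s2] + (*it1)*(*it2)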
  // Unrolled contraction, with compile-time contraction size N.
  template<int N> struct ga_instruction_contraction_unrolled
    : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: unrolled contraction operation of size "
                    << N);
      size_type s1 = tc1.size()/N, s2 = tc2.size()/N;
      GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error, " << t.size()
                      << " != " << s1 << "*" << s2);
      base_tensor::iterator it1=tc1.begin(), it2=tc2.begin(),
        it2end=it2 + s2;
      for (base_tensor::iterator it = t.begin(); it != t.end(); ++it) {
        *it = reduc_elem_unrolled__<N>(it1, it2, s1, s2);
        ++it2; if (it2 == it2end) { it2 = tc2.begin(), ++it1; }
      }
      return 0;
    }
    ga_instruction_contraction_unrolled(base_tensor &t_, base_tensor &tc1_,
                                        base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };
  template<int N, int S2> inline void reduc_elem_d_unrolled__
  (base_tensor::iterator &it, base_tensor::iterator &it1,
   base_tensor::iterator &it2, const size_type &s1, const size_type &s2) {
    *it++ = reduc_elem_unrolled__<N>(it1, it2, s1, s2);
    reduc_elem_d_unrolled__<N, S2-1>(it, it1, ++it2, s1, s2);
  }
  // A terminal case is needed for each N since partial specialization of
  // function templates is not allowed in C++.
  template<> inline void reduc_elem_d_unrolled__<1, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, const size_type &, const size_type &) { }
  template<> inline void reduc_elem_d_unrolled__<2, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, const size_type &, const size_type &) { }
  template<> inline void reduc_elem_d_unrolled__<3, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, const size_type &, const size_type &) { }
  template<> inline void reduc_elem_d_unrolled__<4, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, const size_type &, const size_type &) { }
  template<> inline void reduc_elem_d_unrolled__<5, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, const size_type &, const size_type &) { }
  template<> inline void reduc_elem_d_unrolled__<6, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, const size_type &, const size_type &) { }
  template<> inline void reduc_elem_d_unrolled__<7, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, const size_type &, const size_type &) { }
  template<> inline void reduc_elem_d_unrolled__<8, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, const size_type &, const size_type &) { }
  template<> inline void reduc_elem_d_unrolled__<9, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, const size_type &, const size_type &) { }
  template<> inline void reduc_elem_d_unrolled__<10, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, const size_type &, const size_type &) { }
  template<> inline void reduc_elem_d_unrolled__<11, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, const size_type &, const size_type &) { }
  template<> inline void reduc_elem_d_unrolled__<12, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, const size_type &, const size_type &) { }
  template<> inline void reduc_elem_d_unrolled__<13, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, const size_type &, const size_type &) { }
  template<> inline void reduc_elem_d_unrolled__<14, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, const size_type &, const size_type &) { }
  template<> inline void reduc_elem_d_unrolled__<15, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, const size_type &, const size_type &) { }
  template<> inline void reduc_elem_d_unrolled__<16, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, const size_type &, const size_type &) { }
  // Doubly unrolled contraction: both the contraction size N and the
  // second tensor size S2 are fixed at compile time.
  template<int N, int S2> struct ga_ins_red_d_unrolled
    : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: doubly unrolled contraction operation of "
                    "size " << S2 << "x" << N);
      size_type s1 = tc1.size()/N, s2 = tc2.size()/N;
      GA_DEBUG_ASSERT(s2 == S2, "Internal error");
      GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error, " << t.size()
                      << " != " << s1 << "*" << s2);
      base_tensor::iterator it = t.begin(), it1 = tc1.begin();
      for (size_type ii = 0; ii < s1; ++ii, ++it1) {
        base_tensor::iterator it2 = tc2.begin();
        reduc_elem_d_unrolled__<N, S2>(it, it1, it2, s1, s2);
      }
      GA_DEBUG_ASSERT(it == t.end(), "Internal error");
      return 0;
    }
    ga_ins_red_d_unrolled(base_tensor &t_, base_tensor &tc1_,
                          base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };
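  // Each call to reduc_elem_d_unrolled__<N,S2> emits S2 fully unrolled dot
  // products of length N, so for e.g. N=3, S2=4 the exec() body above is
  // twelve multiply-add chains with no inner branching. The terminal <N,0>
  // specializations above end the recursion; one explicit full
  // specialization per supported N is required because C++ forbids partial
  // specialization of function templates.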
  pga_instruction ga_instruction_contraction_switch
  (assembly_tensor &t_, assembly_tensor &tc1_, assembly_tensor &tc2_,
   size_type n, bool &to_clear) {
    base_tensor &t = t_.tensor(), &tc1 = tc1_.tensor(),
      &tc2 = tc2_.tensor();

    if (tc1_.sparsity() == 1 && tc2_.sparsity() == 1 &&
        tc1_.qdim() == n && tc2_.qdim() == n) {
      to_clear = true;
      t_.set_sparsity(10, tc1_.qdim());
      return std::make_shared<ga_instruction_contraction_opt1_1>
        (t, tc1, tc2, n);
    }

    if (tc2_.sparsity() == 1) {
      switch(n) {
      case 2:
        return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<2>>
          (t, tc1, tc2);
      case 3:
        return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<3>>
          (t, tc1, tc2);
      case 4:
        return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<4>>
          (t, tc1, tc2);
      case 5:
        return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<5>>
          (t, tc1, tc2);
      default:
        return std::make_shared<ga_instruction_contraction_opt0_1>
          (t,tc1,tc2, n);
      }
    }
    if (tc2_.sparsity() == 2) {
      size_type n2 = (tc2.sizes().size() > 2) ? tc2.sizes()[1] : 1;
      size_type q2 = n / n2;
      switch(n2) {
      case 1:
        switch(q2) {
        case 2: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,2>>
            (t, tc1, tc2);
        case 3: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,3>>
            (t, tc1, tc2);
        case 4: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,4>>
            (t, tc1, tc2);
        default: return
            std::make_shared<ga_instruction_contraction_opt0_2_unrolled<1>>
            (t, tc1, tc2, q2);
        }
      case 2:
        switch(q2) {
        case 2: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,2>>
            (t, tc1, tc2);
        case 3: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,3>>
            (t, tc1, tc2);
        case 4: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,4>>
            (t, tc1, tc2);
        default: return
            std::make_shared<ga_instruction_contraction_opt0_2_unrolled<2>>
            (t, tc1, tc2, q2);
        }
      case 3:
        switch(q2) {
        case 2: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,2>>
            (t, tc1, tc2);
        case 3: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,3>>
            (t, tc1, tc2);
        case 4: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,4>>
            (t, tc1, tc2);
        default: return
            std::make_shared<ga_instruction_contraction_opt0_2_unrolled<3>>
            (t, tc1, tc2, q2);
        }
      case 4:
        return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<4>>
          (t, tc1, tc2, q2);
      case 5:
        return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<5>>
          (t, tc1, tc2, q2);
      default:
        return std::make_shared<ga_instruction_contraction_opt0_2>
          (t, tc1, tc2, n2, q2);
      }
    }
    if (tc1_.sparsity() == 2) {
      size_type n1 = (tc1.sizes().size() > 2) ? tc1.sizes()[1] : 1;
      size_type q1 = n / n1;
      switch(n1) {
      case 1:
        switch(q1) {
        case 2: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,2>>
            (t, tc1, tc2);
        case 3: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,3>>
            (t, tc1, tc2);
        case 4: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,4>>
            (t, tc1, tc2);
        default: return
            std::make_shared<ga_instruction_contraction_opt2_0_unrolled<1>>
            (t, tc1, tc2, q1);
        }
      case 2:
        switch(q1) {
        case 2: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,2>>
            (t, tc1, tc2);
        case 3: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,3>>
            (t, tc1, tc2);
        case 4: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,4>>
            (t, tc1, tc2);
        default: return
            std::make_shared<ga_instruction_contraction_opt2_0_unrolled<2>>
            (t, tc1, tc2, q1);
        }
      case 3:
        switch(q1) {
        case 2: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,2>>
            (t, tc1, tc2);
        case 3: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,3>>
            (t, tc1, tc2);
        case 4: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,4>>
            (t, tc1, tc2);
        default: return
            std::make_shared<ga_instruction_contraction_opt2_0_unrolled<3>>
            (t, tc1, tc2, q1);
        }
      case 4:
        return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<4>>
          (t, tc1, tc2, q1);
      case 5:
        return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<5>>
          (t, tc1, tc2, q1);
      default:
        return std::make_shared<ga_instruction_contraction_opt2_0>
          (t,tc1,tc2, n1, q1);
      }
    }

    switch(n) {
    case  2 : return std::make_shared<ga_instruction_contraction_unrolled< 2>>
        (t, tc1, tc2);
    case  3 : return std::make_shared<ga_instruction_contraction_unrolled< 3>>
        (t, tc1, tc2);
    case  4 : return std::make_shared<ga_instruction_contraction_unrolled< 4>>
        (t, tc1, tc2);
    case  5 : return std::make_shared<ga_instruction_contraction_unrolled< 5>>
        (t, tc1, tc2);
    case  6 : return std::make_shared<ga_instruction_contraction_unrolled< 6>>
        (t, tc1, tc2);
    case  7 : return std::make_shared<ga_instruction_contraction_unrolled< 7>>
        (t, tc1, tc2);
    case  8 : return std::make_shared<ga_instruction_contraction_unrolled< 8>>
        (t, tc1, tc2);
    case  9 : return std::make_shared<ga_instruction_contraction_unrolled< 9>>
        (t, tc1, tc2);
    case 10 : return std::make_shared<ga_instruction_contraction_unrolled<10>>
        (t, tc1, tc2);
    case 11 : return std::make_shared<ga_instruction_contraction_unrolled<11>>
        (t, tc1, tc2);
    case 12 : return std::make_shared<ga_instruction_contraction_unrolled<12>>
        (t, tc1, tc2);
    case 13 : return std::make_shared<ga_instruction_contraction_unrolled<13>>
        (t, tc1, tc2);
    case 14 : return std::make_shared<ga_instruction_contraction_unrolled<14>>
        (t, tc1, tc2);
    case 15 : return std::make_shared<ga_instruction_contraction_unrolled<15>>
        (t, tc1, tc2);
    case 16 : return std::make_shared<ga_instruction_contraction_unrolled<16>>
        (t, tc1, tc2);
    default : return std::make_shared<ga_instruction_contraction>
        (t, tc1, tc2, n);
    }
  }
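  // Dispatch strategy (sketch): the switch inspects the sparsity tags of
  // both operands (1: vectorized base function, 2: u*Id vectorization) and
  // the contraction size, picking the most specialized instruction
  // available before falling back to the generic one:
  //
  //   // both operands type 1 with matching qdim -> opt1_1
  //   // second operand type 1, n in {2..5}      -> opt0_1_unrolled<n>
  //   // second operand type 2, small n2, q2     -> opt0_2_dunrolled<n2,q2>
  //   // n in {2..16}                            -> contraction_unrolled<n>
  //   // otherwise                               -> generic contraction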
  pga_instruction ga_uniform_instruction_contraction_switch
  (assembly_tensor &t_, assembly_tensor &tc1_, assembly_tensor &tc2_,
   size_type n, bool &to_clear) {
    base_tensor &t = t_.tensor(), &tc1 = tc1_.tensor(),
      &tc2 = tc2_.tensor();

    if (tc1_.sparsity() == 1 && tc2_.sparsity() == 1 &&
        tc1_.qdim() == n && tc2_.qdim() == n) {
      to_clear = true;
      t_.set_sparsity(10, tc1_.qdim());
      return std::make_shared<ga_instruction_contraction_opt1_1>
        (t,tc1,tc2,n);
    }
    if (tc2_.sparsity() == 1) {
      switch(n) {
      case 2:
        return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<2>>
          (t, tc1, tc2);
      case 3:
        return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<3>>
          (t, tc1, tc2);
      case 4:
        return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<4>>
          (t, tc1, tc2);
      case 5:
        return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<5>>
          (t, tc1, tc2);
      default:
        return std::make_shared<ga_instruction_contraction_opt0_1>
          (t,tc1,tc2, n);
      }
    }
    if (tc2_.sparsity() == 2) {
      size_type n2 = (tc2.sizes().size() > 2) ? tc2.sizes()[1] : 1;
      size_type q2 = n / n2;
      switch(n2) {
      case 1:
        switch(q2) {
        case 2: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,2>>
            (t, tc1, tc2);
        case 3: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,3>>
            (t, tc1, tc2);
        case 4: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,4>>
            (t, tc1, tc2);
        default: return
            std::make_shared<ga_instruction_contraction_opt0_2_unrolled<1>>
            (t, tc1, tc2, q2);
        }
      case 2:
        switch(q2) {
        case 2: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,2>>
            (t, tc1, tc2);
        case 3: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,3>>
            (t, tc1, tc2);
        case 4: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,4>>
            (t, tc1, tc2);
        default: return
            std::make_shared<ga_instruction_contraction_opt0_2_unrolled<2>>
            (t, tc1, tc2, q2);
        }
      case 3:
        switch(q2) {
        case 2: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,2>>
            (t, tc1, tc2);
        case 3: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,3>>
            (t, tc1, tc2);
        case 4: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,4>>
            (t, tc1, tc2);
        default: return
            std::make_shared<ga_instruction_contraction_opt0_2_unrolled<3>>
            (t, tc1, tc2, q2);
        }
      case 4:
        return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<4>>
          (t, tc1, tc2, q2);
      case 5:
        return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<5>>
          (t, tc1, tc2, q2);
      default:
        return std::make_shared<ga_instruction_contraction_opt0_2>
          (t, tc1, tc2, n2, q2);
      }
    }
    if (tc1_.sparsity() == 2) {
      size_type n1 = (tc1.sizes().size() > 2) ? tc1.sizes()[1] : 1;
      size_type q1 = n / n1;
      switch(n1) {
      case 1:
        switch(q1) {
        case 2: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,2>>
            (t, tc1, tc2);
        case 3: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,3>>
            (t, tc1, tc2);
        case 4: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,4>>
            (t, tc1, tc2);
        default: return
            std::make_shared<ga_instruction_contraction_opt2_0_unrolled<1>>
            (t, tc1, tc2, q1);
        }
      case 2:
        switch(q1) {
        case 2: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,2>>
            (t, tc1, tc2);
        case 3: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,3>>
            (t, tc1, tc2);
        case 4: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,4>>
            (t, tc1, tc2);
        default: return
            std::make_shared<ga_instruction_contraction_opt2_0_unrolled<2>>
            (t, tc1, tc2, q1);
        }
      case 3:
        switch(q1) {
        case 2: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,2>>
            (t, tc1, tc2);
        case 3: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,3>>
            (t, tc1, tc2);
        case 4: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,4>>
            (t, tc1, tc2);
        default: return
            std::make_shared<ga_instruction_contraction_opt2_0_unrolled<3>>
            (t, tc1, tc2, q1);
        }
      case 4:
        return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<4>>
          (t, tc1, tc2, q1);
      case 5:
        return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<5>>
          (t, tc1, tc2, q1);
      default:
        return std::make_shared<ga_instruction_contraction_opt2_0>
          (t,tc1,tc2, n1, q1);
      }
    }

    // Doubly unrolled instructions, specialized for small second tensor
    // sizes only.
    size_type s2 = tc2.size()/n;
    switch(s2) {
    case 1:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,1>>
          (t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,1>>
          (t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,1>>
          (t, tc1, tc2);
      default: return ga_instruction_contraction_switch
          (t_,tc1_,tc2_,n,to_clear);
      }
    case 2:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,2>>
          (t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,2>>
          (t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,2>>
          (t, tc1, tc2);
      default: return ga_instruction_contraction_switch
          (t_,tc1_,tc2_,n,to_clear);
      }
    case 3:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,3>>
          (t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,3>>
          (t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,3>>
          (t, tc1, tc2);
      default: return ga_instruction_contraction_switch
          (t_,tc1_,tc2_,n,to_clear);
      }
    case 4:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,4>>
          (t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,4>>
          (t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,4>>
          (t, tc1, tc2);
      default: return ga_instruction_contraction_switch
          (t_,tc1_,tc2_,n,to_clear);
      }
    case 5:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,5>>
          (t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,5>>
          (t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,5>>
          (t, tc1, tc2);
      default: return ga_instruction_contraction_switch
          (t_,tc1_,tc2_,n,to_clear);
      }
    case 6:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,6>>
          (t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,6>>
          (t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,6>>
          (t, tc1, tc2);
      default: return ga_instruction_contraction_switch
          (t_,tc1_,tc2_,n,to_clear);
      }
    case 7:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,7>>
          (t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,7>>
          (t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,7>>
          (t, tc1, tc2);
      default: return ga_instruction_contraction_switch
          (t_,tc1_,tc2_,n,to_clear);
      }
    case 8:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,8>>
          (t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,8>>
          (t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,8>>
          (t, tc1, tc2);
      default: return ga_instruction_contraction_switch
          (t_,tc1_,tc2_,n,to_clear);
      }
    case 9:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,9>>
          (t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,9>>
          (t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,9>>
          (t, tc1, tc2);
      default: return ga_instruction_contraction_switch
          (t_,tc1_,tc2_,n,to_clear);
      }
    case 10:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,10>>
          (t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,10>>
          (t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,10>>
          (t, tc1, tc2);
      default: return ga_instruction_contraction_switch
          (t_,tc1_,tc2_,n,to_clear);
      }
    default: return ga_instruction_contraction_switch
        (t_,tc1_,tc2_,n,to_clear);
    }
  }
  struct ga_instruction_spec_contraction : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type nn;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: specific contraction operation of "
                    "size " << nn);
      size_type s1 = tc1.sizes()[0], s11 = tc1.size() / (s1*nn),
        s111 = s1*s11;
      size_type s2 = tc2.sizes()[0];
      base_tensor::iterator it = t.begin();
      for (size_type n = 0; n < s2; ++n)
        for (size_type i = 0; i < s11; ++i)
          for (size_type m = 0; m < s1; ++m, ++it) {
            *it = scalar_type(0);
            for (size_type j = 0; j < nn; ++j)
              *it += tc1[m+i*s1+j*s111] * tc2[n+j*s2];
          }
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_spec_contraction(base_tensor &t_, base_tensor &tc1_,
                                    base_tensor &tc2_, size_type n_)
      : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
  };

  struct ga_instruction_spec2_contraction : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type nn;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: second specific contraction operation of "
                    "size " << nn);
      size_type s1 = tc1.sizes()[0], s11 = tc1.size() / (s1*nn),
        s111 = s1*s11;
      size_type s2 = tc2.sizes()[0], s22 = tc2.size() / (s2*nn),
        s222 = s2*s22;
      base_tensor::iterator it = t.begin();
      for (size_type j = 0; j < s22; ++j)
        for (size_type i = 0; i < s11; ++i)
          for (size_type m = 0; m < s1; ++m)
            for (size_type n = 0; n < s2; ++n, ++it) {
              *it = scalar_type(0);
              for (size_type k = 0; k < nn; ++k)
                *it += tc1[m+i*s1+k*s111] * tc2[n+j*s2+k*s222];
            }
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_spec2_contraction(base_tensor &t_, base_tensor &tc1_,
                                     base_tensor &tc2_, size_type n_)
      : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
  };
  // Simple tensor product: Cij = Ai * Bj, with the first (tc1) index
  // fastest in the result.
  struct ga_instruction_simple_tmult : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: simple tensor product");
      size_type s1 = tc1.size();
      GA_DEBUG_ASSERT(t.size() == s1 * tc2.size(), "Wrong sizes");
      base_tensor::iterator it2=tc2.begin(), it1=tc1.begin(),
        it1end=it1 + s1;
      for (base_tensor::iterator it = t.begin(); it != t.end(); ++it) {
        *it = *(it2) * (*it1);
        ++it1; if (it1 == it1end) { it1 = tc1.begin(), ++it2; }
      }
      return 0;
    }
    ga_instruction_simple_tmult(base_tensor &t_, base_tensor &tc1_,
                                base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };

  template<int S1> inline void tmult_elem_unrolled__
  (base_tensor::iterator &it, base_tensor::iterator &it1,
   base_tensor::iterator &it2) {
    *it++ = (*it1++)*(*it2);
    tmult_elem_unrolled__<S1-1>(it, it1, it2);
  }
  template<> inline void tmult_elem_unrolled__<0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &) { }
  // Simple tensor product, unrolled version with compile-time first tensor
  // size S1.
  template<int S1> struct ga_instruction_simple_tmult_unrolled
    : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    virtual int exec() {
      size_type s2 = tc2.size();
      GA_DEBUG_ASSERT(tc1.size() == S1,
                      "Wrong sizes " << tc1.size() << " != " << S1);
      GA_DEBUG_INFO("Instruction: simple tensor product, unrolled with "
                    << S1 << " operations");
      GA_DEBUG_ASSERT(t.size() == S1 * s2,
                      "Wrong sizes " << t.size() << " != " << S1 << "*"
                      << s2);
      base_tensor::iterator it = t.begin(), it2 = tc2.begin();
      for (size_type ii = 0; ii < s2; ++ii, ++it2) {
        base_tensor::iterator it1 = tc1.begin();
        tmult_elem_unrolled__<S1>(it, it1, it2);
      }
      GA_DEBUG_ASSERT(it == t.end(), "Internal error");
      return 0;
    }
    ga_instruction_simple_tmult_unrolled(base_tensor &t_, base_tensor &tc1_,
                                         base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };
  pga_instruction ga_uniform_instruction_simple_tmult
  (base_tensor &t, base_tensor &tc1, base_tensor &tc2) {
    switch(tc1.size()) {
    case  2 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 2>>
        (t, tc1, tc2);
    case  3 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 3>>
        (t, tc1, tc2);
    case  4 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 4>>
        (t, tc1, tc2);
    case  5 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 5>>
        (t, tc1, tc2);
    case  6 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 6>>
        (t, tc1, tc2);
    case  7 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 7>>
        (t, tc1, tc2);
    case  8 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 8>>
        (t, tc1, tc2);
    case  9 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 9>>
        (t, tc1, tc2);
    case 10 : return std::make_shared<ga_instruction_simple_tmult_unrolled<10>>
        (t, tc1, tc2);
    case 11 : return std::make_shared<ga_instruction_simple_tmult_unrolled<11>>
        (t, tc1, tc2);
    case 12 : return std::make_shared<ga_instruction_simple_tmult_unrolled<12>>
        (t, tc1, tc2);
    case 13 : return std::make_shared<ga_instruction_simple_tmult_unrolled<13>>
        (t, tc1, tc2);
    case 14 : return std::make_shared<ga_instruction_simple_tmult_unrolled<14>>
        (t, tc1, tc2);
    case 15 : return std::make_shared<ga_instruction_simple_tmult_unrolled<15>>
        (t, tc1, tc2);
    case 16 : return std::make_shared<ga_instruction_simple_tmult_unrolled<16>>
        (t, tc1, tc2);
    default : return std::make_shared<ga_instruction_simple_tmult>
        (t, tc1, tc2);
    }
  }
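  // The tensor product Cij = Ai*Bj is the workhorse of elementary matrix
  // computation (test x trial pairs), hence the aggressive dispatch on
  // tc1.size() to a fully unrolled template up to 16. Sketch of what the
  // unrolled<3> instance performs per entry of tc2 (illustration only):
  //
  //   *it++ = tc1[0]*(*it2); *it++ = tc1[1]*(*it2); *it++ = tc1[2]*(*it2);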
  struct ga_instruction_spec_tmult : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type s1_2, s2_2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: specific tensor product");
      GA_DEBUG_ASSERT(t.size() == tc1.size() * tc2.size(), "Wrong sizes");
      size_type s1_1 = tc1.size() / s1_2;
      size_type s2_1 = tc2.size() / s2_2;

      base_tensor::iterator it = t.begin();
      for (size_type j = 0; j < s2_2; ++j)
        for (size_type n = 0; n < s2_1; ++n)
          for (size_type i = 0; i < s1_2; ++i)
            for (size_type m = 0; m < s1_1; ++m, ++it)
              *it = tc1[m+i*s1_1] * tc2[n+j*s2_1];
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_spec_tmult(base_tensor &t_, base_tensor &tc1_,
                              base_tensor &tc2_, size_type s1_2_,
                              size_type s2_2_)
      : t(t_), tc1(tc1_), tc2(tc2_), s1_2(s1_2_), s2_2(s2_2_) {}
  };

  struct ga_instruction_spec2_tmult : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: second specific tensor product");
      GA_DEBUG_ASSERT(t.size() == tc1.size() * tc2.size(), "Wrong sizes");
      size_type s1 = tc1.size();
      size_type s2_1 = tc2.sizes()[0], s2_2 = tc2.size() / s2_1;

      base_tensor::iterator it = t.begin();
      for (size_type j = 0; j < s2_2; ++j)
        for (size_type i = 0; i < s1; ++i)
          for (size_type m = 0; m < s2_1; ++m, ++it)
            *it = tc1[i] * tc2[m+j*s2_1];
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_spec2_tmult(base_tensor &t_, base_tensor &tc1_,
                               base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };
  struct ga_instruction_simple_c_matrix : public ga_instruction {
    base_tensor &t;
    std::vector<scalar_type *> components;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: gathering components for explicit "
                    "matrix");
      GA_DEBUG_ASSERT(t.size() == components.size(), "Wrong sizes");
      for (size_type i = 0; i < components.size(); ++i)
        t[i] = *(components[i]);
      return 0;
    }
    ga_instruction_simple_c_matrix(base_tensor &t_,
                                   std::vector<scalar_type *> &components_)
      : t(t_), components(components_) {}
  };

  struct ga_instruction_c_matrix_with_tests : public ga_instruction {
    base_tensor &t;
    const std::vector<const base_tensor *> components;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: gathering components for explicit "
                    "matrix with test functions");
      size_type s = t.size() / components.size();
      GA_DEBUG_ASSERT(s, "Wrong sizes");
      base_tensor::iterator it = t.begin();
      for (size_type i = 0; i < components.size(); ++i) {
        const base_tensor &t1 = *(components[i]);
        if (t1.size() > 1) {
          GA_DEBUG_ASSERT(t1.size() == s, "Wrong sizes, " << t1.size()
                          << " != " << s);
          for (size_type j = 0; j < s; ++j) *it++ = t1[j];
        } else {
          for (size_type j = 0; j < s; ++j) *it++ = t1[0];
        }
      }
      return 0;
    }
    ga_instruction_c_matrix_with_tests
    (base_tensor &t_, const std::vector<const base_tensor *> &components_)
      : t(t_), components(components_) {}
  };
  struct ga_instruction_eval_func_1arg_1res : public ga_instruction {
    scalar_type &t;
    const scalar_type &c;
    pscalar_func_onearg f1;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a one argument "
                    "predefined function on a scalar");
      t = (*f1)(c);
      return 0;
    }
    ga_instruction_eval_func_1arg_1res(scalar_type &t_,
                                       const scalar_type &c_,
                                       pscalar_func_onearg f1_)
      : t(t_), c(c_), f1(f1_) {}
  };

  struct ga_instruction_eval_func_1arg_1res_expr : public ga_instruction {
    scalar_type &t;
    const scalar_type &c;
    const ga_predef_function &F;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a one argument "
                    "predefined function on a scalar");
      t = F(c);
      return 0;
    }
    ga_instruction_eval_func_1arg_1res_expr(scalar_type &t_,
                                            const scalar_type &c_,
                                            const ga_predef_function &F_)
      : t(t_), c(c_), F(F_) {}
  };
  struct ga_instruction_eval_func_1arg : public ga_instruction {
    base_tensor &t, &tc1;
    pscalar_func_onearg f1;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a one argument "
                    "predefined function on tensor");
      GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
      for (size_type i = 0; i < t.size(); ++i) t[i] = (*f1)(tc1[i]);
      return 0;
    }
    ga_instruction_eval_func_1arg(base_tensor &t_, base_tensor &c_,
                                  pscalar_func_onearg f1_)
      : t(t_), tc1(c_), f1(f1_) {}
  };

  struct ga_instruction_eval_func_1arg_expr : public ga_instruction {
    base_tensor &t, &tc1;
    const ga_predef_function &F;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a one argument "
                    "predefined function on tensor");
      GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
      for (size_type i = 0; i < t.size(); ++i) t[i] = F(tc1[i]);
      return 0;
    }
    ga_instruction_eval_func_1arg_expr(base_tensor &t_, base_tensor &c_,
                                       const ga_predef_function &F_)
      : t(t_), tc1(c_), F(F_) {}
  };
  struct ga_instruction_eval_func_2arg_1res : public ga_instruction {
    scalar_type &t;
    const scalar_type &c, &d;
    pscalar_func_twoargs f2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
                    "predefined function on two scalars");
      t = (*f2)(c, d);
      return 0;
    }
    ga_instruction_eval_func_2arg_1res(scalar_type &t_,
                                       const scalar_type &c_,
                                       const scalar_type &d_,
                                       pscalar_func_twoargs f2_)
      : t(t_), c(c_), d(d_), f2(f2_) {}
  };

  struct ga_instruction_eval_func_2arg_1res_expr : public ga_instruction {
    scalar_type &t;
    const scalar_type &c, &d;
    const ga_predef_function &F;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
                    "predefined function on two scalars");
      t = F(c, d);
      return 0;
    }
    ga_instruction_eval_func_2arg_1res_expr(scalar_type &t_,
                                            const scalar_type &c_,
                                            const scalar_type &d_,
                                            const ga_predef_function &F_)
      : t(t_), c(c_), d(d_), F(F_) {}
  };
  struct ga_instruction_eval_func_2arg_first_scalar
    : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    pscalar_func_twoargs f2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
                    "predefined function on one scalar and one tensor");
      GA_DEBUG_ASSERT(t.size() == tc2.size(), "Wrong sizes");
      for (size_type i = 0; i < t.size(); ++i)
        t[i] = (*f2)(tc1[0], tc2[i]);
      return 0;
    }
    ga_instruction_eval_func_2arg_first_scalar
    (base_tensor &t_, base_tensor &c_, base_tensor &d_,
     pscalar_func_twoargs f2_)
      : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
  };

  struct ga_instruction_eval_func_2arg_first_scalar_expr
    : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    const ga_predef_function &F;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
                    "predefined function on one scalar and one tensor");
      GA_DEBUG_ASSERT(t.size() == tc2.size(), "Wrong sizes");
      for (size_type i = 0; i < t.size(); ++i) t[i] = F(tc1[0], tc2[i]);
      return 0;
    }
    ga_instruction_eval_func_2arg_first_scalar_expr
    (base_tensor &t_, base_tensor &c_, base_tensor &d_,
     const ga_predef_function &F_)
      : t(t_), tc1(c_), tc2(d_), F(F_) {}
  };
  struct ga_instruction_eval_func_2arg_second_scalar
    : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    pscalar_func_twoargs f2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
                    "predefined function on one tensor and one scalar");
      GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
      for (size_type i = 0; i < t.size(); ++i)
        t[i] = (*f2)(tc1[i], tc2[0]);
      return 0;
    }
    ga_instruction_eval_func_2arg_second_scalar(base_tensor &t_,
                                                base_tensor &c_,
                                                base_tensor &d_,
                                                pscalar_func_twoargs f2_)
      : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
  };

  struct ga_instruction_eval_func_2arg_second_scalar_expr
    : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    const ga_predef_function &F;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
                    "predefined function on one tensor and one scalar");
      GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
      for (size_type i = 0; i < t.size(); ++i) t[i] = F(tc1[i], tc2[0]);
      return 0;
    }
    ga_instruction_eval_func_2arg_second_scalar_expr
    (base_tensor &t_, base_tensor &c_, base_tensor &d_,
     const ga_predef_function &F_)
      : t(t_), tc1(c_), tc2(d_), F(F_) {}
  };
  struct ga_instruction_eval_func_2arg : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    pscalar_func_twoargs f2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
                    "predefined function on two tensors");
      GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
                      "Wrong sizes");
      for (size_type i = 0; i < t.size(); ++i)
        t[i] = (*f2)(tc1[i], tc2[i]);
      return 0;
    }
    ga_instruction_eval_func_2arg(base_tensor &t_, base_tensor &c_,
                                  base_tensor &d_,
                                  pscalar_func_twoargs f2_)
      : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
  };

  struct ga_instruction_eval_func_2arg_expr : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    const ga_predef_function &F;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
                    "predefined function on two tensors");
      GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
                      "Wrong sizes");
      for (size_type i = 0; i < t.size(); ++i) t[i] = F(tc1[i], tc2[i]);
      return 0;
    }
    ga_instruction_eval_func_2arg_expr(base_tensor &t_, base_tensor &c_,
                                       base_tensor &d_,
                                       const ga_predef_function &F_)
      : t(t_), tc1(c_), tc2(d_), F(F_) {}
  };
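  // Two evaluation paths exist for predefined functions:
  // pscalar_func_onearg / pscalar_func_twoargs are plain function pointers
  // (sin, cos, pow, ...) called directly, while ga_predef_function covers
  // functions defined by a GA expression string and evaluated through its
  // operator(). The *_expr instruction variants are therefore the slower,
  // more general fallback used when no native pointer is available.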
  struct ga_instruction_eval_OP : public ga_instruction {
    base_tensor &t;
    const ga_nonlinear_operator &OP;
    ga_nonlinear_operator::arg_list args;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: operator evaluation");
      OP.value(args, t);
      return 0;
    }
    ga_instruction_eval_OP(base_tensor &t_,
                           const ga_nonlinear_operator &OP_,
                           ga_nonlinear_operator::arg_list &args_)
      : t(t_), OP(OP_), args(args_) {}
  };

  struct ga_instruction_eval_derivative_OP : public ga_instruction {
    base_tensor &t;
    const ga_nonlinear_operator &OP;
    ga_nonlinear_operator::arg_list args;
    size_type der1;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: operator derivative evaluation");
      OP.derivative(args, der1, t);
      return 0;
    }
    ga_instruction_eval_derivative_OP(base_tensor &t_,
                                      const ga_nonlinear_operator &OP_,
                                      ga_nonlinear_operator::arg_list &args_,
                                      size_type der1_)
      : t(t_), OP(OP_), args(args_), der1(der1_) {}
  };

  struct ga_instruction_eval_second_derivative_OP
    : public ga_instruction {
    base_tensor &t;
    const ga_nonlinear_operator &OP;
    ga_nonlinear_operator::arg_list args;
    size_type der1, der2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: operator second derivative evaluation");
      OP.second_derivative(args, der1, der2, t);
      return 0;
    }
    ga_instruction_eval_second_derivative_OP
    (base_tensor &t_, const ga_nonlinear_operator &OP_,
     ga_nonlinear_operator::arg_list &args_, size_type der1_,
     size_type der2_)
      : t(t_), OP(OP_), args(args_), der1(der1_), der2(der2_) {}
  };
  struct ga_instruction_tensor_slice : public ga_instruction {
    base_tensor &t, &tc1;
    bgeot::multi_index mi, indices;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: tensor slice");
      size_type order = t.sizes().size();
      for (bgeot::multi_index mi3(order); !mi3.finished(t.sizes());
           mi3.incrementation(t.sizes())) {
        for (size_type j = 0; j < order; ++j)
          mi[indices[j]] = mi3[j];
        t(mi3) = tc1(mi);
      }
      return 0;
    }
    ga_instruction_tensor_slice(base_tensor &t_, base_tensor &tc1_,
                                bgeot::multi_index &mi_,
                                bgeot::multi_index &indices_)
      : t(t_), tc1(tc1_), mi(mi_), indices(indices_) {}
  };
  struct ga_instruction_transformation_call : public ga_instruction {
    const ga_workspace &workspace;
    ga_instruction_set::interpolate_info &inin;
    pinterpolate_transformation trans;
    fem_interpolation_context &ctx;
    const base_small_vector &Normal;
    const mesh &m;
    bool compute_der;

    virtual int exec() {
      GA_DEBUG_INFO("Instruction: call interpolate transformation");

      base_node P_ref;
      size_type cv;
      short_type face_num;
      gmm::clear(inin.Normal);
      inin.pt_type = trans->transform(workspace, m, ctx, Normal, &(inin.m),
                                      cv, face_num, P_ref, inin.Normal,
                                      inin.derivatives, compute_der);
      if (inin.pt_type) {
        if (cv != size_type(-1)) {
          inin.m->points_of_convex(cv, inin.G);
          inin.ctx.change((inin.m)->trans_of_convex(cv),
                          0, P_ref, inin.G, cv, face_num);
          inin.has_ctx = true;
          if (face_num != short_type(-1)) {
            inin.Normal = bgeot::compute_normal(inin.ctx, face_num);
            gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
          } else
            inin.Normal.resize(0);
          inin.pt_y = inin.ctx.xreal();
        } else {
          inin.ctx.invalid_convex_num();
          inin.Normal.resize(0);
          inin.pt_y = P_ref;
          inin.has_ctx = false;
        }
      } else {
        inin.ctx.invalid_convex_num();
        inin.Normal.resize(0);
        inin.pt_y.resize(0);
        inin.has_ctx = false;
      }
      GA_DEBUG_INFO("Instruction: end of call interpolate transformation");
      return 0;
    }
    ga_instruction_transformation_call
    (const ga_workspace &w, ga_instruction_set::interpolate_info &i,
     pinterpolate_transformation t, fem_interpolation_context &ctxx,
     const base_small_vector &No, const mesh &mm, bool compute_der_)
      : workspace(w), inin(i), trans(t), ctx(ctxx), Normal(No), m(mm),
        compute_der(compute_der_) {}
  };
  struct ga_instruction_neighbor_transformation_call
    : public ga_instruction {
    const ga_workspace &workspace;
    ga_instruction_set::interpolate_info &inin;
    pinterpolate_transformation trans;
    fem_interpolation_context &ctx;
    base_small_vector &Normal;
    const mesh &m;
    size_type &ipt;
    papprox_integration &pai;
    bgeot::geotrans_precomp_pool &gp_pool;
    std::map<gauss_pt_corresp, bgeot::pstored_point_tab> &neighbor_corresp;

    virtual int exec() {
      bool cancel_optimization = false;
      GA_DEBUG_INFO("Instruction: call interpolate neighbor "
                    "transformation");
      if (ipt == 0) {
        if (!(ctx.have_pgp()) || !pai || pai->is_built_on_the_fly()
            || cancel_optimization) {
          inin.ctx.invalid_convex_num();
        } else {
          // Optimized case: compute the Gauss point correspondence with
          // the neighbor element once per element and cache it.
          size_type cv = ctx.convex_num();
          short_type f = ctx.face_num();
          auto adj_face = m.adjacent_face(cv, f);
          if (adj_face.cv == size_type(-1)) {
            inin.ctx.invalid_convex_num();
          } else {
            gauss_pt_corresp gpc;
            gpc.pgt1 = m.trans_of_convex(cv);
            gpc.pgt2 = m.trans_of_convex(adj_face.cv);
            gpc.pai = pai;
            auto inds_pt1 = m.ind_points_of_face_of_convex(cv, f);
            auto inds_pt2 = m.ind_points_of_face_of_convex(adj_face.cv,
                                                           adj_face.f);
            auto str1 = gpc.pgt1->structure();
            auto str2 = gpc.pgt2->structure();
            size_type nbptf1 = str1->nb_points_of_face(f);
            size_type nbptf2 = str2->nb_points_of_face(adj_face.f);
            gpc.nodes.resize(nbptf1*2);
            for (size_type i = 0; i < nbptf1; ++i) {
              gpc.nodes[2*i] = str1->ind_points_of_face(f)[i];
              bool found = false;
              for (size_type j = 0; j < nbptf2; ++j) {
                if (inds_pt2[j] == inds_pt1[i]) {
                  gpc.nodes[2*i+1]
                    = str2->ind_points_of_face(adj_face.f)[j];
                  found = true;
                  break;
                }
              }
              GMM_ASSERT1(found, "Internal error");
            }
            bgeot::pstored_point_tab pspt = 0;
            auto itm = neighbor_corresp.find(gpc);
            if (itm != neighbor_corresp.end()) {
              pspt = itm->second;
            } else {
              size_type nbpt = pai->nb_points_on_face(f);
              bgeot::geotrans_inv_convex gic;
              gic.init(m.points_of_convex(adj_face.cv), gpc.pgt2);
              size_type first_ind = pai->ind_first_point_on_face(f);
              const bgeot::stored_point_tab
                &spt = *(pai->pintegration_points());
              base_matrix G;
              m.points_of_convex(cv, G);
              fem_interpolation_context ctx_x(gpc.pgt1, 0, spt[0], G,
                                              cv, f);
              std::vector<base_node> P_ref(nbpt);

              for (size_type i = 0; i < nbpt; ++i) {
                ctx_x.set_xref(spt[first_ind+i]);
                bool converged = true;
                gic.invert(ctx_x.xreal(), P_ref[i], converged);
                bool is_in
                  = (gpc.pgt2->convex_ref()->is_in(P_ref[i]) < 1E-4);
                GMM_ASSERT1(is_in && converged, "Geometric transformation "
                            "inversion has failed in neighbor "
                            "transformation");
              }
              pspt = store_point_tab(P_ref);
              neighbor_corresp[gpc] = pspt;
            }
            m.points_of_convex(adj_face.cv, inin.G);
            bgeot::pgeotrans_precomp pgp = gp_pool(gpc.pgt2, pspt);
            inin.ctx.change(pgp, 0, 0, inin.G, adj_face.cv, adj_face.f);
          }
        }
      }

      if (inin.ctx.have_pgp() && inin.ctx.is_convex_num_valid()) {
        inin.ctx.set_ii(ipt);
        inin.pt_type = 1;
        inin.has_ctx = true;
        inin.pt_y = inin.ctx.xreal();
        inin.Normal = bgeot::compute_normal(inin.ctx, inin.ctx.face_num());
        gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
        inin.m = &m;
      } else {
        // Fall back to the generic transformation call.
        base_node P_ref;
        size_type cv;
        short_type face_num;
        gmm::clear(inin.Normal);
        inin.pt_type = trans->transform(workspace, m, ctx, Normal,
                                        &(inin.m), cv, face_num, P_ref,
                                        inin.Normal, inin.derivatives,
                                        false);
        if (inin.pt_type) {
          if (cv != size_type(-1)) {
            inin.m->points_of_convex(cv, inin.G);
            inin.ctx.change((inin.m)->trans_of_convex(cv),
                            0, P_ref, inin.G, cv, face_num);
            inin.has_ctx = true;
            if (face_num != short_type(-1)) {
              inin.Normal = bgeot::compute_normal(inin.ctx, face_num);
              gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
            } else
              inin.Normal.resize(0);
            inin.pt_y = inin.ctx.xreal();
          } else {
            inin.ctx.invalid_convex_num();
            inin.Normal.resize(0);
            inin.pt_y = P_ref;
            inin.has_ctx = false;
          }
        } else {
          inin.ctx.invalid_convex_num();
          inin.Normal.resize(0);
          inin.pt_y.resize(0);
          inin.has_ctx = false;
        }
      }
      GA_DEBUG_INFO("Instruction: end of call neighbor interpolate "
                    "transformation");
      return 0;
    }
    ga_instruction_neighbor_transformation_call
    (const ga_workspace &w, ga_instruction_set::interpolate_info &i,
     pinterpolate_transformation t, fem_interpolation_context &ctxx,
     base_small_vector &No, const mesh &mm, size_type &ipt_,
     papprox_integration &pai_, bgeot::geotrans_precomp_pool &gp_pool_,
     std::map<gauss_pt_corresp, bgeot::pstored_point_tab>
       &neighbor_corresp_)
      : workspace(w), inin(i), trans(t), ctx(ctxx), Normal(No), m(mm),
        ipt(ipt_), pai(pai_), gp_pool(gp_pool_),
        neighbor_corresp(neighbor_corresp_) {}
  };
  struct ga_instruction_scalar_assembly : public ga_instruction {
    const base_tensor &t;
    scalar_type &E, &coeff;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: scalar term assembly");
      E += t[0] * coeff;
      return 0;
    }
    ga_instruction_scalar_assembly(base_tensor &t_, scalar_type &E_,
                                   scalar_type &coeff_)
      : t(t_), E(E_), coeff(coeff_) {}
  };
  struct ga_instruction_vector_assembly_mf : public ga_instruction
  {
    const base_tensor &t;
    base_vector &VI, &Vi;
    const fem_interpolation_context &ctx;
    const gmm::sub_interval *const&I, *const I__;
    const mesh_fem *const&mf, *const mf__;
    const bool &reduced_mf;
    const scalar_type &coeff;
    const size_type &nbpt, &ipt;
    base_vector elem;
    const bool interpolate;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: vector term assembly for fem variable");
      bool empty_weight = (coeff == scalar_type(0));
      if (ipt == 0 || interpolate) { // initialize the elementary vector
        if (empty_weight) elem.resize(0);
        elem.resize(t.size());
        if (!empty_weight)
          copy_scaled_4(t, coeff, elem);
      } else if (!empty_weight)
        add_scaled_4(t, coeff, elem);

      if (ipt == nbpt-1 || interpolate) { // finalize the assembly
        GA_DEBUG_ASSERT(mf, "Internal error");
        if (!ctx.is_convex_num_valid()) return 0;
        size_type cv_1 = ctx.convex_num();
        size_type qmult = mf->get_qdim();
        if (qmult > 1) qmult /= mf->fem_of_element(cv_1)->target_dim();
        base_vector &V = reduced_mf ? Vi : VI;
        GA_DEBUG_ASSERT(V.size() >= I->first() + mf->nb_basic_dof(),
                        "Bad assembly vector size " << V.size() << ">=" <<
                        I->first() << "+"<< mf->nb_basic_dof());
        auto itr = elem.cbegin();
        auto itw = V.begin() + I->first();
        for (const auto &dof : mf->ind_scalar_basic_dof_of_element(cv_1))
          for (size_type q = 0; q < qmult; ++q)
            *(itw+dof+q) += *itr++;
        GMM_ASSERT1(itr == elem.end(), "Internal error");
      }
      return 0;
    }

    ga_instruction_vector_assembly_mf
    (const base_tensor &t_, base_vector &VI_, base_vector &Vi_,
     const fem_interpolation_context &ctx_,
     const gmm::sub_interval *&I_, const mesh_fem *&mf_,
     const bool &reduced_mf_,
     const scalar_type &coeff_, const size_type &nbpt_,
     const size_type &ipt_, bool interpolate_)
      : t(t_), VI(VI_), Vi(Vi_), ctx(ctx_),
        I(I_), I__(nullptr), mf(mf_), mf__(nullptr),
        reduced_mf(reduced_mf_),
        coeff(coeff_), nbpt(nbpt_), ipt(ipt_), interpolate(interpolate_) {}

    ga_instruction_vector_assembly_mf
    (const base_tensor &t_, base_vector &V_,
     const fem_interpolation_context &ctx_,
     const gmm::sub_interval &I_, const mesh_fem &mf_,
     const scalar_type &coeff_, const size_type &nbpt_,
     const size_type &ipt_, bool interpolate_)
      : t(t_), VI(V_), Vi(V_), ctx(ctx_),
        I(I__), I__(&I_), mf(mf__), mf__(&mf_), reduced_mf(false_),
        coeff(coeff_), nbpt(nbpt_), ipt(ipt_), interpolate(interpolate_) {}
  protected:
    const bool false_=false;
  };
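  // Assembly pattern: the elementary vector "elem" is accumulated over the
  // nbpt Gauss points of the current element (copy_scaled_4 on the first
  // point, add_scaled_4 afterwards, both unrolled by four) and scattered
  // into the global vector only once, on the last point. Sketch of the
  // scatter for a scalar FEM, i.e. qmult == 1 (illustration only):
  //
  //   auto itw = V.begin() + I->first();
  //   for (const auto &dof : mf->ind_scalar_basic_dof_of_element(cv))
  //     *(itw + dof) += *itr++;   // one entry of elem per local dof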
  struct ga_instruction_vector_assembly_imd : public ga_instruction {
    const base_tensor &t;
    base_vector &V;
    const fem_interpolation_context &ctx;
    const gmm::sub_interval &I;
    const im_data &imd;
    scalar_type &coeff;
    const size_type &ipt;
    const bool initialize;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: vector term assembly for im_data "
                    "variable");
      size_type cv = ctx.convex_num();
      size_type i = t.size() * imd.filtered_index_of_point(cv, ipt);
      GMM_ASSERT1(i+t.size() <= I.size(),
                  "Internal error "<<i<<"+"<<t.size()<<" <= "<<I.size());
      auto itw = V.begin() + I.first() + i;
      if (initialize)
        for (const auto &val : t.as_vector())
          *itw++ = coeff*val;
      else
        for (const auto &val : t.as_vector())
          *itw++ += coeff*val;
      return 0;
    }
    ga_instruction_vector_assembly_imd
    (const base_tensor &t_, base_vector &V_,
     const fem_interpolation_context &ctx_, const gmm::sub_interval &I_,
     const im_data &imd_, scalar_type &coeff_, const size_type &ipt_,
     bool initialize_=false)
      : t(t_), V(V_), ctx(ctx_), I(I_), imd(imd_), coeff(coeff_),
        ipt(ipt_), initialize(initialize_)
    {}
  };

  struct ga_instruction_vector_assembly : public ga_instruction {
    const base_tensor &t;
    base_vector &V;
    const gmm::sub_interval &I;
    scalar_type &coeff;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: vector term assembly for "
                    "fixed size variable");
      gmm::add(gmm::scaled(t.as_vector(), coeff), gmm::sub_vector(V, I));
      return 0;
    }
    ga_instruction_vector_assembly(const base_tensor &t_, base_vector &V_,
                                   const gmm::sub_interval &I_,
                                   scalar_type &coeff_)
      : t(t_), V(V_), I(I_), coeff(coeff_) {}
  };
  struct ga_instruction_assignment : public ga_instruction {
    const base_tensor &t;
    base_vector &V;
    const fem_interpolation_context &ctx;
    const im_data *imd;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: Assignment to im_data");
      imd->set_tensor(V, ctx.convex_num(), ctx.ii(), t);
      return 0;
    }
    ga_instruction_assignment(const base_tensor &t_, base_vector &V_,
                              const fem_interpolation_context &ctx_,
                              const im_data *imd_)
      : t(t_), V(V_), ctx(ctx_), imd(imd_) {}
  };
  struct ga_instruction_extract_residual_on_imd_dofs : public ga_instruction {
    base_tensor &t;
    const base_vector &V;
    const fem_interpolation_context &ctx;
    const gmm::sub_interval &I;
    const im_data &imd;
    const size_type &ipt;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: extract residual for im_data variable");
      size_type ifirst = I.first();
      size_type cv = ctx.convex_num();
      size_type i = t.size() * imd.filtered_index_of_point(cv, ipt);
      GMM_ASSERT1(i+t.size() <= I.size(),
                  "Internal error "<<i<<"+"<<t.size()<<" <= "<<I.size());
      for (auto &&val : t.as_vector())
        val = V[ifirst+(i++)];
      return 0;
    }
    ga_instruction_extract_residual_on_imd_dofs
    (base_tensor &t_, const base_vector &V_,
     const fem_interpolation_context &ctx_, const gmm::sub_interval &I_,
     const im_data &imd_, const size_type &ipt_)
      : t(t_), V(V_), ctx(ctx_), I(I_), imd(imd_), ipt(ipt_)
    {}
  };
  template <class MAT>
  inline void add_elem_matrix
  (MAT &K, const std::vector<size_type> &dofs1,
   const std::vector<size_type> &dofs2, std::vector<size_type> &/*dofs1_sort*/,
   base_vector &elem, scalar_type threshold, size_type /*N*/) {
    base_vector::const_iterator it = elem.cbegin();
    for (const size_type &dof2 : dofs2)
      for (const size_type &dof1 : dofs1) {
        if (gmm::abs(*it) > threshold)
          K(dof1, dof2) += *it;
        ++it;
      }
  }
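  // Illustration (not part of GetFEM): the threshold test above keeps entries
  // that are negligible relative to the element matrix out of the sparsity
  // pattern; at the call sites the threshold is ninf*1E-14, with ninf the
  // infinity norm of the element matrix. The same test in isolation (assumes
  // <cmath>):
  inline bool sketch_keep_entry(double e, double ninf)
  { return std::abs(e) > ninf * 1E-14; } // drop relative round-off noise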
  inline void add_elem_matrix
  (gmm::col_matrix<gmm::rsvector<scalar_type>> &K,
   const std::vector<size_type> &dofs1, const std::vector<size_type> &dofs2,
   std::vector<size_type> &dofs1_sort,
   base_vector &elem, scalar_type threshold, size_type N) {

    size_type s1 = dofs1.size();

    dofs1_sort.resize(s1);
    for (size_type i = 0; i < s1; ++i) { // insertion sort of dofs1 indices
      size_type j = i, k = j-1;
      while (j > 0 && dofs1[i] < dofs1[dofs1_sort[k]])
        { dofs1_sort[j] = dofs1_sort[k]; j--; k--; }
      dofs1_sort[j] = i;
    }

    gmm::elt_rsvector_<scalar_type> ev;

    size_type maxest = (N+1) * s1;
    base_vector::const_iterator it = elem.cbegin();
    bool first(true);
    for (const size_type &dof2 : dofs2) { // iteration on columns
      if (first) first = false;
      else it += s1;
      std::vector<gmm::elt_rsvector_<scalar_type>> &col = K[dof2];
      size_type nb = col.size();

      if (nb == 0) { // empty column: push the retained entries in order
        col.reserve(maxest);
        for (size_type i : dofs1_sort) {
          ev.e = *(it+i);
          if (gmm::abs(ev.e) > threshold) {
            ev.c = dofs1[i];
            col.push_back(ev);
          }
        }
      } else { // merge with the existing sorted column
        size_type ind = 0;
        for (size_type i : dofs1_sort) {
          ev.e = *(it+i);
          if (gmm::abs(ev.e) > threshold) {
            ev.c = dofs1[i];

            size_type count = nb - ind, step, l;
            while (count > 0) { // lower-bound search, resumed from ind
              step = count / 2; l = ind + step;
              if (col[l].c < ev.c) { ind = ++l; count -= step + 1; }
              else count = step;
            }

            auto itc = col.begin() + ind;
            if (ind != nb && itc->c == ev.c)
              itc->e += ev.e;
            else {
              if (nb - ind > 1300)
                GMM_WARNING2("Inefficient addition of element in rsvector with "
                             << col.size() - ind << " non-zero entries");
              col.push_back(ev);
              if (ind != nb) { // shift the tail to make room at position ind
                itc = col.begin() + ind;
                auto ite = col.end(); --ite;
                auto itee = ite;
                for (; ite != itc; --ite) { --itee; *ite = *itee; }
                *itc = ev;
              }
              ++nb;
            }
            ++ind;
          }
        }
      }
    }
  }
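  // Note: the overload above exploits that gmm's rsvector keeps each column
  // sorted by row index; since dofs1 is visited in sorted order (dofs1_sort),
  // the lower-bound search for the next entry can resume from the previous
  // insertion point. A minimal sketch of that resumable search (illustration,
  // plain std types):
  inline std::size_t sketch_lower_bound_from
  (const std::vector<std::size_t> &rows, std::size_t ind, std::size_t row)
  {
    std::size_t count = rows.size() - ind;
    while (count > 0) {                       // classical binary search
      std::size_t step = count / 2, l = ind + step;
      if (rows[l] < row) { ind = l + 1; count -= step + 1; }
      else count = step;
    }
    return ind;                               // first position with >= row
  }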
  inline void add_elem_matrix_contiguous_rows
  (gmm::col_matrix<gmm::rsvector<scalar_type>> &K,
   const size_type &i1, const size_type &s1,
   const std::vector<size_type> &dofs2,
   base_vector &elem, scalar_type threshold) {

    gmm::elt_rsvector_<scalar_type> ev;

    base_vector::const_iterator it = elem.cbegin();
    bool first(true);
    for (const size_type &dof2 : dofs2) { // iteration on columns
      if (first) first = false;
      else it += s1;
      std::vector<gmm::elt_rsvector_<scalar_type>> &col = K[dof2];
      size_type nb = col.size();

      if (nb == 0) { // empty column: rows i1..i1+s1-1 are already sorted
        col.reserve(s1);
        for (size_type i = 0; i < s1; ++i) {
          ev.e = *(it+i);
          if (gmm::abs(ev.e) > threshold) { ev.c = i1 + i; col.push_back(ev); }
        }
      } else { // merge with the existing column, as in add_elem_matrix above
        size_type ind = 0;
        for (size_type i = 0; i < s1; ++i) {
          ev.e = *(it+i);
          if (gmm::abs(ev.e) > threshold) {
            ev.c = i1 + i;
            size_type count = nb - ind, step, l;
            while (count > 0) { // lower-bound search, resumed from ind
              step = count / 2; l = ind + step;
              if (col[l].c < ev.c) { ind = ++l; count -= step + 1; }
              else count = step;
            }
            auto itc = col.begin() + ind;
            if (ind != nb && itc->c == ev.c)
              itc->e += ev.e;
            else {
              if (nb - ind > 1300)
                GMM_WARNING2("Inefficient addition of element in rsvector with "
                             << col.size() - ind << " non-zero entries");
              col.push_back(ev);
              if (ind != nb) {
                itc = col.begin() + ind;
                auto ite = col.end(); --ite;
                auto itee = ite;
                for (; ite != itc; --ite) { --itee; *ite = *itee; }
                *itc = ev;
              }
              ++nb;
            }
            ++ind;
          }
        }
      }
    }
  }
  inline void populate_dofs_vector
  (std::vector<size_type> &dofs,
   const size_type &size, const size_type &ifirst, const size_type &qmult,
   const getfem::mesh::ind_set &mfdofs)
  {
    dofs.assign(size, ifirst);
    auto itd = dofs.begin();
    if (qmult == 1)
      for (const auto &dof : mfdofs) *itd++ += dof;
    else
      for (const auto &dof : mfdofs)
        for (size_type q = 0; q < qmult; ++q) *itd++ += dof + q;
  }

  inline void populate_dofs_vector // special case for qmult == 1
  (std::vector<size_type> &dofs, const size_type &size,
   const size_type &ifirst, const getfem::mesh::ind_set &mfdofs)
  {
    dofs.assign(size, ifirst);
    auto itd = dofs.begin();
    for (const auto &dof : mfdofs) *itd++ += dof;
  }

  inline void populate_contiguous_dofs_vector
  (std::vector<size_type> &dofs, const size_type &size,
   const size_type &ifirst)
  {
    dofs.assign(size, ifirst);
    for (size_type i=0; i < size; ++i) dofs[i] += i;
  }
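  // Illustration (not part of GetFEM): for a vector fem obtained by
  // replicating a scalar fem (qmult > 1), the global dofs of one element are
  // the scalar dofs, each expanded qmult times. E.g. scalar dofs {7, 12} with
  // ifirst = 100 and qmult = 2 yield {107, 108, 112, 113}:
  inline void sketch_expand_dofs(const std::vector<std::size_t> &scalar_dofs,
                                 std::size_t ifirst, std::size_t qmult,
                                 std::vector<std::size_t> &dofs)
  {
    dofs.clear();
    for (std::size_t dof : scalar_dofs)        // one block per scalar dof
      for (std::size_t q = 0; q < qmult; ++q)  // qmult consecutive components
        dofs.push_back(ifirst + dof + q);
  }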
  struct ga_instruction_matrix_assembly_base : public ga_instruction {
    const base_tensor &t;
    const fem_interpolation_context &ctx1, &ctx2;
    const scalar_type &alpha1, &alpha2, &coeff;
    const size_type &nbpt, &ipt;
    base_vector elem;
    bool interpolate;
    std::vector<size_type> dofs1, dofs2, dofs1_sort;
    void add_tensor_to_element_matrix(bool initialize, bool empty_weight) {
      if (initialize) {
        if (empty_weight) elem.resize(0);
        elem.resize(t.size());
        if (!empty_weight)
          copy_scaled_4(t, coeff*alpha1*alpha2, elem);
      } else if (!empty_weight)
        add_scaled_4(t, coeff*alpha1*alpha2, elem);
    }
    ga_instruction_matrix_assembly_base
    (const base_tensor &t_,
     const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     const scalar_type &a1, const scalar_type &a2, const scalar_type &coeff_,
     const size_type &nbpt_, const size_type &ipt_, bool interpolate_)
      : t(t_), ctx1(ctx1_), ctx2(ctx2_), alpha1(a1), alpha2(a2),
        coeff(coeff_), nbpt(nbpt_), ipt(ipt_), interpolate(interpolate_),
        dofs1(0), dofs2(0), dofs1_sort(0)
    {}
    const bool false_ = false;
    const size_type zero_ = 0;
  };
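  // In LaTeX terms, add_tensor_to_element_matrix accumulates the weighted
  // local matrix
  //   \[ E \mathrel{+}= c\,\alpha_1\,\alpha_2\, T_g
  //      \quad \text{over the Gauss points } g \text{ of one convex,} \]
  // where c is the integration coefficient (quadrature weight times Jacobian)
  // and \alpha_1, \alpha_2 are the scaling factors of the two variables.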
  struct ga_instruction_matrix_assembly_mf_mf
    : public ga_instruction_matrix_assembly_base
  {
    model_real_sparse_matrix &Krr, &Kru, &Kur, &Kuu;
    const gmm::sub_interval *const&I1, *const&I2, *const I1__, *const I2__;
    const mesh_fem *const&mf1, *const&mf2, *const mf1__, *const mf2__;
    const bool &reduced_mf1, &reduced_mf2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: matrix term assembly");
      if (!ctx1.is_convex_num_valid() || !ctx2.is_convex_num_valid()) return 0;

      bool initialize = (ipt == 0 || interpolate);
      bool empty_weight = (coeff == scalar_type(0));
      add_tensor_to_element_matrix(initialize, empty_weight); // t --> elem

      if (ipt == nbpt-1 || interpolate) { // finalize
        model_real_sparse_matrix &K = reduced_mf1 ? (reduced_mf2 ? Kuu : Kur)
                                                  : (reduced_mf2 ? Kru : Krr);
        GA_DEBUG_ASSERT(I1->size() && I2->size(), "Internal error");

        scalar_type ninf = gmm::vect_norminf(elem);
        if (ninf == scalar_type(0)) return 0;

        size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
        size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
        size_type ifirst1 = I1->first(), ifirst2 = I2->first();

        size_type N = ctx1.N();
        size_type qmult1 = mf1->get_qdim();
        if (qmult1 > 1) qmult1 /= mf1->fem_of_element(cv1)->target_dim();
        populate_dofs_vector(dofs1, s1, ifirst1, qmult1,
                             mf1->ind_scalar_basic_dof_of_element(cv1));
        if (mf1 == mf2 && cv1 == cv2) {
          if (ifirst1 == ifirst2) {
            add_elem_matrix(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
          } else {
            populate_dofs_vector(dofs2, dofs1.size(), ifirst2 - ifirst1, dofs1);
            add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
          }
        } else {
          N = std::max(N, ctx2.N());
          size_type qmult2 = mf2->get_qdim();
          if (qmult2 > 1) qmult2 /= mf2->fem_of_element(cv2)->target_dim();
          populate_dofs_vector(dofs2, s2, ifirst2, qmult2,
                               mf2->ind_scalar_basic_dof_of_element(cv2));
          add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
        }
      }
      return 0;
    }

    ga_instruction_matrix_assembly_mf_mf
    (const base_tensor &t_,
     model_real_sparse_matrix &Krr_, model_real_sparse_matrix &Kru_,
     model_real_sparse_matrix &Kur_, model_real_sparse_matrix &Kuu_,
     const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     const ga_instruction_set::variable_group_info &vgi1,
     const ga_instruction_set::variable_group_info &vgi2,
     const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
     bool interpolate_)
      : ga_instruction_matrix_assembly_base
        (t_, ctx1_, ctx2_, vgi1.alpha, vgi2.alpha, coeff_, nbpt_, ipt_,
         interpolate_),
        Krr(Krr_), Kru(Kru_), Kur(Kur_), Kuu(Kuu_),
        I1(vgi1.I), I2(vgi2.I), I1__(nullptr), I2__(nullptr),
        mf1(vgi1.mf), mf2(vgi2.mf), mf1__(nullptr), mf2__(nullptr),
        reduced_mf1(vgi1.reduced_mf), reduced_mf2(vgi2.reduced_mf) {}

    ga_instruction_matrix_assembly_mf_mf
    (const base_tensor &t_,
     model_real_sparse_matrix &Kxr_, model_real_sparse_matrix &Kxu_,
     const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     const gmm::sub_interval &I1_, const mesh_fem &mf1_, const scalar_type &a1,
     const ga_instruction_set::variable_group_info &vgi2,
     const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
     bool interpolate_)
      : ga_instruction_matrix_assembly_base
        (t_, ctx1_, ctx2_, a1, vgi2.alpha, coeff_, nbpt_, ipt_, interpolate_),
        Krr(Kxr_), Kru(Kxu_), Kur(Kxr_), Kuu(Kxu_),
        I1(I1__), I2(vgi2.I), I1__(&I1_), I2__(nullptr),
        mf1(mf1__), mf2(vgi2.mf), mf1__(&mf1_), mf2__(nullptr),
        reduced_mf1(false_), reduced_mf2(vgi2.reduced_mf) {}

    ga_instruction_matrix_assembly_mf_mf
    (const base_tensor &t_,
     model_real_sparse_matrix &Krx_, model_real_sparse_matrix &Kux_,
     const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     const ga_instruction_set::variable_group_info &vgi1,
     const gmm::sub_interval &I2_, const mesh_fem &mf2_, const scalar_type &a2,
     const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
     bool interpolate_)
      : ga_instruction_matrix_assembly_base
        (t_, ctx1_, ctx2_, vgi1.alpha, a2, coeff_, nbpt_, ipt_, interpolate_),
        Krr(Krx_), Kru(Krx_), Kur(Kux_), Kuu(Kux_),
        I1(vgi1.I), I2(I2__), I1__(nullptr), I2__(&I2_),
        mf1(vgi1.mf), mf2(mf2__), mf1__(nullptr), mf2__(&mf2_),
        reduced_mf1(vgi1.reduced_mf), reduced_mf2(false_) {}

    ga_instruction_matrix_assembly_mf_mf
    (const base_tensor &t_, model_real_sparse_matrix &K_,
     const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     const gmm::sub_interval &I1_, const mesh_fem &mf1_, const scalar_type &a1,
     const gmm::sub_interval &I2_, const mesh_fem &mf2_, const scalar_type &a2,
     const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
     bool interpolate_)
      : ga_instruction_matrix_assembly_base
        (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, interpolate_),
        Krr(K_), Kru(K_), Kur(K_), Kuu(K_),
        I1(I1__), I2(I2__), I1__(&I1_), I2__(&I2_),
        mf1(mf1__), mf2(mf2__), mf1__(&mf1_), mf2__(&mf2_),
        reduced_mf1(false_), reduced_mf2(false_) {}
  };
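  // Illustration (not part of GetFEM): the nested ternary in exec() routes
  // the element matrix to one of four global matrices, depending on whether
  // each variable's mesh_fem is reduced (assembled on basic dofs and reduced
  // afterwards). The same dispatch as a standalone helper:
  template <class MAT>
  inline MAT &sketch_assembly_target(bool reduced1, bool reduced2,
                                     MAT &Krr, MAT &Kru, MAT &Kur, MAT &Kuu)
  { return reduced1 ? (reduced2 ? Kuu : Kur) : (reduced2 ? Kru : Krr); }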
  struct ga_instruction_matrix_assembly_imd_mf
    : public ga_instruction_matrix_assembly_base
  {
    model_real_sparse_matrix &Kxr, &Kxu;
    const gmm::sub_interval *I1, *I2__, *const &I2;
    const im_data *imd1;
    const mesh_fem *const mf2__, *const &mf2;
    const bool &reduced_mf2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: matrix term assembly");
      if (!ctx1.is_convex_num_valid() || !ctx2.is_convex_num_valid()) return 0;

      bool empty_weight = (coeff == scalar_type(0));
      add_tensor_to_element_matrix(true, empty_weight); // t --> elem

      scalar_type ninf = gmm::vect_norminf(elem);
      if (ninf == scalar_type(0)) return 0;

      model_real_sparse_matrix &K = reduced_mf2 ? Kxu : Kxr;
      GA_DEBUG_ASSERT(I1->size() && I2->size(), "Internal error");
      size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
      size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
      size_type ifirst1 = I1->first(), ifirst2 = I2->first();
      if (imd1) ifirst1 += s1 * imd1->filtered_index_of_point(cv1, ipt);

      populate_contiguous_dofs_vector(dofs1, s1, ifirst1);
      size_type qmult2 = mf2->get_qdim();
      if (qmult2 > 1) qmult2 /= mf2->fem_of_element(cv2)->target_dim();
      populate_dofs_vector(dofs2, s2, ifirst2, qmult2,
                           mf2->ind_scalar_basic_dof_of_element(cv2));
      add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, ctx2.N());
      return 0;
    }

    ga_instruction_matrix_assembly_imd_mf
    (const base_tensor &t_,
     model_real_sparse_matrix &Kxr_, model_real_sparse_matrix &Kxu_,
     const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     const gmm::sub_interval &I1_, const im_data *imd1_, const scalar_type &a1,
     const ga_instruction_set::variable_group_info &vgi2,
     const scalar_type &coeff_, const size_type &ipt_)
      : ga_instruction_matrix_assembly_base
        (t_, ctx1_, ctx2_, a1, vgi2.alpha, coeff_, zero_, ipt_, false),
        Kxr(Kxr_), Kxu(Kxu_), I1(&I1_), I2__(nullptr), I2(vgi2.I),
        imd1(imd1_), mf2__(nullptr), mf2(vgi2.mf), reduced_mf2(vgi2.reduced_mf)
    {}

    ga_instruction_matrix_assembly_imd_mf
    (const base_tensor &t_, model_real_sparse_matrix &K_,
     const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     const gmm::sub_interval &I1_, const im_data *imd1_, const scalar_type &a1,
     const gmm::sub_interval &I2_, const mesh_fem &mf2_, const scalar_type &a2,
     const scalar_type &coeff_, const size_type &ipt_)
      : ga_instruction_matrix_assembly_base
        (t_, ctx1_, ctx2_, a1, a2, coeff_, zero_, ipt_, false),
        Kxr(K_), Kxu(K_), I1(&I1_), I2__(&I2_), I2(I2__),
        imd1(imd1_), mf2__(&mf2_), mf2(mf2__), reduced_mf2(false_) {}
  };
  struct ga_instruction_matrix_assembly_mf_imd
    : public ga_instruction_matrix_assembly_base
  {
    model_real_sparse_matrix &Krx, &Kux;
    const gmm::sub_interval *const &I1, *const I1__, *I2;
    const mesh_fem *const &mf1, *const mf1__;
    const bool &reduced_mf1;
    const im_data *imd2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: matrix term assembly");
      if (!ctx1.is_convex_num_valid() || !ctx2.is_convex_num_valid()) return 0;

      bool empty_weight = (coeff == scalar_type(0));
      add_tensor_to_element_matrix(true, empty_weight); // t --> elem

      scalar_type ninf = gmm::vect_norminf(elem);
      if (ninf == scalar_type(0)) return 0;

      model_real_sparse_matrix &K = reduced_mf1 ? Kux : Krx;
      GA_DEBUG_ASSERT(I1->size() && I2->size(), "Internal error");
      size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
      size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
      size_type ifirst1 = I1->first(), ifirst2 = I2->first();
      if (imd2) ifirst2 += s2 * imd2->filtered_index_of_point(cv2, ipt);

      size_type qmult1 = mf1->get_qdim();
      if (qmult1 > 1) qmult1 /= mf1->fem_of_element(cv1)->target_dim();
      populate_dofs_vector(dofs1, s1, ifirst1, qmult1,
                           mf1->ind_scalar_basic_dof_of_element(cv1));
      populate_contiguous_dofs_vector(dofs2, s2, ifirst2);
      add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, ctx1.N());
      return 0;
    }

    ga_instruction_matrix_assembly_mf_imd
    (const base_tensor &t_,
     model_real_sparse_matrix &Krx_, model_real_sparse_matrix &Kux_,
     const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     const ga_instruction_set::variable_group_info &vgi1,
     const gmm::sub_interval &I2_, const im_data *imd2_, const scalar_type &a2,
     const scalar_type &coeff_, const size_type &ipt_)
      : ga_instruction_matrix_assembly_base
        (t_, ctx1_, ctx2_, vgi1.alpha, a2, coeff_, zero_, ipt_, false),
        Krx(Krx_), Kux(Kux_), I1(vgi1.I), I1__(nullptr), I2(&I2_),
        mf1(vgi1.mf), mf1__(nullptr), reduced_mf1(vgi1.reduced_mf), imd2(imd2_)
    {}

    ga_instruction_matrix_assembly_mf_imd
    (const base_tensor &t_, model_real_sparse_matrix &K_,
     const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     const gmm::sub_interval &I1_, const mesh_fem &mf1_, const scalar_type &a1,
     const gmm::sub_interval &I2_, const im_data *imd2_, const scalar_type &a2,
     const scalar_type &coeff_, const size_type &ipt_)
      : ga_instruction_matrix_assembly_base
        (t_, ctx1_, ctx2_, a1, a2, coeff_, zero_, ipt_, false),
        Krx(K_), Kux(K_), I1(I1__), I1__(&I1_), I2(&I2_),
        mf1(mf1__), mf1__(&mf1_), reduced_mf1(false_), imd2(imd2_) {}
  };
  struct ga_instruction_matrix_assembly_imd_imd
    : public ga_instruction_matrix_assembly_base
  {
    model_real_sparse_matrix &K;
    const gmm::sub_interval &I1, &I2;
    const im_data *imd1, *imd2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: matrix term assembly");
      GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");

      bool empty_weight = (coeff == scalar_type(0));
      add_tensor_to_element_matrix(true, empty_weight); // t --> elem

      scalar_type ninf = gmm::vect_norminf(elem);
      if (ninf == scalar_type(0)) return 0;

      size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
      size_type ifirst1 = I1.first(), ifirst2 = I2.first();
      if (imd1)
        ifirst1 += s1 * imd1->filtered_index_of_point(ctx1.convex_num(), ipt);
      if (imd2)
        ifirst2 += s2 * imd2->filtered_index_of_point(ctx2.convex_num(), ipt);

      populate_contiguous_dofs_vector(dofs2, s2, ifirst2);
      add_elem_matrix_contiguous_rows(K, ifirst1, s1, dofs2, elem, ninf*1E-14);
      return 0;
    }
    ga_instruction_matrix_assembly_imd_imd
    (const base_tensor &t_, model_real_sparse_matrix &K_,
     const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     const gmm::sub_interval &I1_, const im_data *imd1_, const scalar_type &a1,
     const gmm::sub_interval &I2_, const im_data *imd2_, const scalar_type &a2,
     const scalar_type &coeff_, const size_type &ipt_)
      : ga_instruction_matrix_assembly_base
        (t_, ctx1_, ctx2_, a1, a2, coeff_, zero_, ipt_, false),
        K(K_), I1(I1_), I2(I2_), imd1(imd1_), imd2(imd2_) {}
  };
  struct ga_instruction_matrix_assembly_standard_scalar
    : public ga_instruction_matrix_assembly_base
  {
    model_real_sparse_matrix &K;
    const gmm::sub_interval &I1, &I2;
    const mesh_fem *pmf1, *pmf2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: matrix term assembly for standard "
                    "scalar fems");
      if (ipt == 0) {
        elem.resize(t.size());
        copy_scaled_4(t, coeff*alpha1*alpha2, elem);
      } else
        add_scaled_4(t, coeff*alpha1*alpha2, elem);

      if (ipt == nbpt-1) { // finalize
        GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");

        scalar_type ninf = gmm::vect_norminf(elem);
        if (ninf == scalar_type(0)) return 0;

        size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num(), N=ctx1.N();

        auto &ct1 = pmf1->ind_scalar_basic_dof_of_element(cv1);
        GA_DEBUG_ASSERT(ct1.size() == t.sizes()[0], "Internal error");
        populate_dofs_vector(dofs1, ct1.size(), I1.first(), ct1);

        if (pmf2 == pmf1 && cv1 == cv2) {
          if (I1.first() == I2.first()) {
            add_elem_matrix(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
          } else {
            populate_dofs_vector(dofs2, dofs1.size(), I2.first() - I1.first(),
                                 dofs1);
            add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
          }
        } else {
          auto &ct2 = pmf2->ind_scalar_basic_dof_of_element(cv2);
          GA_DEBUG_ASSERT(ct2.size() == t.sizes()[1], "Internal error");
          populate_dofs_vector(dofs2, ct2.size(), I2.first(), ct2);
          add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
        }
      }
      return 0;
    }
    ga_instruction_matrix_assembly_standard_scalar
    (const base_tensor &t_, model_real_sparse_matrix &K_,
     const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     const gmm::sub_interval &I1_, const gmm::sub_interval &I2_,
     const mesh_fem *mfn1_, const mesh_fem *mfn2_,
     const scalar_type &a1, const scalar_type &a2, const scalar_type &coeff_,
     const size_type &nbpt_, const size_type &ipt_)
      : ga_instruction_matrix_assembly_base
        (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, false),
        K(K_), I1(I1_), I2(I2_), pmf1(mfn1_), pmf2(mfn2_) {}
  };
  struct ga_instruction_matrix_assembly_standard_vector
    : public ga_instruction_matrix_assembly_base
  {
    model_real_sparse_matrix &K;
    const gmm::sub_interval &I1, &I2;
    const mesh_fem *pmf1, *pmf2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: matrix term assembly for standard "
                    "vector fems");
      if (ipt == 0) {
        elem.resize(t.size());
        copy_scaled_8(t, coeff*alpha1*alpha2, elem);
      } else
        add_scaled_8(t, coeff*alpha1*alpha2, elem);

      if (ipt == nbpt-1) { // finalize
        GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");

        scalar_type ninf = gmm::vect_norminf(elem);
        if (ninf == scalar_type(0)) return 0;
        size_type s1 = t.sizes()[0], s2 = t.sizes()[1], N = ctx1.N();

        size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();

        size_type qmult1 = pmf1->get_qdim();
        if (qmult1 > 1) qmult1 /= pmf1->fem_of_element(cv1)->target_dim();
        populate_dofs_vector(dofs1, s1, I1.first(), qmult1,
                             pmf1->ind_scalar_basic_dof_of_element(cv1));

        if (pmf2 == pmf1 && cv1 == cv2 && I1.first() == I2.first()) {
          add_elem_matrix(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
        } else {
          if (pmf2 == pmf1 && cv1 == cv2) {
            populate_dofs_vector(dofs2, dofs1.size(), I2.first() - I1.first(),
                                 dofs1);
          } else {
            size_type qmult2 = pmf2->get_qdim();
            if (qmult2 > 1) qmult2 /= pmf2->fem_of_element(cv2)->target_dim();
            populate_dofs_vector(dofs2, s2, I2.first(), qmult2,
                                 pmf2->ind_scalar_basic_dof_of_element(cv2));
          }
          add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
        }
      }
      return 0;
    }
    ga_instruction_matrix_assembly_standard_vector
    (const base_tensor &t_, model_real_sparse_matrix &K_,
     const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     const gmm::sub_interval &I1_, const gmm::sub_interval &I2_,
     const mesh_fem *mfn1_, const mesh_fem *mfn2_,
     const scalar_type &a1, const scalar_type &a2, const scalar_type &coeff_,
     const size_type &nbpt_, const size_type &ipt_)
      : ga_instruction_matrix_assembly_base
        (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, false),
        K(K_), I1(I1_), I2(I2_), pmf1(mfn1_), pmf2(mfn2_) {}
  };
  template <int QQ>
  struct ga_instruction_matrix_assembly_standard_vector_opt10
    : public ga_instruction_matrix_assembly_base
  {
    model_real_sparse_matrix &K;
    const gmm::sub_interval &I1, &I2;
    const mesh_fem *pmf1, *pmf2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: matrix term assembly for standard "
                    "vector fems optimized for format 10 qdim " << QQ);
      size_type s1_q = QQ*t.sizes()[0];
      size_type ss1 = t.sizes()[0]/QQ, ss2 = t.sizes()[1]/QQ;
      scalar_type e = coeff*alpha1*alpha2;
      if (ipt == 0) {
        elem.resize(ss1*ss2);
        auto itel = elem.begin();
        for (size_type j = 0; j < ss2; ++j) {
          auto it = t.begin() + j*s1_q;
          for (size_type i = 0; i < ss1; ++i, it += QQ)
            *itel++ = (*it) * e;
        }
      } else {
        auto itel = elem.begin();
        for (size_type j = 0; j < ss2; ++j) {
          auto it = t.begin() + j*s1_q;
          for (size_type i = 0; i < ss1; ++i, it += QQ)
            *itel++ += (*it) * e;
        }
      }
      if (ipt == nbpt-1) { // finalize
        GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");

        scalar_type ninf = gmm::vect_norminf(elem)*1E-14;
        if (ninf == scalar_type(0)) return 0;
        size_type N = ctx1.N();
        size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
        size_type i1 = I1.first(), i2 = I2.first();

        populate_dofs_vector(dofs1, ss1, i1,
                             pmf1->ind_scalar_basic_dof_of_element(cv1));
        bool same_dofs(pmf2 == pmf1 && cv1 == cv2 && i1 == i2);

        if (!same_dofs)
          populate_dofs_vector(dofs2, ss2, i2,
                               pmf2->ind_scalar_basic_dof_of_element(cv2));
        std::vector<size_type> &dofs2_ = same_dofs ? dofs1 : dofs2;
        add_elem_matrix(K, dofs1, dofs2_, dofs1_sort, elem, ninf, N);
        for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
        if (!same_dofs) for (size_type i = 0; i < ss2; ++i) (dofs2[i])++;
        add_elem_matrix(K, dofs1, dofs2_, dofs1_sort, elem, ninf, N);
        if (QQ >= 3) {
          for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
          if (!same_dofs) for (size_type i = 0; i < ss2; ++i) (dofs2[i])++;
          add_elem_matrix(K, dofs1, dofs2_, dofs1_sort, elem, ninf, N);
        }
      }
      return 0;
    }
    ga_instruction_matrix_assembly_standard_vector_opt10
    (const base_tensor &t_, model_real_sparse_matrix &Kn_,
     const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     const gmm::sub_interval &In1_, const gmm::sub_interval &In2_,
     const mesh_fem *mfn1_, const mesh_fem *mfn2_,
     const scalar_type &a1, const scalar_type &a2, const scalar_type &coeff_,
     const size_type &nbpt_, const size_type &ipt_)
      : ga_instruction_matrix_assembly_base
        (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, false),
        K(Kn_), I1(In1_), I2(In2_), pmf1(mfn1_), pmf2(mfn2_)
    {
      static_assert(QQ >= 2 && QQ <= 3,
                    "Template implemented only for QQ=2 and QQ=3");
    }
  };
  struct ga_instruction_condensation_sub : public ga_instruction {
    // one such instruction is used for every cluster of intercoupled
    // condensed variables
    gmm::dense_matrix<base_tensor *> KQJprime;
    std::vector<base_tensor *> RQprime;
    gmm::dense_matrix<base_tensor const *> KQQloc, KQJloc;
    base_tensor invKqqqq, Kqqjj;
    base_vector Rqq;
    std::vector<std::array<size_type,3>> partQ, partJ;
    const scalar_type &coeff;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: variable cluster subdiagonal condensation");
      // copy from KQQloc to invKqqqq
      for (const auto &qqq1 : partQ) {
        size_type q1 = qqq1[0], qq1start = qqq1[1], qq1end = qqq1[2];
        for (const auto &qqq2 : partQ) {
          size_type q2 = qqq2[0], qq2start = qqq2[1], qq2end = qqq2[2];
          if (KQQloc(q1,q2)) {
            auto itr = KQQloc(q1,q2)->cbegin();
            GMM_ASSERT1(KQQloc(q1,q2)->size()
                        == (qq1end-qq1start)*(qq2end-qq2start),
                        "Internal error");
            for (size_type qq2=qq2start; qq2 < qq2end; ++qq2)
              for (size_type qq1=qq1start; qq1 < qq1end; ++qq1)
                invKqqqq(qq1,qq2) = *itr++;
          }
        }
      }
      // inversion of the local subdiagonal block
      bgeot::lu_inverse(&(invKqqqq[0]), invKqqqq.size(0));

      // resize the KQJprime submatrices, as primary variable sizes may
      // change dynamically
      size_type prev_j = 0;
      for (auto &&jjj : partJ) {
        size_type j = jjj[0];
        size_type new_j(0);
        for (const auto &qqq : partQ) {
          size_type q = qqq[0];
          if (KQJloc(q,j)) {
            if (new_j)
              GMM_ASSERT1(new_j == KQJloc(q,j)->size(1), "Internal error");
            else
              new_j = KQJloc(q,j)->size(1);
          }
        }
        for (const auto &qqq : partQ) {
          size_type q = qqq[0];
          KQJprime(q,j)->adjust_sizes(qqq[2]-qqq[1], new_j);
        }
        jjj[1] = prev_j;
        prev_j += new_j;
        jjj[2] = prev_j;
      }

      Kqqjj.adjust_sizes(partQ.back()[2], partJ.back()[2]);
      gmm::clear(Kqqjj.as_vector());
      gmm::clear(Rqq);

      // multiply invKqqqq with all submatrices in KQJloc and RQprime and
      // store the results in Kqqjj and Rqq
      for (const auto &jjj : partJ) {
        size_type j = jjj[0], jjstart = jjj[1], jjend = jjj[2];
        for (const auto &qqq2 : partQ) {
          size_type q2 = qqq2[0], qq2start = qqq2[1], qq2end = qqq2[2];
          if (KQJloc(q2,j)) {
            auto itr = KQJloc(q2,j)->begin();
            for (size_type jj=jjstart; jj < jjend; ++jj)
              for (size_type qq2=qq2start; qq2 < qq2end; ++qq2, ++itr)
                for (size_type qq1=0; qq1 < partQ.back()[2]; ++qq1)
                  Kqqjj(qq1,jj) += invKqqqq(qq1,qq2)*(*itr);
            GMM_ASSERT1(itr == KQJloc(q2,j)->cend(), "Internal error");
          }
        }
      }
      for (const auto &qqq2 : partQ) {
        size_type q2 = qqq2[0], qq2start = qqq2[1], qq2end = qqq2[2];
        if (RQprime[q2]) {
          auto itr = RQprime[q2]->cbegin();
          for (size_type qq2=qq2start; qq2 < qq2end; ++qq2, ++itr)
            for (size_type qq1=0; qq1 < invKqqqq.size(0); ++qq1)
              Rqq[qq1] += invKqqqq(qq1,qq2)*(*itr);
          GMM_ASSERT1(itr == RQprime[q2]->cend(), "Internal error");
        }
      }

      // distribute the results from Kqqjj and Rqq to the output tensors
      // KQJprime and RQprime
      for (const auto &qqq1 : partQ) {
        size_type q1 = qqq1[0], qq1start = qqq1[1], qq1end = qqq1[2];
        if (RQprime[q1]) {
          auto itw = RQprime[q1]->begin();
          for (size_type qq1=qq1start; qq1 < qq1end; ++qq1)
            *itw++ = Rqq[qq1]/coeff;
        }
        for (const auto &jjj2 : partJ) {
          size_type j2 = jjj2[0], jj2start = jjj2[1], jj2end = jjj2[2];
          auto itw = KQJprime(q1,j2)->begin();
          for (size_type jj2=jj2start; jj2 < jj2end; ++jj2)
            for (size_type qq1=qq1start; qq1 < qq1end; ++qq1)
              *itw++ = Kqqjj(qq1,jj2);
        }
      }
      return 0;
    }

    ga_instruction_condensation_sub(gmm::dense_matrix<base_tensor *> &KQJpr,
                                    std::vector<base_tensor *> &RQpr,
                                    const gmm::dense_matrix<base_tensor *> &KQQ,
                                    const gmm::dense_matrix<base_tensor *> &KQJ,
                                    const std::set<size_type> &Qset,
                                    const scalar_type &coeff_)
      : KQJprime(KQJpr), RQprime(RQpr), coeff(coeff_)
    {
      // store the input pointers as pointers to const
      KQQloc.resize(KQQ.nrows(), KQQ.ncols());
      KQJloc.resize(KQJ.nrows(), KQJ.ncols());
      for (size_type i=0; i < KQQ.as_vector().size(); ++i) KQQloc[i] = KQQ[i];
      for (size_type i=0; i < KQJ.as_vector().size(); ++i) KQJloc[i] = KQJ[i];

      for (size_type j=0; j < KQJ.ncols(); ++j)
        for (const size_type &q : Qset)
          if (KQJ(q,j)) {
            partJ.push_back(std::array<size_type,3>{j,0,0});
            break;
          }

      for (const size_type &q : Qset)
        partQ.push_back(std::array<size_type,3>{q,0,0});
      size_type prev_q = 0;
      for (auto &qqq1 : partQ) {
        size_type q1 = qqq1[0];
        size_type new_q(0);
        for (const size_type &q2 : Qset)
          if (KQQ(q1,q2) && KQQ(q2,q1)) {
            if (new_q)
              GMM_ASSERT1(new_q == KQQ(q1,q2)->size(0) &&
                          new_q == KQQ(q2,q1)->size(1), "Internal error");
            else
              new_q = KQQ(q1,q2)->size(0);
          }
        qqq1[1] = prev_q;
        prev_q += new_q;
        qqq1[2] = prev_q;
      }
      invKqqqq.adjust_sizes(partQ.back()[2], partQ.back()[2]);
      Rqq.resize(partQ.back()[2]);
    }
  };
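  // In LaTeX terms, with Q the condensed (internal) variables of the cluster
  // and J the kept (primary) ones, the instruction above inverts the cluster
  // block K_{QQ} and prepares
  //   \[ K'_{QJ} = K_{QQ}^{-1} K_{QJ}, \qquad R'_{Q} = K_{QQ}^{-1} R_{Q}, \]
  // so that the following "super" instructions can form the Schur complement
  //   \[ \tilde K_{IJ} = K_{IJ} - K_{IQ} K_{QQ}^{-1} K_{QJ}, \qquad
  //      \tilde R_{I} = R_{I} - K_{IQ} K_{QQ}^{-1} R_{Q}. \]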
  struct ga_instruction_condensation_super_K : public ga_instruction {
    base_tensor &Kij;
    std::vector<base_tensor *> KiQ, KQj; // indexed w.r.t. q in Q
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: contribution of condensation to kept part");

      size_type m = KiQ[0]->size(0);
      size_type n = KQj[0]->size(1);
      Kij.adjust_sizes(m,n);
      gmm::clear(Kij.as_vector());
      for (size_type k = 0; k < KiQ.size(); ++k) {
        const base_tensor &K1 = *KiQ[k], &K2 = *KQj[k];
        size_type qqsize = K1.size(1);
        GMM_ASSERT1(K1.size(0) == m && K2.size(1) == n && K2.size(0) == qqsize,
                    "Internal error");

        base_tensor::iterator it = Kij.begin();
        for (size_type jj = 0; jj < n; ++jj)
          for (size_type ii = 0; ii < m; ++ii, ++it)
            for (size_type qq = 0; qq < qqsize; ++qq)
              *it -= K1[ii+qq*m] * K2[qq+jj*qqsize];
        GA_DEBUG_ASSERT(it == Kij.end(), "Wrong sizes");
      }
      return 0;
    }
    ga_instruction_condensation_super_K(base_tensor &Kij_,
                                        const std::vector<base_tensor *> KiQ_,
                                        const std::vector<base_tensor *> KQj_)
      : Kij(Kij_), KiQ(KiQ_), KQj(KQj_)
    {
      GMM_ASSERT1(KiQ.size() == KQj.size(), "Internal error");
    }
  };
  struct ga_instruction_condensation_super_R : public ga_instruction {
    base_tensor &Ri;
    std::vector<base_tensor *> KiQ, RQpr; // indexed w.r.t. q in Q
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: contribution of condensation to primary rhs");

      size_type m = KiQ[0]->size(0);
      Ri.adjust_sizes(m);
      gmm::clear(Ri.as_vector());
      for (size_type k = 0; k < KiQ.size(); ++k) {
        const base_tensor &K1 = *KiQ[k], &R2 = *RQpr[k];
        size_type qqsize = K1.size(1);
        GMM_ASSERT1(K1.size(0) == m && R2.size(0) == qqsize, "Internal error");
        base_tensor::iterator it = Ri.begin();
        for (size_type ii = 0; ii < m; ++ii, ++it)
          for (size_type qq = 0; qq < qqsize; ++qq)
            *it -= K1[ii+qq*m] * R2[qq];
        GA_DEBUG_ASSERT(it == Ri.end(), "Wrong sizes");
      }
      return 0;
    }
    ga_instruction_condensation_super_R(base_tensor &Ri_,
                                        const std::vector<base_tensor *> KiQ_,
                                        const std::vector<base_tensor *> RQpr_)
      : Ri(Ri_), KiQ(KiQ_), RQpr(RQpr_)
    {
      GMM_ASSERT1(KiQ.size() == RQpr.size(), "Internal error");
    }
  };
  static void extend_variable_in_gis(const ga_workspace &workspace,
                                     const std::string &varname,
                                     ga_instruction_set &gis) {
    if (workspace.variable_group_exists(varname)) {
      for (const std::string &v : workspace.variable_group(varname))
        extend_variable_in_gis(workspace, v, gis);
    } else if (gis.extended_vars.count(varname) == 0) {
      const mesh_fem *mf = workspace.associated_mf(varname);
      if (mf->is_reduced()) {
        auto n = (mf->get_qdim() == 1) ? workspace.qdim(varname) : 1;
        base_vector &U = gis.really_extended_vars[varname];
        gmm::resize(U, mf->nb_basic_dof() * n);
        mf->extend_vector(workspace.value(varname), U);
        gis.extended_vars[varname] = &(gis.really_extended_vars[varname]);
      } else {
        gis.extended_vars[varname] = &(workspace.value(varname));
      }
    }
  }
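  // Illustration (not part of GetFEM): a reduced mesh_fem stores U on the
  // reduced dofs and relates it to the basic dof vector through the extension
  // matrix E, U_basic = E U_reduced; extend_vector applies E. A generic
  // sparse matrix-vector product of that shape, with E given row-wise as
  // (column, value) pairs:
  inline void sketch_extend_vector
  (const std::vector<std::vector<std::pair<std::size_t,double>>> &E_rows,
   const std::vector<double> &U_reduced, std::vector<double> &U_basic)
  {
    U_basic.assign(E_rows.size(), 0.);
    for (std::size_t i = 0; i < E_rows.size(); ++i)   // row i of E
      for (const auto &ce : E_rows[i])                // (column, value) pairs
        U_basic[i] += ce.second * U_reduced[ce.first];
  }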
  static void ga_clear_node_list
  (pga_tree_node pnode, std::map<scalar_type,
   std::list<pga_tree_node>> &node_list) {
    std::list<pga_tree_node> &loc_node_list = node_list[pnode->hash_value];
    for (std::list<pga_tree_node>::iterator it = loc_node_list.begin();
         it != loc_node_list.end(); ) {
      if (*it == pnode) it = loc_node_list.erase(it);
      else ++it;
    }
    for (size_type i = 0; i < pnode->children.size(); ++i)
      ga_clear_node_list(pnode->children[i], node_list);
  }
  static void ga_compile_node(const pga_tree_node pnode,
                              ga_workspace &workspace,
                              ga_instruction_set &gis,
                              ga_instruction_set::region_mim_instructions &rmi,
                              const mesh &m, bool function_case,
                              ga_if_hierarchy &if_hierarchy) {

    if (pnode->node_type == GA_NODE_PREDEF_FUNC ||
        pnode->node_type == GA_NODE_OPERATOR ||
        pnode->node_type == GA_NODE_SPEC_FUNC ||
        pnode->node_type == GA_NODE_CONSTANT ||
        pnode->node_type == GA_NODE_ALLINDICES ||
        pnode->node_type == GA_NODE_RESHAPE ||
        pnode->node_type == GA_NODE_SWAP_IND ||
        pnode->node_type == GA_NODE_IND_MOVE_LAST ||
        pnode->node_type == GA_NODE_CONTRACT) return;
    pga_instruction pgai;
    ga_if_hierarchy *pif_hierarchy = &if_hierarchy;
    ga_if_hierarchy new_if_hierarchy;

    const mesh_fem *mf1 = 0, *mf2 = 0;
    const mesh_fem **mfg1 = 0, **mfg2 = 0;
    fem_interpolation_context *pctx1 = 0, *pctx2 = 0;
    bool tensor_to_clear = false;
    bool tensor_to_adapt = false;

    if (pnode->test_function_type) {
      if (pnode->name_test1.size())
        mf1 = workspace.associated_mf(pnode->name_test1);
      if (mf1) {
        pctx1 = &(gis.ctx);
        const std::string &intn1 = pnode->interpolate_name_test1;
        if (intn1.size()) {
          if (workspace.secondary_domain_exists(intn1)) {
            pctx1 = &(rmi.secondary_domain_infos.ctx);
          } else {
            tensor_to_adapt = true;
            pctx1 = &(rmi.interpolate_infos[intn1].ctx);
            if (workspace.variable_group_exists(pnode->name_test1)) {
              ga_instruction_set::variable_group_info &vgi =
                rmi.interpolate_infos[intn1].groups_info[pnode->name_test1];
              mfg1 = &(vgi.mf); mf1 = 0;
            }
          }
        }
      }

      if (pnode->name_test2.size())
        mf2 = workspace.associated_mf(pnode->name_test2);
      if (mf2) {
        pctx2 = &(gis.ctx);
        const std::string &intn2 = pnode->interpolate_name_test2;
        if (intn2.size()) {
          if (workspace.secondary_domain_exists(intn2)) {
            pctx2 = &(rmi.secondary_domain_infos.ctx);
          } else {
            tensor_to_adapt = true;
            pctx2 = &(rmi.interpolate_infos[intn2].ctx);
            if (workspace.variable_group_exists(pnode->name_test2)) {
              ga_instruction_set::variable_group_info &vgi =
                rmi.interpolate_infos[intn2].groups_info[pnode->name_test2];
              mfg2 = &(vgi.mf); mf2 = 0;
            }
          }
        }
      }
    }
    // Produce a resize instruction, stored only if no equivalent node is
    // detected and the mesh is not uniform.
    pnode->t.set_to_original(); pnode->t.set_sparsity(0, 0);
    bool is_uniform = false;
    if (pnode->test_function_type == 1) {
      if (mf1 || mfg1)
        pgai = std::make_shared<ga_instruction_first_ind_tensor>
          (pnode->tensor(), *pctx1, pnode->qdim1, mf1, mfg1);
      if (mf1 && mf1->is_uniform())
        { is_uniform = true; pctx1->invalid_convex_num(); }
    } else if (pnode->test_function_type == 2) {
      if (mf2 || mfg2)
        pgai = std::make_shared<ga_instruction_first_ind_tensor>
          (pnode->tensor(), *pctx2, pnode->qdim2, mf2, mfg2);
      if (mf2 && mf2->is_uniform())
        { is_uniform = true; pctx2->invalid_convex_num(); }
    } else if (pnode->test_function_type == 3) {
      if ((mf1 || mfg1) && (mf2 || mfg2)) {
        pgai = std::make_shared<ga_instruction_two_first_ind_tensor>
          (pnode->tensor(), *pctx1, *pctx2, pnode->qdim1, mf1, mfg1,
           pnode->qdim2, mf2, mfg2);
        if (mf1 && mf1->is_uniform() && mf2 && mf2->is_uniform()) {
          is_uniform = true;
          pctx1->invalid_convex_num();
          pctx2->invalid_convex_num();
        }
      } else if (mf1 || mfg1) {
        pgai = std::make_shared<ga_instruction_first_ind_tensor>
          (pnode->tensor(), *pctx1, pnode->qdim1, mf1, mfg1);
        if (mf1 && mf1->is_uniform())
          { is_uniform = true; pctx1->invalid_convex_num(); }
      } else if (mf2 || mfg2) {
        pgai = std::make_shared<ga_instruction_second_ind_tensor>
          (pnode->tensor(), *pctx2, pnode->qdim2, mf2, mfg2);
        if (mf2 && mf2->is_uniform())
          { is_uniform = true; pctx2->invalid_convex_num(); }
      }
    }
    // Optimization: detection of an already compiled equivalent node
    pnode->t.set_to_original();
    if (rmi.node_list.find(pnode->hash_value) != rmi.node_list.end()) {
      for (pga_tree_node &pnode1 : rmi.node_list[pnode->hash_value]) {
        // corresponding instruction (otherwise only equivalent)
        if (sub_tree_are_equal(pnode, pnode1, workspace, 1)) {
          pnode->t.set_to_copy(pnode1->t);
          return;
        }
        // corresponding instruction with transposed test functions
        if (sub_tree_are_equal(pnode, pnode1, workspace, 2)) {
          if (pnode->nb_test_functions() == 2) {
            if (pgai) { // resize instruction if needed
              if (is_uniform) { pgai->exec(); }
              else { rmi.instructions.push_back(std::move(pgai)); }
            }
            pgai = std::make_shared<ga_instruction_transpose_test>
              (pnode->tensor(), pnode1->tensor());
            rmi.instructions.push_back(std::move(pgai));
          } else {
            pnode->t.set_to_copy(pnode1->t);
          }
          return;
        }
        std::stringstream ss;
        ss << "Detected wrong equivalent nodes:" << endl;
        ga_print_node(pnode, ss);
        ss << endl << " and " << endl;
        ga_print_node(pnode1, ss);
        ss << endl << "No problem, but hash values could be adapted." << endl;
        GMM_TRACE2(ss.str());
      }
    }

    if (pgai) { // resize instruction if needed, no equivalent node found
      if (is_uniform)
        { pgai->exec(); } // execute once
      else {
        if (tensor_to_adapt)
          rmi.instructions.push_back(std::move(pgai));
        else
          rmi.elt_instructions.push_back(std::move(pgai));
      }
    }
    size_type interpolate_filter_inst = rmi.instructions.size();
    if (pnode->node_type == GA_NODE_INTERPOLATE_FILTER) {
      pgai = pga_instruction();
      rmi.instructions.push_back(std::move(pgai));
      if_hierarchy.increment();
      new_if_hierarchy.child_of(if_hierarchy);
      pif_hierarchy = &new_if_hierarchy;
    }

    for (size_type i = 0; i < pnode->children.size(); ++i)
      ga_compile_node(pnode->children[i], workspace, gis, rmi, m,
                      function_case, *pif_hierarchy);

    if (pnode->node_type == GA_NODE_INTERPOLATE_FILTER) {
      const std::string &intn = pnode->interpolate_name;
      ga_instruction_set::interpolate_info &inin = rmi.interpolate_infos[intn];
      pgai = std::make_shared<ga_instruction_interpolate_filter>
        (pnode->tensor(), inin, pnode->nbc1,
         int(rmi.instructions.size() - interpolate_filter_inst));
      rmi.instructions[interpolate_filter_inst].swap(pgai);
      pgai = std::make_shared<ga_instruction_copy_tensor>
        (pnode->tensor(), pnode->children[0]->tensor());
      rmi.instructions.push_back(std::move(pgai));
      ga_clear_node_list(pnode->children[0], rmi.node_list);
    }
    static scalar_type minus = -scalar_type(1);
    size_type nbch = pnode->children.size();
    pga_tree_node child0 = (nbch > 0) ? pnode->children[0] : 0;
    pga_tree_node child1 = (nbch > 1) ? pnode->children[1] : 0;
    bgeot::multi_index mi;
    const bgeot::multi_index &size0 = child0 ? child0->t.sizes() : mi;
    size_type dim0 = child0 ? child0->tensor_order() : 0;
    size_type dim1 = child1 ? child1->tensor_order() : 0;
    switch (pnode->node_type) {

    case GA_NODE_PREDEF_FUNC: case GA_NODE_OPERATOR: case GA_NODE_SPEC_FUNC:
    case GA_NODE_CONSTANT: case GA_NODE_ALLINDICES: case GA_NODE_ZERO:
    case GA_NODE_RESHAPE: case GA_NODE_CROSS_PRODUCT:
    case GA_NODE_SWAP_IND: case GA_NODE_IND_MOVE_LAST:
    case GA_NODE_CONTRACT: case GA_NODE_INTERPOLATE_FILTER:
      break;

    case GA_NODE_X:
      GMM_ASSERT1(!function_case,
                  "No use of X is allowed in scalar functions");
      if (pnode->nbc1) {
        GA_DEBUG_ASSERT(pnode->tensor().size() == 1, "dimensions mismatch");
        GMM_ASSERT1(pnode->nbc1 <= m.dim(),
                    "Bad index for X in expression");
        pgai = std::make_shared<ga_instruction_X_component>
          (pnode->tensor()[0], gis.ctx, pnode->nbc1-1);
      } else {
        if (pnode->tensor().size() != m.dim())
          pnode->init_vector_tensor(m.dim());
        pgai = std::make_shared<ga_instruction_X>(pnode->tensor(), gis.ctx);
      }
      rmi.instructions.push_back(std::move(pgai));
      break;
    case GA_NODE_ELT_SIZE:
      GMM_ASSERT1(!function_case,
                  "No use of element_size is allowed in functions");
      if (pnode->tensor().size() != 1) pnode->init_scalar_tensor(0);
      pgai = std::make_shared<ga_instruction_element_size>
        (pnode->tensor(), gis.elt_size);
      gis.need_elt_size = true;
      rmi.instructions.push_back(std::move(pgai));
      break;

    case GA_NODE_ELT_K:
      GMM_ASSERT1(!function_case,
                  "No use of element_K is allowed in functions");
      pgai = std::make_shared<ga_instruction_element_K>(pnode->tensor(),
                                                        gis.ctx);
      rmi.instructions.push_back(std::move(pgai));
      break;

    case GA_NODE_ELT_B:
      GMM_ASSERT1(!function_case,
                  "No use of element_B is allowed in functions");
      pgai = std::make_shared<ga_instruction_element_B>(pnode->tensor(),
                                                        gis.ctx);
      rmi.instructions.push_back(std::move(pgai));
      break;

    case GA_NODE_NORMAL:
      {
        GMM_ASSERT1(!function_case,
                    "No use of Normal is allowed in functions");
        if (pnode->tensor().size() != m.dim())
          pnode->init_vector_tensor(m.dim());
        const mesh_im_level_set *mimls
          = dynamic_cast<const mesh_im_level_set *>(rmi.im);
        if (mimls && mimls->location()==mesh_im_level_set::INTEGRATE_BOUNDARY) {
          // take the level-set normal vector in this case
          pgai = std::make_shared<ga_instruction_level_set_normal_vector>
            (pnode->tensor(), mimls, gis.ctx);
          rmi.instructions.push_back(std::move(pgai));
        } else {
          pgai = std::make_shared<ga_instruction_copy_Normal>
            (pnode->tensor(), gis.Normal);
          rmi.instructions.push_back(std::move(pgai));
        }
      }
      break;
    case GA_NODE_INTERPOLATE_X:
    case GA_NODE_INTERPOLATE_NORMAL:
      GMM_ASSERT1(!function_case,
                  "No use of Interpolate is allowed in functions");
      if (pnode->tensor().size() != m.dim())
        pnode->init_vector_tensor(m.dim());
      if (pnode->node_type == GA_NODE_INTERPOLATE_X)
        pgai = std::make_shared<ga_instruction_copy_interpolated_small_vect>
          (pnode->tensor(),
           rmi.interpolate_infos[pnode->interpolate_name].pt_y,
           rmi.interpolate_infos[pnode->interpolate_name]);
      else if (pnode->node_type == GA_NODE_INTERPOLATE_NORMAL)
        pgai = std::make_shared<ga_instruction_copy_Normal>
          (pnode->tensor(),
           rmi.interpolate_infos[pnode->interpolate_name].Normal);
      rmi.instructions.push_back(std::move(pgai));
      break;

    case GA_NODE_SECONDARY_DOMAIN_X:
    case GA_NODE_SECONDARY_DOMAIN_NORMAL:
      {
        GMM_ASSERT1(!function_case,
                    "No use of Secondary_domain is allowed in functions");
        auto psd = workspace.secondary_domain(pnode->interpolate_name);
        size_type sddim = psd->mim().linked_mesh().dim();
        if (pnode->tensor().size() != sddim)
          pnode->init_vector_tensor(sddim);
        if (pnode->node_type == GA_NODE_SECONDARY_DOMAIN_X)
          pgai = std::make_shared<ga_instruction_X>
            (pnode->tensor(), rmi.secondary_domain_infos.ctx);
        else if (pnode->node_type == GA_NODE_SECONDARY_DOMAIN_NORMAL)
          pgai = std::make_shared<ga_instruction_copy_Normal>
            (pnode->tensor(), rmi.secondary_domain_infos.Normal);
        rmi.instructions.push_back(std::move(pgai));
      }
      break;
    case GA_NODE_VAL: case GA_NODE_GRAD:
    case GA_NODE_HESS: case GA_NODE_DIVERG:
    case GA_NODE_ELEMENTARY_VAL: case GA_NODE_ELEMENTARY_GRAD:
    case GA_NODE_ELEMENTARY_HESS: case GA_NODE_ELEMENTARY_DIVERG:
    case GA_NODE_XFEM_PLUS_VAL: case GA_NODE_XFEM_PLUS_GRAD:
    case GA_NODE_XFEM_PLUS_HESS: case GA_NODE_XFEM_PLUS_DIVERG:
    case GA_NODE_XFEM_MINUS_VAL: case GA_NODE_XFEM_MINUS_GRAD:
    case GA_NODE_XFEM_MINUS_HESS: case GA_NODE_XFEM_MINUS_DIVERG:
      {
        bool is_elementary = (pnode->node_type == GA_NODE_ELEMENTARY_VAL ||
                              pnode->node_type == GA_NODE_ELEMENTARY_GRAD ||
                              pnode->node_type == GA_NODE_ELEMENTARY_HESS ||
                              pnode->node_type == GA_NODE_ELEMENTARY_DIVERG);
        if (function_case) {
          GMM_ASSERT1(!is_elementary,
                      "No elementary transformation is allowed in functions");
          GMM_ASSERT1(pnode->node_type != GA_NODE_XFEM_PLUS_VAL &&
                      pnode->node_type != GA_NODE_XFEM_PLUS_GRAD &&
                      pnode->node_type != GA_NODE_XFEM_PLUS_HESS &&
                      pnode->node_type != GA_NODE_XFEM_PLUS_DIVERG,
                      "Xfem_plus not allowed in functions");
          GMM_ASSERT1(pnode->node_type != GA_NODE_XFEM_MINUS_VAL &&
                      pnode->node_type != GA_NODE_XFEM_MINUS_GRAD &&
                      pnode->node_type != GA_NODE_XFEM_MINUS_HESS &&
                      pnode->node_type != GA_NODE_XFEM_MINUS_DIVERG,
                      "Xfem_minus not allowed in functions");
          const mesh_fem *mf = workspace.associated_mf(pnode->name);
          const im_data *imd = workspace.associated_im_data(pnode->name);
          GMM_ASSERT1(!mf, "No fem expression is allowed in "
                      "function expression");
          GMM_ASSERT1(!imd, "No integration method data is allowed in "
                      "function expression");
          if (gmm::vect_size(workspace.value(pnode->name)) == 1)
            pgai = std::make_shared<ga_instruction_copy_scalar>
              (pnode->tensor()[0], (workspace.value(pnode->name))[0]);
          else
            pgai = std::make_shared<ga_instruction_copy_vect>
              (pnode->tensor().as_vector(), workspace.value(pnode->name));
          rmi.instructions.push_back(std::move(pgai));
        } else {
          const mesh_fem *mf = workspace.associated_mf(pnode->name), *mfo=mf;
          const im_data *imd = workspace.associated_im_data(pnode->name);

          if (is_elementary) {
            mf = workspace.associated_mf(pnode->elementary_target);
            GMM_ASSERT1(mf && mfo,
                        "Wrong context for elementary transformation");
            GMM_ASSERT1(&(mfo->linked_mesh()) == &(m),
                        "The finite element of variable " << pnode->name
                        << " has to be defined on the same mesh as the "
                        << "integration method or interpolation used");
          }

          if (imd) {
            pgai = std::make_shared<ga_instruction_extract_local_im_data>
              (pnode->tensor(), *imd, workspace.value(pnode->name),
               gis.pai, gis.ctx, workspace.qdim(pnode->name));
            rmi.instructions.push_back(std::move(pgai));
          } else {
            GMM_ASSERT1(mf, "Internal error");

            GMM_ASSERT1(&(mf->linked_mesh()) == &(m),
                        "The finite element of variable " <<
                        (is_elementary ? pnode->elementary_target : pnode->name)
                        << " has to be defined on the same mesh as the "
                        << "integration method or interpolation used");

            // An instruction for extracting local dofs of the variable
            if (rmi.local_dofs.count(pnode->name) == 0) {
              rmi.local_dofs[pnode->name] = base_vector(1);
              extend_variable_in_gis(workspace, pnode->name, gis);
              size_type qmult2 = mfo->get_qdim();
              if (qmult2 > 1 && !(mfo->is_uniformly_vectorized()))
                qmult2 = size_type(-1);
              pgai = std::make_shared<ga_instruction_slice_local_dofs>
                (*mfo, *(gis.extended_vars[pnode->name]), gis.ctx,
                 rmi.local_dofs[pnode->name],
                 workspace.qdim(pnode->name) / mfo->get_qdim(), qmult2);
              rmi.elt_instructions.push_back(std::move(pgai));
            }

            // An instruction for pfp update
            if (mf->is_uniform()) {
              if (rmi.pfps.count(mf) == 0) {
                rmi.pfps[mf] = 0;
                pgai = std::make_shared<ga_instruction_update_pfp>
                  (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
                rmi.begin_instructions.push_back(std::move(pgai));
              }
            } else if (rmi.pfps.count(mf) == 0 ||
                       !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
              rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
              rmi.pfps[mf] = 0;
              pgai = std::make_shared<ga_instruction_update_pfp>
                (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
              rmi.instructions.push_back(std::move(pgai));
            }

            // An instruction for the base value
            pgai = pga_instruction();
            switch (pnode->node_type) {
            case GA_NODE_VAL: case GA_NODE_ELEMENTARY_VAL:
              if (rmi.base.count(mf) == 0 ||
                  !if_hierarchy.is_compatible(rmi.base_hierarchy[mf])) {
                rmi.base_hierarchy[mf].push_back(if_hierarchy);
                pgai = std::make_shared<ga_instruction_val_base>
                  (rmi.base[mf], gis.ctx, *mf, rmi.pfps[mf]);
              }
              break;
            case GA_NODE_XFEM_PLUS_VAL:
              if (rmi.xfem_plus_base.count(mf) == 0 ||
                  !if_hierarchy.is_compatible(rmi.xfem_plus_base_hierarchy[mf]))
              {
                rmi.xfem_plus_base_hierarchy[mf].push_back(if_hierarchy);
                pgai = std::make_shared<ga_instruction_xfem_plus_val_base>
                  (rmi.xfem_plus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
              }
              break;
            case GA_NODE_XFEM_MINUS_VAL:
              if (rmi.xfem_minus_base.count(mf) == 0 ||
                  !if_hierarchy.is_compatible(rmi.xfem_minus_base_hierarchy[mf]))
              {
                rmi.xfem_minus_base_hierarchy[mf].push_back(if_hierarchy);
                pgai = std::make_shared<ga_instruction_xfem_minus_val_base>
                  (rmi.xfem_minus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
              }
              break;
            case GA_NODE_GRAD: case GA_NODE_DIVERG:
            case GA_NODE_ELEMENTARY_GRAD: case GA_NODE_ELEMENTARY_DIVERG:
              if (rmi.grad.count(mf) == 0 ||
                  !if_hierarchy.is_compatible(rmi.grad_hierarchy[mf])) {
                rmi.grad_hierarchy[mf].push_back(if_hierarchy);
                pgai = std::make_shared<ga_instruction_grad_base>
                  (rmi.grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
              }
              break;
            case GA_NODE_XFEM_PLUS_GRAD: case GA_NODE_XFEM_PLUS_DIVERG:
              if (rmi.xfem_plus_grad.count(mf) == 0 ||
                  !if_hierarchy.is_compatible(rmi.xfem_plus_grad_hierarchy[mf]))
              {
                rmi.xfem_plus_grad_hierarchy[mf].push_back(if_hierarchy);
                pgai = std::make_shared<ga_instruction_xfem_plus_grad_base>
                  (rmi.xfem_plus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
              }
              break;
            case GA_NODE_XFEM_MINUS_GRAD: case GA_NODE_XFEM_MINUS_DIVERG:
              if (rmi.xfem_minus_grad.count(mf) == 0 ||
                  !if_hierarchy.is_compatible(rmi.xfem_minus_grad_hierarchy[mf]))
              {
                rmi.xfem_minus_grad_hierarchy[mf].push_back(if_hierarchy);
                pgai = std::make_shared<ga_instruction_xfem_minus_grad_base>
                  (rmi.xfem_minus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
              }
              break;
            case GA_NODE_HESS: case GA_NODE_ELEMENTARY_HESS:
              if (rmi.hess.count(mf) == 0 ||
                  !if_hierarchy.is_compatible(rmi.hess_hierarchy[mf])) {
                rmi.hess_hierarchy[mf].push_back(if_hierarchy);
                pgai = std::make_shared<ga_instruction_hess_base>
                  (rmi.hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
              }
              break;
            case GA_NODE_XFEM_PLUS_HESS:
              if (rmi.xfem_plus_hess.count(mf) == 0 ||
                  !if_hierarchy.is_compatible(rmi.xfem_plus_hess_hierarchy[mf]))
              {
                rmi.xfem_plus_hess_hierarchy[mf].push_back(if_hierarchy);
                pgai = std::make_shared<ga_instruction_xfem_plus_hess_base>
                  (rmi.xfem_plus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
              }
              break;
            case GA_NODE_XFEM_MINUS_HESS:
              if (rmi.xfem_minus_hess.count(mf) == 0 ||
                  !if_hierarchy.is_compatible(rmi.xfem_minus_hess_hierarchy[mf]))
              {
                rmi.xfem_minus_hess_hierarchy[mf].push_back(if_hierarchy);
                pgai = std::make_shared<ga_instruction_xfem_minus_hess_base>
                  (rmi.xfem_minus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
              }
              break;
            default : GMM_ASSERT1(false, "Internal error");
            }
            if (pgai) rmi.instructions.push_back(std::move(pgai));

            // The eval instruction
            switch (pnode->node_type) {
            case GA_NODE_VAL: // --> t(target_dim*Qmult)
              pgai = std::make_shared<ga_instruction_val>
                (pnode->tensor(), rmi.base[mf], rmi.local_dofs[pnode->name],
                 workspace.qdim(pnode->name));
              break;
            case GA_NODE_GRAD: // --> t(target_dim*Qmult,N)
              pgai = std::make_shared<ga_instruction_grad>
                (pnode->tensor(), rmi.grad[mf],
                 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
              break;
            case GA_NODE_HESS: // --> t(target_dim*Qmult,N,N)
              pgai = std::make_shared<ga_instruction_hess>
                (pnode->tensor(), rmi.hess[mf],
                 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
              break;
            case GA_NODE_DIVERG: // --> t(1)
              pgai = std::make_shared<ga_instruction_diverg>
                (pnode->tensor(), rmi.grad[mf],
                 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
              break;
            case GA_NODE_XFEM_PLUS_VAL: // --> t(target_dim*Qmult)
              pgai = std::make_shared<ga_instruction_val>
                (pnode->tensor(), rmi.xfem_plus_base[mf],
                 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
              break;
            case GA_NODE_XFEM_PLUS_GRAD: // --> t(target_dim*Qmult,N)
              pgai = std::make_shared<ga_instruction_grad>
                (pnode->tensor(), rmi.xfem_plus_grad[mf],
                 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
              break;
            case GA_NODE_XFEM_PLUS_HESS: // --> t(target_dim*Qmult,N,N)
              pgai = std::make_shared<ga_instruction_hess>
                (pnode->tensor(), rmi.xfem_plus_hess[mf],
                 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
              break;
            case GA_NODE_XFEM_PLUS_DIVERG: // --> t(1)
              pgai = std::make_shared<ga_instruction_diverg>
                (pnode->tensor(), rmi.xfem_plus_grad[mf],
                 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
              break;
            case GA_NODE_XFEM_MINUS_VAL: // --> t(target_dim*Qmult)
              pgai = std::make_shared<ga_instruction_val>
                (pnode->tensor(), rmi.xfem_minus_base[mf],
                 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
              break;
            case GA_NODE_XFEM_MINUS_GRAD: // --> t(target_dim*Qmult,N)
              pgai = std::make_shared<ga_instruction_grad>
                (pnode->tensor(), rmi.xfem_minus_grad[mf],
                 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
              break;
            case GA_NODE_XFEM_MINUS_HESS: // --> t(target_dim*Qmult,N,N)
              pgai = std::make_shared<ga_instruction_hess>
                (pnode->tensor(), rmi.xfem_minus_hess[mf],
                 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
              break;
            case GA_NODE_XFEM_MINUS_DIVERG: // --> t(1)
              pgai = std::make_shared<ga_instruction_diverg>
                (pnode->tensor(), rmi.xfem_minus_grad[mf],
                 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
              break;
            case GA_NODE_ELEMENTARY_VAL:
              { // --> t(target_dim*Qmult)
                ga_instruction_set::elementary_trans_info &eti
                  = rmi.elementary_trans_infos
                    [std::make_tuple(pnode->elementary_name, mfo, mf)];
                pgai =
                  std::make_shared<ga_instruction_elementary_trans_val>
                  (pnode->tensor(), rmi.base[mf],
                   rmi.local_dofs[pnode->name],
                   workspace.qdim(pnode->elementary_target),
                   workspace.elementary_transformation(pnode->elementary_name),
                   *mfo, *mf, gis.ctx, eti.M, eti.icv);
              }
              break;
            case GA_NODE_ELEMENTARY_GRAD:
              { // --> t(target_dim*Qmult,N)
                ga_instruction_set::elementary_trans_info &eti
                  = rmi.elementary_trans_infos
                    [std::make_tuple(pnode->elementary_name, mfo, mf)];
                pgai =
                  std::make_shared<ga_instruction_elementary_trans_grad>
                  (pnode->tensor(), rmi.grad[mf],
                   rmi.local_dofs[pnode->name],
                   workspace.qdim(pnode->elementary_target),
                   workspace.elementary_transformation(pnode->elementary_name),
                   *mfo, *mf, gis.ctx, eti.M, eti.icv);
              }
              break;
            case GA_NODE_ELEMENTARY_HESS:
              { // --> t(target_dim*Qmult,N,N)
                ga_instruction_set::elementary_trans_info &eti
                  = rmi.elementary_trans_infos
                    [std::make_tuple(pnode->elementary_name, mfo, mf)];
                pgai =
                  std::make_shared<ga_instruction_elementary_trans_hess>
                  (pnode->tensor(), rmi.hess[mf],
                   rmi.local_dofs[pnode->name],
                   workspace.qdim(pnode->elementary_target),
                   workspace.elementary_transformation(pnode->elementary_name),
                   *mfo, *mf, gis.ctx, eti.M, eti.icv);
              }
              break;
            case GA_NODE_ELEMENTARY_DIVERG:
              { // --> t(1)
                ga_instruction_set::elementary_trans_info &eti
                  = rmi.elementary_trans_infos
                    [std::make_tuple(pnode->elementary_name, mfo, mf)];
                pgai =
                  std::make_shared<ga_instruction_elementary_trans_diverg>
                  (pnode->tensor(), rmi.grad[mf],
                   rmi.local_dofs[pnode->name],
                   workspace.qdim(pnode->elementary_target),
                   workspace.elementary_transformation(pnode->elementary_name),
                   *mfo, *mf, gis.ctx, eti.M, eti.icv);
              }
              break;
            default: break;
            }
            rmi.instructions.push_back(std::move(pgai));
          }
        }
      }
      break;
    case GA_NODE_SECONDARY_DOMAIN_VAL: case GA_NODE_SECONDARY_DOMAIN_GRAD:
    case GA_NODE_SECONDARY_DOMAIN_HESS: case GA_NODE_SECONDARY_DOMAIN_DIVERG:
      {
        GMM_ASSERT1(!function_case, "Internal error");
        const mesh_fem *mf = workspace.associated_mf(pnode->name);
        const im_data *imd = workspace.associated_im_data(pnode->name);
        const std::string &intn = pnode->interpolate_name;
        auto &sdi = rmi.secondary_domain_infos;

        fem_interpolation_context *pctx = &(sdi.ctx);
        papprox_integration pai = sdi.pai;
        psecondary_domain psd = workspace.secondary_domain(intn);

        if (imd) {
          pgai = std::make_shared<ga_instruction_extract_local_im_data>
            (pnode->tensor(), *imd, workspace.value(pnode->name),
             pai, *pctx, workspace.qdim(pnode->name));
          rmi.instructions.push_back(std::move(pgai));
        } else {
          GMM_ASSERT1(mf, "Internal error");
          GMM_ASSERT1(&(mf->linked_mesh()) == &(psd->mim().linked_mesh()),
                      "The finite element of variable " << pnode->name <<
                      " has to be defined on the same mesh as the "
                      "integration method or interpolation used on the "
                      "secondary domain");

          // An instruction for extracting local dofs of the variable
          if (sdi.local_dofs.count(pnode->name) == 0) {
            sdi.local_dofs[pnode->name] = base_vector(1);
            extend_variable_in_gis(workspace, pnode->name, gis);
            size_type qmult2 = mf->get_qdim();
            if (qmult2 > 1 && !(mf->is_uniformly_vectorized()))
              qmult2 = size_type(-1);
            pgai = std::make_shared<ga_instruction_slice_local_dofs>
              (*mf, *(gis.extended_vars[pnode->name]), *pctx,
               sdi.local_dofs[pnode->name],
               workspace.qdim(pnode->name) / mf->get_qdim(), qmult2);
            rmi.elt_instructions.push_back(std::move(pgai));
          }

          // An instruction for pfp update
          if (mf->is_uniform()) {
            if (sdi.pfps.count(mf) == 0) {
              sdi.pfps[mf] = 0;
              pgai = std::make_shared<ga_instruction_update_pfp>
                (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
              rmi.begin_instructions.push_back(std::move(pgai));
            }
          } else if (sdi.pfps.count(mf) == 0 ||
                     !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
            rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
            sdi.pfps[mf] = 0;
            pgai = std::make_shared<ga_instruction_update_pfp>
              (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
            rmi.instructions.push_back(std::move(pgai));
          }

          // An instruction for the base value
          pgai = pga_instruction();
          switch (pnode->node_type) {
          case GA_NODE_SECONDARY_DOMAIN_VAL:
            if (sdi.base.count(mf) == 0 ||
                !(if_hierarchy.is_compatible(rmi.base_hierarchy[mf]))) {
              rmi.base_hierarchy[mf].push_back(if_hierarchy);
              pgai = std::make_shared<ga_instruction_val_base>
                (sdi.base[mf], *pctx, *mf, sdi.pfps[mf]);
            }
            break;
          case GA_NODE_SECONDARY_DOMAIN_GRAD:
          case GA_NODE_SECONDARY_DOMAIN_DIVERG:
            if (sdi.grad.count(mf) == 0 ||
                !(if_hierarchy.is_compatible(rmi.grad_hierarchy[mf]))) {
              rmi.grad_hierarchy[mf].push_back(if_hierarchy);
              pgai = std::make_shared<ga_instruction_grad_base>
                (sdi.grad[mf], *pctx, *mf, sdi.pfps[mf]);
            }
            break;
          case GA_NODE_SECONDARY_DOMAIN_HESS:
            if (sdi.hess.count(mf) == 0 ||
                !(if_hierarchy.is_compatible(rmi.hess_hierarchy[mf]))) {
              rmi.hess_hierarchy[mf].push_back(if_hierarchy);
              pgai = std::make_shared<ga_instruction_hess_base>
                (sdi.hess[mf], *pctx, *mf, sdi.pfps[mf]);
            }
            break;
          default : GMM_ASSERT1(false, "Internal error");
          }
          if (pgai) rmi.instructions.push_back(std::move(pgai));

          // The eval instruction
          switch (pnode->node_type) {
          case GA_NODE_SECONDARY_DOMAIN_VAL: // --> t(target_dim*Qmult)
            pgai = std::make_shared<ga_instruction_val>
              (pnode->tensor(), sdi.base[mf], sdi.local_dofs[pnode->name],
               workspace.qdim(pnode->name));
            break;
          case GA_NODE_SECONDARY_DOMAIN_GRAD: // --> t(target_dim*Qmult,N)
            pgai = std::make_shared<ga_instruction_grad>
              (pnode->tensor(), sdi.grad[mf],
               sdi.local_dofs[pnode->name], workspace.qdim(pnode->name));
            break;
          case GA_NODE_SECONDARY_DOMAIN_HESS: // --> t(target_dim*Qmult,N,N)
            pgai = std::make_shared<ga_instruction_hess>
              (pnode->tensor(), sdi.hess[mf],
               sdi.local_dofs[pnode->name], workspace.qdim(pnode->name));
            break;
          case GA_NODE_SECONDARY_DOMAIN_DIVERG: // --> t(1)
            pgai = std::make_shared<ga_instruction_diverg>
              (pnode->tensor(), sdi.grad[mf],
               sdi.local_dofs[pnode->name], workspace.qdim(pnode->name));
            break;
          default: break;
          }
          rmi.instructions.push_back(std::move(pgai));
        }
      }
      break;
6024 case GA_NODE_INTERPOLATE_VAL:
case GA_NODE_INTERPOLATE_GRAD:
6025 case GA_NODE_INTERPOLATE_HESS:
case GA_NODE_INTERPOLATE_DIVERG:
6027 extend_variable_in_gis(workspace, pnode->name, gis);
6029 const mesh_fem *mfn = workspace.associated_mf(pnode->name), **mfg = 0;
6030 const std::string &intn = pnode->interpolate_name;
6031 const base_vector *Un = gis.extended_vars[pnode->name], **Ug = 0;
6032 fem_interpolation_context *pctx = &(rmi.interpolate_infos[intn].ctx);
6033 const mesh **m2 = &(rmi.interpolate_infos[intn].m);
6034 if (workspace.variable_group_exists(pnode->name)) {
6035 ga_instruction_set::variable_group_info &vgi =
6036 rmi.interpolate_infos[intn].groups_info[pnode->name];
6037 mfg = &(vgi.mf); mfn = 0; Ug = &(vgi.U); Un = 0;
6040 if (pnode->node_type == GA_NODE_INTERPOLATE_VAL) {
6042 pgai = std::make_shared<ga_instruction_interpolate_val>
6043 (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
6044 workspace.qdim(pnode->name),
6045 gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
6046 }
else if (pnode->node_type == GA_NODE_INTERPOLATE_GRAD) {
6048 pgai = std::make_shared<ga_instruction_interpolate_grad>
6049 (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
6050 workspace.qdim(pnode->name),
6051 gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
6052 }
else if (pnode->node_type == GA_NODE_INTERPOLATE_HESS) {
6054 pgai = std::make_shared<ga_instruction_interpolate_hess>
6055 (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
6056 workspace.qdim(pnode->name),
6057 gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
6059 pgai = std::make_shared<ga_instruction_interpolate_diverg>
6060 (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
6061 workspace.qdim(pnode->name),
6062 gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
6064 rmi.instructions.push_back(std::move(pgai));
6068 case GA_NODE_INTERPOLATE_DERIVATIVE:
6069 GMM_ASSERT1(!function_case,
6070 "No use of Interpolate is allowed in functions");
6071 pgai = std::make_shared<ga_instruction_copy_tensor_possibly_void>
(pnode->tensor(),
6073 rmi.interpolate_infos[pnode->interpolate_name_der]
6074 .derivatives[var_trans_pair(pnode->name, pnode->interpolate_name)]);
6075 rmi.instructions.push_back(std::move(pgai));
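// Test function nodes (weak form terms of order >= 1): the instructions
// below evaluate the base functions of the associated mesh_fem, possibly
// through an elementary transformation or on the XFEM plus/minus side,
// before they are expanded into the node tensor.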
6078 case GA_NODE_VAL_TEST:
case GA_NODE_GRAD_TEST:
6079 case GA_NODE_HESS_TEST:
case GA_NODE_DIVERG_TEST:
6080 case GA_NODE_ELEMENTARY_VAL_TEST:
case GA_NODE_ELEMENTARY_GRAD_TEST:
6081 case GA_NODE_ELEMENTARY_HESS_TEST:
case GA_NODE_ELEMENTARY_DIVERG_TEST:
6082 case GA_NODE_XFEM_PLUS_VAL_TEST:
case GA_NODE_XFEM_PLUS_GRAD_TEST:
6083 case GA_NODE_XFEM_PLUS_HESS_TEST:
case GA_NODE_XFEM_PLUS_DIVERG_TEST:
6084 case GA_NODE_XFEM_MINUS_VAL_TEST:
case GA_NODE_XFEM_MINUS_GRAD_TEST:
6085 case GA_NODE_XFEM_MINUS_HESS_TEST:
case GA_NODE_XFEM_MINUS_DIVERG_TEST:
6089 bool is_elementary = (pnode->node_type==GA_NODE_ELEMENTARY_VAL_TEST ||
6090 pnode->node_type==GA_NODE_ELEMENTARY_GRAD_TEST ||
6091 pnode->node_type==GA_NODE_ELEMENTARY_HESS_TEST ||
6092 pnode->node_type==GA_NODE_ELEMENTARY_DIVERG_TEST);
6093 const mesh_fem *mf = workspace.associated_mf(pnode->name), *mfo=mf;
6094 if (is_elementary) {
6095 mf = workspace.associated_mf(pnode->elementary_target);
6096 GMM_ASSERT1(mf && mfo,
6097 "Wrong context for elementary transformation");
6098 GMM_ASSERT1(&(mfo->linked_mesh()) == &(m),
6099 "The finite element of variable " << pnode->name
6100 << " has to be defined on the same mesh as the "
6101 << "integration method or interpolation used");
6105 GMM_ASSERT1(&(mf->linked_mesh()) == &(m),
6106 "The finite element of variable " <<
6107 (is_elementary ? pnode->elementary_target : pnode->name)
6108 << " and the applied integration method have to be"
6109 << " defined on the same mesh");
6113 if (is_uniform) {
if (rmi.pfps.count(mf) == 0) {
6115 pgai = std::make_shared<ga_instruction_update_pfp>
6116 (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
6117 rmi.begin_instructions.push_back(std::move(pgai));
6119 }
} else if (rmi.pfps.count(mf) == 0 ||
6120 !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
6121 rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
6123 pgai = std::make_shared<ga_instruction_update_pfp>
6124 (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
6125 rmi.instructions.push_back(std::move(pgai));
6129 pgai = pga_instruction();
6130 switch (pnode->node_type) {
6131 case GA_NODE_VAL_TEST:
case GA_NODE_ELEMENTARY_VAL_TEST:
6132 if (rmi.base.find(mf) == rmi.base.end() ||
6133 !if_hierarchy.is_compatible(rmi.base_hierarchy[mf])) {
6134 rmi.base_hierarchy[mf].push_back(if_hierarchy);
6135 pgai = std::make_shared<ga_instruction_val_base>
6136 (rmi.base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6139 case GA_NODE_XFEM_PLUS_VAL_TEST:
6140 if (rmi.xfem_plus_base.find(mf) == rmi.xfem_plus_base.end() ||
6141 !if_hierarchy.is_compatible(rmi.xfem_plus_base_hierarchy[mf])) {
6143 rmi.xfem_plus_base_hierarchy[mf].push_back(if_hierarchy);
6144 pgai = std::make_shared<ga_instruction_xfem_plus_val_base>
6145 (rmi.xfem_plus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6148 case GA_NODE_XFEM_MINUS_VAL_TEST:
6149 if (rmi.xfem_minus_base.find(mf) == rmi.xfem_minus_base.end() ||
6150 !if_hierarchy.is_compatible(rmi.xfem_minus_base_hierarchy[mf])) {
6152 rmi.xfem_minus_base_hierarchy[mf].push_back(if_hierarchy);
6153 pgai = std::make_shared<ga_instruction_xfem_minus_val_base>
6154 (rmi.xfem_minus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6157 case GA_NODE_GRAD_TEST:
case GA_NODE_DIVERG_TEST:
6158 case GA_NODE_ELEMENTARY_GRAD_TEST:
6159 case GA_NODE_ELEMENTARY_DIVERG_TEST:
6160 if (rmi.grad.find(mf) == rmi.grad.end() ||
6161 !if_hierarchy.is_compatible(rmi.grad_hierarchy[mf])) {
6162 rmi.grad_hierarchy[mf].push_back(if_hierarchy);
6163 pgai = std::make_shared<ga_instruction_grad_base>
6164 (rmi.grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6167 case GA_NODE_XFEM_PLUS_GRAD_TEST:
case GA_NODE_XFEM_PLUS_DIVERG_TEST:
6168 if (rmi.xfem_plus_grad.find(mf) == rmi.xfem_plus_grad.end() ||
6169 !if_hierarchy.is_compatible(rmi.xfem_plus_grad_hierarchy[mf])) {
6171 rmi.xfem_plus_grad_hierarchy[mf].push_back(if_hierarchy);
6172 pgai = std::make_shared<ga_instruction_xfem_plus_grad_base>
6173 (rmi.xfem_plus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6176 case GA_NODE_XFEM_MINUS_GRAD_TEST:
6177 case GA_NODE_XFEM_MINUS_DIVERG_TEST:
6178 if (rmi.xfem_minus_grad.find(mf) == rmi.xfem_minus_grad.end() ||
6179 !if_hierarchy.is_compatible(rmi.xfem_minus_grad_hierarchy[mf])) {
6181 rmi.xfem_minus_grad_hierarchy[mf].push_back(if_hierarchy);
6182 pgai = std::make_shared<ga_instruction_xfem_minus_grad_base>
6183 (rmi.xfem_minus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6186 case GA_NODE_HESS_TEST:
case GA_NODE_ELEMENTARY_HESS_TEST:
6187 if (rmi.hess.count(mf) == 0 ||
6188 !if_hierarchy.is_compatible(rmi.hess_hierarchy[mf])) {
6189 rmi.hess_hierarchy[mf].push_back(if_hierarchy);
6190 pgai = std::make_shared<ga_instruction_hess_base>
6191 (rmi.hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6194 case GA_NODE_XFEM_PLUS_HESS_TEST:
6195 if (rmi.xfem_plus_hess.count(mf) == 0 ||
6196 !if_hierarchy.is_compatible(rmi.xfem_plus_hess_hierarchy[mf])) {
6198 rmi.xfem_plus_hess_hierarchy[mf].push_back(if_hierarchy);
6199 pgai = std::make_shared<ga_instruction_xfem_plus_hess_base>
6200 (rmi.xfem_plus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6203 case GA_NODE_XFEM_MINUS_HESS_TEST:
6204 if (rmi.xfem_minus_hess.find(mf) == rmi.xfem_minus_hess.end() ||
6205 !if_hierarchy.is_compatible(rmi.xfem_minus_hess_hierarchy[mf])) {
6207 rmi.xfem_minus_hess_hierarchy[mf].push_back(if_hierarchy);
6208 pgai = std::make_shared<ga_instruction_xfem_minus_hess_base>
6209 (rmi.xfem_minus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6213 default : GMM_ASSERT1(false, "Internal error");
6215 if (pgai) rmi.instructions.push_back(std::move(pgai));
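// Copy the evaluated base tensors into the node tensor. For uniformly
// vectorized mesh_fems the sparsity pattern of the vectorization is
// recorded on the node so that later instructions can exploit the zeros.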
6218 switch(pnode->node_type) {
6219 case GA_NODE_VAL_TEST:
6221 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
6222 pnode->t.set_sparsity(1, mf->get_qdim());
6223 tensor_to_clear = true;
6224 pgai = std::make_shared<ga_instruction_copy_vect_val_base>
6225 (pnode->tensor(), rmi.base[mf], mf->get_qdim());
} else
6227 pgai = std::make_shared<ga_instruction_copy_val_base>
6228 (pnode->tensor(), rmi.base[mf], mf->get_qdim());
6231 case GA_NODE_GRAD_TEST:
6233 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
6234 pnode->t.set_sparsity(2, mf->get_qdim());
6235 tensor_to_clear = true;
6236 pgai = std::make_shared<ga_instruction_copy_vect_grad_base>
6237 (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
} else
6239 pgai = std::make_shared<ga_instruction_copy_grad_base>
6240 (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
6243 case GA_NODE_HESS_TEST:
6245 pgai = std::make_shared<ga_instruction_copy_hess_base>
6246 (pnode->tensor(), rmi.hess[mf], mf->get_qdim());
6247 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6248 pnode->t.set_sparsity(3, mf->get_qdim());
6250 case GA_NODE_DIVERG_TEST:
6252 pgai = std::make_shared<ga_instruction_copy_diverg_base>
6253 (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
6255 case GA_NODE_XFEM_PLUS_VAL_TEST:
6257 pgai = std::make_shared<ga_instruction_copy_val_base>
6258 (pnode->tensor(), rmi.xfem_plus_base[mf], mf->get_qdim());
6259 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6260 pnode->t.set_sparsity(1, mf->get_qdim());
6262 case GA_NODE_XFEM_PLUS_GRAD_TEST:
6264 pgai = std::make_shared<ga_instruction_copy_grad_base>
6265 (pnode->tensor(), rmi.xfem_plus_grad[mf], mf->get_qdim());
6266 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6267 pnode->t.set_sparsity(2, mf->get_qdim());
6269 case GA_NODE_XFEM_PLUS_HESS_TEST:
6271 pgai = std::make_shared<ga_instruction_copy_hess_base>
6272 (pnode->tensor(), rmi.xfem_plus_hess[mf], mf->get_qdim());
6273 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6274 pnode->t.set_sparsity(3, mf->get_qdim());
6276 case GA_NODE_XFEM_PLUS_DIVERG_TEST:
6278 pgai = std::make_shared<ga_instruction_copy_diverg_base>
6279 (pnode->tensor(), rmi.xfem_plus_grad[mf], mf->get_qdim());
6281 case GA_NODE_XFEM_MINUS_VAL_TEST:
6283 pgai = std::make_shared<ga_instruction_copy_val_base>
6284 (pnode->tensor(), rmi.xfem_minus_base[mf], mf->get_qdim());
6285 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6286 pnode->t.set_sparsity(1, mf->get_qdim());
6288 case GA_NODE_XFEM_MINUS_GRAD_TEST:
6290 pgai = std::make_shared<ga_instruction_copy_grad_base>
6291 (pnode->tensor(), rmi.xfem_minus_grad[mf], mf->get_qdim());
6292 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6293 pnode->t.set_sparsity(2, mf->get_qdim());
6295 case GA_NODE_XFEM_MINUS_HESS_TEST:
6297 pgai = std::make_shared<ga_instruction_copy_hess_base>
6298 (pnode->tensor(), rmi.xfem_minus_hess[mf], mf->get_qdim());
6299 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6300 pnode->t.set_sparsity(3, mf->get_qdim());
6302 case GA_NODE_XFEM_MINUS_DIVERG_TEST:
6304 pgai = std::make_shared<ga_instruction_copy_diverg_base>
6305 (pnode->tensor(), rmi.xfem_minus_grad[mf], mf->get_qdim());
6307 case GA_NODE_ELEMENTARY_VAL_TEST:
6309 ga_instruction_set::elementary_trans_info &eti
6310 = rmi.elementary_trans_infos
6311 [std::make_tuple(pnode->elementary_name, mfo, mf)];
6313 std::make_shared<ga_instruction_elementary_trans_val_base>
6314 (pnode->tensor(), rmi.base[mf], mf->get_qdim(),
6315 workspace.elementary_transformation(pnode->elementary_name),
6316 *mfo, *mf, gis.ctx, eti.M, eti.icv);
6319 case GA_NODE_ELEMENTARY_GRAD_TEST:
6321 ga_instruction_set::elementary_trans_info &eti
6322 = rmi.elementary_trans_infos
6323 [std::make_tuple(pnode->elementary_name, mfo, mf)];
6325 std::make_shared<ga_instruction_elementary_trans_grad_base>
6326 (pnode->tensor(), rmi.grad[mf], mf->get_qdim(),
6327 workspace.elementary_transformation(pnode->elementary_name),
6328 *mfo, *mf, gis.ctx, eti.M, eti.icv);
6331 case GA_NODE_ELEMENTARY_HESS_TEST:
6333 ga_instruction_set::elementary_trans_info &eti
6334 = rmi.elementary_trans_infos
6335 [std::make_tuple(pnode->elementary_name, mfo, mf)];
6337 std::make_shared<ga_instruction_elementary_trans_hess_base>
6338 (pnode->tensor(), rmi.hess[mf], mf->get_qdim(),
6339 workspace.elementary_transformation(pnode->elementary_name),
6340 *mfo, *mf, gis.ctx, eti.M, eti.icv);
6343 case GA_NODE_ELEMENTARY_DIVERG_TEST:
6345 ga_instruction_set::elementary_trans_info &eti
6346 = rmi.elementary_trans_infos
6347 [std::make_tuple(pnode->elementary_name, mfo, mf)];
6349 std::make_shared<ga_instruction_elementary_trans_diverg_base>
6350 (pnode->tensor(), rmi.grad[mf], mf->get_qdim(),
6351 workspace.elementary_transformation(pnode->elementary_name),
6352 *mfo, *mf, gis.ctx, eti.M, eti.icv);
6357 if (pgai) rmi.instructions.push_back(std::move(pgai));
6359 workspace.add_temporary_interval_for_unreduced_variable(pnode->name);
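// Test functions living on the secondary domain of a two-domain term:
// same scheme as above, but using the interpolation context and pfp of
// the secondary domain (sdi).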
6363 case GA_NODE_SECONDARY_DOMAIN_VAL_TEST:
6364 case GA_NODE_SECONDARY_DOMAIN_GRAD_TEST:
6365 case GA_NODE_SECONDARY_DOMAIN_HESS_TEST:
6366 case GA_NODE_SECONDARY_DOMAIN_DIVERG_TEST:
6368 GMM_ASSERT1(!function_case, "internal error");
6369 const mesh_fem *mf = workspace.associated_mf(pnode->name);
6370 const std::string &intn = pnode->interpolate_name;
6371 auto &sdi = rmi.secondary_domain_infos;
6373 fem_interpolation_context *pctx = &(sdi.ctx);
6374 papprox_integration pai = sdi.pai;
6375 psecondary_domain psd = workspace.secondary_domain(intn);
6377 GMM_ASSERT1(&(mf->linked_mesh()) == &(psd->mim().linked_mesh()),
6378 "The finite element of variable " << pnode->name <<
6379 " and the applied integration method have to be"
6380 " defined on the same mesh for secondary domain");
6384 if (is_uniform) {
if (sdi.pfps.count(mf) == 0) {
6386 pgai = std::make_shared<ga_instruction_update_pfp>
6387 (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
6388 rmi.begin_instructions.push_back(std::move(pgai));
6390 }
} else if (sdi.pfps.count(mf) == 0 ||
6391 !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
6392 rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
6394 pgai = std::make_shared<ga_instruction_update_pfp>
6395 (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
6396 rmi.instructions.push_back(std::move(pgai));
6400 pgai = pga_instruction();
6401 switch (pnode->node_type) {
6402 case GA_NODE_SECONDARY_DOMAIN_VAL_TEST:
6403 if (sdi.base.count(mf) == 0 ||
6404 !(if_hierarchy.is_compatible(rmi.base_hierarchy[mf]))) {
6405 rmi.base_hierarchy[mf].push_back(if_hierarchy);
6406 pgai = std::make_shared<ga_instruction_val_base>
6407 (sdi.base[mf], *pctx, *mf, sdi.pfps[mf]);
6410 case GA_NODE_SECONDARY_DOMAIN_GRAD_TEST:
6411 case GA_NODE_SECONDARY_DOMAIN_DIVERG_TEST:
6412 if (sdi.grad.count(mf) == 0 ||
6413 !(if_hierarchy.is_compatible(rmi.grad_hierarchy[mf]))) {
6414 rmi.grad_hierarchy[mf].push_back(if_hierarchy);
6415 pgai = std::make_shared<ga_instruction_grad_base>
6416 (sdi.grad[mf], *pctx, *mf, sdi.pfps[mf]);
6419 case GA_NODE_SECONDARY_DOMAIN_HESS_TEST:
6420 if (sdi.hess.count(mf) == 0 ||
6421 !(if_hierarchy.is_compatible(rmi.hess_hierarchy[mf]))) {
6422 rmi.hess_hierarchy[mf].push_back(if_hierarchy);
6423 pgai = std::make_shared<ga_instruction_hess_base>
6424 (sdi.hess[mf], *pctx, *mf, sdi.pfps[mf]);
6427 default : GMM_ASSERT1(false, "Internal error");
6429 if (pgai) rmi.instructions.push_back(std::move(pgai));
6432 switch(pnode->node_type) {
6433 case GA_NODE_SECONDARY_DOMAIN_VAL_TEST:
6435 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
6436 pnode->t.set_sparsity(1, mf->get_qdim());
6437 tensor_to_clear = true;
6438 pgai = std::make_shared<ga_instruction_copy_vect_val_base>
6439 (pnode->tensor(), sdi.base[mf], mf->get_qdim());
} else
6441 pgai = std::make_shared<ga_instruction_copy_val_base>
6442 (pnode->tensor(), sdi.base[mf], mf->get_qdim());
6445 case GA_NODE_SECONDARY_DOMAIN_GRAD_TEST:
6447 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
6448 pnode->t.set_sparsity(2, mf->get_qdim());
6449 tensor_to_clear = true;
6450 pgai = std::make_shared<ga_instruction_copy_vect_grad_base>
6451 (pnode->tensor(), sdi.grad[mf], mf->get_qdim());
} else
6453 pgai = std::make_shared<ga_instruction_copy_grad_base>
6454 (pnode->tensor(), sdi.grad[mf], mf->get_qdim());
6457 case GA_NODE_SECONDARY_DOMAIN_HESS_TEST:
6459 pgai = std::make_shared<ga_instruction_copy_hess_base>
6460 (pnode->tensor(), sdi.hess[mf], mf->get_qdim());
6461 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6462 pnode->t.set_sparsity(3, mf->get_qdim());
6464 case GA_NODE_SECONDARY_DOMAIN_DIVERG_TEST:
6466 pgai = std::make_shared<ga_instruction_copy_diverg_base>
6467 (pnode->tensor(), sdi.grad[mf], mf->get_qdim());
6471 if (pgai) rmi.instructions.push_back(std::move(pgai));
6473 workspace.add_temporary_interval_for_unreduced_variable(pnode->name);
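// Test functions evaluated through an interpolate transformation: base
// functions are computed on the transformed element, with the mesh_fem
// possibly resolved at run time for variable groups.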
6477 case GA_NODE_INTERPOLATE_VAL_TEST:
case GA_NODE_INTERPOLATE_GRAD_TEST:
6478 case GA_NODE_INTERPOLATE_HESS_TEST:
case GA_NODE_INTERPOLATE_DIVERG_TEST:
6480 const mesh_fem *mfn = workspace.associated_mf(pnode->name), **mfg = 0;
6481 const std::string &intn = pnode->interpolate_name;
6482 const mesh **m2 = &(rmi.interpolate_infos[intn].m);
6483 if (workspace.variable_group_exists(pnode->name)) {
6484 ga_instruction_set::variable_group_info &vgi =
6485 rmi.interpolate_infos[intn].groups_info[pnode->name];
6486 mfg = &(vgi.mf); mfn = 0;
6489 if (pnode->node_type == GA_NODE_INTERPOLATE_VAL_TEST) {
6491 pgai = std::make_shared<ga_instruction_interpolate_val_base>
6492 (pnode->tensor(), m2, mfn, mfg, gis.ipt,
6493 workspace.qdim(pnode->name), rmi.interpolate_infos[intn],
6495 }
else if (pnode->node_type == GA_NODE_INTERPOLATE_GRAD_TEST) {
6497 pgai = std::make_shared<ga_instruction_interpolate_grad_base>
6498 (pnode->tensor(), m2, mfn, mfg, gis.ipt,
6499 workspace.qdim(pnode->name),
6500 rmi.interpolate_infos[intn], gis.fp_pool);
6501 }
else if (pnode->node_type == GA_NODE_INTERPOLATE_HESS_TEST) {
6503 pgai = std::make_shared<ga_instruction_interpolate_hess_base>
6504 (pnode->tensor(), m2, mfn, mfg, gis.ipt,
6505 workspace.qdim(pnode->name),
6506 rmi.interpolate_infos[intn], gis.fp_pool);
} else
6509 pgai = std::make_shared<ga_instruction_interpolate_diverg_base>
6510 (pnode->tensor(), m2, mfn, mfg, gis.ipt,
6511 workspace.qdim(pnode->name),
6512 rmi.interpolate_infos[intn], gis.fp_pool);
6514 rmi.instructions.push_back(std::move(pgai));
6515 workspace.add_temporary_interval_for_unreduced_variable(pnode->name);
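// Compilation of operation nodes. Scalar operands are dispatched to
// dedicated scalar instructions, otherwise generic tensor instructions
// are emitted; when both children share the same sparsity and qdim, the
// pattern is propagated to the result tensor.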
6520 switch(pnode->op_type) {
6523 if (pnode->tensor().size() == 1) {
6524 GA_DEBUG_ASSERT(child0->tensor().size() == 1,
6525 "Internal error: child0 not scalar");
6526 GA_DEBUG_ASSERT(child1->tensor().size() == 1,
6527 "Internal error: child1 not scalar");
6528 pgai = std::make_shared<ga_instruction_scalar_add>
6529 (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
6531 pgai = std::make_shared<ga_instruction_add>
6532 (pnode->tensor(), child0->tensor(), child1->tensor());
6534 if (child0->t.sparsity() == child1->t.sparsity()
6535 && child0->t.qdim() == child1->t.qdim())
6536 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6537 rmi.instructions.push_back(std::move(pgai));
6541 if (pnode->tensor().size() == 1) {
6542 GA_DEBUG_ASSERT(child0->tensor().size() == 1,
6543 "Internal error: child0 not scalar");
6544 GA_DEBUG_ASSERT(child1->tensor().size() == 1,
6545 "Internal error: child1 not scalar");
6546 pgai = std::make_shared<ga_instruction_scalar_sub>
6547 (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
6549 pgai = std::make_shared<ga_instruction_sub>
6550 (pnode->tensor(), child0->tensor(), child1->tensor());
6552 if (child0->t.sparsity() == child1->t.sparsity()
6553 && child0->t.qdim() == child1->t.qdim())
6554 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6555 rmi.instructions.push_back(std::move(pgai));
6558 case GA_UNARY_MINUS:
6559 if (pnode->tensor().size() == 1) {
6560 GA_DEBUG_ASSERT(child0->tensor().size() == 1, "Internal error");
6561 pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
6562 (pnode->tensor()[0], child0->tensor()[0], minus);
6564 pgai = std::make_shared<ga_instruction_scalar_mult>
6565 (pnode->tensor(), child0->tensor(), minus);
6567 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6568 rmi.instructions.push_back(std::move(pgai));
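// Multiplication-like operators (GA_MULT, GA_DOT, GA_COLON): s1 below is
// the product of the operand sizes divided by the result size, so its
// square root s2 is the number of components summed over. The dispatch
// selects between scalar multiplications, simple tensor products,
// contractions and matrix multiplications, with "uniform" variants
// presumably reserved for regions where the fem does not change from one
// element to another.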
6572 case GA_DOT:
case GA_COLON:
case GA_MULT:
6574 size_type tps0 = child0->tensor_proper_size();
6575 size_type tps1 = child1->tensor_proper_size();
6576 size_type s1 = (tps0 * tps1) / pnode->tensor_proper_size();
size_type s2 = size_type(round(sqrt(scalar_type(s1))));
6579 pgai = pga_instruction();
6580 if ((pnode->op_type == GA_DOT && dim1 <= 1) ||
6581 (pnode->op_type == GA_COLON && dim1 <= 2) ||
6582 (pnode->op_type == GA_MULT && dim0 == 4) ||
6583 (pnode->op_type == GA_MULT && dim1 <= 1) ||
6584 child0->tensor().size() == 1 || tps1 == 1) {
6586 if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
6587 pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
6588 (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
6590 else if (child0->tensor().size() == 1) {
6591 pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
6592 pgai = std::make_shared<ga_instruction_scalar_mult>
6593 (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
6595 else if (child1->tensor().size() == 1) {
6596 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6597 pgai = std::make_shared<ga_instruction_scalar_mult>
6598 (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
6600 else if (pnode->test_function_type < 3) {
6603 pgai = ga_uniform_instruction_simple_tmult
6604 (pnode->tensor(), child0->tensor(), child1->tensor());
6606 pgai = std::make_shared<ga_instruction_simple_tmult>
6607 (pnode->tensor(), child0->tensor(), child1->tensor());
6611 pgai = ga_uniform_instruction_simple_tmult
6612 (pnode->tensor(), child1->tensor(), child0->tensor());
6614 pgai = std::make_shared<ga_instruction_simple_tmult>
6615 (pnode->tensor(), child1->tensor(), child0->tensor());
6616 }
else if (is_uniform)
6617 pgai = ga_uniform_instruction_contraction_switch
6618 (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
6620 pgai = ga_instruction_contraction_switch
6621 (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
6624 if (child1->test_function_type == 1 ||
6625 child1->test_function_type == 3) {
6626 if (child1->test_function_type == 3 ||
6627 child1->tensor_proper_size() <= s2) {
6630 pgai = ga_uniform_instruction_simple_tmult
6631 (pnode->tensor(), child1->tensor(), child0->tensor());
6633 pgai = std::make_shared<ga_instruction_simple_tmult>
6634 (pnode->tensor(), child1->tensor(), child0->tensor());
6635 }
else if (is_uniform)
6636 pgai = ga_uniform_instruction_contraction_switch
6637 (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
6639 pgai = ga_instruction_contraction_switch
6640 (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
6642 pgai = std::make_shared<ga_instruction_spec_contraction>
6643 (pnode->tensor(), child1->tensor(), child0->tensor(), s2);
6644 }
else if (child1->test_function_type == 0 ||
6645 (child0->tensor_proper_size() == s2 &&
6646 child1->tensor_proper_size() == s2)) {
6649 pgai = ga_uniform_instruction_simple_tmult
6650 (pnode->tensor(), child0->tensor(), child1->tensor());
6652 pgai = std::make_shared<ga_instruction_simple_tmult>
6653 (pnode->tensor(), child0->tensor(), child1->tensor());
6656 pgai = ga_uniform_instruction_contraction_switch
6657 (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
6659 pgai = ga_instruction_contraction_switch
6660 (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
6663 if (child0->tensor_proper_size() == s2)
6664 pgai = ga_uniform_instruction_contraction_switch
6665 (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
6666 else if (child1->tensor_proper_size() == s2)
6667 pgai = std::make_shared<ga_instruction_spec_contraction>
6668 (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
6670 pgai = std::make_shared<ga_instruction_spec2_contraction>
6671 (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
6676 if (pnode->test_function_type < 3) {
6679 pgai = ga_uniform_instruction_simple_tmult
6680 (pnode->tensor(), child0->tensor(), child1->tensor());
6682 pgai = std::make_shared<ga_instruction_simple_tmult>
6683 (pnode->tensor(), child0->tensor(), child1->tensor());
6685 if (child1->test_function_type == 0)
6686 pgai = std::make_shared<ga_instruction_matrix_mult>
6687 (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
6689 pgai = std::make_shared<ga_instruction_matrix_mult_spec>
6690 (pnode->tensor(), child0->tensor(), child1->tensor(),
6691 s2, tps0/s2, tps1/s2);
6694 if (child0->tensor_proper_size() == 1) {
6695 if (child0->test_function_type == 0 ||
6696 child0->test_function_type == 1) {
6698 pgai = ga_uniform_instruction_simple_tmult
6699 (pnode->tensor(), child0->tensor(), child1->tensor());
6701 pgai = std::make_shared<ga_instruction_simple_tmult>
6702 (pnode->tensor(), child0->tensor(), child1->tensor());
6704 pgai = std::make_shared<ga_instruction_spec_tmult>
6705 (pnode->tensor(), child1->tensor(), child0->tensor(),
6708 if (child1->test_function_type == 0)
6709 pgai = std::make_shared<ga_instruction_matrix_mult>
6710 (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
6711 else if (child1->test_function_type == 2)
6712 pgai = std::make_shared<ga_instruction_matrix_mult_spec>
6713 (pnode->tensor(), child0->tensor(), child1->tensor(),
6714 s2, tps0/s2, tps1/s2);
6716 pgai = std::make_shared<ga_instruction_matrix_mult_spec2>
6717 (pnode->tensor(), child0->tensor(), child1->tensor(),
6718 s2, tps0/s2, tps1/s2);
6722 rmi.instructions.push_back(std::move(pgai));
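// Division: only division by a scalar quantity is supported at this
// point.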
6727 if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
6728 pgai = std::make_shared<ga_instruction_scalar_scalar_div>
6729 (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
6730 }
else if (child1->tensor().size() == 1) {
6731 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6732 pgai = std::make_shared<ga_instruction_scalar_div>
6733 (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
6734 } else GMM_ASSERT1(false, "Internal error");
6735 rmi.instructions.push_back(std::move(pgai));
6739 pnode->t.set_to_copy(child0->t);
6740 pgai = std::make_shared<ga_instruction_print_tensor>
6741 (pnode->tensor(), child0, gis.ctx, gis.nbpt, gis.ipt);
6742 rmi.instructions.push_back(std::move(pgai));
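// The unary tensor operations below (transpose, Sym, Skew, Trace,
// Deviator) each map to one specialized instruction; order-0 and order-1
// cases degenerate to a plain copy of the child tensor.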
6746 if (pnode->tensor_proper_size() > 1) {
6747 size_type n1 = child0->tensor_proper_size(0);
6748 size_type n2 = (child0->tensor_order() > 1) ?
6749 child0->tensor_proper_size(1) : 1;
size_type nn = 1;
6751 for (size_type i = 2; i < child0->tensor_order(); ++i)
6752 nn *= child0->tensor_proper_size(i);
6753 if (child0->nb_test_functions() == 0)
6754 pgai = std::make_shared<ga_instruction_transpose_no_test>
6755 (pnode->tensor(), child0->tensor(), n1, n2, nn);
6757 pgai = std::make_shared<ga_instruction_transpose>
6758 (pnode->tensor(), child0->tensor(), n1, n2, nn);
6759 rmi.instructions.push_back(std::move(pgai));
6761 pnode->t.set_to_copy(child0->t);
6766 if (pnode->tensor_proper_size() != 1) {
6767 pgai = std::make_shared<ga_instruction_sym>
6768 (pnode->tensor(), child0->tensor());
6769 rmi.instructions.push_back(std::move(pgai));
6771 pnode->t.set_to_copy(child0->t);
6777 pgai = std::make_shared<ga_instruction_skew>
6778 (pnode->tensor(), child0->tensor());
6779 rmi.instructions.push_back(std::move(pgai));
6785 size_type N = (child0->tensor_proper_size() == 1) ? 1:size0.back();
6787 pnode->t.set_to_copy(child0->t);
6789 pgai = std::make_shared<ga_instruction_trace>
6790 (pnode->tensor(), child0->tensor(), N);
6791 rmi.instructions.push_back(std::move(pgai));
6798 size_type N = (child0->tensor_proper_size() == 1) ? 1:size0.back();
6799 pgai = std::make_shared<ga_instruction_deviator>
6800 (pnode->tensor(), child0->tensor(), N);
6801 rmi.instructions.push_back(std::move(pgai));
6807 if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
6808 pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
6809 (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
6810 }
else if (child0->tensor().size() == 1) {
6811 pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
6812 pgai = std::make_shared<ga_instruction_scalar_mult>
6813 (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
6815 else if (child1->tensor().size() == 1) {
6816 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6817 pgai = std::make_shared<ga_instruction_scalar_mult>
6818 (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
6820 else if (child1->test_function_type == 0)
6821 pgai = std::make_shared<ga_instruction_dotmult>
6822 (pnode->tensor(), child0->tensor(), child1->tensor());
6823 else if (child0->test_function_type == 0)
6824 pgai = std::make_shared<ga_instruction_dotmult>
6825 (pnode->tensor(), child1->tensor(), child0->tensor());
6826 else if (child0->test_function_type == 1)
6827 pgai = std::make_shared<ga_instruction_dotmult_spec>
6828 (pnode->tensor(), child0->tensor(), child1->tensor());
6830 pgai = std::make_shared<ga_instruction_dotmult_spec>
6831 (pnode->tensor(), child1->tensor(), child0->tensor());
6833 rmi.instructions.push_back(std::move(pgai));
6838 if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
6839 pgai = std::make_shared<ga_instruction_scalar_scalar_div>
6840 (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
6841 }
else if (child1->tensor().size() == 1) {
6842 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6843 pgai = std::make_shared<ga_instruction_scalar_div>
6844 (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
6845 }
else if (child1->test_function_type == 0) {
6846 pgai = std::make_shared<ga_instruction_dotdiv>
6847 (pnode->tensor(), child0->tensor(), child1->tensor());
6848 }
else GMM_ASSERT1(false, "Internal error");
6849 rmi.instructions.push_back(std::move(pgai));
6854 if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
6855 pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
6856 (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
6857 }
else if (child0->tensor().size() == 1) {
6858 pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
6859 pgai = std::make_shared<ga_instruction_scalar_mult>
6860 (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
6862 else if (child1->tensor().size() == 1) {
6863 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6864 pgai = std::make_shared<ga_instruction_scalar_mult>
6865 (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
6867 else if (child1->test_function_type == 0) {
6869 pgai = ga_uniform_instruction_simple_tmult
6870 (pnode->tensor(), child0->tensor(), child1->tensor());
6872 pgai = std::make_shared<ga_instruction_simple_tmult>
6873 (pnode->tensor(), child0->tensor(), child1->tensor());
6874 }
else if (child1->tensor_proper_size() == 1)
6875 pgai = std::make_shared<ga_instruction_spec2_tmult>
6876 (pnode->tensor(), child0->tensor(), child1->tensor());
6878 pgai = std::make_shared<ga_instruction_spec_tmult>
6879 (pnode->tensor(), child0->tensor(), child1->tensor(),
6880 child0->tensor_proper_size(),
6881 child1->tensor_proper_size());
6883 rmi.instructions.push_back(std::move(pgai));
6886 default: GMM_ASSERT1(false, "Unexpected operation. Internal error.");
6890 case GA_NODE_C_MATRIX:
6892 if (pnode->test_function_type) {
6893 std::vector<const base_tensor *> components(pnode->children.size());
6894 for (size_type i = 0; i < pnode->children.size(); ++i)
6895 components[i] = &(pnode->children[i]->tensor());
6896 pgai = std::make_shared<ga_instruction_c_matrix_with_tests>
6897 (pnode->tensor(), components);
6899 std::vector<scalar_type *> components(pnode->children.size());
6900 for (size_type i = 0; i < pnode->children.size(); ++i)
6901 components[i] = &(pnode->children[i]->tensor()[0]);
6902 pgai = std::make_shared<ga_instruction_simple_c_matrix>
6903 (pnode->tensor(), components);
6905 rmi.instructions.push_back(std::move(pgai));
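// GA_NODE_PARAMS covers the "f(...)" syntaxes: Reshape, Cross_product,
// Index_move_last, Swap_indices, Contract, predefined scalar functions,
// nonlinear operators and explicit index slices of a tensor.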
6909 case GA_NODE_PARAMS:
6910 if (child0->node_type == GA_NODE_RESHAPE) {
6911 pgai = std::make_shared<ga_instruction_copy_tensor>(pnode->tensor(),
child1->tensor());
6913 rmi.instructions.push_back(std::move(pgai));
6914 }
else if (child0->node_type == GA_NODE_CROSS_PRODUCT) {
6915 pga_tree_node child2 = pnode->children[2];
6916 if (child1->test_function_type==2 && child2->test_function_type==1)
6917 pgai = std::make_shared<ga_instruction_cross_product_tf>
6918 (pnode->tensor(), child2->tensor(), child1->tensor(), true);
6919 else if (child1->test_function_type || child2->test_function_type)
6920 pgai = std::make_shared<ga_instruction_cross_product_tf>
6921 (pnode->tensor(), child1->tensor(), child2->tensor(), false);
6923 pgai = std::make_shared<ga_instruction_cross_product>
6924 (pnode->tensor(), child1->tensor(), child2->tensor());
6925 rmi.instructions.push_back(std::move(pgai));
6926 }
else if (child0->node_type == GA_NODE_IND_MOVE_LAST) {
6928 size_type ind = size_type(round(pnode->children[2]->tensor()[0])-1);
size_type ii2 = 1;
6930 for (size_type i = 0; i < child1->tensor_order(); ++i)
6931 if (i>ind) ii2 *= child1->tensor_proper_size(i);
6932 size_type nn = child1->tensor_proper_size(ind);
6933 pgai = std::make_shared<ga_instruction_index_move_last>
6934 (pnode->tensor(), child1->tensor(), nn, ii2);
6935 rmi.instructions.push_back(std::move(pgai));
6936 }
else if (child0->node_type == GA_NODE_SWAP_IND) {
std::vector<size_type> ind(4);
for (size_type i = 2; i < 4; ++i)
6939 ind[i] = size_type(round(pnode->children[i]->tensor()[0])-1);
6940 if (ind[2] > ind[3]) std::swap(ind[2], ind[3]);
size_type ii2 = 1, ii3 = 1;
6942 for (size_type i = 0; i < child1->tensor_order(); ++i) {
6943 if (i>ind[2] && i<ind[3]) ii2 *= child1->tensor_proper_size(i);
6944 if (i>ind[3]) ii3 *= child1->tensor_proper_size(i);
6946 size_type nn1 = child1->tensor_proper_size(ind[2]);
6947 size_type nn2 = child1->tensor_proper_size(ind[3]);
6949 pgai = std::make_shared<ga_instruction_swap_indices>
6950 (pnode->tensor(), child1->tensor(), nn1, nn2, ii2, ii3);
6951 rmi.instructions.push_back(std::move(pgai));
6952 }
else if (child0->node_type == GA_NODE_CONTRACT) {
6953 std::vector<size_type> ind(2), indsize(2);
6954 pga_tree_node child2(0);
6955 if (pnode->children.size() == 4)
6956 { ind[0] = 2; ind[1] = 3; }
6957 else if (pnode->children.size() == 5)
6958 { ind[0] = 2; ind[1] = 4; child2 = pnode->children[3]; }
6959 else if (pnode->children.size() == 7) {
6960 ind.resize(4); indsize.resize(4);
6961 ind[0] = 2; ind[1] = 3; ind[2] = 5; ind[3] = 6;
6962 child2 = pnode->children[4];
6965 for (size_type i = 1; i < pnode->children.size(); ++i) {
6967 ind[kk] = size_type(round(pnode->children[i]->tensor()[0])-1);
6968 indsize[kk] = pnode->children[ll]->tensor_proper_size(ind[kk]);
6973 if (pnode->children.size() == 4) {
size_type i1 = ind[0], i2 = ind[1];
6975 if (i1 > i2) std::swap(i1, i2);
size_type ii2 = 1, ii3 = 1;
6977 for (size_type i = 0; i < child1->tensor_order(); ++i) {
6978 if (i > i1 && i < i2) ii2 *= child1->tensor_proper_size(i);
6979 if (i > i2) ii3 *= child1->tensor_proper_size(i);
6981 pgai = std::make_shared<ga_instruction_contract_1_1>
6982 (pnode->tensor(), child1->tensor(), indsize[0], ii2, ii3);
6984 else if (pnode->children.size() == 5) {
size_type i1 = ind[0], i2 = ind[1];
6987 size_type ii1 = 1, ii2 = 1, ii3 = 1, ii4 = 1;
6988 for (size_type i = 0; i < child1->tensor_order(); ++i) {
6989 if (i < i1) ii1 *= child1->tensor_proper_size(i);
6990 if (i > i1) ii2 *= child1->tensor_proper_size(i);
6992 for (size_type i = 0; i < child2->tensor_order(); ++i) {
6993 if (i < i2) ii3 *= child2->tensor_proper_size(i);
6994 if (i > i2) ii4 *= child2->tensor_proper_size(i);
6996 if (child1->test_function_type==1 && child2->test_function_type==2)
6997 pgai = std::make_shared<ga_instruction_contract_2_1_rev>
6998 (pnode->tensor(), child1->tensor(), child2->tensor(),
6999 indsize[0], ii1, ii2, ii3, ii4);
7001 pgai = std::make_shared<ga_instruction_contract_2_1>
7002 (pnode->tensor(), child1->tensor(), child2->tensor(),
7003 indsize[0], ii1, ii2, ii3, ii4);
7005 else if (pnode->children.size() == 7) {
7007 size_type i1 = ind[0], i2 = ind[1], i3 = ind[2], i4 = ind[3];
7008 size_type nn1 = indsize[0], nn2 = indsize[1];
7009 size_type ii1 = 1, ii2 = 1, ii3 = 1, ii4 = 1, ii5 = 1, ii6 = 1;
if (i1 > i2)
7011 { std::swap(i1, i2); std::swap(i3, i4); std::swap(nn1, nn2); }
7012 for (size_type i = 0; i < child1->tensor_order(); ++i) {
7013 if (i < i1) ii1 *= child1->tensor_proper_size(i);
7014 if (i > i1 && i < i2) ii2 *= child1->tensor_proper_size(i);
7015 if (i > i2) ii3 *= child1->tensor_proper_size(i);
7017 for (size_type i = 0; i < child2->tensor_order(); ++i) {
7018 if (i < i3 && i < i4) ii4 *= child2->tensor_proper_size(i);
7019 if ((i > i3 && i < i4) || (i > i4 && i < i3))
7020 ii5 *= child2->tensor_proper_size(i);
7021 if (i > i3 && i > i4) ii6 *= child2->tensor_proper_size(i);
7023 if (child1->test_function_type==1 && child2->test_function_type==2)
7024 pgai = std::make_shared<ga_instruction_contract_2_2_rev>
7025 (pnode->tensor(), child1->tensor(), child2->tensor(),
7026 nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6, i4 < i3);
7028 pgai = std::make_shared<ga_instruction_contract_2_2>
7029 (pnode->tensor(), child1->tensor(), child2->tensor(),
7030 nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6, i4 < i3);
7032 rmi.instructions.push_back(std::move(pgai));
7033 }
else if (child0->node_type == GA_NODE_PREDEF_FUNC) {
7035 std::string name = child0->name;
7036 const ga_predef_function_tab &PREDEF_FUNCTIONS
= dal::singleton<ga_predef_function_tab>::instance(0);
7038 ga_predef_function_tab::const_iterator it = PREDEF_FUNCTIONS.find(name);
7039 const ga_predef_function &F = it->second;
size_type nbargs = F.nbargs();
7041 pga_tree_node child2 = (nbargs == 2) ? pnode->children[2] : child1;
if (nbargs == 1) {
7044 if (child1->tensor().size() == 1) {
7046 pgai = std::make_shared<ga_instruction_eval_func_1arg_1res>
7047 (pnode->tensor()[0], child1->tensor()[0], F.f1());
7049 pgai = std::make_shared<ga_instruction_eval_func_1arg_1res_expr>
7050 (pnode->tensor()[0], child1->tensor()[0], F);
7053 pgai = std::make_shared<ga_instruction_eval_func_1arg>
7054 (pnode->tensor(), child1->tensor(), F.f1());
7056 pgai = std::make_shared<ga_instruction_eval_func_1arg_expr>
7057 (pnode->tensor(), child1->tensor(), F);
} else { // two-argument function
7060 if (child1->tensor().size() == 1 && child2->tensor().size() == 1) {
7062 pgai = std::make_shared<ga_instruction_eval_func_2arg_1res>
7063 (pnode->tensor()[0], child1->tensor()[0], child2->tensor()[0],
7066 pgai = std::make_shared<ga_instruction_eval_func_2arg_1res_expr>
7067 (pnode->tensor()[0], child1->tensor()[0], child2->tensor()[0],
7069 }
else if (child1->tensor().size() == 1) {
7072 std::make_shared<ga_instruction_eval_func_2arg_first_scalar>
7073 (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
7076 std::make_shared<ga_instruction_eval_func_2arg_first_scalar_expr>
7077 (pnode->tensor(), child1->tensor(), child2->tensor(), F);
7078 }
else if (child2->tensor().size() == 1) {
7081 std::make_shared<ga_instruction_eval_func_2arg_second_scalar>
7082 (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
7085 std::make_shared<ga_instruction_eval_func_2arg_second_scalar_expr>
7086 (pnode->tensor(), child1->tensor(), child2->tensor(), F);
7089 pgai = std::make_shared<ga_instruction_eval_func_2arg>
7090 (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
7092 pgai = std::make_shared<ga_instruction_eval_func_2arg_expr>
7093 (pnode->tensor(), child1->tensor(), child2->tensor(), F);
7096 rmi.instructions.push_back(std::move(pgai));
7098 }
else if (child0->node_type == GA_NODE_SPEC_FUNC) {
7100 GMM_ASSERT1(false, "Internal error");
7102 }
else if (child0->node_type == GA_NODE_OPERATOR) {
7104 ga_predef_operator_tab &PREDEF_OPERATORS
= dal::singleton<ga_predef_operator_tab>::instance(0);
7106 ga_predef_operator_tab::T::iterator it
7107 = PREDEF_OPERATORS.tab.find(child0->name);
7108 const ga_nonlinear_operator &OP = *(it->second);
7109 ga_nonlinear_operator::arg_list args;
7110 for (size_type i = 1; i < pnode->children.size(); ++i)
7111 args.push_back(&(pnode->children[i]->tensor()));
7113 if (child0->der1 && child0->der2 == 0) {
7114 pgai = std::make_shared<ga_instruction_eval_derivative_OP>
7115 (pnode->tensor(), OP, args, child0->der1);
7116 }
else if (child0->der1 && child0->der2) {
7117 pgai = std::make_shared<ga_instruction_eval_second_derivative_OP>
7118 (pnode->tensor(), OP, args, child0->der1, child0->der2);
7120 pgai = std::make_shared<ga_instruction_eval_OP>(pnode->tensor(),
OP, args);
7123 rmi.instructions.push_back(std::move(pgai));
7126 bgeot::multi_index mi1(size0.size()), indices;
7127 if (pnode->tensor().size() == 1) {
7128 for (size_type i = 0; i < child0->tensor_order(); ++i)
7129 mi1[i] = size_type(round(pnode->children[i+1]->tensor()[0])-1);
7130 pgai = std::make_shared<ga_instruction_copy_scalar>
7131 (pnode->tensor()[0], child0->tensor()(mi1));
7133 size_type nb_test = pnode->nb_test_functions();
7134 for (size_type i = 0; i < nb_test; ++i) indices.push_back(i);
7135 for (size_type i = 0; i < child0->tensor_order(); ++i) {
7136 if (pnode->children[i+1]->node_type != GA_NODE_ALLINDICES)
7138 mi1[i] = size_type(round(pnode->children[i+1]->tensor()[0])- 1);
else
7140 indices.push_back(i+nb_test);
7142 pgai = std::make_shared<ga_instruction_tensor_slice>
7143 (pnode->tensor(), child0->tensor(), mi1, indices);
7145 rmi.instructions.push_back(std::move(pgai));
7150 default: GMM_ASSERT1(false, "Unexpected node type " << pnode->node_type << " in compilation. Internal error.");
7153 if (tensor_to_clear) {
7156 pgai = std::make_shared<ga_instruction_clear_tensor>(pnode->tensor());
7157 rmi.elt_instructions.push_back(std::move(pgai));
7160 rmi.node_list[pnode->hash_value].push_back(pnode);
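// Compilation of an expression outside any assembly context (direct
// evaluation of scalar expressions): the root tensor of each tree is
// simply accumulated into the workspace assembled tensor with
// coefficient one.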
7163 void ga_compile_function(ga_workspace &workspace,
7164 ga_instruction_set &gis, bool scalar) {
7165 for (size_type i = 0; i < workspace.nb_trees(); ++i) {
7166 const ga_workspace::tree_description &td = workspace.tree_info(i);
7168 gis.trees.push_back(*(td.ptree));
7169 pga_tree_node root = gis.trees.back().root;
7171 GMM_ASSERT1(!scalar || (root->tensor().size() == 1),
7172 "The result of the given expression is not a scalar");
7173 ga_instruction_set::region_mim rm(td.mim, td.rg, 0);
7174 gis.all_instructions[rm].m = td.m;
7175 ga_if_hierarchy if_hierarchy;
7176 ga_compile_node(root, workspace, gis, gis.all_instructions[rm],
7177 *(td.m), true, if_hierarchy);
7179 gis.coeff = scalar_type(1);
7180 pga_instruction pgai;
7181 workspace.assembled_tensor() = root->tensor();
7182 pgai = std::make_shared<ga_instruction_add_to_coeff>
7183 (workspace.assembled_tensor(), root->tensor(), gis.coeff);
7184 gis.all_instructions[rm].instructions.push_back(std::move(pgai));
7189 static bool ga_node_used_interpolates
7190 (const pga_tree_node pnode, const ga_workspace &workspace,
7191 std::map<std::string, std::set<std::string> > &interpolates,
7192 std::set<std::string> &interpolates_der) {
bool found = false;
7194 bool intrpl(pnode->node_type == GA_NODE_INTERPOLATE_VAL ||
7195 pnode->node_type == GA_NODE_INTERPOLATE_GRAD ||
7196 pnode->node_type == GA_NODE_INTERPOLATE_HESS ||
7197 pnode->node_type == GA_NODE_INTERPOLATE_DIVERG);
7198 bool intrpl_test(pnode->node_type == GA_NODE_INTERPOLATE_VAL_TEST ||
7199 pnode->node_type == GA_NODE_INTERPOLATE_GRAD_TEST ||
7200 pnode->node_type == GA_NODE_INTERPOLATE_HESS_TEST ||
7201 pnode->node_type == GA_NODE_INTERPOLATE_DIVERG_TEST);
7203 if (intrpl || intrpl_test ||
7204 pnode->node_type == GA_NODE_INTERPOLATE_FILTER ||
7205 pnode->node_type == GA_NODE_INTERPOLATE_X ||
7206 pnode->node_type == GA_NODE_INTERPOLATE_NORMAL) {
7207 interpolates[pnode->interpolate_name].size();
7208 if (intrpl || intrpl_test) {
7209 if (workspace.variable_group_exists(pnode->name))
7210 interpolates[pnode->interpolate_name].insert(pnode->name);
7214 if (pnode->node_type == GA_NODE_INTERPOLATE_DERIVATIVE) {
7215 interpolates_der.insert(pnode->interpolate_name_der);
7216 interpolates[pnode->interpolate_name_der].size();
7217 if (workspace.variable_group_exists(pnode->name))
7218 interpolates[pnode->interpolate_name_der].insert(pnode->name);
7220 for (size_type i = 0; i < pnode->children.size(); ++i)
7221 found = ga_node_used_interpolates(pnode->children[i], workspace,
7222 interpolates, interpolates_der)
|| found;
return found;
}
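// Emits, once per region, the instruction performing the transformation
// call at each integration point (with a dedicated variant for the
// neighbor element transformation) and, for each variable group used
// through a transformation, an instruction updating the group info.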
7228 static void ga_compile_interpolate_trans
(const pga_tree_node pnode, const ga_workspace &workspace,
7230 ga_instruction_set &gis, ga_instruction_set::region_mim_instructions &rmi,
const mesh &m) {
7233 std::set<std::string> interpolates_der;
7234 std::map<std::string, std::set<std::string> > transformations;
7235 ga_node_used_interpolates(pnode, workspace, transformations,
interpolates_der);
7238 for (const auto &transformation : transformations) {
7239 const std::string &transname = transformation.first;
7240 bool compute_der = (interpolates_der.count(transname) != 0);
7241 if (rmi.transformations.count(transname) == 0 ||
7242 (compute_der && rmi.transformations_der.count(transname) == 0)) {
7243 rmi.transformations[transname].size();
7244 gis.transformations.insert(transname);
7245 if (compute_der) rmi.transformations_der.insert(transname);
7246 pga_instruction pgai;
7247 if (transname.compare("neighbor_element") == 0 ||
7248 transname.compare("neighbour_elt") == 0) {
7249 pgai = std::make_shared<ga_instruction_neighbor_transformation_call>
7250 (workspace, rmi.interpolate_infos[transname],
7251 workspace.interpolate_transformation(transname), gis.ctx,
7252 gis.Normal, m, gis.ipt, gis.pai, gis.gp_pool,
7253 gis.neighbor_corresp);
7255 pgai = std::make_shared<ga_instruction_transformation_call>
7256 (workspace, rmi.interpolate_infos[transname],
7257 workspace.interpolate_transformation(transname), gis.ctx,
7258 gis.Normal, m, compute_der);
7260 if (pgai) rmi.instructions.push_back(std::move(pgai));
7263 for (const std::string &nodename : transformation.second) {
7264 if (rmi.transformations[transname].count(nodename) == 0) {
7265 auto&& inin = rmi.interpolate_infos[transname];
7266 pga_instruction pgai =
7267 std::make_shared<ga_instruction_update_group_info>
7268 (workspace, gis, inin, nodename, inin.groups_info[nodename]);
7269 rmi.instructions.push_back(std::move(pgai));
7270 rmi.transformations[transname].insert(nodename);
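// Compilation of pure interpolation trees (workspace operations other
// than assembly): each tree is re-analyzed on its target mesh, compiled,
// and its root tensor added into the workspace assembled tensor.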
7276 void ga_compile_interpolation(ga_workspace &workspace,
7277 ga_instruction_set &gis) {
7278 gis.transformations.clear();
7279 gis.all_instructions.clear();
7280 for (size_type i = 0; i < workspace.nb_trees(); ++i) {
7281 const ga_workspace::tree_description &td = workspace.tree_info(i);
7282 if (td.operation != ga_workspace::ASSEMBLY) {
7283 gis.trees.push_back(*(td.ptree));
7286 const mesh *m = td.m;
7287 GMM_ASSERT1(m, "Internal error");
7288 ga_semantic_analysis(gis.trees.back(), workspace, *m,
7289 ref_elt_dim_of_mesh(*m), true, false);
7290 pga_tree_node root = gis.trees.back().root;
7293 ga_instruction_set::region_mim rm(td.mim, td.rg, 0);
7294 auto &rmi = gis.all_instructions[rm];
7298 ga_compile_interpolate_trans(root, workspace, gis, rmi, *(td.m));
7299 ga_compile_node(root, workspace, gis, rmi, *(td.m), false,
7300 rmi.current_hierarchy);
7303 workspace.assembled_tensor() = root->tensor();
7304 pga_instruction pgai = std::make_shared<ga_instruction_add_to>
7305 (workspace.assembled_tensor(), root->tensor());
7306 rmi.instructions.push_back(std::move(pgai));
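// Helpers for the static condensation of internal (im_data) variables:
// var_set maps variable names to consecutive numeric ids, with reverse
// lookup by id.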
7313 struct var_set : std::map<std::string,size_type> {
7315 size_type operator[](const std::string &name) {
7318 auto it = find(name);
if (it != end()) return it->second;
size_type id = size();  // assign the next free id to a new name
emplace(name, id);
return id;
}
7325 std::string operator[](const size_type &id) const {
7326 for (const auto &key_value : *this)
7327 if (key_value.second == id)
7328 return key_value.first;
7329 return std::string("");
}
};
7334 struct condensation_description {
7335 var_set Ivars, Jvars, Qvars;
7338 std::vector<std::set<size_type>> Qclusters, Jclusters;
7344 std::vector<size_type> cluster_of_Qvar;
7346 gmm::dense_matrix<base_tensor *> KQQ, KQJ, KIQ;
7350 std::vector<base_tensor *> RI, RQpr;
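// Main entry point for the compilation of assembly terms. When
// condensation is requested at order 2, a preliminary pass below gathers
// the internal variables of each region/method pair into clusters and
// allocates the tensors (KQQ, KQJ, KIQ, RQpr) holding the local blocks
// from which the condensed (Schur-complement-like) contributions are
// presumably built.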
7354 void ga_compile(ga_workspace &workspace, ga_instruction_set &gis,
7355 size_type order, bool condensation) {
7356 gis.transformations.clear();
7357 gis.all_instructions.clear();
7358 gis.unreduced_terms.clear();
7359 workspace.clear_temporary_variable_intervals();
7361 std::map<const ga_instruction_set::region_mim, condensation_description>
condensations;
7364 if (condensation && order == 2) {
7365 for (size_type i = 0; i < workspace.nb_trees(); ++i) {
7366 ga_workspace::tree_description &td = workspace.tree_info(i);
7367 if (td.order != 2 && td.order != size_type(-1))
continue;
7369 ga_tree tree(*(td.ptree));
7370 ga_semantic_analysis(tree, workspace, td.mim->linked_mesh(),
7371 ref_elt_dim_of_mesh(td.mim->linked_mesh()), true, false);
7373 pga_tree_node root = tree.root;
7376 bool v1_is_intern = workspace.is_internal_variable(root->name_test1),
7377 v2_is_intern = workspace.is_internal_variable(root->name_test2);
7378 if (v1_is_intern || v2_is_intern) {
7379 GMM_ASSERT1(tree.secondary_domain.empty(),
7380 "Condensed variable cannot be used in secondary domain");
7382 for (const auto &key_val : condensations) {
7383 const ga_instruction_set::region_mim rm0 = key_val.first;
7384 const condensation_description &CC0 = key_val.second;
7385 if (rm0.mim() == td.mim && rm0.region() != td.rg
7386 && (CC0.Qvars.count(root->name_test1) ||
7387 CC0.Qvars.count(root->name_test2))) {
mesh_region intrsct = mesh_region::intersection
7389 (*(rm0.region()), *(td.rg));
7390 GMM_ASSERT1(intrsct.is_empty(),
7391 "Cannot condense coupled variables between "
7392 "intersecting regions");
7395 const ga_instruction_set::region_mim rm(td.mim, td.rg, nullptr);
7397 condensation_description &CC = condensations[rm];
7399 size_type q1 = v1_is_intern ? CC.Qvars[root->name_test1] : size_type(-1),
7400 q2 = v2_is_intern ? CC.Qvars[root->name_test2] : size_type(-1);
7402 std::vector<size_type> selected_clusters;
7403 for (size_type j=0; j < CC.Qclusters.size(); ++j)
7404 if (CC.Qclusters[j].count(q1) || CC.Qclusters[j].count(q2))
7405 selected_clusters.push_back(j);
7407 if (selected_clusters.empty()) {
7408 CC.Qclusters.push_back(std::set<size_type>());
7409 if (q1 != size_type(-1)) CC.Qclusters.back().insert(q1);
7410 if (q2 != size_type(-1)) CC.Qclusters.back().insert(q2);
7412 auto &target = CC.Qclusters[selected_clusters[0]];
7413 if (q1 != size_type(-1)) target.insert(q1);
7414 if (q2 != size_type(-1)) target.insert(q2);
7415 for (size_type j=selected_clusters.size()-1; j > 1; --j) {
7416 auto &source = CC.Qclusters[selected_clusters[j]];
7417 target.insert(source.begin(), source.end());
7418 CC.Qclusters.erase(CC.Qclusters.begin() + selected_clusters[j]);
7425 for (auto &key_value : condensations) {
7426 condensation_description &CC = key_value.second;
7435 CC.Jclusters.resize(CC.Qclusters.size());
size_type Qsize = CC.Qvars.size();
7437 CC.cluster_of_Qvar.resize(Qsize);
7438 for (size_type i=0; i < CC.Qclusters.size(); ++i)
7439 for (const size_type &var : CC.Qclusters[i])
7440 CC.cluster_of_Qvar[var] = i;
7445 CC.KQQ.resize(Qsize, Qsize);
7446 CC.RQpr.resize(Qsize);
for (size_type q = 0; q < Qsize; ++q) {
7448 bgeot::multi_index mi(1);
7449 mi[0] = workspace.associated_im_data(CC.Qvars[q])->nb_tensor_elem();
7450 gis.condensation_tensors.push_back
7451 (std::make_shared<base_tensor>(mi));
7452 CC.RQpr[q] = gis.condensation_tensors.back().get();
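// The trees are then compiled in three phases: pre-assignment
// interpolations, the actual assembly terms, and post-assignment
// interpolations.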
7457 std::array<ga_workspace::operation_type,3>
7458 phases{ga_workspace::PRE_ASSIGNMENT,
7459 ga_workspace::ASSEMBLY,
7460 ga_workspace::POST_ASSIGNMENT};
7461 for (const auto &phase : phases) {
7463 for (size_type i = 0; i < workspace.nb_trees(); ++i) {
7464 ga_workspace::tree_description &td = workspace.tree_info(i);
7465 if (td.operation != phase)
continue;
7468 if (td.order == order || td.order == size_type(-1)) {
7469 std::list<ga_tree> &trees = (phase == ga_workspace::ASSEMBLY)
? gis.trees
7471 : gis.interpolation_trees;
7472 trees.push_back(*(td.ptree));
7474 ga_semantic_analysis(trees.back(), workspace, td.mim->linked_mesh(),
7475 ref_elt_dim_of_mesh(td.mim->linked_mesh()), true, false);
7477 pga_tree_node root = trees.back().root;
7482 psecondary_domain psd(0);
7483 if (trees.back().secondary_domain.size())
7484 psd = workspace.secondary_domain(trees.back().secondary_domain);
7485 ga_instruction_set::region_mim rm(td.mim, td.rg, psd);
7486 auto &rmi = gis.all_instructions[rm];
7490 ga_compile_interpolate_trans(root, workspace, gis, rmi, *(td.m));
7491 ga_compile_node(root, workspace, gis, rmi, *(td.m), false,
7492 rmi.current_hierarchy);
7496 if (phase != ga_workspace::ASSEMBLY) {
7497 if (!td.varname_interpolation.empty()) {
const im_data *imd
7499 = workspace.associated_im_data(td.varname_interpolation);
7500 auto &V = const_cast<model_real_plain_vector &>
7501 (workspace.value(td.varname_interpolation));
7502 GMM_ASSERT1(imd, "Internal error");
7503 auto pgai = std::make_shared<ga_instruction_assignment>
7504 (root->tensor(), V, gis.ctx, imd);
7505 rmi.instructions.push_back(std::move(pgai));
7508 pga_instruction pgai;
7511 workspace.assembled_tensor() = root->tensor();
7512 pgai = std::make_shared<ga_instruction_add_to_coeff>
7513 (workspace.assembled_tensor(), root->tensor(), gis.coeff);
7517 GMM_ASSERT1(root->tensor_proper_size() == 1,
7518 "Invalid vector or tensor quantity. An order 1 "
7519 "weak form has to be a scalar quantity");
7520 const mesh_fem *const
7521 mf = workspace.associated_mf(root->name_test1);
7522 const im_data *const
7523 imd = workspace.associated_im_data(root->name_test1);
7524 workspace.add_temporary_interval_for_unreduced_variable
(root->name_test1);
7527 base_vector &Vu = workspace.unreduced_vector(),
7528 &Vr = workspace.assembled_vector();
7530 const std::string &intn1 = root->interpolate_name_test1;
7531 bool secondary = !intn1.empty() &&
7532 workspace.secondary_domain_exists(intn1);
7533 fem_interpolation_context
7534 &ctx = intn1.empty() ? gis.ctx
7535 : (secondary ? rmi.secondary_domain_infos.ctx
7536 : rmi.interpolate_infos[intn1].ctx);
bool interpolate =
7538 !(intn1.empty() || intn1 == "neighbor_element"
7539 || intn1 == "neighbour_elt" || secondary);
7541 if (intn1.size() && !secondary &&
7542 workspace.variable_group_exists(root->name_test1)) {
7543 ga_instruction_set::variable_group_info
7544 &vgi = rmi.interpolate_infos[intn1]
7545 .groups_info[root->name_test1];
7546 pgai = std::make_shared<ga_instruction_vector_assembly_mf>
7547 (root->tensor(), Vr, Vu, ctx,
7548 vgi.I, vgi.mf, vgi.reduced_mf,
7549 gis.coeff, gis.nbpt, gis.ipt, interpolate);
7550 for (const std::string &name
7551 : workspace.variable_group(root->name_test1))
7552 gis.unreduced_terms.emplace(name, "");
7554 base_vector &V = mf->is_reduced() ? Vu : Vr;
7555 const gmm::sub_interval
7556 &I = mf->is_reduced()
7557 ? workspace.temporary_interval_of_variable(root->name_test1)
7559 : workspace.interval_of_variable(root->name_test1);
7560 pgai = std::make_shared<ga_instruction_vector_assembly_mf>
7561 (root->tensor(), V, ctx, I, *mf,
7562 gis.coeff, gis.nbpt, gis.ipt, interpolate);
7563 if (mf->is_reduced())
7564 gis.unreduced_terms.emplace(root->name_test1, "");
7567 GMM_ASSERT1(root->interpolate_name_test1.size() == 0,
7568 "Interpolate transformation on integration "
"point variable");
7570 if (!workspace.is_internal_variable(root->name_test1) ||
7572 pgai = std::make_shared<ga_instruction_vector_assembly_imd>
7573 (root->tensor(), Vr, gis.ctx,
7574 workspace.interval_of_variable(root->name_test1),
7575 *imd, gis.coeff, gis.ipt);
7578 pgai = std::make_shared<ga_instruction_vector_assembly>
7579 (root->tensor(), Vr,
7580 workspace.interval_of_variable(root->name_test1),
7586 GMM_ASSERT1(root->tensor_proper_size() == 1,
7587 "Invalid vector or tensor quantity. An order 2 "
7588 "weak form has to be a scalar quantity");
7589 const mesh_fem *mf1=workspace.associated_mf(root->name_test1),
7590 *mf2=workspace.associated_mf(root->name_test2);
7592 const im_data *imd1 = workspace.associated_im_data(root->name_test1),
7593 *imd2 = workspace.associated_im_data(root->name_test2);
7594 const std::string &intn1 = root->interpolate_name_test1,
7595 &intn2 = root->interpolate_name_test2;
7596 bool secondary1 = intn1.size() &&
7597 workspace.secondary_domain_exists(intn1);
7598 bool secondary2 = intn2.size() &&
7599 workspace.secondary_domain_exists(intn2);
7600 fem_interpolation_context
7601 &ctx1 = intn1.empty() ? gis.ctx
7602 : (secondary1 ? rmi.secondary_domain_infos.ctx
7603 : rmi.interpolate_infos[intn1].ctx),
7604 &ctx2 = intn2.empty() ? gis.ctx
7605 : (secondary2 ? rmi.secondary_domain_infos.ctx
7606 : rmi.interpolate_infos[intn2].ctx);
7607 bool interpolate = !(intn1.empty() || intn1 == "neighbor_element"
7608 || intn1 == "neighbour_elt" || secondary1) &&
7610 !(intn2.empty() || intn2 == "neighbor_element"
7611 || intn2 == "neighbour_elt" || secondary2);
7614 workspace.add_temporary_interval_for_unreduced_variable
(root->name_test1);
7616 workspace.add_temporary_interval_for_unreduced_variable
(root->name_test2);
7619 bool has_var_group1 = (!intn1.empty() && !secondary1 &&
7620 workspace.variable_group_exists
7621 (root->name_test1));
7622 bool has_var_group2 = (!intn2.empty() && !secondary2 &&
7623 workspace.variable_group_exists
7624 (root->name_test2));
7625 bool simple = !interpolate &&
7626 !has_var_group1 && !has_var_group2 &&
7627 mf1 && !(mf1->is_reduced()) &&
7628 mf2 && !(mf2->is_reduced());
7631 auto &Krr = workspace.assembled_matrix();
7632 auto &Kru = workspace.col_unreduced_matrix();
7633 auto &Kur = workspace.row_unreduced_matrix();
7634 auto &Kuu = workspace.row_col_unreduced_matrix();
7637 const gmm::sub_interval
7638 &I1 = workspace.interval_of_variable(root->name_test1),
7639 &I2 = workspace.interval_of_variable(root->name_test2);
7641 const scalar_type &alpha1 = workspace.factor_of_variable(root->name_test1),
7642 &alpha2 = workspace.factor_of_variable(root->name_test2);
7643 if (mf1->get_qdim() == 1 && mf2->get_qdim() == 1)
7644 pgai = std::make_shared
7645 <ga_instruction_matrix_assembly_standard_scalar>
7646 (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
7647 alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
7648 else if (root->sparsity() == 10 && root->t.qdim() == 2)
7649 pgai = std::make_shared
7650 <ga_instruction_matrix_assembly_standard_vector_opt10<2>>
7651 (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
7652 alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
7653 else if (root->sparsity() == 10 && root->t.qdim() == 3)
7654 pgai = std::make_shared
7655 <ga_instruction_matrix_assembly_standard_vector_opt10<3>>
7656 (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
7657 alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
7659 pgai = std::make_shared
7660 <ga_instruction_matrix_assembly_standard_vector>
7661 (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
7662 alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
7663 }
else if (condensation &&
7664 workspace.is_internal_variable(root->name_test1) &&
7665 workspace.is_internal_variable(root->name_test2)) {
7669 GMM_ASSERT1(imd1 && imd2, "Internal error");
7670 GMM_ASSERT1(!interpolate, "Internal error");
7674 condensation_description &CC = condensations[rm];
7675 GMM_ASSERT1(CC.Qvars.count(root->name_test1) > 0 &&
7676 CC.Qvars.count(root->name_test2) > 0, "Internal error");
7678 size_type q1 = CC.Qvars[root->name_test1],
7679 q2 = CC.Qvars[root->name_test2];
7680 if (!CC.KQQ(q1,q2)) {
size_type s1 = imd1->nb_tensor_elem(), s2 = imd2->nb_tensor_elem();
7682 gis.condensation_tensors.push_back
7683 (std::make_shared<base_tensor>(s1,s2));
7684 CC.KQQ(q1,q2) = gis.condensation_tensors.back().get();
7685 pgai = std::make_shared<ga_instruction_copy_vect>
7686 (CC.KQQ(q1,q2)->as_vector(), root->tensor().as_vector());
7689 pgai = std::make_shared<ga_instruction_add_to>
7690 (*CC.KQQ(q1,q2), root->tensor());
7692 rmi.instructions.push_back(std::move(pgai));
7693 }
          else if (condensation &&
                   workspace.is_internal_variable(root->name_test1)) {
            GMM_ASSERT1(imd1, "Internal error");
            GMM_ASSERT1(!interpolate, "Internal error");
            // ... (size s1 of the internal variable, elided)
            condensation_description &CC = condensations[rm];
            GMM_ASSERT1(CC.Qvars.count(root->name_test1), "Internal error");
            size_type q1 = CC.Qvars[root->name_test1],
                      j2 = CC.Jvars[root->name_test2];
            CC.Jclusters[CC.cluster_of_Qvar[q1]].insert(j2);
            if (q1 >= CC.KQJ.nrows() || j2 >= CC.KQJ.ncols())
              CC.KQJ.resize(std::max(CC.KQJ.nrows(), q1+1),
                            std::max(CC.KQJ.ncols(), j2+1));
            if (!CC.KQJ(q1,j2)) {
              // allocate a tensor of the same shape as the assembled term
              gis.condensation_tensors.push_back
                (std::make_shared<base_tensor>(root->tensor()));
              GMM_ASSERT1(root->tensor().size(0) == s1, "Internal error");
              CC.KQJ(q1,j2) = gis.condensation_tensors.back().get();
              pgai = std::make_shared<ga_instruction_copy_vect>
                (CC.KQJ(q1,j2)->as_vector(), root->tensor().as_vector());
            } else { // the block already exists, accumulate into it
              pgai = std::make_shared<ga_instruction_add_to>
                (*CC.KQJ(q1,j2), root->tensor());
            }
            rmi.instructions.push_back(std::move(pgai));
          }
          else if (condensation &&
                   workspace.is_internal_variable(root->name_test2)) {
            GMM_ASSERT1(imd2, "Internal error");
            GMM_ASSERT1(!interpolate, "Internal error");
            // ... (size s2 of the internal variable, elided)
            condensation_description &CC = condensations[rm];
            GMM_ASSERT1(CC.Qvars.count(root->name_test2), "Internal error");
            size_type i1 = CC.Ivars[root->name_test1],
                      q2 = CC.Qvars[root->name_test2];
            if (i1 >= CC.KIQ.nrows() || q2 >= CC.KIQ.ncols())
              CC.KIQ.resize(std::max(CC.KIQ.nrows(), i1+1),
                            std::max(CC.KIQ.ncols(), q2+1));
            if (!CC.KIQ(i1,q2)) {
              // allocate a tensor of the same shape as the assembled term
              gis.condensation_tensors.push_back
                (std::make_shared<base_tensor>(root->tensor()));
              GMM_ASSERT1(root->tensor().size(1) == s2, "Internal error");
              CC.KIQ(i1,q2) = gis.condensation_tensors.back().get();
              pgai = std::make_shared<ga_instruction_copy_vect>
                (CC.KIQ(i1,q2)->as_vector(), root->tensor().as_vector());
            } else { // the block already exists, accumulate into it
              pgai = std::make_shared<ga_instruction_add_to>
                (*CC.KIQ(i1,q2), root->tensor());
            }
            rmi.instructions.push_back(std::move(pgai));
          }
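          // No internal variable involved: select the assembly instruction
          // according to the presence of variable groups, reduced mesh_fems
          // and im_data on either side.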
          else if (!workspace.is_internal_variable(root->name_test1) &&
                   !workspace.is_internal_variable(root->name_test2)) {

            if ((mf1 && mf1->is_reduced()) || (mf2 && mf2->is_reduced())
                || has_var_group1 || has_var_group2)
              gis.unreduced_terms.emplace(root->name_test1,
                                          root->name_test2);

            auto &Kxu = (mf1 && mf1->is_reduced()) ? Kuu : Kru;
            auto &Kxr = (mf1 && mf1->is_reduced()) ? Kur : Krr;
            auto &Kux = (mf2 && mf2->is_reduced()) ? Kuu : Kur;
            auto &Krx = (mf2 && mf2->is_reduced()) ? Kru : Krr;
            auto &Kxx = (mf2 && mf2->is_reduced()) ? Kxu : Kxr;
            const scalar_type
              &alpha1 = workspace.factor_of_variable(root->name_test1),
              &alpha2 = workspace.factor_of_variable(root->name_test2);

            if (has_var_group1) {
              ga_instruction_set::variable_group_info
                &vgi1 = rmi.interpolate_infos[intn1]
                        .groups_info[root->name_test1];
              if (has_var_group2) {
                ga_instruction_set::variable_group_info
                  &vgi2 = rmi.interpolate_infos[intn2]
                          .groups_info[root->name_test2];
                pgai = std::make_shared
                       <ga_instruction_matrix_assembly_mf_mf>
                       (root->tensor(), Krr, Kru, Kur, Kuu, ctx1, ctx2,
                        vgi1, vgi2,
                        gis.coeff, gis.nbpt, gis.ipt, interpolate);
              } else {
                const gmm::sub_interval &I2 = mf2 && mf2->is_reduced()
                  ? workspace.temporary_interval_of_variable
                              (root->name_test2)
                  : workspace.interval_of_variable(root->name_test2);
                if (mf2)
                  pgai = std::make_shared
                         <ga_instruction_matrix_assembly_mf_mf>
                         (root->tensor(), Krx, Kux, ctx1, ctx2,
                          vgi1, I2, *mf2, alpha2,
                          gis.coeff, gis.nbpt, gis.ipt, interpolate);
                else // column variable stored as im_data
                  pgai = std::make_shared
                         <ga_instruction_matrix_assembly_mf_imd>
                         (root->tensor(), Krr, Kur, ctx1, ctx2,
                          vgi1, I2, imd2, alpha2, gis.coeff, gis.ipt);
              }
            } else {
              const gmm::sub_interval &I1 = mf1 && mf1->is_reduced()
                ? workspace.temporary_interval_of_variable
                            (root->name_test1)
                : workspace.interval_of_variable(root->name_test1);
              if (has_var_group2) {
                ga_instruction_set::variable_group_info
                  &vgi2 = rmi.interpolate_infos[intn2]
                          .groups_info[root->name_test2];
                if (mf1)
                  pgai = std::make_shared
                         <ga_instruction_matrix_assembly_mf_mf>
                         (root->tensor(), Kxr, Kxu, ctx1, ctx2,
                          I1, *mf1, alpha1, vgi2,
                          gis.coeff, gis.nbpt, gis.ipt, interpolate);
                else // row variable stored as im_data
                  pgai = std::make_shared
                         <ga_instruction_matrix_assembly_imd_mf>
                         (root->tensor(), Krr, Kru, ctx1, ctx2,
                          I1, imd1, alpha1, vgi2, gis.coeff, gis.ipt);
              } else {
                const gmm::sub_interval &I2 = mf2 && mf2->is_reduced()
                  ? workspace.temporary_interval_of_variable
                              (root->name_test2)
                  : workspace.interval_of_variable(root->name_test2);
                if (mf1 && mf2)
                  pgai = std::make_shared
                         <ga_instruction_matrix_assembly_mf_mf>
                         (root->tensor(), Kxx, ctx1, ctx2,
                          I1, *mf1, alpha1, I2, *mf2, alpha2,
                          gis.coeff, gis.nbpt, gis.ipt, interpolate);
                else if (mf1) // mesh_fem row, im_data column
                  pgai = std::make_shared
                         <ga_instruction_matrix_assembly_mf_imd>
                         (root->tensor(), Kxr, ctx1, ctx2,
                          I1, *mf1, alpha1, I2, imd2, alpha2,
                          gis.coeff, gis.ipt);
                else if (mf2) // im_data row, mesh_fem column
                  pgai = std::make_shared
                         <ga_instruction_matrix_assembly_imd_mf>
                         (root->tensor(), Krx, ctx1, ctx2,
                          I1, imd1, alpha1, I2, *mf2, alpha2,
                          gis.coeff, gis.ipt);
                else // im_data on both sides
                  pgai = std::make_shared
                         <ga_instruction_matrix_assembly_imd_imd>
                         (root->tensor(), Krr, ctx1, ctx2,
                          I1, imd1, alpha1, I2, imd2, alpha2,
                          gis.coeff, gis.ipt);
              }
            }
            rmi.instructions.push_back(std::move(pgai));
          }
      // ... (closing braces of the enclosing loops over root nodes, elided)

      if (condensation && order == 2 && phase == ga_workspace::ASSEMBLY) {
        auto &Krr = workspace.assembled_matrix();
        auto &Kru = workspace.col_unreduced_matrix();
        auto &Kur = workspace.row_unreduced_matrix();
        auto &Kuu = workspace.row_col_unreduced_matrix();

        for (auto &&key_val : condensations) {
          const ga_instruction_set::region_mim rm = key_val.first;
          condensation_description &CC = key_val.second;
          auto &rmi = gis.all_instructions[rm];

          CC.KQJpr.resize(CC.KQJ.nrows(), CC.KQJ.ncols());
          for (size_type k=0; k < CC.KQJpr.size(); ++k) {
            gis.condensation_tensors.push_back // memory allocation
              (std::make_shared<base_tensor>(2,2));
            CC.KQJpr[k] = gis.condensation_tensors.back().get();
          }

          pga_instruction pgai;
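          // For each cluster of coupled internal variables: first extract the
          // cached residuals RQ, then compute the locally condensed blocks
          // KQJpr and RQpr from KQQ, KQJ and RQ.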
          for (size_type k=0; k < CC.Qclusters.size(); ++k) {
            // extract the residuals of the cluster's internal variables
            // from the cached vector
            for (size_type q1 : CC.Qclusters[k]) {
              std::string name_test1 = CC.Qvars[q1];
              const im_data *imd1 = workspace.associated_im_data(name_test1);
              const gmm::sub_interval
                &I1 = workspace.interval_of_variable(name_test1);
              pgai =
                std::make_shared<ga_instruction_extract_residual_on_imd_dofs>
                (*(CC.RQpr[q1]), workspace.cached_vector(),
                 gis.ctx, I1, *imd1, gis.ipt);
              rmi.instructions.push_back(std::move(pgai));
            }

            // local condensation of the cluster: eliminates its internal
            // variables from the KQQ/KQJ blocks and the RQ residuals
            pgai = std::make_shared<ga_instruction_condensation_sub>
              (CC.KQJpr, CC.RQpr, CC.KQQ, CC.KQJ, CC.Qclusters[k], gis.coeff);
            rmi.instructions.push_back(std::move(pgai));
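            // assembly of the locally condensed blocks KQJpr and residuals
            // RQpr into the global coupling matrices and residual vector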
            for (size_type q1 : CC.Qclusters[k]) {
              std::string name_test1 = CC.Qvars[q1];
              const im_data *imd1 = workspace.associated_im_data(name_test1);
              const gmm::sub_interval
                &I1 = workspace.interval_of_variable(name_test1);
              GMM_ASSERT1(imd1, "Internal error");
              for (size_type j2 : CC.Jclusters[k]) {
                std::string name_test2 = CC.Jvars[j2];
                const mesh_fem *mf2 = workspace.associated_mf(name_test2);
                const im_data *imd2 = workspace.associated_im_data(name_test2);
                const gmm::sub_interval
                  &I2 = mf2 && mf2->is_reduced()
                      ? workspace.temporary_interval_of_variable(name_test2)
                      : workspace.interval_of_variable(name_test2);
                const base_tensor &Kq1j2pr = *(CC.KQJpr(q1,j2));
                model_real_sparse_matrix
                  &KQJpr = mf2 && mf2->is_reduced()
                         ? workspace.col_unreduced_matrix()
                         : workspace.internal_coupling_matrix();
                if (mf2) {
                  pgai =
                    std::make_shared<ga_instruction_matrix_assembly_imd_mf>
                    (Kq1j2pr, KQJpr, gis.ctx, gis.ctx,
                     I1, imd1, gis.ONE, I2, *mf2, gis.ONE, gis.ONE, gis.ipt);
                  if (mf2->is_reduced())
                    gis.unreduced_terms.emplace(name_test1, name_test2);
                } else
                  pgai =
                    std::make_shared<ga_instruction_matrix_assembly_imd_imd>
                    (Kq1j2pr, KQJpr, gis.ctx, gis.ctx,
                     I1, imd1, gis.ONE, I2, imd2, gis.ONE, gis.ONE, gis.ipt);
                rmi.instructions.push_back(std::move(pgai));
              }
              const bool initialize = true;
              pgai = std::make_shared<ga_instruction_vector_assembly_imd>
                (*(CC.RQpr[q1]), workspace.assembled_vector(),
                 gis.ctx, I1, *imd1, gis.ONE, gis.ipt, initialize);
              rmi.instructions.push_back(std::move(pgai));
            }
          }
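          // Contributions of the condensed internal variables to the primary
          // unknowns: for each row variable i1, chain the KIQ blocks with the
          // condensed KQJpr/RQpr blocks (a Schur-complement-like product) and
          // assemble the result into the global matrix and vector.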
7970 for (
size_type i1=0; i1 < CC.Ivars.size(); ++i1) {
7972 std::string name_test1 = CC.Ivars[i1];
7973 const mesh_fem *mf1 = workspace.associated_mf(name_test1);
7974 const im_data *imd1 = workspace.associated_im_data(name_test1);
7976 &alpha1 = workspace.factor_of_variable(name_test1);
7977 const gmm::sub_interval
7978 &I1 = mf1 && mf1->is_reduced()
7979 ? workspace.temporary_interval_of_variable(name_test1)
7980 : workspace.interval_of_variable(name_test1);
7984 std::vector<std::set<size_type>> Q_of_J(CC.Jvars.size());
7985 for (
size_type q=0; q < CC.Qvars.size(); ++q)
7989 Q_of_J[j].insert(q);
7992 for (
size_type j2=0; j2 < CC.Jvars.size(); ++j2) {
7993 if (Q_of_J[j2].size()) {
7994 std::vector<base_tensor *> Ki1Q, KQj2;
7996 Ki1Q.push_back(CC.KIQ(i1,q));
7997 KQj2.push_back(CC.KQJpr(q,j2));
8000 gis.condensation_tensors.push_back
8001 (std::make_shared<base_tensor>());
8002 base_tensor &Kij = *gis.condensation_tensors.back();
8003 pgai = std::make_shared<ga_instruction_condensation_super_K>
8005 rmi.instructions.push_back(std::move(pgai));
                // assembly of the condensed term Kij
                std::string name_test2 = CC.Jvars[j2];
                const mesh_fem *mf2 = workspace.associated_mf(name_test2);
                const im_data *imd2 = workspace.associated_im_data(name_test2);
                const scalar_type
                  &alpha2 = workspace.factor_of_variable(name_test2);
                const gmm::sub_interval
                  &I2 = mf2 && mf2->is_reduced()
                      ? workspace.temporary_interval_of_variable(name_test2)
                      : workspace.interval_of_variable(name_test2);

                auto &Kxu = (mf1 && mf1->is_reduced()) ? Kuu : Kru;
                auto &Kxr = (mf1 && mf1->is_reduced()) ? Kur : Krr;
                auto &Krx = (mf2 && mf2->is_reduced()) ? Kru : Krr;
                auto &Kxx = (mf2 && mf2->is_reduced()) ? Kxu : Kxr;

                if ((mf1 && mf1->is_reduced()) || (mf2 && mf2->is_reduced()))
                  gis.unreduced_terms.emplace(name_test1, name_test2);

                if (mf1 && mf2)
                  pgai = std::make_shared
                         <ga_instruction_matrix_assembly_mf_mf>
                         (Kij, Kxx, gis.ctx, gis.ctx,
                          I1, *mf1, alpha1, I2, *mf2, alpha2,
                          gis.coeff, gis.nbpt, gis.ipt, false);
                else if (mf1) // mesh_fem row, im_data column
                  pgai = std::make_shared
                         <ga_instruction_matrix_assembly_mf_imd>
                         (Kij, Kxr, gis.ctx, gis.ctx,
                          I1, *mf1, alpha1, I2, imd2, alpha2,
                          gis.coeff, gis.ipt);
                else if (mf2) // im_data row, mesh_fem column
                  pgai = std::make_shared
                         <ga_instruction_matrix_assembly_imd_mf>
                         (Kij, Krx, gis.ctx, gis.ctx,
                          I1, imd1, alpha1, I2, *mf2, alpha2,
                          gis.coeff, gis.ipt);
                else // im_data on both sides
                  pgai = std::make_shared
                         <ga_instruction_matrix_assembly_imd_imd>
                         (Kij, Krr, gis.ctx, gis.ctx,
                          I1, imd1, alpha1, I2, imd2, alpha2,
                          gis.coeff, gis.ipt);
                rmi.instructions.push_back(std::move(pgai));
              }
            }
            // condensed residual contribution for the row variable i1
            std::vector<base_tensor *> Ki1Q, RQpr;
            for (size_type q=0; q < CC.Qvars.size(); ++q)
              if (CC.KIQ(i1,q)) {
                Ki1Q.push_back(CC.KIQ(i1,q));
                RQpr.push_back(CC.RQpr[q]);
              }
            gis.condensation_tensors.push_back
              (std::make_shared<base_tensor>());
            base_tensor &Ri = *gis.condensation_tensors.back();
            pgai = std::make_shared<ga_instruction_condensation_super_R>
              (Ri, Ki1Q, RQpr);
            rmi.instructions.push_back(std::move(pgai));

            base_vector &R = mf1 && mf1->is_reduced()
                           ? workspace.unreduced_vector()
                           : workspace.assembled_vector();
            if (mf1)
              pgai = std::make_shared<ga_instruction_vector_assembly_mf>
                (Ri, R, gis.ctx, I1, *mf1, gis.coeff, gis.nbpt, gis.ipt,
                 false);
            else if (imd1)
              pgai = std::make_shared<ga_instruction_vector_assembly_imd>
                (Ri, R, gis.ctx, I1, *imd1, gis.coeff, gis.ipt);
            else
              pgai = std::make_shared<ga_instruction_vector_assembly>
                (Ri, R, I1, gis.coeff);
            rmi.instructions.push_back(std::move(pgai));
          }
        }
      }
      // ... (remaining lines of the compilation function, elided)
  }
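  //=========================================================================
  // Execution of a compiled set of assembly instructions
  //=========================================================================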
  void ga_function_exec(ga_instruction_set &gis) {

    for (auto &&instr : gis.all_instructions) {
      const auto &gil = instr.second.instructions;
      // each exec() returns a count of following instructions to skip
      for (size_type j = 0; j < gil.size(); ++j) j += gil[j]->exec();
    }
  }
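  // Execution at arbitrary interpolation points provided by a
  // ga_interpolation_context rather than at integration points.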
  void ga_interpolation_exec(ga_instruction_set &gis,
                             ga_workspace &workspace,
                             ga_interpolation_context &gic) {
    base_matrix G;
    base_small_vector un, up;

    for (const std::string &t : gis.transformations)
      workspace.interpolate_transformation(t)->init(workspace);

    for (auto &&instr : gis.all_instructions) {

      // ... (declarations elided)
      const mesh_region &region = *(instr.first.region());
      const mesh &m = *(instr.second.m);
      GMM_ASSERT1(&m == &(gic.linked_mesh()),
                  "Incompatibility of meshes in interpolation");
      const auto &gilb = instr.second.begin_instructions;
      const auto &gile = instr.second.elt_instructions;
      const auto &gil = instr.second.instructions;

      // iteration on elements (or faces of elements)
      std::vector<size_type> ind;
      auto pai_old = papprox_integration{};
      for (getfem::mr_visitor v(region, m, true); !v.finished(); ++v) {
        if (gic.use_mim()) {
          // ... (selection of the integration method of the element, elided)
        }
        bgeot::pstored_point_tab pspt
          = gic.ppoints_for_element(v.cv(), v.f(), ind);

        if (pspt.get() && ind.size() && pspt->size()) {
          m.points_of_convex(v.cv(), G);
          bgeot::pgeometric_trans pgt = m.trans_of_convex(v.cv());
          up.resize(G.nrows());
          un.resize(pgt->dim());
          if (gis.ctx.have_pgp() && gis.ctx.pgt() == pgt
              && pai_old == gis.pai) {
            gis.ctx.change(gis.ctx.pgp(), 0, 0, G, v.cv(), v.f());
          } else {
            if (!(gic.use_pgp(v.cv()))) {
              gis.ctx.change(pgt, 0, (*pspt)[0], G, v.cv(), v.f());
            } else {
              gis.ctx.change(gis.gp_pool(pgt, pspt), 0, 0, G, v.cv(), v.f());
            }
          }
          pai_old = gis.pai;
          if (gis.need_elt_size)
            gis.elt_size = convex_radius_estimate(pgt, G)*scalar_type(2);

          // iteration on the interpolation points
          gis.nbpt = pspt->size();
          for (size_type ii = 0; ii < ind.size(); ++ii) {
            gis.ipt = ind[ii]; // index of the current interpolation point
            if (gis.ctx.have_pgp()) gis.ctx.set_ii(ind[ii]);
            else gis.ctx.set_xref((*pspt)[gis.ipt]);

            if (ii == 0 || !(pgt->is_linear())) {
              // computation of the unit normal vector on a boundary
              if (v.f() != short_type(-1)) {
                const base_matrix& B = gis.ctx.B();
                gmm::copy(pgt->normals()[v.f()], un);
                gmm::mult(B, un, up);
                scalar_type nup = gmm::vect_norm2(up);
                gmm::scale(up, 1.0/nup);
                gmm::clean(up, 1e-13);
                gis.Normal = up;
              } else gis.Normal.resize(0);
            }
            gmm::clear(workspace.assembled_tensor().as_vector());
            if (ii == 0) {
              for (size_type j = 0; j < gilb.size(); ++j) j += gilb[j]->exec();
              for (size_type j = 0; j < gile.size(); ++j) j += gile[j]->exec();
            }
            for (size_type j = 0; j < gil.size(); ++j) j += gil[j]->exec();
            gic.store_result(v.cv(), ind[ii], workspace.assembled_tensor());
          }
        }
      }
    }
    for (const std::string &t : gis.transformations)
      workspace.interpolate_transformation(t)->finalize();
  }
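  // Execution of the full instruction set at the integration points of each
  // compiled (mesh_im, region) pair, for assembly.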
  void ga_exec(ga_instruction_set &gis, ga_workspace &workspace) {
    base_matrix G1, G2;
    base_small_vector un;
    scalar_type J1(0), J2(0);

    for (const std::string &t : gis.transformations)
      workspace.interpolate_transformation(t)->init(workspace);

    for (auto &instr : gis.all_instructions) {
      const getfem::mesh_im &mim = *(instr.first.mim());
      psecondary_domain psd = instr.first.psd();
      const mesh &m = *(instr.second.m);
      GMM_ASSERT1(&m == &(mim.linked_mesh()), "Incompatibility of meshes");
      const auto &gilb = instr.second.begin_instructions;
      const auto &gile = instr.second.elt_instructions;
      const auto &gil = instr.second.instructions;

      if (!psd) { // standard assembly on a single domain
        const mesh_region &region = *(instr.first.region());

        // iteration on elements (or faces of elements)
        size_type old_cv = size_type(-1);
        bgeot::pgeometric_trans pgt = 0, pgt_old = 0;
        pintegration_method pim = 0;
        papprox_integration pai = 0;
        bgeot::pstored_point_tab pspt = 0, old_pspt = 0;
        bgeot::pgeotrans_precomp pgp = 0;
        bool first_gp = true;
        for (getfem::mr_visitor v(region, m, true); !v.finished(); ++v) {
          if (v.cv() != old_cv) {
            pgt = m.trans_of_convex(v.cv());
            pim = mim.int_method_of_element(v.cv());
            m.points_of_convex(v.cv(), G1);

            if (pim->type() == IM_NONE) continue;
            GMM_ASSERT1(pim->type() == IM_APPROX, "Sorry, exact methods "
                        "cannot be used in high level generic assembly");
            pai = pim->approx_method();
            pspt = pai->pintegration_points();
            if (pspt->size()) {
              if (pgp && gis.pai == pai && pgt_old == pgt) {
                gis.ctx.change(pgp, 0, 0, G1, v.cv(), v.f());
              } else {
                if (pai->is_built_on_the_fly()) {
                  gis.ctx.change(pgt, 0, (*pspt)[0], G1, v.cv(), v.f());
                  pgp = 0;
                } else {
                  pgp = gis.gp_pool(pgt, pspt);
                  gis.ctx.change(pgp, 0, 0, G1, v.cv(), v.f());
                }
                pgt_old = pgt; gis.pai = pai;
              }
              if (gis.need_elt_size)
                gis.elt_size = convex_radius_estimate(pgt, G1)*scalar_type(2);
            }
            old_cv = v.cv();
          } else {
            if (pim->type() == IM_NONE) continue;
            gis.ctx.set_face_num(v.f());
          }
          if (pspt != old_pspt) { first_gp = true; old_pspt = pspt; }
          if (pspt->size()) {
            // iteration on the Gauss points of the element or face
            size_type first_ind = 0;
            if (v.f() != short_type(-1)) {
              gis.nbpt = pai->nb_points_on_face(v.f());
              first_ind = pai->ind_first_point_on_face(v.f());
            } else {
              gis.nbpt = pai->nb_points_on_convex();
            }
            for (gis.ipt = 0; gis.ipt < gis.nbpt; ++(gis.ipt)) {
              if (pgp) gis.ctx.set_ii(first_ind+gis.ipt);
              else gis.ctx.set_xref((*pspt)[first_ind+gis.ipt]);
              if (gis.ipt == 0 || !(pgt->is_linear())) {
                J1 = gis.ctx.J();
                // computation of the unit normal vector on a boundary
                if (v.f() != short_type(-1)) {
                  gis.Normal.resize(G1.nrows());
                  un.resize(pgt->dim());
                  gmm::copy(pgt->normals()[v.f()], un);
                  gmm::mult(gis.ctx.B(), un, gis.Normal);
                  scalar_type nup = gmm::vect_norm2(gis.Normal);
                  gmm::scale(gis.Normal, 1.0/nup);
                  gmm::clean(gis.Normal, 1e-13);
                } else gis.Normal.resize(0);
              }
              auto ipt_coeff = pai->coeff(first_ind+gis.ipt);
              gis.coeff = J1 * ipt_coeff;
              bool enable_ipt = (gmm::abs(ipt_coeff) > 0.0 ||
                                 workspace.include_empty_int_points());
              if (!enable_ipt) gis.coeff = scalar_type(0);

              if (first_gp) {
                for (size_type j=0; j < gilb.size(); ++j) j+=gilb[j]->exec();
                first_gp = false;
              }
              if (gis.ipt == 0) { // instructions executed once per element
                for (size_type j=0; j < gile.size(); ++j) j+=gile[j]->exec();
              }
              if (enable_ipt || gis.ipt == 0 || gis.ipt == gis.nbpt-1) {
                for (size_type j=0; j < gil.size(); ++j) j+=gil[j]->exec();
              }
              GA_DEBUG_INFO("-----------------------------");
            }
          }
        }
      } else {
        auto &sdi = instr.second.secondary_domain_infos;
        const mesh_region &region1 = *(instr.first.region());

        // iteration on the elements (or faces) of the primary domain
        size_type old_cv1 = size_type(-1), old_cv2 = size_type(-1);
        bgeot::pgeometric_trans pgt1 = 0, pgt1_old = 0, pgt2 = 0, pgt2_old = 0;
        pintegration_method pim1 = 0, pim2 = 0;
        papprox_integration pai1 = 0, pai2 = 0;
        bgeot::pstored_point_tab pspt1=0, old_pspt1=0, pspt2=0, old_pspt2=0;
        bgeot::pgeotrans_precomp pgp1 = 0, pgp2 = 0;
        bool first_gp = true;
        for (getfem::mr_visitor v1(region1, m, true); !v1.finished(); ++v1) {
          if (v1.cv() != old_cv1) {
            pgt1 = m.trans_of_convex(v1.cv());
            pim1 = mim.int_method_of_element(v1.cv());
            m.points_of_convex(v1.cv(), G1);

            if (pim1->type() == IM_NONE) continue;
            GMM_ASSERT1(pim1->type() == IM_APPROX, "Sorry, exact methods "
                        "cannot be used in high level generic assembly");
            pai1 = pim1->approx_method();
            pspt1 = pai1->pintegration_points();
            if (pspt1->size()) {
              if (pgp1 && gis.pai == pai1 && pgt1_old == pgt1) {
                gis.ctx.change(pgp1, 0, 0, G1, v1.cv(), v1.f());
              } else {
                if (pai1->is_built_on_the_fly()) {
                  gis.ctx.change(pgt1, 0, (*pspt1)[0], G1, v1.cv(), v1.f());
                  pgp1 = 0;
                } else {
                  pgp1 = gis.gp_pool(pgt1, pspt1);
                  gis.ctx.change(pgp1, 0, 0, G1, v1.cv(), v1.f());
                }
                pgt1_old = pgt1; gis.pai = pai1;
              }
              if (gis.need_elt_size)
                gis.elt_size = convex_radius_estimate(pgt1,G1)*scalar_type(2);
            }
            old_cv1 = v1.cv();
          } else {
            if (pim1->type() == IM_NONE) continue;
            gis.ctx.set_face_num(v1.f());
          }
          if (pspt1 != old_pspt1) { first_gp = true; old_pspt1 = pspt1; }
          if (pspt1->size()) {
            // iteration on the Gauss points of the primary element or face
            size_type nbpt1, first_ind1 = 0;
            if (v1.f() != short_type(-1)) {
              nbpt1 = pai1->nb_points_on_face(v1.f());
              first_ind1 = pai1->ind_first_point_on_face(v1.f());
            } else {
              nbpt1 = pai1->nb_points_on_convex();
            }

            const mesh &m2 = psd->mim().linked_mesh();
            const mesh_region &region2 = psd->give_region(m, v1.cv(), v1.f());
            for (getfem::mr_visitor v2(region2, m2, true);
                 !v2.finished(); ++v2) {
              if (v2.cv() != old_cv2) {
                pgt2 = m2.trans_of_convex(v2.cv());
                pim2 = psd->mim().int_method_of_element(v2.cv());
                m2.points_of_convex(v2.cv(), G2);

                if (pim2->type() == IM_NONE) continue;
                GMM_ASSERT1(pim2->type() == IM_APPROX, "Sorry, exact methods "
                            "cannot be used in high level generic assembly");
                pai2 = pim2->approx_method();
                pspt2 = pai2->pintegration_points();
                if (pspt2->size()) {
                  if (pgp2 && sdi.pai == pai2 && pgt2_old == pgt2) {
                    sdi.ctx.change(pgp2, 0, 0, G2, v2.cv(), v2.f());
                  } else {
                    if (pai2->is_built_on_the_fly()) {
                      sdi.ctx.change(pgt2, 0, (*pspt2)[0], G2, v2.cv(), v2.f());
                      pgp2 = 0;
                    } else {
                      pgp2 = gis.gp_pool(pgt2, pspt2);
                      sdi.ctx.change(pgp2, 0, 0, G2, v2.cv(), v2.f());
                    }
                    pgt2_old = pgt2; sdi.pai = pai2;
                  }
                }
                old_cv2 = v2.cv();
              } else {
                if (pim2->type() == IM_NONE) continue;
                sdi.ctx.set_face_num(v2.f());
              }
              if (pspt2 != old_pspt2) { first_gp = true; old_pspt2 = pspt2; }
              if (pspt2->size()) {
                // iteration on the Gauss points of the secondary element
                size_type nbpt2, first_ind2 = 0;
                if (v2.f() != short_type(-1)) {
                  nbpt2 = pai2->nb_points_on_face(v2.f());
                  first_ind2 = pai2->ind_first_point_on_face(v2.f());
                } else {
                  nbpt2 = gis.nbpt = pai2->nb_points_on_convex();
                }
                gis.nbpt = nbpt1 * nbpt2;
                gis.ipt = 0;
                for (size_type ipt1=0; ipt1 < nbpt1; ++ipt1) {
                  for (size_type ipt2=0; ipt2 < nbpt2; ++ipt2, ++(gis.ipt)) {

                    if (pgp1) gis.ctx.set_ii(first_ind1+ipt1);
                    else gis.ctx.set_xref((*pspt1)[first_ind1+ipt1]);
                    if (pgp2) sdi.ctx.set_ii(first_ind2+ipt2);
                    else sdi.ctx.set_xref((*pspt2)[first_ind2+ipt2]);

                    if (gis.ipt == 0 || !(pgt1->is_linear())) {
                      J1 = gis.ctx.J();
                      if (v1.f() != short_type(-1)) {
                        gis.Normal.resize(G1.nrows());
                        un.resize(pgt1->dim());
                        gmm::copy(pgt1->normals()[v1.f()], un);
                        gmm::mult(gis.ctx.B(), un, gis.Normal);
                        scalar_type nup = gmm::vect_norm2(gis.Normal);
                        gmm::scale(gis.Normal, 1.0/nup);
                        gmm::clean(gis.Normal, 1e-13);
                      } else gis.Normal.resize(0);
                    }
                    if (gis.ipt == 0 || !(pgt2->is_linear())) {
                      J2 = sdi.ctx.J();
                      if (v2.f() != short_type(-1)) {
                        sdi.Normal.resize(G2.nrows());
                        un.resize(pgt2->dim());
                        gmm::copy(pgt2->normals()[v2.f()], un);
                        gmm::mult(sdi.ctx.B(), un, sdi.Normal);
                        scalar_type nup = gmm::vect_norm2(sdi.Normal);
                        gmm::scale(sdi.Normal, 1.0/nup);
                        gmm::clean(sdi.Normal, 1e-13);
                      } else sdi.Normal.resize(0);
                    }
                    auto ipt_coeff = pai1->coeff(first_ind1+ipt1)
                                     * pai2->coeff(first_ind2+ipt2);
                    gis.coeff = J1 * J2 * ipt_coeff;
                    bool enable_ipt = (gmm::abs(ipt_coeff) > 0.0 ||
                                       workspace.include_empty_int_points());
                    if (!enable_ipt) gis.coeff = scalar_type(0);

                    if (first_gp) {
                      for (size_type j=0; j < gilb.size(); ++j)
                        j+=gilb[j]->exec();
                      first_gp = false;
                    }
                    if (gis.ipt == 0) { // once per pair of elements
                      for (size_type j=0; j < gile.size(); ++j)
                        j+=gile[j]->exec();
                    }
                    if (enable_ipt || gis.ipt == 0 || gis.ipt == gis.nbpt-1) {
                      for (size_type j=0; j < gil.size(); ++j)
                        j+=gil[j]->exec();
                    }
                    GA_DEBUG_INFO("-----------------------------");
                  }
                }
              }
            }
          }
        }
      }
    }

    for (const std::string &t : gis.transformations)
      workspace.interpolate_transformation(t)->finalize();
  }