GetFEM 5.4.1
getfem_generic_assembly_compile_and_exec.cc
1 /*===========================================================================
2 
3  Copyright (C) 2013-2020 Yves Renard
4 
5  This file is a part of GetFEM
6 
7  GetFEM is free software; you can redistribute it and/or modify it
8  under the terms of the GNU Lesser General Public License as published
9  by the Free Software Foundation; either version 3 of the License, or
10  (at your option) any later version along with the GCC Runtime Library
11  Exception either version 3.1 or (at your option) any later version.
12  This program is distributed in the hope that it will be useful, but
13  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14  or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15  License and GCC Runtime Library Exception for more details.
16  You should have received a copy of the GNU Lesser General Public License
17  along with this program; if not, write to the Free Software Foundation,
18  Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 
20 ===========================================================================*/
21 
25 #include "getfem/getfem_generic_assembly_compile_and_exec.h"
26 #include "getfem/getfem_generic_assembly_functions_and_operators.h"
27 
28 // #define GA_USES_BLAS // not worthwhile, at least with the Debian BLAS
29 
30 // #define GA_DEBUG_INFO(a) { cout << a << endl; }
31 #define GA_DEBUG_INFO(a)
32 
33 
34 
35 namespace getfem {
36 
37 
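  // Hand-unrolled helpers for v2 = a*v1 (copy_scaled_*) and v2 += a*v1
  // (add_scaled_*): the main loop processes 4 (resp. 8) entries per
  // iteration and the trailing loop finishes the remaining size() % 4
  // (resp. % 8) entries. For instance, copy_scaled_4(v1, a, v2) on vectors
  // of size 7 performs one unrolled iteration plus three scalar copies.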
38  template <class VEC1, class VEC2>
39  inline void copy_scaled_4(const VEC1 &v1, const scalar_type a, VEC2 &v2) {
40  auto it1 = v1.begin();
41  auto it2 = v2.begin(), it2e = v2.end();
42  size_type nd = v1.size() >> 2;
43  for (size_type i = 0; i < nd; ++i) {
44  *it2++ = (*it1++) * a;
45  *it2++ = (*it1++) * a;
46  *it2++ = (*it1++) * a;
47  *it2++ = (*it1++) * a;
48  }
49  for (; it2 != it2e;)
50  *it2++ = (*it1++) * a;
51  }
52 
53  template <class VEC1, class VEC2>
54  inline void add_scaled_4(const VEC1 &v1, const scalar_type a, VEC2 &v2) {
55  auto it1 = v1.begin();
56  auto it2 = v2.begin(), it2e = v2.end();
57  size_type nd = v1.size() >> 2;
58  for (size_type i = 0; i < nd; ++i) {
59  *it2++ += (*it1++) * a;
60  *it2++ += (*it1++) * a;
61  *it2++ += (*it1++) * a;
62  *it2++ += (*it1++) * a;
63  }
64  for (; it2 != it2e;)
65  *it2++ += (*it1++) * a;
66  }
67 
68  template <class VEC1, class VEC2>
69  inline void copy_scaled_8(const VEC1 &v1, const scalar_type a, VEC2 &v2) {
70  auto it1 = v1.begin();
71  auto it2 = v2.begin(), it2e = v2.end();
72  size_type nd = v1.size() >> 3;
73  for (size_type i = 0; i < nd; ++i) {
74  *it2++ = (*it1++) * a;
75  *it2++ = (*it1++) * a;
76  *it2++ = (*it1++) * a;
77  *it2++ = (*it1++) * a;
78  *it2++ = (*it1++) * a;
79  *it2++ = (*it1++) * a;
80  *it2++ = (*it1++) * a;
81  *it2++ = (*it1++) * a;
82  }
83  for (; it2 != it2e;)
84  *it2++ = (*it1++) * a;
85  }
86 
87  template <class VEC1, class VEC2>
88  inline void add_scaled_8(const VEC1 &v1, const scalar_type a, VEC2 &v2) {
89  auto it1 = v1.begin();
90  auto it2 = v2.begin(), it2e = v2.end();
91  size_type nd = v1.size() >> 3;
92  for (size_type i = 0; i < nd; ++i) {
93  *it2++ += (*it1++) * a;
94  *it2++ += (*it1++) * a;
95  *it2++ += (*it1++) * a;
96  *it2++ += (*it1++) * a;
97  *it2++ += (*it1++) * a;
98  *it2++ += (*it1++) * a;
99  *it2++ += (*it1++) * a;
100  *it2++ += (*it1++) * a;
101  }
102  for (; it2 != it2e;)
103  *it2++ += (*it1++) * a;
104  }
105 
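  // Lexicographic comparisons providing a strict weak ordering, so that
  // gauss_pt_corresp and region_mim objects can be used as keys of ordered
  // containers (typically std::map) in the instruction set.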
106  bool operator <(const gauss_pt_corresp &gpc1,
107  const gauss_pt_corresp &gpc2) {
108  if (gpc1.pai != gpc2.pai)
109  return (gpc1.pai < gpc2.pai);
110  if (gpc1.nodes.size() != gpc2.nodes.size())
111  return (gpc1.nodes.size() < gpc2.nodes.size());
112  for (size_type i = 0; i < gpc1.nodes.size(); ++i)
113  if (gpc1.nodes[i] != gpc2.nodes[i])
114  return (gpc1.nodes[i] < gpc2.nodes[i]);
115  if (gpc1.pgt1 != gpc2.pgt1)
116  return (gpc1.pgt1 < gpc2.pgt1);
117  if (gpc1.pgt2 != gpc2.pgt2)
118  return (gpc1.pgt2 < gpc2.pgt2);
119  return false;
120  }
121 
122  bool operator <(const ga_instruction_set::region_mim &rm1,
123  const ga_instruction_set::region_mim &rm2) {
124  if (rm1.mim() != rm2.mim()) return (rm1.mim() < rm2.mim());
125  if (rm1.region() != rm2.region()) return (rm1.region() < rm2.region());
126  return (rm1.psd() < rm2.psd());
127  }
128 
129  //=========================================================================
130  // Instructions for compilation: basic optimized operations on tensors
131  //=========================================================================
132 
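  // Copies the qdim values stored by an im_data object for the current
  // element and integration point into the tensor t. The element must be
  // integrated with the same approximate integration method the im_data
  // was defined on.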
133  struct ga_instruction_extract_local_im_data : public ga_instruction {
134  base_tensor &t;
135  const im_data &imd;
136  papprox_integration &pai;
137  const base_vector &U;
138  const fem_interpolation_context &ctx;
139  size_type qdim, cv_old;
140  virtual int exec() {
141  GA_DEBUG_INFO("Instruction: extract local im data");
142  size_type cv = ctx.convex_num();
143  if (cv != cv_old) {
144  cv_old = cv;
145  GMM_ASSERT1(imd.linked_mesh_im().int_method_of_element(cv)
146  ->approx_method() == pai, "Im data has to be used only "
147  "on its original integration method.");
148  }
149  size_type ipt = imd.filtered_index_of_point(cv, ctx.ii());
150  GMM_ASSERT1(ipt != size_type(-1),
151  "Im data has no data at the current integration point.");
152  auto it = U.begin()+ipt*qdim;
153  std::copy(it, it+qdim, t.begin());
154  return 0;
155  }
156  ga_instruction_extract_local_im_data
157  (base_tensor &t_, const im_data &imd_, const base_vector &U_,
158  papprox_integration &pai_, const fem_interpolation_context &ctx_,
159  size_type qdim_)
160  : t(t_), imd(imd_), pai(pai_), U(U_), ctx(ctx_), qdim(qdim_),
161  cv_old(-1)
162  {}
163  };
164 
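  // Extracts from the global vector U the coefficients attached to the
  // basic dofs of the current element into the local vector coeff, taking
  // the vectorization factors qmult1/qmult2 into account.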
165  struct ga_instruction_slice_local_dofs : public ga_instruction {
166  const mesh_fem &mf;
167  const base_vector &U;
168  const fem_interpolation_context &ctx;
169  base_vector &coeff;
170  size_type qmult1, qmult2;
171  virtual int exec() {
172  GA_DEBUG_INFO("Instruction: Slice local dofs");
173  GMM_ASSERT1(qmult1 != 0 && qmult2 != 0, "Internal error");
174  slice_vector_on_basic_dof_of_element(mf, U, ctx.convex_num(),
175  coeff, qmult1, qmult2);
176  return 0;
177  }
178  ga_instruction_slice_local_dofs(const mesh_fem &mf_, const base_vector &U_,
179  const fem_interpolation_context &ctx_,
180  base_vector &coeff_,
181  size_type qmult1_, size_type qmult2_)
182  : mf(mf_), U(U_), ctx(ctx_), coeff(coeff_),
183  qmult1(qmult1_), qmult2(qmult2_) {}
184  };
185 
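  // Keeps the fem precomputation pfp in sync with the current element:
  // when a geometric precomputation is available, pfp is refreshed from
  // the pool whenever the fem or the set of integration points changes;
  // otherwise pfp is reset to null.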
186  struct ga_instruction_update_pfp : public ga_instruction {
187  const mesh_fem &mf;
188  const fem_interpolation_context &ctx;
189  fem_precomp_pool &fp_pool;
190  pfem_precomp &pfp;
191 
192  virtual int exec() {
193  GA_DEBUG_INFO("Instruction: Pfp update");
194  if (ctx.have_pgp()) {
195  size_type cv = ctx.is_convex_num_valid()
196  ? ctx.convex_num() : mf.convex_index().first_true();
197  pfem pf = mf.fem_of_element(cv);
198  if (!pfp || pf != pfp->get_pfem() ||
199  ctx.pgp()->get_ppoint_tab() != pfp->get_ppoint_tab()) {
200  pfp = fp_pool(pf, ctx.pgp()->get_ppoint_tab());
201  }
202  } else {
203  pfp = 0;
204  }
205  return 0;
206  }
207 
208  ga_instruction_update_pfp(const mesh_fem &mf_, pfem_precomp &pfp_,
209  const fem_interpolation_context &ctx_,
210  fem_precomp_pool &fp_pool_)
211  : mf(mf_), ctx(ctx_), fp_pool(fp_pool_), pfp(pfp_) {}
212  };
213 
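  // The three following instructions resize the first (resp. second, resp.
  // two first) index of a tensor to the number of local degrees of freedom
  // of the current element, which may differ from one element to another.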
214  struct ga_instruction_first_ind_tensor : public ga_instruction {
215  base_tensor &t;
216  const fem_interpolation_context &ctx;
217  size_type qdim;
218  const mesh_fem *mfn, **mfg;
219 
220  virtual int exec() {
221  GA_DEBUG_INFO("Instruction: adapt first index of tensor");
222  const mesh_fem &mf = *(mfg ? *mfg : mfn);
223  GA_DEBUG_ASSERT(mfg ? *mfg : mfn, "Internal error");
224  size_type cv_1 = ctx.is_convex_num_valid()
225  ? ctx.convex_num() : mf.convex_index().first_true();
226  pfem pf = mf.fem_of_element(cv_1);
227  GMM_ASSERT1(pf, "An element without finite element method defined");
228  size_type Qmult = qdim / pf->target_dim();
229  size_type s = pf->nb_dof(cv_1) * Qmult;
230  if (t.sizes()[0] != s)
231  { bgeot::multi_index mi = t.sizes(); mi[0] = s; t.adjust_sizes(mi); }
232  return 0;
233  }
234 
235  ga_instruction_first_ind_tensor(base_tensor &t_,
236  const fem_interpolation_context &ctx_,
237  size_type qdim_, const mesh_fem *mfn_,
238  const mesh_fem **mfg_)
239  : t(t_), ctx(ctx_), qdim(qdim_), mfn(mfn_), mfg(mfg_) {}
240  };
241 
242  struct ga_instruction_second_ind_tensor
243  : public ga_instruction_first_ind_tensor {
244 
245  virtual int exec() {
246  GA_DEBUG_INFO("Instruction: adapt second index of tensor");
247  const mesh_fem &mf = *(mfg ? *mfg : mfn);
248  size_type cv_1 = ctx.is_convex_num_valid()
249  ? ctx.convex_num() : mf.convex_index().first_true();
250  pfem pf = mf.fem_of_element(cv_1);
251  GMM_ASSERT1(pf, "An element without finite element method defined");
252  size_type Qmult = qdim / pf->target_dim();
253  size_type s = pf->nb_dof(cv_1) * Qmult;
254  if (t.sizes()[1] != s)
255  { bgeot::multi_index mi = t.sizes(); mi[1] = s; t.adjust_sizes(mi); }
256  return 0;
257  }
258 
259  ga_instruction_second_ind_tensor(base_tensor &t_,
260  fem_interpolation_context &ctx_,
261  size_type qdim_, const mesh_fem *mfn_,
262  const mesh_fem **mfg_)
263  : ga_instruction_first_ind_tensor(t_, ctx_, qdim_, mfn_, mfg_) {}
264 
265  };
266 
267  struct ga_instruction_two_first_ind_tensor : public ga_instruction {
268  base_tensor &t;
269  const fem_interpolation_context &ctx1, &ctx2;
270  size_type qdim1;
271  const mesh_fem *mfn1, **mfg1;
272  size_type qdim2;
273  const mesh_fem *mfn2, **mfg2;
274 
275  virtual int exec() {
276  GA_DEBUG_INFO("Instruction: adapt two first indices of tensor");
277  const mesh_fem &mf1 = *(mfg1 ? *mfg1 : mfn1);
278  const mesh_fem &mf2 = *(mfg2 ? *mfg2 : mfn2);
279  size_type cv_1 = ctx1.is_convex_num_valid()
280  ? ctx1.convex_num() : mf1.convex_index().first_true();
281  size_type cv_2 = ctx2.is_convex_num_valid()
282  ? ctx2.convex_num() : mf2.convex_index().first_true();
283  pfem pf1 = mf1.fem_of_element(cv_1);
284  GMM_ASSERT1(pf1, "An element without finite element method defined");
285  pfem pf2 = mf2.fem_of_element(cv_2);
286  GMM_ASSERT1(pf2, "An element without finite element method defined");
287  size_type Qmult1 = qdim1 / pf1->target_dim();
288  size_type s1 = pf1->nb_dof(cv_1) * Qmult1;
289  size_type Qmult2 = qdim2 / pf2->target_dim();
290  size_type s2 = pf2->nb_dof(cv_2) * Qmult2;
291  GMM_ASSERT1(s1 > 0 && s2 > 0, "Element without degrees of freedom");
292  if (t.sizes()[0] != s1 || t.sizes()[1] != s2) {
293  bgeot::multi_index mi = t.sizes();
294  mi[0] = s1; mi[1] = s2;
295  t.adjust_sizes(mi);
296  }
297  return 0;
298  }
299 
300  ga_instruction_two_first_ind_tensor
301  (base_tensor &t_, const fem_interpolation_context &ctx1_,
302  const fem_interpolation_context &ctx2_,
303  size_type qdim1_, const mesh_fem *mfn1_, const mesh_fem **mfg1_,
304  size_type qdim2_, const mesh_fem *mfn2_, const mesh_fem **mfg2_)
305  : t(t_), ctx1(ctx1_), ctx2(ctx2_), qdim1(qdim1_), mfn1(mfn1_),
306  mfg1(mfg1_), qdim2(qdim2_), mfn2(mfn2_), mfg2(mfg2_) {}
307  };
308 
309 
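  // Evaluation of the current point: either one component or the whole
  // vector of real coordinates ctx.xreal() of the current integration or
  // interpolation point.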
310  struct ga_instruction_X_component : public ga_instruction {
311  scalar_type &t;
312  const fem_interpolation_context &ctx;
313  size_type n;
314 
315  virtual int exec() {
316  GA_DEBUG_INFO("Instruction: X component");
317  t = ctx.xreal()[n];
318  return 0;
319  }
320 
321  ga_instruction_X_component
322  (scalar_type &t_, const fem_interpolation_context &ctx_, size_type n_)
323  : t(t_), ctx(ctx_), n(n_) {}
324  };
325 
326  struct ga_instruction_X : public ga_instruction {
327  base_tensor &t;
328  const fem_interpolation_context &ctx;
329 
330  virtual int exec() {
331  GA_DEBUG_INFO("Instruction: X");
332  GA_DEBUG_ASSERT(t.size() == ctx.xreal().size(), "dimensions mismatch");
333  gmm::copy(ctx.xreal(), t.as_vector());
334  return 0;
335  }
336 
337  ga_instruction_X(base_tensor &t_, const fem_interpolation_context &ctx_)
338  : t(t_), ctx(ctx_) {}
339  };
340 
341  struct ga_instruction_copy_small_vect : public ga_instruction {
342  base_tensor &t;
343  const base_small_vector &vec;
344 
345  virtual int exec() {
346  GA_DEBUG_INFO("Instruction: copy small vector");
347  GMM_ASSERT1(t.size() == vec.size(), "Invalid vector size.");
348  gmm::copy(vec, t.as_vector());
349  return 0;
350  }
351  ga_instruction_copy_small_vect(base_tensor &t_,
352  const base_small_vector &vec_)
353  : t(t_), vec(vec_) {}
354  };
355 
356  struct ga_instruction_copy_Normal : public ga_instruction_copy_small_vect {
357 
358  virtual int exec() {
359  GA_DEBUG_INFO("Instruction: unit normal vector");
360  GMM_ASSERT1(t.size() == vec.size(), "Invalid outward unit normal "
361  "vector. Possible reasons: not on boundary or "
362  "transformation failed.");
363  gmm::copy(vec, t.as_vector());
364  return 0;
365  }
366  ga_instruction_copy_Normal(base_tensor &t_,
367  const base_small_vector &Normal_)
368  : ga_instruction_copy_small_vect(t_, Normal_) {}
369  };
370 
371  struct ga_instruction_level_set_normal_vector : public ga_instruction {
372  base_tensor &t;
373  const mesh_im_level_set *mimls;
374  const fem_interpolation_context &ctx;
375  base_small_vector vec;
376 
377  virtual int exec() {
378  GA_DEBUG_INFO("Instruction: unit normal vector to a level-set");
379  mimls->compute_normal_vector(ctx, vec);
380  GMM_ASSERT1(t.size() == vec.size(), "Invalid outward unit normal "
381  "vector. Possible reasons: not on boundary or "
382  "transformation failed.");
383  gmm::copy(vec, t.as_vector());
384  return 0;
385  }
386  ga_instruction_level_set_normal_vector
387  (base_tensor &t_, const mesh_im_level_set *mimls_,
388  const fem_interpolation_context &ctx_)
389  : t(t_), mimls(mimls_), ctx(ctx_), vec(t.size()) {}
390  };
391 
392  struct ga_instruction_element_size : public ga_instruction {
393  base_tensor &t;
394  scalar_type &es;
395 
396  virtual int exec() {
397  GA_DEBUG_INFO("Instruction: element_size");
398  GMM_ASSERT1(t.size() == 1, "Invalid element size.");
399  t[0] = es;
400  return 0;
401  }
402  ga_instruction_element_size(base_tensor &t_, scalar_type &es_)
403  : t(t_), es(es_) {}
404  };
405 
406  struct ga_instruction_element_K : public ga_instruction {
407  base_tensor &t;
408  const fem_interpolation_context &ctx;
409 
410  virtual int exec() {
411  GA_DEBUG_INFO("Instruction: element_K");
412  GMM_ASSERT1(t.size() == (ctx.K()).size(), "Invalid tensor size.");
413  gmm::copy(ctx.K().as_vector(), t.as_vector());
414  return 0;
415  }
416  ga_instruction_element_K(base_tensor &t_,
417  const fem_interpolation_context &ct)
418  : t(t_), ctx(ct) {}
419  };
420 
421  struct ga_instruction_element_B : public ga_instruction {
422  base_tensor &t;
423  const fem_interpolation_context &ctx;
424 
425  virtual int exec() {
426  GA_DEBUG_INFO("Instruction: element_B");
427  GMM_ASSERT1(t.size() == (ctx.B()).size(), "Invalid tensor size.");
428  gmm::copy(ctx.B().as_vector(), t.as_vector());
429  return 0;
430  }
431  ga_instruction_element_B(base_tensor &t_,
432  const fem_interpolation_context &ct)
433  : t(t_), ctx(ct) {}
434  };
435 
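  // Evaluation of the base functions of a mesh_fem on the current element,
  // producing t(ndof, target_dim) for values, t(ndof, target_dim, N) for
  // gradients and t(ndof, target_dim, N*N) for Hessians. The xfem_plus /
  // xfem_minus variants evaluate on the requested side of the level-set
  // for extended finite element methods.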
436  struct ga_instruction_val_base : public ga_instruction {
437  base_tensor &t;
438  fem_interpolation_context &ctx;
439  const mesh_fem &mf;
440  const pfem_precomp &pfp;
441 
442  virtual int exec() { // --> t(ndof,target_dim)
443  GA_DEBUG_INFO("Instruction: compute value of base functions");
444  // if (ctx.have_pgp()) ctx.set_pfp(pfp);
445  // else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
446  // GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
447  // ctx.base_value(t);
448  if (ctx.have_pgp()) ctx.pfp_base_value(t, pfp);
449  else {
450  ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
451  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
452  ctx.base_value(t);
453  }
454  return 0;
455  }
456 
457  ga_instruction_val_base(base_tensor &tt, fem_interpolation_context &ct,
458  const mesh_fem &mf_, const pfem_precomp &pfp_)
459  : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
460  };
461 
462  struct ga_instruction_xfem_plus_val_base : public ga_instruction {
463  base_tensor &t;
464  fem_interpolation_context &ctx;
465  const mesh_fem &mf;
466  pfem_precomp &pfp;
467 
468  virtual int exec() { // --> t(ndof,target_dim)
469  GA_DEBUG_INFO("Instruction: compute value of base functions");
470  if (ctx.have_pgp()) ctx.set_pfp(pfp);
471  else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
472  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
473  int old_xfem_side = ctx.xfem_side();
474  ctx.set_xfem_side(1);
475  ctx.base_value(t);
476  ctx.set_xfem_side(old_xfem_side);
477  return 0;
478  }
479 
480  ga_instruction_xfem_plus_val_base(base_tensor &tt,
481  fem_interpolation_context &ct,
482  const mesh_fem &mf_, pfem_precomp &pfp_)
483  : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
484  };
485 
486  struct ga_instruction_xfem_minus_val_base : public ga_instruction {
487  base_tensor &t;
488  fem_interpolation_context &ctx;
489  const mesh_fem &mf;
490  pfem_precomp &pfp;
491 
492  virtual int exec() { // --> t(ndof,target_dim)
493  GA_DEBUG_INFO("Instruction: compute value of base functions");
494  if (ctx.have_pgp()) ctx.set_pfp(pfp);
495  else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
496  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
497  int old_xfem_side = ctx.xfem_side();
498  ctx.set_xfem_side(-1);
499  ctx.base_value(t);
500  ctx.set_xfem_side(old_xfem_side);
501  return 0;
502  }
503 
504  ga_instruction_xfem_minus_val_base
505  (base_tensor &tt, fem_interpolation_context &ct,
506  const mesh_fem &mf_, pfem_precomp &pfp_)
507  : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
508  };
509 
510  struct ga_instruction_grad_base : public ga_instruction_val_base {
511 
512  virtual int exec() { // --> t(ndof,target_dim,N)
513  GA_DEBUG_INFO("Instruction: compute gradient of base functions");
514  // if (ctx.have_pgp()) ctx.set_pfp(pfp);
515  // else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
516  // GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
517  // ctx.grad_base_value(t);
518  if (ctx.have_pgp()) ctx.pfp_grad_base_value(t, pfp);
519  else {
520  ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
521  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
522  ctx.grad_base_value(t);
523  }
524  return 0;
525  }
526 
527  ga_instruction_grad_base(base_tensor &tt, fem_interpolation_context &ct,
528  const mesh_fem &mf_, pfem_precomp &pfp_)
529  : ga_instruction_val_base(tt, ct, mf_, pfp_)
530  {}
531  };
532 
533  struct ga_instruction_xfem_plus_grad_base : public ga_instruction_val_base {
534 
535  virtual int exec() { // --> t(ndof,target_dim,N)
536  GA_DEBUG_INFO("Instruction: compute gradient of base functions");
537  if (ctx.have_pgp()) ctx.set_pfp(pfp);
538  else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
539  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
540  int old_xfem_side = ctx.xfem_side();
541  ctx.set_xfem_side(1);
542  ctx.grad_base_value(t);
543  ctx.set_xfem_side(old_xfem_side);
544  return 0;
545  }
546 
547  ga_instruction_xfem_plus_grad_base
548  (base_tensor &tt, fem_interpolation_context &ct,
549  const mesh_fem &mf_, pfem_precomp &pfp_)
550  : ga_instruction_val_base(tt, ct, mf_, pfp_)
551  {}
552  };
553 
554  struct ga_instruction_xfem_minus_grad_base : public ga_instruction_val_base {
555 
556  virtual int exec() { // --> t(ndof,target_dim,N)
557  GA_DEBUG_INFO("Instruction: compute gradient of base functions");
558  if (ctx.have_pgp()) ctx.set_pfp(pfp);
559  else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
560  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
561  int old_xfem_side = ctx.xfem_side();
562  ctx.set_xfem_side(-1);
563  ctx.grad_base_value(t);
564  ctx.set_xfem_side(old_xfem_side);
565  return 0;
566  }
567 
568  ga_instruction_xfem_minus_grad_base
569  (base_tensor &tt, fem_interpolation_context &ct,
570  const mesh_fem &mf_, pfem_precomp &pfp_)
571  : ga_instruction_val_base(tt, ct, mf_, pfp_)
572  {}
573  };
574 
575 
576  struct ga_instruction_hess_base : public ga_instruction_val_base {
577 
578  virtual int exec() { // --> t(ndof,target_dim,N*N)
579  GA_DEBUG_INFO("Instruction: compute Hessian of base functions");
580  if (ctx.have_pgp()) ctx.set_pfp(pfp);
581  else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
582  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
583  ctx.hess_base_value(t);
584  return 0;
585  }
586 
587  ga_instruction_hess_base(base_tensor &tt, fem_interpolation_context &ct,
588  const mesh_fem &mf_, pfem_precomp &pfp_)
589  : ga_instruction_val_base(tt, ct, mf_, pfp_)
590  {}
591  };
592 
593  struct ga_instruction_xfem_plus_hess_base : public ga_instruction_val_base {
594 
595  virtual int exec() { // --> t(ndof,target_dim,N*N)
596  GA_DEBUG_INFO("Instruction: compute Hessian of base functions");
597  if (ctx.have_pgp()) ctx.set_pfp(pfp);
598  else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
599  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
600  int old_xfem_side = ctx.xfem_side();
601  ctx.set_xfem_side(1);
602  ctx.hess_base_value(t);
603  ctx.set_xfem_side(old_xfem_side);
604  return 0;
605  }
606 
607  ga_instruction_xfem_plus_hess_base
608  (base_tensor &tt, fem_interpolation_context &ct,
609  const mesh_fem &mf_, pfem_precomp &pfp_)
610  : ga_instruction_val_base(tt, ct, mf_, pfp_)
611  {}
612  };
613 
614  struct ga_instruction_xfem_minus_hess_base : public ga_instruction_val_base {
615 
616  virtual int exec() { // --> t(ndof,target_dim,N*N)
617  GA_DEBUG_INFO("Instruction: compute Hessian of base functions");
618  if (ctx.have_pgp()) ctx.set_pfp(pfp);
619  else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
620  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
621  int old_xfem_side = ctx.xfem_side();
622  ctx.set_xfem_side(-1);
623  ctx.hess_base_value(t);
624  ctx.set_xfem_side(old_xfem_side);
625  return 0;
626  }
627 
628  ga_instruction_xfem_minus_hess_base
629  (base_tensor &tt, fem_interpolation_context &ct,
630  const mesh_fem &mf_, pfem_precomp &pfp_)
631  : ga_instruction_val_base(tt, ct, mf_, pfp_)
632  {}
633  };
634 
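  // Value of a variable at the current point, obtained by contracting the
  // base function tensor Z(ndof, target_dim) with the local coefficients:
  //   t[r + q*target_dim] = sum_j coeff[j*Qmult + q] * Z(j, r)
  // with Qmult = qdim / target_dim accounting for the vectorization of an
  // intrinsically scalar (or lower-dimensional) fem. The derived
  // instructions below compute the gradient, Hessian and divergence along
  // the same lines.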
635  struct ga_instruction_val : public ga_instruction {
636  scalar_type &a;
637  base_tensor &t;
638  const base_tensor &Z;
639  const base_vector &coeff;
640  size_type qdim;
641  // Z(ndof,target_dim), coeff(Qmult,ndof) --> t(target_dim*Qmult)
642  virtual int exec() {
643  GA_DEBUG_INFO("Instruction: variable value");
644  size_type ndof = Z.sizes()[0];
645  if (!ndof) { gmm::clear(t.as_vector()); return 0; }
646  GA_DEBUG_ASSERT(t.size() == qdim, "dimensions mismatch");
647 
648  if (qdim == 1) {
649  GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof,
650  "Wrong size for coeff vector");
651  auto itc = coeff.begin(); auto itZ = Z.begin();
652  a = (*itc++) * (*itZ++);
653  while (itc != coeff.end()) a += (*itc++) * (*itZ++);
654  } else {
655  size_type target_dim = Z.sizes()[1];
656  if (target_dim == 1) {
657  GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*qdim,
658  "Wrong size for coeff vector");
659  auto itc = coeff.begin(); auto itZ = Z.begin();
660  for (auto it = t.begin(); it != t.end(); ++it)
661  *it = (*itc++) * (*itZ);
662  ++itZ;
663  for (size_type j = 1; j < ndof; ++j, ++itZ) {
664  for (auto it = t.begin(); it != t.end(); ++it)
665  *it += (*itc++) * (*itZ);
666  }
667  } else {
668  size_type Qmult = qdim / target_dim;
669  GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
670  "Wrong size for coeff vector");
671 
672  gmm::clear(t.as_vector());
673  auto itc = coeff.begin();
674  for (size_type j = 0; j < ndof; ++j) {
675  auto it = t.begin();
676  for (size_type q = 0; q < Qmult; ++q, ++itc) {
677  for (size_type r = 0; r < target_dim; ++r)
678  *it++ += (*itc) * Z[j + r*ndof];
679  }
680  }
681  }
682  }
683  return 0;
684  }
685 
686  ga_instruction_val(base_tensor &tt, const base_tensor &Z_,
687  const base_vector &co, size_type q)
688  : a(tt[0]), t(tt), Z(Z_), coeff(co), qdim(q) {}
689  };
690 
691  struct ga_instruction_grad : public ga_instruction_val {
692  // Z(ndof,target_dim,N), coeff(Qmult,ndof) --> t(target_dim*Qmult,N)
693  virtual int exec() {
694  GA_DEBUG_INFO("Instruction: gradient");
695  size_type ndof = Z.sizes()[0];
696  if (!ndof) { gmm::clear(t.as_vector()); return 0; }
697  size_type N = Z.sizes()[2];
698  if (qdim == 1) {
699  GA_DEBUG_ASSERT(t.size() == N, "dimensions mismatch");
700  GA_DEBUG_ASSERT(coeff.size() == ndof, "Wrong size for coeff vector");
701  auto itZ = Z.begin();
702  for (auto it = t.begin(); it != t.end(); ++it) {
703  auto itc = coeff.begin();
704  *it = (*itc++) * (*itZ++);
705  while (itc != coeff.end()) *it += (*itc++) * (*itZ++);
706  }
707  } else {
708  size_type target_dim = Z.sizes()[1];
709  if (target_dim == 1) {
710  GA_DEBUG_ASSERT(t.size() == N*qdim, "dimensions mismatch");
711  GA_DEBUG_ASSERT(coeff.size() == ndof*qdim,
712  "Wrong size for coeff vector");
713  for (size_type q = 0; q < qdim; ++q) {
714  auto itZ = Z.begin(); auto it = t.begin() + q;
715  for (size_type k = 0; k < N; ++k) {
716  if (k) it += qdim;
717  auto itc = coeff.begin() + q;
718  *it = (*itc) * (*itZ++);
719  for (size_type j = 1; j < ndof; ++j)
720  { itc += qdim; *it += (*itc) * (*itZ++); }
721  }
722  }
723  } else {
724  size_type Qmult = qdim / target_dim;
725  GA_DEBUG_ASSERT(t.size() == N*qdim, "dimensions mismatch");
726  GA_DEBUG_ASSERT(coeff.size() == ndof*Qmult,
727  "Wrong size for coeff vector");
728  gmm::clear(t.as_vector());
729  for (size_type q = 0; q < Qmult; ++q) {
730  auto itZ = Z.begin();
731  for (size_type k = 0; k < N; ++k)
732  for (size_type r = 0; r < target_dim; ++r)
733  for (size_type j = 0; j < ndof; ++j)
734  t[r + q*target_dim + k*qdim] += coeff[j*Qmult+q] * (*itZ++);
735  }
736  }
737  }
738  return 0;
739  }
740 
741  ga_instruction_grad(base_tensor &tt, const base_tensor &Z_,
742  const base_vector &co, size_type q)
743  : ga_instruction_val(tt, Z_, co, q)
744  {}
745 
746  };
747 
748  struct ga_instruction_hess : public ga_instruction_val {
749  // Z(ndof,target_dim,N*N), coeff(Qmult,ndof) --> t(target_dim*Qmult,N,N)
750  virtual int exec() {
751  GA_DEBUG_INFO("Instruction: Hessian");
752  size_type ndof = Z.sizes()[0];
753  if (!ndof) { gmm::clear(t.as_vector()); return 0; }
754  size_type NN = gmm::sqr(t.sizes().back());
755  GA_DEBUG_ASSERT(NN == Z.sizes()[2], "Internal error");
756  if (qdim == 1) {
757  GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof,
758  "Wrong size for coeff vector");
759  auto it = Z.begin(); auto itt = t.begin();
760  for (size_type kl = 0; kl < NN; ++kl, ++itt) {
761  *itt = scalar_type(0);
762  for (auto itc = coeff.begin(); itc != coeff.end(); ++itc, ++it)
763  *itt += (*itc) * (*it);
764  }
765  GMM_ASSERT1(itt == t.end(), "dimensions mismatch");
766  } else {
767  size_type target_dim = Z.sizes()[1];
768  if (target_dim == 1) {
769  GA_DEBUG_ASSERT(t.size() == NN*qdim, "dimensions mismatch");
770  GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*qdim,
771  "Wrong size for coeff vector");
772  gmm::clear(t.as_vector());
773  for (size_type q = 0; q < qdim; ++q) {
774  base_tensor::const_iterator it = Z.begin();
775  for (size_type kl = 0; kl < NN; ++kl)
776  for (size_type j = 0; j < ndof; ++j, ++it)
777  t[q + kl*qdim] += coeff[j*qdim+q] * (*it);
778  }
779  } else {
780  size_type Qmult = qdim / target_dim;
781  GA_DEBUG_ASSERT(t.size() == NN*qdim, "dimensions mismatch");
782  GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
783  "Wrong size for coeff vector");
784  gmm::clear(t.as_vector());
785  for (size_type q = 0; q < Qmult; ++q) {
786  base_tensor::const_iterator it = Z.begin();
787  for (size_type kl = 0; kl < NN; ++kl)
788  for (size_type r = 0; r < target_dim; ++r)
789  for (size_type j = 0; j < ndof; ++j, ++it)
790  t[r + q*target_dim + kl*qdim] += coeff[j*Qmult+q] * (*it);
791  }
792  }
793  }
794  return 0;
795  }
796 
797  ga_instruction_hess(base_tensor &tt, const base_tensor &Z_,
798  const base_vector &co, size_type q)
799  : ga_instruction_val(tt, Z_, co, q)
800  {}
801  };
802 
803  struct ga_instruction_diverg : public ga_instruction_val {
804  // Z(ndof,target_dim,N), coeff(Qmult,ndof) --> t(1)
805  virtual int exec() {
806  GA_DEBUG_INFO("Instruction: divergence");
807  size_type ndof = Z.sizes()[0];
808  if (!ndof) { gmm::clear(t.as_vector()); return 0; }
809  size_type target_dim = Z.sizes()[1];
810  size_type N = Z.sizes()[2];
811  size_type Qmult = qdim / target_dim;
812  GA_DEBUG_ASSERT(Qmult*target_dim == N && (Qmult == 1 || target_dim == 1),
813  "Dimensions mismatch for divergence operator");
814  GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
815  "Wrong size for coeff vector");
816 
817  t[0] = scalar_type(0);
818  base_tensor::const_iterator it = Z.begin();
819  if (Qmult == 1)
820  for (size_type k = 0; k < N; ++k) {
821  if (k) it += (N*ndof + 1);
822  for (size_type j = 0; j < ndof; ++j) {
823  if (j) ++it;
824  t[0] += coeff[j] * (*it);
825  }
826  }
827  else // target_dim == 1
828  for (size_type k = 0; k < N; ++k) {
829  if (k) ++it;
830  for (size_type j = 0; j < ndof; ++j) {
831  if (j) ++it;
832  t[0] += coeff[j*N+k] * (*it);
833  }
834  }
835  return 0;
836  }
837 
838  ga_instruction_diverg(base_tensor &tt, const base_tensor &Z_,
839  const base_vector &co, size_type q)
840  : ga_instruction_val(tt, Z_, co, q)
841  {}
842  };
843 
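  // Value of the test functions: the base function tensor Z(ndof, target_dim)
  // is expanded ("vectorized") into t(Qmult*ndof, Qmult*target_dim) with
  //   t(i*Qmult+j, k*Qmult+j) = Z(i,k),  j = 0..Qmult-1,
  // so that a fem with target_dim smaller than qdim can represent a vector
  // or matrix valued variable. When Qmult == 1 this is a plain copy.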
844  struct ga_instruction_copy_val_base : public ga_instruction {
845  base_tensor &t;
846  const base_tensor &Z;
847  size_type qdim;
848  // Z(ndof,target_dim) --> t(Qmult*ndof,Qmult*target_dim)
849  virtual int exec() {
850  GA_DEBUG_INFO("Instruction: value of test functions");
851  if (qdim == 1) {
852  GA_DEBUG_ASSERT(t.size() == Z.size(), "Wrong size for base vector");
853  std::copy(Z.begin(), Z.end(), t.begin());
854  } else {
855  size_type target_dim = Z.sizes()[1];
856  size_type Qmult = qdim / target_dim;
857  if (Qmult == 1) {
858  std::copy(Z.begin(), Z.end(), t.begin());
859  } else {
860  if (target_dim == 1) {
861  size_type ndof = Z.sizes()[0];
862  GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
863  "Wrong size for base vector");
864  std::fill(t.begin(), t.end(), scalar_type(0));
865  auto itZ = Z.begin();
866  size_type s = t.sizes()[0], sss = s+1;
867 
868  // Performs t(i*Qmult+j, k*Qmult + j) = Z(i,k);
869  auto it = t.begin();
870  for (size_type i = 0; i < ndof; ++i, ++itZ) {
871  if (i) it += Qmult;
872  auto it2 = it;
873  *it2 = *itZ;
874  for (size_type j = 1; j < Qmult; ++j) { it2 += sss; *it2 = *itZ; }
875  }
876  } else {
877  size_type ndof = Z.sizes()[0];
878  GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
879  "Wrong size for base vector");
880  std::fill(t.begin(), t.end(), scalar_type(0));
881  auto itZ = Z.begin();
882  size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1;
883 
884  // Performs t(i*Qmult+j, k*Qmult + j) = Z(i,k);
885  for (size_type k = 0; k < target_dim; ++k) {
886  auto it = t.begin() + (ss * k);
887  for (size_type i = 0; i < ndof; ++i, ++itZ) {
888  if (i) it += Qmult;
889  auto it2 = it;
890  *it2 = *itZ;
891  for (size_type j = 1; j < Qmult; ++j)
892  { it2 += sss; *it2 = *itZ; }
893  }
894  }
895  }
896  }
897  }
898  return 0;
899  }
900 
901  ga_instruction_copy_val_base(base_tensor &tt, const base_tensor &Z_,
902  size_type q) : t(tt), Z(Z_), qdim(q) {}
903  };
904 
905  struct ga_instruction_copy_grad_base : public ga_instruction_copy_val_base {
906  // Z(ndof,target_dim,N) --> t(Qmult*ndof,Qmult*target_dim,N)
907  virtual int exec() {
908  GA_DEBUG_INFO("Instruction: gradient of test functions");
909  if (qdim == 1) {
910  std::copy(Z.begin(), Z.end(), t.begin());
911  } else {
912  size_type target_dim = Z.sizes()[1];
913  size_type Qmult = qdim / target_dim;
914  if (Qmult == 1) {
915  std::copy(Z.begin(), Z.end(), t.begin());
916  } else {
917  if (target_dim == 1) {
918  size_type ndof = Z.sizes()[0];
919  size_type N = Z.sizes()[2];
920  GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
921  "Wrong size for gradient vector");
922  std::fill(t.begin(), t.end(), scalar_type(0));
923  base_tensor::const_iterator itZ = Z.begin();
924  size_type s = t.sizes()[0], sss = s+1, ssss = s*target_dim*Qmult;
925 
926  // Performs t(i*Qmult+j, k*Qmult + j, l) = Z(i,k,l);
927  for (size_type l = 0; l < N; ++l) {
928  base_tensor::iterator it = t.begin() + (ssss*l);
929  for (size_type i = 0; i < ndof; ++i, ++itZ) {
930  if (i) it += Qmult;
931  base_tensor::iterator it2 = it;
932  *it2 = *itZ;
933  for (size_type j = 1; j < Qmult; ++j) { it2+=sss; *it2=*itZ; }
934  }
935  }
936  } else {
937  size_type ndof = Z.sizes()[0];
938  size_type N = Z.sizes()[2];
939  GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
940  "Wrong size for gradient vector");
941  std::fill(t.begin(), t.end(), scalar_type(0));
942  base_tensor::const_iterator itZ = Z.begin();
943  size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1;
944  size_type ssss = ss*target_dim;
945 
946  // Performs t(i*Qmult+j, k*Qmult + j, l) = Z(i,k,l);
947  for (size_type l = 0; l < N; ++l)
948  for (size_type k = 0; k < target_dim; ++k) {
949  base_tensor::iterator it = t.begin() + (ss * k + ssss*l);
950  for (size_type i = 0; i < ndof; ++i, ++itZ) {
951  if (i) it += Qmult;
952  base_tensor::iterator it2 = it;
953  *it2 = *itZ;
954  for (size_type j = 1; j < Qmult; ++j) { it2+=sss; *it2=*itZ; }
955  }
956  }
957  }
958  }
959  }
960  return 0;
961  }
962 
963  ga_instruction_copy_grad_base(base_tensor &tt, const base_tensor &Z_,
964  size_type q)
965  : ga_instruction_copy_val_base(tt,Z_,q) {}
966  };
967 
968  struct ga_instruction_copy_vect_val_base : public ga_instruction {
969  base_tensor &t;
970  const base_tensor &Z;
971  size_type qdim;
972  // Z(ndof,1) --> t(qdim*ndof,qdim)
973  virtual int exec() {
974  GA_DEBUG_INFO("Instruction: vectorized value of test functions");
975 
976  size_type ndof = Z.sizes()[0];
977  GA_DEBUG_ASSERT(t.size() == Z.size() * qdim * qdim,
978  "Wrong size for base vector");
979  // std::fill(t.begin(), t.end(), scalar_type(0)); // Factorized
980  auto itZ = Z.begin();
981  size_type s = t.sizes()[0], sss = s+1;
982 
983  // Performs t(i*qdim+j, k*qdim + j) = Z(i,k);
984  auto it = t.begin();
985  for (size_type i = 0; i < ndof; ++i, ++itZ) {
986  if (i) it += qdim;
987  auto it2 = it;
988  *it2 = *itZ;
989  for (size_type j = 1; j < qdim; ++j) { it2 += sss; *it2 = *itZ; }
990  }
991  return 0;
992  }
993 
994  ga_instruction_copy_vect_val_base(base_tensor &tt, const base_tensor &Z_,
995  size_type q) : t(tt), Z(Z_), qdim(q) {}
996  };
997 
998  struct ga_instruction_copy_vect_grad_base
999  : public ga_instruction_copy_vect_val_base {
1000  // Z(ndof,1,N) --> t(qdim*ndof,qdim,N)
1001  virtual int exec() {
1002  GA_DEBUG_INFO("Instruction: vectorized gradient of test functions");
1003  size_type ndof = Z.sizes()[0];
1004  size_type N = Z.sizes()[2];
1005  GA_DEBUG_ASSERT(t.size() == Z.size() * qdim * qdim,
1006  "Wrong size for gradient vector");
1007  // std::fill(t.begin(), t.end(), scalar_type(0)); // Factorized
1008  base_tensor::const_iterator itZ = Z.begin();
1009  size_type s = t.sizes()[0], sss = s+1, ssss = s*qdim;
1010 
1011  // Performs t(i*qdim+j, k*qdim + j, l) = Z(i,k,l);
1012  for (size_type l = 0; l < N; ++l) {
1013  base_tensor::iterator it = t.begin() + (ssss*l);
1014  for (size_type i = 0; i < ndof; ++i, ++itZ) {
1015  if (i) it += qdim;
1016  base_tensor::iterator it2 = it;
1017  *it2 = *itZ;
1018  for (size_type j = 1; j < qdim; ++j) { it2+=sss; *it2=*itZ; }
1019  }
1020  }
1021  return 0;
1022  }
1023 
1024  ga_instruction_copy_vect_grad_base(base_tensor &tt, const base_tensor &Z_,
1025  size_type q)
1026  : ga_instruction_copy_vect_val_base(tt,Z_,q) {}
1027  };
1028 
1029  struct ga_instruction_copy_hess_base : public ga_instruction_copy_val_base {
1030  // Z(ndof,target_dim,N*N) --> t(Qmult*ndof,Qmult*target_dim,N,N)
1031  virtual int exec() {
1032  GA_DEBUG_INFO("Instruction: Hessian of test functions");
1033  size_type target_dim = Z.sizes()[1];
1034  size_type Qmult = qdim / target_dim;
1035  if (Qmult == 1) {
1036  gmm::copy(Z.as_vector(), t.as_vector());
1037  } else {
1038  size_type ndof = Z.sizes()[0];
1039  GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
1040  "Wrong size for Hessian vector");
1041  gmm::clear(t.as_vector());
1042  base_tensor::const_iterator itZ = Z.begin();
1043  size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1;
1044 
1045  // Performs t(i*Qmult+j, k*Qmult + j, l, m) = Z(i,k,l*N+m)
1046  size_type NNdim = Z.sizes()[2]*target_dim;
1047  for (size_type klm = 0; klm < NNdim; ++klm) {
1048  base_tensor::iterator it = t.begin() + (ss * klm);
1049  for (size_type i = 0; i < ndof; ++i, ++itZ) {
1050  if (i) it += Qmult;
1051  base_tensor::iterator it2 = it;
1052  *it2 = *itZ;
1053  for (size_type j = 1; j < Qmult; ++j) { it2 += sss; *it2 = *itZ; }
1054  }
1055  }
1056  }
1057  return 0;
1058  }
1059 
1060  ga_instruction_copy_hess_base(base_tensor &tt, const base_tensor &Z_,
1061  size_type q)
1062  : ga_instruction_copy_val_base(tt, Z_, q) {}
1063  };
1064 
1065  struct ga_instruction_copy_diverg_base : public ga_instruction_copy_val_base {
1066  // Z(ndof,target_dim,N) --> t(Qmult*ndof)
1067  virtual int exec() {
1068  GA_DEBUG_INFO("Instruction: divergence of test functions");
1069  size_type ndof = Z.sizes()[0];
1070  size_type target_dim = Z.sizes()[1];
1071  size_type N = Z.sizes()[2];
1072  size_type Qmult = qdim / target_dim;
1073  GA_DEBUG_ASSERT(Qmult*target_dim == N && (Qmult == 1 || target_dim == 1),
1074  "Dimensions mismatch for divergence operator");
1075  GA_DEBUG_ASSERT(t.size() == ndof * Qmult,
1076  "Wrong size for divergence vector");
1077  gmm::clear(t.as_vector());
1078  base_tensor::const_iterator itZ = Z.begin();
1079  if (Qmult == 1) { // target_dim == N
1080  // Performs t(i) = Trace(Z(i,:,:))
1081  for (size_type l = 0; l < N; ++l) {
1082  base_tensor::iterator it = t.begin();
1083  if (l) itZ += target_dim*ndof+1;
1084  for (size_type i = 0; i < ndof; ++i) {
1085  if (i) { ++it; ++itZ; }
1086  *it += *itZ;
1087  }
1088  }
1089  } else { // Qmult == N
1090  // Performs t(i*Qmult+j) = Z(i,1,j)
1091  for (size_type j = 0; j < N; ++j) {
1092  base_tensor::iterator it = t.begin() + j;
1093  if (j) ++itZ;
1094  for (size_type i = 0; i < ndof; ++i) {
1095  if (i) { it += Qmult; ++itZ; }
1096  *it += *itZ;
1097  }
1098  }
1099  }
1100  return 0;
1101  }
1102 
1103  ga_instruction_copy_diverg_base(base_tensor &tt, const base_tensor &Z_,
1104  size_type q)
1105  : ga_instruction_copy_val_base(tt, Z_, q) {}
1106  };
1107 
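  // Support for elementary transformations (Elementary_transformation in
  // the weak form language): a per-element matrix M, provided by the
  // pelementary_transformation object, is applied to the local coefficient
  // vector before the value, gradient, Hessian or divergence of the
  // variable is computed. M is rebuilt only when the current element
  // changes.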
1108  struct ga_instruction_elementary_trans {
1109  const base_vector &coeff_in;
1110  base_vector coeff_out;
1111  pelementary_transformation elemtrans;
1112  const mesh_fem &mf1, &mf2;
1113  const fem_interpolation_context &ctx;
1114  base_matrix &M;
1115  size_type &icv;
1116 
1117  void do_transformation(size_type n, size_type m) {
1118  if (icv != ctx.convex_num() || M.size() == 0) {
1119  M.base_resize(m, n);
1120  icv = ctx.convex_num();
1121  elemtrans->give_transformation(mf1, mf2, icv, M);
1122  }
1123  coeff_out.resize(gmm::mat_nrows(M));
1124  gmm::mult(M, coeff_in, coeff_out); // remember: coeff == coeff_out
1125  }
1126 
1127  ga_instruction_elementary_trans
1128  (const base_vector &co, pelementary_transformation e,
1129  const mesh_fem &mf1_, const mesh_fem &mf2_,
1130  const fem_interpolation_context &ctx_, base_matrix &M_,
1131  size_type &icv_)
1132  : coeff_in(co), elemtrans(e), mf1(mf1_), mf2(mf2_), ctx(ctx_),
1133  M(M_), icv(icv_) {}
1134  ~ga_instruction_elementary_trans() {}
1135  };
1136 
1137  struct ga_instruction_elementary_trans_val
1138  : public ga_instruction_val, ga_instruction_elementary_trans {
1139  // Z(ndof,target_dim), coeff_in(Qmult,ndof) --> t(target_dim*Qmult)
1140  virtual int exec() {
1141  GA_DEBUG_INFO("Instruction: variable value with elementary "
1142  "transformation");
1143  size_type ndof = Z.sizes()[0];
1144  size_type Qmult = qdim / Z.sizes()[1];
1145  do_transformation(coeff_in.size(), ndof*Qmult);
1146  return ga_instruction_val::exec();
1147  }
1148 
1149  ga_instruction_elementary_trans_val
1150  (base_tensor &tt, const base_tensor &Z_, const base_vector &co, size_type q,
1151  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1152  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1153  : ga_instruction_val(tt, Z_, coeff_out, q),
1154  ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
1155  };
1156 
1157  struct ga_instruction_elementary_trans_grad
1158  : public ga_instruction_grad, ga_instruction_elementary_trans {
1159  // Z(ndof,target_dim,N), coeff_in(Qmult,ndof) --> t(target_dim*Qmult,N)
1160  virtual int exec() {
1161  GA_DEBUG_INFO("Instruction: gradient with elementary transformation");
1162  size_type ndof = Z.sizes()[0];
1163  size_type Qmult = qdim / Z.sizes()[1];
1164  do_transformation(coeff_in.size(), ndof*Qmult);
1165  return ga_instruction_grad::exec();
1166  }
1167 
1168  ga_instruction_elementary_trans_grad
1169  (base_tensor &tt, const base_tensor &Z_, const base_vector &co, size_type q,
1170  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1171  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1172  : ga_instruction_grad(tt, Z_, coeff_out, q),
1173  ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
1174  };
1175 
1176  struct ga_instruction_elementary_trans_hess
1177  : public ga_instruction_hess, ga_instruction_elementary_trans {
1178  // Z(ndof,target_dim,N,N), coeff_in(Qmult,ndof) --> t(target_dim*Qmult,N,N)
1179  virtual int exec() {
1180  GA_DEBUG_INFO("Instruction: Hessian with elementary transformation");
1181  size_type ndof = Z.sizes()[0];
1182  size_type Qmult = qdim / Z.sizes()[1];
1183  do_transformation(coeff_in.size(), ndof*Qmult);
1184  return ga_instruction_hess::exec();
1185  }
1186 
1187  ga_instruction_elementary_trans_hess
1188  (base_tensor &tt, const base_tensor &Z_, const base_vector &co, size_type q,
1189  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1190  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1191  : ga_instruction_hess(tt, Z_, coeff_out, q),
1192  ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
1193  };
1194 
1195  struct ga_instruction_elementary_trans_diverg
1196  : public ga_instruction_diverg, ga_instruction_elementary_trans {
1197  // Z(ndof,target_dim,N), coeff_in(Qmult,ndof) --> t(1)
1198  virtual int exec() {
1199  GA_DEBUG_INFO("Instruction: divergence with elementary transformation");
1200  size_type ndof = Z.sizes()[0];
1201  size_type Qmult = qdim / Z.sizes()[1];
1202  do_transformation(coeff_in.size(), ndof*Qmult);
1203  return ga_instruction_diverg::exec();
1204  }
1205 
1206  ga_instruction_elementary_trans_diverg
1207  (base_tensor &tt, const base_tensor &Z_, const base_vector &co, size_type q,
1208  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1209  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1210  : ga_instruction_diverg(tt, Z_, coeff_out, q),
1211  ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
1212  };
1213 
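  // A variable group may resolve to a different variable (hence a different
  // mesh_fem) depending on the mesh reached by an interpolate
  // transformation. This instruction refreshes the cached mesh_fem, dof
  // interval, value vector and scaling factor of the group for the current
  // target mesh.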
1214  struct ga_instruction_update_group_info : public ga_instruction {
1215  const ga_workspace &workspace;
1216  const ga_instruction_set &gis;
1217  const ga_instruction_set::interpolate_info &inin;
1218  const std::string gname;
1219  ga_instruction_set::variable_group_info &vgi;
1220 
1221  virtual int exec() {
1222  GA_DEBUG_INFO("Instruction: Update group info for "+gname);
1223  if (vgi.cached_mesh && vgi.cached_mesh == inin.m)
1224  return 0;
1225 
1226  vgi.cached_mesh = inin.m;
1227  const std::string &varname
1228  = inin.m ? workspace.variable_in_group(gname, *(inin.m))
1229  : workspace.first_variable_of_group(gname);
1230  vgi.varname = &varname;
1231  vgi.mf = workspace.associated_mf(varname);
1232  GA_DEBUG_ASSERT(vgi.mf, "Group variable should always have a mesh_fem");
1233  vgi.reduced_mf = vgi.mf->is_reduced();
1234  if (vgi.reduced_mf) {
1235  const auto it = gis.really_extended_vars.find(varname);
1236  GA_DEBUG_ASSERT(it != gis.really_extended_vars.end(),
1237  "Variable " << varname << " not in extended variables");
1238  vgi.U = &(it->second);
1239  vgi.I = &(workspace.temporary_interval_of_variable(varname));
1240  } else {
1241  vgi.U = &(workspace.value(varname));
1242  vgi.I = &(workspace.interval_of_variable(varname));
1243  }
1244  vgi.alpha = workspace.factor_of_variable(varname);
1245  return 0;
1246  }
1247 
1248  ga_instruction_update_group_info
1249  (const ga_workspace &workspace_, const ga_instruction_set &gis_,
1250  const ga_instruction_set::interpolate_info &inin_,
1251  const std::string &gname_, ga_instruction_set::variable_group_info &vgi_)
1252  : workspace(workspace_), gis(gis_), inin(inin_), gname(gname_), vgi(vgi_)
1253  {}
1254  };
1255 
1256  struct ga_instruction_interpolate_filter : public ga_instruction {
1257  base_tensor &t;
1258  const ga_instruction_set::interpolate_info &inin;
1259  const size_type pt_type;
1260  const int nb;
1261 
1262  virtual int exec() {
1263  GA_DEBUG_INFO("Instruction: interpolated filter");
1264  if ((pt_type == size_type(-1) && inin.pt_type) ||
1265  (pt_type != size_type(-1) && inin.pt_type == pt_type)) {
1266  GA_DEBUG_INFO("Instruction: interpolated filter: pass");
1267  return 0;
1268  }
1269  else {
1270  GA_DEBUG_INFO("Instruction: interpolated filter: filtered");
1271  gmm::clear(t.as_vector());
1272  return nb;
1273  }
1274  return 0;
1275  }
1276 
1277  ga_instruction_interpolate_filter
1278  (base_tensor &t_, const ga_instruction_set::interpolate_info &inin_,
1279  size_type ind_, int nb_)
1280  : t(t_), inin(inin_), pt_type(ind_), nb(nb_) {}
1281  };
1282 
1283  struct ga_instruction_copy_interpolated_small_vect : public ga_instruction {
1284  base_tensor &t;
1285  const base_small_vector &vec;
1286  const ga_instruction_set::interpolate_info &inin;
1287 
1288  virtual int exec() {
1289  GA_DEBUG_INFO("Instruction: copy small vector");
1290  GMM_ASSERT1(!(inin.has_ctx) || inin.ctx.is_convex_num_valid(),
1291  "Invalid element, probably transformation failed");
1292  GMM_ASSERT1(t.size() == vec.size(), "Invalid vector size.");
1293  gmm::copy(vec, t.as_vector());
1294  return 0;
1295  }
1296  ga_instruction_copy_interpolated_small_vect
1297  (base_tensor &t_, const base_small_vector &vec_,
1298  const ga_instruction_set::interpolate_info &inin_)
1299  : t(t_), vec(vec_), inin(inin_) {}
1300  };
1301 
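  // Evaluation of a variable through an interpolate transformation: the
  // base instruction fetches the fem of the element reached by the
  // transformation and slices the corresponding local coefficients; the
  // derived instructions then interpolate the value, gradient, Hessian or
  // divergence at the transformed point.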
1302  struct ga_instruction_interpolate : public ga_instruction {
1303  base_tensor &t;
1304  const mesh **m;
1305  const mesh_fem *mfn, **mfg;
1306  const base_vector *Un, **Ug;
1307  fem_interpolation_context &ctx;
1308  base_vector coeff;
1309  size_type qdim;
1310  const size_type &ipt;
1311  fem_precomp_pool &fp_pool;
1312  ga_instruction_set::interpolate_info &inin;
1313 
1314  virtual int exec() {
1315  GMM_ASSERT1(ctx.is_convex_num_valid(), "No valid element for the "
1316  "transformation. Probably transformation failed");
1317  const mesh_fem &mf = *(mfg ? *mfg : mfn);
1318  const base_vector &U = *(Ug ? *Ug : Un);
1319  GMM_ASSERT1(&(mf.linked_mesh()) == *m, "Interpolation of a variable "
1320  "on another mesh than the one it is defined on");
1321  slice_vector_on_basic_dof_of_element(mf, U, ctx.convex_num(), coeff);
1322  pfem pf = mf.fem_of_element(ctx.convex_num());
1323  GMM_ASSERT1(pf, "Undefined finite element method");
1324  if (ctx.have_pgp()) {
1325  if (ipt == 0)
1326  inin.pfps[&mf] = fp_pool(pf, ctx.pgp()->get_ppoint_tab());
1327  ctx.set_pfp(inin.pfps[&mf]);
1328  } else {
1329  ctx.set_pf(pf);
1330  }
1331  return 0;
1332  }
1333 
1334  ga_instruction_interpolate
1335  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
1336  const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
1337  fem_interpolation_context &ctx_, size_type q, const size_type &ipt_,
1338  fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1339  : t(tt), m(m_), mfn(mfn_), mfg(mfg_), Un(Un_), Ug(Ug_),
1340  ctx(ctx_), qdim(q), ipt(ipt_), fp_pool(fp_pool_), inin(inin_) {}
1341  };
1342 
1343  struct ga_instruction_interpolate_val : public ga_instruction_interpolate {
1344  // --> t(target_dim*Qmult)
1345  virtual int exec() {
1346  GA_DEBUG_INFO("Instruction: interpolated variable value");
1347  ga_instruction_interpolate::exec();
1348  ctx.pf()->interpolation(ctx, coeff, t.as_vector(), dim_type(qdim));
1349  // cout << "interpolate " << &U << " result : " << t.as_vector() << endl;
1350  return 0;
1351  }
1352 
1353  ga_instruction_interpolate_val
1354  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
1355  const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
1356  fem_interpolation_context &ctx_, size_type q, size_type &ipt_,
1357  fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1358  : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_,ctx_, q, ipt_,
1359  fp_pool_, inin_)
1360  {}
1361  };
1362 
1363  struct ga_instruction_interpolate_grad : public ga_instruction_interpolate {
1364  // --> t(target_dim*Qmult,N)
1365  virtual int exec() {
1366  GA_DEBUG_INFO("Instruction: interpolated variable grad");
1367  ga_instruction_interpolate::exec();
1368  base_matrix v(qdim, ctx.N());
1369  ctx.pf()->interpolation_grad(ctx, coeff, v, dim_type(qdim));
1370  gmm::copy(v.as_vector(), t.as_vector());
1371  return 0;
1372  }
1373 
1374  ga_instruction_interpolate_grad
1375  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
1376  const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
1377  fem_interpolation_context &ctx_, size_type q, size_type &ipt_,
1378  fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1379  : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
1380  fp_pool_, inin_)
1381  {}
1382  };
1383 
1384  struct ga_instruction_interpolate_hess : public ga_instruction_interpolate {
1385  // --> t(target_dim*Qmult,N,N)
1386  virtual int exec() {
1387  GA_DEBUG_INFO("Instruction: interpolated variable hessian");
1388  ga_instruction_interpolate::exec();
1389  base_matrix v(qdim, ctx.N()*ctx.N()); // To be optimized
1390  ctx.pf()->interpolation_hess(ctx, coeff, v, dim_type(qdim));
1391  gmm::copy(v.as_vector(), t.as_vector());
1392  return 0;
1393  }
1394 
1395  ga_instruction_interpolate_hess
1396  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
1397  const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
1398  fem_interpolation_context &ctx_, size_type q, size_type &ipt_,
1399  fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1400  : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
1401  fp_pool_, inin_)
1402  {}
1403  };
1404 
1405  struct ga_instruction_interpolate_diverg : public ga_instruction_interpolate {
1406  // --> t(1)
1407  virtual int exec() {
1408  GA_DEBUG_INFO("Instruction: interpolated variable divergence");
1409  ga_instruction_interpolate::exec();
1410  ctx.pf()->interpolation_diverg(ctx, coeff, t[0]);
1411  return 0;
1412  }
1413 
1414  ga_instruction_interpolate_diverg
1415  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
1416  const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
1417  fem_interpolation_context &ctx_, size_type q, size_type &ipt_,
1418  fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1419  : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
1420  fp_pool_, inin_)
1421  {}
1422  };
1423 
1424  struct ga_instruction_interpolate_base {
1425  base_tensor ZZ;
1426  const mesh **m;
1427  const mesh_fem *mfn, **mfg;
1428  const size_type &ipt;
1429  ga_instruction_set::interpolate_info &inin;
1430  fem_precomp_pool &fp_pool;
1431 
1432  virtual int exec() {
1433  GMM_ASSERT1(inin.ctx.is_convex_num_valid(), "No valid element for "
1434  "the transformation. Probably transformation failed");
1435  const mesh_fem &mf = *(mfg ? *mfg : mfn);
1436  GMM_ASSERT1(&(mf.linked_mesh()) == *m, "Interpolation of a variable "
1437  "on another mesh than the one it is defined on");
1438 
1439  pfem pf = mf.fem_of_element(inin.ctx.convex_num());
1440  GMM_ASSERT1(pf, "Undefined finite element method");
1441 
1442  if (inin.ctx.have_pgp()) {
1443  if (ipt == 0)
1444  inin.pfps[&mf] = fp_pool(pf, inin.ctx.pgp()->get_ppoint_tab());
1445  inin.ctx.set_pfp(inin.pfps[&mf]);
1446  } else {
1447  inin.ctx.set_pf(pf);
1448  }
1449  return 0;
1450  }
1451 
1452  ga_instruction_interpolate_base
1453  (const mesh **m_, const mesh_fem *mfn_, const mesh_fem **mfg_,
1454  const size_type &ipt_, ga_instruction_set::interpolate_info &inin_,
1455  fem_precomp_pool &fp_pool_)
1456  : m(m_), mfn(mfn_), mfg(mfg_), ipt(ipt_), inin(inin_),
1457  fp_pool(fp_pool_) {}
1458  };
1459 
1460  struct ga_instruction_interpolate_val_base
1461  : public ga_instruction_copy_val_base, ga_instruction_interpolate_base {
1462  // ctx --> Z(ndof,target_dim) --> t(Qmult*ndof,Qmult*target_dim)
1463  virtual int exec() {
1464  GA_DEBUG_INFO("Instruction: interpolated base value");
1465  ga_instruction_interpolate_base::exec();
1466  inin.ctx.pf()->real_base_value(inin.ctx, ZZ); // remember Z == ZZ
1467  return ga_instruction_copy_val_base::exec();
1468  }
1469 
1470  ga_instruction_interpolate_val_base
1471  (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
1472  const mesh_fem **mfg_, const size_type &ipt_, size_type q,
1473  ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
1474  : ga_instruction_copy_val_base(t_, ZZ, q),
1475  ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
1476  inin_, fp_pool_) {}
1477  };
1478 
1479  struct ga_instruction_interpolate_grad_base
1480  : public ga_instruction_copy_grad_base, ga_instruction_interpolate_base {
1481  // ctx --> Z(ndof,target_dim,N) --> t(Qmult*ndof,Qmult*target_dim,N)
1482  virtual int exec() {
1483  GA_DEBUG_INFO("Instruction: interpolated base grad");
1484  ga_instruction_interpolate_base::exec();
1485  inin.ctx.pf()->real_grad_base_value(inin.ctx, ZZ); // remember Z == ZZ
1486  return ga_instruction_copy_grad_base::exec();
1487  }
1488 
1489  ga_instruction_interpolate_grad_base
1490  (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
1491  const mesh_fem **mfg_, const size_type &ipt_, size_type q,
1492  ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
1493  : ga_instruction_copy_grad_base(t_, ZZ, q),
1494  ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
1495  inin_, fp_pool_) {}
1496  };
1497 
1498  struct ga_instruction_interpolate_hess_base
1499  : public ga_instruction_copy_hess_base, ga_instruction_interpolate_base {
1500  // ctx --> Z(ndof,target_dim,N*N) --> t(Qmult*ndof,Qmult*target_dim,N,N)
1501  virtual int exec() {
1502  GA_DEBUG_INFO("Instruction: interpolated base hessian");
1503  ga_instruction_interpolate_base::exec();
1504  inin.ctx.pf()->real_hess_base_value(inin.ctx, ZZ); // remember Z == ZZ
1505  return ga_instruction_copy_hess_base::exec();
1506  }
1507 
1508  ga_instruction_interpolate_hess_base
1509  (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
1510  const mesh_fem **mfg_, const size_type &ipt_, size_type q,
1511  ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
1512  : ga_instruction_copy_hess_base(t_, ZZ, q),
1513  ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
1514  inin_, fp_pool_) {}
1515  };
1516 
1517  struct ga_instruction_interpolate_diverg_base
1518  : public ga_instruction_copy_diverg_base, ga_instruction_interpolate_base {
1519  // ctx --> Z(ndof,target_dim,N*N) --> t(Qmult*ndof)
1520  virtual int exec() {
1521  GA_DEBUG_INFO("Instruction: interpolated base divergence");
1522  ga_instruction_interpolate_base::exec();
1523  inin.ctx.pf()->real_grad_base_value(inin.ctx, ZZ); // remember Z == ZZ
1524  return ga_instruction_copy_diverg_base::exec();
1525  }
1526 
1527  ga_instruction_interpolate_diverg_base
1528  (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
1529  const mesh_fem **mfg_, const size_type &ipt_, size_type q,
1530  ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
1531  : ga_instruction_copy_diverg_base(t_, ZZ, q),
1532  ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
1533  inin_, fp_pool_) {}
1534  };
1535 
1536 
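  // Elementary transformation applied to test functions: the vectorized
  // base tensor is built in t_in and then reduced along its first index
  // against the per-element matrix M into t_out.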
1537  struct ga_instruction_elementary_trans_base {
1538  base_tensor t_in;
1539  base_tensor &t_out;
1540  pelementary_transformation elemtrans;
1541  const mesh_fem &mf1, &mf2;
1542  const fem_interpolation_context &ctx;
1543  base_matrix &M;
1544  size_type &icv;
1545 
1546  void do_transformation(size_type n, size_type m) {
1547  if (icv != ctx.convex_num() || M.size() == 0) {
1548  M.base_resize(m, n);
1549  icv = ctx.convex_num();
1550  elemtrans->give_transformation(mf1, mf2, icv, M);
1551  }
1552  t_out.mat_reduction(t_in, M, 0);
1553  }
1554 
1555  ga_instruction_elementary_trans_base
1556  (base_tensor &t_, pelementary_transformation e, const mesh_fem &mf1_,
1557  const mesh_fem &mf2_,
1558  const fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1559  : t_out(t_), elemtrans(e), mf1(mf1_), mf2(mf2_), ctx(ctx_),
1560  M(M_), icv(icv_) {}
1561  };
1562 
1563  struct ga_instruction_elementary_trans_val_base
1564  : public ga_instruction_copy_val_base,
1565  ga_instruction_elementary_trans_base {
1566  // Z(ndof,target_dim) --> t_in --> t_out(Qmult*ndof,Qmult*target_dim)
1567  virtual int exec() {
1568  GA_DEBUG_INFO("Instruction: value of test functions with elementary "
1569  "transformation");
1570  size_type ndof = Z.sizes()[0];
1571  size_type Qmult = qdim / Z.sizes()[1];
1572  t_in.adjust_sizes(Qmult*ndof, Qmult*Z.sizes()[1]);
1573  ga_instruction_copy_val_base::exec();
1574  do_transformation(t_out.sizes()[0], ndof*Qmult);
1575  return 0;
1576  }
1577 
1578  ga_instruction_elementary_trans_val_base
1579  (base_tensor &t_, const base_tensor &Z_, size_type q,
1580  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1581  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1582  : ga_instruction_copy_val_base(t_in, Z_, q),
1583  ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
1584  M_, icv_) {}
1585  };
1586 
1587  struct ga_instruction_elementary_trans_grad_base
1588  : public ga_instruction_copy_grad_base,
1589  ga_instruction_elementary_trans_base {
1590  // Z(ndof,target_dim,N) --> t_in --> t_out(Qmult*ndof,Qmult*target_dim,N)
1591  virtual int exec() {
1592  GA_DEBUG_INFO("Instruction: gradient of test functions with elementary "
1593  "transformation");
1594  size_type ndof = Z.sizes()[0];
1595  size_type Qmult = qdim / Z.sizes()[1];
1596  t_in.adjust_sizes(Qmult*ndof, Qmult*Z.sizes()[1], Z.sizes()[2]);
1597  ga_instruction_copy_grad_base::exec();
1598  do_transformation(t_out.sizes()[0], ndof*Qmult);
1599  return 0;
1600  }
1601 
1602  ga_instruction_elementary_trans_grad_base
1603  (base_tensor &t_, const base_tensor &Z_, size_type q,
1604  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1605  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1606  : ga_instruction_copy_grad_base(t_in, Z_, q),
1607  ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
1608  M_, icv_) {}
1609  };
1610 
1611  struct ga_instruction_elementary_trans_hess_base
1612  : public ga_instruction_copy_hess_base,
1613  ga_instruction_elementary_trans_base {
1614  // Z(ndof,target_dim,N*N) --> t_out(Qmult*ndof,Qmult*target_dim,N,N)
1615  virtual int exec() {
1616  GA_DEBUG_INFO("Instruction: Hessian of test functions with elementary "
1617  "transformation");
1618  size_type ndof = Z.sizes()[0];
1619  size_type Qmult = qdim / Z.sizes()[1];
1620  t_in.adjust_sizes(Qmult*ndof, Qmult*Z.sizes()[1], Z.sizes()[2]);
1621  ga_instruction_copy_hess_base::exec();
1622  do_transformation(t_out.sizes()[0], ndof*Qmult);
1623  return 0;
1624  }
1625 
1626  ga_instruction_elementary_trans_hess_base
1627  (base_tensor &t_, const base_tensor &Z_, size_type q,
1628  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1629  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1630  : ga_instruction_copy_hess_base(t_in, Z_, q),
1631  ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
1632  M_, icv_) {}
1633  };
1634 
1635  struct ga_instruction_elementary_trans_diverg_base
1636  : public ga_instruction_copy_diverg_base,
1637  ga_instruction_elementary_trans_base {
1638  // Z(ndof,target_dim,N) --> t_out(Qmult*ndof)
1639  virtual int exec() {
1640  GA_DEBUG_INFO("Instruction: divergence of test functions with elementary "
1641  "transformation");
1642  size_type ndof = Z.sizes()[0];
1643  size_type Qmult = qdim / Z.sizes()[1];
1644  t_in.adjust_sizes(Qmult*ndof);
1645  ga_instruction_copy_diverg_base::exec();
1646  do_transformation(t_out.sizes()[0], ndof*Qmult);
1647  return 0;
1648  }
1649 
1650  ga_instruction_elementary_trans_diverg_base
1651  (base_tensor &t_, const base_tensor &Z_, size_type q,
1652  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1653  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1654  : ga_instruction_copy_diverg_base(t_in, Z_, q),
1655  ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
1656  M_, icv_) {}
1657  };
1658 
1659 
1660  struct ga_instruction_add : public ga_instruction {
1661  base_tensor &t;
1662  const base_tensor &tc1, &tc2;
1663  virtual int exec() {
1664  GA_DEBUG_INFO("Instruction: addition");
1665  GA_DEBUG_ASSERT(t.size() == tc1.size(),
1666  "internal error " << t.size() << " != " << tc1.size());
1667  GA_DEBUG_ASSERT(t.size() == tc2.size(),
1668  "internal error " << t.size() << " != " << tc2.size());
1669  gmm::add(tc1.as_vector(), tc2.as_vector(), t.as_vector());
1670  return 0;
1671  }
1672  ga_instruction_add(base_tensor &t_,
1673  const base_tensor &tc1_, const base_tensor &tc2_)
1674  : t(t_), tc1(tc1_), tc2(tc2_) {}
1675  };
1676 
1677  struct ga_instruction_add_to : public ga_instruction {
1678  base_tensor &t;
1679  const base_tensor &tc1;
1680  virtual int exec() {
1681  GA_DEBUG_INFO("Instruction: in-place addition");
1682  GA_DEBUG_ASSERT(t.size() == tc1.size(), "internal error " << t.size()
1683  << " incompatible with " << tc1.size());
1684  gmm::add(tc1.as_vector(), t.as_vector());
1685  return 0;
1686  }
1687  ga_instruction_add_to(base_tensor &t_, const base_tensor &tc1_)
1688  : t(t_), tc1(tc1_) {}
1689  };
1690 
1691  struct ga_instruction_add_to_coeff : public ga_instruction {
1692  base_tensor &t;
1693  const base_tensor &tc1;
1694  scalar_type &coeff;
1695  virtual int exec() {
1696  GA_DEBUG_INFO("Instruction: addition with scale");
1697  GA_DEBUG_ASSERT(t.size() == tc1.size(), "internal error " << t.size()
1698  << " incompatible with " << tc1.size());
1699  gmm::add(gmm::scaled(tc1.as_vector(), coeff), t.as_vector());
1700  return 0;
1701  }
1702  ga_instruction_add_to_coeff(base_tensor &t_, const base_tensor &tc1_,
1703  scalar_type &coeff_)
1704  : t(t_), tc1(tc1_), coeff(coeff_) {}
1705  };
1706 
1707  struct ga_instruction_sub : public ga_instruction {
1708  base_tensor &t;
1709  const base_tensor &tc1, &tc2;
1710  virtual int exec() {
1711  GA_DEBUG_INFO("Instruction: subtraction");
1712  GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
1713  "internal error");
1714  gmm::add(tc1.as_vector(), gmm::scaled(tc2.as_vector(), scalar_type(-1)),
1715  t.as_vector());
1716  return 0;
1717  }
1718  ga_instruction_sub(base_tensor &t_,
1719  const base_tensor &tc1_, const base_tensor &tc2_)
1720  : t(t_), tc1(tc1_), tc2(tc2_) {}
1721  };
1722 
1723  struct ga_instruction_opposite : public ga_instruction {
1724  base_tensor &t;
1725  virtual int exec() {
1726  GA_DEBUG_INFO("Instruction: multiplication with -1");
1727  gmm::scale(t.as_vector(), scalar_type(-1));
1728  return 0;
1729  }
1730  ga_instruction_opposite(base_tensor &t_) : t(t_) {}
1731  };
1732 
1733  struct ga_instruction_print_tensor : public ga_instruction {
1734  base_tensor &t;
1735  pga_tree_node pnode;
1736  const fem_interpolation_context &ctx;
1737  size_type &nbpt, &ipt;
1738  virtual int exec() {
1739  GA_DEBUG_INFO("Instruction: tensor print");
1740  cout << "Print term "; ga_print_node(pnode, cout);
1741  cout << " on Gauss point " << ipt << "/" << nbpt << " of element "
1742  << ctx.convex_num() << ": " << t << endl;
1743  return 0;
1744  }
1745  ga_instruction_print_tensor(base_tensor &t_, pga_tree_node pnode_,
1746  const fem_interpolation_context &ctx_,
1747  size_type &nbpt_, size_type &ipt_)
1748  : t(t_), pnode(pnode_), ctx(ctx_), nbpt(nbpt_), ipt(ipt_) {}
1749  };
1750 
1751  struct ga_instruction_copy_tensor : public ga_instruction {
1752  base_tensor &t;
1753  const base_tensor &tc1;
1754  virtual int exec() {
1755  GA_DEBUG_INFO("Instruction: tensor copy");
1756  std::copy(tc1.begin(), tc1.end(), t.begin());
1757  // gmm::copy(tc1.as_vector(), t.as_vector());
1758  return 0;
1759  }
1760  ga_instruction_copy_tensor(base_tensor &t_, const base_tensor &tc1_)
1761  : t(t_), tc1(tc1_) {}
1762  };
1763 
1764  struct ga_instruction_clear_tensor : public ga_instruction {
1765  base_tensor &t;
1766  virtual int exec() {
1767  GA_DEBUG_INFO("Instruction: clear tensor");
1768  std::fill(t.begin(), t.end(), scalar_type(0));
1769  return 0;
1770  }
1771  ga_instruction_clear_tensor(base_tensor &t_) : t(t_) {}
1772  };
1773 
1774  struct ga_instruction_copy_tensor_possibly_void : public ga_instruction {
1775  base_tensor &t;
1776  const base_tensor &tc1;
1777  virtual int exec() {
1778  GA_DEBUG_INFO("Instruction: tensor copy possibly void");
1779  if (tc1.size())
1780  gmm::copy(tc1.as_vector(), t.as_vector());
1781  else
1782  gmm::clear(t.as_vector());
1783  return 0;
1784  }
1785  ga_instruction_copy_tensor_possibly_void(base_tensor &t_,
1786  const base_tensor &tc1_)
1787  : t(t_), tc1(tc1_) {}
1788  };
1789 
1790  struct ga_instruction_copy_scalar : public ga_instruction {
1791  scalar_type &t; const scalar_type &t1;
1792  virtual int exec() {
1793  GA_DEBUG_INFO("Instruction: scalar copy");
1794  t = t1;
1795  return 0;
1796  }
1797  ga_instruction_copy_scalar(scalar_type &t_, const scalar_type &t1_)
1798  : t(t_), t1(t1_) {}
1799  };
1800 
1801  struct ga_instruction_copy_vect : public ga_instruction {
1802  base_vector &t;
1803  const base_vector &t1;
1804  virtual int exec() {
1805  GA_DEBUG_INFO("Instruction: fixed size tensor copy");
1806  gmm::copy(t1, t);
1807  return 0;
1808  }
1809  ga_instruction_copy_vect(base_vector &t_, const base_vector &t1_)
1810  : t(t_), t1(t1_) {}
1811  };
1812 
1813  struct ga_instruction_trace : public ga_instruction {
1814  base_tensor &t;
1815  const base_tensor &tc1;
1816  size_type n;
1817  // tc1(:,:,...,n,n) --> t(:,:,...)
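  // i.e. t(...) = sum_i tc1(...,i,i), the trace over the two trailing indices.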
1818  virtual int exec() {
1819  GA_DEBUG_INFO("Instruction: Trace");
1820  GA_DEBUG_ASSERT(t.size()*n*n == tc1.size(), "Wrong sizes");
1821  size_type s = t.size() * (n+1);
1822  auto it = t.begin();
1823  auto it1 = tc1.begin();
1824  for (; it != t.end(); ++it, ++it1) {
1825  auto it2 = it1;
1826  *it = *it2;
1827  for (size_type i = 1; i < n; ++i) { it2 += s; *it += *it2; }
1828  }
1829  return 0;
1830  }
1831 
1832  ga_instruction_trace(base_tensor &t_, const base_tensor &tc1_, size_type n_)
1833  : t(t_), tc1(tc1_), n(n_) {}
1834  };
1835 
1836  struct ga_instruction_deviator : public ga_instruction {
1837  base_tensor &t;
1838  const base_tensor &tc1;
1839  size_type n;
1840  // tc1(:,:,...,n,n) --> t(:,:,...,n,n)
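  // i.e. t(...,i,j) = tc1(...,i,j) - delta_ij * (sum_k tc1(...,k,k))/n,
  // the deviatoric part with respect to the two trailing indices.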
1841  virtual int exec() {
1842  GA_DEBUG_INFO("Instruction: Deviator");
1843  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1844 
1845  gmm::copy(tc1.as_vector(), t.as_vector());
1846 
1847  size_type nb = t.size()/(n*n);
1848  size_type s = nb * (n+1), j = 0;
1849  base_tensor::iterator it = t.begin();
1850  base_tensor::const_iterator it1 = tc1.begin();
1851  for (; j < nb; ++it, ++it1, ++j) {
1852  scalar_type tr(0);
1853  base_tensor::const_iterator it2 = it1;
1854  tr += *it2;
1855  for (size_type i = 1; i < n; ++i) { it2 += s; tr += *it2; }
1856  tr /= scalar_type(n);
1857 
1858  base_tensor::iterator it3 = it;
1859  *it3 -= tr;
1860  for (size_type i = 1; i < n; ++i) { it3 += s; *it3 -= tr; }
1861  }
1862  return 0;
1863  }
1864 
1865  ga_instruction_deviator(base_tensor &t_, const base_tensor &tc1_,
1866  size_type n_)
1867  : t(t_), tc1(tc1_), n(n_) {}
1868  };
1869 
1870  struct ga_instruction_transpose : public ga_instruction { // To be optimized
1871  base_tensor &t;
1872  const base_tensor &tc1;
1873  size_type n1, n2, nn;
1874  virtual int exec() {
1875  GA_DEBUG_INFO("Instruction: transpose");
1876  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1877 
1878  size_type n0 = tc1.size() / (n1*n2*nn);
1879  auto it = t.begin();
1880  for (size_type i = 0; i < nn; ++i) {
1881  size_type s1 = i*n1*n2*n0;
1882  for (size_type j = 0; j < n1; ++j) {
1883  size_type s2 = s1 + j*n0;
1884  for (size_type k = 0; k < n2; ++k) {
1885  size_type s3 = s2 + k*n1*n0;
1886  for (size_type l = 0; l < n0; ++l, ++it)
1887  *it = tc1[s3+l];
1888  }
1889  }
1890  }
1891  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
1892  return 0;
1893  }
1894  ga_instruction_transpose(base_tensor &t_, const base_tensor &tc1_,
1895  size_type n1_, size_type n2_, size_type nn_)
1896  : t(t_), tc1(tc1_), n1(n1_), n2(n2_), nn(nn_) {}
1897  };
1898 
1899  struct ga_instruction_swap_indices : public ga_instruction {// To be optimized
1900  base_tensor &t;
1901  const base_tensor &tc1;
1902  size_type nn1, nn2, ii2, ii3;
1903  virtual int exec() {
1904  GA_DEBUG_INFO("Instruction: swap indices");
1905  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1906  size_type ii1 = t.size() / (nn1*nn2*ii2*ii3);
1907 
1908  auto it = t.begin();
1909  for (size_type i = 0; i < ii3; ++i)
1910  for (size_type j = 0; j < nn1; ++j)
1911  for (size_type k = 0; k < ii2; ++k)
1912  for (size_type l = 0; l < nn2; ++l) {
1913  size_type ind = j*ii1+k*ii1*nn1+l*ii1*nn1*ii2+i*ii1*nn1*ii2*nn2;
1914  for (size_type m = 0; m < ii1; ++m, ++it)
1915  *it = tc1[m+ind];
1916  }
1917  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
1918  return 0;
1919  }
1920  ga_instruction_swap_indices(base_tensor &t_, const base_tensor &tc1_,
1921  size_type n1_, size_type n2_,
1922  size_type i2_, size_type i3_)
1923  : t(t_), tc1(tc1_), nn1(n1_), nn2(n2_), ii2(i2_), ii3(i3_) {}
1924  };
1925 
1926  struct ga_instruction_index_move_last : public ga_instruction {// To be optimized
1927  base_tensor &t;
1928  const base_tensor &tc1;
1929  size_type nn, ii2;
1930  virtual int exec() {
1931  GA_DEBUG_INFO("Instruction: index move last");
1932  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1933  size_type ii1 = t.size() / (nn*ii2);
1934 
1935  auto it = t.begin();
1936  for (size_type i = 0; i < nn; ++i)
1937  for (size_type j = 0; j < ii2; ++j) {
1938  size_type ind = i*ii1+j*ii1*nn;
1939  for (size_type k = 0; k < ii1; ++k, ++it)
1940  *it = tc1[k+ind];
1941  }
1942  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
1943  return 0;
1944  }
1945  ga_instruction_index_move_last(base_tensor &t_, const base_tensor &tc1_,
1946  size_type n_, size_type i2_)
1947  : t(t_), tc1(tc1_), nn(n_), ii2(i2_) {}
1948  };
1949 
1950  struct ga_instruction_transpose_no_test : public ga_instruction {
1951  base_tensor &t;
1952  const base_tensor &tc1;
1953  size_type n1, n2, nn;
1954  virtual int exec() {
1955  GA_DEBUG_INFO("Instruction: transpose");
1956  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1957 
1958  auto it = t.begin();
1959  for (size_type i = 0; i < nn; ++i) {
1960  size_type s1 = i*n1*n2;
1961  for (size_type j = 0; j < n1; ++j) {
1962  size_type s2 = s1 + j;
1963  for (size_type k = 0; k < n2; ++k, ++it)
1964  *it = tc1[s2 + k*n1];
1965  }
1966  }
1967  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
1968  return 0;
1969  }
1970  ga_instruction_transpose_no_test(base_tensor &t_, const base_tensor &tc1_,
1971  size_type n1_, size_type n2_,
1972  size_type nn_)
1973  : t(t_), tc1(tc1_), n1(n1_), n2(n2_), nn(nn_) {}
1974  };
1975 
1976  struct ga_instruction_transpose_test : public ga_instruction {
1977  base_tensor &t;
1978  const base_tensor &tc1;
1979  virtual int exec() {
1980  GA_DEBUG_INFO("Instruction: copy tensor and transpose test functions");
1981  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1982  GA_DEBUG_ASSERT(t.sizes().size() >= 2, "Wrong sizes");
1983 
1984  size_type s1 = t.sizes()[0], s2 = t.sizes()[1], s3 = s1*s2;
1985  size_type s = t.size() / s3;
1986  base_tensor::iterator it = t.begin();
1987  for (size_type k = 0; k < s; ++k)
1988  for (size_type j = 0; j < s2; ++j)
1989  for (size_type i = 0; i < s1; ++i, ++it)
1990  *it = tc1[j+s2*i+k*s3];
1991  return 0;
1992  }
1993  ga_instruction_transpose_test(base_tensor &t_, const base_tensor &tc1_)
1994  : t(t_), tc1(tc1_) {}
1995  };
1996 
1997  struct ga_instruction_sym : public ga_instruction {
1998  base_tensor &t;
1999  const base_tensor &tc1;
2000  virtual int exec() {
2001  GA_DEBUG_INFO("Instruction: symmetric part");
2002  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
2003  size_type order = t.sizes().size();
2004  size_type s1 = t.sizes()[order-2], s2 = t.sizes()[order-1];
2005  size_type s = t.size() / (s1*s2);
2006  for (size_type i = 0; i < s1; ++i)
2007  for (size_type j = 0; j < s2; ++j) {
2008  base_tensor::iterator it = t.begin() + s*(i + s1*j);
2009  base_tensor::const_iterator it1 = tc1.begin() + s*(i + s1*j),
2010  it1T = tc1.begin() + s*(j + s2*i);
2011  for (size_type k = 0; k < s; ++k) *it++ = 0.5*(*it1++ + *it1T++);
2012  }
2013  return 0;
2014  }
2015  ga_instruction_sym(base_tensor &t_, const base_tensor &tc1_)
2016  : t(t_), tc1(tc1_) {}
2017  };
2018 
2019  struct ga_instruction_skew : public ga_instruction {
2020  base_tensor &t;
2021  const base_tensor &tc1;
2022  virtual int exec() {
2023  GA_DEBUG_INFO("Instruction: skew-symmetric part");
2024  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
2025  size_type order = t.sizes().size();
2026  size_type s1 = t.sizes()[order-2], s2 = t.sizes()[order-1];
2027  size_type s = t.size() / (s1*s2);
2028  for (size_type i = 0; i < s1; ++i)
2029  for (size_type j = 0; j < s2; ++j) {
2030  base_tensor::iterator it = t.begin() + s*(i + s1*j);
2031  base_tensor::const_iterator it1 = tc1.begin() + s*(i + s1*j),
2032  it1T = tc1.begin() + s*(j + s2*i);
2033  for (size_type k = 0; k < s; ++k) *it++ = 0.5*(*it1++ - *it1T++);
2034  }
2035  return 0;
2036  }
2037  ga_instruction_skew(base_tensor &t_, const base_tensor &tc1_)
2038  : t(t_), tc1(tc1_) {}
2039  };
2040 
2041  struct ga_instruction_scalar_add : public ga_instruction {
2042  scalar_type &t;
2043  const scalar_type &c, &d;
2044  virtual int exec() {
2045  GA_DEBUG_INFO("Instruction: scalar addition");
2046  t = c + d;
2047  return 0;
2048  }
2049  ga_instruction_scalar_add(scalar_type &t_, const scalar_type &c_,
2050  const scalar_type &d_)
2051  : t(t_), c(c_), d(d_) {}
2052  };
2053 
2054  struct ga_instruction_scalar_sub : public ga_instruction {
2055  scalar_type &t;
2056  const scalar_type &c, &d;
2057  virtual int exec() {
2058  GA_DEBUG_INFO("Instruction: scalar subtraction");
2059  t = c - d;
2060  return 0;
2061  }
2062  ga_instruction_scalar_sub(scalar_type &t_, const scalar_type &c_,
2063  const scalar_type &d_)
2064  : t(t_), c(c_), d(d_) {}
2065  };
2066 
2067  struct ga_instruction_scalar_scalar_mult : public ga_instruction {
2068  scalar_type &t;
2069  const scalar_type &c, &d;
2070  virtual int exec() {
2071  GA_DEBUG_INFO("Instruction: scalar multiplication");
2072  t = c * d;
2073  return 0;
2074  }
2075  ga_instruction_scalar_scalar_mult(scalar_type &t_, const scalar_type &c_,
2076  const scalar_type &d_)
2077  : t(t_), c(c_), d(d_) {}
2078  };
2079 
2080  struct ga_instruction_scalar_scalar_div : public ga_instruction {
2081  scalar_type &t;
2082  const scalar_type &c, &d;
2083  virtual int exec() {
2084  GA_DEBUG_INFO("Instruction: scalar division");
2085  t = c / d;
2086  return 0;
2087  }
2088  ga_instruction_scalar_scalar_div(scalar_type &t_, const scalar_type &c_,
2089  const scalar_type &d_)
2090  : t(t_), c(c_), d(d_) {}
2091  };
2092 
2093  struct ga_instruction_scalar_mult : public ga_instruction {
2094  base_tensor &t, &tc1;
2095  const scalar_type &c;
2096  virtual int exec() {
2097  GA_DEBUG_INFO("Instruction: multiplication of a tensor by a scalar " << c);
2098  gmm::copy(gmm::scaled(tc1.as_vector(), c), t.as_vector());
2099  return 0;
2100  }
2101  ga_instruction_scalar_mult(base_tensor &t_, base_tensor &tc1_,
2102  const scalar_type &c_)
2103  : t(t_), tc1(tc1_), c(c_) {}
2104  };
2105 
2106  struct ga_instruction_scalar_div : public ga_instruction {
2107  base_tensor &t, &tc1;
2108  const scalar_type &c;
2109  virtual int exec() {
2110  GA_DEBUG_INFO("Instruction: division of a tensor by a scalar");
2111  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
2112 
2113  base_tensor::iterator it = t.begin(), it1 = tc1.begin();
2114  for (; it != t.end(); ++it, ++it1) *it = *it1/c;
2115  return 0;
2116  }
2117  ga_instruction_scalar_div(base_tensor &t_, base_tensor &tc1_,
2118  const scalar_type &c_)
2119  : t(t_), tc1(tc1_), c(c_) {}
2120  };
2121 
2122  // Performs Cross product in the presence of test functions
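  // tc1 and tc2 hold one 3-component vector per test function dof (n1 and
  // n2 dofs respectively); the result is t(j,i,:) = tc1(j,:) x tc2(i,:),
  // with the opposite sign when inv is true.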
2123  struct ga_instruction_cross_product_tf : public ga_instruction {
2124  base_tensor &t, &tc1, &tc2;
2125  bool inv;
2126  virtual int exec() {
2127  GA_DEBUG_INFO("Instruction: Cross product with test functions");
2128 
2129  size_type n1 = tc1.size() / 3, n2 = tc2.size() / 3, nn=n1*n2;
2130  GA_DEBUG_ASSERT(t.size() == nn*3, "Bad tensor size for cross product");
2131  size_type mm=2*nn, n1_2 = 2*n1, n2_2 = 2*n2;
2132  base_tensor::iterator it = t.begin(), it2 = tc2.begin();
2133 
2134  if (inv) {
2135  for (size_type i = 0; i < n2; ++i, ++it2) {
2136  base_tensor::iterator it1 = tc1.begin();
2137  for (size_type j = 0; j < n1; ++j, ++it, ++it1) {
2138  *it = - it1[n1] *it2[n2_2] + it1[n1_2]*it2[n2];
2139  it[nn] = - it1[n1_2]*it2[0] + it1[0] *it2[n2_2];
2140  it[mm] = - it1[0] *it2[n2] + it1[n1] *it2[0];
2141  }
2142  }
2143  } else {
2144  for (size_type i = 0; i < n2; ++i, ++it2) {
2145  base_tensor::iterator it1 = tc1.begin();
2146  for (size_type j = 0; j < n1; ++j, ++it, ++it1) {
2147  *it = it1[n1] *it2[n2_2] - it1[n1_2]*it2[n2];
2148  it[nn] = it1[n1_2]*it2[0] - it1[0] *it2[n2_2];
2149  it[mm] = it1[0] *it2[n2] - it1[n1] *it2[0];
2150  }
2151  }
2152  }
2153  return 0;
2154  }
2155  ga_instruction_cross_product_tf(base_tensor &t_, base_tensor &tc1_,
2156  base_tensor &tc2_, bool inv_)
2157  : t(t_), tc1(tc1_), tc2(tc2_), inv(inv_) {}
2158  };
2159 
2160  // Performs Cross product in the absence of test functions
2161  struct ga_instruction_cross_product : public ga_instruction {
2162  base_tensor &t, &tc1, &tc2;
2163  virtual int exec() {
2164  GA_DEBUG_INFO("Instruction: Cross product without test functions");
2165  GA_DEBUG_ASSERT(t.size() == 3 && tc1.size() == 3 && tc2.size() == 3,
2166  "Bad tensor size for cross product");
2167  t[0] = tc1[1]*tc2[2] - tc1[2]*tc2[1];
2168  t[1] = tc1[2]*tc2[0] - tc1[0]*tc2[2];
2169  t[2] = tc1[0]*tc2[1] - tc1[1]*tc2[0];
2170  return 0;
2171  }
2172  ga_instruction_cross_product(base_tensor &t_, base_tensor &tc1_,
2173  base_tensor &tc2_)
2174  : t(t_), tc1(tc1_), tc2(tc2_) {}
2175  };
2176 
2177 
2178 
2179 
2180  struct ga_instruction_dotmult : public ga_instruction {
2181  base_tensor &t, &tc1, &tc2;
2182  virtual int exec() {
2183  GA_DEBUG_INFO("Instruction: componentwise multiplication");
2184  size_type s2 = tc2.size(), s1_1 = tc1.size() / s2;
2185  GA_DEBUG_ASSERT(t.size() == s1_1*s2, "Wrong sizes");
2186 
2187  base_tensor::iterator it = t.begin();
2188  for (size_type i = 0; i < s2; ++i)
2189  for (size_type m = 0; m < s1_1; ++m, ++it)
2190  *it = tc1[m+s1_1*i] * tc2[i];
2191  return 0;
2192  }
2193  ga_instruction_dotmult(base_tensor &t_, base_tensor &tc1_,
2194  base_tensor &tc2_)
2195  : t(t_), tc1(tc1_), tc2(tc2_) {}
2196  };
2197 
2198  struct ga_instruction_dotdiv : public ga_instruction {
2199  base_tensor &t, &tc1, &tc2;
2200  virtual int exec() {
2201  GA_DEBUG_INFO("Instruction: componentwise division");
2202  size_type s2 = tc2.size(), s1_1 = tc1.size() / s2;
2203  GA_DEBUG_ASSERT(t.size() == s1_1*s2, "Wrong sizes");
2204 
2205  base_tensor::iterator it = t.begin();
2206  for (size_type i = 0; i < s2; ++i)
2207  for (size_type m = 0; m < s1_1; ++m, ++it)
2208  *it = tc1[m+s1_1*i] / tc2[i];
2209  return 0;
2210  }
2211  ga_instruction_dotdiv(base_tensor &t_, base_tensor &tc1_,
2212  base_tensor &tc2_)
2213  : t(t_), tc1(tc1_), tc2(tc2_) {}
2214  };
2215 
2216  // Performs Ami Bni -> Cmni
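  // i.e. C(m,n,i) = A(m,i) * B(n,i), the two test function indices m and n
  // sharing the same component index i.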
2217  struct ga_instruction_dotmult_spec : public ga_instruction {
2218  base_tensor &t, &tc1, &tc2;
2219  virtual int exec() {
2220  GA_DEBUG_INFO("Instruction: specific componentwise multiplication");
2221  size_type s2_1 = tc2.sizes()[0], s2_2 = tc2.size() / s2_1;
2222  size_type s1_1 = tc1.size() / s2_2;
2223 
2224  base_tensor::iterator it = t.begin();
2225  for (size_type i = 0; i < s2_2; ++i)
2226  for (size_type n = 0; n < s2_1; ++n)
2227  for (size_type m = 0; m < s1_1; ++m, ++it)
2228  *it = tc1[m+s1_1*i] * tc2[n+s2_1*i];
2229  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2230  return 0;
2231  }
2232  ga_instruction_dotmult_spec(base_tensor &t_, base_tensor &tc1_,
2233  base_tensor &tc2_)
2234  : t(t_), tc1(tc1_), tc2(tc2_) {}
2235  };
2236 
2237  // Performs Amijik -> Cmjk. To be optimized
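  // i.e. C(m,j,k) = sum_i A(m,i,j,i,k); nn is the range of the contracted
  // index i, ii2 and ii3 the ranges of j and k.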
2238  struct ga_instruction_contract_1_1 : public ga_instruction {
2239  base_tensor &t, &tc1;
2240  size_type nn, ii2, ii3;
2241  virtual int exec() {
2242  GA_DEBUG_INFO("Instruction: single contraction on a single tensor");
2243 
2244  size_type ii1 = tc1.size() / (nn*nn*ii2*ii3);
2245 
2246  base_tensor::iterator it = t.begin();
2247  for (size_type i = 0; i < ii3; ++i)
2248  for (size_type j = 0; j < ii2; ++j)
2249  for (size_type k = 0; k < ii1; ++k, ++it) {
2250  *it = scalar_type(0);
2251  size_type pre_ind = k+j*ii1*nn+i*ii1*nn*ii2*nn;
2252  for (size_type n = 0; n < nn; ++n)
2253  *it += tc1[pre_ind+n*ii1+n*ii1*nn*ii2];
2254  }
2255 
2256  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2257  return 0;
2258  }
2259  ga_instruction_contract_1_1(base_tensor &t_, base_tensor &tc1_,
2260  size_type n_, size_type i2_, size_type i3_)
2261  : t(t_), tc1(tc1_), nn(n_), ii2(i2_), ii3(i3_) {}
2262  };
2263 
2264  // Performs Amijk Bnljp -> Cmniklp. To be optimized
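  // i.e. C(m,n,i,k,l,p) = sum_j A(m,i,j,k) * B(n,l,j,p); nn is the range of
  // the contracted index j, ii1..ii4 the ranges of i, k, l and p.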
2265  struct ga_instruction_contract_2_1 : public ga_instruction {
2266  base_tensor &t, &tc1, &tc2;
2267  size_type nn, ii1, ii2, ii3, ii4;
2268  virtual int exec() {
2269  GA_DEBUG_INFO("Instruction: single contraction on two tensors");
2270 
2271  size_type ift1 = tc1.size() / (nn*ii1*ii2);
2272  size_type ift2 = tc2.size() / (nn*ii3*ii4);
2273 
2274  base_tensor::iterator it = t.begin();
2275  for (size_type i = 0; i < ii4; ++i)
2276  for (size_type j = 0; j < ii3; ++j)
2277  for (size_type k = 0; k < ii2; ++k)
2278  for (size_type l = 0; l < ii1; ++l)
2279  for (size_type p = 0; p < ift2; ++p)
2280  for (size_type q = 0; q < ift1; ++q, ++it) {
2281  *it = scalar_type(0);
2282  size_type ind1 = q+l*ift1+k*ift1*ii1*nn;
2283  size_type ind2 = p+j*ift2+i*ift2*ii3*nn;
2284  for (size_type n = 0; n < nn; ++n)
2285  *it += tc1[ind1+n*ift1*ii1] * tc2[ind2+n*ift2*ii3];
2286  }
2287 
2288  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2289  return 0;
2290  }
2291  ga_instruction_contract_2_1(base_tensor &t_, base_tensor &tc1_,
2292  base_tensor &tc2_,
2293  size_type n_, size_type i1_, size_type i2_,
2294  size_type i3_, size_type i4_)
2295  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_),
2296  ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_) {}
2297  };
2298 
2299  // Performs Amijk Bnljp -> Cnmiklp. To be optimized
2300  struct ga_instruction_contract_2_1_rev : public ga_instruction {
2301  base_tensor &t, &tc1, &tc2;
2302  size_type nn, ii1, ii2, ii3, ii4;
2303  virtual int exec() {
2304  GA_DEBUG_INFO("Instruction: single contraction on two tensors");
2305 
2306  size_type ift1 = tc1.size() / (nn*ii1*ii2);
2307  size_type ift2 = tc2.size() / (nn*ii3*ii4);
2308 
2309  base_tensor::iterator it = t.begin();
2310  for (size_type i = 0; i < ii4; ++i)
2311  for (size_type j = 0; j < ii3; ++j)
2312  for (size_type k = 0; k < ii2; ++k)
2313  for (size_type l = 0; l < ii1; ++l)
2314  for (size_type q = 0; q < ift1; ++q)
2315  for (size_type p = 0; p < ift2; ++p, ++it) {
2316  *it = scalar_type(0);
2317  size_type ind1 = q+l*ift1+k*ift1*ii1*nn;
2318  size_type ind2 = p+j*ift2+i*ift2*ii3*nn;
2319  for (size_type n = 0; n < nn; ++n)
2320  *it += tc1[ind1+n*ift1*ii1] * tc2[ind2+n*ift2*ii3];
2321  }
2322 
2323  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2324  return 0;
2325  }
2326  ga_instruction_contract_2_1_rev(base_tensor &t_, base_tensor &tc1_,
2327  base_tensor &tc2_,
2328  size_type n_, size_type i1_, size_type i2_,
2329  size_type i3_, size_type i4_)
2330  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_),
2331  ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_) {}
2332  };
2333 
2334  // Performs Amijklp Bnqjrls -> Cmnikpqrs. To be optimized
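  // i.e. C(m,n,i,k,p,q,r,s) = sum_{j,l} A(m,i,j,k,l,p) * B(n,q,j,r,l,s);
  // nn1 and nn2 are the ranges of the contracted indices j and l, and
  // inv_tc2 swaps the roles of j and l inside the second tensor.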
2335  struct ga_instruction_contract_2_2 : public ga_instruction {
2336  base_tensor &t, &tc1, &tc2;
2337  size_type nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6;
2338  bool inv_tc2;
2339  virtual int exec() {
2340  GA_DEBUG_INFO("Instruction: double contraction on two tensors");
2341 
2342  size_type ift1 = tc1.size() / (nn1*nn2*ii1*ii2*ii3);
2343  size_type ift2 = tc2.size() / (nn1*nn2*ii3*ii4*ii5);
2344 
2345  size_type sn1 = ift2*ii4, sn2 = ift2*ii4*nn1*ii5;
2346  if (inv_tc2) std::swap(sn1, sn2);
2347 
2348  base_tensor::iterator it = t.begin();
2349  for (size_type i = 0; i < ii6; ++i)
2350  for (size_type j = 0; j < ii5; ++j)
2351  for (size_type k = 0; k < ii4; ++k)
2352  for (size_type l = 0; l < ii3; ++l)
2353  for (size_type p = 0; p < ii2; ++p)
2354  for (size_type q = 0; q < ii1; ++q)
2355  for (size_type r = 0; r < ift2; ++r)
2356  for (size_type s = 0; s < ift1; ++s, ++it) {
2357  *it = scalar_type(0);
2358  size_type ind1
2359  = s+q*ift1+p*ift1*ii1*nn1+l*ift1*ii1*nn1*ii2*nn2;
2360  size_type ind2
2361  = r+k*ift2+j*ift2*ii4*nn1+i*ift2*ii4*nn1*ii5*nn2;
2362  for (size_type n1 = 0; n1 < nn1; ++n1)
2363  for (size_type n2 = 0; n2 < nn2; ++n2)
2364  *it += tc1[ind1+n1*ift1*ii1+n2*ift1*ii1*nn1*ii2]
2365  * tc2[ind2+n1*sn1+n2*sn2];
2366  }
2367 
2368  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2369  return 0;
2370  }
2371  ga_instruction_contract_2_2(base_tensor &t_, base_tensor &tc1_,
2372  base_tensor &tc2_,
2373  size_type n1_, size_type n2_,
2374  size_type i1_, size_type i2_, size_type i3_,
2375  size_type i4_, size_type i5_, size_type i6_,
2376  bool intc2)
2377  : t(t_), tc1(tc1_), tc2(tc2_), nn1(n1_), nn2(n2_),
2378  ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_), ii5(i5_), ii6(i6_),
2379  inv_tc2(intc2) {}
2380  };
2381 
2382  // Performs Amijklp Bnqjrls -> Cnmikpqrs. To be optimized
2383  struct ga_instruction_contract_2_2_rev : public ga_instruction {
2384  base_tensor &t, &tc1, &tc2;
2385  size_type nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6;
2386  bool inv_tc2;
2387  virtual int exec() {
2388  GA_DEBUG_INFO("Instruction: double contraction on two tensors");
2389 
2390  size_type ift1 = tc1.size() / (nn1*nn2*ii1*ii2*ii3);
2391  size_type ift2 = tc2.size() / (nn1*nn2*ii3*ii4*ii5);
2392 
2393  size_type sn1 = ift2*ii4, sn2 = ift2*ii4*nn1*ii5;
2394  if (inv_tc2) std::swap(sn1, sn2);
2395 
2396  base_tensor::iterator it = t.begin();
2397  for (size_type i = 0; i < ii6; ++i)
2398  for (size_type j = 0; j < ii5; ++j)
2399  for (size_type k = 0; k < ii4; ++k)
2400  for (size_type l = 0; l < ii3; ++l)
2401  for (size_type p = 0; p < ii2; ++p)
2402  for (size_type q = 0; q < ii1; ++q)
2403  for (size_type s = 0; s < ift1; ++s)
2404  for (size_type r = 0; r < ift2; ++r, ++it) {
2405  *it = scalar_type(0);
2406  size_type ind1
2407  = s+q*ift1+p*ift1*ii1*nn1+l*ift1*ii1*nn1*ii2*nn2;
2408  size_type ind2
2409  = r+k*ift2+j*ift2*ii4*nn1+i*ift2*ii4*nn1*ii5*nn2;
2410  for (size_type n1 = 0; n1 < nn1; ++n1)
2411  for (size_type n2 = 0; n2 < nn2; ++n2)
2412  *it += tc1[ind1+n1*ift1*ii1+n2*ift1*ii1*nn1*ii2]
2413  * tc2[ind2+n1*sn1+n2*sn2];
2414  }
2415 
2416  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2417  return 0;
2418  }
2419  ga_instruction_contract_2_2_rev(base_tensor &t_, base_tensor &tc1_,
2420  base_tensor &tc2_,
2421  size_type n1_, size_type n2_,
2422  size_type i1_, size_type i2_, size_type i3_,
2423  size_type i4_, size_type i5_, size_type i6_,
2424  bool intc2)
2425  : t(t_), tc1(tc1_), tc2(tc2_), nn1(n1_), nn2(n2_),
2426  ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_), ii5(i5_), ii6(i6_),
2427  inv_tc2(intc2) {}
2428  };
2429 
2430 
2431  // Performs Amj Bjk -> Cmk. To be optimized
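  // i.e. C(m,k) = sum_j A(m,j) * B(j,k), n being the size of the shared
  // index j.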
2432  struct ga_instruction_matrix_mult : public ga_instruction {
2433  base_tensor &t, &tc1, &tc2;
2434  size_type n;
2435  virtual int exec() {
2436  GA_DEBUG_INFO("Instruction: order one contraction "
2437  "(dot product or matrix multiplication)");
2438 
2439  size_type s1 = tc1.size() / n;
2440  size_type s2 = tc2.size() / n;
2441 
2442  base_tensor::iterator it = t.begin();
2443  for (size_type k = 0; k < s2; ++k)
2444  for (size_type i = 0; i < s1; ++i, ++it) {
2445  *it = scalar_type(0);
2446  for (size_type j = 0; j < n; ++j)
2447  *it += tc1[i+j*s1] * tc2[j+k*n];
2448  }
2449  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2450  return 0;
2451  }
2452  ga_instruction_matrix_mult(base_tensor &t_, base_tensor &tc1_,
2453  base_tensor &tc2_, size_type n_)
2454  : t(t_), tc1(tc1_), tc2(tc2_), n(n_) {}
2455  };
2456 
2457  // Performs Amij Bnjk -> Cmnik. To be optimized
2458  struct ga_instruction_matrix_mult_spec : public ga_instruction {
2459  base_tensor &t, &tc1, &tc2;
2460  size_type n, m, p; // tc1 of size q*m*n, tc2 of size l*n*p
2461  // t of size q*l*m*p
2462  virtual int exec() {
2463  GA_DEBUG_INFO("Instruction: specific order one contraction "
2464  "(dot product or matrix multiplication)");
2465  size_type q = tc1.size() / (m * n);
2466  size_type l = tc2.size() / (p * n);
2467 
2468  base_tensor::iterator it = t.begin();
2469  for (size_type r = 0; r < p; ++r)
2470  for (size_type k = 0; k < m; ++k)
2471  for (size_type j = 0; j < l; ++j)
2472  for (size_type i = 0; i < q; ++i, ++it) {
2473  *it = scalar_type(0);
2474  for (size_type s = 0; s < n; ++s)
2475  *it += tc1[i+k*q+s*q*m] * tc2[j+s*l+r*l*n];
2476  }
2477  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2478  return 0;
2479  }
2480  ga_instruction_matrix_mult_spec(base_tensor &t_, base_tensor &tc1_,
2481  base_tensor &tc2_, size_type n_,
2482  size_type m_, size_type p_)
2483  : t(t_), tc1(tc1_), tc2(tc2_), n(n_), m(m_), p(p_) {}
2484  };
2485 
2486  // Performs Amij Bnjk -> Cnmik. To be optimized
2487  struct ga_instruction_matrix_mult_spec2 : public ga_instruction {
2488  base_tensor &t, &tc1, &tc2;
2489  size_type n, m, p; // tc1 of size q*m*n, tc2 of size l*n*p
2490  // t of size l*q*m*p
2491  virtual int exec() {
2492  GA_DEBUG_INFO("Instruction: specific order one contraction "
2493  "(dot product or matrix multiplication)");
2494  size_type q = tc1.size() / (m * n);
2495  size_type l = tc2.size() / (p * n);
2496 
2497  base_tensor::iterator it = t.begin();
2498  for (size_type r = 0; r < p; ++r)
2499  for (size_type k = 0; k < m; ++k)
2500  for (size_type i = 0; i < q; ++i)
2501  for (size_type j = 0; j < l; ++j, ++it) {
2502  *it = scalar_type(0);
2503  for (size_type s = 0; s < n; ++s)
2504  *it += tc1[i+k*q+s*q*m] * tc2[j+s*l+r*l*n];
2505  }
2506  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2507  return 0;
2508  }
2509  ga_instruction_matrix_mult_spec2(base_tensor &t_, base_tensor &tc1_,
2510  base_tensor &tc2_, size_type n_,
2511  size_type m_, size_type p_)
2512  : t(t_), tc1(tc1_), tc2(tc2_), n(n_), m(m_), p(p_) {}
2513  };
2514 
2515  // Performs Ani Bmi -> Cmn
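  // Generic contraction over the trailing index i of size nn:
  // C(m,n) = sum_i A(n,i) * B(m,i), the tc2 index m running fastest in the
  // result; a BLAS dgemm call is used instead when GA_USES_BLAS is enabled.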
2516  struct ga_instruction_contraction : public ga_instruction {
2517  base_tensor &t, &tc1, &tc2;
2518  size_type nn;
2519  virtual int exec() {
2520  GA_DEBUG_INFO("Instruction: contraction operation of size " << nn);
2521 #if GA_USES_BLAS
2522  long m = int(tc1.size()/nn), k = int(nn), n = int(tc2.size()/nn);
2523  long lda = m, ldb = n, ldc = m;
2524  char T = 'T', N = 'N';
2525  scalar_type alpha(1), beta(0);
2526  gmm::dgemm_(&N, &T, &m, &n, &k, &alpha, &(tc1[0]), &lda, &(tc2[0]), &ldb,
2527  &beta, &(t[0]), &ldc);
2528 #else
2529  size_type s1 = tc1.size()/nn, s2 = tc2.size()/nn;
2530  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2531 
2532  auto it1=tc1.begin(), it2=tc2.begin(), it2end=it2 + s2;
2533  for (auto it = t.begin(); it != t.end(); ++it) {
2534  auto it11 = it1, it22 = it2;
2535  scalar_type a = (*it11) * (*it22);
2536  for (size_type i = 1; i < nn; ++i)
2537  { it11 += s1; it22 += s2; a += (*it11) * (*it22); }
2538  *it = a;
2539  ++it2; if (it2 == it2end) { it2 = tc2.begin(), ++it1; }
2540  }
2541  // auto it = t.begin(); // Unoptimized version.
2542  // for (size_type i = 0; i < s1; ++i)
2543  // for (size_type j = 0; j < s2; ++j, ++it) {
2544  // *it = scalar_type(0);
2545  // for (size_type k = 0; k < nn; ++k)
2546  // *it += tc1[i+k*s1] * tc2[j+k*s2];
2547  // }
2548 #endif
2549  return 0;
2550  }
2551  ga_instruction_contraction(base_tensor &t_, base_tensor &tc1_,
2552  base_tensor &tc2_, size_type n_)
2553  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
2554  };
2555 
2556  // Performs Ani Bmi -> Cmn
2557  struct ga_instruction_contraction_opt0_2 : public ga_instruction {
2558  base_tensor &t, &tc1, &tc2;
2559  size_type n, q;
2560  virtual int exec() {
2561  GA_DEBUG_INFO("Instruction: contraction operation of size " << n*q <<
2562  " optimized for vectorized second tensor of type 2");
2563  size_type nn = n*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_q = s2/q;
2564  size_type s1_qq = s1*q, s2_qq = s2*q;
2565  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2566 
2567  auto it = t.begin(), it1 = tc1.begin();
2568  for (size_type i = 0; i < s1; ++i, ++it1) {
2569  auto it2 = tc2.begin();
2570  for (size_type j = 0; j < s2_q; ++j) {
2571  if (j) it2+=q;
2572  auto itt1 = it1;
2573  for (size_type l = 0; l < q; ++l, ++it) {
2574  if (l) itt1 += s1;
2575  auto ittt1 = itt1, ittt2 = it2;
2576  *it = *ittt1 * (*ittt2);
2577  for (size_type m = 1; m < n; ++m) {
2578  ittt1 += s1_qq, ittt2 += s2_qq; *it += *ittt1 * (*ittt2);
2579  }
2580  }
2581  }
2582  }
2583  // base_tensor u = t;
2584  // ga_instruction_contraction toto(t, tc1, tc2, n*q);
2585  // toto.exec();
2586  // GMM_ASSERT1(gmm::vect_dist2(t.as_vector(), u.as_vector()) < 1E-9, "Erroneous");
2587  return 0;
2588  }
2589  ga_instruction_contraction_opt0_2(base_tensor &t_, base_tensor &tc1_,
2590  base_tensor &tc2_, size_type n_,
2591  size_type q_)
2592  : t(t_), tc1(tc1_), tc2(tc2_), n(n_), q(q_) {}
2593  };
2594 
2595  // Performs Ani Bmi -> Cmn
2596  template <int N>
2597  struct ga_instruction_contraction_opt0_2_unrolled : public ga_instruction {
2598  base_tensor &t, &tc1, &tc2;
2599  size_type q;
2600  virtual int exec() {
2601  GA_DEBUG_INFO("Instruction: unrolled contraction operation of size " << N*q
2602  << " optimized for vectorized second tensor of type 2");
2603  size_type nn = N*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_q = s2/q;
2604  size_type s1_qq = s1*q, s2_qq = s2*q;
2605  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2606 
2607  auto it = t.begin(), it1 = tc1.begin();
2608  for (size_type i = 0; i < s1; ++i, ++it1) {
2609  auto it2 = tc2.begin();
2610  for (size_type j = 0; j < s2_q; ++j) {
2611  if (j) it2+=q;
2612  auto itt1 = it1;
2613  for (size_type l = 0; l < q; ++l, ++it) {
2614  if (l) itt1 += s1;
2615  auto ittt1 = itt1, ittt2 = it2;
2616  *it = *ittt1 * (*ittt2);
2617  for (size_type m = 1; m < N; ++m) {
2618  ittt1 += s1_qq, ittt2 += s2_qq; *it += *ittt1 * (*ittt2);
2619  }
2620  }
2621  }
2622  }
2623  return 0;
2624  }
2625  ga_instruction_contraction_opt0_2_unrolled(base_tensor &t_, base_tensor &tc1_,
2626  base_tensor &tc2_, size_type q_)
2627  : t(t_), tc1(tc1_), tc2(tc2_), q(q_) {}
2628  };
2629 
2630  // Performs Ani Bmi -> Cmn
2631  template <int N, int Q>
2632  struct ga_instruction_contraction_opt0_2_dunrolled : public ga_instruction {
2633  base_tensor &t, &tc1, &tc2;
2634  virtual int exec() {
2635  GA_DEBUG_INFO("Instruction: unrolled contraction operation of size " << N*Q
2636  << " optimized for vectorized second tensor of type 2");
2637  size_type s1 = tc1.size()/(N*Q), s2 = tc2.size()/(N*Q), s2_q = s2/Q;
2638  size_type s1_qq = s1*Q, s2_qq = s2*Q;
2639  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2640 
2641  auto it = t.begin(), it1 = tc1.begin();
2642  for (size_type i = 0; i < s1; ++i, ++it1) {
2643  auto it2 = tc2.begin();
2644  for (size_type j = 0; j < s2_q; ++j) {
2645  if (j) it2+=Q;
2646  auto itt1 = it1;
2647  for (size_type l = 0; l < Q; ++l, ++it) {
2648  if (l) itt1 += s1;
2649  auto ittt1 = itt1, ittt2 = it2;
2650  *it = *ittt1 * (*ittt2);
2651  for (size_type m = 1; m < N; ++m) {
2652  ittt1 += s1_qq, ittt2 += s2_qq; *it += *ittt1 * (*ittt2);
2653  }
2654  }
2655  }
2656  }
2657  return 0;
2658  }
2659  ga_instruction_contraction_opt0_2_dunrolled
2660  (base_tensor &t_, base_tensor &tc1_, base_tensor &tc2_)
2661  : t(t_), tc1(tc1_), tc2(tc2_) {}
2662  };
2663 
2664  // Performs Ani Bmi -> Cmn
2665  struct ga_instruction_contraction_opt2_0 : public ga_instruction {
2666  base_tensor &t, &tc1, &tc2;
2667  size_type n, q;
2668  virtual int exec() {
2669  GA_DEBUG_INFO("Instruction: contraction operation of size " << n*q <<
2670  " optimized for vectorized first tensor of type 2");
2671  size_type nn = n*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn;
2672  size_type s1_q = s1/q, s1_qq = s1*q, s2_qq = s2*q;
2673  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2674 
2675  auto it = t.begin();
2676  for (size_type i = 0; i < s1_q; ++i) {
2677  auto it1 = tc1.begin() + i*q;
2678  for (size_type l = 0; l < q; ++l) {
2679  auto it2 = tc2.begin() + l*s2;
2680  for (size_type j = 0; j < s2; ++j, ++it, ++it2) {
2681  auto itt1 = it1, itt2 = it2;
2682  *it = *itt1 * (*itt2);
2683  for (size_type m = 1; m < n; ++m) {
2684  itt1 += s1_qq, itt2 += s2_qq; *it += *itt1 * (*itt2);
2685  }
2686  }
2687  }
2688  }
2689  return 0;
2690  }
2691  ga_instruction_contraction_opt2_0(base_tensor &t_, base_tensor &tc1_,
2692  base_tensor &tc2_, size_type n_,
2693  size_type q_)
2694  : t(t_), tc1(tc1_), tc2(tc2_), n(n_), q(q_) { }
2695  };
2696 
2697  // Performs Ani Bmi -> Cmn
2698  template <int N>
2699  struct ga_instruction_contraction_opt2_0_unrolled : public ga_instruction {
2700  base_tensor &t, &tc1, &tc2;
2701  size_type q;
2702  virtual int exec() {
2703  GA_DEBUG_INFO("Instruction: unrolled contraction operation of size " << N*q
2704  << " optimized for vectorized first tensor of type 2");
2705  size_type nn = N*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn;
2706  size_type s1_q = s1/q, s1_qq = s1*q, s2_qq = s2*q;
2707  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2708 
2709  auto it = t.begin(), it1 = tc1.begin();
2710  for (size_type i = 0; i < s1_q; ++i, it1 += q) {
2711  for (size_type l = 0; l < q; ++l) {
2712  auto it2 = tc2.begin() + l*s2;
2713  for (size_type j = 0; j < s2; ++j, ++it, ++it2) {
2714  auto itt1 = it1, itt2 = it2;
2715  *it = *itt1 * (*itt2);
2716  for (size_type m = 1; m < N; ++m) {
2717  itt1 += s1_qq, itt2 += s2_qq; *it += *itt1 * (*itt2);
2718  }
2719  }
2720  }
2721  }
2722  return 0;
2723  }
2724  ga_instruction_contraction_opt2_0_unrolled(base_tensor &t_, base_tensor &tc1_,
2725  base_tensor &tc2_, size_type q_)
2726  : t(t_), tc1(tc1_), tc2(tc2_), q(q_) {}
2727  };
2728 
2729  // Performs Ani Bmi -> Cmn
2730  template <int N, int Q>
2731  struct ga_instruction_contraction_opt2_0_dunrolled : public ga_instruction {
2732  base_tensor &t, &tc1, &tc2;
2733  virtual int exec() {
2734  GA_DEBUG_INFO("Instruction: unrolled contraction operation of size " << N*Q
2735  << " optimized for vectorized first tensor of type 2");
2736  size_type s1 = tc1.size()/(N*Q), s2 = tc2.size()/(N*Q);
2737  size_type s1_q = s1/Q, s1_qq = s1*Q, s2_qq = s2*Q;
2738  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2739 
2740  auto it = t.begin(), it1 = tc1.begin();
2741  for (size_type i = 0; i < s1_q; ++i, it1 += Q) {
2742  for (size_type l = 0; l < Q; ++l) {
2743  auto it2 = tc2.begin() + l*s2;
2744  for (size_type j = 0; j < s2; ++j, ++it, ++it2) {
2745  auto itt1 = it1, itt2 = it2;
2746  *it = *itt1 * (*itt2);
2747  for (size_type m = 1; m < N; ++m) {
2748  itt1 += s1_qq, itt2 += s2_qq; *it += *itt1 * (*itt2);
2749  }
2750  }
2751  }
2752  }
2753  return 0;
2754  }
2755  ga_instruction_contraction_opt2_0_dunrolled
2756  (base_tensor &t_, base_tensor &tc1_, base_tensor &tc2_)
2757  : t(t_), tc1(tc1_), tc2(tc2_) {}
2758  };
2759 
2760  // Performs Ani Bmi -> Cmn
2761  struct ga_instruction_contraction_opt0_1 : public ga_instruction {
2762  base_tensor &t, &tc1, &tc2;
2763  size_type nn;
2764  virtual int exec() {
2765  GA_DEBUG_INFO("Instruction: contraction operation of size " << nn <<
2766  " optimized for vectorized second tensor of type 1");
2767  size_type ss1=tc1.size(), s1 = ss1/nn, s2=tc2.size()/nn, s2_n=s2/nn;
2768 
2769  auto it = t.begin(), it1 = tc1.begin();
2770  for (size_type i = 0; i < s1; ++i, ++it1) {
2771  auto it2 = tc2.begin();
2772  for (size_type j = 0; j < s2_n; ++j) {
2773  if (j) it2 += nn;
2774  auto itt1 = it1;
2775  *it++ = (*itt1) * (*it2);
2776  for (size_type k = 1; k < nn; ++k)
2777  { itt1 += s1; *it++ = (*itt1) * (*it2); }
2778  }
2779  }
2780  return 0;
2781  }
2782  ga_instruction_contraction_opt0_1(base_tensor &t_, base_tensor &tc1_,
2783  base_tensor &tc2_, size_type n_)
2784  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
2785  };
2786 
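  // Compile-time unrolled helper: writes it[k] = it1[k*s1] * a for
  // k = N-1 down to 0.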
2787  template<int N> inline void reduc_elem_unrolled_opt1_
2788  (const base_vector::iterator &it, const base_vector::iterator &it1,
2789  scalar_type a, size_type s1) {
2790  it[N-1] = it1[(N-1)*s1] * a;
2791  reduc_elem_unrolled_opt1_<N-1>(it, it1, a, s1);
2792  }
2793  template<> inline void reduc_elem_unrolled_opt1_<1>
2794  (const base_vector::iterator &it, const base_vector::iterator &it1,
2795  scalar_type a, size_type /* s1 */)
2796  { *it = (*it1) * a; }
2797 
2798  // Performs Ani Bmi -> Cmn
2799  template <int N>
2800  struct ga_instruction_contraction_opt0_1_unrolled : public ga_instruction {
2801  base_tensor &t, &tc1, &tc2;
2802  virtual int exec() {
2803  GA_DEBUG_INFO("Instruction: unrolled contraction operation of size " << N
2804  << " optimized for vectorized second tensor of type 1");
2805  size_type s1 = tc1.size()/N, s2 = tc2.size()/N;
2806  auto it = t.begin(), it1 = tc1.begin();
2807  for (size_type i = 0; i < s1; ++i, ++it1) {
2808  auto it2 = tc2.begin(), it2e = it2 + s2;
2809  for (; it2 != it2e; it2 += N, it += N)
2810  reduc_elem_unrolled_opt1_<N>(it, it1, *it2, s1);
2811  }
2812  return 0;
2813  }
2814  ga_instruction_contraction_opt0_1_unrolled(base_tensor &t_, base_tensor &tc1_,
2815  base_tensor &tc2_)
2816  : t(t_), tc1(tc1_), tc2(tc2_) {}
2817  };
2818 
2819  // Performs Ani Bmi -> Cmn
2820  struct ga_instruction_contraction_opt1_1 : public ga_instruction {
2821  base_tensor &t, &tc1, &tc2;
2822  size_type nn;
2823  virtual int exec() {
2824  GA_DEBUG_INFO("Instruction: contraction operation of size " << nn <<
2825  " optimized for both vectorized tensors of type 1");
2826  size_type s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_1 = s2+1;
2827  GA_DEBUG_ASSERT(t.size() == s2*s1, "Internal error");
2828  size_type ss1 = s1/nn, ss2 = s2/nn;
2829 
2830  // std::fill(t.begin(), t.end(), scalar_type(0)); // Factorized
2831  auto it2 = tc2.begin();
2832  for (size_type j = 0; j < ss2; ++j) {
2833  if (j) it2 += nn;
2834  auto it1 = tc1.begin(), it = t.begin() + j*nn;
2835  for (size_type i = 0; i < ss1; ++i) {
2836  if (i) { it1 += nn, it += s2*nn; }
2837  scalar_type a = (*it1) * (*it2);
2838  auto itt = it;
2839  *itt = a; itt += s2_1; *itt = a;
2840  for (size_type k = 2; k < nn; ++k) { itt += s2_1; *itt = a; }
2841  }
2842  }
2843  return 0;
2844  }
2845  ga_instruction_contraction_opt1_1(base_tensor &t_, base_tensor &tc1_,
2846  base_tensor &tc2_, size_type n_)
2847  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
2848  };
2849 
2850 
2851 
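  // Compile-time unrolled helper returning the dot product
  // sum_{k=0}^{N-1} it1[k*s1] * it2[k*s2].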
2852  template<int N> inline scalar_type reduc_elem_unrolled__
2853  (base_tensor::iterator &it1, base_tensor::iterator &it2,
2854  size_type s1, size_type s2) {
2855  return (it1[(N-1)*s1])*(it2[(N-1)*s2])
2856  + reduc_elem_unrolled__<N-1>(it1, it2, s1, s2);
2857  }
2858  template<> inline scalar_type reduc_elem_unrolled__<1>
2859  (base_tensor::iterator &it1, base_tensor::iterator &it2,
2860  size_type /*s1*/, size_type /*s2*/)
2861  { return (*it1)*(*it2); }
2862 
2863  // Performs Ani Bmi -> Cmn. Unrolled operation.
2864  template<int N> struct ga_instruction_contraction_unrolled
2865  : public ga_instruction {
2866  base_tensor &t, &tc1, &tc2;
2867  virtual int exec() {
2868  GA_DEBUG_INFO("Instruction: unrolled contraction operation of size " << N);
2869  size_type s1 = tc1.size()/N, s2 = tc2.size()/N;
2870  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error, " << t.size()
2871  << " != " << s1 << "*" << s2);
2872  base_tensor::iterator it1=tc1.begin(), it2=tc2.begin(), it2end=it2 + s2;
2873  for (base_tensor::iterator it = t.begin(); it != t.end(); ++it) {
2874  *it = reduc_elem_unrolled__<N>(it1, it2, s1, s2);
2875  ++it2; if (it2 == it2end) { it2 = tc2.begin(), ++it1; }
2876  }
2877  return 0;
2878  }
2879  ga_instruction_contraction_unrolled(base_tensor &t_, base_tensor &tc1_,
2880  base_tensor &tc2_)
2881  : t(t_), tc1(tc1_), tc2(tc2_) {}
2882  };
2883 
2884  template<int N, int S2> inline void reduc_elem_d_unrolled__
2885  (base_tensor::iterator &it, base_tensor::iterator &it1,
2886  base_tensor::iterator &it2, size_type s1, size_type s2) {
2887  *it++ = reduc_elem_unrolled__<N>(it1, it2, s1, s2);
2888  reduc_elem_d_unrolled__<N, S2-1>(it, it1, ++it2, s1, s2);
2889  }
2890  // Repeated definitions follow because partial specialization of
2891  // function templates is not allowed in C++ for the moment.
2892  // The gain in assembly time is small compared to the simply unrolled version.
2893  template<> inline void reduc_elem_d_unrolled__<1, 0>
2894  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2895  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2896  template<> inline void reduc_elem_d_unrolled__<2, 0>
2897  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2898  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2899  template<> inline void reduc_elem_d_unrolled__<3, 0>
2900  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2901  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2902  template<> inline void reduc_elem_d_unrolled__<4, 0>
2903  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2904  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2905  template<> inline void reduc_elem_d_unrolled__<5, 0>
2906  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2907  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2908  template<> inline void reduc_elem_d_unrolled__<6, 0>
2909  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2910  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2911  template<> inline void reduc_elem_d_unrolled__<7, 0>
2912  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2913  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2914  template<> inline void reduc_elem_d_unrolled__<8, 0>
2915  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2916  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2917  template<> inline void reduc_elem_d_unrolled__<9, 0>
2918  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2919  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2920  template<> inline void reduc_elem_d_unrolled__<10, 0>
2921  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2922  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2923  template<> inline void reduc_elem_d_unrolled__<11, 0>
2924  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2925  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2926  template<> inline void reduc_elem_d_unrolled__<12, 0>
2927  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2928  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2929  template<> inline void reduc_elem_d_unrolled__<13, 0>
2930  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2931  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2932  template<> inline void reduc_elem_d_unrolled__<14, 0>
2933  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2934  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2935  template<> inline void reduc_elem_d_unrolled__<15, 0>
2936  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2937  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2938  template<> inline void reduc_elem_d_unrolled__<16, 0>
2939  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2940  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2941 
2942  // Performs Ani Bmi -> Cmn. Automatically doubly unrolled operation
2943  // (for uniform meshes).
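  // N is the contracted size and S2 the expected value of tc2.size()/N
  // (checked by GA_DEBUG_ASSERT).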
2944  template<int N, int S2> struct ga_ins_red_d_unrolled
2945  : public ga_instruction {
2946  base_tensor &t, &tc1, &tc2;
2947  virtual int exec() {
2948  GA_DEBUG_INFO("Instruction: doubly unrolled contraction operation of size "
2949  << S2 << "x" << N);
2950  size_type s1 = tc1.size()/N, s2 = tc2.size()/N;
2951  GA_DEBUG_ASSERT(s2 == S2, "Internal error");
2952  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error, " << t.size()
2953  << " != " << s1 << "*" << s2);
2954  base_tensor::iterator it = t.begin(), it1 = tc1.begin();
2955  for (size_type ii = 0; ii < s1; ++ii, ++it1) {
2956  base_tensor::iterator it2 = tc2.begin();
2957  reduc_elem_d_unrolled__<N, S2>(it, it1, it2, s1, s2);
2958  }
2959  GA_DEBUG_ASSERT(it == t.end(), "Internal error");
2960  return 0;
2961  }
2962  ga_ins_red_d_unrolled(base_tensor &t_, base_tensor &tc1_, base_tensor &tc2_)
2963  : t(t_), tc1(tc1_), tc2(tc2_) {}
2964  };
2965 
2966 
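  // Chooses the most specialized contraction instruction according to the
  // sparsity (vectorization) pattern of the two operands and the contraction
  // size n; falls back to the generic or simply unrolled versions otherwise.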
2967  pga_instruction ga_instruction_contraction_switch
2968  (assembly_tensor &t_, assembly_tensor &tc1_, assembly_tensor &tc2_,
2969  size_type n, bool &to_clear) {
2970  base_tensor &t = t_.tensor(), &tc1 = tc1_.tensor(), &tc2 = tc2_.tensor();
2971 
2972  if (tc1_.sparsity() == 1 && tc2_.sparsity() == 1 &&
2973  tc1_.qdim() == n && tc2_.qdim() == n) {
2974  to_clear = true;
2975  t_.set_sparsity(10, tc1_.qdim());
2976  return std::make_shared<ga_instruction_contraction_opt1_1>(t, tc1, tc2, n);
2977  }
2978 
2979  if (tc2_.sparsity() == 1) {
2980  switch(n) {
2981  case 2:
2982  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<2>>
2983  (t, tc1, tc2);
2984  case 3:
2985  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<3>>
2986  (t, tc1, tc2);
2987  case 4:
2988  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<4>>
2989  (t, tc1, tc2);
2990  case 5:
2991  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<5>>
2992  (t, tc1, tc2);
2993  default:
2994  return std::make_shared<ga_instruction_contraction_opt0_1>(t,tc1,tc2, n);
2995  }
2996  }
2997  if (tc2_.sparsity() == 2) {
2998  size_type q2 = tc2.sizes()[1];
2999  size_type n2 = (tc2.sizes().size() > 2) ? tc2.sizes()[1] : 1;
3000  if (n2*q2 == n) {
3001  switch (n2) {
3002  case 1:
3003  switch (q2) {
3004  case 2:
3005  return
3006  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,2>>
3007  (t, tc1, tc2);
3008  case 3:
3009  return
3010  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,3>>
3011  (t, tc1, tc2);
3012  case 4:
3013  return
3014  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,4>>
3015  (t, tc1, tc2);
3016  default :
3017  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<1>>
3018  (t, tc1, tc2, q2);
3019  }
3020  case 2:
3021  switch (q2) {
3022  case 2:
3023  return
3024  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,2>>
3025  (t, tc1, tc2);
3026  case 3:
3027  return
3028  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,3>>
3029  (t, tc1, tc2);
3030  case 4:
3031  return
3032  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,4>>
3033  (t, tc1, tc2);
3034  default :
3035  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<2>>
3036  (t, tc1, tc2, q2);
3037  }
3038  case 3:
3039  switch (q2) {
3040  case 2:
3041  return
3042  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,2>>
3043  (t, tc1, tc2);
3044  case 3:
3045  return
3046  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,3>>
3047  (t, tc1, tc2);
3048  case 4:
3049  return
3050  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,4>>
3051  (t, tc1, tc2);
3052  default :
3053  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<3>>
3054  (t, tc1, tc2, q2);
3055  }
3056  case 4:
3057  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<4>>
3058  (t, tc1, tc2, q2);
3059  case 5:
3060  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<5>>
3061  (t, tc1, tc2, q2);
3062  default:
3063  return std::make_shared<ga_instruction_contraction_opt0_2>
3064  (t,tc1,tc2,n2,q2);
3065  }
3066  }
3067  }
3068  if (tc1_.sparsity() == 2) {
3069  size_type q1 = tc1.sizes()[1];
3070  size_type n1 = (tc1.sizes().size() > 2) ? tc1.sizes()[2] : 1;
3071  if (n1*q1 == n) {
3072  switch (n1) {
3073  case 1:
3074  switch (q1) {
3075  case 2:
3076  return
3077  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,2>>
3078  (t, tc1, tc2);
3079  case 3:
3080  return
3081  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,3>>
3082  (t, tc1, tc2);
3083  case 4:
3084  return
3085  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,4>>
3086  (t, tc1, tc2);
3087  default :
3088  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<1>>
3089  (t, tc1, tc2, q1);
3090  }
3091  case 2:
3092  switch (q1) {
3093  case 2:
3094  return
3095  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,2>>
3096  (t, tc1, tc2);
3097  case 3:
3098  return
3099  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,3>>
3100  (t, tc1, tc2);
3101  case 4:
3102  return
3103  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,4>>
3104  (t, tc1, tc2);
3105  default :
3106  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<2>>
3107  (t, tc1, tc2, q1);
3108  }
3109  case 3:
3110  switch (q1) {
3111  case 2:
3112  return
3113  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,2>>
3114  (t, tc1, tc2);
3115  case 3:
3116  return
3117  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,3>>
3118  (t, tc1, tc2);
3119  case 4:
3120  return
3121  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,4>>
3122  (t, tc1, tc2);
3123  default :
3124  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<3>>
3125  (t, tc1, tc2, q1);
3126  }
3127  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<3>>
3128  (t, tc1, tc2, q1);
3129  case 4:
3130  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<4>>
3131  (t, tc1, tc2, q1);
3132  case 5:
3133  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<5>>
3134  (t, tc1, tc2, q1);
3135  default:
3136  return std::make_shared<ga_instruction_contraction_opt2_0>
3137  (t,tc1,tc2, n1, q1);
3138  }
3139  }
3140  }
3141 
3142  switch(n) {
3143  case 2 : return std::make_shared<ga_instruction_contraction_unrolled< 2>>
3144  (t, tc1, tc2);
3145  case 3 : return std::make_shared<ga_instruction_contraction_unrolled< 3>>
3146  (t, tc1, tc2);
3147  case 4 : return std::make_shared<ga_instruction_contraction_unrolled< 4>>
3148  (t, tc1, tc2);
3149  case 5 : return std::make_shared<ga_instruction_contraction_unrolled< 5>>
3150  (t, tc1, tc2);
3151  case 6 : return std::make_shared<ga_instruction_contraction_unrolled< 6>>
3152  (t, tc1, tc2);
3153  case 7 : return std::make_shared<ga_instruction_contraction_unrolled< 7>>
3154  (t, tc1, tc2);
3155  case 8 : return std::make_shared<ga_instruction_contraction_unrolled< 8>>
3156  (t, tc1, tc2);
3157  case 9 : return std::make_shared<ga_instruction_contraction_unrolled< 9>>
3158  (t, tc1, tc2);
3159  case 10 : return std::make_shared<ga_instruction_contraction_unrolled<10>>
3160  (t, tc1, tc2);
3161  case 11 : return std::make_shared<ga_instruction_contraction_unrolled<11>>
3162  (t, tc1, tc2);
3163  case 12 : return std::make_shared<ga_instruction_contraction_unrolled<12>>
3164  (t, tc1, tc2);
3165  case 13 : return std::make_shared<ga_instruction_contraction_unrolled<13>>
3166  (t, tc1, tc2);
3167  case 14 : return std::make_shared<ga_instruction_contraction_unrolled<14>>
3168  (t, tc1, tc2);
3169  case 15 : return std::make_shared<ga_instruction_contraction_unrolled<15>>
3170  (t, tc1, tc2);
3171  case 16 : return std::make_shared<ga_instruction_contraction_unrolled<16>>
3172  (t, tc1, tc2);
3173  default : return std::make_shared<ga_instruction_contraction>
3174  (t, tc1, tc2, n);
3175  }
3176  }
3177 
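  // Variant of the previous switch used for uniform meshes: in addition, it
  // tries the doubly unrolled instructions ga_ins_red_d_unrolled<N,S2> when
  // both the contraction size n and s2 = tc2.size()/n are small, and falls
  // back to ga_instruction_contraction_switch otherwise.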
3178  pga_instruction ga_uniform_instruction_contraction_switch
3179  (assembly_tensor &t_, assembly_tensor &tc1_, assembly_tensor &tc2_,
3180  size_type n, bool &to_clear) {
3181  base_tensor &t = t_.tensor(), &tc1 = tc1_.tensor(), &tc2 = tc2_.tensor();
3182 
3183  if (tc1_.sparsity() == 1 && tc2_.sparsity() == 1 &&
3184  tc1_.qdim() == n && tc2_.qdim() == n) {
3185  to_clear = true;
3186  t_.set_sparsity(10, tc1_.qdim());
3187  return std::make_shared<ga_instruction_contraction_opt1_1>(t,tc1,tc2,n);
3188  }
3189  if (tc2_.sparsity() == 1) {
3190  switch(n) {
3191  case 2:
3192  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<2>>
3193  (t, tc1, tc2);
3194  case 3:
3195  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<3>>
3196  (t, tc1, tc2);
3197  case 4:
3198  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<4>>
3199  (t, tc1, tc2);
3200  case 5:
3201  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<5>>
3202  (t, tc1, tc2);
3203  default:
3204  return std::make_shared<ga_instruction_contraction_opt0_1>(t,tc1,tc2, n);
3205  }
3206  }
3207  if (tc2_.sparsity() == 2) {
3208  size_type q2 = tc2.sizes()[1];
3209  size_type n2 = (tc2.sizes().size() > 2) ? tc2.sizes()[2] : 1;
3210  if (n2*q2 == n) {
3211  switch (n2) {
3212  case 1:
3213  switch (q2) {
3214  case 2:
3215  return
3216  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,2>>
3217  (t, tc1, tc2);
3218  case 3:
3219  return
3220  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,3>>
3221  (t, tc1, tc2);
3222  case 4:
3223  return
3224  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,4>>
3225  (t, tc1, tc2);
3226  default :
3227  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<1>>
3228  (t, tc1, tc2, q2);
3229  }
3230  case 2:
3231  switch (q2) {
3232  case 2:
3233  return
3234  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,2>>
3235  (t, tc1, tc2);
3236  case 3:
3237  return
3238  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,3>>
3239  (t, tc1, tc2);
3240  case 4:
3241  return
3242  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,4>>
3243  (t, tc1, tc2);
3244  default :
3245  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<2>>
3246  (t, tc1, tc2, q2);
3247  }
3248  case 3:
3249  switch (q2) {
3250  case 2:
3251  return
3252  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,2>>
3253  (t, tc1, tc2);
3254  case 3:
3255  return
3256  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,3>>
3257  (t, tc1, tc2);
3258  case 4:
3259  return
3260  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,4>>
3261  (t, tc1, tc2);
3262  default :
3263  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<3>>
3264  (t, tc1, tc2, q2);
3265  }
3266  case 4:
3267  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<4>>
3268  (t, tc1, tc2, q2);
3269  case 5:
3270  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<5>>
3271  (t, tc1, tc2, q2);
3272  default:
3273  return std::make_shared<ga_instruction_contraction_opt0_2>
3274  (t,tc1,tc2,n2,q2);
3275  }
3276  }
3277  }
3278  if (tc1_.sparsity() == 2) {
3279  size_type q1 = tc1.sizes()[1];
3280  size_type n1 = (tc1.sizes().size() > 2) ? tc1.sizes()[2] : 1;
3281  if (n1*q1 == n) {
3282  switch (n1) {
3283  case 1:
3284  switch (q1) {
3285  case 2:
3286  return
3287  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,2>>
3288  (t, tc1, tc2);
3289  case 3:
3290  return
3291  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,3>>
3292  (t, tc1, tc2);
3293  case 4:
3294  return
3295  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,4>>
3296  (t, tc1, tc2);
3297  default :
3298  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<1>>
3299  (t, tc1, tc2, q1);
3300  }
3301  case 2:
3302  switch (q1) {
3303  case 2:
3304  return
3305  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,2>>
3306  (t, tc1, tc2);
3307  case 3:
3308  return
3309  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,3>>
3310  (t, tc1, tc2);
3311  case 4:
3312  return
3313  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,4>>
3314  (t, tc1, tc2);
3315  default :
3316  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<2>>
3317  (t, tc1, tc2, q1);
3318  }
3319  case 3:
3320  switch (q1) {
3321  case 2:
3322  return
3323  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,2>>
3324  (t, tc1, tc2);
3325  case 3:
3326  return
3327  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,3>>
3328  (t, tc1, tc2);
3329  case 4:
3330  return
3331  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,4>>
3332  (t, tc1, tc2);
3333  default :
3334  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<3>>
3335  (t, tc1, tc2, q1);
3336  }
3337  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<3>>
3338  (t, tc1, tc2, q1);
3339  case 4:
3340  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<4>>
3341  (t, tc1, tc2, q1);
3342  case 5:
3343  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<5>>
3344  (t, tc1, tc2, q1);
3345  default:
3346  return std::make_shared<ga_instruction_contraction_opt2_0>
3347  (t,tc1,tc2, n1, q1);
3348  }
3349  }
3350  }
3351 
3352  // Only specialized for certain values
3353  size_type s2 = tc2.size()/n;
3354  switch(s2) {
3355  case 1 :
3356  switch(n) {
3357  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,1>>(t, tc1, tc2);
3358  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,1>>(t, tc1, tc2);
3359  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,1>>(t, tc1, tc2);
3360  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3361  }
3362  case 2 :
3363  switch(n) {
3364  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,2>>(t, tc1, tc2);
3365  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,2>>(t, tc1, tc2);
3366  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,2>>(t, tc1, tc2);
3367  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3368  }
3369  case 3 :
3370  switch(n) {
3371  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,3>>(t, tc1, tc2);
3372  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,3>>(t, tc1, tc2);
3373  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,3>>(t, tc1, tc2);
3374  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3375  }
3376  case 4 :
3377  switch(n) {
3378  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,4>>(t, tc1, tc2);
3379  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,4>>(t, tc1, tc2);
3380  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,4>>(t, tc1, tc2);
3381  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3382  }
3383  case 5 :
3384  switch(n) {
3385  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,5>>(t, tc1, tc2);
3386  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,5>>(t, tc1, tc2);
3387  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,5>>(t, tc1, tc2);
3388  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3389  }
3390  case 6 :
3391  switch(n) {
3392  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,6>>(t, tc1, tc2);
3393  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,6>>(t, tc1, tc2);
3394  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,6>>(t, tc1, tc2);
3395  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3396  }
3397  case 7 :
3398  switch(n) {
3399  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,7>>(t, tc1, tc2);
3400  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,7>>(t, tc1, tc2);
3401  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,7>>(t, tc1, tc2);
3402  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3403  }
3404  case 8 :
3405  switch(n) {
3406  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,8>>(t, tc1, tc2);
3407  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,8>>(t, tc1, tc2);
3408  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,8>>(t, tc1, tc2);
3409  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3410  }
3411  case 9 :
3412  switch(n) {
3413  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,9>>(t, tc1, tc2);
3414  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,9>>(t, tc1, tc2);
3415  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,9>>(t, tc1, tc2);
3416  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3417  }
3418  case 10:
3419  switch(n) {
3420  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,10>>(t, tc1, tc2);
3421  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,10>>(t, tc1, tc2);
3422  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,10>>(t, tc1, tc2);
3423  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3424  }
3425  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3426  }
3427  }
3428 
3429 
3430  // Performs Amij Bnj -> Cmni. To be optimized.
3431  struct ga_instruction_spec_contraction : public ga_instruction {
3432  base_tensor &t, &tc1, &tc2;
3433  size_type nn;
3434  virtual int exec() {
3435  GA_DEBUG_INFO("Instruction: specific contraction operation of "
3436  "size " << nn);
3437  size_type s1 = tc1.sizes()[0], s11 = tc1.size() / (s1*nn), s111 = s1*s11;
3438  size_type s2 = tc2.sizes()[0];
3439  base_tensor::iterator it = t.begin();
3440  for (size_type i = 0; i < s11; ++i)
3441  for (size_type n = 0; n < s2; ++n)
3442  for (size_type m = 0; m < s1; ++m, ++it) {
3443  *it = scalar_type(0);
3444  for (size_type j = 0; j < nn; ++j)
3445  *it += tc1[m+i*s1+j*s111] * tc2[n+j*s2];
3446  }
3447  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
3448  return 0;
3449  }
3450  ga_instruction_spec_contraction(base_tensor &t_, base_tensor &tc1_,
3451  base_tensor &tc2_, size_type n_)
3452  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
3453  };
3454 
3455  // Performs Amik Bnjk -> Cmnij. To be optimized.
3456  struct ga_instruction_spec2_contraction : public ga_instruction {
3457  base_tensor &t, &tc1, &tc2;
3458  size_type nn;
3459  virtual int exec() {
3460  GA_DEBUG_INFO("Instruction: second specific contraction operation of "
3461  "size " << nn);
3462  size_type s1 = tc1.sizes()[0], s11 = tc1.size() / (s1*nn), s111 = s1*s11;
3463  size_type s2 = tc2.sizes()[0], s22 = tc2.size() / (s2*nn), s222 = s2*s22;
3464  base_tensor::iterator it = t.begin();
3465  for (size_type j = 0; j < s22; ++j)
3466  for (size_type i = 0; i < s11; ++i)
3467  for (size_type m = 0; m < s1; ++m)
3468  for (size_type n = 0; n < s2; ++n, ++it) {
3469  *it = scalar_type(0);
3470  for (size_type k = 0; k < nn; ++k)
3471  *it += tc1[m+i*s1+k*s111] * tc2[n+j*s2+k*s222];
3472  }
3473  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
3474  return 0;
3475  }
3476  ga_instruction_spec2_contraction(base_tensor &t_, base_tensor &tc1_,
3477  base_tensor &tc2_, size_type n_)
3478  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
3479  };
3480 
3481  // Performs Aij Bkl -> Cijkl
3482  struct ga_instruction_simple_tmult : public ga_instruction {
3483  base_tensor &t, &tc1, &tc2;
3484  virtual int exec() {
3485  GA_DEBUG_INFO("Instruction: simple tensor product");
3486  size_type s1 = tc1.size();
3487  GA_DEBUG_ASSERT(t.size() == s1 * tc2.size(), "Wrong sizes");
3488  base_tensor::iterator it2=tc2.begin(), it1=tc1.begin(), it1end=it1 + s1;
3489  for (base_tensor::iterator it = t.begin(); it != t.end(); ++it) {
3490  *it = *(it2) * (*it1);
3491  ++it1; if (it1 == it1end) { it1 = tc1.begin(), ++it2; }
3492  }
3493  return 0;
3494  }
3495  ga_instruction_simple_tmult(base_tensor &t_, base_tensor &tc1_,
3496  base_tensor &tc2_)
3497  : t(t_), tc1(tc1_), tc2(tc2_) {}
3498  };
3499 
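  // Compile-time recursion writing one column of the tensor product; the
  // specialization for S1 == 0 below terminates the recursion.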
3500  template<int S1> inline void tmult_elem_unrolled__
3501  (base_tensor::iterator &it, base_tensor::iterator &it1,
3502  base_tensor::iterator &it2) {
3503  *it++ = (*it1++)*(*it2);
3504  tmult_elem_unrolled__<S1-1>(it, it1, it2);
3505  }
3506  template<> inline void tmult_elem_unrolled__<0>
3507  (base_tensor::iterator &/*it*/, base_tensor::iterator &/*it1*/,
3508  base_tensor::iterator &/*it2*/) { }
3509 
3510  // Performs Aij Bkl -> Cijkl, partially unrolled version
3511  template<int S1> struct ga_instruction_simple_tmult_unrolled
3512  : public ga_instruction {
3513  base_tensor &t, &tc1, &tc2;
3514  virtual int exec() {
3515  size_type s2 = tc2.size();
3516  GA_DEBUG_ASSERT(tc1.size() == S1,
3517  "Wrong sizes " << tc1.size() << " != " << S1);
3518  GA_DEBUG_INFO("Instruction: simple tensor product, unrolled with "
3519  << S1 << " operations");
3520  GA_DEBUG_ASSERT(t.size() == S1 * s2,
3521  "Wrong sizes " << t.size() << " != " << S1 << "*" << s2);
3522  base_tensor::iterator it = t.begin(), it2 = tc2.begin();
3523  for (size_type ii = 0; ii < s2; ++ii, ++it2) {
3524  base_tensor::iterator it1 = tc1.begin();
3525  tmult_elem_unrolled__<S1>(it, it1, it2);
3526  }
3527  GA_DEBUG_ASSERT(it == t.end(), "Internal error");
3528  return 0;
3529  }
3530  ga_instruction_simple_tmult_unrolled(base_tensor &t_, base_tensor &tc1_,
3531  base_tensor &tc2_)
3532  : t(t_), tc1(tc1_), tc2(tc2_) {}
3533  };
3534 
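  // Dispatch on tc1.size(): uses the compile-time unrolled tensor product
  // for sizes up to 16 and the generic instruction beyond that.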
3535  pga_instruction ga_uniform_instruction_simple_tmult
3536  (base_tensor &t, base_tensor &tc1, base_tensor &tc2) {
3537  switch(tc1.size()) {
3538  case 2 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 2>>
3539  (t, tc1, tc2);
3540  case 3 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 3>>
3541  (t, tc1, tc2);
3542  case 4 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 4>>
3543  (t, tc1, tc2);
3544  case 5 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 5>>
3545  (t, tc1, tc2);
3546  case 6 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 6>>
3547  (t, tc1, tc2);
3548  case 7 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 7>>
3549  (t, tc1, tc2);
3550  case 8 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 8>>
3551  (t, tc1, tc2);
3552  case 9 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 9>>
3553  (t, tc1, tc2);
3554  case 10 : return std::make_shared<ga_instruction_simple_tmult_unrolled<10>>
3555  (t, tc1, tc2);
3556  case 11 : return std::make_shared<ga_instruction_simple_tmult_unrolled<11>>
3557  (t, tc1, tc2);
3558  case 12 : return std::make_shared<ga_instruction_simple_tmult_unrolled<12>>
3559  (t, tc1, tc2);
3560  case 13 : return std::make_shared<ga_instruction_simple_tmult_unrolled<13>>
3561  (t, tc1, tc2);
3562  case 14 : return std::make_shared<ga_instruction_simple_tmult_unrolled<14>>
3563  (t, tc1, tc2);
3564  case 15 : return std::make_shared<ga_instruction_simple_tmult_unrolled<15>>
3565  (t, tc1, tc2);
3566  case 16 : return std::make_shared<ga_instruction_simple_tmult_unrolled<16>>
3567  (t, tc1, tc2);
3568  default : return std::make_shared<ga_instruction_simple_tmult>
3569  (t, tc1, tc2);
3570  }
3571  }
3572 
3573 
3574  // Performs Ami Bnj -> Cmnij. To be optimized.
3575  struct ga_instruction_spec_tmult : public ga_instruction {
3576  base_tensor &t, &tc1, &tc2;
3577  size_type s1_2, s2_2;
3578  virtual int exec() {
3579  GA_DEBUG_INFO("Instruction: specific tensor product");
3580  GA_DEBUG_ASSERT(t.size() == tc1.size() * tc2.size(), "Wrong sizes");
3581  size_type s1_1 = tc1.size() / s1_2;
3582  size_type s2_1 = tc2.size() / s2_2;
3583 
3584  base_tensor::iterator it = t.begin();
3585  for (size_type j = 0; j < s2_2; ++j)
3586  for (size_type i = 0; i < s1_2; ++i)
3587  for (size_type n = 0; n < s2_1; ++n)
3588  for (size_type m = 0; m < s1_1; ++m, ++it)
3589  *it = tc1[m+i*s1_1] * tc2[n+j*s2_1];
3590  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
3591  return 0;
3592  }
3593  ga_instruction_spec_tmult(base_tensor &t_, base_tensor &tc1_,
3594  base_tensor &tc2_, size_type s1_2_,
3595  size_type s2_2_)
3596  : t(t_), tc1(tc1_), tc2(tc2_), s1_2(s1_2_), s2_2(s2_2_) {}
3597  };
3598 
3599  // Performs Ai Bmj -> Cmij. To be optimized.
3600  struct ga_instruction_spec2_tmult : public ga_instruction {
3601  base_tensor &t, &tc1, &tc2;
3602  virtual int exec() {
3603  GA_DEBUG_INFO("Instruction: second specific tensor product");
3604  GA_DEBUG_ASSERT(t.size() == tc1.size() * tc2.size(), "Wrong sizes");
3605  size_type s1 = tc1.size();
3606  size_type s2_1 = tc2.sizes()[0], s2_2 = tc2.size() / s2_1;
3607 
3608  base_tensor::iterator it = t.begin();
3609  for (size_type j = 0; j < s2_2; ++j)
3610  for (size_type i = 0; i < s1; ++i)
3611  for (size_type m = 0; m < s2_1; ++m, ++it)
3612  *it = tc1[i] * tc2[m+j*s2_1];
3613  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
3614  return 0;
3615  }
3616  ga_instruction_spec2_tmult(base_tensor &t_, base_tensor &tc1_,
3617  base_tensor &tc2_)
3618  : t(t_), tc1(tc1_), tc2(tc2_) {}
3619  };
3620 
3621 
3622 
3623  struct ga_instruction_simple_c_matrix : public ga_instruction {
3624  base_tensor &t;
3625  std::vector<scalar_type *> components;
3626  virtual int exec() {
3627  GA_DEBUG_INFO("Instruction: gathering components for explicit "
3628  "matrix");
3629  GA_DEBUG_ASSERT(t.size() == components.size(), "Wrong sizes");
3630  for (size_type i = 0; i < components.size(); ++i)
3631  t[i] = *(components[i]);
3632  return 0;
3633  }
3634  ga_instruction_simple_c_matrix(base_tensor &t_,
3635  std::vector<scalar_type *> &components_)
3636  : t(t_), components(components_) {}
3637  };
3638 
3639  struct ga_instruction_c_matrix_with_tests : public ga_instruction {
3640  base_tensor &t;
3641  const std::vector<const base_tensor *> components;
3642  virtual int exec() {
3643  GA_DEBUG_INFO("Instruction: gathering components for explicit "
3644  "matrix with test functions");
3645  size_type s = t.size() / components.size();
3646  GA_DEBUG_ASSERT(s, "Wrong sizes");
3647  base_tensor::iterator it = t.begin();
3648  for (size_type i = 0; i < components.size(); ++i) {
3649  const base_tensor &t1 = *(components[i]);
3650  if (t1.size() > 1) {
3651  GA_DEBUG_ASSERT(t1.size() == s, "Wrong sizes, " << t1.size()
3652  << " != " << s);
3653  for (size_type j = 0; j < s; ++j) *it++ = t1[j];
3654  } else {
3655  for (size_type j = 0; j < s; ++j) *it++ = t1[0];
3656  }
3657  }
3658  return 0;
3659  }
3660  ga_instruction_c_matrix_with_tests
3661  (base_tensor &t_, const std::vector<const base_tensor *> &components_)
3662  : t(t_), components(components_) {}
3663  };
3664 
3665  struct ga_instruction_eval_func_1arg_1res : public ga_instruction {
3666  scalar_type &t;
3667  const scalar_type &c;
3668  pscalar_func_onearg f1;
3669  virtual int exec() {
3670  GA_DEBUG_INFO("Instruction: evaluation of a one-argument "
3671  "predefined function on a scalar");
3672  t = (*f1)(c);
3673  return 0;
3674  }
3675  ga_instruction_eval_func_1arg_1res(scalar_type &t_, const scalar_type &c_,
3676  pscalar_func_onearg f1_)
3677  : t(t_), c(c_), f1(f1_) {}
3678  };
3679 
3680  struct ga_instruction_eval_func_1arg_1res_expr : public ga_instruction {
3681  scalar_type &t;
3682  const scalar_type &c;
3683  const ga_predef_function &F;
3684  virtual int exec() {
3685  GA_DEBUG_INFO("Instruction: evaluation of a one-argument "
3686  "predefined function on a scalar");
3687  t = F(c);
3688  return 0;
3689  }
3690  ga_instruction_eval_func_1arg_1res_expr(scalar_type &t_,
3691  const scalar_type &c_,
3692  const ga_predef_function &F_)
3693  : t(t_), c(c_), F(F_) {}
3694  };
3695 
3696  struct ga_instruction_eval_func_1arg : public ga_instruction {
3697  base_tensor &t, &tc1;
3698  pscalar_func_onearg f1;
3699  virtual int exec() {
3700  GA_DEBUG_INFO("Instruction: evaluation of a one-argument "
3701  "predefined function on a tensor");
3702  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
3703  for (size_type i = 0; i < t.size(); ++i) t[i] = (*f1)(tc1[i]);
3704  return 0;
3705  }
3706  ga_instruction_eval_func_1arg(base_tensor &t_, base_tensor &c_,
3707  pscalar_func_onearg f1_)
3708  : t(t_), tc1(c_), f1(f1_) {}
3709  };
3710 
3711  struct ga_instruction_eval_func_1arg_expr : public ga_instruction {
3712  base_tensor &t, &tc1;
3713  const ga_predef_function &F;
3714  virtual int exec() {
3715  GA_DEBUG_INFO("Instruction: evaluation of a one-argument "
3716  "predefined function on a tensor");
3717  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
3718  for (size_type i = 0; i < t.size(); ++i) t[i] = F(tc1[i]);
3719  return 0;
3720  }
3721  ga_instruction_eval_func_1arg_expr(base_tensor &t_, base_tensor &c_,
3722  const ga_predef_function &F_)
3723  : t(t_), tc1(c_), F(F_) {}
3724  };
3725 
3726  struct ga_instruction_eval_func_2arg_1res : public ga_instruction {
3727  scalar_type &t;
3728  const scalar_type &c, &d;
3729  pscalar_func_twoargs f2;
3730  virtual int exec() {
3731  GA_DEBUG_INFO("Instruction: evaluation of a two-argument "
3732  "predefined function on two scalars");
3733  t = (*f2)(c, d);
3734  return 0;
3735  }
3736  ga_instruction_eval_func_2arg_1res(scalar_type &t_, const scalar_type &c_,
3737  const scalar_type &d_,
3738  pscalar_func_twoargs f2_)
3739  : t(t_), c(c_), d(d_), f2(f2_) {}
3740  };
3741 
3742  struct ga_instruction_eval_func_2arg_1res_expr : public ga_instruction {
3743  scalar_type &t;
3744  const scalar_type &c, &d;
3745  const ga_predef_function &F;
3746  virtual int exec() {
3747  GA_DEBUG_INFO("Instruction: evaluation of a two-argument "
3748  "predefined function on two scalars");
3749  t = F(c, d);
3750  return 0;
3751  }
3752  ga_instruction_eval_func_2arg_1res_expr(scalar_type &t_,
3753  const scalar_type &c_,
3754  const scalar_type &d_,
3755  const ga_predef_function &F_)
3756  : t(t_), c(c_), d(d_), F(F_) {}
3757  };
3758 
3759  struct ga_instruction_eval_func_2arg_first_scalar : public ga_instruction {
3760  base_tensor &t, &tc1, &tc2;
3761  pscalar_func_twoargs f2;
3762  virtual int exec() {
3763  GA_DEBUG_INFO("Instruction: evaluation of a two-argument "
3764  "predefined function on one scalar and one tensor");
3765  GA_DEBUG_ASSERT(t.size() == tc2.size(), "Wrong sizes");
3766  for (size_type i = 0; i < t.size(); ++i) t[i] = (*f2)(tc1[0], tc2[i]);
3767  return 0;
3768  }
3769  ga_instruction_eval_func_2arg_first_scalar
3770  (base_tensor &t_, base_tensor &c_, base_tensor &d_,
3771  pscalar_func_twoargs f2_)
3772  : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
3773  };
3774 
3775  struct ga_instruction_eval_func_2arg_first_scalar_expr
3776  : public ga_instruction {
3777  base_tensor &t, &tc1, &tc2;
3778  const ga_predef_function &F;
3779  virtual int exec() {
3780  GA_DEBUG_INFO("Instruction: evaluation of a two-argument "
3781  "predefined function on one scalar and one tensor");
3782  GA_DEBUG_ASSERT(t.size() == tc2.size(), "Wrong sizes");
3783  for (size_type i = 0; i < t.size(); ++i) t[i] = F(tc1[0], tc2[i]);
3784  return 0;
3785  }
3786  ga_instruction_eval_func_2arg_first_scalar_expr
3787  (base_tensor &t_, base_tensor &c_, base_tensor &d_,
3788  const ga_predef_function &F_)
3789  : t(t_), tc1(c_), tc2(d_), F(F_) {}
3790  };
3791 
3792  struct ga_instruction_eval_func_2arg_second_scalar : public ga_instruction {
3793  base_tensor &t, &tc1, &tc2;
3794  pscalar_func_twoargs f2;
3795  virtual int exec() {
3796  GA_DEBUG_INFO("Instruction: evaluation of a two-argument "
3797  "predefined function on one tensor and one scalar");
3798  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
3799  for (size_type i = 0; i < t.size(); ++i) t[i] = (*f2)(tc1[i], tc2[0]);
3800  return 0;
3801  }
3802  ga_instruction_eval_func_2arg_second_scalar(base_tensor &t_,
3803  base_tensor &c_,
3804  base_tensor &d_,
3805  pscalar_func_twoargs f2_)
3806  : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
3807  };
3808 
3809  struct ga_instruction_eval_func_2arg_second_scalar_expr
3810  : public ga_instruction {
3811  base_tensor &t, &tc1, &tc2;
3812  const ga_predef_function &F;
3813  virtual int exec() {
3814  GA_DEBUG_INFO("Instruction: evaluation of a two-argument "
3815  "predefined function on one tensor and one scalar");
3816  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
3817  for (size_type i = 0; i < t.size(); ++i) t[i] = F(tc1[i], tc2[0]);
3818  return 0;
3819  }
3820  ga_instruction_eval_func_2arg_second_scalar_expr
3821  (base_tensor &t_, base_tensor &c_, base_tensor &d_,
3822  const ga_predef_function &F_)
3823  : t(t_), tc1(c_), tc2(d_), F(F_) {}
3824  };
3825 
3826  struct ga_instruction_eval_func_2arg : public ga_instruction {
3827  base_tensor &t, &tc1, &tc2;
3828  pscalar_func_twoargs f2;
3829  virtual int exec() {
3830  GA_DEBUG_INFO("Instruction: evaluation of a two-argument "
3831  "predefined function on two tensors");
3832  GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
3833  "Wrong sizes");
3834 
3835  for (size_type i = 0; i < t.size(); ++i) t[i] = (*f2)(tc1[i], tc2[i]);
3836  return 0;
3837  }
3838  ga_instruction_eval_func_2arg(base_tensor &t_, base_tensor &c_,
3839  base_tensor &d_, pscalar_func_twoargs f2_)
3840  : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
3841  };
3842 
3843  struct ga_instruction_eval_func_2arg_expr : public ga_instruction {
3844  base_tensor &t, &tc1, &tc2;
3845  const ga_predef_function &F;
3846  virtual int exec() {
3847  GA_DEBUG_INFO("Instruction: evaluation of a two-argument "
3848  "predefined function on two tensors");
3849  GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
3850  "Wrong sizes");
3851 
3852  for (size_type i = 0; i < t.size(); ++i) t[i] = F(tc1[i], tc2[i]);
3853  return 0;
3854  }
3855  ga_instruction_eval_func_2arg_expr(base_tensor &t_, base_tensor &c_,
3856  base_tensor &d_,
3857  const ga_predef_function &F_)
3858  : t(t_), tc1(c_), tc2(d_), F(F_) {}
3859  };
3860 
3861  struct ga_instruction_eval_OP : public ga_instruction {
3862  base_tensor &t;
3863  const ga_nonlinear_operator &OP;
3864  ga_nonlinear_operator::arg_list args;
3865  virtual int exec() {
3866  GA_DEBUG_INFO("Instruction: operator evaluation");
3867  OP.value(args, t);
3868  return 0;
3869  }
3870  ga_instruction_eval_OP(base_tensor &t_, const ga_nonlinear_operator &OP_,
3871  ga_nonlinear_operator::arg_list &args_)
3872  : t(t_), OP(OP_), args(args_) {}
3873  };
3874 
3875  struct ga_instruction_eval_derivative_OP : public ga_instruction {
3876  base_tensor &t;
3877  const ga_nonlinear_operator &OP;
3878  ga_nonlinear_operator::arg_list args;
3879  size_type der1;
3880  virtual int exec() {
3881  GA_DEBUG_INFO("Instruction: operator derivative evaluation");
3882  OP.derivative(args, der1, t);
3883  return 0;
3884  }
3885  ga_instruction_eval_derivative_OP(base_tensor &t_,
3886  const ga_nonlinear_operator &OP_,
3887  ga_nonlinear_operator::arg_list &args_,
3888  size_type der1_)
3889  : t(t_), OP(OP_), args(args_), der1(der1_) {}
3890  };
3891 
3892  struct ga_instruction_eval_second_derivative_OP : public ga_instruction {
3893  base_tensor &t;
3894  const ga_nonlinear_operator &OP;
3895  ga_nonlinear_operator::arg_list args;
3896  size_type der1, der2;
3897  virtual int exec() {
3898  GA_DEBUG_INFO("Instruction: operator second derivative evaluation");
3899  OP.second_derivative(args, der1, der2, t);
3900  return 0;
3901  }
3902  ga_instruction_eval_second_derivative_OP
3903  (base_tensor &t_, const ga_nonlinear_operator &OP_,
3904  ga_nonlinear_operator::arg_list &args_, size_type der1_, size_type der2_)
3905  : t(t_), OP(OP_), args(args_), der1(der1_), der2(der2_) {}
3906  };
3907 
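  // Extracts a slice of tc1 into t: the entries of "indices" give, for each
  // index of t, the corresponding index of tc1, while "mi" holds the fixed
  // values of the remaining indices of tc1.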
3908  struct ga_instruction_tensor_slice : public ga_instruction {
3909  base_tensor &t, &tc1;
3910  bgeot::multi_index mi, indices;
3911  virtual int exec() {
3912  GA_DEBUG_INFO("Instruction: tensor slice");
3913  size_type order = t.sizes().size();
3914  for (bgeot::multi_index mi3(order); !mi3.finished(t.sizes());
3915  mi3.incrementation(t.sizes())) {
3916  for (size_type j = 0; j < order; ++j)
3917  mi[indices[j]] = mi3[j];
3918  t(mi3) = tc1(mi);
3919  }
3920  return 0;
3921  }
3922  ga_instruction_tensor_slice(base_tensor &t_, base_tensor &tc1_,
3923  bgeot::multi_index &mi_,
3924  bgeot::multi_index &indices_)
3925  : t(t_), tc1(tc1_), mi(mi_), indices(indices_) {}
3926  };
3927 
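  // Evaluates an interpolate transformation at the current Gauss point and
  // fills "inin" with the target element, reference point, unit normal and
  // interpolation context, or marks the context as invalid when the point
  // could not be mapped onto an element.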
3928  struct ga_instruction_transformation_call : public ga_instruction {
3929  const ga_workspace &workspace;
3930  ga_instruction_set::interpolate_info &inin;
3931  pinterpolate_transformation trans;
3932  fem_interpolation_context &ctx;
3933  const base_small_vector &Normal;
3934  const mesh &m;
3935  bool compute_der;
3936 
3937  virtual int exec() {
3938  GA_DEBUG_INFO("Instruction: call interpolate transformation");
3939  base_node P_ref;
3940  size_type cv;
3941  short_type face_num;
3942  inin.pt_type = trans->transform(workspace, m, ctx, Normal, &(inin.m), cv,
3943  face_num, P_ref, inin.Normal,
3944  inin.derivatives, compute_der);
3945  if (inin.pt_type) {
3946  if (cv != size_type(-1)) {
3947  inin.m->points_of_convex(cv, inin.G);
3948  inin.ctx.change((inin.m)->trans_of_convex(cv),
3949  0, P_ref, inin.G, cv, face_num);
3950  inin.has_ctx = true;
3951  if (face_num != short_type(-1)) {
3952  inin.Normal = bgeot::compute_normal(inin.ctx, face_num);
3953  gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
3954  } else
3955  inin.Normal.resize(0);
3956  inin.pt_y = inin.ctx.xreal();
3957  } else {
3958  inin.ctx.invalid_convex_num();
3959  inin.Normal.resize(0);
3960  inin.pt_y = P_ref;
3961  inin.has_ctx = false;
3962  }
3963  } else {
3964  inin.ctx.invalid_convex_num();
3965  inin.Normal.resize(0);
3966  inin.pt_y.resize(0);
3967  inin.has_ctx = false;
3968  }
3969  GA_DEBUG_INFO("Instruction: end of call interpolate transformation");
3970  return 0;
3971  }
3972  ga_instruction_transformation_call
3973  (const ga_workspace &w, ga_instruction_set::interpolate_info &i,
3974  pinterpolate_transformation t, fem_interpolation_context &ctxx,
3975  const base_small_vector &No, const mesh &mm, bool compute_der_)
3976  : workspace(w), inin(i), trans(t), ctx(ctxx), Normal(No), m(mm),
3977  compute_der(compute_der_) {}
3978  };
3979 
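  // Same kind of call for the neighbor element transformation, with an
  // optimization: when the current context relies on precomputed Gauss
  // points (and the integration method is not built on the fly), the
  // correspondence between the Gauss points of the two adjacent faces is
  // computed once by geometric transformation inversion and cached in
  // neighbor_corresp; the generic transform() call is only used as a
  // fallback.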
3980  struct ga_instruction_neighbor_transformation_call : public ga_instruction {
3981  const ga_workspace &workspace;
3982  ga_instruction_set::interpolate_info &inin;
3983  pinterpolate_transformation trans;
3984  fem_interpolation_context &ctx;
3985  base_small_vector &Normal;
3986  const mesh &m;
3987  size_type &ipt;
3988  papprox_integration &pai;
3989  bgeot::geotrans_precomp_pool &gp_pool;
3990  std::map<gauss_pt_corresp, bgeot::pstored_point_tab> &neighbor_corresp;
3991 
3992  virtual int exec() {
3993  bool cancel_optimization = false;
3994  GA_DEBUG_INFO("Instruction: call interpolate neighbor transformation");
3995  if (ipt == 0) {
3996  if (!(ctx.have_pgp()) || !pai || pai->is_built_on_the_fly()
3997  || cancel_optimization) {
3998  inin.ctx.invalid_convex_num();
3999  } else {
4000  // Test if the situation has already been encountered
4001  size_type cv = ctx.convex_num();
4002  short_type f = ctx.face_num();
4003  auto adj_face = m.adjacent_face(cv, f);
4004  if (adj_face.cv == size_type(-1)) {
4005  inin.ctx.invalid_convex_num();
4006  } else {
4007  gauss_pt_corresp gpc;
4008  gpc.pgt1 = m.trans_of_convex(cv);
4009  gpc.pgt2 = m.trans_of_convex(adj_face.cv);
4010  gpc.pai = pai;
4011  auto inds_pt1 = m.ind_points_of_face_of_convex(cv, f);
4012  auto inds_pt2 = m.ind_points_of_face_of_convex(adj_face.cv,
4013  adj_face.f);
4014  auto str1 = gpc.pgt1->structure();
4015  auto str2 = gpc.pgt2->structure();
4016  size_type nbptf1 = str1->nb_points_of_face(f);
4017  size_type nbptf2 = str2->nb_points_of_face(adj_face.f);
4018  gpc.nodes.resize(nbptf1*2);
4019  for (size_type i = 0; i < nbptf1; ++i) {
4020  gpc.nodes[2*i] = str1->ind_points_of_face(f)[i];
4021  bool found = false;
4022  for (size_type j = 0; j < nbptf2; ++j) {
4023  if (inds_pt2[j] == inds_pt1[i]) {
4024  gpc.nodes[2*i+1] = str2->ind_points_of_face(adj_face.f)[j];
4025  found = true;
4026  break;
4027  }
4028  }
4029  GMM_ASSERT1(found, "Internal error");
4030  }
4031  bgeot::pstored_point_tab pspt = 0;
4032  auto itm = neighbor_corresp.find(gpc);
4033  if (itm != neighbor_corresp.end()) {
4034  pspt = itm->second;
4035  } else {
4036  size_type nbpt = pai->nb_points_on_face(f);
4037  bgeot::geotrans_inv_convex gic;
4038  gic.init(m.points_of_convex(adj_face.cv), gpc.pgt2);
4039  size_type first_ind = pai->ind_first_point_on_face(f);
4040  const bgeot::stored_point_tab
4041  &spt = *(pai->pintegration_points());
4042  base_matrix G;
4043  m.points_of_convex(cv, G);
4044  fem_interpolation_context ctx_x(gpc.pgt1, 0, spt[0], G, cv, f);
4045  std::vector<base_node> P_ref(nbpt);
4046 
4047  for (size_type i = 0; i < nbpt; ++i) {
4048  ctx_x.set_xref(spt[first_ind+i]);
4049  bool converged = true;
4050  gic.invert(ctx_x.xreal(), P_ref[i], converged);
4051  bool is_in = (gpc.pgt2->convex_ref()->is_in(P_ref[i]) < 1E-4);
4052  GMM_ASSERT1(is_in && converged,"Geometric transformation "
4053  "inversion has failed in neighbor transformation");
4054  }
4055  pspt = store_point_tab(P_ref);
4056  neighbor_corresp[gpc] = pspt;
4057  }
4058  m.points_of_convex(adj_face.cv, inin.G);
4059  bgeot::pgeotrans_precomp pgp = gp_pool(gpc.pgt2, pspt);
4060  inin.ctx.change(pgp, 0, 0, inin.G, adj_face.cv, adj_face.f);
4061  }
4062  }
4063  }
4064 
4065  if (inin.ctx.have_pgp()) {
4066  inin.ctx.set_ii(ipt);
4067  inin.pt_type = 1;
4068  inin.has_ctx = true;
4069  inin.pt_y = inin.ctx.xreal();
4070  inin.Normal = bgeot::compute_normal(inin.ctx, inin.ctx.face_num());
4071  gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
4072  inin.m = &m;
4073  } else {
4074  base_node P_ref;
4075  size_type cv;
4076  short_type face_num;
4077  gmm::clear(inin.Normal);
4078  inin.pt_type = trans->transform(workspace, m, ctx, Normal, &(inin.m),
4079  cv, face_num, P_ref, inin.Normal,
4080  inin.derivatives, false);
4081  if (inin.pt_type) {
4082  if (cv != size_type(-1)) {
4083  inin.m->points_of_convex(cv, inin.G);
4084  inin.ctx.change((inin.m)->trans_of_convex(cv),
4085  0, P_ref, inin.G, cv, face_num);
4086  inin.has_ctx = true;
4087  if (face_num != short_type(-1)) {
4088  inin.Normal = bgeot::compute_normal(inin.ctx, face_num);
4089  gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
4090  } else
4091  inin.Normal.resize(0);
4092  inin.pt_y = inin.ctx.xreal();
4093  } else {
4094  inin.ctx.invalid_convex_num();
4095  inin.pt_y = P_ref;
4096  inin.has_ctx = false;
4097  }
4098  } else {
4099  inin.ctx.invalid_convex_num();
4100  inin.Normal.resize(0);
4101  inin.pt_y.resize(0);
4102  inin.has_ctx = false;
4103  }
4104  }
4105  GA_DEBUG_INFO("Instruction: end of call neighbor interpolate "
4106  "transformation");
4107  return 0;
4108  }
4109  ga_instruction_neighbor_transformation_call
4110  (const ga_workspace &w, ga_instruction_set::interpolate_info &i,
4111  pinterpolate_transformation t, fem_interpolation_context &ctxx,
4112  base_small_vector &No, const mesh &mm, size_type &ipt_,
4113  papprox_integration &pai_, bgeot::geotrans_precomp_pool &gp_pool_,
4114  std::map<gauss_pt_corresp, bgeot::pstored_point_tab> &neighbor_corresp_)
4115  : workspace(w), inin(i), trans(t), ctx(ctxx), Normal(No), m(mm),
4116  ipt(ipt_), pai(pai_), gp_pool(gp_pool_),
4117  neighbor_corresp(neighbor_corresp_) {}
4118  };
4119 
4120 
4121  struct ga_instruction_scalar_assembly : public ga_instruction {
4122  const base_tensor &t;
4123  scalar_type &E, &coeff;
4124  virtual int exec() {
4125  GA_DEBUG_INFO("Instruction: scalar term assembly");
4126  E += t[0] * coeff;
4127  return 0;
4128  }
4129  ga_instruction_scalar_assembly(base_tensor &t_, scalar_type &E_,
4130  scalar_type &coeff_)
4131  : t(t_), E(E_), coeff(coeff_) {}
4132  };
4133 
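  // Vector assembly for a fem variable: the weighted local contributions
  // are accumulated in "elem" over the Gauss points of the element and
  // scattered into the global vector (VI, or Vi when the mesh_fem is
  // reduced) at the last point, or at every point for interpolate terms.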
4134  struct ga_instruction_vector_assembly_mf : public ga_instruction
4135  {
4136  const base_tensor &t;
4137  base_vector &VI, &Vi;
4138  const fem_interpolation_context &ctx;
4139  const gmm::sub_interval *const&I, *const I__;
4140  const mesh_fem *const&mf, *const mf__;
4141  const bool &reduced_mf;
4142  const scalar_type &coeff;
4143  const size_type &nbpt, &ipt;
4144  base_vector elem;
4145  const bool interpolate;
4146  virtual int exec() {
4147  GA_DEBUG_INFO("Instruction: vector term assembly for fem variable");
4148  bool empty_weight = (coeff == scalar_type(0));
4149  if (ipt == 0 || interpolate) {
4150  if (empty_weight) elem.resize(0);
4151  elem.resize(t.size());
4152  if (!empty_weight)
4153  copy_scaled_4(t, coeff, elem);
4154  } else if (!empty_weight)
4155  // gmm::add(gmm::scaled(t.as_vector(), coeff), elem);
4156  add_scaled_4(t, coeff, elem);
4157 
4158  if (ipt == nbpt-1 || interpolate) { // finalize
4159  GA_DEBUG_ASSERT(mf, "Internal error");
4160  if (!ctx.is_convex_num_valid()) return 0;
4161  size_type cv_1 = ctx.convex_num();
4162  size_type qmult = mf->get_qdim();
4163  if (qmult > 1) qmult /= mf->fem_of_element(cv_1)->target_dim();
4164  base_vector &V = reduced_mf ? Vi : VI;
4165  GA_DEBUG_ASSERT(V.size() >= I->first() + mf->nb_basic_dof(),
4166  "Bad assembly vector size " << V.size() << ">=" <<
4167  I->first() << "+"<< mf->nb_basic_dof());
4168  auto itr = elem.cbegin();
4169  auto itw = V.begin() + I->first();
4170  for (const auto &dof : mf->ind_scalar_basic_dof_of_element(cv_1))
4171  for (size_type q = 0; q < qmult; ++q)
4172  *(itw+dof+q) += *itr++;
4173  GMM_ASSERT1(itr == elem.end(), "Internal error");
4174  }
4175  return 0;
4176  }
4177 
4178  ga_instruction_vector_assembly_mf
4179  (const base_tensor &t_, base_vector &VI_, base_vector &Vi_,
4180  const fem_interpolation_context &ctx_,
4181  const gmm::sub_interval *&I_, const mesh_fem *&mf_,
4182  const bool &reduced_mf_,
4183  const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
4184  bool interpolate_)
4185  : t(t_), VI(VI_), Vi(Vi_), ctx(ctx_),
4186  I(I_), I__(nullptr), mf(mf_), mf__(nullptr), reduced_mf(reduced_mf_),
4187  coeff(coeff_), nbpt(nbpt_), ipt(ipt_), interpolate(interpolate_) {}
4188 
4189  ga_instruction_vector_assembly_mf
4190  (const base_tensor &t_, base_vector &V_,
4191  const fem_interpolation_context &ctx_,
4192  const gmm::sub_interval &I_, const mesh_fem &mf_,
4193  const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
4194  bool interpolate_)
4195  : t(t_), VI(V_), Vi(V_), ctx(ctx_),
4196  I(I__), I__(&I_), mf(mf__), mf__(&mf_), reduced_mf(false_),
4197  coeff(coeff_), nbpt(nbpt_), ipt(ipt_), interpolate(interpolate_) {}
4198  protected:
4199  const bool false_=false;
4200  };
4201 
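  // Vector assembly for an im_data variable: each Gauss point owns its own
  // block of dofs, so the weighted contribution is written (or added)
  // directly at the offset given by imd.filtered_index_of_point, without
  // accumulation over the points of the element.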
4202  struct ga_instruction_vector_assembly_imd : public ga_instruction {
4203  const base_tensor &t;
4204  base_vector &V;
4205  const fem_interpolation_context &ctx;
4206  const gmm::sub_interval &I;
4207  const im_data &imd;
4208  scalar_type &coeff;
4209  const size_type &ipt;
4210  const bool initialize;
4211  virtual int exec() {
4212  GA_DEBUG_INFO("Instruction: vector term assembly for im_data variable");
4213  size_type cv = ctx.convex_num();
4214  size_type i = t.size() * imd.filtered_index_of_point(cv, ipt);
4215  GMM_ASSERT1(i+t.size() <= I.size(),
4216  "Internal error "<<i<<"+"<<t.size()<<" <= "<<I.size());
4217  auto itw = V.begin() + I.first() + i;
4218  if (initialize)
4219  for (const auto &val : t.as_vector())
4220  *itw++ = coeff*val;
4221  else
4222  for (const auto &val : t.as_vector())
4223  *itw++ += coeff*val;
4224  return 0;
4225  }
4226  ga_instruction_vector_assembly_imd
4227  (const base_tensor &t_, base_vector &V_,
4228  const fem_interpolation_context &ctx_, const gmm::sub_interval &I_,
4229  const im_data &imd_, scalar_type &coeff_, const size_type &ipt_,
4230  bool initialize_=false)
4231  : t(t_), V(V_), ctx(ctx_), I(I_), imd(imd_), coeff(coeff_), ipt(ipt_),
4232  initialize(initialize_)
4233  {}
4234  };
4235 
4236  struct ga_instruction_vector_assembly : public ga_instruction {
4237  const base_tensor &t;
4238  base_vector &V;
4239  const gmm::sub_interval &I;
4240  scalar_type &coeff;
4241  virtual int exec() {
4242  GA_DEBUG_INFO("Instruction: vector term assembly for "
4243  "fixed size variable");
4244  gmm::add(gmm::scaled(t.as_vector(), coeff), gmm::sub_vector(V, I));
4245  return 0;
4246  }
4247  ga_instruction_vector_assembly(const base_tensor &t_, base_vector &V_,
4248  const gmm::sub_interval &I_,
4249  scalar_type &coeff_)
4250  : t(t_), V(V_), I(I_), coeff(coeff_) {}
4251  };
4252 
4253  struct ga_instruction_assignment : public ga_instruction {
4254  const base_tensor &t;
4255  base_vector &V;
4256  const fem_interpolation_context &ctx;
4257  const im_data *imd;
4258  virtual int exec() {
4259  GA_DEBUG_INFO("Instruction: Assignment to im_data");
4260  imd->set_tensor(V, ctx.convex_num(), ctx.ii(), t);
4261  return 0;
4262  }
4263  ga_instruction_assignment(const base_tensor &t_, base_vector &V_,
4264  const fem_interpolation_context &ctx_,
4265  const im_data *imd_)
4266  : t(t_), V(V_), ctx(ctx_), imd(imd_) {}
4267  };
4268 
4269  struct ga_instruction_extract_residual_on_imd_dofs : public ga_instruction {
4270  base_tensor &t;
4271  const base_vector &V;
4272  const fem_interpolation_context &ctx;
4273  const gmm::sub_interval &I;
4274  const im_data &imd;
4275  const size_type &ipt;
4276  virtual int exec() {
4277  GA_DEBUG_INFO("Instruction: extract residual for im_data variable");
4278  size_type ifirst = I.first();
4279  size_type cv = ctx.convex_num();
4280  size_type i = t.size() * imd.filtered_index_of_point(cv, ipt);
4281  GMM_ASSERT1(i+t.size() <= I.size(),
4282  "Internal error "<<i<<"+"<<t.size()<<" <= "<<I.size());
4283  for (auto &&val : t.as_vector())
4284  val = V[ifirst+(i++)];
4285  return 0;
4286  }
4287  ga_instruction_extract_residual_on_imd_dofs
4288  (base_tensor &t_, const base_vector &V_,
4289  const fem_interpolation_context &ctx_, const gmm::sub_interval &I_,
4290  const im_data &imd_, const size_type &ipt_)
4291  : t(t_), V(V_), ctx(ctx_), I(I_), imd(imd_), ipt(ipt_)
4292  {}
4293  };
4294 
4295 
4296  template <class MAT>
4297  inline void add_elem_matrix
4298  (MAT &K, const std::vector<size_type> &dofs1,
4299  const std::vector<size_type> &dofs2, std::vector<size_type> &/*dofs1_sort*/,
4300  base_vector &elem, scalar_type threshold, size_type /* N */) {
4301  base_vector::const_iterator it = elem.cbegin();
4302  for (const size_type &dof2 : dofs2)
4303  for (const size_type &dof1 : dofs1) {
4304  if (gmm::abs(*it) > threshold)
4305  K(dof1, dof2) += *it;
4306  ++it;
4307  }
4308  }
4309 
4310  // static const std::vector<size_type> *the_indto_sort;
4311  // int compare_my_indices(const void *a, const void *b) {
4312  // size_type aa = *((const size_type *)(a));
4313  // size_type bb = *((const size_type *)(b));
4314  // return int((*the_indto_sort)[aa]) - int((*the_indto_sort)[bb]);
4315  // }
4316 
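  // Specialization for column-oriented rsvector matrices: dofs1 is sorted
  // once per element (insertion sort), then each column of the element
  // matrix is merged into the corresponding sparse column, locating the
  // insertion point by binary search so that the column stays ordered.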
4317  inline void add_elem_matrix
4318  (gmm::col_matrix<gmm::rsvector<scalar_type>> &K,
4319  const std::vector<size_type> &dofs1, const std::vector<size_type> &dofs2,
4320  std::vector<size_type> &dofs1_sort,
4321  base_vector &elem, scalar_type threshold, size_type N) {
4322 
4323  size_type s1 = dofs1.size();
4324 
4325  dofs1_sort.resize(s1);
4326  for (size_type i = 0; i < s1; ++i) { // insertion sort
4327  size_type j = i, k = j-1;
4328  while (j > 0 && dofs1[i] < dofs1[dofs1_sort[k]])
4329  { dofs1_sort[j] = dofs1_sort[k]; j--; k--; }
4330  dofs1_sort[j] = i;
4331  }
4332 
4333  // dofs1_sort.resize(s1); // test with qsort: not faster in the tested cases
4334  // for (size_type i = 0; i < s1; ++i) dofs1_sort[i] = i;
4335  // the_indto_sort = &dofs1;
4336  // qsort(&(dofs1_sort[0]), s1, sizeof(size_type), compare_my_indices);
4337 
4338  gmm::elt_rsvector_<scalar_type> ev;
4339 
4340  size_type maxest = (N+1) * s1;
4341  base_vector::const_iterator it = elem.cbegin();
4342  bool first(true);
4343  for (const size_type &dof2 : dofs2) { // Iteration on columns
4344  if (first) first = false;
4345  else it += s1;
4346  std::vector<gmm::elt_rsvector_<scalar_type>> &col = K[dof2];
4347  size_type nb = col.size();
4348 
4349  if (nb == 0) {
4350  col.reserve(maxest);
4351  for (size_type i = 0; i < s1; ++i) {
4352  size_type k = dofs1_sort[i];
4353  ev.e = *(it+k);
4354  if (gmm::abs(ev.e) > threshold) {
4355  ev.c=dofs1[k];
4356  col.push_back(ev);
4357  }
4358  }
4359  } else { // column merge
4360  size_type ind = 0;
4361  for (size_type i = 0; i < s1; ++i) {
4362  size_type k = dofs1_sort[i];
4363  ev.e = *(it+k);
4364  if (gmm::abs(ev.e) > threshold) {
4365  ev.c = dofs1[k];
4366 
4367  size_type count = nb - ind, step, l;
4368  while (count > 0) {
4369  step = count / 2;
4370  l = ind + step;
4371  if (col[l].c < ev.c) {
4372  ind = ++l;
4373  count -= step + 1;
4374  }
4375  else
4376  count = step;
4377  }
4378 
4379  auto itc = col.begin() + ind;
4380  if (ind != nb && itc->c == ev.c)
4381  itc->e += ev.e;
4382  else {
4383  if (nb - ind > 1300)
4384  GMM_WARNING2("Inefficient addition of element in rsvector with "
4385  << col.size() - ind << " non-zero entries");
4386  col.push_back(ev);
4387  if (ind != nb) {
4388  itc = col.begin() + ind;
4389  auto ite = col.end();
4390  --ite;
4391  auto itee = ite;
4392  for (; ite != itc; --ite) { --itee; *ite = *itee; }
4393  *itc = ev;
4394  }
4395  ++nb;
4396  }
4397  ++ind;
4398  }
4399  }
4400  }
4401  }
4402  }
4403 
4404 
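  // Same column-merge strategy for the case where the row indices form the
  // contiguous range [i1, i1+s1), which makes any sorting of the row dofs
  // unnecessary.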
4405  inline void add_elem_matrix_contiguous_rows
4406  (gmm::col_matrix<gmm::rsvector<scalar_type>> &K,
4407  const size_type &i1, const size_type &s1,
4408  const std::vector<size_type> &dofs2,
4409  base_vector &elem, scalar_type threshold) {
4410 
4411  gmm::elt_rsvector_<scalar_type> ev;
4412 
4413  base_vector::const_iterator it = elem.cbegin();
4414  bool first(true);
4415  for (const size_type &dof2 : dofs2) { // Iteration on columns
4416  if (first) first = false;
4417  else it += s1;
4418  std::vector<gmm::elt_rsvector_<scalar_type>> &col = K[dof2];
4419  size_type nb = col.size();
4420 
4421  if (nb == 0) {
4422  col.reserve(s1);
4423  for (size_type i = 0; i < s1; ++i) {
4424  ev.e = *(it+i);
4425  if (gmm::abs(ev.e) > threshold) {
4426  ev.c = i1 + i;
4427  col.push_back(ev);
4428  }
4429  }
4430  } else { // column merge (can be optimized for a contiguous range)
4431  size_type ind = 0;
4432  for (size_type i = 0; i < s1; ++i) {
4433  ev.e = *(it+i);
4434  if (gmm::abs(ev.e) > threshold) {
4435  ev.c = i1 + i;
4436 
4437  size_type count = nb - ind, step, l;
4438  while (count > 0) {
4439  step = count / 2;
4440  l = ind + step;
4441  if (col[l].c < ev.c) {
4442  ind = ++l;
4443  count -= step + 1;
4444  }
4445  else
4446  count = step;
4447  }
4448 
4449  auto itc = col.begin() + ind;
4450  if (ind != nb && itc->c == ev.c)
4451  itc->e += ev.e;
4452  else {
4453  if (nb - ind > 1300)
4454  GMM_WARNING2("Inefficient addition of element in rsvector with "
4455  << col.size() - ind << " non-zero entries");
4456  col.push_back(ev);
4457  if (ind != nb) {
4458  itc = col.begin() + ind;
4459  auto ite = col.end();
4460  --ite;
4461  auto itee = ite;
4462  for (; ite != itc; --ite) { --itee; *ite = *itee; }
4463  *itc = ev;
4464  }
4465  ++nb;
4466  }
4467  ++ind;
4468  }
4469  }
4470  }
4471  }
4472  }
4473 
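  // Helpers building the list of global dof indices of an element: each
  // entry is ifirst plus the scalar basic dof index, expanded by the
  // component offset q when qmult > 1.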
4474  inline void populate_dofs_vector
4475  (std::vector<size_type> &dofs,
4476  const size_type &size, const size_type &ifirst, const size_type &qmult,
4477  const getfem::mesh::ind_set &mfdofs)
4478  {
4479  dofs.assign(size, ifirst);
4480  auto itd = dofs.begin();
4481  if (qmult == 1)
4482  for (const auto &dof : mfdofs) *itd++ += dof;
4483  else
4484  for (const auto &dof : mfdofs)
4485  for (size_type q = 0; q < qmult; ++q) *itd++ += dof + q;
4486  }
4487 
4488  inline void populate_dofs_vector // special case for qmult == 1
4489  (std::vector<size_type> &dofs, const size_type &size, const size_type &ifirst,
4490  const getfem::mesh::ind_set &mfdofs)
4491  {
4492  dofs.assign(size, ifirst);
4493  auto itd = dofs.begin();
4494  for (const auto &dof : mfdofs) *itd++ += dof;
4495  }
4496 
4497 
4498  inline void populate_contiguous_dofs_vector
4499  (std::vector<size_type> &dofs, const size_type &size, const size_type &ifirst)
4500  {
4501  dofs.assign(size, ifirst);
4502  for (size_type i=0; i < size; ++i) dofs[i] += i;
4503  }
4504 
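  // Common part of the matrix assembly instructions: accumulates
  // coeff*alpha1*alpha2 times the computed tensor into the element matrix
  // "elem" over the Gauss points; derived classes scatter "elem" into the
  // global sparse matrix when the last point of the element is reached (or
  // at every point for interpolate terms).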
4505  struct ga_instruction_matrix_assembly_base : public ga_instruction {
4506  const base_tensor &t;
4507  const fem_interpolation_context &ctx1, &ctx2;
4508  const scalar_type &alpha1, &alpha2, &coeff;
4509  const size_type &nbpt, &ipt;
4510  base_vector elem;
4511  bool interpolate;
4512  std::vector<size_type> dofs1, dofs2, dofs1_sort;
4513  void add_tensor_to_element_matrix(bool initialize, bool empty_weight) {
4514  if (initialize) {
4515  if (empty_weight) elem.resize(0);
4516  elem.resize(t.size());
4517  if (!empty_weight)
4518  copy_scaled_4(t, coeff*alpha1*alpha2, elem);
4519  } else if (!empty_weight)
4520  // gmm::add(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
4521  // Faster than a daxpy blas call on my config
4522  add_scaled_4(t, coeff*alpha1*alpha2, elem);
4523  }
4524  ga_instruction_matrix_assembly_base
4525  (const base_tensor &t_,
4526  const fem_interpolation_context &ctx1_,
4527  const fem_interpolation_context &ctx2_,
4528  const scalar_type &a1, const scalar_type &a2, const scalar_type &coeff_,
4529  const size_type &nbpt_, const size_type &ipt_, bool interpolate_)
4530  : t(t_), ctx1(ctx1_), ctx2(ctx2_), alpha1(a1), alpha2(a2),
4531  coeff(coeff_), nbpt(nbpt_), ipt(ipt_), interpolate(interpolate_),
4532  dofs1(0), dofs2(0), dofs1_sort(0)
4533  {}
4534  protected:
4535  const bool false_=false;
4536  const size_type zero_=0;
4537  };
4538 
4539 
4540  struct ga_instruction_matrix_assembly_mf_mf
4541  : public ga_instruction_matrix_assembly_base
4542  {
4543  model_real_sparse_matrix &Krr, &Kru, &Kur, &Kuu;
4544  const gmm::sub_interval *const&I1, *const&I2, *const I1__, *const I2__;
4545  const mesh_fem *const&mf1, *const&mf2, *const mf1__, *const mf2__;
4546  const bool &reduced_mf1, &reduced_mf2; // refs to mf1/2->is_reduced()
4547  virtual int exec() {
4548  GA_DEBUG_INFO("Instruction: matrix term assembly");
4549  if (!ctx1.is_convex_num_valid() || !ctx2.is_convex_num_valid()) return 0;
4550 
4551  bool initialize = (ipt == 0 || interpolate);
4552  bool empty_weight = (coeff == scalar_type(0));
4553  add_tensor_to_element_matrix(initialize, empty_weight); // t --> elem
4554 
4555  if (ipt == nbpt-1 || interpolate) { // finalize
4556  model_real_sparse_matrix &K = reduced_mf1 ? (reduced_mf2 ? Kuu : Kur)
4557  : (reduced_mf2 ? Kru : Krr);
4558  GA_DEBUG_ASSERT(I1->size() && I2->size(), "Internal error");
4559 
4560  scalar_type ninf = gmm::vect_norminf(elem);
4561  if (ninf == scalar_type(0)) return 0;
4562 
4563  size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
4564  size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
4565  size_type ifirst1 = I1->first(), ifirst2 = I2->first();
4566 
4567  size_type N = ctx1.N();
4568  size_type qmult1 = mf1->get_qdim();
4569  if (qmult1 > 1) qmult1 /= mf1->fem_of_element(cv1)->target_dim();
4570  populate_dofs_vector(dofs1, s1, ifirst1, qmult1, // --> dofs1
4571  mf1->ind_scalar_basic_dof_of_element(cv1));
4572  if (mf1 == mf2 && cv1 == cv2) {
4573  if (ifirst1 == ifirst2) {
4574  add_elem_matrix(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
4575  } else {
4576  populate_dofs_vector(dofs2, dofs1.size(), ifirst2 - ifirst1, dofs1);
4577  add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
4578  }
4579  } else {
4580  N = std::max(N, ctx2.N());
4581  size_type qmult2 = mf2->get_qdim();
4582  if (qmult2 > 1) qmult2 /= mf2->fem_of_element(cv2)->target_dim();
4583  populate_dofs_vector(dofs2, s2, ifirst2, qmult2, // --> dofs2
4584  mf2->ind_scalar_basic_dof_of_element(cv2));
4585  add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
4586  }
4587  }
4588  return 0;
4589  }
4590 
4591  ga_instruction_matrix_assembly_mf_mf
4592  (const base_tensor &t_,
4593  model_real_sparse_matrix &Krr_, model_real_sparse_matrix &Kru_,
4594  model_real_sparse_matrix &Kur_, model_real_sparse_matrix &Kuu_,
4595  const fem_interpolation_context &ctx1_,
4596  const fem_interpolation_context &ctx2_,
4597  const ga_instruction_set::variable_group_info &vgi1,
4598  const ga_instruction_set::variable_group_info &vgi2,
4599  const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
4600  bool interpolate_)
4601  : ga_instruction_matrix_assembly_base
4602  (t_, ctx1_, ctx2_, vgi1.alpha, vgi2.alpha, coeff_, nbpt_, ipt_,
4603  interpolate_),
4604  Krr(Krr_), Kru(Kru_), Kur(Kur_), Kuu(Kuu_),
4605  I1(vgi1.I), I2(vgi2.I), I1__(nullptr), I2__(nullptr),
4606  mf1(vgi1.mf), mf2(vgi2.mf), mf1__(nullptr), mf2__(nullptr),
4607  reduced_mf1(vgi1.reduced_mf), reduced_mf2(vgi2.reduced_mf) {}
4608 
4609  ga_instruction_matrix_assembly_mf_mf
4610  (const base_tensor &t_,
4611  model_real_sparse_matrix &Kxr_, model_real_sparse_matrix &Kxu_,
4612  const fem_interpolation_context &ctx1_,
4613  const fem_interpolation_context &ctx2_,
4614  const gmm::sub_interval &I1_, const mesh_fem &mf1_, const scalar_type &a1,
4615  const ga_instruction_set::variable_group_info &vgi2,
4616  const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
4617  bool interpolate_)
4618  : ga_instruction_matrix_assembly_base
4619  (t_, ctx1_, ctx2_, a1, vgi2.alpha, coeff_, nbpt_, ipt_, interpolate_),
4620  Krr(Kxr_), Kru(Kxu_), Kur(Kxr_), Kuu(Kxu_),
4621  I1(I1__), I2(vgi2.I), I1__(&I1_), I2__(nullptr),
4622  mf1(mf1__), mf2(vgi2.mf), mf1__(&mf1_), mf2__(nullptr),
4623  reduced_mf1(false_), reduced_mf2(vgi2.reduced_mf) {}
4624 
4625  ga_instruction_matrix_assembly_mf_mf
4626  (const base_tensor &t_,
4627  model_real_sparse_matrix &Krx_, model_real_sparse_matrix &Kux_,
4628  const fem_interpolation_context &ctx1_,
4629  const fem_interpolation_context &ctx2_,
4630  const ga_instruction_set::variable_group_info &vgi1,
4631  const gmm::sub_interval &I2_, const mesh_fem &mf2_, const scalar_type &a2,
4632  const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
4633  bool interpolate_)
4634  : ga_instruction_matrix_assembly_base
4635  (t_, ctx1_, ctx2_, vgi1.alpha, a2, coeff_, nbpt_, ipt_, interpolate_),
4636  Krr(Krx_), Kru(Krx_), Kur(Kux_), Kuu(Kux_),
4637  I1(vgi1.I), I2(I2__), I1__(nullptr), I2__(&I2_),
4638  mf1(vgi1.mf), mf2(mf2__), mf1__(nullptr), mf2__(&mf2_),
4639  reduced_mf1(vgi1.reduced_mf), reduced_mf2(false_) {}
4640 
4641  ga_instruction_matrix_assembly_mf_mf
4642  (const base_tensor &t_, model_real_sparse_matrix &K_,
4643  const fem_interpolation_context &ctx1_,
4644  const fem_interpolation_context &ctx2_,
4645  const gmm::sub_interval &I1_, const mesh_fem &mf1_, const scalar_type &a1,
4646  const gmm::sub_interval &I2_, const mesh_fem &mf2_, const scalar_type &a2,
4647  const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
4648  bool interpolate_)
4649  : ga_instruction_matrix_assembly_base
4650  (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, interpolate_),
4651  Krr(K_), Kru(K_), Kur(K_), Kuu(K_),
4652  I1(I1__), I2(I2__), I1__(&I1_), I2__(&I2_),
4653  mf1(mf1__), mf2(mf2__), mf1__(&mf1_), mf2__(&mf2_),
4654  reduced_mf1(false_), reduced_mf2(false_) {}
4655  };
4656 
4657 
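  // Variant where the row variable is im_data and the column variable a
  // mesh_fem: row dofs are contiguous, offset by s1 dofs per (filtered)
  // integration point of the current element.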
4658  struct ga_instruction_matrix_assembly_imd_mf
4659  : public ga_instruction_matrix_assembly_base
4660  {
4661  model_real_sparse_matrix &Kxr, &Kxu;
4662  const gmm::sub_interval *I1, *I2__, * const &I2;
4663  const im_data *imd1;
4664  const mesh_fem * const mf2__, * const &mf2;
4665  const bool &reduced_mf2; // ref to mf2->is_reduced()
4666  virtual int exec() {
4667  GA_DEBUG_INFO("Instruction: matrix term assembly");
4668  if (!ctx1.is_convex_num_valid() || !ctx2.is_convex_num_valid()) return 0;
4669 
4670  bool empty_weight = (coeff == scalar_type(0));
4671  add_tensor_to_element_matrix(true, empty_weight); // t --> elem
4672 
4673  scalar_type ninf = gmm::vect_norminf(elem);
4674  if (ninf == scalar_type(0)) return 0;
4675 
4676  model_real_sparse_matrix &K = reduced_mf2 ? Kxu : Kxr;
4677  GA_DEBUG_ASSERT(I1->size() && I2->size(), "Internal error");
4678  size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
4679  size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
4680  size_type ifirst1 = I1->first(), ifirst2 = I2->first();
4681  if (imd1) ifirst1 += s1 * imd1->filtered_index_of_point(cv1, ipt);
4682 
4683  populate_contiguous_dofs_vector(dofs1, s1, ifirst1); // --> dofs1
4684  size_type qmult2 = mf2->get_qdim();
4685  if (qmult2 > 1) qmult2 /= mf2->fem_of_element(cv2)->target_dim();
4686  populate_dofs_vector(dofs2, s2, ifirst2, qmult2, // --> dofs2
4687  mf2->ind_scalar_basic_dof_of_element(cv2));
4688  add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, ctx2.N());
4689  return 0;
4690  }
4691 
4692  ga_instruction_matrix_assembly_imd_mf
4693  (const base_tensor &t_,
4694  model_real_sparse_matrix &Kxr_, model_real_sparse_matrix &Kxu_,
4695  const fem_interpolation_context &ctx1_,
4696  const fem_interpolation_context &ctx2_,
4697  const gmm::sub_interval &I1_, const im_data *imd1_, const scalar_type &a1,
4698  const ga_instruction_set::variable_group_info &vgi2,
4699  const scalar_type &coeff_, const size_type &ipt_)
4700  : ga_instruction_matrix_assembly_base
4701  (t_, ctx1_, ctx2_, a1, vgi2.alpha, coeff_, zero_, ipt_, false),
4702  Kxr(Kxr_), Kxu(Kxu_), I1(&I1_), I2__(nullptr), I2(vgi2.I),
4703  imd1(imd1_), mf2__(nullptr), mf2(vgi2.mf), reduced_mf2(vgi2.reduced_mf)
4704  {}
4705 
4706  ga_instruction_matrix_assembly_imd_mf
4707  (const base_tensor &t_, model_real_sparse_matrix &K_,
4708  const fem_interpolation_context &ctx1_,
4709  const fem_interpolation_context &ctx2_,
4710  const gmm::sub_interval &I1_, const im_data *imd1_, const scalar_type &a1,
4711  const gmm::sub_interval &I2_, const mesh_fem &mf2_, const scalar_type &a2,
4712  const scalar_type &coeff_, const size_type &ipt_)
4713  : ga_instruction_matrix_assembly_base
4714  (t_, ctx1_, ctx2_, a1, a2, coeff_, zero_, ipt_, false),
4715  Kxr(K_), Kxu(K_), I1(&I1_), I2__(&I2_), I2(I2__),
4716  imd1(imd1_), mf2__(&mf2_), mf2(mf2__), reduced_mf2(false_) {}
4717  };
4718 
4719  struct ga_instruction_matrix_assembly_mf_imd
4720  : public ga_instruction_matrix_assembly_base
4721  {
4722  model_real_sparse_matrix &Krx, &Kux;
4723  const gmm::sub_interval * const &I1, *const I1__, *I2;
4724  const mesh_fem * const &mf1, *const mf1__;
4725  const bool &reduced_mf1; // ref to mf1->is_reduced()
4726  const im_data *imd2;
4727  virtual int exec() {
4728  GA_DEBUG_INFO("Instruction: matrix term assembly");
4729  if (!ctx1.is_convex_num_valid() || !ctx2.is_convex_num_valid()) return 0;
4730 
4731  bool empty_weight = (coeff == scalar_type(0));
4732  add_tensor_to_element_matrix(true, empty_weight); // t --> elem
4733 
4734  scalar_type ninf = gmm::vect_norminf(elem);
4735  if (ninf == scalar_type(0)) return 0;
4736 
4737  model_real_sparse_matrix &K = reduced_mf1 ? Kux : Krx;
4738  GA_DEBUG_ASSERT(I1->size() && I2->size(), "Internal error");
4739  size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
4740  size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
4741  size_type ifirst1 = I1->first(), ifirst2 = I2->first();
4742  if (imd2) ifirst2 += s2 * imd2->filtered_index_of_point(cv2, ipt);
4743 
4744  size_type qmult1 = mf1->get_qdim();
4745  if (qmult1 > 1) qmult1 /= mf1->fem_of_element(cv1)->target_dim();
4746  populate_dofs_vector(dofs1, s1, ifirst1, qmult1, // --> dofs1
4747  mf1->ind_scalar_basic_dof_of_element(cv1));
4748  populate_contiguous_dofs_vector(dofs2, s2, ifirst2); // --> dofs2
4749  add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, ctx1.N());
4750  return 0;
4751  }
4752 
4753  ga_instruction_matrix_assembly_mf_imd
4754  (const base_tensor &t_,
4755  model_real_sparse_matrix &Krx_, model_real_sparse_matrix &Kux_,
4756  const fem_interpolation_context &ctx1_,
4757  const fem_interpolation_context &ctx2_,
4758  const ga_instruction_set::variable_group_info &vgi1,
4759  const gmm::sub_interval &I2_, const im_data *imd2_, const scalar_type &a2,
4760  const scalar_type &coeff_, const size_type &ipt_)
4761  : ga_instruction_matrix_assembly_base
4762  (t_, ctx1_, ctx2_, vgi1.alpha, a2, coeff_, zero_, ipt_, false),
4763  Krx(Krx_), Kux(Kux_), I1(vgi1.I), I1__(nullptr), I2(&I2_),
4764  mf1(vgi1.mf), mf1__(nullptr), reduced_mf1(vgi1.reduced_mf), imd2(imd2_)
4765  {}
4766 
4767  ga_instruction_matrix_assembly_mf_imd
4768  (const base_tensor &t_, model_real_sparse_matrix &K_,
4769  const fem_interpolation_context &ctx1_,
4770  const fem_interpolation_context &ctx2_,
4771  const gmm::sub_interval &I1_, const mesh_fem &mf1_, const scalar_type &a1,
4772  const gmm::sub_interval &I2_, const im_data *imd2_, const scalar_type &a2,
4773  const scalar_type &coeff_, const size_type &ipt_)
4774  : ga_instruction_matrix_assembly_base
4775  (t_, ctx1_, ctx2_, a1, a2, coeff_, zero_, ipt_, false),
4776  Krx(K_), Kux(K_), I1(I1__), I1__(&I1_), I2(&I2_),
4777  mf1(mf1__), mf1__(&mf1_), reduced_mf1(false_), imd2(imd2_) {}
4778  };
4779 
4780 
4781 
4782  struct ga_instruction_matrix_assembly_imd_imd
4783  : public ga_instruction_matrix_assembly_base
4784  {
4785  model_real_sparse_matrix &K;
4786  const gmm::sub_interval &I1, &I2;
4787  const im_data *imd1, *imd2;
4788  virtual int exec() {
4789  GA_DEBUG_INFO("Instruction: matrix term assembly");
4790  GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");
4791 
4792  bool empty_weight = (coeff == scalar_type(0));
4793  add_tensor_to_element_matrix(true, empty_weight); // t --> elem
4794 
4795  scalar_type ninf = gmm::vect_norminf(elem);
4796  if (ninf == scalar_type(0)) return 0;
4797 
4798  size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
4799  size_type ifirst1 = I1.first(), ifirst2 = I2.first();
4800  if (imd1)
4801  ifirst1 += s1 * imd1->filtered_index_of_point(ctx1.convex_num(), ipt);
4802  if (imd2)
4803  ifirst2 += s2 * imd2->filtered_index_of_point(ctx2.convex_num(), ipt);
4804 
4805  populate_contiguous_dofs_vector(dofs2, s2, ifirst2);
4806  add_elem_matrix_contiguous_rows(K, ifirst1, s1, dofs2, elem, ninf*1E-14);
4807  return 0;
4808  }
4809  ga_instruction_matrix_assembly_imd_imd
4810  (const base_tensor &t_, model_real_sparse_matrix &K_,
4811  const fem_interpolation_context &ctx1_,
4812  const fem_interpolation_context &ctx2_,
4813  const gmm::sub_interval &I1_, const im_data *imd1_, const scalar_type &a1,
4814  const gmm::sub_interval &I2_, const im_data *imd2_, const scalar_type &a2,
4815  const scalar_type &coeff_, const size_type &ipt_)
4816  : ga_instruction_matrix_assembly_base
4817  (t_, ctx1_, ctx2_, a1, a2, coeff_, zero_, ipt_, false),
4818  K(K_), I1(I1_), I2(I2_), imd1(imd1_), imd2(imd2_) {}
4819  };
4820 
4821 
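  // Specialized assembly for two standard scalar fems: dof indices are taken
  // directly from the scalar basic dofs of each element (no qdim multiplier)
  // and the accumulated contribution is flushed at the last integration point.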
4822  struct ga_instruction_matrix_assembly_standard_scalar
4823  : public ga_instruction_matrix_assembly_base
4824  {
4825  model_real_sparse_matrix &K;
4826  const gmm::sub_interval &I1, &I2;
4827  const mesh_fem *pmf1, *pmf2;
4828  virtual int exec() {
4829  GA_DEBUG_INFO("Instruction: matrix term assembly for standard "
4830  "scalar fems");
4831  if (ipt == 0) {
4832  elem.resize(t.size());
4833  // gmm::copy(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
4834  copy_scaled_4(t, coeff*alpha1*alpha2, elem);
4835  } else
4836  // gmm::add(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
4837  // Faster than a daxpy blas call on my config
4838  add_scaled_4(t, coeff*alpha1*alpha2, elem);
4839 
4840  if (ipt == nbpt-1) { // finalize
4841  GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");
4842 
4843  scalar_type ninf = gmm::vect_norminf(elem);
4844  if (ninf == scalar_type(0)) return 0;
4845 
4846  size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num(), N=ctx1.N();
4847  if (cv1 == size_type(-1)) return 0;
4848  auto &ct1 = pmf1->ind_scalar_basic_dof_of_element(cv1);
4849  GA_DEBUG_ASSERT(ct1.size() == t.sizes()[0], "Internal error");
4850  populate_dofs_vector(dofs1, ct1.size(), I1.first(), ct1);
4851 
4852  if (pmf2 == pmf1 && cv1 == cv2) {
4853  if (I1.first() == I2.first()) {
4854  add_elem_matrix(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
4855  } else {
4856  populate_dofs_vector(dofs2, dofs1.size(), I2.first() - I1.first(),
4857  dofs1);
4858  add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
4859  }
4860  } else {
4861  if (cv2 == size_type(-1)) return 0;
4862  auto &ct2 = pmf2->ind_scalar_basic_dof_of_element(cv2);
4863  GA_DEBUG_ASSERT(ct2.size() == t.sizes()[1], "Internal error");
4864  populate_dofs_vector(dofs2, ct2.size(), I2.first(), ct2);
4865  add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
4866  }
4867  }
4868  return 0;
4869  }
4870  ga_instruction_matrix_assembly_standard_scalar
4871  (const base_tensor &t_, model_real_sparse_matrix &K_,
4872  const fem_interpolation_context &ctx1_,
4873  const fem_interpolation_context &ctx2_,
4874  const gmm::sub_interval &I1_, const gmm::sub_interval &I2_,
4875  const mesh_fem *mfn1_, const mesh_fem *mfn2_,
4876  const scalar_type &a1, const scalar_type &a2, const scalar_type &coeff_,
4877  const size_type &nbpt_, const size_type &ipt_)
4878  : ga_instruction_matrix_assembly_base
4879  (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, false),
4880  K(K_), I1(I1_), I2(I2_), pmf1(mfn1_), pmf2(mfn2_) {}
4881  };
4882 
4883  struct ga_instruction_matrix_assembly_standard_vector
4884  : public ga_instruction_matrix_assembly_base
4885  {
4886  model_real_sparse_matrix &K;
4887  const gmm::sub_interval &I1, &I2;
4888  const mesh_fem *pmf1, *pmf2;
4889  virtual int exec() {
4890  GA_DEBUG_INFO("Instruction: matrix term assembly for standard "
4891  "vector fems");
4892  if (ipt == 0) {
4893  elem.resize(t.size());
4894  copy_scaled_8(t, coeff*alpha1*alpha2, elem);
4895  // gmm::copy(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
4896  } else
4897  // gmm::add(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
4898  // (Far) faster than a daxpy blas call on my config.
4899  add_scaled_8(t, coeff*alpha1*alpha2, elem);
4900 
4901  if (ipt == nbpt-1) { // finalize
4902  GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");
4903 
4904  scalar_type ninf = gmm::vect_norminf(elem);
4905  if (ninf == scalar_type(0)) return 0;
4906  size_type s1 = t.sizes()[0], s2 = t.sizes()[1], N = ctx1.N();
4907 
4908  size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
4909  if (cv1 == size_type(-1)) return 0;
4910  size_type qmult1 = pmf1->get_qdim();
4911  if (qmult1 > 1) qmult1 /= pmf1->fem_of_element(cv1)->target_dim();
4912  populate_dofs_vector(dofs1, s1, I1.first(), qmult1, // --> dofs1
4913  pmf1->ind_scalar_basic_dof_of_element(cv1));
4914 
4915  if (pmf2 == pmf1 && cv1 == cv2 && I1.first() == I2.first()) {
4916  add_elem_matrix(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
4917  } else {
4918  if (pmf2 == pmf1 && cv1 == cv2) {
4919  populate_dofs_vector(dofs2, dofs1.size(), I2.first() - I1.first(),
4920  dofs1);
4921  } else {
4922  if (cv2 == size_type(-1)) return 0;
4923  size_type qmult2 = pmf2->get_qdim();
4924  if (qmult2 > 1) qmult2 /= pmf2->fem_of_element(cv2)->target_dim();
4925  populate_dofs_vector(dofs2, s2, I2.first(), qmult2, // --> dofs2
4926  pmf2->ind_scalar_basic_dof_of_element(cv2));
4927  }
4928  add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
4929  }
4930  }
4931  return 0;
4932  }
4933  ga_instruction_matrix_assembly_standard_vector
4934  (const base_tensor &t_, model_real_sparse_matrix &K_,
4935  const fem_interpolation_context &ctx1_,
4936  const fem_interpolation_context &ctx2_,
4937  const gmm::sub_interval &I1_, const gmm::sub_interval &I2_,
4938  const mesh_fem *mfn1_, const mesh_fem *mfn2_,
4939  const scalar_type &a1, const scalar_type &a2, const scalar_type &coeff_,
4940  const size_type &nbpt_, const size_type &ipt_)
4941  : ga_instruction_matrix_assembly_base
4942  (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, false),
4943  K(K_), I1(I1_), I2(I2_), pmf1(mfn1_), pmf2(mfn2_) {}
4944  };
4945 
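  // Optimized assembly for uniformly vectorized fems of qdim QQ (2 or 3) in
  // format 10: only the scalar ss1 x ss2 sub-block of the element matrix is
  // accumulated, and it is assembled QQ times with all dof indices shifted by
  // one each time, exploiting the block structure of vectorized element
  // matrices.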
4946  template<int QQ>
4947  struct ga_instruction_matrix_assembly_standard_vector_opt10
4948  : public ga_instruction_matrix_assembly_base
4949  {
4950  model_real_sparse_matrix &K;
4951  const gmm::sub_interval &I1, &I2;
4952  const mesh_fem *pmf1, *pmf2;
4953  virtual int exec() {
4954  GA_DEBUG_INFO("Instruction: matrix term assembly for standard "
4955  "vector fems optimized for format 10 qdim " << QQ);
4956  size_type s1_q = QQ*t.sizes()[0];
4957  size_type ss1 = t.sizes()[0]/QQ, ss2 = t.sizes()[1]/QQ;
4958  scalar_type e = coeff*alpha1*alpha2;
4959  if (ipt == 0) {
4960  elem.resize(ss1*ss2);
4961  auto itel = elem.begin();
4962  for (size_type j = 0; j < ss2; ++j) {
4963  auto it = t.begin() + j*s1_q;
4964  for (size_type i = 0; i < ss1; ++i, it += QQ)
4965  *itel++ = (*it) * e;
4966  }
4967  } else {
4968  auto itel = elem.begin();
4969  for (size_type j = 0; j < ss2; ++j) {
4970  auto it = t.begin() + j*s1_q;
4971  for (size_type i = 0; i < ss1; ++i, it += QQ)
4972  *itel++ += (*it) * e;
4973  }
4974  }
4975  if (ipt == nbpt-1) { // finalize
4976  GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");
4977 
4978  scalar_type ninf = gmm::vect_norminf(elem) * 1E-14;
4979  if (ninf == scalar_type(0)) return 0;
4980  size_type N = ctx1.N();
4981  size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
4982  size_type i1 = I1.first(), i2 = I2.first();
4983  if (cv1 == size_type(-1)) return 0;
4984  populate_dofs_vector(dofs1, ss1, i1,
4985  pmf1->ind_scalar_basic_dof_of_element(cv1));
4986  bool same_dofs(pmf2 == pmf1 && cv1 == cv2 && i1 == i2);
4987 
4988  if (!same_dofs) {
4989  if (cv2 == size_type(-1)) return 0;
4990  populate_dofs_vector(dofs2, ss2, i2,
4991  pmf2->ind_scalar_basic_dof_of_element(cv2));
4992  }
4993  std::vector<size_type> &dofs2_ = same_dofs ? dofs1 : dofs2;
4994  add_elem_matrix(K, dofs1, dofs2_, dofs1_sort, elem, ninf, N);
4995  for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
4996  if (!same_dofs) for (size_type i = 0; i < ss2; ++i) (dofs2[i])++;
4997  add_elem_matrix(K, dofs1, dofs2_, dofs1_sort, elem, ninf, N);
4998  if (QQ >= 3) {
4999  for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
5000  if (!same_dofs) for (size_type i = 0; i < ss2; ++i) (dofs2[i])++;
5001  add_elem_matrix(K, dofs1, dofs2_, dofs1_sort, elem, ninf, N);
5002  }
5003  }
5004  return 0;
5005  }
5006 
5007  ga_instruction_matrix_assembly_standard_vector_opt10
5008  (const base_tensor &t_, model_real_sparse_matrix &Kn_,
5009  const fem_interpolation_context &ctx1_,
5010  const fem_interpolation_context &ctx2_,
5011  const gmm::sub_interval &In1_, const gmm::sub_interval &In2_,
5012  const mesh_fem *mfn1_, const mesh_fem *mfn2_,
5013  const scalar_type &a1, const scalar_type &a2, const scalar_type &coeff_,
5014  const size_type &nbpt_, const size_type &ipt_)
5015  : ga_instruction_matrix_assembly_base
5016  (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, false),
5017  K(Kn_), I1(In1_), I2(In2_), pmf1(mfn1_), pmf2(mfn2_)
5018  {
5019  static_assert(QQ >= 2 && QQ <=3,
5020  "Template implemented only for QQ=2 and QQ=3");
5021  }
5022  };
5023 
5024 
5025  struct ga_instruction_condensation_sub : public ga_instruction {
5026  // one such instruction is used for every cluster of intercoupled
5027  // condensed variables
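  // For the cluster, with blocks K_QQ (coupling among condensed variables),
  // K_QJ (coupling to kept variables) and residual R_Q, this instruction
  // computes KQJprime = K_QQ^{-1} K_QJ and RQprime = K_QQ^{-1} R_Q / coeff;
  // the condensation_super_K/_R instructions below combine these with K_iQ
  // to form the condensed contributions to the kept terms.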
5028  gmm::dense_matrix<base_tensor *> KQJprime;
5029  std::vector<base_tensor *> RQprime;
5030  gmm::dense_matrix<base_tensor const *> KQQloc, KQJloc;
5031  base_tensor invKqqqq, Kqqjj;
5032  base_vector Rqq;
5033  std::vector<std::array<size_type,3>> partQ, partJ;
5034  const scalar_type &coeff; // &alpha1, &alpha2 ?
5035  virtual int exec() { // implementation can be optimized
5036  GA_DEBUG_INFO("Instruction: variable cluster subdiagonal condensation");
5037  // copy from KQQ to invKqqqq
5038  for (const auto &qqq1 : partQ) {
5039  size_type q1 = qqq1[0], qq1start = qqq1[1], qq1end = qqq1[2];
5040  for (const auto &qqq2 : partQ) {
5041  size_type q2 = qqq2[0], qq2start = qqq2[1], qq2end = qqq2[2];
5042  if (KQQloc(q1,q2)) {
5043  auto itr = KQQloc(q1,q2)->cbegin();
5044  GMM_ASSERT1(KQQloc(q1,q2)->size()
5045  == (qq1end-qq1start)*(qq2end-qq2start),
5046  "Internal error");
5047  for (size_type qq2=qq2start; qq2 < qq2end; ++qq2)
5048  for (size_type qq1=qq1start; qq1 < qq1end; ++qq1)
5049  invKqqqq(qq1,qq2) = *itr++;
5050  }
5051  }
5052  }
5053  // calculate inverse matrix invKqqqq
5054  bgeot::lu_inverse(&(invKqqqq[0]), invKqqqq.size(0));
5055 
5056  // Resize Kqqjj as primary variable sizes may change dynamically
5057  size_type prev_j(0);
5058  for (auto &&jjj : partJ) {
5059  size_type j=jjj[0];
5060  size_type new_j(0);
5061  for (const auto &qqq : partQ) {
5062  size_type q=qqq[0];
5063  if (KQJloc(q,j)) {
5064  if (new_j) {
5065  GMM_ASSERT1(new_j == KQJloc(q,j)->size(1), "Internal error");
5066  } else
5067  new_j = KQJloc(q,j)->size(1);
5068  }
5069  }
5070  // Resize KQJprime submatrices to match KQJloc sizes
5071  for (const auto &qqq : partQ) {
5072  size_type q=qqq[0];
5073  KQJprime(q,j)->adjust_sizes(qqq[2]-qqq[1], new_j);
5074  }
5075  jjj[1] = prev_j;
5076  prev_j += new_j;
5077  jjj[2] = prev_j;
5078  }
5079 
5080  Kqqjj.adjust_sizes(partQ.back()[2], partJ.back()[2]);
5081  gmm::clear(Kqqjj.as_vector());
5082  gmm::clear(Rqq);
5083 
5084  // multiply invKqqqq with all submatrices in KQJloc and RQprime and store
5085  // the results in Kqqjj and Rqq
5086  for (const auto &jjj : partJ) {
5087  size_type j = jjj[0], jjstart = jjj[1], jjend = jjj[2];
5088  for (const auto &qqq2 : partQ) {
5089  size_type q2 = qqq2[0], qq2start = qqq2[1], qq2end = qqq2[2];
5090  if (KQJloc(q2,j)) {
5091  auto itr = KQJloc(q2,j)->begin(); // auto &mat = KQJloc(q2,j);
5092  for (size_type jj=jjstart; jj < jjend; ++jj) {
5093  for (size_type qq2=qq2start; qq2 < qq2end; ++qq2, ++itr) {
5094  for (size_type qq1=0; qq1 < partQ.back()[2]; ++qq1) {
5095  Kqqjj(qq1,jj) += invKqqqq(qq1,qq2)*(*itr);
5096  // Kqqjj(qq1,jj) += invKqq(qq1,qq2)*mat(qq2-qqstart,jj-jjstart);
5097  } // for qq1
5098  } // for qq2
5099  } // for jj
5100  GMM_ASSERT1(itr == KQJloc(q2,j)->cend(), "Internal error");
5101  }
5102  } // in partQ
5103  } // in partJ
5104  for (const auto &qqq2 : partQ) {
5105  size_type q2 = qqq2[0], qq2start = qqq2[1], qq2end = qqq2[2];
5106  if (RQprime[q2]) {
5107  auto itr = RQprime[q2]->cbegin();
5108  for (size_type qq2=qq2start; qq2 < qq2end; ++qq2, ++itr) {
5109  for (size_type qq1=0; qq1 < invKqqqq.size(0); ++qq1)
5110  Rqq[qq1] += invKqqqq(qq1,qq2)*(*itr);
5111  } // for qq2
5112  GMM_ASSERT1(itr == RQprime[q2]->cend(), "Internal error");
5113  }
5114  } // in partQ
5115 
5116  // distribute the results from Kqqjj/Rqq to KQJprime/RQprime
5117  // submatrices/subvectors
5118  for (const auto &qqq1 : partQ) {
5119  size_type q1 = qqq1[0], qq1start = qqq1[1], qq1end = qqq1[2];
5120  { // writing into RQprime
5121  auto itw = RQprime[q1]->begin();
5122  for (size_type qq1=qq1start; qq1 < qq1end; ++qq1)
5123  *itw++ = Rqq[qq1]/coeff;
5124  }
5125  for (const auto &jjj2 : partJ) {
5126  size_type j2 = jjj2[0], jj2start = jjj2[1], jj2end = jjj2[2];
5127  auto itw = KQJprime(q1,j2)->begin();
5128  for (size_type jj2=jj2start; jj2 < jj2end; ++jj2)
5129  for (size_type qq1=qq1start; qq1 < qq1end; ++qq1)
5130  *itw++ = Kqqjj(qq1,jj2);
5131  }
5132  }
5133  return 0;
5134  }
5135 
5136  ga_instruction_condensation_sub(gmm::dense_matrix<base_tensor *> &KQJpr,
5137  std::vector<base_tensor *> &RQpr, // input/output
5138  const gmm::dense_matrix<base_tensor *> &KQQ,
5139  const gmm::dense_matrix<base_tensor *> &KQJ,
5140  const std::set<size_type> &Qset,
5141  const scalar_type &coeff_)
5142  : KQJprime(KQJpr), RQprime(RQpr), coeff(coeff_)
5143  {
5144  // convert the pointers from base_tensor * to base_tensor const *
5145  KQQloc.resize(KQQ.nrows(), KQQ.ncols());
5146  KQJloc.resize(KQJ.nrows(), KQJ.ncols());
5147  for (size_type i=0; i < KQQ.as_vector().size(); ++i) KQQloc[i] = KQQ[i];
5148  for (size_type i=0; i < KQJ.as_vector().size(); ++i) KQJloc[i] = KQJ[i];
5149 
5150  for (size_type j=0; j < KQJ.ncols(); ++j)
5151  for (const size_type &q : Qset)
5152  if (KQJ(q,j)) {
5153  partJ.push_back(std::array<size_type,3>{j,0,0});
5154  break;
5155  }
5156 
5157  partQ.resize(0);
5158  for (const size_type &q : Qset)
5159  partQ.push_back(std::array<size_type,3>{q,0,0});
5160  size_type prev_q(0);
5161  for (auto &qqq1 : partQ) {
5162  size_type q1 = qqq1[0];
5163  size_type new_q(0);
5164  for (const size_type &q2 : Qset)
5165  if (new_q) {
5166  GMM_ASSERT1(new_q == KQQ(q1,q2)->size(0) &&
5167  new_q == KQQ(q2,q1)->size(1), "Internal error");
5168  } else
5169  new_q = KQQ(q1,q2)->size(0);
5170  qqq1[1] = prev_q;
5171  prev_q += new_q;
5172  qqq1[2] = prev_q;
5173  }
5174  invKqqqq.adjust_sizes(partQ.back()[2], partQ.back()[2]);
5175  Rqq.resize(partQ.back()[2]);
5176  // Kqqjj will be resized dynamically due to possible changes in j interval
5177  }
5178  };
5179 
5180 
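  // Contribution of a condensed cluster to a kept stiffness block:
  // Kij -= sum_q KiQ[q] * KQj[q], with KQj[q] expected to hold
  // K_QQ^{-1} K_Qj as produced by ga_instruction_condensation_sub.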
5181  struct ga_instruction_condensation_super_K : public ga_instruction {
5182  base_tensor &Kij;
5183  std::vector<base_tensor *> KiQ, KQj; // indexed wrt q in Q
5184  size_type Qsize;
5185 
5186  virtual int exec() {
5187  GA_DEBUG_INFO("Instruction: contribution of condensation to kept part");
5188 
5189  size_type m = KiQ[0]->size(0);
5190  size_type n = KQj[0]->size(1);
5191  Kij.adjust_sizes(m,n);
5192  gmm::clear(Kij.as_vector());
5193  for (size_type k=0; k < Qsize; ++k) {
5194  const base_tensor &K1 = *KiQ[k], &K2 = *KQj[k];
5195  size_type qqsize = K1.size(1);
5196  GMM_ASSERT1(K1.size(0) == m && K2.size(1) == n && K2.size(0) == qqsize,
5197  "Internal error");
5198 
5199  base_tensor::iterator it = Kij.begin();
5200  for (size_type jj = 0; jj < n; ++jj)
5201  for (size_type ii = 0; ii < m; ++ii, ++it)
5202  for (size_type qq = 0; qq < qqsize; ++qq)
5203  *it -= K1[ii+qq*m] * K2[qq+jj*qqsize];
5204  GA_DEBUG_ASSERT(it == Kij.end(), "Wrong sizes");
5205  }
5206  return 0;
5207  }
5208  ga_instruction_condensation_super_K(base_tensor &Kij_,
5209  const std::vector<base_tensor *> KiQ_,
5210  const std::vector<base_tensor *> KQj_)
5211  : Kij(Kij_), KiQ(KiQ_), KQj(KQj_)
5212  {
5213  Qsize = KiQ.size();
5214  GMM_ASSERT1(KiQ.size() == KQj.size(), "Internal error");
5215  }
5216  };
5217 
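  // Analogous contribution to a kept right-hand side block:
  // Ri -= sum_q KiQ[q] * RQpr[q].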
5218  struct ga_instruction_condensation_super_R : public ga_instruction {
5219  base_tensor &Ri;
5220  std::vector<base_tensor *> KiQ, RQpr; // indexed wrt q in Q
5221  size_type Qsize;
5222 
5223  virtual int exec() {
5224  GA_DEBUG_INFO("Instruction: contribution of condensation to primary rhs");
5225 
5226  size_type m = KiQ[0]->size(0);
5227  Ri.adjust_sizes(m);
5228  gmm::clear(Ri.as_vector());
5229  for (size_type k=0; k < Qsize; ++k) {
5230  const base_tensor &K1 = *KiQ[k], &R2 = *RQpr[k];
5231  size_type qqsize = K1.size(1);
5232  GMM_ASSERT1(K1.size(0) == m && R2.size(0) == qqsize, "Internal error");
5233  base_tensor::iterator it = Ri.begin();
5234  for (size_type ii = 0; ii < m; ++ii, ++it)
5235  for (size_type qq = 0; qq < qqsize; ++qq)
5236  *it -= K1[ii+qq*m] * R2[qq];
5237  GA_DEBUG_ASSERT(it == Ri.end(), "Wrong sizes");
5238  }
5239  return 0;
5240  }
5241  ga_instruction_condensation_super_R(base_tensor &Ri_,
5242  const std::vector<base_tensor *> KiQ_,
5243  const std::vector<base_tensor *> RQpr_)
5244  : Ri(Ri_), KiQ(KiQ_), RQpr(RQpr_)
5245  {
5246  Qsize = KiQ.size();
5247  GMM_ASSERT1(KiQ.size() == RQpr.size(), "Internal error");
5248  }
5249  };
5250 
5251  //=========================================================================
5252  // Compilation of assembly trees into a list of basic instructions
5253  //=========================================================================
5254 
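  // Makes the value of a variable available on basic dofs: variable groups
  // are processed recursively, variables on reduced mesh_fems get an extended
  // copy stored in gis.really_extended_vars, and others are referenced
  // directly from the workspace.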
5255  static void extend_variable_in_gis(const ga_workspace &workspace,
5256  const std::string &varname,
5257  ga_instruction_set &gis) {
5258  if (workspace.variable_group_exists(varname)) {
5259  for (const std::string &v : workspace.variable_group(varname))
5260  extend_variable_in_gis(workspace, v, gis);
5261  } else if (gis.extended_vars.count(varname) == 0) {
5262  const mesh_fem *mf = workspace.associated_mf(varname);
5263  if (mf->is_reduced()) {
5264  auto n = (mf->get_qdim() == 1) ? workspace.qdim(varname) : 1;
5265  base_vector &U = gis.really_extended_vars[varname];
5266  gmm::resize(U, mf->nb_basic_dof() * n);
5267  mf->extend_vector(workspace.value(varname), U);
5268  gis.extended_vars[varname] = &(gis.really_extended_vars[varname]);
5269  } else {
5270  gis.extended_vars[varname] = &(workspace.value(varname));
5271  }
5272  }
5273  }
5274 
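  // Removes pnode and all of its descendants from the hash-indexed node list,
  // so that they are no longer considered for reuse as equivalent nodes.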
5275  static void ga_clear_node_list
5276  (pga_tree_node pnode, std::map<scalar_type,
5277  std::list<pga_tree_node> > &node_list) {
5278  std::list<pga_tree_node> &loc_node_list = node_list[pnode->hash_value];
5279  for (std::list<pga_tree_node>::iterator it = loc_node_list.begin();
5280  it != loc_node_list.end(); ) {
5281  if (*it == pnode) it = loc_node_list.erase(it); else ++it;
5282  }
5283  for (size_type i = 0; i < pnode->children.size(); ++i)
5284  ga_clear_node_list(pnode->children[i], node_list);
5285  }
5286 
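  // Recursively compiles the assembly tree rooted at pnode into elementary
  // instructions appended to rmi (with resize, base and dof-extraction
  // instructions factored out where possible), reusing already compiled
  // equivalent nodes.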
5287  // workspace argument is not const because of declaration of temporary
5288  // unreduced variables
5289  static void ga_compile_node(const pga_tree_node pnode,
5290  ga_workspace &workspace,
5291  ga_instruction_set &gis,
5292  ga_instruction_set::region_mim_instructions &rmi,
5293  const mesh &m, bool function_case,
5294  ga_if_hierarchy &if_hierarchy) {
5295 
5296  if (pnode->node_type == GA_NODE_PREDEF_FUNC ||
5297  pnode->node_type == GA_NODE_OPERATOR ||
5298  pnode->node_type == GA_NODE_SPEC_FUNC ||
5299  pnode->node_type == GA_NODE_CONSTANT ||
5300  pnode->node_type == GA_NODE_ALLINDICES ||
5301  pnode->node_type == GA_NODE_RESHAPE ||
5302  pnode->node_type == GA_NODE_SWAP_IND ||
5303  pnode->node_type == GA_NODE_IND_MOVE_LAST ||
5304  pnode->node_type == GA_NODE_CONTRACT) return;
5305 
5306  // cout << "compiling "; ga_print_node(pnode, cout); cout << endl;
5307 
5308  pga_instruction pgai;
5309  ga_if_hierarchy *pif_hierarchy = &if_hierarchy;
5310  ga_if_hierarchy new_if_hierarchy;
5311 
5312  const mesh_fem *mf1 = 0, *mf2 = 0;
5313  const mesh_fem **mfg1 = 0, **mfg2 = 0;
5314  fem_interpolation_context *pctx1 = 0, *pctx2 = 0;
5315  bool tensor_to_clear = false;
5316  bool tensor_to_adapt = false;
5317 
5318  if (pnode->test_function_type) {
5319  if (pnode->name_test1.size())
5320  mf1 = workspace.associated_mf(pnode->name_test1);
5321  if (mf1) {
5322  pctx1 = &(gis.ctx);
5323  const std::string &intn1 = pnode->interpolate_name_test1;
5324  if (intn1.size()) {
5325  if (workspace.secondary_domain_exists(intn1)) {
5326  pctx1 = &(rmi.secondary_domain_infos.ctx);
5327  } else {
5328  tensor_to_adapt = true;
5329  pctx1 = &(rmi.interpolate_infos[intn1].ctx);
5330  if (workspace.variable_group_exists(pnode->name_test1)) {
5331  ga_instruction_set::variable_group_info &vgi =
5332  rmi.interpolate_infos[intn1].groups_info[pnode->name_test1];
5333  mfg1 = &(vgi.mf);
5334  mf1 = 0;
5335  }
5336  }
5337  }
5338  }
5339  if (pnode->name_test2.size())
5340  mf2 = workspace.associated_mf(pnode->name_test2);
5341  if (mf2) {
5342  pctx2 = &(gis.ctx);
5343  const std::string &intn2 = pnode->interpolate_name_test2;
5344  if (intn2.size()) {
5345  if (workspace.secondary_domain_exists(intn2)) {
5346  pctx2 = &(rmi.secondary_domain_infos.ctx);
5347  } else {
5348  tensor_to_adapt = true;
5349  pctx2 = &(rmi.interpolate_infos[intn2].ctx);
5350  if (workspace.variable_group_exists(pnode->name_test2)) {
5351  ga_instruction_set::variable_group_info &vgi =
5352  rmi.interpolate_infos[intn2].groups_info[pnode->name_test2];
5353  mfg2 = &(vgi.mf);
5354  mf2 = 0;
5355  }
5356  }
5357  }
5358  }
5359  }
5360 
5361  // Produce a resize instruction which is stored if no equivalent node is
5362  // detected and if the mesh is not uniform.
5363  pnode->t.set_to_original(); pnode->t.set_sparsity(0, 0);
5364  bool is_uniform = false;
5365  if (pnode->test_function_type == 1) {
5366  if (mf1 || mfg1)
5367  pgai = std::make_shared<ga_instruction_first_ind_tensor>
5368  (pnode->tensor(), *pctx1, pnode->qdim1, mf1, mfg1);
5369  if (mf1 && mf1->is_uniform())
5370  { is_uniform = true; pctx1->invalid_convex_num(); }
5371  } else if (pnode->test_function_type == 2) {
5372  if (mf2 || mfg2)
5373  pgai = std::make_shared<ga_instruction_first_ind_tensor>
5374  (pnode->tensor(), *pctx2, pnode->qdim2, mf2, mfg2);
5375  if (mf2 && mf2->is_uniform())
5376  { is_uniform = true; pctx2->invalid_convex_num(); }
5377  } else if (pnode->test_function_type == 3) {
5378  if ((mf1 || mfg1) && (mf2 || mfg2)) {
5379  pgai = std::make_shared<ga_instruction_two_first_ind_tensor>
5380  (pnode->tensor(), *pctx1, *pctx2, pnode->qdim1, mf1, mfg1,
5381  pnode->qdim2, mf2, mfg2);
5382  if (mf1 && mf1->is_uniform() && mf2 && mf2->is_uniform()) {
5383  is_uniform = true;
5384  pctx1->invalid_convex_num();
5385  pctx2->invalid_convex_num();
5386  }
5387  } else if (mf1 || mfg1) {
5388  pgai = std::make_shared<ga_instruction_first_ind_tensor>
5389  (pnode->tensor(), *pctx1, pnode->qdim1, mf1, mfg1);
5390  if (mf1 && mf1->is_uniform())
5391  { is_uniform = true; pctx1->invalid_convex_num(); }
5392  } else if (mf2 || mfg2) {
5393  pgai = std::make_shared<ga_instruction_second_ind_tensor>
5394  (pnode->tensor(), *pctx2, pnode->qdim2, mf2, mfg2);
5395  if (mf2 && mf2->is_uniform())
5396  { is_uniform = true; pctx2->invalid_convex_num(); }
5397  }
5398  }
5399 
5400  // Optimization: detects if an equivalent node has already been compiled
5401  pnode->t.set_to_original();
5402  if (rmi.node_list.find(pnode->hash_value) != rmi.node_list.end()) {
5403  for (pga_tree_node &pnode1 : rmi.node_list[pnode->hash_value]) {
5404  // cout << "found potential equivalent nodes ";
5405  // ga_print_node(pnode, cout);
5406  // cout << " and "; ga_print_node(pnode1, cout); cout << endl;
5407  if (sub_tree_are_equal(pnode, pnode1, workspace, 1)) {
5408  pnode->t.set_to_copy(pnode1->t);
5409  return;
5410  }
5411  if (sub_tree_are_equal(pnode, pnode1, workspace, 2)) {
5412  // cout << "confirmed with transpose" << endl;
5413  if (pnode->nb_test_functions() == 2) {
5414  if (pgai) { // resize instruction if needed
5415  if (is_uniform)
5416  { pgai->exec(); }
5417  else { rmi.instructions.push_back(std::move(pgai)); }
5418  }
5419  pgai = std::make_shared<ga_instruction_transpose_test>
5420  (pnode->tensor(), pnode1->tensor());
5421  rmi.instructions.push_back(std::move(pgai));
5422  } else {
5423  pnode->t.set_to_copy(pnode1->t);
5424  }
5425  return;
5426  }
5427  // cout << "sub_tree_are_equal = " << int(sub_tree_are_equal(pnode, pnode1, workspace, 1)) << endl;
5428  std::stringstream ss;
5429  ss << "Detected wrong equivalent nodes:" << endl;
5430  ga_print_node(pnode, ss);
5431  ss << endl << " and " << endl;
5432  ga_print_node(pnode1, ss);
5433  ss << endl << "This is harmless, but the hash values could be refined." << endl;
5434  GMM_TRACE2(ss.str());
5435  }
5436  }
5437 
5438  if (pgai) { // resize instruction if needed and no equivalent node detected
5439  if (is_uniform) { pgai->exec(); }
5440  else {
5441  if (tensor_to_adapt)
5442  rmi.instructions.push_back(std::move(pgai));
5443  else
5444  rmi.elt_instructions.push_back(std::move(pgai));
5445  }
5446  }
5447 
5448  size_type interpolate_filter_inst = rmi.instructions.size();
5449  if (pnode->node_type == GA_NODE_INTERPOLATE_FILTER) {
5450  pgai = pga_instruction();
5451  rmi.instructions.push_back(std::move(pgai));
5452  if_hierarchy.increment();
5453  new_if_hierarchy.child_of(if_hierarchy);
5454  pif_hierarchy = &new_if_hierarchy;
5455  }
5456 
5457  for (size_type i = 0; i < pnode->children.size(); ++i)
5458  ga_compile_node(pnode->children[i], workspace, gis, rmi, m,
5459  function_case, *pif_hierarchy);
5460 
5461  if (pnode->node_type == GA_NODE_INTERPOLATE_FILTER) {
5462  const std::string &intn = pnode->interpolate_name;
5463  ga_instruction_set::interpolate_info &inin = rmi.interpolate_infos[intn];
5464  pgai = std::make_shared<ga_instruction_interpolate_filter>
5465  (pnode->tensor(), inin, pnode->nbc1,
5466  int(rmi.instructions.size() - interpolate_filter_inst));
5467  rmi.instructions[interpolate_filter_inst].swap(pgai);
5468  pgai = std::make_shared<ga_instruction_copy_tensor>
5469  (pnode->tensor(), pnode->children[0]->tensor());
5470  rmi.instructions.push_back(std::move(pgai));
5471  ga_clear_node_list(pnode->children[0], rmi.node_list);
5472  }
5473 
5474  static scalar_type minus = -scalar_type(1);
5475  size_type nbch = pnode->children.size();
5476  pga_tree_node child0 = (nbch > 0) ? pnode->children[0] : 0;
5477  pga_tree_node child1 = (nbch > 1) ? pnode->children[1] : 0;
5478  bgeot::multi_index mi;
5479  const bgeot::multi_index &size0 = child0 ? child0->t.sizes() : mi;
5480  // const bgeot::multi_index &size1 = child1 ? child1->t.sizes() : mi;
5481  size_type dim0 = child0 ? child0->tensor_order() : 0;
5482  size_type dim1 = child1 ? child1->tensor_order() : 0;
5483 
5484  switch (pnode->node_type) {
5485 
5486  case GA_NODE_PREDEF_FUNC: case GA_NODE_OPERATOR: case GA_NODE_SPEC_FUNC:
5487  case GA_NODE_CONSTANT: case GA_NODE_ALLINDICES: case GA_NODE_ZERO:
5488  case GA_NODE_RESHAPE: case GA_NODE_CROSS_PRODUCT:
5489  case GA_NODE_SWAP_IND: case GA_NODE_IND_MOVE_LAST:
5490  case GA_NODE_CONTRACT: case GA_NODE_INTERPOLATE_FILTER:
5491  break;
5492 
5493  case GA_NODE_X:
5494  GMM_ASSERT1(!function_case,
5495  "No use of X is allowed in scalar functions");
5496  if (pnode->nbc1) {
5497  GA_DEBUG_ASSERT(pnode->tensor().size() == 1, "dimensions mismatch");
5498  GMM_ASSERT1(pnode->nbc1 <= m.dim(),
5499  "Bad index for X in expression");
5500  pgai = std::make_shared<ga_instruction_X_component>
5501  (pnode->tensor()[0], gis.ctx, pnode->nbc1-1);
5502  } else {
5503  if (pnode->tensor().size() != m.dim())
5504  pnode->init_vector_tensor(m.dim());
5505  pgai = std::make_shared<ga_instruction_X>(pnode->tensor(), gis.ctx);
5506  }
5507  rmi.instructions.push_back(std::move(pgai));
5508  break;
5509 
5510  case GA_NODE_ELT_SIZE:
5511  GMM_ASSERT1(!function_case,
5512  "No use of element_size is allowed in functions");
5513  if (pnode->tensor().size() != 1) pnode->init_scalar_tensor(0);
5514  pgai = std::make_shared<ga_instruction_element_size>
5515  (pnode->tensor(), gis.elt_size);
5516  gis.need_elt_size = true;
5517  rmi.instructions.push_back(std::move(pgai));
5518  break;
5519 
5520  case GA_NODE_ELT_K:
5521  GMM_ASSERT1(!function_case,
5522  "No use of element_K is allowed in functions");
5523  pgai = std::make_shared<ga_instruction_element_K>(pnode->tensor(),
5524  gis.ctx);
5525  rmi.instructions.push_back(std::move(pgai));
5526  break;
5527 
5528  case GA_NODE_ELT_B:
5529  GMM_ASSERT1(!function_case,
5530  "No use of element_B is allowed in functions");
5531  pgai = std::make_shared<ga_instruction_element_B>(pnode->tensor(),
5532  gis.ctx);
5533  rmi.instructions.push_back(std::move(pgai));
5534  break;
5535 
5536  case GA_NODE_NORMAL:
5537  {
5538  GMM_ASSERT1(!function_case,
5539  "No use of Normal is allowed in functions");
5540  if (pnode->tensor().size() != m.dim())
5541  pnode->init_vector_tensor(m.dim());
5542  const mesh_im_level_set *mimls
5543  = dynamic_cast<const mesh_im_level_set *>(rmi.im);
5544  if (mimls && mimls->location()==mesh_im_level_set::INTEGRATE_BOUNDARY) {
5545  // Call with ctx (Gauss point)
5546  pgai = std::make_shared<ga_instruction_level_set_normal_vector>
5547  (pnode->tensor(), mimls, gis.ctx);
5548  rmi.instructions.push_back(std::move(pgai));
5549  } else {
5550  pgai = std::make_shared<ga_instruction_copy_Normal>
5551  (pnode->tensor(), gis.Normal);
5552  rmi.instructions.push_back(std::move(pgai));
5553  }
5554  }
5555  break;
5556 
5557  case GA_NODE_INTERPOLATE_X:
5558  case GA_NODE_INTERPOLATE_NORMAL:
5559  GMM_ASSERT1(!function_case,
5560  "No use of Interpolate is allowed in functions");
5561  if (pnode->tensor().size() != m.dim())
5562  pnode->init_vector_tensor(m.dim());
5563  if (pnode->node_type == GA_NODE_INTERPOLATE_X)
5564  pgai = std::make_shared<ga_instruction_copy_interpolated_small_vect>
5565  (pnode->tensor(),
5566  rmi.interpolate_infos[pnode->interpolate_name].pt_y,
5567  rmi.interpolate_infos[pnode->interpolate_name]);
5568  else if (pnode->node_type == GA_NODE_INTERPOLATE_NORMAL)
5569  pgai = std::make_shared<ga_instruction_copy_Normal>
5570  (pnode->tensor(),
5571  rmi.interpolate_infos[pnode->interpolate_name].Normal);
5572  rmi.instructions.push_back(std::move(pgai));
5573  break;
5574 
5575  case GA_NODE_SECONDARY_DOMAIN_X:
5576  case GA_NODE_SECONDARY_DOMAIN_NORMAL:
5577  {
5578  GMM_ASSERT1(!function_case,
5579  "No use of Secondary_domain is allowed in functions");
5580  auto psd = workspace.secondary_domain(pnode->interpolate_name);
5581  size_type sddim = psd->mim().linked_mesh().dim();
5582  if (pnode->tensor().size() != sddim)
5583  pnode->init_vector_tensor(sddim);
5584  if (pnode->node_type == GA_NODE_SECONDARY_DOMAIN_X)
5585  pgai = std::make_shared<ga_instruction_X>
5586  (pnode->tensor(), rmi.secondary_domain_infos.ctx);
5587  else if (pnode->node_type == GA_NODE_SECONDARY_DOMAIN_NORMAL)
5588  pgai = std::make_shared<ga_instruction_copy_Normal>
5589  (pnode->tensor(), rmi.secondary_domain_infos.Normal);
5590  rmi.instructions.push_back(std::move(pgai));
5591  }
5592  break;
5593 
5594  case GA_NODE_VAL: case GA_NODE_GRAD:
5595  case GA_NODE_HESS: case GA_NODE_DIVERG:
5596  case GA_NODE_ELEMENTARY_VAL: case GA_NODE_ELEMENTARY_GRAD:
5597  case GA_NODE_ELEMENTARY_HESS: case GA_NODE_ELEMENTARY_DIVERG:
5598  case GA_NODE_XFEM_PLUS_VAL: case GA_NODE_XFEM_PLUS_GRAD:
5599  case GA_NODE_XFEM_PLUS_HESS: case GA_NODE_XFEM_PLUS_DIVERG:
5600  case GA_NODE_XFEM_MINUS_VAL: case GA_NODE_XFEM_MINUS_GRAD:
5601  case GA_NODE_XFEM_MINUS_HESS: case GA_NODE_XFEM_MINUS_DIVERG:
5602  {
5603  bool is_elementary = (pnode->node_type == GA_NODE_ELEMENTARY_VAL ||
5604  pnode->node_type == GA_NODE_ELEMENTARY_GRAD ||
5605  pnode->node_type == GA_NODE_ELEMENTARY_HESS ||
5606  pnode->node_type == GA_NODE_ELEMENTARY_DIVERG);
5607  if (function_case) {
5608  GMM_ASSERT1(!is_elementary,
5609  "No elementary transformation is allowed in functions");
5610  GMM_ASSERT1(pnode->node_type != GA_NODE_XFEM_PLUS_VAL &&
5611  pnode->node_type != GA_NODE_XFEM_PLUS_GRAD &&
5612  pnode->node_type != GA_NODE_XFEM_PLUS_HESS &&
5613  pnode->node_type != GA_NODE_XFEM_PLUS_DIVERG,
5614  "Xfem_plus not allowed in functions");
5615  GMM_ASSERT1(pnode->node_type != GA_NODE_XFEM_MINUS_VAL &&
5616  pnode->node_type != GA_NODE_XFEM_MINUS_GRAD &&
5617  pnode->node_type != GA_NODE_XFEM_MINUS_HESS &&
5618  pnode->node_type != GA_NODE_XFEM_MINUS_DIVERG,
5619  "Xfem_plus not allowed in functions");
5620  const mesh_fem *mf = workspace.associated_mf(pnode->name);
5621  const im_data *imd = workspace.associated_im_data(pnode->name);
5622  GMM_ASSERT1(!mf, "No fem expression is allowed in "
5623  "function expression");
5624  GMM_ASSERT1(!imd, "No integration method data is allowed in "
5625  "function expression");
5626  if (gmm::vect_size(workspace.value(pnode->name)) == 1)
5627  pgai = std::make_shared<ga_instruction_copy_scalar>
5628  (pnode->tensor()[0], (workspace.value(pnode->name))[0]);
5629  else
5630  pgai = std::make_shared<ga_instruction_copy_vect>
5631  (pnode->tensor().as_vector(), workspace.value(pnode->name));
5632  rmi.instructions.push_back(std::move(pgai));
5633  } else {
5634  const mesh_fem *mf = workspace.associated_mf(pnode->name), *mfo=mf;
5635  const im_data *imd = workspace.associated_im_data(pnode->name);
5636 
5637  if (is_elementary) {
5638  mf = workspace.associated_mf(pnode->elementary_target);
5639  GMM_ASSERT1(mf && mfo,
5640  "Wrong context for elementary transformation");
5641  GMM_ASSERT1(&(mfo->linked_mesh()) == &(m),
5642  "The finite element of variable " << pnode->name
5643  << " has to be defined on the same mesh than the "
5644  << "integration method or interpolation used");
5645  }
5646 
5647  if (imd) {
5648  pgai = std::make_shared<ga_instruction_extract_local_im_data>
5649  (pnode->tensor(), *imd, workspace.value(pnode->name),
5650  gis.pai, gis.ctx, workspace.qdim(pnode->name));
5651  rmi.instructions.push_back(std::move(pgai));
5652  } else {
5653  GMM_ASSERT1(mf, "Internal error");
5654 
5655  GMM_ASSERT1(&(mf->linked_mesh()) == &(m),
5656  "The finite element of variable " <<
5657  (is_elementary ? pnode->elementary_target : pnode->name)
5658  << " has to be defined on the same mesh than the "
5659  << "integration method or interpolation used");
5660 
5661  // An instruction for extracting local dofs of the variable.
5662  if (rmi.local_dofs.count(pnode->name) == 0) {
5663  rmi.local_dofs[pnode->name] = base_vector(1);
5664  extend_variable_in_gis(workspace, pnode->name, gis);
5665  // cout << "local dof of " << pnode->name << endl;
5666  size_type qmult2 = mfo->get_qdim();
5667  if (qmult2 > 1 && !(mfo->is_uniformly_vectorized()))
5668  qmult2 = size_type(-1);
5669  pgai = std::make_shared<ga_instruction_slice_local_dofs>
5670  (*mfo, *(gis.extended_vars[pnode->name]), gis.ctx,
5671  rmi.local_dofs[pnode->name],
5672  workspace.qdim(pnode->name) / mfo->get_qdim(), qmult2);
5673  rmi.elt_instructions.push_back(std::move(pgai));
5674  }
5675 
5676  // An instruction for pfp update
5677  if (mf->is_uniform()) {
5678  if (rmi.pfps.count(mf) == 0) {
5679  rmi.pfps[mf] = 0;
5680  pgai = std::make_shared<ga_instruction_update_pfp>
5681  (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
5682  rmi.begin_instructions.push_back(std::move(pgai));
5683  }
5684  } else if (rmi.pfps.count(mf) == 0 ||
5685  !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
5686  rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
5687  rmi.pfps[mf] = 0;
5688  pgai = std::make_shared<ga_instruction_update_pfp>
5689  (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
5690  rmi.instructions.push_back(std::move(pgai));
5691  }
5692 
5693  // An instruction for the base value
5694  pgai = pga_instruction();
5695  switch (pnode->node_type) {
5696  case GA_NODE_VAL: case GA_NODE_ELEMENTARY_VAL:
5697  if (rmi.base.count(mf) == 0 ||
5698  !if_hierarchy.is_compatible(rmi.base_hierarchy[mf])) {
5699  rmi.base_hierarchy[mf].push_back(if_hierarchy);
5700  pgai = std::make_shared<ga_instruction_val_base>
5701  (rmi.base[mf], gis.ctx, *mf, rmi.pfps[mf]);
5702  }
5703  break;
5704  case GA_NODE_XFEM_PLUS_VAL:
5705  if (rmi.xfem_plus_base.count(mf) == 0 ||
5706  !if_hierarchy.is_compatible(rmi.xfem_plus_base_hierarchy[mf]))
5707  {
5708  rmi.xfem_plus_base_hierarchy[mf].push_back(if_hierarchy);
5709  pgai = std::make_shared<ga_instruction_xfem_plus_val_base>
5710  (rmi.xfem_plus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
5711  }
5712  break;
5713  case GA_NODE_XFEM_MINUS_VAL:
5714  if (rmi.xfem_minus_base.count(mf) == 0 ||
5715  !if_hierarchy.is_compatible(rmi.xfem_minus_base_hierarchy[mf]))
5716  {
5717  rmi.xfem_minus_base_hierarchy[mf].push_back(if_hierarchy);
5718  pgai = std::make_shared<ga_instruction_xfem_minus_val_base>
5719  (rmi.xfem_minus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
5720  }
5721  break;
5722  case GA_NODE_GRAD: case GA_NODE_DIVERG:
5723  case GA_NODE_ELEMENTARY_GRAD: case GA_NODE_ELEMENTARY_DIVERG:
5724  if (rmi.grad.count(mf) == 0 ||
5725  !if_hierarchy.is_compatible(rmi.grad_hierarchy[mf])) {
5726  rmi.grad_hierarchy[mf].push_back(if_hierarchy);
5727  pgai = std::make_shared<ga_instruction_grad_base>
5728  (rmi.grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
5729  }
5730  break;
5731  case GA_NODE_XFEM_PLUS_GRAD: case GA_NODE_XFEM_PLUS_DIVERG:
5732  if (rmi.xfem_plus_grad.count(mf) == 0 ||
5733  !if_hierarchy.is_compatible(rmi.xfem_plus_grad_hierarchy[mf]))
5734  {
5735  rmi.xfem_plus_grad_hierarchy[mf].push_back(if_hierarchy);
5736  pgai = std::make_shared<ga_instruction_xfem_plus_grad_base>
5737  (rmi.xfem_plus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
5738  }
5739  break;
5740  case GA_NODE_XFEM_MINUS_GRAD: case GA_NODE_XFEM_MINUS_DIVERG:
5741  if (rmi.xfem_minus_grad.count(mf) == 0 ||
5742  !if_hierarchy.is_compatible(rmi.xfem_minus_grad_hierarchy[mf]))
5743  {
5744  rmi.xfem_minus_grad_hierarchy[mf].push_back(if_hierarchy);
5745  pgai = std::make_shared<ga_instruction_xfem_minus_grad_base>
5746  (rmi.xfem_minus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
5747  }
5748  break;
5749  case GA_NODE_HESS: case GA_NODE_ELEMENTARY_HESS:
5750  if (rmi.hess.count(mf) == 0 ||
5751  !if_hierarchy.is_compatible(rmi.hess_hierarchy[mf])) {
5752  rmi.hess_hierarchy[mf].push_back(if_hierarchy);
5753  pgai = std::make_shared<ga_instruction_hess_base>
5754  (rmi.hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
5755  }
5756  break;
5757  case GA_NODE_XFEM_PLUS_HESS:
5758  if (rmi.xfem_plus_hess.count(mf) == 0 ||
5759  !if_hierarchy.is_compatible(rmi.xfem_plus_hess_hierarchy[mf]))
5760  {
5761  rmi.xfem_plus_hess_hierarchy[mf].push_back(if_hierarchy);
5762  pgai = std::make_shared<ga_instruction_xfem_plus_hess_base>
5763  (rmi.xfem_plus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
5764  }
5765  break;
5766  case GA_NODE_XFEM_MINUS_HESS:
5767  if (rmi.xfem_minus_hess.count(mf) == 0 ||
5768  !if_hierarchy.is_compatible(rmi.xfem_minus_hess_hierarchy[mf]))
5769  {
5770  rmi.xfem_minus_hess_hierarchy[mf].push_back(if_hierarchy);
5771  pgai = std::make_shared<ga_instruction_xfem_minus_hess_base>
5772  (rmi.xfem_minus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
5773  }
5774  break;
5775 
5776  default : GMM_ASSERT1(false, "Internal error");
5777  }
5778  if (pgai) rmi.instructions.push_back(std::move(pgai));
5779 
5780  // The eval instruction
5781  switch (pnode->node_type) {
5782  case GA_NODE_VAL: // --> t(target_dim*Qmult)
5783  pgai = std::make_shared<ga_instruction_val>
5784  (pnode->tensor(), rmi.base[mf], rmi.local_dofs[pnode->name],
5785  workspace.qdim(pnode->name));
5786  break;
5787  case GA_NODE_GRAD: // --> t(target_dim*Qmult,N)
5788  pgai = std::make_shared<ga_instruction_grad>
5789  (pnode->tensor(), rmi.grad[mf],
5790  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5791  break;
5792  case GA_NODE_HESS: // --> t(target_dim*Qmult,N,N)
5793  pgai = std::make_shared<ga_instruction_hess>
5794  (pnode->tensor(), rmi.hess[mf],
5795  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5796  break;
5797  case GA_NODE_DIVERG: // --> t(1)
5798  pgai = std::make_shared<ga_instruction_diverg>
5799  (pnode->tensor(), rmi.grad[mf],
5800  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5801  break;
5802  case GA_NODE_XFEM_PLUS_VAL: // --> t(target_dim*Qmult)
5803  pgai = std::make_shared<ga_instruction_val>
5804  (pnode->tensor(), rmi.xfem_plus_base[mf],
5805  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5806  break;
5807  case GA_NODE_XFEM_PLUS_GRAD: // --> t(target_dim*Qmult,N)
5808  pgai = std::make_shared<ga_instruction_grad>
5809  (pnode->tensor(), rmi.xfem_plus_grad[mf],
5810  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5811  break;
5812  case GA_NODE_XFEM_PLUS_HESS: // --> t(target_dim*Qmult,N,N)
5813  pgai = std::make_shared<ga_instruction_hess>
5814  (pnode->tensor(), rmi.xfem_plus_hess[mf],
5815  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5816  break;
5817  case GA_NODE_XFEM_PLUS_DIVERG: // --> t(1)
5818  pgai = std::make_shared<ga_instruction_diverg>
5819  (pnode->tensor(), rmi.xfem_plus_grad[mf],
5820  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5821  break;
5822  case GA_NODE_XFEM_MINUS_VAL: // --> t(target_dim*Qmult)
5823  pgai = std::make_shared<ga_instruction_val>
5824  (pnode->tensor(), rmi.xfem_minus_base[mf],
5825  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5826  break;
5827  case GA_NODE_XFEM_MINUS_GRAD: // --> t(target_dim*Qmult,N)
5828  pgai = std::make_shared<ga_instruction_grad>
5829  (pnode->tensor(), rmi.xfem_minus_grad[mf],
5830  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5831  break;
5832  case GA_NODE_XFEM_MINUS_HESS: // --> t(target_dim*Qmult,N,N)
5833  pgai = std::make_shared<ga_instruction_hess>
5834  (pnode->tensor(), rmi.xfem_minus_hess[mf],
5835  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5836  break;
5837  case GA_NODE_XFEM_MINUS_DIVERG: // --> t(1)
5838  pgai = std::make_shared<ga_instruction_diverg>
5839  (pnode->tensor(), rmi.xfem_minus_grad[mf],
5840  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5841  break;
5842  case GA_NODE_ELEMENTARY_VAL:
5843  { // --> t(target_dim*Qmult)
5844  ga_instruction_set::elementary_trans_info &eti
5845  = rmi.elementary_trans_infos
5846  [std::make_tuple(pnode->elementary_name, mfo, mf)];
5847  pgai =
5848  std::make_shared<ga_instruction_elementary_trans_val>
5849  (pnode->tensor(), rmi.base[mf],
5850  rmi.local_dofs[pnode->name],
5851  workspace.qdim(pnode->elementary_target),
5852  workspace.elementary_transformation(pnode->elementary_name),
5853  *mfo, *mf, gis.ctx, eti.M, eti.icv);
5854  }
5855  break;
5856  case GA_NODE_ELEMENTARY_GRAD:
5857  { // --> t(target_dim*Qmult,N)
5858  ga_instruction_set::elementary_trans_info &eti
5859  = rmi.elementary_trans_infos
5860  [std::make_tuple(pnode->elementary_name, mfo, mf)];
5861  pgai =
5862  std::make_shared<ga_instruction_elementary_trans_grad>
5863  (pnode->tensor(), rmi.grad[mf],
5864  rmi.local_dofs[pnode->name],
5865  workspace.qdim(pnode->elementary_target),
5866  workspace.elementary_transformation(pnode->elementary_name),
5867  *mfo, *mf, gis.ctx, eti.M, eti.icv);
5868  }
5869  break;
5870  case GA_NODE_ELEMENTARY_HESS:
5871  { // --> t(target_dim*Qmult,N,N)
5872  ga_instruction_set::elementary_trans_info &eti
5873  = rmi.elementary_trans_infos
5874  [std::make_tuple(pnode->elementary_name, mfo, mf)];
5875  pgai =
5876  std::make_shared<ga_instruction_elementary_trans_hess>
5877  (pnode->tensor(), rmi.hess[mf],
5878  rmi.local_dofs[pnode->name],
5879  workspace.qdim(pnode->elementary_target),
5880  workspace.elementary_transformation(pnode->elementary_name),
5881  *mfo, *mf, gis.ctx, eti.M, eti.icv);
5882  }
5883  break;
5884  case GA_NODE_ELEMENTARY_DIVERG:
5885  { // --> t(1)
5886  ga_instruction_set::elementary_trans_info &eti
5887  = rmi.elementary_trans_infos
5888  [std::make_tuple(pnode->elementary_name, mfo, mf)];
5889  pgai =
5890  std::make_shared<ga_instruction_elementary_trans_diverg>
5891  (pnode->tensor(), rmi.grad[mf],
5892  rmi.local_dofs[pnode->name],
5893  workspace.qdim(pnode->elementary_target),
5894  workspace.elementary_transformation(pnode->elementary_name),
5895  *mfo, *mf, gis.ctx, eti.M, eti.icv);
5896  }
5897  break;
5898  default: break;
5899  }
5900  rmi.instructions.push_back(std::move(pgai));
5901  }
5902  }
5903  }
5904  break;
5905 
5906  case GA_NODE_SECONDARY_DOMAIN_VAL: case GA_NODE_SECONDARY_DOMAIN_GRAD:
5907  case GA_NODE_SECONDARY_DOMAIN_HESS: case GA_NODE_SECONDARY_DOMAIN_DIVERG:
5908  {
5909  GMM_ASSERT1(!function_case, "internal error");
5910  const mesh_fem *mf = workspace.associated_mf(pnode->name);
5911  const im_data *imd = workspace.associated_im_data(pnode->name);
5912  const std::string &intn = pnode->interpolate_name;
5913  auto &sdi = rmi.secondary_domain_infos;
5914 
5915  fem_interpolation_context *pctx = &(sdi.ctx);
5916  papprox_integration pai = sdi.pai;
5917  psecondary_domain psd = workspace.secondary_domain(intn);
5918 
5919  if (imd) {
5920  pgai = std::make_shared<ga_instruction_extract_local_im_data>
5921  (pnode->tensor(), *imd, workspace.value(pnode->name),
5922  pai, *pctx, workspace.qdim(pnode->name));
5923  rmi.instructions.push_back(std::move(pgai));
5924  } else {
5925  GMM_ASSERT1(mf, "Internal error");
5926  GMM_ASSERT1(&(mf->linked_mesh()) == &(psd->mim().linked_mesh()),
5927  "The finite element of variable " << pnode->name <<
5928  " has to be defined on the same mesh than the "
5929  "integration method or interpolation used on the "
5930  "secondary domain");
5931 
5932  // An instruction for extracting local dofs of the variable.
5933  if (sdi.local_dofs.count(pnode->name) == 0) {
5934  sdi.local_dofs[pnode->name] = base_vector(1);
5935  extend_variable_in_gis(workspace, pnode->name, gis);
5936  size_type qmult2 = mf->get_qdim();
5937  if (qmult2 > 1 && !(mf->is_uniformly_vectorized()))
5938  qmult2 = size_type(-1);
5939  pgai = std::make_shared<ga_instruction_slice_local_dofs>
5940  (*mf, *(gis.extended_vars[pnode->name]), *pctx,
5941  sdi.local_dofs[pnode->name],
5942  workspace.qdim(pnode->name) / mf->get_qdim(), qmult2);
5943  rmi.elt_instructions.push_back(std::move(pgai));
5944  }
5945 
5946  // An instruction for pfp update
5947  if (mf->is_uniform()) {
5948  if (sdi.pfps.count(mf) == 0) {
5949  sdi.pfps[mf] = 0;
5950  pgai = std::make_shared<ga_instruction_update_pfp>
5951  (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
5952  rmi.begin_instructions.push_back(std::move(pgai));
5953  }
5954  } else if (sdi.pfps.count(mf) == 0 ||
5955  !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
5956  rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
5957  sdi.pfps[mf] = 0;
5958  pgai = std::make_shared<ga_instruction_update_pfp>
5959  (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
5960  rmi.instructions.push_back(std::move(pgai));
5961  }
5962 
5963  // An instruction for the base value
5964  pgai = pga_instruction();
5965  switch (pnode->node_type) {
5966  case GA_NODE_SECONDARY_DOMAIN_VAL:
5967  if (sdi.base.count(mf) == 0 ||
5968  !(if_hierarchy.is_compatible(rmi.base_hierarchy[mf]))) {
5969  rmi.base_hierarchy[mf].push_back(if_hierarchy);
5970  pgai = std::make_shared<ga_instruction_val_base>
5971  (sdi.base[mf], *pctx, *mf, sdi.pfps[mf]);
5972  }
5973  break;
5974  case GA_NODE_SECONDARY_DOMAIN_GRAD:
5975  case GA_NODE_SECONDARY_DOMAIN_DIVERG:
5976  if (sdi.grad.count(mf) == 0 ||
5977  !(if_hierarchy.is_compatible(rmi.grad_hierarchy[mf]))) {
5978  rmi.grad_hierarchy[mf].push_back(if_hierarchy);
5979  pgai = std::make_shared<ga_instruction_grad_base>
5980  (sdi.grad[mf], *pctx, *mf, sdi.pfps[mf]);
5981  }
5982  break;
5983  case GA_NODE_SECONDARY_DOMAIN_HESS:
5984  if (sdi.hess.count(mf) == 0 ||
5985  !(if_hierarchy.is_compatible(rmi.hess_hierarchy[mf]))) {
5986  rmi.hess_hierarchy[mf].push_back(if_hierarchy);
5987  pgai = std::make_shared<ga_instruction_hess_base>
5988  (sdi.hess[mf], *pctx, *mf, sdi.pfps[mf]);
5989  }
5990  break;
5991  default : GMM_ASSERT1(false, "Internal error");
5992  }
5993  if (pgai) rmi.instructions.push_back(std::move(pgai));
5994 
5995  // The eval instruction
5996  switch (pnode->node_type) {
5997  case GA_NODE_SECONDARY_DOMAIN_VAL: // --> t(target_dim*Qmult)
5998  pgai = std::make_shared<ga_instruction_val>
5999  (pnode->tensor(), sdi.base[mf], sdi.local_dofs[pnode->name],
6000  workspace.qdim(pnode->name));
6001  break;
6002  case GA_NODE_SECONDARY_DOMAIN_GRAD: // --> t(target_dim*Qmult,N)
6003  pgai = std::make_shared<ga_instruction_grad>
6004  (pnode->tensor(), sdi.grad[mf],
6005  sdi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6006  break;
6007  case GA_NODE_SECONDARY_DOMAIN_HESS: // --> t(target_dim*Qmult,N,N)
6008  pgai = std::make_shared<ga_instruction_hess>
6009  (pnode->tensor(), sdi.hess[mf],
6010  sdi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6011  break;
6012  case GA_NODE_SECONDARY_DOMAIN_DIVERG: // --> t(1)
6013  pgai = std::make_shared<ga_instruction_diverg>
6014  (pnode->tensor(), sdi.grad[mf],
6015  sdi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6016  break;
6017  default: break;
6018  }
6019  rmi.instructions.push_back(std::move(pgai));
6020  }
6021  }
6022  break;
6023 
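// Compilation of the value/gradient/Hessian/divergence of a variable at the
// point reached by an interpolate transformation. For a variable group, the
// actual mesh_fem and dof vector are not known at compile time and are
// resolved at run time through the mfg/Ug pointers of the group info.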
6024  case GA_NODE_INTERPOLATE_VAL: case GA_NODE_INTERPOLATE_GRAD:
6025  case GA_NODE_INTERPOLATE_HESS: case GA_NODE_INTERPOLATE_DIVERG:
6026  {
6027  extend_variable_in_gis(workspace, pnode->name, gis);
6028 
6029  const mesh_fem *mfn = workspace.associated_mf(pnode->name), **mfg = 0;
6030  const std::string &intn = pnode->interpolate_name;
6031  const base_vector *Un = gis.extended_vars[pnode->name], **Ug = 0;
6032  fem_interpolation_context *pctx = &(rmi.interpolate_infos[intn].ctx);
6033  const mesh **m2 = &(rmi.interpolate_infos[intn].m);
6034  if (workspace.variable_group_exists(pnode->name)) {
6035  ga_instruction_set::variable_group_info &vgi =
6036  rmi.interpolate_infos[intn].groups_info[pnode->name];
6037  mfg = &(vgi.mf); mfn = 0; Ug = &(vgi.U); Un = 0;
6038  }
6039 
6040  if (pnode->node_type == GA_NODE_INTERPOLATE_VAL) {
6041  // --> t(target_dim*Qmult)
6042  pgai = std::make_shared<ga_instruction_interpolate_val>
6043  (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
6044  workspace.qdim(pnode->name),
6045  gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
6046  } else if (pnode->node_type == GA_NODE_INTERPOLATE_GRAD) {
6047  // --> t(target_dim*Qmult,N)
6048  pgai = std::make_shared<ga_instruction_interpolate_grad>
6049  (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
6050  workspace.qdim(pnode->name),
6051  gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
6052  } else if (pnode->node_type == GA_NODE_INTERPOLATE_HESS) {
6053  // --> t(target_dim*Qmult,N,N)
6054  pgai = std::make_shared<ga_instruction_interpolate_hess>
6055  (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
6056  workspace.qdim(pnode->name),
6057  gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
6058  } else { // --> t(1)
6059  pgai = std::make_shared<ga_instruction_interpolate_diverg>
6060  (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
6061  workspace.qdim(pnode->name),
6062  gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
6063  }
6064  rmi.instructions.push_back(std::move(pgai));
6065  }
6066  break;
6067 
6068  case GA_NODE_INTERPOLATE_DERIVATIVE:
6069  GMM_ASSERT1(!function_case,
6070  "No use of Interpolate is allowed in functions");
6071  pgai = std::make_shared<ga_instruction_copy_tensor_possibly_void>
6072  (pnode->tensor(),
6073  rmi.interpolate_infos[pnode->interpolate_name_der]
6074  .derivatives[var_trans_pair(pnode->name, pnode->interpolate_name)]);
6075  rmi.instructions.push_back(std::move(pgai));
6076  break;
6077 
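// Compilation of test functions (values, gradients, Hessians and divergence
// of the shape functions), including the XFEM plus/minus and elementary
// transformation variants. When needed, a fem_precomp update and the base
// value computation are emitted first, followed by the copy (vectorized when
// the mesh_fem is uniformly vectorized) into the node tensor.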
6078  case GA_NODE_VAL_TEST: case GA_NODE_GRAD_TEST:
6079  case GA_NODE_HESS_TEST: case GA_NODE_DIVERG_TEST:
6080  case GA_NODE_ELEMENTARY_VAL_TEST: case GA_NODE_ELEMENTARY_GRAD_TEST:
6081  case GA_NODE_ELEMENTARY_HESS_TEST: case GA_NODE_ELEMENTARY_DIVERG_TEST:
6082  case GA_NODE_XFEM_PLUS_VAL_TEST: case GA_NODE_XFEM_PLUS_GRAD_TEST:
6083  case GA_NODE_XFEM_PLUS_HESS_TEST: case GA_NODE_XFEM_PLUS_DIVERG_TEST:
6084  case GA_NODE_XFEM_MINUS_VAL_TEST: case GA_NODE_XFEM_MINUS_GRAD_TEST:
6085  case GA_NODE_XFEM_MINUS_HESS_TEST: case GA_NODE_XFEM_MINUS_DIVERG_TEST:
6086  // GMM_ASSERT1(!function_case,
6087  // "Test functions not allowed in functions");
6088  {
6089  bool is_elementary = (pnode->node_type==GA_NODE_ELEMENTARY_VAL_TEST ||
6090  pnode->node_type==GA_NODE_ELEMENTARY_GRAD_TEST ||
6091  pnode->node_type==GA_NODE_ELEMENTARY_HESS_TEST ||
6092  pnode->node_type==GA_NODE_ELEMENTARY_DIVERG_TEST);
6093  const mesh_fem *mf = workspace.associated_mf(pnode->name), *mfo=mf;
6094  if (is_elementary) {
6095  mf = workspace.associated_mf(pnode->elementary_target);
6096  GMM_ASSERT1(mf && mfo,
6097  "Wrong context for elementary transformation");
6098  GMM_ASSERT1(&(mfo->linked_mesh()) == &(m),
6099  "The finite element of variable " << pnode->name
6100  << " has to be defined on the same mesh than the "
6101  << "integration method or interpolation used");
6102  }
6103 
6104  if (mf) {
6105  GMM_ASSERT1(&(mf->linked_mesh()) == &(m),
6106  "The finite element of variable " << pnode->name <<
6107  (is_elementary ? pnode->elementary_target : pnode->name)
6108  << " and the applied integration method have to be"
6109  << " defined on the same mesh");
6110 
6111  // An instruction for pfp update
6112  if (is_uniform) {
6113  if (rmi.pfps.count(mf) == 0) {
6114  rmi.pfps[mf] = 0;
6115  pgai = std::make_shared<ga_instruction_update_pfp>
6116  (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
6117  rmi.begin_instructions.push_back(std::move(pgai));
6118  }
6119  } else if (rmi.pfps.count(mf) == 0 ||
6120  !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
6121  rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
6122  rmi.pfps[mf] = 0;
6123  pgai = std::make_shared<ga_instruction_update_pfp>
6124  (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
6125  rmi.instructions.push_back(std::move(pgai));
6126  }
6127 
6128  // An instruction for the base value
6129  pgai = pga_instruction();
6130  switch (pnode->node_type) {
6131  case GA_NODE_VAL_TEST: case GA_NODE_ELEMENTARY_VAL_TEST:
6132  if (rmi.base.find(mf) == rmi.base.end() ||
6133  !if_hierarchy.is_compatible(rmi.base_hierarchy[mf])) {
6134  rmi.base_hierarchy[mf].push_back(if_hierarchy);
6135  pgai = std::make_shared<ga_instruction_val_base>
6136  (rmi.base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6137  }
6138  break;
6139  case GA_NODE_XFEM_PLUS_VAL_TEST:
6140  if (rmi.xfem_plus_base.find(mf) == rmi.xfem_plus_base.end() ||
6141  !if_hierarchy.is_compatible(rmi.xfem_plus_base_hierarchy[mf]))
6142  {
6143  rmi.xfem_plus_base_hierarchy[mf].push_back(if_hierarchy);
6144  pgai = std::make_shared<ga_instruction_xfem_plus_val_base>
6145  (rmi.xfem_plus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6146  }
6147  break;
6148  case GA_NODE_XFEM_MINUS_VAL_TEST:
6149  if (rmi.xfem_minus_base.find(mf) == rmi.xfem_minus_base.end() ||
6150  !if_hierarchy.is_compatible(rmi.xfem_minus_base_hierarchy[mf]))
6151  {
6152  rmi.xfem_minus_base_hierarchy[mf].push_back(if_hierarchy);
6153  pgai = std::make_shared<ga_instruction_xfem_minus_val_base>
6154  (rmi.xfem_minus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6155  }
6156  break;
6157  case GA_NODE_GRAD_TEST: case GA_NODE_DIVERG_TEST:
6158  case GA_NODE_ELEMENTARY_GRAD_TEST:
6159  case GA_NODE_ELEMENTARY_DIVERG_TEST:
6160  if (rmi.grad.find(mf) == rmi.grad.end() ||
6161  !if_hierarchy.is_compatible(rmi.grad_hierarchy[mf])) {
6162  rmi.grad_hierarchy[mf].push_back(if_hierarchy);
6163  pgai = std::make_shared<ga_instruction_grad_base>
6164  (rmi.grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6165  }
6166  break;
6167  case GA_NODE_XFEM_PLUS_GRAD_TEST: case GA_NODE_XFEM_PLUS_DIVERG_TEST:
6168  if (rmi.xfem_plus_grad.find(mf) == rmi.xfem_plus_grad.end() ||
6169  !if_hierarchy.is_compatible(rmi.xfem_plus_grad_hierarchy[mf]))
6170  {
6171  rmi.xfem_plus_grad_hierarchy[mf].push_back(if_hierarchy);
6172  pgai = std::make_shared<ga_instruction_xfem_plus_grad_base>
6173  (rmi.xfem_plus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6174  }
6175  break;
6176  case GA_NODE_XFEM_MINUS_GRAD_TEST:
6177  case GA_NODE_XFEM_MINUS_DIVERG_TEST:
6178  if (rmi.xfem_minus_grad.find(mf) == rmi.xfem_minus_grad.end() ||
6179  !if_hierarchy.is_compatible(rmi.xfem_minus_grad_hierarchy[mf]))
6180  {
6181  rmi.xfem_minus_grad_hierarchy[mf].push_back(if_hierarchy);
6182  pgai = std::make_shared<ga_instruction_xfem_minus_grad_base>
6183  (rmi.xfem_minus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6184  }
6185  break;
6186  case GA_NODE_HESS_TEST: case GA_NODE_ELEMENTARY_HESS_TEST:
6187  if (rmi.hess.count(mf) == 0 ||
6188  !if_hierarchy.is_compatible(rmi.hess_hierarchy[mf])) {
6189  rmi.hess_hierarchy[mf].push_back(if_hierarchy);
6190  pgai = std::make_shared<ga_instruction_hess_base>
6191  (rmi.hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6192  }
6193  break;
6194  case GA_NODE_XFEM_PLUS_HESS_TEST:
6195  if (rmi.xfem_plus_hess.count(mf) == 0 ||
6196  !if_hierarchy.is_compatible(rmi.xfem_plus_hess_hierarchy[mf]))
6197  {
6198  rmi.xfem_plus_hess_hierarchy[mf].push_back(if_hierarchy);
6199  pgai = std::make_shared<ga_instruction_xfem_plus_hess_base>
6200  (rmi.xfem_plus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6201  }
6202  break;
6203  case GA_NODE_XFEM_MINUS_HESS_TEST:
6204  if (rmi.xfem_minus_hess.find(mf) == rmi.xfem_minus_hess.end() ||
6205  !if_hierarchy.is_compatible(rmi.xfem_minus_hess_hierarchy[mf]))
6206  {
6207  rmi.xfem_minus_hess_hierarchy[mf].push_back(if_hierarchy);
6208  pgai = std::make_shared<ga_instruction_xfem_minus_hess_base>
6209  (rmi.xfem_minus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6210  }
6211  break;
6212 
6213  default : GMM_ASSERT1(false, "Internal error");
6214  }
6215  if (pgai) rmi.instructions.push_back(std::move(pgai));
6216 
6217  // The copy of the real_base_value
6218  switch(pnode->node_type) {
6219  case GA_NODE_VAL_TEST:
6220  // --> t(Qmult*ndof,Qmult*target_dim)
6221  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
6222  pnode->t.set_sparsity(1, mf->get_qdim());
6223  tensor_to_clear = true;
6224  pgai = std::make_shared<ga_instruction_copy_vect_val_base>
6225  (pnode->tensor(), rmi.base[mf], mf->get_qdim());
6226  } else {
6227  pgai = std::make_shared<ga_instruction_copy_val_base>
6228  (pnode->tensor(), rmi.base[mf], mf->get_qdim());
6229  }
6230  break;
6231  case GA_NODE_GRAD_TEST:
6232  // --> t(Qmult*ndof,Qmult*target_dim,N)
6233  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
6234  pnode->t.set_sparsity(2, mf->get_qdim());
6235  tensor_to_clear = true;
6236  pgai = std::make_shared<ga_instruction_copy_vect_grad_base>
6237  (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
6238  } else {
6239  pgai = std::make_shared<ga_instruction_copy_grad_base>
6240  (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
6241  }
6242  break;
6243  case GA_NODE_HESS_TEST:
6244  // --> t(Qmult*ndof,Qmult*target_dim,N,N)
6245  pgai = std::make_shared<ga_instruction_copy_hess_base>
6246  (pnode->tensor(), rmi.hess[mf], mf->get_qdim());
6247  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6248  pnode->t.set_sparsity(3, mf->get_qdim());
6249  break;
6250  case GA_NODE_DIVERG_TEST:
6251  // --> t(Qmult*ndof)
6252  pgai = std::make_shared<ga_instruction_copy_diverg_base>
6253  (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
6254  break;
6255  case GA_NODE_XFEM_PLUS_VAL_TEST:
6256  // -->t(Qmult*ndof,Qmult*target_dim)
6257  pgai = std::make_shared<ga_instruction_copy_val_base>
6258  (pnode->tensor(), rmi.xfem_plus_base[mf], mf->get_qdim());
6259  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6260  pnode->t.set_sparsity(1, mf->get_qdim());
6261  break;
6262  case GA_NODE_XFEM_PLUS_GRAD_TEST:
6263  // --> t(Qmult*ndof,Qmult*target_dim,N)
6264  pgai = std::make_shared<ga_instruction_copy_grad_base>
6265  (pnode->tensor(), rmi.xfem_plus_grad[mf], mf->get_qdim());
6266  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6267  pnode->t.set_sparsity(2, mf->get_qdim());
6268  break;
6269  case GA_NODE_XFEM_PLUS_HESS_TEST:
6270  // --> t(Qmult*ndof,Qmult*target_dim,N,N)
6271  pgai = std::make_shared<ga_instruction_copy_hess_base>
6272  (pnode->tensor(), rmi.xfem_plus_hess[mf], mf->get_qdim());
6273  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6274  pnode->t.set_sparsity(3, mf->get_qdim());
6275  break;
6276  case GA_NODE_XFEM_PLUS_DIVERG_TEST:
6277  // --> t(Qmult*ndof)
6278  pgai = std::make_shared<ga_instruction_copy_diverg_base>
6279  (pnode->tensor(), rmi.xfem_plus_grad[mf], mf->get_qdim());
6280  break;
6281  case GA_NODE_XFEM_MINUS_VAL_TEST:
6282  // -->t(Qmult*ndof,Qmult*target_dim)
6283  pgai = std::make_shared<ga_instruction_copy_val_base>
6284  (pnode->tensor(), rmi.xfem_minus_base[mf], mf->get_qdim());
6285  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6286  pnode->t.set_sparsity(1, mf->get_qdim());
6287  break;
6288  case GA_NODE_XFEM_MINUS_GRAD_TEST:
6289  // --> t(Qmult*ndof,Qmult*target_dim,N)
6290  pgai = std::make_shared<ga_instruction_copy_grad_base>
6291  (pnode->tensor(), rmi.xfem_minus_grad[mf], mf->get_qdim());
6292  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6293  pnode->t.set_sparsity(2, mf->get_qdim());
6294  break;
6295  case GA_NODE_XFEM_MINUS_HESS_TEST:
6296  // --> t(Qmult*ndof,Qmult*target_dim,N,N)
6297  pgai = std::make_shared<ga_instruction_copy_hess_base>
6298  (pnode->tensor(), rmi.xfem_minus_hess[mf], mf->get_qdim());
6299  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6300  pnode->t.set_sparsity(3, mf->get_qdim());
6301  break;
6302  case GA_NODE_XFEM_MINUS_DIVERG_TEST:
6303  // --> t(Qmult*ndof)
6304  pgai = std::make_shared<ga_instruction_copy_diverg_base>
6305  (pnode->tensor(), rmi.xfem_minus_grad[mf], mf->get_qdim());
6306  break;
6307  case GA_NODE_ELEMENTARY_VAL_TEST:
6308  { // --> t(Qmult*ndof,Qmult*target_dim)
6309  ga_instruction_set::elementary_trans_info &eti
6310  = rmi.elementary_trans_infos
6311  [std::make_tuple(pnode->elementary_name, mfo, mf)];
6312  pgai =
6313  std::make_shared<ga_instruction_elementary_trans_val_base>
6314  (pnode->tensor(), rmi.base[mf], mf->get_qdim(),
6315  workspace.elementary_transformation(pnode->elementary_name),
6316  *mfo, *mf, gis.ctx, eti.M, eti.icv);
6317  }
6318  break;
6319  case GA_NODE_ELEMENTARY_GRAD_TEST:
6320  { // --> t(Qmult*ndof,Qmult*target_dim,N)
6321  ga_instruction_set::elementary_trans_info &eti
6322  = rmi.elementary_trans_infos
6323  [std::make_tuple(pnode->elementary_name, mfo, mf)];
6324  pgai =
6325  std::make_shared<ga_instruction_elementary_trans_grad_base>
6326  (pnode->tensor(), rmi.grad[mf], mf->get_qdim(),
6327  workspace.elementary_transformation(pnode->elementary_name),
6328  *mfo, *mf, gis.ctx, eti.M, eti.icv);
6329  }
6330  break;
6331  case GA_NODE_ELEMENTARY_HESS_TEST:
6332  { // --> t(Qmult*ndof,Qmult*target_dim,N,N)
6333  ga_instruction_set::elementary_trans_info &eti
6334  = rmi.elementary_trans_infos
6335  [std::make_tuple(pnode->elementary_name, mfo, mf)];
6336  pgai =
6337  std::make_shared<ga_instruction_elementary_trans_hess_base>
6338  (pnode->tensor(), rmi.hess[mf], mf->get_qdim(),
6339  workspace.elementary_transformation(pnode->elementary_name),
6340  *mfo, *mf, gis.ctx, eti.M, eti.icv);
6341  }
6342  break;
6343  case GA_NODE_ELEMENTARY_DIVERG_TEST:
6344  { // --> t(Qmult*ndof)
6345  ga_instruction_set::elementary_trans_info &eti
6346  = rmi.elementary_trans_infos
6347  [std::make_tuple(pnode->elementary_name, mfo, mf)];
6348  pgai =
6349  std::make_shared<ga_instruction_elementary_trans_diverg_base>
6350  (pnode->tensor(), rmi.grad[mf], mf->get_qdim(),
6351  workspace.elementary_transformation(pnode->elementary_name),
6352  *mfo, *mf, gis.ctx, eti.M, eti.icv);
6353  }
6354  break;
6355  default: break;
6356  }
6357  if (pgai) rmi.instructions.push_back(std::move(pgai));
6358  }
6359  workspace.add_temporary_interval_for_unreduced_variable(pnode->name);
6360  }
6361  break;
6362 
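// Same as the test function case above, but for test functions evaluated on
// the secondary domain: all instructions act on the secondary-domain
// context sdi instead of the main context.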
6363  case GA_NODE_SECONDARY_DOMAIN_VAL_TEST:
6364  case GA_NODE_SECONDARY_DOMAIN_GRAD_TEST:
6365  case GA_NODE_SECONDARY_DOMAIN_HESS_TEST:
6366  case GA_NODE_SECONDARY_DOMAIN_DIVERG_TEST:
6367  {
6368  GMM_ASSERT1(!function_case, "internal error");
6369  const mesh_fem *mf = workspace.associated_mf(pnode->name);
6370  const std::string &intn = pnode->interpolate_name;
6371  auto &sdi = rmi.secondary_domain_infos;
6372 
6373  fem_interpolation_context *pctx = &(sdi.ctx);
6374  papprox_integration pai = sdi.pai;
6375  psecondary_domain psd = workspace.secondary_domain(intn);
6376  if (mf) {
6377  GMM_ASSERT1(&(mf->linked_mesh()) == &(psd->mim().linked_mesh()),
6378  "The finite element of variable " << pnode->name <<
6379  " and the applied integration method have to be"
6380  " defined on the same mesh for secondary domain");
6381 
6382  // An instruction for pfp update
6383  if (is_uniform) {
6384  if (sdi.pfps.count(mf) == 0) {
6385  sdi.pfps[mf] = 0;
6386  pgai = std::make_shared<ga_instruction_update_pfp>
6387  (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
6388  rmi.begin_instructions.push_back(std::move(pgai));
6389  }
6390  } else if (sdi.pfps.count(mf) == 0 ||
6391  !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
6392  rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
6393  sdi.pfps[mf] = 0;
6394  pgai = std::make_shared<ga_instruction_update_pfp>
6395  (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
6396  rmi.instructions.push_back(std::move(pgai));
6397  }
6398 
6399  // An instruction for the base value
6400  pgai = pga_instruction();
6401  switch (pnode->node_type) {
6402  case GA_NODE_SECONDARY_DOMAIN_VAL_TEST:
6403  if (sdi.base.count(mf) == 0 ||
6404  !(if_hierarchy.is_compatible(rmi.base_hierarchy[mf]))) {
6405  rmi.base_hierarchy[mf].push_back(if_hierarchy);
6406  pgai = std::make_shared<ga_instruction_val_base>
6407  (sdi.base[mf], *pctx, *mf, sdi.pfps[mf]);
6408  }
6409  break;
6410  case GA_NODE_SECONDARY_DOMAIN_GRAD_TEST:
6411  case GA_NODE_SECONDARY_DOMAIN_DIVERG_TEST:
6412  if (sdi.grad.count(mf) == 0 ||
6413  !(if_hierarchy.is_compatible(rmi.grad_hierarchy[mf]))) {
6414  rmi.grad_hierarchy[mf].push_back(if_hierarchy);
6415  pgai = std::make_shared<ga_instruction_grad_base>
6416  (sdi.grad[mf], *pctx, *mf, sdi.pfps[mf]);
6417  }
6418  break;
6419  case GA_NODE_SECONDARY_DOMAIN_HESS_TEST:
6420  if (sdi.hess.count(mf) == 0 ||
6421  !(if_hierarchy.is_compatible(rmi.hess_hierarchy[mf]))) {
6422  rmi.hess_hierarchy[mf].push_back(if_hierarchy);
6423  pgai = std::make_shared<ga_instruction_hess_base>
6424  (sdi.hess[mf], *pctx, *mf, sdi.pfps[mf]);
6425  }
6426  break;
6427  default : GMM_ASSERT1(false, "Internal error");
6428  }
6429  if (pgai) rmi.instructions.push_back(std::move(pgai));
6430 
6431  // The copy of the real_base_value
6432  switch(pnode->node_type) {
6433  case GA_NODE_SECONDARY_DOMAIN_VAL_TEST:
6434  // --> t(Qmult*ndof,Qmult*target_dim)
6435  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
6436  pnode->t.set_sparsity(1, mf->get_qdim());
6437  tensor_to_clear = true;
6438  pgai = std::make_shared<ga_instruction_copy_vect_val_base>
6439  (pnode->tensor(), sdi.base[mf], mf->get_qdim());
6440  } else {
6441  pgai = std::make_shared<ga_instruction_copy_val_base>
6442  (pnode->tensor(), sdi.base[mf], mf->get_qdim());
6443  }
6444  break;
6445  case GA_NODE_SECONDARY_DOMAIN_GRAD_TEST:
6446  // --> t(Qmult*ndof,Qmult*target_dim,N)
6447  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
6448  pnode->t.set_sparsity(2, mf->get_qdim());
6449  tensor_to_clear = true;
6450  pgai = std::make_shared<ga_instruction_copy_vect_grad_base>
6451  (pnode->tensor(), sdi.grad[mf], mf->get_qdim());
6452  } else {
6453  pgai = std::make_shared<ga_instruction_copy_grad_base>
6454  (pnode->tensor(), sdi.grad[mf], mf->get_qdim());
6455  }
6456  break;
6457  case GA_NODE_SECONDARY_DOMAIN_HESS_TEST:
6458  // --> t(Qmult*ndof,Qmult*target_dim,N,N)
6459  pgai = std::make_shared<ga_instruction_copy_hess_base>
6460  (pnode->tensor(), sdi.hess[mf], mf->get_qdim());
6461  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6462  pnode->t.set_sparsity(3, mf->get_qdim());
6463  break;
6464  case GA_NODE_SECONDARY_DOMAIN_DIVERG_TEST:
6465  // --> t(Qmult*ndof)
6466  pgai = std::make_shared<ga_instruction_copy_diverg_base>
6467  (pnode->tensor(), sdi.grad[mf], mf->get_qdim());
6468  break;
6469  default: break;
6470  }
6471  if (pgai) rmi.instructions.push_back(std::move(pgai));
6472  }
6473  workspace.add_temporary_interval_for_unreduced_variable(pnode->name);
6474  }
6475  break;
6476 
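// Test functions evaluated through an interpolate transformation; the base
// values are computed on the element reached by the transformation.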
6477  case GA_NODE_INTERPOLATE_VAL_TEST: case GA_NODE_INTERPOLATE_GRAD_TEST:
6478  case GA_NODE_INTERPOLATE_HESS_TEST: case GA_NODE_INTERPOLATE_DIVERG_TEST:
6479  {
6480  const mesh_fem *mfn = workspace.associated_mf(pnode->name), **mfg = 0;
6481  const std::string &intn = pnode->interpolate_name;
6482  const mesh **m2 = &(rmi.interpolate_infos[intn].m);
6483  if (workspace.variable_group_exists(pnode->name)) {
6484  ga_instruction_set::variable_group_info &vgi =
6485  rmi.interpolate_infos[intn].groups_info[pnode->name];
6486  mfg = &(vgi.mf); mfn = 0;
6487  }
6488 
6489  if (pnode->node_type == GA_NODE_INTERPOLATE_VAL_TEST) {
6490  // --> t(Qmult*ndof,Qmult*target_dim)
6491  pgai = std::make_shared<ga_instruction_interpolate_val_base>
6492  (pnode->tensor(), m2, mfn, mfg, gis.ipt,
6493  workspace.qdim(pnode->name), rmi.interpolate_infos[intn],
6494  gis.fp_pool);
6495  } else if (pnode->node_type == GA_NODE_INTERPOLATE_GRAD_TEST) {
6496  // --> t(Qmult*ndof,Qmult*target_dim,N)
6497  pgai = std::make_shared<ga_instruction_interpolate_grad_base>
6498  (pnode->tensor(), m2, mfn, mfg, gis.ipt,
6499  workspace.qdim(pnode->name),
6500  rmi.interpolate_infos[intn], gis.fp_pool);
6501  } else if (pnode->node_type == GA_NODE_INTERPOLATE_HESS_TEST) {
6502  // --> t(Qmult*ndof,Qmult*target_dim,N,N)
6503  pgai = std::make_shared<ga_instruction_interpolate_hess_base>
6504  (pnode->tensor(), m2, mfn, mfg, gis.ipt,
6505  workspace.qdim(pnode->name),
6506  rmi.interpolate_infos[intn], gis.fp_pool);
6507  } else { // if (pnode->node_type == GA_NODE_INTERPOLATE_DIVERG_TEST) {
6508  // --> t(Qmult*ndof)
6509  pgai = std::make_shared<ga_instruction_interpolate_diverg_base>
6510  (pnode->tensor(), m2, mfn, mfg, gis.ipt,
6511  workspace.qdim(pnode->name),
6512  rmi.interpolate_infos[intn], gis.fp_pool);
6513  }
6514  rmi.instructions.push_back(std::move(pgai));
6515  workspace.add_temporary_interval_for_unreduced_variable(pnode->name);
6516  }
6517  break;
6518 
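// Operations between already compiled child tensors. Scalar operands are
// dispatched to dedicated scalar instructions, and the sparsity pattern of
// the operands is propagated to the result tensor whenever possible.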
6519  case GA_NODE_OP:
6520  switch(pnode->op_type) {
6521 
6522  case GA_PLUS:
6523  if (pnode->tensor().size() == 1) {
6524  GA_DEBUG_ASSERT(child0->tensor().size() == 1,
6525  "Internal error: child0 not scalar");
6526  GA_DEBUG_ASSERT(child1->tensor().size() == 1,
6527  "Internal error: child1 not scalar");
6528  pgai = std::make_shared<ga_instruction_scalar_add>
6529  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
6530  } else {
6531  pgai = std::make_shared<ga_instruction_add>
6532  (pnode->tensor(), child0->tensor(), child1->tensor());
6533  }
6534  if (child0->t.sparsity() == child1->t.sparsity()
6535  && child0->t.qdim() == child1->t.qdim())
6536  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6537  rmi.instructions.push_back(std::move(pgai));
6538  break;
6539 
6540  case GA_MINUS:
6541  if (pnode->tensor().size() == 1) {
6542  GA_DEBUG_ASSERT(child0->tensor().size() == 1,
6543  "Internal error: child0 not scalar");
6544  GA_DEBUG_ASSERT(child1->tensor().size() == 1,
6545  "Internal error: child1 not scalar");
6546  pgai = std::make_shared<ga_instruction_scalar_sub>
6547  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
6548  } else {
6549  pgai = std::make_shared<ga_instruction_sub>
6550  (pnode->tensor(), child0->tensor(), child1->tensor());
6551  }
6552  if (child0->t.sparsity() == child1->t.sparsity()
6553  && child0->t.qdim() == child1->t.qdim())
6554  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6555  rmi.instructions.push_back(std::move(pgai));
6556  break;
6557 
6558  case GA_UNARY_MINUS:
6559  if (pnode->tensor().size() == 1) {
6560  GA_DEBUG_ASSERT(child0->tensor().size() == 1, "Internal error");
6561  pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
6562  (pnode->tensor()[0], child0->tensor()[0], minus);
6563  } else {
6564  pgai = std::make_shared<ga_instruction_scalar_mult>
6565  (pnode->tensor(), child0->tensor(), minus);
6566  }
6567  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6568  rmi.instructions.push_back(std::move(pgai));
6569  break;
6570 
6571 
6572  case GA_DOT: case GA_COLON: case GA_MULT:
6573  {
6574  size_type tps0 = child0->tensor_proper_size();
6575  size_type tps1 = child1->tensor_proper_size();
6576  size_type s1 = (tps0 * tps1) / pnode->tensor_proper_size();
6577  size_type s2 = size_type(round(sqrt(scalar_type(s1))));
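// s2 is the contracted size: for a simple reduction one has
// tps0*tps1 = s2*s2*(size of the result), e.g. s2 = k for a (m,k)x(k,n)
// matrix multiplication.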
6578 
6579  pgai = pga_instruction();
6580  if ((pnode->op_type == GA_DOT && dim1 <= 1) ||
6581  (pnode->op_type == GA_COLON && dim1 <= 2) ||
6582  (pnode->op_type == GA_MULT && dim0 == 4) ||
6583  (pnode->op_type == GA_MULT && dim1 <= 1) ||
6584  child0->tensor().size() == 1 || tps1 == 1) {
6585 
6586  if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
6587  pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
6588  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
6589  }
6590  else if (child0->tensor().size() == 1) {
6591  pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
6592  pgai = std::make_shared<ga_instruction_scalar_mult>
6593  (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
6594  }
6595  else if (child1->tensor().size() == 1) {
6596  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6597  pgai = std::make_shared<ga_instruction_scalar_mult>
6598  (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
6599  }
6600  else if (pnode->test_function_type < 3) {
6601  if (tps0 == 1) {
6602  if (is_uniform) // Unrolled instruction
6603  pgai = ga_uniform_instruction_simple_tmult
6604  (pnode->tensor(), child0->tensor(), child1->tensor());
6605  else
6606  pgai = std::make_shared<ga_instruction_simple_tmult>
6607  (pnode->tensor(), child0->tensor(), child1->tensor());
6608  } else {
6609  if (tps1 == 1) {
6610  if (is_uniform) // Unrolled instruction
6611  pgai = ga_uniform_instruction_simple_tmult
6612  (pnode->tensor(), child1->tensor(), child0->tensor());
6613  else
6614  pgai = std::make_shared<ga_instruction_simple_tmult>
6615  (pnode->tensor(), child1->tensor(), child0->tensor());
6616  } else if (is_uniform) // Unrolled instruction
6617  pgai = ga_uniform_instruction_contraction_switch
6618  (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
6619  else // Unrolled instruction
6620  pgai = ga_instruction_contraction_switch
6621  (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
6622  }
6623  } else {
6624  if (child1->test_function_type == 1 ||
6625  child1->test_function_type == 3) {
6626  if (child1->test_function_type == 3 ||
6627  child1->tensor_proper_size() <= s2) {
6628  if (tps0 == 1) {
6629  if (is_uniform) { // Unrolled instruction
6630  pgai = ga_uniform_instruction_simple_tmult
6631  (pnode->tensor(), child1->tensor(), child0->tensor());
6632  } else
6633  pgai = std::make_shared<ga_instruction_simple_tmult>
6634  (pnode->tensor(), child1->tensor(), child0->tensor());
6635  } else if (is_uniform) // Unrolled instruction
6636  pgai = ga_uniform_instruction_contraction_switch
6637  (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
6638  else // Unrolled instruction
6639  pgai = ga_instruction_contraction_switch
6640  (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
6641  } else
6642  pgai = std::make_shared<ga_instruction_spec_contraction>
6643  (pnode->tensor(), child1->tensor(), child0->tensor(), s2);
6644  } else if (child1->test_function_type == 0 ||
6645  (child0->tensor_proper_size() == s2 &&
6646  child1->tensor_proper_size() == s2)) {
6647  if (tps0 == 1) {
6648  if (is_uniform) { // Unrolled instruction
6649  pgai = ga_uniform_instruction_simple_tmult
6650  (pnode->tensor(), child0->tensor(), child1->tensor());
6651  } else
6652  pgai = std::make_shared<ga_instruction_simple_tmult>
6653  (pnode->tensor(), child0->tensor(), child1->tensor());
6654  } else {
6655  if (is_uniform) // Unrolled instruction
6656  pgai = ga_uniform_instruction_contraction_switch
6657  (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
6658  else // Unrolled instruction
6659  pgai = ga_instruction_contraction_switch
6660  (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
6661  }
6662  } else {
6663  if (child0->tensor_proper_size() == s2)
6664  pgai = ga_uniform_instruction_contraction_switch
6665  (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
6666  else if (child1->tensor_proper_size() == s2)
6667  pgai = std::make_shared<ga_instruction_spec_contraction>
6668  (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
6669  else
6670  pgai = std::make_shared<ga_instruction_spec2_contraction>
6671  (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
6672  }
6673  }
6674  } else { // GA_MULT or GA_DOT for dim1 > 1 or GA_COLON for dim1 > 2
6675  // and child1->tensor_proper_size() > 1
6676  if (pnode->test_function_type < 3) {
6677  if (tps0 == 1) {
6678  if (is_uniform) // Unrolled instruction
6679  pgai = ga_uniform_instruction_simple_tmult
6680  (pnode->tensor(), child0->tensor(), child1->tensor());
6681  else
6682  pgai = std::make_shared<ga_instruction_simple_tmult>
6683  (pnode->tensor(), child0->tensor(), child1->tensor());
6684  } else {
6685  if (child1->test_function_type == 0)
6686  pgai = std::make_shared<ga_instruction_matrix_mult>
6687  (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
6688  else
6689  pgai = std::make_shared<ga_instruction_matrix_mult_spec>
6690  (pnode->tensor(), child0->tensor(), child1->tensor(),
6691  s2, tps0/s2, tps1/s2);
6692  }
6693  } else {
6694  if (child0->tensor_proper_size() == 1) {
6695  if (child0->test_function_type == 0 ||
6696  child0->test_function_type == 1) {
6697  if (is_uniform) // Unrolled instruction
6698  pgai = ga_uniform_instruction_simple_tmult
6699  (pnode->tensor(), child0->tensor(), child1->tensor());
6700  else
6701  pgai = std::make_shared<ga_instruction_simple_tmult>
6702  (pnode->tensor(), child0->tensor(), child1->tensor());
6703  } else
6704  pgai = std::make_shared<ga_instruction_spec_tmult>
6705  (pnode->tensor(), child1->tensor(), child0->tensor(),
6706  tps1, tps0);
6707  } else {
6708  if (child1->test_function_type == 0)
6709  pgai = std::make_shared<ga_instruction_matrix_mult>
6710  (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
6711  else if (child1->test_function_type == 2)
6712  pgai = std::make_shared<ga_instruction_matrix_mult_spec>
6713  (pnode->tensor(), child0->tensor(), child1->tensor(),
6714  s2, tps0/s2, tps1/s2);
6715  else
6716  pgai = std::make_shared<ga_instruction_matrix_mult_spec2>
6717  (pnode->tensor(), child0->tensor(), child1->tensor(),
6718  s2, tps0/s2, tps1/s2);
6719  }
6720  }
6721  }
6722  rmi.instructions.push_back(std::move(pgai));
6723  }
6724  break;
6725 
6726  case GA_DIV:
6727  if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
6728  pgai = std::make_shared<ga_instruction_scalar_scalar_div>
6729  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
6730  } else if (child1->tensor().size() == 1) {
6731  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6732  pgai = std::make_shared<ga_instruction_scalar_div>
6733  (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
6734  } else GMM_ASSERT1(false, "Internal error");
6735  rmi.instructions.push_back(std::move(pgai));
6736  break;
6737 
6738  case GA_PRINT:
6739  pnode->t.set_to_copy(child0->t);
6740  pgai = std::make_shared<ga_instruction_print_tensor>
6741  (pnode->tensor(), child0, gis.ctx, gis.nbpt, gis.ipt);
6742  rmi.instructions.push_back(std::move(pgai));
6743  break;
6744 
6745  case GA_QUOTE:
6746  if (pnode->tensor_proper_size() > 1) {
6747  size_type n1 = child0->tensor_proper_size(0);
6748  size_type n2 = (child0->tensor_order() > 1) ?
6749  child0->tensor_proper_size(1) : 1;
6750  size_type nn = 1;
6751  for (size_type i = 2; i < child0->tensor_order(); ++i)
6752  nn *= child0->tensor_proper_size(i);
6753  if (child0->nb_test_functions() == 0)
6754  pgai = std::make_shared<ga_instruction_transpose_no_test>
6755  (pnode->tensor(), child0->tensor(), n1, n2, nn);
6756  else
6757  pgai = std::make_shared<ga_instruction_transpose>
6758  (pnode->tensor(), child0->tensor(), n1, n2, nn);
6759  rmi.instructions.push_back(std::move(pgai));
6760  } else {
6761  pnode->t.set_to_copy(child0->t);
6762  }
6763  break;
6764 
6765  case GA_SYM:
6766  if (pnode->tensor_proper_size() != 1) {
6767  pgai = std::make_shared<ga_instruction_sym>
6768  (pnode->tensor(), child0->tensor());
6769  rmi.instructions.push_back(std::move(pgai));
6770  } else {
6771  pnode->t.set_to_copy(child0->t);
6772  }
6773  break;
6774 
6775  case GA_SKEW:
6776  {
6777  pgai = std::make_shared<ga_instruction_skew>
6778  (pnode->tensor(), child0->tensor());
6779  rmi.instructions.push_back(std::move(pgai));
6780  }
6781  break;
6782 
6783  case GA_TRACE:
6784  {
6785  size_type N = (child0->tensor_proper_size() == 1) ? 1:size0.back();
6786  if (N == 1) {
6787  pnode->t.set_to_copy(child0->t);
6788  } else {
6789  pgai = std::make_shared<ga_instruction_trace>
6790  (pnode->tensor(), child0->tensor(), N);
6791  rmi.instructions.push_back(std::move(pgai));
6792  }
6793  }
6794  break;
6795 
6796  case GA_DEVIATOR:
6797  {
6798  size_type N = (child0->tensor_proper_size() == 1) ? 1:size0.back();
6799  pgai = std::make_shared<ga_instruction_deviator>
6800  (pnode->tensor(), child0->tensor(), N);
6801  rmi.instructions.push_back(std::move(pgai));
6802  }
6803  break;
6804 
6805  case GA_DOTMULT:
6806 
6807  if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
6808  pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
6809  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
6810  } else if (child0->tensor().size() == 1) {
6811  pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
6812  pgai = std::make_shared<ga_instruction_scalar_mult>
6813  (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
6814  }
6815  else if (child1->tensor().size() == 1) {
6816  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6817  pgai = std::make_shared<ga_instruction_scalar_mult>
6818  (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
6819  }
6820  else if (child1->test_function_type == 0)
6821  pgai = std::make_shared<ga_instruction_dotmult>
6822  (pnode->tensor(), child0->tensor(), child1->tensor());
6823  else if (child0->test_function_type == 0)
6824  pgai = std::make_shared<ga_instruction_dotmult>
6825  (pnode->tensor(), child1->tensor(), child0->tensor());
6826  else if (child0->test_function_type == 1)
6827  pgai = std::make_shared<ga_instruction_dotmult_spec>
6828  (pnode->tensor(), child0->tensor(), child1->tensor());
6829  else
6830  pgai = std::make_shared<ga_instruction_dotmult_spec>
6831  (pnode->tensor(), child1->tensor(), child0->tensor());
6832 
6833  rmi.instructions.push_back(std::move(pgai));
6834  break;
6835 
6836 
6837  case GA_DOTDIV:
6838  if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
6839  pgai = std::make_shared<ga_instruction_scalar_scalar_div>
6840  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
6841  } else if (child1->tensor().size() == 1) {
6842  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6843  pgai = std::make_shared<ga_instruction_scalar_div>
6844  (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
6845  } else if (child1->test_function_type == 0) {
6846  pgai = std::make_shared<ga_instruction_dotdiv>
6847  (pnode->tensor(), child0->tensor(), child1->tensor());
6848  } else GMM_ASSERT1(false, "Internal error");
6849  rmi.instructions.push_back(std::move(pgai));
6850  break;
6851 
6852 
6853  case GA_TMULT:
6854  if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
6855  pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
6856  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
6857  } else if (child0->tensor().size() == 1) {
6858  pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
6859  pgai = std::make_shared<ga_instruction_scalar_mult>
6860  (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
6861  }
6862  else if (child1->tensor().size() == 1) {
6863  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6864  pgai = std::make_shared<ga_instruction_scalar_mult>
6865  (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
6866  }
6867  else if (child1->test_function_type == 0) {
6868  if (is_uniform) // Unrolled instruction
6869  pgai = ga_uniform_instruction_simple_tmult
6870  (pnode->tensor(), child0->tensor(), child1->tensor());
6871  else
6872  pgai = std::make_shared<ga_instruction_simple_tmult>
6873  (pnode->tensor(), child0->tensor(), child1->tensor());
6874  } else if (child1->tensor_proper_size() == 1)
6875  pgai = std::make_shared<ga_instruction_spec2_tmult>
6876  (pnode->tensor(), child0->tensor(), child1->tensor());
6877  else
6878  pgai = std::make_shared<ga_instruction_spec_tmult>
6879  (pnode->tensor(), child0->tensor(), child1->tensor(),
6880  child0->tensor_proper_size(),
6881  child1->tensor_proper_size());
6882 
6883  rmi.instructions.push_back(std::move(pgai));
6884  break;
6885 
6886  default:GMM_ASSERT1(false, "Unexpected operation. Internal error.");
6887  }
6888  break;
6889 
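// Explicit matrix/tensor written componentwise in the weak form language.
// With test functions each component is itself a tensor, otherwise each
// component reduces to a scalar.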
6890  case GA_NODE_C_MATRIX:
6891  {
6892  if (pnode->test_function_type) {
6893  std::vector<const base_tensor *> components(pnode->children.size());
6894  for (size_type i = 0; i < pnode->children.size(); ++i)
6895  components[i] = &(pnode->children[i]->tensor());
6896  pgai = std::make_shared<ga_instruction_c_matrix_with_tests>
6897  (pnode->tensor(), components);
6898  } else {
6899  std::vector<scalar_type *> components(pnode->children.size());
6900  for (size_type i = 0; i < pnode->children.size(); ++i)
6901  components[i] = &(pnode->children[i]->tensor()[0]);
6902  pgai = std::make_shared<ga_instruction_simple_c_matrix>
6903  (pnode->tensor(), components);
6904  }
6905  rmi.instructions.push_back(std::move(pgai));
6906  }
6907  break;
6908 
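// A GA_NODE_PARAMS node covers Reshape, Cross_product, Index_move_last,
// Swap_indices, Contract, calls to predefined scalar functions and nonlinear
// operators, and plain component/slice access into a tensor.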
6909  case GA_NODE_PARAMS:
6910  if (child0->node_type == GA_NODE_RESHAPE) {
6911  pgai = std::make_shared<ga_instruction_copy_tensor>(pnode->tensor(),
6912  child1->tensor());
6913  rmi.instructions.push_back(std::move(pgai));
6914  } else if (child0->node_type == GA_NODE_CROSS_PRODUCT) {
6915  pga_tree_node child2 = pnode->children[2];
6916  if (child1->test_function_type==2 && child2->test_function_type==1)
6917  pgai = std::make_shared<ga_instruction_cross_product_tf>
6918  (pnode->tensor(), child2->tensor(), child1->tensor(), true);
6919  else if (child1->test_function_type || child2->test_function_type)
6920  pgai = std::make_shared<ga_instruction_cross_product_tf>
6921  (pnode->tensor(), child1->tensor(), child2->tensor(), false);
6922  else
6923  pgai = std::make_shared<ga_instruction_cross_product>
6924  (pnode->tensor(), child1->tensor(), child2->tensor());
6925  rmi.instructions.push_back(std::move(pgai));
6926  } else if (child0->node_type == GA_NODE_IND_MOVE_LAST) {
6927  size_type ind;
6928  ind = size_type(round(pnode->children[2]->tensor()[0])-1);
6929  size_type ii2 = 1;
6930  for (size_type i = 0; i < child1->tensor_order(); ++i)
6931  if (i>ind) ii2 *= child1->tensor_proper_size(i);
6932  size_type nn = child1->tensor_proper_size(ind);
6933  pgai = std::make_shared<ga_instruction_index_move_last>
6934  (pnode->tensor(), child1->tensor(), nn, ii2);
6935  rmi.instructions.push_back(std::move(pgai));
6936  } else if (child0->node_type == GA_NODE_SWAP_IND) {
6937  size_type ind[4];
6938  for (size_type i = 2; i < 4; ++i)
6939  ind[i] = size_type(round(pnode->children[i]->tensor()[0])-1);
6940  if (ind[2] > ind[3]) std::swap(ind[2], ind[3]);
6941  size_type ii2 = 1, ii3 = 1;
6942  for (size_type i = 0; i < child1->tensor_order(); ++i) {
6943  if (i>ind[2] && i<ind[3]) ii2 *= child1->tensor_proper_size(i);
6944  if (i>ind[3]) ii3 *= child1->tensor_proper_size(i);
6945  }
6946  size_type nn1 = child1->tensor_proper_size(ind[2]);
6947  size_type nn2 = child1->tensor_proper_size(ind[3]);
6948 
6949  pgai = std::make_shared<ga_instruction_swap_indices>
6950  (pnode->tensor(), child1->tensor(), nn1, nn2, ii2, ii3);
6951  rmi.instructions.push_back(std::move(pgai));
6952  } else if (child0->node_type == GA_NODE_CONTRACT) {
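// Contract variants: 4 children correspond to Contract(A,i,j), 5 to
// Contract(A,i,B,j) and 7 to Contract(A,i,j,B,k,l). ind first stores the
// positions of the index arguments among the children, then their 0-based
// values; indsize stores the sizes of the contracted dimensions.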
6953  std::vector<size_type> ind(2), indsize(2);
6954  pga_tree_node child2(0);
6955  if (pnode->children.size() == 4)
6956  { ind[0] = 2; ind[1] = 3; }
6957  else if (pnode->children.size() == 5)
6958  { ind[0] = 2; ind[1] = 4; child2 = pnode->children[3]; }
6959  else if (pnode->children.size() == 7) {
6960  ind.resize(4); indsize.resize(4);
6961  ind[0] = 2; ind[1] = 3; ind[2] = 5; ind[3] = 6;
6962  child2 = pnode->children[4];
6963  }
6964  size_type kk = 0, ll = 1;
6965  for (size_type i = 1; i < pnode->children.size(); ++i) {
6966  if (i == ind[kk]) {
6967  ind[kk] = size_type(round(pnode->children[i]->tensor()[0])-1);
6968  indsize[kk] = pnode->children[ll]->tensor_proper_size(ind[kk]);
6969  ++kk;
6970  } else ll = i;
6971  }
6972 
6973  if (pnode->children.size() == 4) {
6974  size_type i1 = ind[0], i2 = ind[1];
6975  if (i1 > i2) std::swap(i1, i2);
6976  size_type ii2 = 1, ii3 = 1;
6977  for (size_type i = 0; i < child1->tensor_order(); ++i) {
6978  if (i > i1 && i < i2) ii2 *= child1->tensor_proper_size(i);
6979  if (i > i2) ii3 *= child1->tensor_proper_size(i);
6980  }
6981  pgai = std::make_shared<ga_instruction_contract_1_1>
6982  (pnode->tensor(), child1->tensor(), indsize[0], ii2, ii3);
6983  }
6984  else if (pnode->children.size() == 5) {
6985  // Particular cases should be detected (ii2=ii3=1 in particular).
6986  size_type i1 = ind[0], i2 = ind[1];
6987  size_type ii1 = 1, ii2 = 1, ii3 = 1, ii4 = 1;
6988  for (size_type i = 0; i < child1->tensor_order(); ++i) {
6989  if (i < i1) ii1 *= child1->tensor_proper_size(i);
6990  if (i > i1) ii2 *= child1->tensor_proper_size(i);
6991  }
6992  for (size_type i = 0; i < child2->tensor_order(); ++i) {
6993  if (i < i2) ii3 *= child2->tensor_proper_size(i);
6994  if (i > i2) ii4 *= child2->tensor_proper_size(i);
6995  }
6996  if (child1->test_function_type==1 && child2->test_function_type==2)
6997  pgai = std::make_shared<ga_instruction_contract_2_1_rev>
6998  (pnode->tensor(), child1->tensor(), child2->tensor(),
6999  indsize[0], ii1, ii2, ii3, ii4);
7000  else
7001  pgai = std::make_shared<ga_instruction_contract_2_1>
7002  (pnode->tensor(), child1->tensor(), child2->tensor(),
7003  indsize[0], ii1, ii2, ii3, ii4);
7004  }
7005  else if (pnode->children.size() == 7) {
7006  // Particular cases should be detected (ii2=ii3=1 in particular).
7007  size_type i1 = ind[0], i2 = ind[1], i3 = ind[2], i4 = ind[3];
7008  size_type nn1 = indsize[0], nn2 = indsize[1];
7009  size_type ii1 = 1, ii2 = 1, ii3 = 1, ii4 = 1, ii5 = 1, ii6 = 1;
7010  if (i1 > i2)
7011  { std::swap(i1, i2); std::swap(i3, i4); std::swap(nn1, nn2); }
7012  for (size_type i = 0; i < child1->tensor_order(); ++i) {
7013  if (i < i1) ii1 *= child1->tensor_proper_size(i);
7014  if (i > i1 && i < i2) ii2 *= child1->tensor_proper_size(i);
7015  if (i > i2) ii3 *= child1->tensor_proper_size(i);
7016  }
7017  for (size_type i = 0; i < child2->tensor_order(); ++i) {
7018  if (i < i3 && i < i4) ii4 *= child2->tensor_proper_size(i);
7019  if ((i > i3 && i < i4) || (i > i4 && i < i3))
7020  ii5 *= child2->tensor_proper_size(i);
7021  if (i > i3 && i > i4) ii6 *= child2->tensor_proper_size(i);
7022  }
7023  if (child1->test_function_type==1 && child2->test_function_type==2)
7024  pgai = std::make_shared<ga_instruction_contract_2_2_rev>
7025  (pnode->tensor(), child1->tensor(), child2->tensor(),
7026  nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6, i4 < i3);
7027  else
7028  pgai = std::make_shared<ga_instruction_contract_2_2>
7029  (pnode->tensor(), child1->tensor(), child2->tensor(),
7030  nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6, i4 < i3);
7031  }
7032  rmi.instructions.push_back(std::move(pgai));
7033  } else if (child0->node_type == GA_NODE_PREDEF_FUNC) {
7034 
7035  std::string name = child0->name;
7036  const ga_predef_function_tab &PREDEF_FUNCTIONS
7037  = dal::singleton<ga_predef_function_tab>::instance(0);
7038  ga_predef_function_tab::const_iterator it = PREDEF_FUNCTIONS.find(name);
7039  const ga_predef_function &F = it->second;
7040  size_type nbargs = F.nbargs();
7041  pga_tree_node child2 = (nbargs == 2) ? pnode->children[2] : child1;
7042 
7043  if (nbargs == 1) {
7044  if (child1->tensor().size() == 1) {
7045  if (F.ftype() == 0)
7046  pgai = std::make_shared<ga_instruction_eval_func_1arg_1res>
7047  (pnode->tensor()[0], child1->tensor()[0], F.f1());
7048  else
7049  pgai = std::make_shared<ga_instruction_eval_func_1arg_1res_expr>
7050  (pnode->tensor()[0], child1->tensor()[0], F);
7051  } else {
7052  if (F.ftype() == 0)
7053  pgai = std::make_shared<ga_instruction_eval_func_1arg>
7054  (pnode->tensor(), child1->tensor(), F.f1());
7055  else
7056  pgai = std::make_shared<ga_instruction_eval_func_1arg_expr>
7057  (pnode->tensor(), child1->tensor(), F);
7058  }
7059  } else {
7060  if (child1->tensor().size() == 1 && child2->tensor().size() == 1) {
7061  if (F.ftype() == 0)
7062  pgai = std::make_shared<ga_instruction_eval_func_2arg_1res>
7063  (pnode->tensor()[0], child1->tensor()[0], child2->tensor()[0],
7064  F.f2());
7065  else
7066  pgai = std::make_shared<ga_instruction_eval_func_2arg_1res_expr>
7067  (pnode->tensor()[0], child1->tensor()[0], child2->tensor()[0],
7068  F);
7069  } else if (child1->tensor().size() == 1) {
7070  if (F.ftype() == 0)
7071  pgai =
7072  std::make_shared<ga_instruction_eval_func_2arg_first_scalar>
7073  (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
7074  else
7075  pgai =
7076  std::make_shared<ga_instruction_eval_func_2arg_first_scalar_expr>
7077  (pnode->tensor(), child1->tensor(), child2->tensor(), F);
7078  } else if (child2->tensor().size() == 1) {
7079  if (F.ftype() == 0)
7080  pgai =
7081  std::make_shared<ga_instruction_eval_func_2arg_second_scalar>
7082  (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
7083  else
7084  pgai =
7085  std::make_shared<ga_instruction_eval_func_2arg_second_scalar_expr>
7086  (pnode->tensor(), child1->tensor(), child2->tensor(), F);
7087  } else {
7088  if (F.ftype() == 0)
7089  pgai = std::make_shared<ga_instruction_eval_func_2arg>
7090  (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
7091  else
7092  pgai = std::make_shared<ga_instruction_eval_func_2arg_expr>
7093  (pnode->tensor(), child1->tensor(), child2->tensor(), F);
7094  }
7095  }
7096  rmi.instructions.push_back(std::move(pgai));
7097 
7098  } else if (child0->node_type == GA_NODE_SPEC_FUNC) {
7099 
7100  GMM_ASSERT1(false, "Internal error");
7101 
7102  } else if (child0->node_type == GA_NODE_OPERATOR) {
7103 
7104  ga_predef_operator_tab &PREDEF_OPERATORS
7105  = dal::singleton<ga_predef_operator_tab>::instance(0);
7106  ga_predef_operator_tab::T::iterator it
7107  = PREDEF_OPERATORS.tab.find(child0->name);
7108  const ga_nonlinear_operator &OP = *(it->second);
7109  ga_nonlinear_operator::arg_list args;
7110  for (size_type i = 1; i < pnode->children.size(); ++i)
7111  args.push_back(&(pnode->children[i]->tensor()));
7112 
7113  if (child0->der1 && child0->der2 == 0) {
7114  pgai = std::make_shared<ga_instruction_eval_derivative_OP>
7115  (pnode->tensor(), OP, args, child0->der1);
7116  } else if (child0->der1 && child0->der2) {
7117  pgai = std::make_shared<ga_instruction_eval_second_derivative_OP>
7118  (pnode->tensor(), OP, args, child0->der1, child0->der2);
7119  } else {
7120  pgai = std::make_shared<ga_instruction_eval_OP>(pnode->tensor(),
7121  OP, args);
7122  }
7123  rmi.instructions.push_back(std::move(pgai));
7124 
7125  } else { // Access to a component of the tensor
7126  bgeot::multi_index mi1(size0.size()), indices;
7127  if (pnode->tensor().size() == 1) {
7128  for (size_type i = 0; i < child0->tensor_order(); ++i)
7129  mi1[i] = size_type(round(pnode->children[i+1]->tensor()[0])-1);
7130  pgai = std::make_shared<ga_instruction_copy_scalar>
7131  (pnode->tensor()[0], child0->tensor()(mi1));
7132  } else {
7133  size_type nb_test = pnode->nb_test_functions();
7134  for (size_type i = 0; i < nb_test; ++i) indices.push_back(i);
7135  for (size_type i = 0; i < child0->tensor_order(); ++i) {
7136  if (pnode->children[i+1]->node_type != GA_NODE_ALLINDICES)
7137  mi1[i+nb_test]
7138  = size_type(round(pnode->children[i+1]->tensor()[0])- 1);
7139  else
7140  indices.push_back(i+nb_test);
7141  }
7142  pgai = std::make_shared<ga_instruction_tensor_slice>
7143  (pnode->tensor(), child0->tensor(), mi1, indices);
7144  }
7145  rmi.instructions.push_back(std::move(pgai));
7146  }
7147 
7148  break;
7149 
7150  default:GMM_ASSERT1(false, "Unexpected node type " << pnode->node_type
7151  << " in compilation. Internal error.");
7152  }
7153  if (tensor_to_clear) {
7154  gmm::clear(pnode->tensor().as_vector());
7155  if (!is_uniform) {
7156  pgai = std::make_shared<ga_instruction_clear_tensor>(pnode->tensor());
7157  rmi.elt_instructions.push_back(std::move(pgai));
7158  }
7159  }
7160  rmi.node_list[pnode->hash_value].push_back(pnode);
7161  } // ga_compile_node
7162 
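// Compilation of the workspace trees for direct evaluation of an expression
// (function mode, no assembly loop). Each root tensor is accumulated into
// the workspace assembled tensor through an add-to-coefficient instruction.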
7163  void ga_compile_function(ga_workspace &workspace,
7164  ga_instruction_set &gis, bool scalar) {
7165  for (size_type i = 0; i < workspace.nb_trees(); ++i) {
7166  const ga_workspace::tree_description &td = workspace.tree_info(i);
7167 
7168  gis.trees.push_back(*(td.ptree));
7169  pga_tree_node root = gis.trees.back().root;
7170  if (root) {
7171  GMM_ASSERT1(!scalar || (root->tensor().size() == 1),
7172  "The result of the given expression is not a scalar");
7173  ga_instruction_set::region_mim rm(td.mim, td.rg, 0);
7174  gis.all_instructions[rm].m = td.m;
7175  ga_if_hierarchy if_hierarchy;
7176  ga_compile_node(root, workspace, gis, gis.all_instructions[rm],
7177  *(td.m), true, if_hierarchy);
7178 
7179  gis.coeff = scalar_type(1);
7180  pga_instruction pgai;
7181  workspace.assembled_tensor() = root->tensor();
7182  pgai = std::make_shared<ga_instruction_add_to_coeff>
7183  (workspace.assembled_tensor(), root->tensor(), gis.coeff);
7184  gis.all_instructions[rm].instructions.push_back(std::move(pgai));
7185  }
7186  }
7187  }
7188 
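// Recursively collects the interpolate transformations used in a tree and,
// for variable groups, the group names to be resolved; transformations whose
// derivative is needed are gathered in interpolates_der. Returns true if any
// interpolate node was found.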
7189  static bool ga_node_used_interpolates
7190  (const pga_tree_node pnode, const ga_workspace &workspace,
7191  std::map<std::string, std::set<std::string> > &interpolates,
7192  std::set<std::string> &interpolates_der) {
7193  bool found = false;
7194  bool intrpl(pnode->node_type == GA_NODE_INTERPOLATE_VAL ||
7195  pnode->node_type == GA_NODE_INTERPOLATE_GRAD ||
7196  pnode->node_type == GA_NODE_INTERPOLATE_HESS ||
7197  pnode->node_type == GA_NODE_INTERPOLATE_DIVERG);
7198  bool intrpl_test(pnode->node_type == GA_NODE_INTERPOLATE_VAL_TEST ||
7199  pnode->node_type == GA_NODE_INTERPOLATE_GRAD_TEST ||
7200  pnode->node_type == GA_NODE_INTERPOLATE_HESS_TEST ||
7201  pnode->node_type == GA_NODE_INTERPOLATE_DIVERG_TEST);
7202 
7203  if (intrpl || intrpl_test ||
7204  pnode->node_type == GA_NODE_INTERPOLATE_FILTER ||
7205  pnode->node_type == GA_NODE_INTERPOLATE_X ||
7206  pnode->node_type == GA_NODE_INTERPOLATE_NORMAL) {
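// The size() call only forces the creation of the map entry for this
// transformation; its result is discarded.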
7207  interpolates[pnode->interpolate_name].size();
7208  if (intrpl || intrpl_test) {
7209  if (workspace.variable_group_exists(pnode->name))
7210  interpolates[pnode->interpolate_name].insert(pnode->name);
7211  }
7212  found = true;
7213  }
7214  if (pnode->node_type == GA_NODE_INTERPOLATE_DERIVATIVE) {
7215  interpolates_der.insert(pnode->interpolate_name_der);
7216  interpolates[pnode->interpolate_name_der].size();
7217  if (workspace.variable_group_exists(pnode->name))
7218  interpolates[pnode->interpolate_name_der].insert(pnode->name);
7219  }
7220  for (size_type i = 0; i < pnode->children.size(); ++i)
7221  found = ga_node_used_interpolates(pnode->children[i], workspace,
7222  interpolates, interpolates_der)
7223  || found;
7224  return found;
7225  }
7226 
7227 
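// Emits, once per interpolate transformation used under pnode, the
// instruction performing the transformation call (with a dedicated variant
// for the neighbor element transformation) and, once per variable group,
// the instruction updating the corresponding group information.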
7228  static void ga_compile_interpolate_trans
7229  (const pga_tree_node pnode, const ga_workspace &workspace,
7230  ga_instruction_set &gis, ga_instruction_set::region_mim_instructions &rmi,
7231  const mesh &m) {
7232 
7233  std::set<std::string> interpolates_der;
7234  std::map<std::string, std::set<std::string> > transformations;
7235  ga_node_used_interpolates(pnode, workspace, transformations,
7236  interpolates_der);
7237 
7238  for (const auto &transformation : transformations) {
7239  const std::string &transname = transformation.first;
7240  bool compute_der = (interpolates_der.count(transname) != 0);
7241  if (rmi.transformations.count(transname) == 0 ||
7242  (compute_der && rmi.transformations_der.count(transname) == 0)) {
7243  rmi.transformations[transname].size();
7244  gis.transformations.insert(transname);
7245  if (compute_der) rmi.transformations_der.insert(transname);
7246  pga_instruction pgai;
7247  if (transname.compare("neighbor_element") == 0 ||
7248  transname.compare("neighbour_elt") == 0) {
7249  pgai = std::make_shared<ga_instruction_neighbor_transformation_call>
7250  (workspace, rmi.interpolate_infos[transname],
7251  workspace.interpolate_transformation(transname), gis.ctx,
7252  gis.Normal, m, gis.ipt, gis.pai, gis.gp_pool,
7253  gis.neighbor_corresp);
7254  } else {
7255  pgai = std::make_shared<ga_instruction_transformation_call>
7256  (workspace, rmi.interpolate_infos[transname],
7257  workspace.interpolate_transformation(transname), gis.ctx,
7258  gis.Normal, m, compute_der);
7259  }
7260  if (pgai) rmi.instructions.push_back(std::move(pgai));
7261  }
7262 
7263  for (const std::string &nodename : transformation.second) {
7264  if (rmi.transformations[transname].count(nodename) == 0) {
7265  auto&& inin = rmi.interpolate_infos[transname];
7266  pga_instruction pgai =
7267  std::make_shared<ga_instruction_update_group_info>
7268  (workspace, gis, inin, nodename, inin.groups_info[nodename]);
7269  rmi.instructions.push_back(std::move(pgai));
7270  rmi.transformations[transname].insert(nodename);
7271  }
7272  }
7273  }
7274  }
7275 
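// Compilation of interpolation (non-assembly) trees: semantic analysis on
// the target mesh, setup of the interpolate transformations, compilation of
// the tree nodes and a final instruction adding the root tensor to the
// assembled tensor.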
7276  void ga_compile_interpolation(ga_workspace &workspace,
7277  ga_instruction_set &gis) {
7278  gis.transformations.clear();
7279  gis.all_instructions.clear();
7280  for (size_type i = 0; i < workspace.nb_trees(); ++i) {
7281  const ga_workspace::tree_description &td = workspace.tree_info(i);
7282  if (td.operation != ga_workspace::ASSEMBLY) {
7283  gis.trees.push_back(*(td.ptree));
7284 
7285  // Semantic analysis mainly to evaluate fixed size variables and data
7286  const mesh *m = td.m;
7287  GMM_ASSERT1(m, "Internal error");
7288  ga_semantic_analysis(gis.trees.back(), workspace, *m,
7289  ref_elt_dim_of_mesh(*m), true, false);
7290  pga_tree_node root = gis.trees.back().root;
7291  if (root) {
7292  // Compile tree
7293  ga_instruction_set::region_mim rm(td.mim, td.rg, 0);
7294  auto &rmi = gis.all_instructions[rm];
7295  rmi.m = td.m;
7296  rmi.im = td.mim;
7297  // rmi.interpolate_infos.clear();
7298  ga_compile_interpolate_trans(root, workspace, gis, rmi, *(td.m));
7299  ga_compile_node(root, workspace, gis,rmi, *(td.m), false,
7300  rmi.current_hierarchy);
7301 
7302  // After compile tree
7303  workspace.assembled_tensor() = root->tensor();
7304  pga_instruction pgai = std::make_shared<ga_instruction_add_to>
7305  (workspace.assembled_tensor(), root->tensor());
7306  rmi.instructions.push_back(std::move(pgai));
7307  }
7308  }
7309  }
7310  }
7311 
7312 
7313  struct var_set : std::map<std::string,size_type> {
7314  // This class indexes variable names in the order of their addition
7315  size_type operator[](const std::string &name) {
7316  if (name.empty()) return size_type(-1);
7317  size_type id = size();
7318  auto it = find(name);
7319  if (it == end()) {
7320  emplace(name, id);
7321  return id;
7322  }
7323  return it->second;
7324  }
7325  std::string operator[](const size_type &id) const {
7326  for (const auto &key_value : *this) // brute force reverse search
7327  if (key_value.second == id)
7328  return key_value.first;
7329  return std::string("");
7330  }
7331  };
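      // Minimal usage sketch for var_set (hypothetical names): for a var_set
      // vs, vs["p"] returns the id of "p", registering it with the next free
      // id if it is unknown, while vs[id] performs the reverse lookup and
      // returns the variable name (or "" if the id is not present).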
7332 
7333 
7334  struct condensation_description {
7335  var_set Ivars, Jvars, Qvars; // sets of variables involved in condensation
7336  // Clusters of intercoupled condensed variables and subdiagonally coupled
7337  // primary variables for each cluster
7338  std::vector<std::set<size_type>> Qclusters, Jclusters;
7339  // Each element of Qclusters contains a group of intercoupled condensed
7340  // variables. Due to the couplings within each group, all variables of the
7341  // same group need to be condensed out simultaneously. By definition, two
7342  // clusters cannot share a common variable.
7343  // indexing of groups
7344  std::vector<size_type> cluster_of_Qvar;
7345  // Matrices of pointers to submatrices for all coupling terms
7346  gmm::dense_matrix<base_tensor *> KQQ, // diagonal
7347  KQJ, KQJpr, // subdiagonal
7348  KIQ, // superdiagonal
7349  KIJ; // outcome
7350  std::vector<base_tensor *> RI, // res. vector of coupled primary variables
7351  RQpr; // partial solution for condensed variables (initially stores residuals)
7352  };
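      // Schematic static condensation: with Q the condensed (internal)
      // variables and I/J the coupled primary variables, the block system
      //   [ KIJ  KIQ ] [ dJ ]   [ RI ]
      //   [ KQJ  KQQ ] [ dQ ] = [ RQ ]
      // is reduced by eliminating dQ, giving the condensed contributions
      //   KIJ <- KIJ - KIQ*inv(KQQ)*KQJ   and   RI <- RI - KIQ*inv(KQQ)*RQ
      // (up to the sign conventions used by the condensation instructions);
      // KQJpr and RQpr then hold the inv(KQQ)-premultiplied KQJ and RQ.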
7353 
7354  void ga_compile(ga_workspace &workspace,
7355  ga_instruction_set &gis, size_type order, bool condensation) {
7356  gis.transformations.clear();
7357  gis.all_instructions.clear();
7358  gis.unreduced_terms.clear();
7359  workspace.clear_temporary_variable_intervals();
7360 
7361  std::map<const ga_instruction_set::region_mim, condensation_description>
7362  condensations;
7363 
7364  if (condensation && order == 2) {
7365  for (size_type i = 0; i < workspace.nb_trees(); ++i) {
7366  ga_workspace::tree_description &td = workspace.tree_info(i);
7367  if (td.order != 2 && td.order != size_type(-1))
7368  continue;
7369  ga_tree tree(*(td.ptree)); // temporary tree (not used later)
7370  ga_semantic_analysis(tree, workspace, td.mim->linked_mesh(),
7371  ref_elt_dim_of_mesh(td.mim->linked_mesh()),
7372  true, false);
7373  pga_tree_node root = tree.root;
7374  if (root) {
7375  const bool
7376  v1_is_intern = workspace.is_internal_variable(root->name_test1),
7377  v2_is_intern = workspace.is_internal_variable(root->name_test2);
7378  if (v1_is_intern || v2_is_intern) {
7379  GMM_ASSERT1(tree.secondary_domain.empty(),
7380  "Condensed variable cannot be used in secondary domain");
7381 
7382  for (const auto &key_val : condensations) {
7383  const ga_instruction_set::region_mim rm0 = key_val.first;
7384  const condensation_description &CC0 = key_val.second;
7385  if (rm0.mim() == td.mim && rm0.region() != td.rg
7386  && (CC0.Qvars.count(root->name_test1) ||
7387  CC0.Qvars.count(root->name_test2))) {
7388  mesh_region intrsct = getfem::mesh_region::intersection
7389  (*(rm0.region()), *(td.rg));
7390  GMM_ASSERT1(intrsct.is_empty(),
7391  "Cannot condense coupled variables between "
7392  "intersecting regions");
7393  }
7394  }
7395  const ga_instruction_set::region_mim rm(td.mim, td.rg, nullptr);
7396 
7397  condensation_description &CC = condensations[rm];
7398  size_type
7399  q1 = v1_is_intern ? CC.Qvars[root->name_test1] : size_type(-1),
7400  q2 = v2_is_intern ? CC.Qvars[root->name_test2] : size_type(-1);
7401  GMM_ASSERT1(q1 != size_type(-1) || q2 != size_type(-1), "Error");
7402  std::vector<size_type> selected_clusters;
7403  for (size_type j=0; j < CC.Qclusters.size(); ++j)
7404  if (CC.Qclusters[j].count(q1) || CC.Qclusters[j].count(q2))
7405  selected_clusters.push_back(j);
7406 
7407  if (selected_clusters.empty()) { // create new cluster
7408  CC.Qclusters.push_back(std::set<size_type>());
7409  if (q1 != size_type(-1)) CC.Qclusters.back().insert(q1);
7410  if (q2 != size_type(-1)) CC.Qclusters.back().insert(q2);
7411  } else { // add into existing cluster / merge clusters together
7412  auto &target = CC.Qclusters[selected_clusters[0]];
7413  if (q1 != size_type(-1)) target.insert(q1);
7414  if (q2 != size_type(-1)) target.insert(q2);
7415  for (size_type j=selected_clusters.size()-1; j > 0; --j) {
7416  auto &source = CC.Qclusters[selected_clusters[j]];
7417  target.insert(source.begin(), source.end());
7418  CC.Qclusters.erase(CC.Qclusters.begin() + selected_clusters[j]);
7419  }
7420  }
7421  } // is_internal_variable
7422  } // if (root)
7423  } // for (size_type i = 0; i < workspace.nb_trees(); ++i)
7424 
7425  for (auto &key_value : condensations) {
7426  condensation_description &CC = key_value.second;
7427  //for (const auto &cluster : CC.Qclusters) {
7428  // cout << "Clusters of coupled variables:" << endl;
7429  // for (const auto &varid : cluster) cout << "/" << CC.Qvars[varid];
7430  // cout << "/" << endl;
7431  //}
7432  size_type Qsize = CC.Qvars.size();
7433 
7434  // Jclusters will hold all J variables each cluster is coupled to
7435  CC.Jclusters.resize(CC.Qclusters.size());
7436 
7437  CC.cluster_of_Qvar.resize(Qsize);
7438  for (size_type i=0; i < CC.Qclusters.size(); ++i)
7439  for (const size_type &var : CC.Qclusters[i])
7440  CC.cluster_of_Qvar[var] = i;
7441 
7442  // Qvars: all condensed variables
7443  // Qclusters: definition of clusters of intercoupled variables of Qvars
7444  // cluster_of_Qvar: dictionary for which cluster each variable belongs to
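      // For instance (hypothetical variables), if Qvars indexes the condensed
      // variables {p0:0, p1:1, p2:2} and only p0 and p1 are coupled to each
      // other, then Qclusters = {{0,1},{2}} and cluster_of_Qvar = {0,0,1}.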
7445  CC.KQQ.resize(Qsize, Qsize);
7446  CC.RQpr.resize(Qsize);
7447  for (size_type q=0; q < Qsize; ++q) {
7448  bgeot::multi_index mi(1);
7449  mi[0] = workspace.associated_im_data(CC.Qvars[q])->nb_tensor_elem();
7450  gis.condensation_tensors.push_back // memory allocation
7451  (std::make_shared<base_tensor>(mi));
7452  CC.RQpr[q] = gis.condensation_tensors.back().get();
7453  }
7454  }
7455  } // if (condensation && order == 2)
7456 
7457  std::array<ga_workspace::operation_type,3>
7458  phases{ga_workspace::PRE_ASSIGNMENT,
7459  ga_workspace::ASSEMBLY,
7460  ga_workspace::POST_ASSIGNMENT};
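      // Trees are compiled in three passes: PRE_ASSIGNMENT trees (assignments
      // to im_data variables evaluated before the assembly terms), then the
      // ASSEMBLY trees themselves, then POST_ASSIGNMENT trees (assignments
      // evaluated after the assembly terms).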
7461  for (const auto &phase : phases) {
7462 
7463  for (size_type i = 0; i < workspace.nb_trees(); ++i) {
7464  ga_workspace::tree_description &td = workspace.tree_info(i);
7465  if (td.operation != phase)
7466  continue; // skip this tree in this phase
7467 
7468  if (td.order == order || td.order == size_type(-1)) {
7469  std::list<ga_tree> &trees = (phase == ga_workspace::ASSEMBLY)
7470  ? gis.trees
7471  : gis.interpolation_trees;
7472  trees.push_back(*(td.ptree));
7473  // Semantic analysis mainly to evaluate fixed size variables and data
7474  ga_semantic_analysis(trees.back(), workspace, td.mim->linked_mesh(),
7475  ref_elt_dim_of_mesh(td.mim->linked_mesh()),
7476  true, false);
7477  pga_tree_node root = trees.back().root;
7478  if (root) {
7479  // Compile tree
7480  // cout << "Will compile "; ga_print_node(root, cout); cout << endl;
7481 
7482  psecondary_domain psd(0);
7483  if (trees.back().secondary_domain.size())
7484  psd = workspace.secondary_domain(trees.back().secondary_domain);
7485  ga_instruction_set::region_mim rm(td.mim, td.rg, psd);
7486  auto &rmi = gis.all_instructions[rm];
7487  rmi.m = td.m;
7488  rmi.im = td.mim;
7489  // rmi.interpolate_infos.clear();
7490  ga_compile_interpolate_trans(root, workspace, gis, rmi, *(td.m));
7491  ga_compile_node(root, workspace, gis, rmi, *(td.m), false,
7492  rmi.current_hierarchy);
7493  // cout << "compilation finished "; ga_print_node(root, cout);
7494  // cout << endl;
7495 
7496  if (phase != ga_workspace::ASSEMBLY) { // Assignment/interpolation
7497  if (!td.varname_interpolation.empty()) {
7498  auto *imd
7499  = workspace.associated_im_data(td.varname_interpolation);
7500  auto &V = const_cast<model_real_plain_vector &>
7501  (workspace.value(td.varname_interpolation));
7502  GMM_ASSERT1(imd, "Internal error");
7503  auto pgai = std::make_shared<ga_instruction_assignment>
7504  (root->tensor(), V, gis.ctx, imd);
7505  rmi.instructions.push_back(std::move(pgai));
7506  }
7507  } else { // Addition of an assembly instruction
7508  pga_instruction pgai;
7509  switch(order) {
7510  case 0: {
7511  workspace.assembled_tensor() = root->tensor();
7512  pgai = std::make_shared<ga_instruction_add_to_coeff>
7513  (workspace.assembled_tensor(), root->tensor(), gis.coeff);
7514  break;
7515  }
7516  case 1: {
7517  GMM_ASSERT1(root->tensor_proper_size() == 1,
7518  "Invalid vector or tensor quantity. An order 1 "
7519  "weak form has to be a scalar quantity");
7520  const mesh_fem * const
7521  mf = workspace.associated_mf(root->name_test1);
7522  const im_data * const
7523  imd = workspace.associated_im_data(root->name_test1);
7524  workspace.add_temporary_interval_for_unreduced_variable
7525  (root->name_test1);
7526 
7527  base_vector &Vu = workspace.unreduced_vector(),
7528  &Vr = workspace.assembled_vector();
7529  if (mf) {
7530  const std::string &intn1 = root->interpolate_name_test1;
7531  bool secondary = !intn1.empty() &&
7532  workspace.secondary_domain_exists(intn1);
7533  fem_interpolation_context
7534  &ctx = intn1.empty() ? gis.ctx
7535  : (secondary ? rmi.secondary_domain_infos.ctx
7536  : rmi.interpolate_infos[intn1].ctx);
7537  bool interpolate =
7538  !(intn1.empty() || intn1 == "neighbor_element"
7539  || intn1 == "neighbour_elt" || secondary);
7540 
7541  if (intn1.size() && !secondary &&
7542  workspace.variable_group_exists(root->name_test1)) {
7543  ga_instruction_set::variable_group_info
7544  &vgi = rmi.interpolate_infos[intn1]
7545  .groups_info[root->name_test1];
7546  pgai = std::make_shared<ga_instruction_vector_assembly_mf>
7547  (root->tensor(), Vr, Vu, ctx,
7548  vgi.I, vgi.mf, vgi.reduced_mf,
7549  gis.coeff, gis.nbpt, gis.ipt, interpolate);
7550  for (const std::string &name
7551  : workspace.variable_group(root->name_test1))
7552  gis.unreduced_terms.emplace(name, "");
7553  } else {
7554  base_vector &V = mf->is_reduced() ? Vu : Vr;
7555  const gmm::sub_interval
7556  &I = mf->is_reduced()
7557  ? workspace.temporary_interval_of_variable
7558  (root->name_test1)
7559  : workspace.interval_of_variable(root->name_test1);
7560  pgai = std::make_shared<ga_instruction_vector_assembly_mf>
7561  (root->tensor(), V, ctx, I, *mf,
7562  gis.coeff, gis.nbpt, gis.ipt, interpolate);
7563  if (mf->is_reduced())
7564  gis.unreduced_terms.emplace(root->name_test1, "");
7565  }
7566  } else if (imd) {
7567  GMM_ASSERT1(root->interpolate_name_test1.size() == 0,
7568  "Interpolate transformation on integration "
7569  "point variable");
7570  if (!workspace.is_internal_variable(root->name_test1) ||
7571  condensation)
7572  pgai = std::make_shared<ga_instruction_vector_assembly_imd>
7573  (root->tensor(), Vr, gis.ctx,
7574  workspace.interval_of_variable(root->name_test1),
7575  *imd, gis.coeff, gis.ipt);
7576  // Variable root->name_test1 can be internal or not
7577  } else {
7578  pgai = std::make_shared<ga_instruction_vector_assembly>
7579  (root->tensor(), Vr,
7580  workspace.interval_of_variable(root->name_test1),
7581  gis.coeff);
7582  }
7583  break;
7584  }
7585  case 2: {
7586  GMM_ASSERT1(root->tensor_proper_size() == 1,
7587  "Invalid vector or tensor quantity. An order 2 "
7588  "weak form has to be a scalar quantity");
7589  const mesh_fem *mf1=workspace.associated_mf(root->name_test1),
7590  *mf2=workspace.associated_mf(root->name_test2);
7591  const im_data
7592  *imd1 = workspace.associated_im_data(root->name_test1),
7593  *imd2 = workspace.associated_im_data(root->name_test2);
7594  const std::string &intn1 = root->interpolate_name_test1,
7595  &intn2 = root->interpolate_name_test2;
7596  bool secondary1 = intn1.size() &&
7597  workspace.secondary_domain_exists(intn1);
7598  bool secondary2 = intn2.size() &&
7599  workspace.secondary_domain_exists(intn2);
7600  fem_interpolation_context
7601  &ctx1 = intn1.empty() ? gis.ctx
7602  : (secondary1 ? rmi.secondary_domain_infos.ctx
7603  : rmi.interpolate_infos[intn1].ctx),
7604  &ctx2 = intn2.empty() ? gis.ctx
7605  : (secondary2 ? rmi.secondary_domain_infos.ctx
7606  : rmi.interpolate_infos[intn2].ctx);
7607  bool interpolate = !(intn1.empty() || intn1 == "neighbor_element"
7608  || intn1 == "neighbour_elt"
7609  || secondary1) ||
7610  !(intn2.empty() || intn2 == "neighbor_element"
7611  || intn2 == "neighbour_elt"
7612  || secondary2);
7613 
7614  workspace.add_temporary_interval_for_unreduced_variable
7615  (root->name_test1);
7616  workspace.add_temporary_interval_for_unreduced_variable
7617  (root->name_test2);
7618 
7619  bool has_var_group1 = (!intn1.empty() && !secondary1 &&
7620  workspace.variable_group_exists
7621  (root->name_test1));
7622  bool has_var_group2 = (!intn2.empty() && !secondary2 &&
7623  workspace.variable_group_exists
7624  (root->name_test2));
7625  bool simple = !interpolate &&
7626  !has_var_group1 && !has_var_group2 &&
7627  mf1 && !(mf1->is_reduced()) &&
7628  mf2 && !(mf2->is_reduced());
7629 
7630  // ga instructions write into one of the following matrices
7631  auto &Krr = workspace.assembled_matrix();
7632  auto &Kru = workspace.col_unreduced_matrix();
7633  auto &Kur = workspace.row_unreduced_matrix();
7634  auto &Kuu = workspace.row_col_unreduced_matrix();
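      // Naming convention: K<row><col> with r = final (reduced) dof numbering
      // and u = unreduced dof numbering (used for reduced mesh_fems and for
      // variable groups); terms assembled into Kru/Kur/Kuu are meant to be
      // reduced into the final matrix afterwards, the corresponding variable
      // pairs being recorded in gis.unreduced_terms.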
7635 
7636  if (simple) { // --> Krr
7637  const gmm::sub_interval
7638  &I1 = workspace.interval_of_variable(root->name_test1),
7639  &I2 = workspace.interval_of_variable(root->name_test2);
7640  const scalar_type
7641  &alpha1 = workspace.factor_of_variable(root->name_test1),
7642  &alpha2 = workspace.factor_of_variable(root->name_test2);
7643  if (mf1->get_qdim() == 1 && mf2->get_qdim() == 1)
7644  pgai = std::make_shared
7645  <ga_instruction_matrix_assembly_standard_scalar>
7646  (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
7647  alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
7648  else if (root->sparsity() == 10 && root->t.qdim() == 2)
7649  pgai = std::make_shared
7650  <ga_instruction_matrix_assembly_standard_vector_opt10<2>>
7651  (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
7652  alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
7653  else if (root->sparsity() == 10 && root->t.qdim() == 3)
7654  pgai = std::make_shared
7655  <ga_instruction_matrix_assembly_standard_vector_opt10<3>>
7656  (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
7657  alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
7658  else
7659  pgai = std::make_shared
7660  <ga_instruction_matrix_assembly_standard_vector>
7661  (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
7662  alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
7663  } else if (condensation &&
7664  workspace.is_internal_variable(root->name_test1) &&
7665  workspace.is_internal_variable(root->name_test2)) {
7666  // diagonal condensation matrix KQQ
7667  // Only memory allocation, gathering of relevant pointers
7668  // and data summation instructions
7669  GMM_ASSERT1(imd1 && imd2, "Internal error");
7670  GMM_ASSERT1(!interpolate, "Internal error");
7671  size_type s1 = imd1->nb_tensor_elem();
7672  size_type s2 = imd2->nb_tensor_elem();
7673 
7674  condensation_description &CC = condensations[rm];
7675  GMM_ASSERT1(CC.Qvars.count(root->name_test1) > 0 &&
7676  CC.Qvars.count(root->name_test2) > 0,
7677  "Internal error");
7678  size_type q1 = CC.Qvars[root->name_test1],
7679  q2 = CC.Qvars[root->name_test2];
7680  if (!CC.KQQ(q1,q2)) {
7681  // allocate a new matrix
7682  gis.condensation_tensors.push_back
7683  (std::make_shared<base_tensor>(s1,s2));
7684  CC.KQQ(q1,q2) = gis.condensation_tensors.back().get();
7685  pgai = std::make_shared<ga_instruction_copy_vect>
7686  (CC.KQQ(q1,q2)->as_vector(), root->tensor().as_vector());
7687  } else {
7688  // addition instruction to the previously allocated matrix
7689  pgai = std::make_shared<ga_instruction_add_to>
7690  (*CC.KQQ(q1,q2), root->tensor());
7691  }
7692  rmi.instructions.push_back(std::move(pgai));
7693  } else if (condensation &&
7694  workspace.is_internal_variable(root->name_test1)) {
7695  // subdiagonal condensation matrix KQJ
7696  // Only memory allocation, gathering of relevant pointers
7697  // and data summation instructions
7698  GMM_ASSERT1(imd1, "Internal error");
7699  GMM_ASSERT1(!interpolate, "Internal error");
7700  size_type s1 = imd1->nb_tensor_elem();
7701 
7702  condensation_description &CC = condensations[rm];
7703  GMM_ASSERT1(CC.Qvars.count(root->name_test1),
7704  "Internal error");
7705  size_type q1 = CC.Qvars[root->name_test1],
7706  j2 = CC.Jvars[root->name_test2];
7707  CC.Jclusters[CC.cluster_of_Qvar[q1]].insert(j2);
7708  if (q1 >= CC.KQJ.nrows() || j2 >= CC.KQJ.ncols())
7709  CC.KQJ.resize(std::max(CC.KQJ.nrows(), q1+1),
7710  std::max(CC.KQJ.ncols(), j2+1));
7711  if (!CC.KQJ(q1,j2)) {
7712  // allocate a new matrix. Here we do not know the size as
7713  // it may change dynamically, but for now, just use the
7714  // size of root->tensor()
7715  gis.condensation_tensors.push_back
7716  (std::make_shared<base_tensor>(root->tensor()));
7717  GMM_ASSERT1(root->tensor().size(0) == s1, "Internal error");
7718  CC.KQJ(q1,j2) = gis.condensation_tensors.back().get();
7719  pgai = std::make_shared<ga_instruction_copy_vect>
7720  (CC.KQJ(q1,j2)->as_vector(), root->tensor().as_vector());
7721  } else {
7722  // an extra matrix for this entry has already been
7723  // allocated, so just add the current tensor to it
7724  pgai = std::make_shared<ga_instruction_add_to>
7725  (*CC.KQJ(q1,j2), root->tensor());
7726  }
7727  rmi.instructions.push_back(std::move(pgai));
7728  } else if (condensation &&
7729  workspace.is_internal_variable(root->name_test2)) {
7730  // superdiagonal condensation matrix KIQ
7731  // Only memory allocation, gathering of relevant pointers
7732  // and data summation instructions
7733  GMM_ASSERT1(imd2, "Internal error");
7734  GMM_ASSERT1(!interpolate, "Internal error");
7735  size_type s2 = imd2->nb_tensor_elem();
7736 
7737  condensation_description &CC = condensations[rm];
7738  GMM_ASSERT1(CC.Qvars.count(root->name_test2),
7739  "Internal error");
7740  size_type i1 = CC.Ivars[root->name_test1],
7741  q2 = CC.Qvars[root->name_test2];
7742  if (i1 >= CC.KIQ.nrows() || q2 >= CC.KIQ.ncols())
7743  CC.KIQ.resize(std::max(CC.KIQ.nrows(), i1+1),
7744  std::max(CC.KIQ.ncols(), q2+1));
7745  if (!CC.KIQ(i1,q2)) {
7746  // allocate a new matrix. Here we do not know the size as
7747  // it may change dynamically, but for now, just use the
7748  // size of root->tensor()
7749  gis.condensation_tensors.push_back
7750  (std::make_shared<base_tensor>(root->tensor()));
7751  GMM_ASSERT1(root->tensor().size(1) == s2,
7752  "Internal error");
7753  CC.KIQ(i1,q2) = gis.condensation_tensors.back().get();
7754  pgai = std::make_shared<ga_instruction_copy_vect>
7755  (CC.KIQ(i1,q2)->as_vector(), root->tensor().as_vector());
7756  } else {
7757  // an extra matrix for this entry has already been
7758  // allocated, so just add the current tensor to it
7759  pgai = std::make_shared<ga_instruction_add_to>
7760  (*CC.KIQ(i1,q2), root->tensor());
7761  }
7762  rmi.instructions.push_back(std::move(pgai));
7763  } else if (!workspace.is_internal_variable(root->name_test1) &&
7764  !workspace.is_internal_variable(root->name_test2)) {
7765 
7766  if ((mf1 && mf1->is_reduced()) || (mf2 && mf2->is_reduced())
7767  || has_var_group1 || has_var_group2)
7768  gis.unreduced_terms.emplace(root->name_test1,
7769  root->name_test2);
7770 
7771  auto &Kxu = (mf1 && mf1->is_reduced()) ? Kuu : Kru;
7772  auto &Kxr = (mf1 && mf1->is_reduced()) ? Kur : Krr;
7773  auto &Kux = (mf2 && mf2->is_reduced()) ? Kuu : Kur;
7774  auto &Krx = (mf2 && mf2->is_reduced()) ? Kru : Krr;
7775  auto &Kxx = (mf2 && mf2->is_reduced()) ? Kxu : Kxr;
7776 
7777  const scalar_type
7778  &alpha1 = workspace.factor_of_variable(root->name_test1),
7779  &alpha2 = workspace.factor_of_variable(root->name_test2);
7780 
7781  if (has_var_group1) {
7782  ga_instruction_set::variable_group_info
7783  &vgi1 = rmi.interpolate_infos[intn1]
7784  .groups_info[root->name_test1];
7785  if (has_var_group2) {
7786  ga_instruction_set::variable_group_info
7787  &vgi2 = rmi.interpolate_infos[intn2]
7788  .groups_info[root->name_test2];
7789  pgai = std::make_shared
7790  <ga_instruction_matrix_assembly_mf_mf>
7791  (root->tensor(), Krr, Kru, Kur, Kuu, ctx1, ctx2,
7792  vgi1, vgi2,
7793  gis.coeff, gis.nbpt, gis.ipt, interpolate);
7794  } else {
7795  const gmm::sub_interval &I2 = mf2 && mf2->is_reduced()
7796  ? workspace.temporary_interval_of_variable
7797  (root->name_test2)
7798  : workspace.interval_of_variable(root->name_test2);
7799  if (mf2)
7800  pgai = std::make_shared
7801  <ga_instruction_matrix_assembly_mf_mf>
7802  (root->tensor(), Krx, Kux, ctx1, ctx2,
7803  vgi1, I2, *mf2, alpha2,
7804  gis.coeff, gis.nbpt, gis.ipt, interpolate);
7805  else // for global variable imd2 == 0
7806  pgai = std::make_shared
7807  <ga_instruction_matrix_assembly_mf_imd>
7808  (root->tensor(), Krr, Kur, ctx1, ctx2,
7809  vgi1, I2, imd2, alpha2, gis.coeff, gis.ipt);
7810  }
7811  } else { // !has_var_group1
7812  const gmm::sub_interval &I1 = mf1 && mf1->is_reduced()
7813  ? workspace.temporary_interval_of_variable
7814  (root->name_test1)
7815  : workspace.interval_of_variable(root->name_test1);
7816  if (has_var_group2) {
7817  ga_instruction_set::variable_group_info
7818  &vgi2 = rmi.interpolate_infos[intn2]
7819  .groups_info[root->name_test2];
7820  if (mf1)
7821  pgai = std::make_shared
7822  <ga_instruction_matrix_assembly_mf_mf>
7823  (root->tensor(), Kxr, Kxu, ctx1, ctx2,
7824  I1, *mf1, alpha1, vgi2,
7825  gis.coeff, gis.nbpt, gis.ipt, interpolate);
7826  else // for global variable imd1 == 0
7827  pgai = std::make_shared
7828  <ga_instruction_matrix_assembly_imd_mf>
7829  (root->tensor(), Krr, Kru, ctx1, ctx2,
7830  I1, imd1, alpha1, vgi2, gis.coeff, gis.ipt);
7831  } else { // !has_var_group2
7832  const gmm::sub_interval &I2 = mf2 && mf2->is_reduced()
7833  ? workspace.temporary_interval_of_variable
7834  (root->name_test2)
7835  : workspace.interval_of_variable(root->name_test2);
7836  if (mf1 && mf2)
7837  pgai = std::make_shared
7838  <ga_instruction_matrix_assembly_mf_mf>
7839  (root->tensor(), Kxx, ctx1, ctx2,
7840  I1, *mf1, alpha1, I2, *mf2, alpha2,
7841  gis.coeff, gis.nbpt, gis.ipt, interpolate);
7842  else if (mf1) // for global variable imd2 == 0
7843  pgai = std::make_shared
7844  <ga_instruction_matrix_assembly_mf_imd>
7845  (root->tensor(), Kxr, ctx1, ctx2,
7846  I1, *mf1, alpha1, I2, imd2, alpha2,
7847  gis.coeff, gis.ipt);
7848  else if (mf2)
7849  pgai = std::make_shared
7850  <ga_instruction_matrix_assembly_imd_mf>
7851  (root->tensor(), Krx, ctx1, ctx2,
7852  I1, imd1, alpha1, I2, *mf2, alpha2,
7853  gis.coeff, gis.ipt);
7854  else
7855  pgai = std::make_shared
7856  <ga_instruction_matrix_assembly_imd_imd>
7857  (root->tensor(), Krr, ctx1, ctx2,
7858  I1, imd1, alpha1, I2, imd2, alpha2,
7859  gis.coeff, gis.ipt);
7860  }
7861  }
7862  } // if (!simple)
7863  break;
7864  } // case 2
7865  } // switch(order)
7866  if (pgai)
7867  rmi.instructions.push_back(std::move(pgai));
7868  }
7869  } // if (root)
7870  } // if (td.order == order || td.order == size_type(-1))
7871  } // for (size_type i = 0; i < workspace.nb_trees(); ++i)
7872 
7873  if (condensation && order == 2 && phase == ga_workspace::ASSEMBLY) {
7874 
7875  auto &Krr = workspace.assembled_matrix();
7876  auto &Kru = workspace.col_unreduced_matrix();
7877  auto &Kur = workspace.row_unreduced_matrix();
7878  auto &Kuu = workspace.row_col_unreduced_matrix();
7879 
7880  for (auto &&key_val : condensations) {
7881  const ga_instruction_set::region_mim rm = key_val.first;
7882  condensation_description &CC = key_val.second;
7883  auto &rmi = gis.all_instructions[rm];
7884 
7885  CC.KQJpr.resize(CC.KQJ.nrows(), CC.KQJ.ncols());
7886  for (size_type k=0; k < CC.KQJpr.size(); ++k) {
7887  gis.condensation_tensors.push_back // memory allocation
7888  (std::make_shared<base_tensor>(2,2));
7889  CC.KQJpr[k] = gis.condensation_tensors.back().get();
7890  }
7891 
7892  pga_instruction pgai;
7893 
7894  // Add one diagonal/subdiagonal condensation instruction per cluster
7895  for (size_type k=0; k < CC.Qclusters.size(); ++k) {
7896  // extract the condensed variables' residuals from the cached
7897  // residual vector (workspace.cached_vector()) into RQpr
7898  for (size_type q1 : CC.Qclusters[k]) {
7899  std::string name_test1 = CC.Qvars[q1];
7900  const im_data *imd1 = workspace.associated_im_data(name_test1);
7901  const gmm::sub_interval
7902  &I1 = workspace.interval_of_variable(name_test1);
7903  pgai =
7904  std::make_shared<ga_instruction_extract_residual_on_imd_dofs>
7905  (*(CC.RQpr[q1]), workspace.cached_vector(), // cached_V --> CC.RQpr[q1]
7906  gis.ctx, I1, *imd1, gis.ipt);
7907  rmi.instructions.push_back(std::move(pgai));
7908  }
7909 
7910  // the exec() of this instruction computes KQJpr, including any
7911  // resizing needed to match the sizes of KQJ when the sizes of the
7912  // primary variables J change
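      // (schematically KQJpr <- inv(KQQ)*KQJ and RQpr <- inv(KQQ)*RQpr for
      // the variables of cluster k, up to the sign convention used by the
      // condensation instructions)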
7913  pgai = std::make_shared<ga_instruction_condensation_sub>
7914  (CC.KQJpr, CC.RQpr, CC.KQQ, CC.KQJ, CC.Qclusters[k], gis.coeff); // factor_of_variable()?
7915  rmi.instructions.push_back(std::move(pgai));
7916 
7917  // assemble/store KQJpr/RQpr matrices/vectors into the
7918  // corresponding global matrix/vector
7919  for (size_type q1 : CC.Qclusters[k]) {
7920  std::string name_test1 = CC.Qvars[q1];
7921  const im_data *imd1 = workspace.associated_im_data(name_test1);
7922 // const scalar_type
7923 // &alpha1 = workspace.factor_of_variable(name_test1); // TODO
7924  const gmm::sub_interval
7925  &I1 = workspace.interval_of_variable(name_test1);
7926  GMM_ASSERT1(imd1, "Internal error");
7927  for (size_type j2 : CC.Jclusters[k]) {
7928  std::string name_test2 = CC.Jvars[j2];
7929  const mesh_fem *mf2 = workspace.associated_mf(name_test2); // TODO: name_test2 variable group
7930  const im_data *imd2 = workspace.associated_im_data(name_test2);
7931 // const std::string &intn2 = root->interpolate_name_test2;
7932 // GMM_ASSERT1(intn2.empty(), "Coupling of internal variables "
7933 // "with interpolated variables not "
7934 // "implemented yet");
7935 // const scalar_type
7936 // &alpha2 = workspace.factor_of_variable(name_test2); // TODO
7937  const gmm::sub_interval
7938  &I2 = mf2 && mf2->is_reduced()
7939  ? workspace.temporary_interval_of_variable(name_test2)
7940  : workspace.interval_of_variable(name_test2);
7941  const base_tensor &Kq1j2pr = *(CC.KQJpr(q1,j2)); // <- input
7942  model_real_sparse_matrix
7943  &KQJpr = mf2 && mf2->is_reduced()
7944  ? workspace.col_unreduced_matrix()
7945  : workspace.internal_coupling_matrix(); // <- output
7946  if (mf2) {
7947  pgai =
7948  std::make_shared<ga_instruction_matrix_assembly_imd_mf>
7949  (Kq1j2pr, KQJpr, gis.ctx, gis.ctx,
7950  I1, imd1, gis.ONE, I2, *mf2, gis.ONE, gis.ONE, gis.ipt); // without gis.coeff
7951  // TODO: name_test2 variable group
7952  if (mf2->is_reduced())
7953  gis.unreduced_terms.emplace(name_test1, name_test2);
7954  } else // for global variable imd2 == 0
7955  pgai =
7956  std::make_shared<ga_instruction_matrix_assembly_imd_imd>
7957  (Kq1j2pr, KQJpr, gis.ctx, gis.ctx,
7958  I1, imd1, gis.ONE, I2, imd2, gis.ONE, gis.ONE, gis.ipt); // without gis.coeff
7959  rmi.instructions.push_back(std::move(pgai));
7960  } // for j2
7961  const bool initialize = true;
7962  pgai = std::make_shared<ga_instruction_vector_assembly_imd>
7963  (*(CC.RQpr[q1]), workspace.assembled_vector(), // <- overwriting internal variables residual with internal solution
7964  gis.ctx, I1, *imd1, gis.ONE, gis.ipt, initialize); // without gis.coeff
7965  rmi.instructions.push_back(std::move(pgai));
7966  } // for q1
7967  }
7968 
7969  // Add superdiagonal condensation instructions
7970  for (size_type i1=0; i1 < CC.Ivars.size(); ++i1) {
7971 
7972  std::string name_test1 = CC.Ivars[i1];
7973  const mesh_fem *mf1 = workspace.associated_mf(name_test1); // TODO: name_test1 variable group
7974  const im_data *imd1 = workspace.associated_im_data(name_test1);
7975  const scalar_type
7976  &alpha1 = workspace.factor_of_variable(name_test1);
7977  const gmm::sub_interval
7978  &I1 = mf1 && mf1->is_reduced()
7979  ? workspace.temporary_interval_of_variable(name_test1)
7980  : workspace.interval_of_variable(name_test1);
7981 
7982  // Q_of_J[j2] will hold all condensed variables q that couple
7983  // variable i1 to each variable j2
7984  std::vector<std::set<size_type>> Q_of_J(CC.Jvars.size());
7985  for (size_type q=0; q < CC.Qvars.size(); ++q)
7986  if (CC.KIQ(i1,q)) {
7987  size_type cid = CC.cluster_of_Qvar[q];
7988  for (size_type j : CC.Jclusters[cid])
7989  Q_of_J[j].insert(q);
7990  }
7991 
7992  for (size_type j2=0; j2 < CC.Jvars.size(); ++j2) {
7993  if (Q_of_J[j2].size()) { // a coupling between i1 and j2 exists
7994  std::vector<base_tensor *> Ki1Q, KQj2;
7995  for (size_type q : Q_of_J[j2]) {
7996  Ki1Q.push_back(CC.KIQ(i1,q));
7997  KQj2.push_back(CC.KQJpr(q,j2));
7998  }
7999  // allocate a tensor for storing the coupling between i1 and j2
8000  gis.condensation_tensors.push_back
8001  (std::make_shared<base_tensor>());
8002  base_tensor &Kij = *gis.condensation_tensors.back();
8003  pgai = std::make_shared<ga_instruction_condensation_super_K>
8004  (Kij, Ki1Q, KQj2);
8005  rmi.instructions.push_back(std::move(pgai));
8006  // add assembly instruction
8007  std::string name_test2 = CC.Jvars[j2];
8008  const mesh_fem *mf2 = workspace.associated_mf(name_test2); // TODO: name_test2 variable group
8009  const im_data *imd2 = workspace.associated_im_data(name_test2);
8010  // Here assuming interpolate_name_test1.empty() &&
8011  // interpolate_name_test2.empty() &&
8012  // !(secondary1 || secondary2) && !interpolate;
8013  const scalar_type
8014  &alpha2 = workspace.factor_of_variable(name_test2);
8015  const gmm::sub_interval
8016  &I2 = mf2 && mf2->is_reduced()
8017  ? workspace.temporary_interval_of_variable(name_test2)
8018  : workspace.interval_of_variable(name_test2);
8019 
8020  auto &Kxu = (mf1 && mf1->is_reduced()) ? Kuu : Kru;
8021  auto &Kxr = (mf1 && mf1->is_reduced()) ? Kur : Krr;
8022  auto &Krx = (mf2 && mf2->is_reduced()) ? Kru : Krr;
8023  auto &Kxx = (mf2 && mf2->is_reduced()) ? Kxu : Kxr;
8024 
8025  if ((mf1 && mf1->is_reduced()) || (mf2 && mf2->is_reduced()))
8026  gis.unreduced_terms.emplace(name_test1, name_test2);
8027 
8028  if (mf1 && mf2) // TODO: name_test1 or name_test2 variable group
8029  pgai = std::make_shared
8030  <ga_instruction_matrix_assembly_mf_mf>
8031  (Kij, Kxx, gis.ctx, gis.ctx,
8032  I1, *mf1, alpha1, I2, *mf2, alpha2,
8033  gis.coeff, gis.nbpt, gis.ipt, false);
8034  else if (mf1) // for global variable imd2 == 0
8035  pgai = std::make_shared
8036  <ga_instruction_matrix_assembly_mf_imd>
8037  (Kij, Kxr, gis.ctx, gis.ctx,
8038  I1, *mf1, alpha1, I2, imd2, alpha2,
8039  gis.coeff, gis.ipt);
8040  else if (mf2)
8041  pgai = std::make_shared
8042  <ga_instruction_matrix_assembly_imd_mf>
8043  (Kij, Krx, gis.ctx, gis.ctx,
8044  I1, imd1, alpha1, I2, *mf2, alpha2,
8045  gis.coeff, gis.ipt);
8046  else
8047  pgai = std::make_shared
8048  <ga_instruction_matrix_assembly_imd_imd>
8049  (Kij, Krr, gis.ctx, gis.ctx,
8050  I1, imd1, alpha1, I2, imd2, alpha2,
8051  gis.coeff, gis.ipt);
8052  rmi.instructions.push_back(std::move(pgai));
8053  } // if (Q_of_J[j2].size())
8054  } // for j2
8055 
8056  // RHS condensation instructions
8057  std::vector<base_tensor *> Ki1Q, RQpr;
8058  for (size_type q=0; q < CC.Qvars.size(); ++q)
8059  if (CC.KIQ(i1,q)) {
8060  Ki1Q.push_back(CC.KIQ(i1,q));
8061  RQpr.push_back(CC.RQpr[q]);
8062  }
8063  gis.condensation_tensors.push_back
8064  (std::make_shared<base_tensor>());
8065  base_tensor &Ri = *gis.condensation_tensors.back();
8066  pgai = std::make_shared<ga_instruction_condensation_super_R>
8067  (Ri, Ki1Q, RQpr);
8068  rmi.instructions.push_back(std::move(pgai));
8069 
8070  base_vector &R = (mf1 && mf1->is_reduced()) ? workspace.unreduced_vector()
8071  : workspace.assembled_vector();
8072  if (mf1)
8073  pgai = std::make_shared<ga_instruction_vector_assembly_mf>
8074  (Ri, R, gis.ctx, I1, *mf1, gis.coeff, gis.nbpt, gis.ipt, false);
8075  else if (imd1)
8076  pgai = std::make_shared<ga_instruction_vector_assembly_imd>
8077  (Ri, R, gis.ctx, I1, *imd1, gis.coeff, gis.ipt);
8078  else
8079  pgai = std::make_shared<ga_instruction_vector_assembly>
8080  (Ri, R, I1, gis.coeff);
8081  rmi.instructions.push_back(std::move(pgai));
8082  } // for i1
8083  } // for (const auto &key_val : condensations)
8084  } // if (condensation && order == 2 && phase == ga_workspace::ASSEMBLY)
8085  } // for (const auto &phase : phases)
8086 
8087  } // ga_compile(...)
8088 
8089 
8090 
8091  //=========================================================================
8092  // Execution of a compiled set of assembly terms
8093  //=========================================================================
8094 
8095 
8096  void ga_function_exec(ga_instruction_set &gis) {
8097 
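      // Each instruction's exec() returns the number of subsequent
      // instructions to skip (0 in the usual case), hence the
      // "j += gil[j]->exec()" idiom below.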
8098  for (auto &&instr : gis.all_instructions) {
8099  const auto &gil = instr.second.instructions;
8100  for (size_type j = 0; j < gil.size(); ++j) j += gil[j]->exec();
8101  }
8102  }
8103 
8104  void ga_interpolation_exec(ga_instruction_set &gis,
8105  ga_workspace &workspace,
8106  ga_interpolation_context &gic) {
8107  base_matrix G;
8108  base_small_vector un, up;
8109 
8110  for (const std::string &t : gis.transformations)
8111  workspace.interpolate_transformation(t)->init(workspace);
8112 
8113  for (auto &&instr : gis.all_instructions) {
8114 
8115  const getfem::mesh_im &mim = *(instr.first.mim());
8116  const mesh_region &region = *(instr.first.region());
8117  const getfem::mesh &m = *(instr.second.m);
8118  GMM_ASSERT1(&m == &(gic.linked_mesh()),
8119  "Incompatibility of meshes in interpolation");
8120  const auto &gilb = instr.second.begin_instructions;
8121  const auto &gile = instr.second.elt_instructions;
8122  const auto &gil = instr.second.instructions;
8123 
8124  // iteration on elements (or faces of elements)
8125  std::vector<size_type> ind;
8126  auto pai_old = papprox_integration{};
8127  for (getfem::mr_visitor v(region, m, true); !v.finished(); ++v) {
8128  if (gic.use_mim()) {
8129  if (!mim.convex_index().is_in(v.cv())) continue;
8130  gis.pai = mim.int_method_of_element(v.cv())->approx_method();
8131  } else
8132  gis.pai = 0;
8133 
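      // The interpolation context supplies, for each element (or face), the
      // set of points to evaluate at; results are handed back to it through
      // gic.store_result() at the end of the inner loop.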
8134  ind.resize(0);
8135  bgeot::pstored_point_tab pspt
8136  = gic.ppoints_for_element(v.cv(), v.f(), ind);
8137 
8138  if (pspt.get() && ind.size() && pspt->size()) {
8139  m.points_of_convex(v.cv(), G);
8140  bgeot::pgeometric_trans pgt = m.trans_of_convex(v.cv());
8141  up.resize(G.nrows());
8142  un.resize(pgt->dim());
8143 
8144  if (gis.ctx.have_pgp() && gis.ctx.pgt() == pgt && pai_old == gis.pai) {
8145  gis.ctx.change(gis.ctx.pgp(), 0, 0, G, v.cv(), v.f());
8146  } else {
8147  if (!(gic.use_pgp(v.cv()))) {
8148  gis.ctx.change(pgt, 0, (*pspt)[0], G, v.cv(), v.f());
8149  } else {
8150  gis.ctx.change(gis.gp_pool(pgt, pspt), 0, 0, G, v.cv(), v.f());
8151  }
8152  }
8153  pai_old = gis.pai;
8154 
8155  if (gis.need_elt_size)
8156  gis.elt_size = m.convex_radius_estimate(v.cv()) * scalar_type(2);
8157 
8158  // iterations on interpolation points
8159  gis.nbpt = pspt->size();
8160  for (size_type ii = 0; ii < ind.size(); ++ii) {
8161  gis.ipt = ii;
8162  if (gis.ctx.have_pgp()) gis.ctx.set_ii(ind[ii]);
8163  else gis.ctx.set_xref((*pspt)[gis.ipt]);
8164 
8165  if (ii == 0 || !(pgt->is_linear())) {
8166  // Computation of unit normal vector in case of a boundary
8167  if (v.f() != short_type(-1)) {
8168  const base_matrix& B = gis.ctx.B();
8169  gmm::copy(pgt->normals()[v.f()], un);
8170  gmm::mult(B, un, up);
8171  scalar_type nup = gmm::vect_norm2(up);
8172  gmm::scale(up,1.0/nup);
8173  gmm::clean(up, 1e-13);
8174  gis.Normal = up;
8175  } else gis.Normal.resize(0);
8176  }
8177  gmm::clear(workspace.assembled_tensor().as_vector());
8178  if (ii == 0) {
8179  for (size_type j = 0; j < gilb.size(); ++j) j += gilb[j]->exec();
8180  for (size_type j = 0; j < gile.size(); ++j) j += gile[j]->exec();
8181  }
8182  for (size_type j = 0; j < gil.size(); ++j) j += gil[j]->exec();
8183  gic.store_result(v.cv(), ind[ii], workspace.assembled_tensor());
8184  }
8185  }
8186  }
8187  }
8188  for (const std::string &t : gis.transformations)
8189  workspace.interpolate_transformation(t)->finalize();
8190 
8191  gic.finalize();
8192  }
8193 
8194  void ga_exec(ga_instruction_set &gis, ga_workspace &workspace) {
8195  base_matrix G1, G2;
8196  base_small_vector un;
8197  scalar_type J1(0), J2(0);
8198 
8199  for (const std::string &t : gis.transformations)
8200  workspace.interpolate_transformation(t)->init(workspace);
8201 
8202  for (auto &instr : gis.all_instructions) {
8203  const getfem::mesh_im &mim = *(instr.first.mim());
8204  psecondary_domain psd = instr.first.psd();
8205  const getfem::mesh &m = *(instr.second.m);
8206  GMM_ASSERT1(&m == &(mim.linked_mesh()), "Incompatibility of meshes");
8207  const auto &gilb = instr.second.begin_instructions;
8208  const auto &gile = instr.second.elt_instructions;
8209  const auto &gil = instr.second.instructions;
8210 
8211  // if (gilb.size()) cout << "Begin instructions\n";
8212  // for (size_type j = 0; j < gilb.size(); ++j)
8213  // cout << typeid(*(gilb[j])).name() << endl;
8214  // if (gile.size()) cout << "\nElement instructions\n";
8215  // for (size_type j = 0; j < gile.size(); ++j)
8216  // cout << typeid(*(gile[j])).name() << endl;
8217  // cout << "\nGauss pt instructions\n";
8218  // for (size_type j = 0; j < gil.size(); ++j)
8219  // cout << typeid(*(gil[j])).name() << endl;
8220 
8221  if (!psd) { // standard integration on a single domain
8222 
8223  const mesh_region &region = *(instr.first.region());
8224 
8225  // iteration on elements (or faces of elements)
8226  size_type old_cv = size_type(-1);
8227  bgeot::pgeometric_trans pgt = 0, pgt_old = 0;
8228  pintegration_method pim = 0;
8229  papprox_integration pai = 0;
8230  bgeot::pstored_point_tab pspt = 0, old_pspt = 0;
8231  bgeot::pgeotrans_precomp pgp = 0;
8232  bool first_gp = true;
8233  for (getfem::mr_visitor v(region, m, true); !v.finished(); ++v) {
8234  if (mim.convex_index().is_in(v.cv())) {
8235  // cout << "proceed with elt " << v.cv() << " face " << v.f()<<endl;
8236  if (v.cv() != old_cv) {
8237  pgt = m.trans_of_convex(v.cv());
8238  pim = mim.int_method_of_element(v.cv());
8239  m.points_of_convex(v.cv(), G1);
8240 
8241  if (pim->type() == IM_NONE) continue;
8242  GMM_ASSERT1(pim->type() == IM_APPROX, "Sorry, exact methods "
8243  "cannot be used in high level generic assembly");
8244  pai = pim->approx_method();
8245  pspt = pai->pintegration_points();
8246  if (pspt->size()) {
8247  if (pgp && gis.pai == pai && pgt_old == pgt) {
8248  gis.ctx.change(pgp, 0, 0, G1, v.cv(), v.f());
8249  } else {
8250  if (pai->is_built_on_the_fly()) {
8251  gis.ctx.change(pgt, 0, (*pspt)[0], G1, v.cv(), v.f());
8252  pgp = 0;
8253  } else {
8254  pgp = gis.gp_pool(pgt, pspt);
8255  gis.ctx.change(pgp, 0, 0, G1, v.cv(), v.f());
8256  }
8257  pgt_old = pgt; gis.pai = pai;
8258  }
8259  if (gis.need_elt_size)
8260  gis.elt_size = convex_radius_estimate(pgt, G1)*scalar_type(2);
8261  }
8262  old_cv = v.cv();
8263  } else {
8264  if (pim->type() == IM_NONE) continue;
8265  gis.ctx.set_face_num(v.f());
8266  }
8267  if (pspt != old_pspt) { first_gp = true; old_pspt = pspt; }
8268  if (pspt->size()) {
8269  // iterations on Gauss points
8270  size_type first_ind = 0;
8271  if (v.f() != short_type(-1)) {
8272  gis.nbpt = pai->nb_points_on_face(v.f());
8273  first_ind = pai->ind_first_point_on_face(v.f());
8274  } else {
8275  gis.nbpt = pai->nb_points_on_convex();
8276  }
8277  for (gis.ipt = 0; gis.ipt < gis.nbpt; ++(gis.ipt)) {
8278  if (pgp) gis.ctx.set_ii(first_ind+gis.ipt);
8279  else gis.ctx.set_xref((*pspt)[first_ind+gis.ipt]);
8280  if (gis.ipt == 0 || !(pgt->is_linear())) {
8281  J1 = gis.ctx.J();
8282  // Computation of unit normal vector in case of a boundary
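      // (the norm of B*n, i.e. the ratio between the real and reference
      // face measures, also rescales the Jacobian J1 used in the
      // integration coefficient below)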
8283  if (v.f() != short_type(-1)) {
8284  gis.Normal.resize(G1.nrows());
8285  un.resize(pgt->dim());
8286  gmm::copy(pgt->normals()[v.f()], un);
8287  gmm::mult(gis.ctx.B(), un, gis.Normal);
8288  scalar_type nup = gmm::vect_norm2(gis.Normal);
8289  J1 *= nup;
8290  gmm::scale(gis.Normal, 1.0/nup);
8291  gmm::clean(gis.Normal, 1e-13);
8292  } else gis.Normal.resize(0);
8293  }
8294  auto ipt_coeff = pai->coeff(first_ind+gis.ipt);
8295  gis.coeff = J1 * ipt_coeff;
8296  bool enable_ipt = (gmm::abs(ipt_coeff) > 0.0 ||
8297  workspace.include_empty_int_points());
8298  if (!enable_ipt) gis.coeff = scalar_type(0);
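      // Integration points with a zero weight contribute nothing; they are
      // skipped except for the first and last point of the element, which
      // are still executed with gis.coeff set to zero.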
8299  if (first_gp) {
8300  for (size_type j=0; j < gilb.size(); ++j) j+=gilb[j]->exec();
8301  first_gp = false;
8302  }
8303  if (gis.ipt == 0) {
8304  for (size_type j=0; j < gile.size(); ++j) j+=gile[j]->exec();
8305  }
8306  if (enable_ipt || gis.ipt == 0 || gis.ipt == gis.nbpt-1) {
8307  for (size_type j=0; j < gil.size(); ++j) j+=gil[j]->exec();
8308  }
8309  GA_DEBUG_INFO("");
8310  }
8311  }
8312  }
8313  }
8314  GA_DEBUG_INFO("-----------------------------");
8315 
8316  } else { // Integration on the product of two domains (secondary domain)
8317 
8318  auto &sdi = instr.second.secondary_domain_infos;
8319  const mesh_region &region1 = *(instr.first.region());
8320 
8321  // iteration on elements (or faces of elements)
8322  size_type old_cv1=size_type(-1), old_cv2=size_type(-1);
8323  size_type nbpt1 = 0, nbpt2 = 0;
8324  bgeot::pgeometric_trans pgt1 = 0, pgt1_old = 0, pgt2 = 0, pgt2_old = 0;
8325  pintegration_method pim1 = 0, pim2 = 0;
8326  papprox_integration pai1 = 0, pai2 = 0;
8327  bgeot::pstored_point_tab pspt1=0, old_pspt1=0, pspt2=0, old_pspt2=0;
8328  bgeot::pgeotrans_precomp pgp1 = 0, pgp2 = 0;
8329  bool first_gp = true;
8330  for (getfem::mr_visitor v1(region1, m, true); !v1.finished(); ++v1) {
8331  if (mim.convex_index().is_in(v1.cv())) {
8332  // cout << "proceed with elt " << v1.cv()<<" face " << v1.f()<<endl;
8333  if (v1.cv() != old_cv1) {
8334  pgt1 = m.trans_of_convex(v1.cv());
8335  pim1 = mim.int_method_of_element(v1.cv());
8336  m.points_of_convex(v1.cv(), G1);
8337 
8338  if (pim1->type() == IM_NONE) continue;
8339  GMM_ASSERT1(pim1->type() == IM_APPROX, "Sorry, exact methods "
8340  "cannot be used in high level generic assembly");
8341  pai1 = pim1->approx_method();
8342  pspt1 = pai1->pintegration_points();
8343  if (pspt1->size()) {
8344  if (pgp1 && gis.pai == pai1 && pgt1_old == pgt1) {
8345  gis.ctx.change(pgp1, 0, 0, G1, v1.cv(), v1.f());
8346  } else {
8347  if (pai1->is_built_on_the_fly()) {
8348  gis.ctx.change(pgt1, 0, (*pspt1)[0], G1, v1.cv(), v1.f());
8349  pgp1 = 0;
8350  } else {
8351  pgp1 = gis.gp_pool(pgt1, pspt1);
8352  gis.ctx.change(pgp1, 0, 0, G1, v1.cv(), v1.f());
8353  }
8354  pgt1_old = pgt1; gis.pai = pai1;
8355  }
8356  if (gis.need_elt_size)
8357  gis.elt_size = convex_radius_estimate(pgt1,G1)*scalar_type(2);
8358  }
8359  old_cv1 = v1.cv();
8360  } else {
8361  if (pim1->type() == IM_NONE) continue;
8362  gis.ctx.set_face_num(v1.f());
8363  }
8364  if (pspt1 != old_pspt1) { first_gp = true; old_pspt1 = pspt1; }
8365  if (pspt1->size()) {
8366  // iterations on Gauss points
8367  size_type first_ind1 = 0;
8368  if (v1.f() != short_type(-1)) {
8369  nbpt1 = pai1->nb_points_on_face(v1.f());
8370  first_ind1 = pai1->ind_first_point_on_face(v1.f());
8371  } else {
8372  nbpt1 = pai1->nb_points_on_convex();
8373  }
8374 
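      // For each element/face of the primary region, the secondary domain
      // supplies a region of its own mesh to pair with (psd->give_region),
      // and the quadrature below runs over the tensor product of the two
      // sets of integration points.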
8375  const mesh &m2 = psd->mim().linked_mesh();
8376  const mesh_region &region2 = psd->give_region(m, v1.cv(), v1.f());
8377  for (getfem::mr_visitor v2(region2, m2, true);
8378  !v2.finished(); ++v2) {
8379  if (v2.cv() != old_cv2) {
8380  pgt2 = m2.trans_of_convex(v2.cv());
8381  pim2 = psd->mim().int_method_of_element(v2.cv());
8382  m2.points_of_convex(v2.cv(), G2);
8383 
8384  if (pim2->type() == IM_NONE) continue;
8385  GMM_ASSERT1(pim2->type() == IM_APPROX, "Sorry, exact methods "
8386  "cannot be used in high level generic assembly");
8387  pai2 = pim2->approx_method();
8388  pspt2 = pai2->pintegration_points();
8389  if (pspt2->size()) {
8390  if (pgp2 && sdi.pai == pai2 && pgt2_old == pgt2) {
8391  sdi.ctx.change(pgp2, 0, 0, G2, v2.cv(), v2.f());
8392  } else {
8393  if (pai2->is_built_on_the_fly()) {
8394  sdi.ctx.change(pgt2, 0, (*pspt2)[0], G2,v2.cv(),v2.f());
8395  pgp2 = 0;
8396  } else {
8397  pgp2 = gis.gp_pool(pgt2, pspt2);
8398  sdi.ctx.change(pgp2, 0, 0, G2, v2.cv(), v2.f());
8399  }
8400  pgt2_old = pgt2; sdi.pai = pai2;
8401  }
8402  }
8403  old_cv2 = v2.cv();
8404  } else {
8405  if (pim2->type() == IM_NONE) continue;
8406  sdi.ctx.set_face_num(v2.f());
8407  }
8408  if (pspt2 != old_pspt2) { first_gp = true; old_pspt2 = pspt2; }
8409  if (pspt2->size()) {
8410  // iterations on Gauss points
8411  size_type first_ind2 = 0;
8412  if (v2.f() != short_type(-1)) {
8413  nbpt2 = pai2->nb_points_on_face(v2.f());
8414  first_ind2 = pai2->ind_first_point_on_face(v2.f());
8415  } else {
8416  nbpt2 = gis.nbpt = pai2->nb_points_on_convex();
8417  }
8418  gis.nbpt = nbpt1 * nbpt2;
8419  gis.ipt = 0;
8420  for (size_type ipt1=0; ipt1 < nbpt1; ++ipt1) {
8421  for (size_type ipt2=0; ipt2 < nbpt2; ++ipt2, ++(gis.ipt)) {
8422 
8423  if (pgp1) gis.ctx.set_ii(first_ind1+ipt1);
8424  else gis.ctx.set_xref((*pspt1)[first_ind1+ipt1]);
8425  if (pgp2) sdi.ctx.set_ii(first_ind2+ipt2);
8426  else sdi.ctx.set_xref((*pspt2)[first_ind2+ipt2]);
8427 
8428  if (gis.ipt == 0 || !(pgt1->is_linear())) {
8429  J1 = gis.ctx.J();
8430  if (v1.f() != short_type(-1)) {
8431  gis.Normal.resize(G1.nrows());
8432  un.resize(pgt1->dim());
8433  gmm::copy(pgt1->normals()[v1.f()], un);
8434  gmm::mult(gis.ctx.B(), un, gis.Normal);
8435  scalar_type nup = gmm::vect_norm2(gis.Normal);
8436  J1 *= nup;
8437  gmm::scale(gis.Normal, 1.0/nup);
8438  gmm::clean(gis.Normal, 1e-13);
8439  } else gis.Normal.resize(0);
8440  }
8441 
8442  if (gis.ipt == 0 || !(pgt2->is_linear())) {
8443  J2 = sdi.ctx.J();
8444  if (v2.f() != short_type(-1)) {
8445  sdi.Normal.resize(G2.nrows());
8446  un.resize(pgt2->dim());
8447  gmm::copy(pgt2->normals()[v2.f()], un);
8448  gmm::mult(sdi.ctx.B(), un, sdi.Normal);
8449  scalar_type nup = gmm::vect_norm2(sdi.Normal);
8450  J2 *= nup;
8451  gmm::scale(sdi.Normal, 1.0/nup);
8452  gmm::clean(sdi.Normal, 1e-13);
8453  } else sdi.Normal.resize(0);
8454  }
8455 
8456  auto ipt_coeff = pai1->coeff(first_ind1+ipt1)
8457  * pai2->coeff(first_ind2+ipt2);
8458  gis.coeff = J1 * J2 * ipt_coeff;
8459  bool enable_ipt = (gmm::abs(ipt_coeff) > 0.0 ||
8460  workspace.include_empty_int_points());
8461  if (!enable_ipt) gis.coeff = scalar_type(0);
8462 
8463  if (first_gp) {
8464  for (size_type j=0; j < gilb.size(); ++j)
8465  j+=gilb[j]->exec();
8466  first_gp = false;
8467  }
8468  if (gis.ipt == 0) {
8469  for (size_type j=0; j < gile.size(); ++j)
8470  j+=gile[j]->exec();
8471  }
8472  if (enable_ipt || gis.ipt == 0 || gis.ipt == gis.nbpt-1) {
8473  for (size_type j=0; j < gil.size(); ++j)
8474  j+=gil[j]->exec();
8475  }
8476  GA_DEBUG_INFO("");
8477  }
8478  }
8479  }
8480  }
8481  }
8482  }
8483  }
8484  GA_DEBUG_INFO("-----------------------------");
8485  }
8486 
8487  }
8488 
8489  for (const std::string &t : gis.transformations)
8490  workspace.interpolate_transformation(t)->finalize();
8491  }
8492 
8493 
8494 } /* end of namespace */