From 2ab0bf576dede9215c3c37feb35a66caef93ce95 Mon Sep 17 00:00:00 2001 From: ShuliangLu Date: Wed, 1 Mar 2023 10:32:11 +0800 Subject: [PATCH] heterogenous material and supporter bones --- projects/CuLagrange/CMakeLists.txt | 10 +- .../CuLagrange/fem/FastQuasiStaticStepping.cu | 719 ------- .../CuLagrange/fem/FleshDynamicStepping.cu | 1321 ++++++------ projects/CuLagrange/fem/FleshQuasiStepping.cu | 588 ------ .../CuLagrange/fem/QuasiStaticStepping.cu | 349 ---- .../fem/collision_energy/collision_utils.hpp | 696 ++++--- .../collision_energy/edge_edge_collision.hpp | 13 +- .../edge_edge_sqrt_collision.hpp | 227 -- .../collision_energy/evaluate_collision.hpp | 891 ++++++-- .../vertex_face_sqrt_collision.hpp | 106 +- .../geometry/BaryCentricInterpolator.cu | 224 +- .../geometry/BiharmonicBoundedWeight.cu | 2 +- projects/CuLagrange/geometry/CollisionVis.cu | 1824 ++++++++++++----- .../CuLagrange/geometry/DeformationField.cu | 8 +- .../CuLagrange/geometry/SolveLaplacian.cu | 2 +- projects/CuLagrange/geometry/Topology.cu | 345 +++- projects/CuLagrange/geometry/VectorField.cu | 78 +- .../geometry/file_parser/read_vtk_mesh.hpp | 7 + .../geometry/kernel/bary_centric_weights.hpp | 94 +- .../kernel/calculate_bisector_normal.hpp | 2 +- .../geometry/kernel/calculate_edge_normal.hpp | 34 +- .../kernel/calculate_facet_center.hpp | 6 +- .../kernel/calculate_facet_normal.hpp | 34 +- .../kernel/compute_characteristic_length.hpp | 4 +- .../CuLagrange/geometry/kernel/geo_math.hpp | 386 ++++ .../CuLagrange/geometry/kernel/laplacian.hpp | 351 ++++ .../geometry/kernel/tiled_vector_ops.hpp | 223 +- .../CuLagrange/geometry/kernel/topology.hpp | 393 +++- .../geometry/linear_system/mfcg.hpp | 21 +- 29 files changed, 5253 insertions(+), 3705 deletions(-) delete mode 100644 projects/CuLagrange/fem/FastQuasiStaticStepping.cu delete mode 100644 projects/CuLagrange/fem/FleshQuasiStepping.cu delete mode 100644 projects/CuLagrange/fem/QuasiStaticStepping.cu delete mode 100644 
projects/CuLagrange/fem/collision_energy/edge_edge_sqrt_collision.hpp create mode 100644 projects/CuLagrange/geometry/kernel/laplacian.hpp diff --git a/projects/CuLagrange/CMakeLists.txt b/projects/CuLagrange/CMakeLists.txt index a9d5fdaaf1..25b4e4b5be 100644 --- a/projects/CuLagrange/CMakeLists.txt +++ b/projects/CuLagrange/CMakeLists.txt @@ -70,17 +70,11 @@ target_sources(zeno PRIVATE # fem/Check.cu fem/Generation.cpp - # fem/test.cpp - # fem/QuasiStaticStepping.cu - - # fem/FastQuasiStaticStepping.cu - fem/FleshQuasiStepping.cu - fem/FleshDynamicStepping.cu # CHECK THIS fem/collision_energy/vertex_face_collision.hpp fem/collision_energy/vertex_face_sqrt_collision.hpp - fem/collision_energy/edge_edge_collision.hpp - fem/collision_energy/edge_edge_sqrt_collition.hpp + # fem/collision_energy/edge_edge_collision.hpp + # fem/collision_energy/edge_edge_sqrt_collition.hpp fem/collision_energy/evaluate_collision.hpp fem/collision_energy/collision_utils.hpp ) diff --git a/projects/CuLagrange/fem/FastQuasiStaticStepping.cu b/projects/CuLagrange/fem/FastQuasiStaticStepping.cu deleted file mode 100644 index c9f1bb3a8c..0000000000 --- a/projects/CuLagrange/fem/FastQuasiStaticStepping.cu +++ /dev/null @@ -1,719 +0,0 @@ -#include "Structures.hpp" -#include "Utils.hpp" -#include "zensim/Logger.hpp" -#include "zensim/cuda/execution/ExecutionPolicy.cuh" -#include "zensim/geometry/PoissonDisk.hpp" -#include "zensim/geometry/VdbLevelSet.h" -#include "zensim/geometry/VdbSampler.h" -#include "zensim/io/MeshIO.hpp" -#include "zensim/math/bit/Bits.h" -#include "zensim/types/Property.h" -#include -#include -#include -#include -#include -#include - -namespace zeno { -struct FastQuasiStaticStepping : INode { - using T = float; - using dtiles_t = zs::TileVector; - using tiles_t = typename ZenoParticles::particles_t; - using vec3 = zs::vec; - using mat3 = zs::vec; - - struct FastFEMSystem { - template - T energy(Pol &pol, const Model &model, const zs::SmallString tag, dtiles_t& vtemp) { - 
using namespace zs; - constexpr auto space = execspace_e::cuda; - Vector res{verts.get_allocator(), 1}; - res.setVal(0); - // elastic potential - pol(range(eles.size()), [verts = proxy({}, verts), - eles = proxy({}, eles), - vtemp = proxy({}, vtemp), - res = proxy(res), tag, model = model,volf = volf] - ZS_LAMBDA (int ei) mutable { - auto DmInv = eles.pack(dim_c<3, 3>, "IB", ei); - auto inds = eles.pack(dim_c<4>, "inds", ei, int_c); - vec3 xs[4] = {vtemp.pack<3>(tag, inds[0]), vtemp.pack<3>(tag, inds[1]), - vtemp.pack<3>(tag, inds[2]), vtemp.pack<3>(tag, inds[3])}; - mat3 F{}; - { - auto x1x0 = xs[1] - xs[0]; - auto x2x0 = xs[2] - xs[0]; - auto x3x0 = xs[3] - xs[0]; - auto Ds = mat3{x1x0[0], x2x0[0], x3x0[0], x1x0[1], x2x0[1], - x3x0[1], x1x0[2], x2x0[2], x3x0[2]}; - F = Ds * DmInv; - } - auto psi = model.psi(F); - auto vole = eles("vol", ei); - - T gpsi = 0; - for(int i = 0;i != 4;++i) - gpsi += (-volf.dot(xs[i])/4); - - atomic_add(exec_cuda, &res[0], (T)(vole * (psi + gpsi))); - }); - // Bone Driven Potential Energy - T lambda = model.lam; - T mu = model.mu; - auto nmEmbedVerts = b_verts.size(); - if(b_bcws.size() != b_verts.size()){ - fmt::print("B_BCWS_SIZE = {}\t B_VERTS_SIZE = {}\n",b_bcws.size(),b_verts.size()); - throw std::runtime_error("B_BCWS SIZE AND B_VERTS SIZE NOT MATCH"); - } - pol(range(nmEmbedVerts), [vtemp = proxy({},vtemp), - eles = proxy({},eles), - b_verts = proxy({},b_verts), - bcws = proxy({},b_bcws),lambda,mu,tag,res = proxy(res),bone_driven_weight = bone_driven_weight] - ZS_LAMBDA(int vi) mutable { - auto ei = bcws("inds",vi, int_c); - if(ei < 0) - return; - auto inds = eles.pack(dim_c<4>, "inds",ei, int_c); - auto w = bcws.pack(dim_c<4>, "w",vi); - - auto tpos = vec3::zeros(); - for(size_t i = 0;i != 4;++i) - tpos += w[i] * vtemp.pack<3>(tag,inds[i]); - auto pdiff = tpos - b_verts.pack<3>("x",vi); - - T stiffness = 2.0066 * mu + 1.0122 * lambda; - T bpsi = (0.5 * bcws("cnorm",vi) * stiffness * bone_driven_weight * eles("vol",ei)) * 
pdiff.l2NormSqr(); - atomic_add(exec_cuda, &res[0], (T)bpsi); - }); - - return res.getVal(); - } - - template - void gradient(zs::CudaExecutionPolicy& cudaPol, - const Model& model, - const zs::SmallString tag, - dtiles_t& vtemp, - dtiles_t& etemp) { - using namespace zs; - constexpr auto space = execspace_e::cuda; - cudaPol(zs::range(eles.size()), [vtemp = proxy({}, vtemp), - etemp = proxy({}, etemp), - bcws = proxy({},b_bcws), - b_verts = proxy({},b_verts), - verts = proxy({}, verts), - eles = proxy({}, eles),tag, model, volf = volf] ZS_LAMBDA (int ei) mutable { - auto DmInv = eles.pack(dim_c<3, 3>,"IB", ei); - auto dFdX = dFdXMatrix(DmInv); - auto inds = eles.pack(dim_c<4>,"inds", ei, int_c); - vec3 xs[4] = {vtemp.pack(dim_c<3>, tag, inds[0]), vtemp.pack(dim_c<3>, tag, inds[1]), - vtemp.pack(dim_c<3>, tag, inds[2]), vtemp.pack(dim_c<3>, tag, inds[3])}; - mat3 F{}; - { - auto x1x0 = xs[1] - xs[0]; - auto x2x0 = xs[2] - xs[0]; - auto x3x0 = xs[3] - xs[0]; - auto Ds = mat3{x1x0[0], x2x0[0], x3x0[0], x1x0[1], x2x0[1], - x3x0[1], x1x0[2], x2x0[2], x3x0[2]}; - F = Ds * DmInv; - } - auto P = model.first_piola(F); - auto vole = eles("vol", ei); - auto vecP = flatten(P); - auto dFdXT = dFdX.transpose(); - auto vf = -vole * (dFdXT * vecP); - auto mg = volf * vole / 4; - for (int i = 0; i != 4; ++i) { - auto vi = inds[i]; - for (int d = 0; d != 3; ++d) - atomic_add(exec_cuda, &vtemp("grad", d, vi), vf(i * 3 + d) + mg(d)); - } - - }); - - if(b_bcws.size() != b_verts.size()){ - fmt::print("B_BCWS_SIZE = {}\t B_VERTS_SIZE = {}\n",b_bcws.size(),b_verts.size()); - throw std::runtime_error("B_BCWS SIZE AND B_VERTS SIZE NOT MATCH"); - } - - T stiffness = 2.0066 * model.mu + 1.0122 * model.lam; - auto nmEmbedVerts = b_verts.size(); - cudaPol(zs::range(nmEmbedVerts), - [bcws = proxy({},b_bcws),b_verts = proxy({},b_verts),vtemp = proxy({},vtemp),etemp = proxy({},etemp), - eles = proxy({},eles),stiffness,tag,bone_driven_weight = bone_driven_weight] ZS_LAMBDA(int vi) mutable { - auto 
ei = bcws("inds",vi, int_c); - if(ei < 0) - return; - auto inds = eles.pack(dim_c<4>, "inds",ei, int_c); - auto w = bcws.pack<4>("w",vi); - auto tpos = vec3::zeros(); - for(size_t i = 0;i != 4;++i) - tpos += w[i] * vtemp.pack<3>(tag,inds[i]); - auto pdiff = tpos - b_verts.pack<3>("x",vi); - - for(size_t i = 0;i != 4;++i){ - auto tmp = pdiff * (-stiffness * bcws("cnorm",vi) * bone_driven_weight * w[i] * eles("vol",ei)); - // tmp = pdiff * (-lambda * bcws("cnorm",vi) * bone_driven_weight * w[i]); - for(size_t d = 0;d != 3;++d) - atomic_add(exec_cuda,&vtemp("grad",d,inds[i]),(T)tmp[d]); - } - }); - } - - template - void laplacian(zs::CudaExecutionPolicy& cudaPol, - const Model& model, - const zs::SmallString tag, - const zs::SmallString Htag, - dtiles_t& vtemp, - dtiles_t& etemp) { - using namespace zs; - constexpr auto space = execspace_e::cuda; - T stiffness = 2.0066 * model.mu + 1.0122 * model.lam; - cudaPol(zs::range(eles.size()), - [vtemp = proxy({}, vtemp),etemp = proxy({}, etemp), - bcws = proxy({},b_bcws),b_verts = proxy({},b_verts), - verts = proxy({},verts),eles = proxy({},eles),tag, - Htag,stiffness,bone_driven_weight = bone_driven_weight] - ZS_LAMBDA(int ei) mutable { - auto DmInv = eles.template pack<3, 3>("IB", ei); - auto dFdX = dFdXMatrix(DmInv); - auto vol = eles("vol",ei); - etemp.template tuple<12*12>(Htag,ei) = stiffness * vol * dFdX.transpose() * dFdX; - }); - - cudaPol(zs::range(b_bcws.size()), - [bcws = proxy({},b_bcws),b_verts = proxy({},b_verts),vtemp = proxy({},vtemp),etemp = proxy({},etemp), - eles = proxy({},eles),stiffness,tag,bone_driven_weight = bone_driven_weight] ZS_LAMBDA(int vi) mutable { - auto ei = bcws("inds",vi, int_c); - if(ei < 0) - return; - auto inds = eles.pack(dim_c<4>, "inds",ei, int_c); - auto w = bcws.pack<4>("w",vi); - - for(int i = 0;i != 4;++i) - for(int j = 0;j != 4;++j){ - T alpha = stiffness * bone_driven_weight * w[i] * w[j] * bcws("cnorm",vi) * eles("vol",ei); - for(int d = 0;d != 3;++d){ - 
atomic_add(exec_cuda,&etemp("He",(i * 3 + d) * 12 + j * 3 + d,ei),alpha); - } - } - - }); - } - - template - void hessian(zs::CudaExecutionPolicy& cudaPol, - const Model& model, - const zs::SmallString xTag, - const zs::SmallString HTag, - dtiles_t& vtemp, - dtiles_t& etemp) { - using namespace zs; - constexpr auto space = execspace_e::cuda; - // fmt::print("check here 0"); - cudaPol(zs::range(eles.size()), [vtemp = proxy({}, vtemp), - etemp = proxy({}, etemp), - bcws = proxy({},b_bcws), - b_verts = proxy({},b_verts), - verts = proxy({}, verts), - eles = proxy({}, eles),tag = xTag,HTag, model, volf = volf] ZS_LAMBDA (int ei) mutable { - auto DmInv = eles.pack(dim_c<3, 3>, "IB", ei); - auto dFdX = dFdXMatrix(DmInv); - auto inds = eles.pack(dim_c<4>, "inds", ei, int_c); - vec3 xs[4] = {vtemp.pack(dim_c<3>, tag, inds[0]), vtemp.pack(dim_c<3>, tag, inds[1]), - vtemp.pack(dim_c<3>, tag, inds[2]), vtemp.pack(dim_c<3>, tag, inds[3])}; - mat3 F{}; - { - auto x1x0 = xs[1] - xs[0]; - auto x2x0 = xs[2] - xs[0]; - auto x3x0 = xs[3] - xs[0]; - auto Ds = mat3{x1x0[0], x2x0[0], x3x0[0], x1x0[1], x2x0[1], - x3x0[1], x1x0[2], x2x0[2], x3x0[2]}; - F = Ds * DmInv; - } - auto vole = eles("vol", ei); - auto dFdXT = dFdX.transpose(); - - auto Hq = model.first_piola_derivative(F, true_c); - auto H = dFdXT * Hq * dFdX * vole; - - etemp.tuple<12 * 12>(HTag, ei) = H; - - }); - T stiffness = 2.0066 * model.mu + 1.0122 * model.lam; - cudaPol(zs::range(b_bcws.size()), - [bcws = proxy({},b_bcws),b_verts = proxy({},b_verts),vtemp = proxy({},vtemp),etemp = proxy({},etemp), - eles = proxy({},eles),stiffness,HTag,bone_driven_weight = bone_driven_weight] ZS_LAMBDA(int vi) mutable { - auto ei = bcws("inds",vi, int_c); - if(ei < 0) - return; - auto inds = eles.pack(dim_c<4>,"inds",ei, int_c); - auto w = bcws.pack(dim_c<4>,"w",vi); - - for(int i = 0;i != 4;++i) - for(int j = 0;j != 4;++j){ - T alpha = stiffness * bone_driven_weight * w[i] * w[j] * bcws("cnorm",vi) * eles("vol",ei); - for(int d = 0;d != 
3;++d){ - atomic_add(exec_cuda,&etemp(HTag,(i * 3 + d) * 12 + j * 3 + d,ei),alpha); - } - } - - }); - } - - - template - void precondition(Pol &pol, const zs::SmallString srcTag, - const zs::SmallString dstTag,dtiles_t& vtemp) { - using namespace zs; - constexpr execspace_e space = execspace_e::cuda; - // precondition - pol(zs::range(verts.size()), - [vtemp = proxy({}, vtemp), verts = proxy({}, verts), - srcTag, dstTag] ZS_LAMBDA(int vi) mutable { - vtemp.tuple<3>(dstTag, vi) = - vtemp.pack<3, 3>("P", vi) * vtemp.pack<3>(srcTag, vi); - // vtemp.tuple<3>(dstTag, vi) = vtemp.pack<3>(srcTag, vi); - }); - } - - template - void multiply(Pol &pol, const zs::SmallString dxTag, - const zs::SmallString bTag, - const zs::SmallString HTag, - dtiles_t& vtemp, - const dtiles_t& etemp) { - using namespace zs; - constexpr execspace_e space = execspace_e::cuda; - constexpr auto execTag = wrapv{}; - const auto numVerts = verts.size(); - const auto numEles = eles.size(); - // dx -> b - pol(range(numVerts), - [execTag, vtemp = proxy({}, vtemp), bTag] ZS_LAMBDA( - int vi) mutable { vtemp.tuple<3>(bTag, vi) = vec3::zeros(); }); - // elastic energy - pol(range(numEles), [execTag, etemp = proxy({}, etemp), - vtemp = proxy({}, vtemp), - eles = proxy({}, eles), dxTag, bTag, HTag] ZS_LAMBDA(int ei) mutable { - constexpr int dim = 3; - constexpr auto dimp1 = dim + 1; - auto inds = eles.template pack("inds", ei, int_c); - zs::vec temp{}; - for (int vi = 0; vi != dimp1; ++vi) - for (int d = 0; d != dim; ++d) { - temp[vi * dim + d] = vtemp(dxTag, d, inds[vi]); - } - auto He = etemp.pack(HTag, ei); - - temp = He * temp; - - for (int vi = 0; vi != dimp1; ++vi) - for (int d = 0; d != dim; ++d) { - atomic_add(execTag, &vtemp(bTag, d, inds[vi]), temp[vi * dim + d]); - } - }); - } - - FastFEMSystem(const tiles_t &verts, const tiles_t &eles, const tiles_t &b_bcws, const tiles_t& b_verts,T bone_driven_weight,vec3 volf) - : verts{verts}, eles{eles}, b_bcws{b_bcws}, b_verts{b_verts}, 
bone_driven_weight{bone_driven_weight},volf{volf}{} - - const tiles_t &verts; - const tiles_t &eles; - const tiles_t &b_bcws; // the barycentric interpolation of embeded bones - const tiles_t &b_verts; // the position of embeded bones - - T bone_driven_weight; - vec3 volf; - - }; - - template - constexpr void backtracking_line_search(zs::CudaExecutionPolicy &cudaPol,Equation& A,Model& models,int max_line_search,T armijo, - const zs::SmallString& dtag,const zs::SmallString& gtag,const zs::SmallString& xtag,T init_step,dtiles_t& vtemp) { - using namespace zs; - constexpr auto space = execspace_e::cuda; - T dg = dot(cudaPol,vtemp,gtag,dtag); - T E0; - match([&](auto &elasticModel) { - E0 = A.energy(cudaPol, elasticModel, xtag,vtemp); - })(models.getElasticModel()); - T E{E0}; - int line_search = 0; - std::vector armijo_buffer(max_line_search); - T step = init_step; - cudaPol(zs::range(vtemp.size()), - [vtemp = proxy({},vtemp),xtag,dtag,step] ZS_LAMBDA(int vi) mutable { - vtemp.pack<3>(xtag,vi) += step * vtemp.pack<3>(dtag,vi); - }); - - do { - match([&](auto &elasticModel) { - E = A.energy(cudaPol,elasticModel,xtag,vtemp); - })(models.getElasticModel()); - // fmt::print("E: {} at alpha {}. 
E0 {}\n", E, alpha, E0); - // fmt::print("Armijo : {} < {}\n",(E - E0)/alpha,dg); - armijo_buffer[line_search] = (E - E0)/step; - // test Armojo condition - if(((double)E - (double)E0) < (double)armijo * (double)dg * (double)step) - break; - step /= 2; - cudaPol(zs::range(vtemp.size()), [vtemp = proxy({}, vtemp),step,xtag,dtag] ZS_LAMBDA(int vi) mutable { - vtemp.tuple<3>(xtag, vi) = vtemp.pack<3>(xtag, vi) - step * vtemp.pack<3>(dtag, vi); - }); - ++line_search; - } while (line_search < max_line_search); - // return line_search; - } - - template - constexpr int solve_equation_using_pcg(zs::CudaExecutionPolicy &cudaPol,Equation& A,Model& models,const zs::SmallString& btag,const zs::SmallString& xtag,const zs::SmallString& Ptag,dtiles_t& vtemp, - zs::SmallString Htag,dtiles_t& etemp,T accuracy) { - using namespace zs; - constexpr auto space = execspace_e::cuda; - // set b = 0 outside the function call - // cudaPol(zs::range(vtemp.size()),[vtemp = proxy({},vtemp)] ZS_LAMBDA(int vi) mutable { - // vtemp.pack<3>("b",vi) = vec3::zeros(); - // }); - - A.multiply(cudaPol,xtag,"temp","L",vtemp,etemp); - cudaPol(zs::range(vtemp.size()),[vtemp = proxy({},vtemp)] ZS_LAMBDA(int vi) mutable { - vtemp.tuple<3>("r",vi) = vtemp.pack<3>("b",vi) - vtemp.pack<3>("temp",vi); - }); - // no projection here - // A.project(cudaPol,"btag",verts,"r",vtemp); - A.precondition(cudaPol,"r","q",vtemp); - cudaPol(zs::range(vtemp.size()), - [vtemp = proxy({},vtemp)] ZS_LAMBDA(int vi) mutable { - vtemp.tuple<3>("p",vi) = vtemp.pack<3>("q",vi); - }); - - T zTrk = dot(cudaPol,vtemp,"r","q"); - - auto residualPreconditionedNorm = std::sqrt(zTrk); - auto localTol = accuracy * residualPreconditionedNorm; - int iter = 0; - for(;iter != 1000;++iter){ - if(residualPreconditionedNorm <= localTol){ - fmt::print("finish with cg iter: {}, norm: {} zTrk: {}\n",iter,residualPreconditionedNorm,zTrk); - break; - } - - - A.multiply(cudaPol,"p","temp","L",vtemp,etemp); - T alpha = zTrk / 
dot(cudaPol,vtemp,"temp","p"); - - cudaPol(range(vtemp.size()), [vtemp = proxy({},vtemp),alpha,xtag] ZS_LAMBDA(int vi) mutable { - vtemp.pack<3>(xtag, vi) += alpha * vtemp.pack<3>("p", vi); - vtemp.pack<3>("r", vi) -= alpha * vtemp.pack<3>("temp", vi); - }); - if(iter % 51 == 50){ - A.multiply(cudaPol,xtag,"temp","L",vtemp,etemp); - cudaPol(zs::range(vtemp.size()), - [vtemp = proxy({},vtemp),btag] ZS_LAMBDA(int vi) mutable { - vtemp.template tuple<3>("r",vi) = vtemp.pack<3>(btag,vi) - vtemp.pack<3>("temp",vi); - }); - } - - A.precondition(cudaPol,"r","q",vtemp); - auto zTrkLast = zTrk; - zTrk = dot(cudaPol,vtemp,"q","r"); - auto beta = zTrk / zTrkLast; - - cudaPol(range(vtemp.size()), [vtemp = proxy({}, vtemp),beta] ZS_LAMBDA(int vi) mutable { - vtemp("p", vi) = vtemp("q", vi) + beta * vtemp("p", vi); - }); - - residualPreconditionedNorm = std::sqrt(zTrk); - ++iter; - } - - return iter; - } - - static T reduce(zs::CudaExecutionPolicy &cudaPol, const zs::Vector &res) { - using namespace zs; - constexpr auto space = execspace_e::cuda; - Vector ret{res.get_allocator(), 1}; - auto sid = cudaPol.getStreamid(); - auto procid = cudaPol.getProcid(); - auto &context = Cuda::context(procid); - auto stream = (cudaStream_t)context.streamSpare(sid); - std::size_t temp_bytes = 0; - cub::DeviceReduce::Reduce(nullptr, temp_bytes, res.data(), ret.data(), - res.size(), std::plus{}, (T)0, stream); - Vector temp{res.get_allocator(), - temp_bytes / sizeof(std::max_align_t) + 1}; - cub::DeviceReduce::Reduce(temp.data(), temp_bytes, res.data(), ret.data(), - res.size(), std::plus{}, (T)0, stream); - context.syncStreamSpare(sid); - return (T)ret.getVal(); - } - template - T dot(zs::CudaExecutionPolicy &cudaPol, dtiles_t &vertData, - const zs::SmallString tag0, const zs::SmallString tag1,int offset0 = 0,int offset1 = 0) { - using namespace zs; - constexpr auto space = execspace_e::cuda; - Vector res{vertData.get_allocator(), vertData.size()},ret{vertData.get_allocator(),1}; - 
cudaPol(range(vertData.size()), - [data = proxy({},vertData),res = proxy(res),tag0,tag1,offset0,offset1] ZS_LAMBDA(int pi) mutable { - res[pi] = (T)0.; - for(int i = 0;i < pack_dim;++i) - res[pi] += data(tag0,offset0*pack_dim + i,pi) * data(tag1,offset1*pack_dim + i,pi); - }); - //zs::reduce(cudaPol,std::begin(res),std:end(res),std::begin(ret), (T)0); - //return (T)ret.getVal(); - return reduce(cudaPol, res); - } - - T infNorm(zs::CudaExecutionPolicy &cudaPol, dtiles_t &vertData, - const zs::SmallString tag = "dir") { - using namespace zs; - constexpr auto space = execspace_e::cuda; - Vector res{vertData.get_allocator(), 1}; - res.setVal(0); - cudaPol(range(vertData.size()), - [data = proxy({}, vertData), res = proxy(res), - tag] __device__(int pi) mutable { - auto v = data.pack<3>(tag, pi); - atomic_max(exec_cuda, res.data(), v.abs().max()); - }); - return res.getVal(); - } - - virtual void apply() override { - using namespace zs; - auto zstets = get_input("ZSParticles"); - auto gravity = get_input("gravity")->get>(); - auto zsbones = get_input("driven_bones"); // driven bones - - auto armijo = get_param("armijo"); - auto curvature = get_param("wolfe"); - auto cg_res = get_param("cg_res"); // cg_res for inner loop of quasi-newton solver - auto btl_res = get_param("btl_res"); // a termination criterion for line search - - auto epsilon = get_param("epsilon"); - auto rel_epsilon = get_param("rel_epsilon"); - - auto models = zstets->getModel(); - auto& verts = zstets->getParticles(); - auto& eles = zstets->getQuadraturePoints(); - - auto tag = get_param("driven_tag"); // tag channel where the bones are binded - auto bone_driven_weight = get_param("bone_driven_weight"); // the weight of bone-driven potential - auto nm_newton_iters = get_param("nm_newton_iters"); - auto quasi_newton_window_size = get_param("window_size"); - - auto volf = vec3::from_array(gravity * models.density); - - static dtiles_t vtemp{verts.get_allocator(), - { - {"grad", 3}, - {"gradp",3}, - {"P", 
9}, - {"dir", 3}, - {"xn", 3}, - {"xn0", 3}, - {"xp",3}, - {"temp", 3}, - {"r", 3}, - {"p", 3}, - {"q", 3}, - {"fx", quasi_newton_window_size}, - {"s", 3 * quasi_newton_window_size}, - {"y", 3 * quasi_newton_window_size} - },verts.size()}; - // buffer storage for laplace matrix - static dtiles_t etemp{eles.get_allocator(),{{"L", 12 * 12},{"H",12 * 12}},eles.size()}; - vtemp.resize(verts.size()); - etemp.resize(eles.size()); - FastFEMSystem A{verts,eles,(*zstets)[tag],zsbones->getParticles(),bone_driven_weight,volf}; - - constexpr auto space = execspace_e::cuda; - auto cudaPol = cuda_exec(); - - // use the initial guess if given - if(verts.hasProperty("init_x")) { - fmt::print("set up initial guess for equation solution\n"); - cudaPol(zs::range(verts.size()), - [vtemp = proxy({}, vtemp),verts = proxy({}, verts)] __device__(int vi) mutable { - auto x = verts.pack<3>("init_x", vi); - vtemp.tuple<3>("xn", vi) = x; - }); - } else {// use the previous simulation result - cudaPol(zs::range(verts.size()), - [vtemp = proxy({}, vtemp), - verts = proxy({}, verts)] __device__(int vi) mutable { - auto x = verts.pack<3>("x", vi); - vtemp.tuple<3>("xn", vi) = x; - }); - } - match([&](auto &elasticModel){ - A.laplacian(cudaPol,elasticModel,"xn","L",vtemp,etemp); - })(models.getElasticModel()); - - match([&](auto &elasticModel){ - A.hessian(cudaPol,elasticModel,"xn","H",vtemp,etemp); - })(models.getElasticModel()); - - // build preconditioner for fast cg convergence - cudaPol(zs::range(vtemp.size()), - [vtemp = proxy({}, vtemp), - verts = proxy({}, verts)] ZS_LAMBDA (int vi) mutable { - vtemp.tuple<9>("P", vi) = mat3::zeros(); - }); - cudaPol(zs::range(eles.size()), - [vtemp = proxy({},vtemp),etemp = proxy({},etemp),eles = proxy({},eles)] - ZS_LAMBDA (int ei) mutable { - constexpr int dim = 3; - constexpr auto dimp1 = dim + 1; - auto inds = eles.pack(dim_c,"inds",ei, int_c); - auto He = etemp.pack("L",ei); - - for (int vi = 0; vi != dimp1; ++vi) { - #if 1 - for (int i = 0; i != 
dim; ++i) - for (int j = i; j != dim; ++j){ - atomic_add(exec_cuda, &vtemp("P", i * dim + j, inds[vi]),He(vi * dim + i, vi * dim + j)); - // atomic_add(exec_cuda, &vtemp("P", j * dim + i, inds[vi]),He(vi * dim + i, vi * dim + j)); - } - #else - for (int j = 0; j != dim; ++j) { - atomic_add(exec_cuda, &vtemp("P", j * dim + j, inds[vi]), - He(vi * dim + j, vi * dim + j)); - } - #endif - } - }); - cudaPol(zs::range(vtemp.size()), - [vtemp = proxy({}, vtemp), - verts = proxy({}, verts)] ZS_LAMBDA (int vi) mutable { - constexpr int dim = 3; - for (int i = 0; i != dim; ++i) - for (int j = i+1; j != dim; ++j){ - vtemp("P", j * dim + i, vi) = vtemp("P", i * dim + j, vi); - // atomic_add(exec_cuda, &vtemp("P", j * dim + i, inds[vi]),He(vi * dim + i, vi * dim + j)); - } - }); - - cudaPol(zs::range(vtemp.size()), - [vtemp = proxy({},vtemp)] __device__(int vi) mutable { - // we need to use double-precision inverse here, when the P matrix is nearly singular or has very large coeffs - vtemp.tuple<9>("P",vi) = inverse(vtemp.pack<3,3>("P",vi).cast()); - }); - - // solve the problem using quasi-newton solver - match([&](auto &elasticModel){ - A.gradient(cudaPol,elasticModel,"xn",vtemp,etemp); - })(models.getElasticModel()); - - T gn = std::sqrt(dot(cudaPol,vtemp,"grad","grad")); - T xn = std::sqrt(dot(cudaPol,vtemp,"xn","xn")); - - if(gn > epsilon && gn > xn * rel_epsilon && false) { - int k = 0; - T step = 1. / gn; - // solve for cg newton dir might be better? 
- cudaPol(zs::range(vtemp.size()), - [vtemp = proxy({},vtemp)] ZS_LAMBDA(int vi) mutable { - vtemp.tuple<3>("dir",vi) = -vtemp.pack<3,3>("P",vi) * vtemp.pack<3>("grad",vi); - }); - - int nm_corr = 0; - std::vector m_alpha(quasi_newton_window_size); - std::vector m_ys(quasi_newton_window_size); - - fmt::print("SOLVE EQUA USING QUASI_NEWTON\n"); - - while(k < nm_newton_iters) { - // copy the x and grad - cudaPol(zs::range(vtemp.size()), - [vtemp = proxy({},vtemp)] ZS_LAMBDA(int vi) mutable { - vtemp.tuple<3>("xp",vi) = vtemp.pack<3>("xn",vi); - vtemp.tuple<3>("gradp",vi) = vtemp.pack<3>("grad",vi); - }); - // do line search along the searching direction using armijo condition.../ consider wolfe only when the spd is not enforced - backtracking_line_search(cudaPol,A,models,10,armijo,"dir","grad","xn",step,vtemp); - T gn = std::sqrt(dot(cudaPol,vtemp,"grad","grad")); - T xn = std::sqrt(dot(cudaPol,vtemp,"xn","xn")); - // gradient termination criterion test - if(gn <= epsilon || gn <= epsilon * xn) - break; - // add correction to hessian approximation - cudaPol(zs::range(vtemp.size()), - [vtemp = proxy({},vtemp),ws = quasi_newton_window_size,k] ZS_LAMBDA(int vi) mutable { - for(int i = 0;i != 3;++i){ - vtemp("s",(k % ws)*3 + i,vi) = vtemp("xn",i,vi) - vtemp("xp",i,vi); - vtemp("y",(k % ws)*3 + i,vi) = vtemp("grad",i,vi) - vtemp("gradp",i,vi); - // vtemp.tuple<3>("s",k % ws,vi) = vtemp.pack<3>("xn",vi) - vtemp.pack<3>("xp",vi); - // vtemp.tuple<3>("y",k % ws,vi) = vtemp.pack<3>("grad",vi) - vtemp.pack<3>("gradp",vi); - } - }); - // some problem use atomic add - m_ys[k % quasi_newton_window_size] = dot(cudaPol,vtemp,"s","y",k % quasi_newton_window_size,k % quasi_newton_window_size); - ++nm_corr; - // apply Hv - // recursively compute d = -H*g - { - // Loop1 - // m_dir = -m_g - cudaPol(zs::range(vtemp.size()), - [vtemp = proxy({},vtemp)] ZS_LAMBDA(int vi) mutable { - vtemp.tuple<3>("temp",vi) = -vtemp.pack<3>("grad",vi); - }); - // point to the most recent correction buffer 
- int j = (k+1) % quasi_newton_window_size; - for(int i = 0;i < nm_corr;++i){ - // moving backward - j = (j + quasi_newton_window_size - 1) % quasi_newton_window_size; - m_alpha[j] = dot(cudaPol,vtemp,"s","temp",k % quasi_newton_window_size) / m_ys[j]; - cudaPol(zs::range(vtemp.size()), - [vtemp = proxy({},vtemp),alpha = m_alpha[j],ws = quasi_newton_window_size,k] - ZS_LAMBDA(int vi) mutable { - for(int i = 0;i != 3;++i) - vtemp("temp",i,vi) -= alpha * vtemp("y",(k % ws)*3 + i,vi); - }); - } - // solve laplace equation using cg, do not have to be that accurate? - solve_equation_using_pcg(cudaPol,A,models,"temp","dir","P",vtemp,"L",etemp,cg_res); - // Loop 2 - for(int i = 0;i < nm_corr;++i){ - T beta = dot(cudaPol,vtemp,"y","dir",j) / m_ys[j]; - cudaPol(zs::range(vtemp.size()), - [vtemp = proxy({},vtemp),offset = k % quasi_newton_window_size,alpha = m_alpha[j],beta,j] ZS_LAMBDA(int vi) mutable{ - for(int i = 0;i != 3;++i) - vtemp("dir",i,vi) += (alpha - beta) * vtemp("s",j*3 + i,vi); - }); - j = (j+1) % quasi_newton_window_size; - } - } - - step = 1.; - ++k; - } - }else{ - fmt::print("EARLY TERMINATION\n"); - } - cudaPol(zs::range(vtemp.size()), - [vtemp = proxy({},vtemp),verts = proxy({},verts)] ZS_LAMBDA(int vi) mutable { - verts.template tuple<3>("x",vi) = vtemp.pack<3>("xn",vi); - }); - - set_output("ZSParticles", zstets); - } -}; - - -ZENDEFNODE(FastQuasiStaticStepping, {{"ZSParticles","driven_bones","gravity"}, - {"ZSParticles"}, - {{"float","armijo","0.1"},{"float","wolfe","0.9"}, - {"float","cg_res","0.1"},{"float","btl_res","0.0001"},{"float","epsilon","1e-5"}, - {"float","rel_epsilon","1e-3"}, - {"string","driven_tag","bone_bw"},{"float","bone_driven_weight","0.0"}, - {"int","nm_newton_iters","20"},{"int","window_size","8"} - }, - {"FEM"}}); - -}; \ No newline at end of file diff --git a/projects/CuLagrange/fem/FleshDynamicStepping.cu b/projects/CuLagrange/fem/FleshDynamicStepping.cu index 6c15619002..0403cb5c7c 100644 --- 
a/projects/CuLagrange/fem/FleshDynamicStepping.cu +++ b/projects/CuLagrange/fem/FleshDynamicStepping.cu @@ -33,23 +33,14 @@ #include "collision_energy/vertex_face_sqrt_collision.hpp" #include "collision_energy/vertex_face_collision.hpp" -#include "collision_energy/edge_edge_sqrt_collision.hpp" -#include "collision_energy/edge_edge_collision.hpp" - - - +// #include "collision_energy/edge_edge_sqrt_collision.hpp" +// #include "collision_energy/edge_edge_collision.hpp" #include "collision_energy/evaluate_collision.hpp" -#define DEBUG_FLESH_DYN_STEPPING 1 - namespace zeno { -// TODO : boundary force -// TODO : fixed points -// Anisotropic Cardiac - -#define MAX_FP_COLLISION_PAIRS 6 +#define MAX_FP_COLLISION_PAIRS 4 struct FleshDynamicStepping : INode { @@ -57,6 +48,7 @@ struct FleshDynamicStepping : INode { using Ti = int; using dtiles_t = zs::TileVector; using tiles_t = typename ZenoParticles::particles_t; + using vec2 = zs::vec; using vec3 = zs::vec; using mat3 = zs::vec; using mat9 = zs::vec; @@ -71,43 +63,134 @@ struct FleshDynamicStepping : INode { // currently only backward euler integrator is supported // topology evaluation should be called before applying this node struct FEMDynamicSteppingSystem { - - constexpr auto dFAdF(const mat3& A) { - mat9 M{}; - M(0,0) = M(1,1) = M(2,2) = A(0,0); - M(3,0) = M(4,1) = M(5,2) = A(0,1); - M(6,0) = M(7,1) = M(8,2) = A(0,2); - - M(0,3) = M(1,4) = M(2,5) = A(1,0); - M(3,3) = M(4,4) = M(5,5) = A(1,1); - M(6,3) = M(7,4) = M(8,5) = A(1,2); - - M(0,6) = M(1,7) = M(2,8) = A(2,0); - M(3,6) = M(4,7) = M(5,8) = A(2,1); - M(6,6) = M(7,7) = M(8,8) = A(2,2); - - return M; - } - template void computeCollisionEnergy(zs::CudaExecutionPolicy& cudaPol,const Model& model, dtiles_t& vtemp, dtiles_t& etemp, dtiles_t& sttemp, dtiles_t& setemp, - dtiles_t& cptemp, - // const bvh_t& stBvh, - // const bvh_t& seBvh, - const T& thickness) { + dtiles_t& ee_buffer, + dtiles_t& fe_buffer) { using namespace zs; constexpr auto space = execspace_e::cuda; T 
lambda = model.lam; T mu = model.mu; + } + + void findInversion(zs::CudaExecutionPolicy& cudaPol,dtiles_t& vtemp,dtiles_t& etemp) { + using namespace zs; + constexpr auto space = execspace_e::cuda; + TILEVEC_OPS::fill(cudaPol,vtemp,"is_inverted",reinterpret_bits((int)0)); + TILEVEC_OPS::fill(cudaPol,etemp,"is_inverted",reinterpret_bits((int)0)); + cudaPol(zs::range(eles.size()), + [vtemp = proxy({},vtemp), + quads = proxy({},eles), + etemp = proxy({},etemp)] ZS_LAMBDA(int ei) mutable { + auto DmInv = quads.template pack<3,3>("IB",ei); + auto inds = quads.template pack<4>("inds",ei).reinterpret_bits(int_c); + vec3 x1[4] = {vtemp.template pack<3>("xn", inds[0]), + vtemp.template pack<3>("xn", inds[1]), + vtemp.template pack<3>("xn", inds[2]), + vtemp.template pack<3>("xn", inds[3])}; + mat3 F{}; + { + auto x1x0 = x1[1] - x1[0]; + auto x2x0 = x1[2] - x1[0]; + auto x3x0 = x1[3] - x1[0]; + auto Ds = mat3{x1x0[0], x2x0[0], x3x0[0], x1x0[1], x2x0[1], + x3x0[1], x1x0[2], x2x0[2], x3x0[2]}; + F = Ds * DmInv; + } + if(zs::determinant(F) < 0.0){ + // for(int i = 0;i < 4;++i) + // vtemp("is_inverted",inds[i]) = reinterpret_bits((int)1); + etemp("is_inverted",ei) = reinterpret_bits((int)1); + }else { + etemp("is_inverted",ei) = reinterpret_bits((int)0); + } + }); + cudaPol(zs::range(eles.size()), + [vtemp = proxy({},vtemp), + quads = proxy({},eles), + etemp = proxy({},etemp)] ZS_LAMBDA(int ei) mutable { + auto inds = quads.template pack<4>("inds",ei).reinterpret_bits(int_c); + auto is_inverted = reinterpret_bits(etemp("is_inverted",ei)); + if(is_inverted) + for(int i = 0;i != 4;++i){ + vtemp("is_inverted",inds[i]) = reinterpret_bits((int)1); + } + }); } + // template + // void computeKinematicCollisionGradientAndHessian(zs::CudaExecutionPolicy& cudaPol,const Model& model, + // dtiles_t& vtemp, + // dtiles_t& sptemp, + // dtiles_t& sttemp, + // const dtiles_t& kvtemp, + // const dtiles_t& kltemp, + // const dtiles_t& kttemp, + // dtiles_t& kc_buffer, + // dtiles_t& gh_buffer, + 
// bool neglect_inverted = true) { + // using namespace zs; + // constexpr auto space = execspace_e::cuda; + + // int offset = eles.size() + b_verts.size() + points.size() * MAX_FP_COLLISION_PAIRS; + // T lambda = model.lam; + // T mu = model.mu; + + // // COLLISION_UTILS::do_kinematic_point_collision_detection(cudaPol, + // // vtemp,"xn", + // // points, + // // lines, + // // tris, + // // kvtemp, + // // kltemp, + // // kttemp, + // // kc_buffer, + // // in_collisionEps,out_collisionEps); + + // // COLLISION_UTILS::evaluate_kinematic_fp_collision_grad_and_hessian(cudaPol, + // // vtemp,"xn", + // // kvtemp, + // // kc_buffer, + // // gh_buffer,offset, + // // in_collisionEps,out_collisionEps, + // // (T)collisionStiffness, + // // (T)mu,(T)lambda); + + // if(neglect_inverted) { + // cudaPol(zs::range(kc_buffer.size()), + // [gh_buffer = proxy({},gh_buffer), + // vtemp = proxy({},vtemp), + // kc_buffer = proxy({},kc_buffer), + // offset] ZS_LAMBDA(int cpi) { + // auto inds = gh_buffer.template pack<4>("inds",cpi + offset).reinterpret_bits(int_c); + // for(int i = 0;i != 4;++i) + // if(inds[i] < 0) + // return; + + // bool is_inverted = false; + // int is_fp = reinterpret_bits(kc_buffer("is_fp",cpi)); + // int check_len = is_fp > 0 ? 
3 : 1; + // for(int i = 0;i != check_len;++i){ + // auto vi = inds[i]; + // auto is_vertex_inverted = reinterpret_bits(vtemp("is_inverted",vi)); + // if(is_vertex_inverted) + // is_inverted = true; + // } + + // if(is_inverted){ + // gh_buffer.template tuple<12*12>("H",cpi + offset) = zs::vec::zeros(); + // gh_buffer.template tuple<12>("grad",cpi + offset) = zs::vec::zeros(); + // } + // }); + // } + // } template void computeCollisionGradientAndHessian(zs::CudaExecutionPolicy& cudaPol,const Model& model, @@ -115,435 +198,370 @@ struct FleshDynamicStepping : INode { dtiles_t& etemp, dtiles_t& sttemp, dtiles_t& setemp, - dtiles_t& cptemp, - // const bvh_t& stBvh, - // const bvh_t& seBvh, - const T& thickness, + // dtiles_t& ee_buffer, + dtiles_t& fp_buffer, + dtiles_t& kverts, + dtiles_t& kc_buffer, + dtiles_t& gh_buffer, + T kd_theta = (T)0.0, bool explicit_collision = false, bool neglect_inverted = true) { using namespace zs; constexpr auto space = execspace_e::cuda; + int offset = eles.size(); + T lambda = model.lam; T mu = model.mu; - #if DEBUG_FLESH_DYN_STEPPING - if(!vtemp.hasProperty("grad")) - fmt::print(fg(fmt::color::red),"the vtemp has no 'grad' channel\n"); - if(!vtemp.hasProperty("xn")) - fmt::print(fg(fmt::color::red),"the verts has no 'xn' channel\n"); - if(!vtemp.hasProperty("xp")) - fmt::print(fg(fmt::color::red),"the verts has no 'xn' channel\n"); - if(!vtemp.hasProperty("is_inverted")) - fmt::print(fg(fmt::color::red),"the verts has no 'is_inverted' channel\n"); - if(!vtemp.hasProperty("vp")) - fmt::print(fg(fmt::color::red),"the verts has no 'vp' channel\n"); - - if(!etemp.hasProperty("H")) - fmt::print(fg(fmt::color::red),"the etemp has no 'H' channel\n"); - if(!etemp.hasProperty("ActInv")) - fmt::print(fg(fmt::color::red),"the etemp has no 'ActInv' channel\n"); - - if(!verts.hasProperty("m")) - fmt::print(fg(fmt::color::red),"the verts has no 'm' channel\n"); - - if(!eles.hasProperty("inds")) - fmt::print(fg(fmt::color::red),"the eles has no 
'IB' channel\n"); - if(!eles.hasProperty("IB")) - fmt::print(fg(fmt::color::red),"the eles has no 'IB' channel\n"); - if(!eles.hasProperty("m")) - fmt::print(fg(fmt::color::red),"the eles has no 'm' channel\n"); - if(!eles.hasProperty("vol")) - fmt::print(fg(fmt::color::red),"the eles has no 'vol' channel\n"); - - // fmt::print(fg(fmt::color::blue),"the size of tris : {}\n",tris.size()); - if(!tris.hasProperty("inds")) - fmt::print(fg(fmt::color::red),"the tris has no 'inds' channel\n"); - if(!tris.hasProperty("area")) - fmt::print(fg(fmt::color::red),"the tris has no 'area' channel\n"); - if(!points.hasProperty("area")) - fmt::print(fg(fmt::color::red),"the points has no 'area' channel\n"); - - #endif - - auto xtag = zs::SmallString("xn"); - if(explicit_collision) - xtag = zs::SmallString("xp"); - - - if(neglect_inverted) { - // // figure out all the vertices which is incident to an inverted tet - TILEVEC_OPS::fill(cudaPol,vtemp,"is_inverted",reinterpret_bits((int)0)); - cudaPol(zs::range(eles.size()), - [vtemp = proxy({},vtemp),quads = proxy({},eles),xtag] ZS_LAMBDA(int ei) mutable { - auto DmInv = quads.template pack<3,3>("IB",ei); - auto inds = quads.template pack<4>("inds",ei).reinterpret_bits(int_c); - vec3 x1[4] = {vtemp.template pack<3>(xtag, inds[0]), - vtemp.template pack<3>(xtag, inds[1]), - vtemp.template pack<3>(xtag, inds[2]), - vtemp.template pack<3>(xtag, inds[3])}; - - mat3 F{}; - { - auto x1x0 = x1[1] - x1[0]; - auto x2x0 = x1[2] - x1[0]; - auto x3x0 = x1[3] - x1[0]; - auto Ds = mat3{x1x0[0], x2x0[0], x3x0[0], x1x0[1], x2x0[1], - x3x0[1], x1x0[2], x2x0[2], x3x0[2]}; - F = Ds * DmInv; - } - if(zs::determinant(F) < 0.0) - for(int i = 0;i < 4;++i) - vtemp("is_inverted",inds[i]) = reinterpret_bits((int)1); - }); - - } - - -#if 0 - TILEVEC_OPS::fill<4>(cudaPol,cptemp,"inds",zs::vec::uniform(-1).template reinterpret_bits()); - // TILEVEC_OPS::fill<12*12>(cudaPol,cptemp,"H",zs::vec::zeros()); - - // compute vertex facet contact pairs - 
cudaPol(zs::range(points.size()),[lambda = lambda,mu = mu,collisionStiffness = collisionStiffness, - in_collisionEps = in_collisionEps,out_collisionEps = out_collisionEps, - vtemp = proxy({},vtemp), - etemp = proxy({},etemp), - sttemp = proxy({},sttemp), - setemp = proxy({},setemp), - cptemp = proxy({},cptemp), - points = proxy({},points), - lines = proxy({},lines), - tris = proxy({},tris), - stbvh = proxy(stBvh),thickness = thickness, - neglect_inverted = neglect_inverted,xtag] ZS_LAMBDA(int svi) mutable { - // if(svi == 0) { - // if(tris.hasProperty("inds")) - // printf("compare size : %d %d %d\n",(int)vtemp.size(),(int)tris.size(),(int)tris.propertySize("inds")); - // else - // printf("the tris has no inds channel!!!\n"); - // } - - - auto vi = reinterpret_bits(points("inds",svi)); - - if(neglect_inverted) { - auto is_vertex_inverted = reinterpret_bits(vtemp("is_inverted",vi)); - if(is_vertex_inverted) - return; - } - - auto p = vtemp.template pack<3>(xtag,vi); - auto bv = bv_t{get_bounding_box(p - thickness, p + thickness)}; - - - // check whether there is collision happening, and if so, apply the collision force and addup the collision hessian - int nm_collision_pairs = 0; - auto process_vertex_face_collision_pairs = [&](int stI) { - - if(nm_collision_pairs >= MAX_FP_COLLISION_PAIRS) - return; - - auto tri = tris.pack(dim_c<3>, "inds",stI).reinterpret_bits(int_c); - if(tri[0] == vi || tri[1] == vi || tri[2] == vi) - return; - - auto t0 = vtemp.template pack<3>(xtag,tri[0]); - auto t1 = vtemp.template pack<3>(xtag,tri[1]); - auto t2 = vtemp.template pack<3>(xtag,tri[2]); - // check whether the triangle is degenerate - auto restArea = tris("area",stI); - // skip the triangle too small at rest configuration - // if(restArea < (T)1e-6) - // return; - - const auto e10 = t1 - t0; - const auto e20 = t2 - t0; - auto deformedArea = (T)0.5 * e10.cross(e20).norm(); - const T degeneracyEps = 1e-4; - // skip the degenerate triangles - const T relativeArea = deformedArea / 
(restArea + (T)1e-6); - if(relativeArea < degeneracyEps) - return; - - bool collide = false; - - if(COLLISION_UTILS::is_inside_the_cell(vtemp,xtag, - lines,tris, - sttemp,"nrm", - setemp,"nrm", - stI,p,in_collisionEps,out_collisionEps)){ - // printf("find collision facet-vertex collision in-cell pair : %d %d\n",stI,svi); - collide = true; - } - - if(!collide) - return; - - // now there is collision, build the "collision tets" - // if(!vtemp.hasProperty("oneRingArea")) - // printf("vtemp has no oneRingArea"); - - cptemp.template tuple<4>("inds",svi * MAX_FP_COLLISION_PAIRS + nm_collision_pairs) = zs::vec(vi,tri[0],tri[1],tri[2]).template reinterpret_bits(); - - auto vertexFaceCollisionAreas = restArea + points("area",svi); - - vec3 collision_verts[4] = {}; - collision_verts[0] = p; - collision_verts[1] = t0; - collision_verts[1] = t1; - collision_verts[1] = t2; - - auto collisionEps = in_collisionEps; - - auto grad = collisionStiffness * VERTEX_FACE_SQRT_COLLISION::gradient(collision_verts,mu,lambda,collisionEps) * vertexFaceCollisionAreas; - auto hessian = collisionStiffness * VERTEX_FACE_SQRT_COLLISION::hessian(collision_verts,mu,lambda,collisionEps) * vertexFaceCollisionAreas; - cptemp.template tuple<12*12>("H",svi * MAX_FP_COLLISION_PAIRS + nm_collision_pairs) = hessian; - - for(int i = 0;i != 4;++i) { - auto g_vi = i == 0 ? 
vi : tri[i-1]; - for (int d = 0; d != 3; ++d) - atomic_add(exec_cuda, &vtemp("grad", d, g_vi), grad(i * 3 + d)); - } - nm_collision_pairs++; - - }; - stbvh.iter_neighbors(bv,process_vertex_face_collision_pairs); - }); - -#else - - COLLISION_UTILS::do_facet_point_collision_detection(cudaPol, - vtemp,"xn", - points, - lines, - tris, - sttemp, - setemp, - cptemp, - // stBvh, - in_collisionEps,out_collisionEps); - - - // output all the collision pairs - // cudaPol(zs::range(cptemp.size()), - // [cptemp = proxy({},cptemp)] ZS_LAMBDA(int cpi) mutable { - // auto inds = cptemp.template pack<4>("inds",cpi).reinterpret_bits(int_c); - // bool collide = true; - // for(int i = 0;i != 4;++i) - // if(inds[i] < 0) - // collide = false; - // if(collide) - // printf("collision_pair[%d] : %d %d %d %d\n", - // cpi,inds[0],inds[1],inds[2],inds[3]); - // }); - - COLLISION_UTILS::evaluate_collision_grad_and_hessian(cudaPol, - vtemp,"xn", - cptemp, - in_collisionEps,out_collisionEps, - (T)collisionStiffness, - (T)mu,(T)lambda); - - - - // project out all the neglect verts - if(neglect_inverted) { - cudaPol(zs::range(cptemp.size()), - [cptemp = proxy({},cptemp),vtemp = proxy({},vtemp)] ZS_LAMBDA(int cpi) { - auto inds = cptemp.template pack<4>("inds",cpi).reinterpret_bits(int_c); - for(int i = 0;i != 4;++i) - if(inds[i] < 0) - return; - - bool is_inverted = false; - for(int i = 0;i != 4;++i){ - auto vi = inds[i]; - auto is_vertex_inverted = reinterpret_bits(vtemp("is_inverted",vi)); - if(is_vertex_inverted) - is_inverted = true; - } - - if(is_inverted){ - cptemp.template tuple<12*12>("H",cpi) = zs::vec::zeros(); - cptemp.template tuple<12>("grad",cpi) = zs::vec::zeros(); - } - }); - } - - // auto gradN = TILEVEC_OPS::inf_norm<12>(cudaPol,cptemp,"grad"); - // fmt::print(fg(fmt::color::red),"collision gradN = {}\n",gradN); - // TILEVEC_OPS::fill<12*12>(cudaPol,cptemp,"H",zs::vec::zeros()); - - TILEVEC_OPS::assemble<3,4>(cudaPol,cptemp,"grad",vtemp,"grad"); - + // auto stBvh = bvh_t{}; + // 
auto bvs = retrieve_bounding_volumes(cudaPol,vtemp,tris,wrapv<3>{},(T)0.0,"xn"); + // stBvh.build(cudaPol,bvs); + // auto avgl = compute_average_edge_length(cudaPol,vtemp,"xn",tris); + // auto bvh_thickness = 5 * avgl; + // if(!calculate_facet_normal(cudaPol,vtemp,"xn",tris,sttemp,"nrm")){ + // throw std::runtime_error("fail updating facet normal"); + // } + // if(!COLLISION_UTILS::calculate_cell_bisector_normal(cudaPol, + // vtemp,"xn", + // lines, + // tris, + // sttemp,"nrm", + // setemp,"nrm")){ + // throw std::runtime_error("fail calculate cell bisector normal"); + // } + + + COLLISION_UTILS::do_facet_point_collision_detection(cudaPol, + vtemp,"xn", + points, + lines, + tris, + sttemp, + setemp, + fp_buffer, + in_collisionEps,out_collisionEps); + + COLLISION_UTILS::evaluate_fp_collision_grad_and_hessian(cudaPol, + vtemp,"xn","vn",dt, + fp_buffer, + gh_buffer,offset, + in_collisionEps,out_collisionEps, + (T)collisionStiffness, + (T)mu,(T)lambda,(T)kd_theta); + -#endif + COLLISION_UTILS::do_kinematic_point_collision_detection(cudaPol, + vtemp,"xn", + points, + lines, + tris, + setemp, + sttemp, + kverts, + kc_buffer, + (T)kine_in_collisionEps,(T)kine_out_collisionEps,false); + + offset = 0; + + COLLISION_UTILS::evaluate_kinematic_fp_collision_grad_and_hessian(cudaPol, + eles, + vtemp,"xn","vn",dt, + tris, + kverts, + kc_buffer, + gh_buffer,offset, + (T)kine_in_collisionEps,(T)kine_out_collisionEps, + (T)kineCollisionStiffness, + (T)mu,(T)lambda,(T)kd_theta); + + + // adding collision damping on self collision + // int offset = eles.size() + b_verts.size(); + // cudaPol(zs::range(fp_buffer.size() + kc_buffer.size()), + // [vtemp = proxy({},vtemp), + // gh_buffer = proxy({},gh_buffer),offset,kd_theta] ZS_LAMBDA(int ci) mutable { + // auto inds = gh_buffer.pack(dim_c<4>,"inds",ci).reinterpret_bits(int_c); + // for(int i = 0;i != 4;++i) + // if(inds[i] < 0) + // return; + // vec3 vs[4] = {}; + // for(int i = 0;i = 4;++i) + // vs[i] = 
vtemp.pack(dim_c<3>,"vn",inds[i]); + // auto H = gh_buffer.pack(dim_c<12*12>,"H",ci); + // gh_buffer.tuple(dim_c<12*12>,"H",ci) = H; + // }); + } - template + template void computeGradientAndHessian(zs::CudaExecutionPolicy& cudaPol, - const Model& model, - dtiles_t& vtemp, - dtiles_t& etemp) { + const ElasticModel& model, + const AnisoElasticModel& amodel, + const dtiles_t& vtemp, + const dtiles_t& etemp, + dtiles_t& gh_buffer, + T kd_alpha = (T)0.0, + T kd_beta = (T)0.0) { using namespace zs; constexpr auto space = execspace_e::cuda; - #if DEBUG_FLESH_DYN_STEPPING - // std::cout << "CHECK THE PROPERTY CHANNEL" << std::endl; - if(!vtemp.hasProperty("grad")) - fmt::print(fg(fmt::color::red),"the vtemp has no 'grad' channel\n"); - if(!vtemp.hasProperty("xn")) - fmt::print(fg(fmt::color::red),"the verts has no 'xn' channel\n"); - if(!vtemp.hasProperty("xp")) - fmt::print(fg(fmt::color::red),"the verts has no 'xp' channel\n"); - if(!vtemp.hasProperty("vp")) - fmt::print(fg(fmt::color::red),"the verts has no 'vp' channel\n"); - - if(!etemp.hasProperty("H")) - fmt::print(fg(fmt::color::red),"the etemp has no 'H' channel\n"); - if(!etemp.hasProperty("ActInv")) - fmt::print(fg(fmt::color::red),"the etemp has no 'ActInv' channel\n"); - - if(!verts.hasProperty("m")) - fmt::print(fg(fmt::color::red),"the verts has no 'm' channel\n"); - - if(!eles.hasProperty("IB")) - fmt::print(fg(fmt::color::red),"the eles has no 'IB' channel\n"); - if(!eles.hasProperty("m")) - fmt::print(fg(fmt::color::red),"the eles has no 'm' channel\n"); - if(!eles.hasProperty("vol")) - fmt::print(fg(fmt::color::red),"the eles has no 'vol' channel\n"); - if(!eles.hasProperty("inds")) - fmt::print(fg(fmt::color::red),"the eles has no 'inds' channel\n"); - #endif - - TILEVEC_OPS::fill<3>(cudaPol,vtemp,"grad",zs::vec::zeros()); - TILEVEC_OPS::fill<144>(cudaPol,etemp,"H",zs::vec::zeros()); - + int offset = 0; + TILEVEC_OPS::copy<4>(cudaPol,eles,"inds",gh_buffer,"inds",offset); // eval the inertia term 
gradient - cudaPol(zs::range(vtemp.size()), [dt2 = dt2, - vtemp = proxy({},vtemp), + cudaPol(zs::range(eles.size()),[dt2 = dt2, verts = proxy({},verts), - dt = dt] ZS_LAMBDA(int vi) mutable { - auto m = verts("m",vi);// nodal mass - auto x1 = vtemp.pack<3>("xn",vi); - auto x0 = vtemp.pack<3>("xp",vi); - auto v0 = vtemp.pack<3>("vp",vi); - vtemp.template tuple<3>("grad",vi) = -m * (x1 - x0 - v0 * dt) / dt2; + eles = proxy({},eles), + vtemp = proxy({},vtemp), + gh_buffer = proxy({},gh_buffer), + dt = dt,offset = offset] ZS_LAMBDA(int ei) mutable { + auto m = eles("m",ei)/(T)4.0; + auto inds = eles.pack(dim_c<4>,"inds",ei).reinterpret_bits(int_c); + auto pgrad = zs::vec::zeros(); + // auto H = zs::vec::zeros(); + // if(eles.hasProperty("dt")) { + // dt2 = eles("dt",ei) * eles("dt",ei); + // } + + auto inertia = (T)1.0; + if(eles.hasProperty("inertia")) + inertia = eles("inertia",ei); + for(int i = 0;i != 4;++i){ + auto x1 = vtemp.pack(dim_c<3>,"xn",inds[i]); + auto x0 = vtemp.pack(dim_c<3>,"xp",inds[i]); + auto v0 = vtemp.pack(dim_c<3>,"vp",inds[i]); + + auto alpha = inertia * m/dt2; + auto nodal_pgrad = -alpha * (x1 - x0 - v0 * dt); + for(int d = 0;d != 3;++d){ + auto idx = i * 3 + d; + gh_buffer("grad",idx,ei) = nodal_pgrad[d]; + gh_buffer("H",idx*12 + idx,ei + offset) = alpha; + } + + } + // gh_buffer.tuple(dim_c<12>,"grad",ei + offset) = pgrad; + // gh_buffer.template tuple<12*12>("H",ei + offset) = H; }); - cudaPol(zs::range(eles.size()), [this,dt2 = dt2, + + cudaPol(zs::range(eles.size()), [dt = dt,dt2 = dt2,aniso_strength = aniso_strength, + verts = proxy({},verts), vtemp = proxy({}, vtemp), etemp = proxy({}, etemp), - bcws = proxy({},b_bcws), - b_verts = proxy({},b_verts), - verts = proxy({}, verts), + gh_buffer = proxy({},gh_buffer), eles = proxy({}, eles), - model, volf = volf] ZS_LAMBDA (int ei) mutable { - auto DmInv = eles.template pack<3,3>("IB",ei); - auto dFdX = dFdXMatrix(DmInv); - auto inds = eles.template pack<4>("inds",ei).reinterpret_bits(int_c); 
- vec3 x1[4] = {vtemp.template pack<3>("xn", inds[0]), - vtemp.template pack<3>("xn", inds[1]), - vtemp.template pack<3>("xn", inds[2]), - vtemp.template pack<3>("xn", inds[3])}; + kd_alpha = kd_alpha,kd_beta = kd_beta, + model = model,amodel = amodel, volf = volf,offset = offset] ZS_LAMBDA (int ei) mutable { + auto DmInv = eles.pack(dim_c<3,3>,"IB",ei); + auto dFdX = dFdXMatrix(DmInv); + auto inds = eles.pack(dim_c<4>,"inds",ei).reinterpret_bits(int_c); + vec3 x1[4] = {vtemp.pack(dim_c<3>,"xn", inds[0]), + vtemp.pack(dim_c<3>,"xn", inds[1]), + vtemp.pack(dim_c<3>,"xn", inds[2]), + vtemp.pack(dim_c<3>,"xn", inds[3])}; + + + mat3 FAct{}; + { + auto x1x0 = x1[1] - x1[0]; + auto x2x0 = x1[2] - x1[0]; + auto x3x0 = x1[3] - x1[0]; + auto Ds = mat3{x1x0[0], x2x0[0], x3x0[0], x1x0[1], x2x0[1], + x3x0[1], x1x0[2], x2x0[2], x3x0[2]}; + FAct = Ds * DmInv; + FAct = FAct * etemp.template pack<3,3>("ActInv",ei); + } + auto dFActdF = dFAdF(etemp.template pack<3,3>("ActInv",ei)); + + // add the force term in gradient + if(eles.hasProperty("mu") && eles.hasProperty("lam")) { + model.mu = eles("mu",ei); + model.lam = eles("lam",ei); + } + auto P = model.first_piola(FAct); + auto vole = eles("vol", ei); + auto vecP = flatten(P); + vecP = dFActdF.transpose() * vecP; + auto dFdXT = dFdX.transpose(); + auto vf = -vole * (dFdXT * vecP); + + auto mg = volf * vole / (T)4.0; + for(int i = 0;i != 4;++i) + for(int d = 0;d !=3 ;++d){ + vf[i*3 + d] += mg[d]; + } - mat3 FAct{}; - { - auto x1x0 = x1[1] - x1[0]; - auto x2x0 = x1[2] - x1[0]; - auto x3x0 = x1[3] - x1[0]; - auto Ds = mat3{x1x0[0], x2x0[0], x3x0[0], x1x0[1], x2x0[1], - x3x0[1], x1x0[2], x2x0[2], x3x0[2]}; - FAct = Ds * DmInv; - FAct = FAct * etemp.template pack<3,3>("ActInv",ei); - } - auto dFActdF = dFAdF(etemp.template pack<3,3>("ActInv",ei)); - - // add the force term in gradient - auto P = model.first_piola(FAct); - auto vole = eles("vol", ei); - auto vecP = flatten(P); - vecP = dFActdF.transpose() * vecP; - auto dFdXT = 
dFdX.transpose(); - auto vf = -vole * (dFdXT * vecP); - - auto mg = volf * vole / 4; - for (int i = 0; i != 4; ++i) { - auto vi = inds[i]; - for (int d = 0; d != 3; ++d) - atomic_add(exec_cuda, &vtemp("grad", d, vi), vf(i * 3 + d) + mg(d)); + // assemble element-wise hessian matrix + auto Hq = model.first_piola_derivative(FAct, true_c); + auto dFdAct_dFdX = dFActdF * dFdX; + // add inertia hessian term + auto H = dFdAct_dFdX.transpose() * Hq * dFdAct_dFdX * vole; + + if(eles.hasProperty("Muscle_ID") && (int)eles("Muscle_ID",ei) >= 0) { + auto fiber = eles.pack(dim_c<3>,"fiber",ei); + if(zs::abs(fiber.norm() - 1.0) < 1e-3) { + fiber /= fiber.norm(); + // if(eles.hasProperty("mu")) { + // amodel.mu = eles("mu",ei); + // // amodel.lam = eles("lam",ei); + + // } + auto aP = amodel.do_first_piola(FAct,fiber); + auto vecAP = flatten(P); + vecAP = dFActdF.transpose() * vecP; + vf -= vole * dFdXT * vecAP *aniso_strength; + + auto aHq = amodel.do_first_piola_derivative(FAct,fiber); + H += dFdAct_dFdX.transpose() * aHq * dFdAct_dFdX * vole * aniso_strength; + // if((int)eles("Muscle_ID",ei) == 0){ + // printf("fiber : %f %f %f,Fa = %f,aP = %f,aHq = %f,H = %f\n",fiber[0],fiber[1],fiber[2],(float)FAct.norm(),(float)aP.norm(),(float)aHq.norm(),(float)H.norm()); + // } } + } - // assemble element-wise hessian matrix - auto Hq = model.first_piola_derivative(FAct, true_c); - auto dFdAct_dFdX = dFActdF * dFdX; - // dFdAct_dFdX = dFdX; - auto H = dFdAct_dFdX.transpose() * Hq * dFdAct_dFdX * vole; - etemp.template tuple<12 * 12>("H", ei) = H; - - // add inertia hessian term - auto m = eles("m",ei);// element-wise mass - for(int i = 0;i < 12;++i){ - // Mass(i,i) = 1; - etemp("H",i * 12 + i,ei) += m /dt2/4; - } + // adding rayleigh damping term + vec3 v0[4] = {vtemp.pack(dim_c<3>,"vn", inds[0]), + vtemp.pack(dim_c<3>,"vn", inds[1]), + vtemp.pack(dim_c<3>,"vn", inds[2]), + vtemp.pack(dim_c<3>,"vn", inds[3])}; + + auto inertia = (T)1.0; + if(eles.hasProperty("inertia")) + inertia = 
eles("inertia",ei); + + auto vel = COLLISION_UTILS::flatten(v0); + auto m = eles("m",ei)/(T)4.0; + auto C = kd_beta * H + kd_alpha * inertia * m * zs::vec::identity(); + auto rdamping = C * vel; + gh_buffer.tuple(dim_c<12>,"grad",ei + offset) = gh_buffer.pack(dim_c<12>,"grad",ei + offset) + vf - rdamping; + // gh_buffer.tuple(dim_c<12>,"grad",ei + offset) = gh_buffer.pack(dim_c<12>,"grad",ei + offset) - rdamping; + // H += kd_beta*H/dt; + + gh_buffer.template tuple<12*12>("H",ei + offset) = gh_buffer.template pack<12,12>("H",ei + offset) + H + C/dt; }); // Bone Driven Potential Energy - T lambda = model.lam; - T mu = model.mu; + // T lambda = model.lam; + // T mu = model.mu; + auto nmEmbedVerts = b_verts.size(); - cudaPol(zs::range(nmEmbedVerts), [this, - bcws = proxy({},b_bcws),b_verts = proxy({},b_verts),vtemp = proxy({},vtemp),etemp = proxy({},etemp), - eles = proxy({},eles),lambda,mu,bone_driven_weight = bone_driven_weight] ZS_LAMBDA(int vi) mutable { + + // TILEVEC_OPS::fill_range<4>(cudaPol,gh_buffer,"inds",zs::vec::uniform(-1).reinterpret_bits(float_c),eles.size() + offset,b_verts.size()); + // TILEVEC_OPS::fill_range<3>(cudaPol,gh_buffer,"grad",zs::vec::zeros(),eles.size() + offset,b_verts.size()); + // TILEVEC_OPS::fill_range<144>(cudaPol,gh_buffer,"H",zs::vec::zeros(),eles.size() + offset,b_verts.size()); + + // we should neglect the inverted element + // std::cout << "nmEmbedVerts : " << nmEmbedVerts << std::endl; + // std::cout << "bcwsize : " << b_bcws.size() << std::endl; + // return; + cudaPol(zs::range(nmEmbedVerts), [ + gh_buffer = proxy({},gh_buffer),model = model, + bcws = proxy({},b_bcws),b_verts = proxy(b_verts),vtemp = proxy({},vtemp),etemp = proxy({},etemp), + eles = proxy({},eles),bone_driven_weight = bone_driven_weight,offset = offset] ZS_LAMBDA(int vi) mutable { auto ei = reinterpret_bits(bcws("inds",vi)); - if(ei < 0) + + if(ei < 0){ + return; - auto inds = eles.pack<4>("inds",ei).reinterpret_bits(); - auto w = bcws.pack<4>("w",vi); + } + 
// if(ei >= etemp.size()){ + // printf("ei too big for etemp\n"); + // return; + // } + // auto is_inverted = reinterpret_bits(etemp("is_inverted",ei)); + // if(is_inverted){ + // if(vi == 0) + // printf("inverted tet\n"); + // return; + // } + + // auto FatID = eles("FatID",ei); + // if(FatID > 0) + // return; + + auto lambda = model.lam; + auto mu = model.mu; + // if(eles.hasProperty("mu") && eles.hasProperty("lam")) { + // mu = eles("mu",ei); + // lambda = eles("lam",ei); + // } + + auto inds = eles.pack(dim_c<4>,"inds",ei).reinterpret_bits(int_c); + // gh_buffer.tuple(dim_c<4>,"inds",vi + offset + eles.size()) = eles.pack(dim_c<4>,"inds",ei); + auto w = bcws.pack(dim_c<4>,"w",vi); + if(w[0] < 1e-4 || w[1] < 1e-4 || w[2] < 1e-4 || w[3] < 1e-4){ + if(vi == 0) + printf("boundary tet\n"); + return; + } auto tpos = vec3::zeros(); - for(size_t i = 0;i != 4;++i) - tpos += w[i] * vtemp.pack<3>("xn",inds[i]); - auto pdiff = tpos - b_verts.pack<3>("x",vi); + for(int i = 0;i != 4;++i) + tpos += w[i] * vtemp.pack(dim_c<3>,"xn",inds[i]); + // auto pdiff = tpos - b_verts.pack<3>("x",vi); + auto pdiff = tpos - b_verts[vi]; T stiffness = 2.0066 * mu + 1.0122 * lambda; + zs::vec elm_grad{}; + // auto elm_H = zs::vec::zeros(); + for(size_t i = 0;i != 4;++i){ - auto tmp = pdiff * (-stiffness * bcws("cnorm",vi) * bone_driven_weight * w[i] * eles("vol",ei)); - // tmp = pdiff * (-lambda * bcws("cnorm",vi) * bone_driven_weight * w[i]); - for(size_t d = 0;d != 3;++d) - atomic_add(exec_cuda,&vtemp("grad",d,inds[i]),(T)tmp[d]); + auto tmp = pdiff * (-stiffness * bcws("strength",vi) * bcws("cnorm",vi) * bone_driven_weight * w[i] * eles("vol",ei)) * eles("bdw",ei); + // if(vi == 0 && i == 0) { + // printf("check: %f %f %f\n",(float)tmp[0],(float)tmp[1],(float)tmp[2]); + // } + for(size_t d = 0;d != 3;++d){ + atomic_add(exec_cuda,&gh_buffer("grad",i*3 + d,ei),tmp[d]); + // elm_grad[i*3 + d] = tmp[d]; + // atomic_add(exec_cuda,&gh_buffer("grad",i * 3 + d,ei),tmp[d]); + } } for(int i = 0;i 
!= 4;++i) for(int j = 0;j != 4;++j){ - T alpha = stiffness * bone_driven_weight * w[i] * w[j] * bcws("cnorm",vi) * eles("vol",ei); + T alpha = stiffness * bone_driven_weight * w[i] * w[j] * bcws("strength",vi) * bcws("cnorm",vi) * eles("vol",ei) * eles("bdw",ei); for(int d = 0;d != 3;++d){ - atomic_add(exec_cuda,&etemp("H",(i * 3 + d) * 12 + j * 3 + d,ei),alpha); + // elm_H(i*3 + d,j*3 + d) = alpha; + atomic_add(exec_cuda,&gh_buffer("H",(i*3 + d)*12 + j*3 + d,ei),alpha); } } - + + // for(int i = 0;i != 12;++i){ + // atomic_add(exec_cuda,&gh_buffer("grad",i,ei),elm_grad[i]); + // for(int j = 0;j != 12;++j) + // atomic_add(exec_cuda,&gh_buffer("H",i*12 + j,ei),elm_H(i,j)); + // } + // gh_buffer.tuple(dim_c<12>,"grad",vi + eles.size() + offset) = elm_grad; + // gh_buffer.tuple(dim_c<12*12>,"H",vi + eles.size() + offset) = elm_H; }); - } + // cudaPol(zs::range(eles.size()), [gh_buffer = proxy({},gh_buffer)] ZS_LAMBDA (int ei) mutable { + // auto H = gh_buffer.template pack<12,12>("H",ei); + // make_pd(H); + // gh_buffer.template tuple<12*12>("H",ei) = H; + // }); + } FEMDynamicSteppingSystem(const tiles_t &verts, const tiles_t &eles, const tiles_t& points,const tiles_t& lines,const tiles_t& tris, T in_collisionEps,T out_collisionEps, - const tiles_t &b_bcws, const tiles_t& b_verts,T bone_driven_weight, - vec3 volf,const T& _dt,const T& collisionStiffness) + const tiles_t &b_bcws, const zs::Vector>& b_verts,T bone_driven_weight, + const vec3& volf,const T& _dt,const T& collisionStiffness, + const T& kine_in_collisionEps,const T& kine_out_collisionEps, + const T& kineCollisionStiffness,const T& aniso_strength) : verts{verts}, eles{eles},points{points}, lines{lines}, tris{tris}, in_collisionEps{in_collisionEps},out_collisionEps{out_collisionEps}, b_bcws{b_bcws}, b_verts{b_verts}, bone_driven_weight{bone_driven_weight}, volf{volf}, - dt{_dt}, dt2{dt * dt},collisionStiffness{collisionStiffness},use_edge_edge_collision{true}, use_vertex_facet_collision{true} {} + 
kine_in_collisionEps{kine_in_collisionEps},kine_out_collisionEps{kine_out_collisionEps}, + kineCollisionStiffness{kineCollisionStiffness},aniso_strength{aniso_strength}, + dt{_dt}, dt2{_dt * _dt},collisionStiffness{collisionStiffness},use_edge_edge_collision{true}, use_vertex_facet_collision{true} {} const tiles_t &verts; const tiles_t &eles; @@ -551,7 +569,7 @@ struct FleshDynamicStepping : INode { const tiles_t &lines; const tiles_t &tris; const tiles_t &b_bcws; // the barycentric interpolation of embeded bones - const tiles_t &b_verts; // the position of embeded bones + const zs::Vector> &b_verts; // the position of embeded bones T bone_driven_weight; vec3 volf; @@ -566,6 +584,12 @@ struct FleshDynamicStepping : INode { bool use_edge_edge_collision; bool use_vertex_facet_collision; + T kine_in_collisionEps; + T kine_out_collisionEps; + T kineCollisionStiffness; + + T aniso_strength; + // int default_muscle_id; // zs::vec default_muscle_dir; // T default_act; @@ -576,22 +600,27 @@ struct FleshDynamicStepping : INode { + void apply() override { using namespace zs; auto zsparticles = get_input("ZSParticles"); auto gravity = zeno::vec<3,T>(0); if(has_input("gravity")) - gravity = get_input("gravity")->get>(); + gravity = get_input2>("gravity"); T armijo = (T)1e-4; T wolfe = (T)0.9; - // T cg_res = (T)0.001; - T cg_res = (T)0.0001; + // T cg_res = (T)0.01; + // T cg_res = (T)0.0001; + T cg_res = get_param("cg_res"); T btl_res = (T)0.1; auto models = zsparticles->getModel(); auto& verts = zsparticles->getParticles(); auto& eles = zsparticles->getQuadraturePoints(); - if(eles.getPropertySize("inds") != 4) + // zs::Vector(MAX_VERTS) + // TileVec("pos","tag","deleted","") + + if(eles.getChannelSize("inds") != 4) throw std::runtime_error("the input zsparticles is not a tetrahedra mesh"); if(!zsparticles->hasAuxData(ZenoParticles::s_surfTriTag)) throw std::runtime_error("the input zsparticles has no surface tris"); @@ -599,55 +628,94 @@ struct FleshDynamicStepping : INode 
{ throw std::runtime_error("the input zsparticles has no surface lines"); if(!zsparticles->hasAuxData(ZenoParticles::s_surfVertTag)) throw std::runtime_error("the input zsparticles has no surface points"); - // if(!zsparticles->hasBvh(ZenoParticles::s_surfTriTag)) { - // throw std::runtime_error("the input zsparticles has no surface tris's spacial structure"); - // } - // if(!zsparticles->hasBvh(ZenoParticles::s_surfEdgeTag)) { - // throw std::runtime_error("the input zsparticles has no surface edge's spacial structure"); - // } - // if(!zsparticles->hasBvh(ZenoParticles::s_surfVertTag)) { - // throw std::runtime_error("the input zsparticles has no surface vert's spacial structure"); - // } auto& tris = (*zsparticles)[ZenoParticles::s_surfTriTag]; auto& lines = (*zsparticles)[ZenoParticles::s_surfEdgeTag]; auto& points = (*zsparticles)[ZenoParticles::s_surfVertTag]; - // auto& stBvh = zsparticles->bvh(ZenoParticles::s_surfTriTag); - // auto& seBvh = zsparticles->bvh(ZenoParticles::s_surfEdgeTag); - - - auto zsbones = get_input("driven_boudary"); + auto zsbones = get_input("driven_boudary"); auto driven_tag = get_input2("driven_tag"); auto bone_driven_weight = get_input2("driven_weight"); auto muscle_id_tag = get_input2("muscle_id_tag"); - // auto bone_driven_weight = (T)0.02; - auto newton_res = (T)0.01; + // auto bone_driven_weight = (T)0.02; + + auto newton_res = get_input2("newton_res"); auto dt = get_input2("dt"); auto volf = vec3::from_array(gravity * models.density); - std::vector act_; + std::vector act_; std::size_t nm_acts = 0; if(has_input("Acts")) { - act_ = get_input("Acts")->getLiterial(); + act_ = get_input("Acts")->getLiterial(); nm_acts = act_.size(); } constexpr auto host_space = zs::execspace_e::openmp; auto ompExec = zs::omp_exec(); - auto act_buffer = dtiles_t{{{"act",1}},nm_acts,zs::memsrc_e::host}; + auto act_buffer = dtiles_t{{{"act",2}},nm_acts,zs::memsrc_e::host}; ompExec(zs::range(act_buffer.size()), [act_buffer = 
proxy({},act_buffer),act_] (int i) mutable { - act_buffer("act",i) = act_[i]; + act_buffer.tuple(dim_c<2>,"act",i) = vec2(act_[i][0],act_[i][1]); }); + act_buffer = act_buffer.clone({zs::memsrc_e::device, 0}); + const auto& zsbones_verts = zsbones->verts; + zs::Vector> bverts{zsbones_verts.size()}; + ompExec(zs::range(zsbones_verts.size()), + [bverts = proxy(bverts),&zsbones_verts] (int i) mutable { + auto v = zsbones_verts[i]; + bverts[i] = zs::vec{v[0],v[1],v[2]}; + }); + bverts = bverts.clone({zs::memsrc_e::device,0}); + + + constexpr auto space = execspace_e::cuda; + auto cudaPol = cuda_exec(); + + auto kverts = typename ZenoParticles::particles_t({ + {"x",3}, + {"xp",3}, + {"area",1}},0,zs::memsrc_e::device,0); + if(has_input("kinematic_boundary")){ + auto kinematic_boundary = get_input("kinematic_boundary"); + // if (kinematic_boundary.empty()) + + // const auto& prim_kverts = kinematic_boundary.verts; + // auto& prim_kverts_area = kinematic_boundary.attr("area"); + auto& kb_verts = kinematic_boundary->getParticles(); + + // auto& kb_tris = kinematic_boundary->getQuadraturePoints(); + // if(kb_tris.getPropertySize("inds") != 3){ + // fmt::print(fg(fmt::color::red),"the kinematic boundary is not a surface triangulate mesh\n"); + // throw std::runtime_error("the kinematic boundary is not a surface triangulate mesh"); + // } + // if(!kb_tris.hasProperty("area")){ + // fmt::print(fg(fmt::color::red),"the kinematic boundary has no 'area' channel\n"); + // throw std::runtime_error("the kinematic boundary has no 'area' channel"); + // } + kverts.resize(kb_verts.size()); + TILEVEC_OPS::copy<3>(cudaPol,kb_verts,"x",kverts,"x"); + TILEVEC_OPS::copy<3>(cudaPol,kb_verts,"x",kverts,"xp"); + TILEVEC_OPS::fill(cudaPol,kverts,"area",(T)1.0); + } + // std::cout << "nm_kb_tris : " << kb_tris.size() << " nm_kb_verts : " << kb_verts.size() << std::endl; + // cudaPol(zs::range(kb_tris.size()), + // [kb_verts = proxy({},kb_verts),kb_tris = proxy({},kb_tris),kverts = 
proxy({},kverts)] ZS_LAMBDA(int ti) mutable { + // auto tri = kb_tris.pack(dim_c<3>,"inds",ti).reinterpret_bits(int_c); + // for(int i = 0;i != 3;++i) + // atomic_add(exec_cuda,&kverts("area",tri[i]),(T)kb_tris("area",ti)/(T)3.0); + // if(ti == 0) + // printf("tri[0] area : %f\n",(float)kb_tris("area",ti)); + // }); + + const auto& bbw = (*zsparticles)[driven_tag]; // the temp buffer only store the data that will change every iterations or every frame static dtiles_t vtemp{verts.get_allocator(), { @@ -657,19 +725,23 @@ struct FleshDynamicStepping : INode { {"dir", 3}, {"xn", 3}, {"xp",3}, + {"vn",3}, {"vp",3}, {"is_inverted",1}, - {"active",1} + {"active",1}, + {"k_active",1}, + // {"inertia",1}, + {"k_thickness",1}, },verts.size()}; // auto max_collision_pairs = tris.size() / 10; - static dtiles_t etemp{eles.get_allocator(), { - {"H", 12 * 12}, - {"inds",4}, - {"ActInv",3*3}, + static dtiles_t etemp(eles.get_allocator(), { + // {"H", 12 * 12}, + {"ActInv",3*3}, // {"muscle_ID",1}, - // {"fiber",3} - }, eles.size()}; + {"is_inverted",1} + }, eles.size() + ); // {{tags}, cnt, memsrc_e::um, 0} static dtiles_t sttemp(tris.get_allocator(), @@ -683,66 +755,105 @@ struct FleshDynamicStepping : INode { },lines.size() ); - static dtiles_t cptemp(points.get_allocator(),{ + // std::cout << "sttemp.size() << " << sttemp.size() << std::endl; + // std::cout << "setemp.size() << " << setemp.size() << std::endl; + + int fp_buffer_size = points.size() * MAX_FP_COLLISION_PAIRS; + // int fp_buffer_size = 0; + + static dtiles_t fp_buffer(points.get_allocator(),{ {"inds",4}, {"area",1}, - {"grad",12}, {"inverted",1}, - {"H",12 * 12} - },points.size() * MAX_FP_COLLISION_PAIRS); + },fp_buffer_size); + // static dtiles_t ee_buffer(lines.get_allocator(),{ + // {"inds",4}, + // {"area",1}, + // {"inverted",1}, + // {"abary",2}, + // {"bbary",2}, + // {"bary",4} + // },lines.size()); - constexpr auto space = execspace_e::cuda; - auto cudaPol = cuda_exec(); - + // int ee_buffer_size = 
ee_buffer.size(); + int ee_buffer_size = 0; - // TILEVEC_OPS::fill<4>(cudaPol,etemp,"inds",zs::vec::uniform(-1).template reinterpret_bits()) - TILEVEC_OPS::copy<4>(cudaPol,eles,"inds",etemp,"inds"); - auto avgl = compute_average_edge_length(cudaPol,verts,"x",tris); - // auto avgl = (T)1.0; + int kc_buffer_size = kverts.size() * MAX_FP_COLLISION_PAIRS; + // int kc_buffer_size = 0; - auto collisionStiffness = get_input2("cstiffness"); + static dtiles_t kc_buffer(points.get_allocator(),{ + {"inds",2}, + {"area",1}, + {"inverted",1}, + },kc_buffer_size); + // int kc_buffer_size = kc_buffer.size(); + // int kc_buffer_size = 0; - // auto inset_ratio = get_input2("collision_inset"); - // auto outset_ratio = get_input2("collision_outset"); +// change + // static dtiles_t gh_buffer(eles.get_allocator(),{ + // {"inds",4}, + // {"H",12*12}, + // {"grad",12} + // },eles.size() + bbw.size() + fp_buffer.size() + kc_buffer_size); - auto in_collisionEps = get_input2("in_collisionEps"); - auto out_collisionEps = get_input2("out_collisionEps"); + static dtiles_t gh_buffer(eles.get_allocator(),{ + {"inds",4}, + {"H",12*12}, + {"grad",12} + },eles.size() + fp_buffer.size()); - FEMDynamicSteppingSystem A{ - verts,eles, - points,lines,tris, - (T)in_collisionEps,(T)out_collisionEps, - (*zsparticles)[driven_tag],zsbones->getParticles(),bone_driven_weight, - volf,dt,collisionStiffness}; - // TILEVEC_OPS::fill<9>(cudaPol,etemp,"ActInv",zs::vec{1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0}); + // TILEVEC_OPS::fill<4>(cudaPol,etemp,"inds",zs::vec::uniform(-1).template reinterpret_bits()) + // TILEVEC_OPS::copy<4>(cudaPol,eles,"inds",etemp,"inds"); + TILEVEC_OPS::fill<9>(cudaPol,etemp,"ActInv",zs::vec{1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0}); + // TILEVEC_OPS::fill(cudaPol,vtemp,"inertia",(T)1.0); + // if(verts.hasProperty("inertia")) + // TILEVEC_OPS::copy(cudaPol,verts,"inertia",vtemp,"inertia"); + if(verts.hasProperty("k_thickness")) + TILEVEC_OPS::copy(cudaPol,verts,"k_thickness",vtemp,"k_thickness"); 
+ else + TILEVEC_OPS::fill(cudaPol,vtemp,"k_thickness",(T)1.0); // apply muscle activation + + if(!eles.hasProperty("Act")) + eles.append_channels(cudaPol,{{"Act",1}}); + + if(!eles.hasProperty(muscle_id_tag) || !eles.hasProperty("fiber")) + fmt::print(fg(fmt::color::red),"the quadrature has no \"{}\" muscle_id_tag\n",muscle_id_tag); + if(nm_acts == 0) + fmt::print(fg(fmt::color::red),"no activation input\n"); + cudaPol(zs::range(eles.size()), [etemp = proxy({},etemp),eles = proxy({},eles), - act_buffer = proxy({},act_buffer),muscle_id_tag = SmallString(muscle_id_tag),nm_acts,avgl] ZS_LAMBDA(int ei) mutable { + act_buffer = proxy({},act_buffer),muscle_id_tag = SmallString(muscle_id_tag),nm_acts] ZS_LAMBDA(int ei) mutable { // auto act = eles.template pack<3>("act",ei); // auto fiber = etemp.template pack<3>("fiber",ei); - zs::vec fiber{}; - if(!eles.hasProperty("fiber")) + + vec3 act{1.0,1.0,1.0}; + vec3 fiber{}; + // float a = 1.0f; + if(eles.hasProperty("fiber") && eles.hasProperty(muscle_id_tag) && nm_acts > 0 && (int)eles(muscle_id_tag,ei) >= 0 && fabs(eles.template pack<3>("fiber",ei).norm() - 1.0) < 0.001 && (int)eles(muscle_id_tag,ei) < act_buffer.size()){ fiber = eles.template pack<3>("fiber",ei); - else + auto ID = (int)eles(muscle_id_tag,ei); + auto a = 1. - act_buffer("act",0,ID); + auto b = 1. - act_buffer("act",1,ID); + // act = vec3{zs::sqrt(a),zs::sqrt(1./a),zs::sqrt(1./a)}; + // auto aclamp = + // act = vec3{a < 0.7 ? 0.7 : a,zs::sqrt(1./a),zs::sqrt(1./a)}; + act = vec3{a,zs::sqrt(1./b),zs::sqrt(1./b)}; + eles("Act",ei) = act_buffer("act",0,ID) + 1e-6; + }else{ fiber = zs::vec(1.0,0.0,0.0); - vec3 act{1.0,1.0,1.0}; - - - auto nfiber = fiber.norm(); - // auto ID = etemp("muscle_ID",ei); - int ID = -1; - if(eles.hasProperty(muscle_id_tag)) - ID = (int)eles(muscle_id_tag,ei); - - if(nm_acts > 0 && ID > -1){ - float a = 1. 
- act_buffer("act",ID); - act = vec3{1,zs::sqrt(1./a),zs::sqrt(1./a)}; + act = vec3{1,1,1}; + eles("Act",ei) = (T)0.0; + } + if(fabs(fiber.norm() - 1.0) > 0.1) { + printf("invalid fiber[%d] detected : %f %f %f\n",(int)ei, + (float)fiber[0],(float)fiber[1],(float)fiber[2]); } vec3 dir[3]; @@ -769,147 +880,177 @@ struct FleshDynamicStepping : INode { Act(2,2) = act[2]; Act = R * Act * R.transpose(); - - // if(ei == 0) { - // printf("Act[0]:\n%f %f %f\n%f %f %f\n%f %f %f\n", - // (float)Act(0,0),(float)Act(0,1),(float)Act(0,2), - // (float)Act(1,0),(float)Act(1,1),(float)Act(1,2), - // (float)Act(2,0),(float)Act(2,1),(float)Act(2,2)); - // } - etemp.template tuple<9>("ActInv",ei) = zs::inverse(Act); + // if(a < 1.0f) { + // auto ActInv = etemp.template pack<3,3>("ActInv",ei); + // printf("ActInv[%d] : \n%f %f %f\n%f %f %f\n%f %f %f\n",ei, + // (float)ActInv(0,0),(float)ActInv(0,1),(float)ActInv(0,2), + // (float)ActInv(1,0),(float)ActInv(1,1),(float)ActInv(1,2), + // (float)ActInv(2,0),(float)ActInv(2,1),(float)ActInv(2,2)); + // } }); + auto collisionStiffness = get_input2("cstiffness"); + auto kineCollisionStiffness = get_input2("kineCstiffness"); + + + // auto inset_ratio = get_input2("collision_inset"); + // auto outset_ratio = get_input2("collision_outset"); + + auto in_collisionEps = get_input2("in_collisionEps"); + auto out_collisionEps = get_input2("out_collisionEps"); + + auto kine_in_collisionEps = get_input2("kine_inCollisionEps"); + auto kine_out_collisionEps = get_input2("kine_outCollisionEps"); + + auto aniso_strength = get_input2("aniso_strength"); + + FEMDynamicSteppingSystem A{ + verts,eles, + points,lines,tris, + (T)in_collisionEps,(T)out_collisionEps, + bbw,bverts,bone_driven_weight, + volf,dt,collisionStiffness, + (T)kine_in_collisionEps,(T)kine_out_collisionEps, + (T)kineCollisionStiffness,(T)aniso_strength}; + // std::cout << "set initial guess" << std::endl; // setup initial guess + // if(verts.hasProperty("dt")) { + // std::cout << "verts has 
property 'dt'" << std::endl; + // } + TILEVEC_OPS::copy<3>(cudaPol,verts,"x",vtemp,"xp"); TILEVEC_OPS::copy<3>(cudaPol,verts,"v",vtemp,"vp"); - TILEVEC_OPS::copy(cudaPol,verts,"active",vtemp,"active"); - if(verts.hasProperty("init_x")) - TILEVEC_OPS::copy<3>(cudaPol,verts,"init_x",vtemp,"xn"); - else { - // TILEVEC_OPS::add<3>(cudaPol,vtemp,"xp",1.0,"vp",dt,"xn"); - TILEVEC_OPS::add<3>(cudaPol,vtemp,"xp",1.0,"vp",(T)0.0,"xn"); - } - TILEVEC_OPS::fill(cudaPol,vtemp,"bou_tag",(T)0.0); - - - auto bvh_thickness = 5 * avgl; + if(verts.hasProperty("active")) + TILEVEC_OPS::copy(cudaPol,verts,"active",vtemp,"active"); + else + TILEVEC_OPS::fill(cudaPol,vtemp,"active",(T)1.0); + + if(verts.hasProperty("k_active")) + TILEVEC_OPS::copy(cudaPol,verts,"k_active",vtemp,"k_active"); + else + TILEVEC_OPS::fill(cudaPol,vtemp,"k_active",(T)1.0); + + // if there is no init_x as guess, then use the baraff witkin approach + // if(verts.hasProperty("init_x")) + // TILEVEC_OPS::copy<3>(cudaPol,verts,"init_x",vtemp,"xn"); + // else { + // TILEVEC_OPS::add<3>(cudaPol,vtemp,"xp",1.0,"vp",dt,"xn"); + TILEVEC_OPS::copy(cudaPol,verts,"v",vtemp,"vn"); + TILEVEC_OPS::copy(cudaPol,verts,"x",vtemp,"xn"); + // TILEVEC_OPS::add<3>(cudaPol,verts,"x",1.0,"vp",(T)0.0,"xn"); + // } + if(verts.hasProperty("bou_tag") && verts.getPropertySize("bou_tag") == 1) + TILEVEC_OPS::copy(cudaPol,verts,"bou_tag",vtemp,"bou_tag"); + else + TILEVEC_OPS::fill(cudaPol,vtemp,"bou_tag",(T)0.0); - int max_newton_iterations = 5; + int max_newton_iterations = get_param("max_newton_iters"); int nm_iters = 0; - // make sure, at least one baraf simi-implicit step will be taken auto res0 = 1e10; + auto kd_alpha = get_input2("kd_alpha"); + auto kd_beta = get_input2("kd_beta"); + auto kd_theta = get_input2("kd_theta"); + + auto max_cg_iters = get_param("max_cg_iters"); + while(nm_iters < max_newton_iterations) { + // break; + + TILEVEC_OPS::fill(cudaPol,gh_buffer,"grad",(T)0.0); + TILEVEC_OPS::fill(cudaPol,gh_buffer,"H",(T)0.0); 
+ TILEVEC_OPS::fill<4>(cudaPol,gh_buffer,"inds",zs::vec::uniform(-1).reinterpret_bits(float_c)); + A.findInversion(cudaPol,vtemp,etemp); + // match([&](auto &elasticModel,auto &anisoModel) -> std::enable_if_t>> {...},[](...) { + // A.computeGradientAndHessian(cudaPol, elasticModel,anisoModel,vtemp,etemp,gh_buffer,kd_alpha,kd_beta); + // })(models.getElasticModel(),models.getAnisoElasticModel()); + + match([&](auto &elasticModel,zs::AnisotropicArap &anisoModel){ + A.computeGradientAndHessian(cudaPol, elasticModel,anisoModel,vtemp,etemp,gh_buffer,kd_alpha,kd_beta); + },[](...) { + throw std::runtime_error("unsupported anisotropic elasticity model"); + })(models.getElasticModel(),models.getAnisoElasticModel()); match([&](auto &elasticModel) { - A.computeGradientAndHessian(cudaPol, elasticModel,vtemp,etemp); + A.computeCollisionGradientAndHessian(cudaPol,elasticModel, + vtemp, + etemp, + sttemp, + setemp, + // ee_buffer, + fp_buffer, + kverts, + kc_buffer, + gh_buffer,kd_theta); })(models.getElasticModel()); - bool include_collision = true; - if(include_collision) { - - // if(!calculate_facet_normal(cudaPol,vtemp,"xn",tris,sttemp,"nrm")){ - // throw std::runtime_error("fail updating facet normal"); - // } - - // if(!COLLISION_UTILS::calculate_cell_bisector_normal(cudaPol, - // vtemp,"xn", - // lines, - // tris, - // sttemp,"nrm", - // setemp,"nrm")){ - // throw std::runtime_error("fail calculate cell bisector normal"); - // } - - // auto stbvs = retrieve_bounding_volumes(cudaPol,vtemp,tris,wrapv<3>{},(T)0.0,"xn"); - // auto sebvs = retrieve_bounding_volumes(cudaPol,vtemp,lines,wrapv<2>{},(T)0.0,"xn"); - // stBvh.refit(cudaPol,stbvs); - // seBvh.refit(cudaPol,sebvs); - - match([&](auto &elasticModel) { - A.computeCollisionGradientAndHessian(cudaPol,elasticModel, - vtemp, - etemp, - sttemp, - setemp, - cptemp, - // stBvh, - // seBvh, - bvh_thickness); - })(models.getElasticModel()); - - } - - 
PCG::prepare_block_diagonal_preconditioner<4,3>(cudaPol,"H",etemp,cptemp,"P",vtemp); + TILEVEC_OPS::fill(cudaPol,vtemp,"grad",(T)0.0); + TILEVEC_OPS::assemble(cudaPol,gh_buffer,"grad","inds",vtemp,"grad"); + // break; + + PCG::prepare_block_diagonal_preconditioner<4,3>(cudaPol,"H",gh_buffer,"P",vtemp); + // PCG::precondition<3>(cudaPol,vtemp,"P","grad","q"); + // T res = TILEVEC_OPS::inf_norm<3>(cudaPol, vtemp, "q"); + // if(res < newton_res){ + // fmt::print(fg(fmt::color::cyan),"reach desire newton res {} : {}\n",newton_res,res); + // break; + // } + // auto nP = TILEVEC_OPS::inf_norm<9>(cudaPol,vtemp,"P"); + // std::cout << "nP : " << nP << std::endl; // PCG::prepare_block_diagonal_preconditioner<4,3>(cudaPol,"H",etemp,"P",vtemp); // if the grad is too small, return the result // Solve equation using PCG - TILEVEC_OPS::fill<3>(cudaPol,vtemp,"dir",zs::vec::zeros()); + TILEVEC_OPS::fill(cudaPol,vtemp,"dir",(T)0.0); // std::cout << "solve using pcg" << std::endl; - PCG::pcg_with_fixed_sol_solve<3,4>(cudaPol,vtemp,etemp,cptemp,"dir","bou_tag","grad","P","inds","H",cg_res,1000,50); - // PCG::pcg_with_fixed_sol_solve<3,4>(cudaPol,vtemp,etemp,"dir","bou_tag","grad","P","inds","H",cg_res,1000,50); - // std::cout << "finish solve pcg" << std::endl; - PCG::project<3>(cudaPol,vtemp,"dir","bou_tag"); + auto nm_CG_iters = PCG::pcg_with_fixed_sol_solve<3,4>(cudaPol,vtemp,gh_buffer,"dir","bou_tag","grad","P","inds","H",cg_res,max_cg_iters,100); + fmt::print(fg(fmt::color::cyan),"nm_cg_iters : {}\n",nm_CG_iters); T alpha = 1.; - cudaPol(zs::range(vtemp.size()), [vtemp = proxy({}, vtemp),alpha] __device__(int i) mutable { - vtemp.tuple<3>("xn", i) = - vtemp.pack<3>("xn", i) + alpha * vtemp.pack<3>("dir", i); + + auto nxn = TILEVEC_OPS::inf_norm<3>(cudaPol,vtemp,"xn"); + auto ndir = TILEVEC_OPS::dot<3>(cudaPol,vtemp,"dir","dir"); + auto nP = TILEVEC_OPS::dot<9>(cudaPol,vtemp,"P","P"); + + // std::cout << "vtemp's xn : " << nxn << std::endl; + // std::cout << "vtemp's dir : " << 
ndir << std::endl; + // std::cout << "vtemp's P : " << nP << std::endl; + + cudaPol(zs::range(vtemp.size()), [vtemp = proxy({}, vtemp),alpha,dt] __device__(int i) mutable { + vtemp.template tuple<3>("xn", i) = + vtemp.template pack<3>("xn", i) + alpha * vtemp.template pack<3>("dir", i); + vtemp.template tuple<3>("vn",i) = + (vtemp.template pack<3>("xn",i) - vtemp.template pack<3>("xp",i))/dt; }); - T res = TILEVEC_OPS::inf_norm<3>(cudaPol, vtemp, "dir");// this norm is independent of descriterization - std::cout << "res[" << nm_iters << "] : " << res << std::endl; - if(res < 1e-3) - break; - - // keep dropping, to avoid explosion - if(res < res0) - res0 = res; - else { - // reverse - cudaPol(zs::range(vtemp.size()), [vtemp = proxy({}, vtemp),alpha] __device__(int i) mutable { - vtemp.tuple<3>("xn", i) = - vtemp.pack<3>("xn", i) - alpha * vtemp.pack<3>("dir", i); - }); + // nxn = TILEVEC_OPS::inf_norm<3>(cudaPol,vtemp,"xn"); + // std::cout << "new vtemp's xn : " << nxn << std::endl; + - break; - } + // res = TILEVEC_OPS::inf_norm<3>(cudaPol, vtemp, "dir");// this norm is independent of descriterization + // std::cout << "res[" << nm_iters << "] : " << res << std::endl; + // if(res < newton_res){ + // fmt::print(fg(fmt::color::cyan),"reach desire newton res {} : {}\n",newton_res,res); + // break; + // } nm_iters++; } - cudaPol(zs::range(verts.size()), - [vtemp = proxy({}, vtemp), verts = proxy({}, verts),dt] __device__(int vi) mutable { - auto newX = vtemp.pack<3>("xn", vi); - verts.tuple<3>("x", vi) = newX; - verts.tuple<3>("v",vi) = (vtemp.pack<3>("xn",vi) - vtemp.pack<3>("xp",vi))/dt; + [vtemp = proxy({}, vtemp), verts = proxy({}, verts),dt = dt] __device__(int vi) mutable { + // auto newX = vtemp.pack(dim_c<3>,"xn", vi); + verts.tuple<3>("x", vi) = vtemp.pack(dim_c<3>,"xn", vi); + // if(verts.hasProperty("dt")) + // dt = verts("dt",vi); + verts.tuple<3>("v",vi) = vtemp.pack<3>("vn",vi); }); - dtiles_t nodalForceVis(verts.get_allocator(), - { - {"x",3}, - 
{"dir",3}, - },verts.size()); - - - - // TILEVEC_OPS::copy<3>(cudaPol,vtemp,"xn",nodalForceVis,"x"); - // TILEVEC_OPS::fill<3>(cudaPol,nodalForceVis,"dir",zs::vec::zeros()); - // TILEVEC_OPS::assemble<3,4>(cudaPol,cptemp,"grad",nodalForceVis,"dir"); - - - - - set_output("ZSParticles", zsparticles); } - - }; -ZENDEFNODE(FleshDynamicStepping, {{"ZSParticles", +ZENDEFNODE(FleshDynamicStepping, {{"ZSParticles","kinematic_boundary", "gravity","Acts", "driven_boudary", {"string","driven_tag","bone_bw"}, @@ -918,13 +1059,21 @@ ZENDEFNODE(FleshDynamicStepping, {{"ZSParticles", {"float","cstiffness","0.0"}, {"float","in_collisionEps","0.01"}, {"float","out_collisionEps","0.01"}, - {"float","dt","0.5"} + {"float","kineCstiffness","1"}, + {"float","kine_inCollisionEps","0.01"}, + {"float","kine_outCollisionEps","0.02"}, + {"float","dt","0.5"}, + {"float","newton_res","0.001"}, + {"float","kd_alpha","0.01"}, + {"float","kd_beta","0.01"}, + {"float","kd_theta","0.01"}, + {"float","aniso_strength","1.0"}, }, {"ZSParticles"}, { + {"int","max_cg_iters","1000"}, + {"int","max_newton_iters","5"}, + {"float","cg_res","0.0001"} }, {"FEM"}}); - - - }; \ No newline at end of file diff --git a/projects/CuLagrange/fem/FleshQuasiStepping.cu b/projects/CuLagrange/fem/FleshQuasiStepping.cu deleted file mode 100644 index 8ee94fb5d5..0000000000 --- a/projects/CuLagrange/fem/FleshQuasiStepping.cu +++ /dev/null @@ -1,588 +0,0 @@ -#include "Structures.hpp" -#include "zensim/Logger.hpp" -#include "zensim/cuda/execution/ExecutionPolicy.cuh" -#include "zensim/omp/execution/ExecutionPolicy.hpp" -#include "zensim/geometry/PoissonDisk.hpp" -#include "zensim/geometry/VdbLevelSet.h" -#include "zensim/geometry/VdbSampler.h" -#include "zensim/io/MeshIO.hpp" -#include "zensim/math/bit/Bits.h" -#include "zensim/types/Property.h" -#include -#include -#include -#include -#include -#include - -#include "../geometry/linear_system/mfcg.hpp" - -namespace zeno { - - -struct FleshQuasiStaticStepping : INode { - 
using T = float; - using dtiles_t = zs::TileVector; - using tiles_t = typename ZenoParticles::particles_t; - using vec3 = zs::vec; - using mat3 = zs::vec; - struct FEMQuasiStaticSystem { - - constexpr auto dFAdF(const mat3& A) { - zs::vec M{}; - M(0,0) = M(1,1) = M(2,2) = A(0,0); - M(3,0) = M(4,1) = M(5,2) = A(0,1); - M(6,0) = M(7,1) = M(8,2) = A(0,2); - - M(0,3) = M(1,4) = M(2,5) = A(1,0); - M(3,3) = M(4,4) = M(5,5) = A(1,1); - M(6,3) = M(7,4) = M(8,5) = A(1,2); - - M(0,6) = M(1,7) = M(2,8) = A(2,0); - M(3,6) = M(4,7) = M(5,8) = A(2,1); - M(6,6) = M(7,7) = M(8,8) = A(2,2); - - return M; - } - - - template - T energy(Pol &pol, const Model &model, const zs::SmallString tag, dtiles_t& vtemp,dtiles_t& etemp) { - using namespace zs; - constexpr auto space = execspace_e::cuda; - Vector res{verts.get_allocator(), 1}; - res.setVal(0); - bool shouldSync = pol.shouldSync(); - pol.sync(true); - // elastic potential - pol(range(eles.size()), [verts = proxy({}, verts), - eles = proxy({}, eles), - vtemp = proxy({}, vtemp), - etemp = proxy({},etemp), - res = proxy(res), tag, model = model,volf = volf] - ZS_LAMBDA (int ei) mutable { - auto DmInv = eles.template pack<3, 3>("IB", ei); - auto inds = eles.template pack<4>("inds", ei).template reinterpret_bits(); - vec3 xs[4] = {vtemp.pack<3>(tag, inds[0]), vtemp.pack<3>(tag, inds[1]), - vtemp.pack<3>(tag, inds[2]), vtemp.pack<3>(tag, inds[3])}; - mat3 FAct{}; - { - auto x1x0 = xs[1] - xs[0]; - auto x2x0 = xs[2] - xs[0]; - auto x3x0 = xs[3] - xs[0]; - auto Ds = mat3{x1x0[0], x2x0[0], x3x0[0], x1x0[1], x2x0[1], - x3x0[1], x1x0[2], x2x0[2], x3x0[2]}; - FAct = Ds * DmInv; - - FAct = FAct * etemp.template pack<3,3>("ActInv",ei); - - // if(ei == 0) { - // printf("FAct in energy : \n%f\t%f\t%f\n%f\t%f\t%f\n%f\t%f\t%f\n", - // (float)FAct(0,0),(float)FAct(0,1),(float)FAct(0,2), - // (float)FAct(1,0),(float)FAct(1,1),(float)FAct(1,2), - // (float)FAct(2,0),(float)FAct(2,1),(float)FAct(2,2)); - // } - } - - auto psi = model.psi(FAct); - auto 
vole = eles("vol", ei); - - T gpsi = 0; - for(int i = 0;i != 4;++i) - gpsi += (-volf.dot(xs[i])/4); - - atomic_add(exec_cuda, &res[0], (T)(vole * (psi + gpsi))); - }); -// Bone Driven Potential Energy - T lambda = model.lam; - T mu = model.mu; - auto nmEmbedVerts = b_verts.size(); - if(b_bcws.size() != b_verts.size()){ - fmt::print("B_BCWS_SIZE = {}\t B_VERTS_SIZE = {}\n",b_bcws.size(),b_verts.size()); - throw std::runtime_error("B_BCWS SIZE AND B_VERTS SIZE NOT MATCH"); - } - pol(range(nmEmbedVerts), [vtemp = proxy({},vtemp), - eles = proxy({},eles), - b_verts = proxy({},b_verts), - bcws = proxy({},b_bcws),lambda,mu,tag,res = proxy(res),bone_driven_weight = bone_driven_weight] - ZS_LAMBDA(int vi) mutable { - auto ei = reinterpret_bits(bcws("inds",vi)); - if(ei < 0) - return; - auto inds = eles.pack(dim_c<4>, "inds", ei).reinterpret_bits(); - auto w = bcws.pack(dim_c<4>, "w",vi); - - auto tpos = vec3::zeros(); - for(size_t i = 0;i != 4;++i) - tpos += w[i] * vtemp.pack<3>(tag,inds[i]); - auto pdiff = tpos - b_verts.pack<3>("x",vi); - - T stiffness = 2.0066 * mu + 1.0122 * lambda; - // if(eles("vol",ei) < 0) - // printf("WARNING INVERT TET DETECTED<%d> %f\n",ei,(float)eles("vol",ei)); - T bpsi = (0.5 * bcws("cnorm",vi) * stiffness * bone_driven_weight * eles("vol",ei)) * pdiff.l2NormSqr(); - // bpsi = (0.5 * bcws("cnorm",vi) * lambda * bone_driven_weight) * pdiff.dot(pdiff); - // the cnorm here should be the allocated volume of point in embeded tet - atomic_add(exec_cuda, &res[0], (T)bpsi); - }); - pol.sync(shouldSync); - return res.getVal(); - } - - template - void computeGradientAndHessian(zs::CudaExecutionPolicy& cudaPol, - const Model& model, - const zs::SmallString tag, - dtiles_t& vtemp, - dtiles_t& etemp) { - using namespace zs; - constexpr auto space = execspace_e::cuda; - // fmt::print("check here 0"); - TILEVEC_OPS::fill<3>(cudaPol,vtemp,"grad",zs::vec::zeros()); - TILEVEC_OPS::fill<144>(cudaPol,etemp,"He",zs::vec::zeros()); - 
cudaPol(zs::range(eles.size()), [this, - vtemp = proxy({}, vtemp), - etemp = proxy({}, etemp), - bcws = proxy({},b_bcws), - b_verts = proxy({},b_verts), - verts = proxy({}, verts), - eles = proxy({}, eles),tag, model, volf = volf] ZS_LAMBDA (int ei) mutable { - auto DmInv = eles.template pack<3, 3>("IB", ei); - auto dFdX = dFdXMatrix(DmInv); - auto inds = eles.template pack<4>("inds", ei).template reinterpret_bits(); - vec3 xs[4] = {vtemp.pack<3>(tag, inds[0]), vtemp.pack<3>(tag, inds[1]), - vtemp.pack<3>(tag, inds[2]), vtemp.pack<3>(tag, inds[3])}; - mat3 FAct{}; - { - auto x1x0 = xs[1] - xs[0]; - auto x2x0 = xs[2] - xs[0]; - auto x3x0 = xs[3] - xs[0]; - auto Ds = mat3{x1x0[0], x2x0[0], x3x0[0], x1x0[1], x2x0[1], - x3x0[1], x1x0[2], x2x0[2], x3x0[2]}; - FAct = Ds * DmInv; - - FAct = FAct * etemp.template pack<3,3>("ActInv",ei); - - // if(ei == 0) { - // printf("FAct in gH : \n%f\t%f\t%f\n%f\t%f\t%f\n%f\t%f\t%f\n", - // (float)FAct(0,0),(float)FAct(0,1),(float)FAct(0,2), - // (float)FAct(1,0),(float)FAct(1,1),(float)FAct(1,2), - // (float)FAct(2,0),(float)FAct(2,1),(float)FAct(2,2)); - - // auto Act = etemp.template pack<3,3>("ActInv",ei); - - // printf("Act in gH : \n%f\t%f\t%f\n%f\t%f\t%f\n%f\t%f\t%f\n", - // (float)Act(0,0),(float)Act(0,1),(float)Act(0,2), - // (float)Act(1,0),(float)Act(1,1),(float)Act(1,2), - // (float)Act(2,0),(float)Act(2,1),(float)Act(2,2)); - // } - - // auto ActInv_check = etemp.template pack<3,3>("ActInv",ei); - // for(int i = 0;i != 3;++i) - // ActInv_check(i,i) -= 1.0; - // if(ActInv_check.norm() > 1){ - // auto ActInv = etemp.template pack<3,3>("ActInv",ei); - // printf("wierd ActInv<%d> in gH : \n%f\t%f\t%f\n%f\t%f\t%f\n%f\t%f\t%f\n",ei, - // (float)ActInv(0,0),(float)ActInv(0,1),(float)ActInv(0,2), - // (float)ActInv(1,0),(float)ActInv(1,1),(float)ActInv(1,2), - // (float)ActInv(2,0),(float)ActInv(2,1),(float)ActInv(2,2)); - // } - - } - - auto dFActdF = dFAdF(etemp.template pack<3,3>("ActInv",ei)); - - auto P = 
model.first_piola(FAct); - auto vole = eles("vol", ei); - auto vecP = flatten(P); - vecP = dFActdF.transpose() * vecP; - auto dFdXT = dFdX.transpose(); - auto vf = -vole * (dFdXT * vecP); - - auto mg = volf * vole / 4; - for (int i = 0; i != 4; ++i) { - auto vi = inds[i]; - for (int d = 0; d != 3; ++d) - atomic_add(exec_cuda, &vtemp("grad", d, vi), vf(i * 3 + d) + mg(d)); - } - - auto Hq = model.first_piola_derivative(FAct, true_c); - auto dFdAct_dFdX = dFActdF * dFdX; - // dFdAct_dFdX = dFdX; - auto H = dFdAct_dFdX.transpose() * Hq * dFdAct_dFdX * vole; - - etemp.tuple<12 * 12>("He", ei) = H; - - - // auto Hn = H.norm(); - // if(isnan(Hn)){ - // auto Hqn = Hq.norm(); - // auto dFdXn = dFdAct_dFdX.norm(); - // printf("elm<%d>_Hn : %f %f %f\n",ei,(float)Hn,(float)dFdXn,(float)Hqn); - // printf("FAct<%d> in gH : \n%f\t%f\t%f\n%f\t%f\t%f\n%f\t%f\t%f\n",ei, - // (float)FAct(0,0),(float)FAct(0,1),(float)FAct(0,2), - // (float)FAct(1,0),(float)FAct(1,1),(float)FAct(1,2), - // (float)FAct(2,0),(float)FAct(2,1),(float)FAct(2,2)); - - // auto Act = etemp.template pack<3,3>("ActInv",ei); - // printf("Act<%d> in gH : \n%f\t%f\t%f\n%f\t%f\t%f\n%f\t%f\t%f\n",ei, - // (float)Act(0,0),(float)Act(0,1),(float)Act(0,2), - // (float)Act(1,0),(float)Act(1,1),(float)Act(1,2), - // (float)Act(2,0),(float)Act(2,1),(float)Act(2,2)); - // } - - }); - - - // fmt::print("check here 1\n"); - T lambda = model.lam; - T mu = model.mu; - if(b_bcws.size() != b_verts.size()){ - fmt::print("B_BCWS_SIZE = {}\t B_VERTS_SIZE = {}\n",b_bcws.size(),b_verts.size()); - throw std::runtime_error("B_BCWS SIZE AND B_VERTS SIZE NOT MATCH"); - } - - // fmt::print("check here 2\n"); - - auto nmEmbedVerts = b_verts.size(); - cudaPol(zs::range(nmEmbedVerts), - [bcws = proxy({},b_bcws),b_verts = proxy({},b_verts),vtemp = proxy({},vtemp),etemp = proxy({},etemp), - eles = proxy({},eles),lambda,mu,tag,bone_driven_weight = bone_driven_weight] ZS_LAMBDA(int vi) mutable { - auto ei = reinterpret_bits(bcws("inds",vi)); - 
if(ei < 0) - return; - auto inds = eles.pack<4>("inds",ei).reinterpret_bits(); - auto w = bcws.pack(dim_c<4>, "w", vi); - auto tpos = vec3::zeros(); - for(size_t i = 0;i != 4;++i) - tpos += w[i] * vtemp.pack(dim_c<3>, tag, inds[i]); - auto pdiff = tpos - b_verts.pack(dim_c<3>, "x", vi); - - T stiffness = 2.0066 * mu + 1.0122 * lambda; - - for(size_t i = 0;i != 4;++i){ - auto tmp = pdiff * (-stiffness * bcws("cnorm",vi) * bone_driven_weight * w[i] * eles("vol",ei)); - // tmp = pdiff * (-lambda * bcws("cnorm",vi) * bone_driven_weight * w[i]); - for(size_t d = 0;d != 3;++d) - atomic_add(exec_cuda,&vtemp("grad",d,inds[i]),(T)tmp[d]); - } - for(int i = 0;i != 4;++i) - for(int j = 0;j != 4;++j){ - T alpha = stiffness * bone_driven_weight * w[i] * w[j] * bcws("cnorm",vi) * eles("vol",ei); - // alpha = lambda * bone_driven_weight * w[i] * w[j] * bcws("cnorm",vi); - // if(ei == 11221) - // if(i == 3 && j == 3) - // printf("alpha : %f\n",alpha); - for(int d = 0;d != 3;++d){ - // etemp("He",(i * 3 + d) * 12 + j * 3 + d,ei) += alpha; - // if(isnan(alpha)){ - // printf("nan alpha<%d,%d,%d> %f %f %f %f %f\n",vi,i,j,(float)lambda,(float)bone_driven_weight,(float)w[i],(float)w[j],(float)bcws("cnorm",vi)); - // } - atomic_add(exec_cuda,&etemp("He",(i * 3 + d) * 12 + j * 3 + d,ei),alpha); - } - } - - }); - - } - - FEMQuasiStaticSystem(const tiles_t &verts, const tiles_t &eles, const tiles_t &b_bcws, const tiles_t& b_verts,T bone_driven_weight,vec3 volf) - : verts{verts}, eles{eles}, b_bcws{b_bcws}, b_verts{b_verts}, bone_driven_weight{bone_driven_weight},volf{volf}{} - - const tiles_t &verts; - const tiles_t &eles; - const tiles_t &b_bcws; // the barycentric interpolation of embeded bones - const tiles_t &b_verts; // the position of embeded bones - - T bone_driven_weight; - vec3 volf; - }; - - void apply() override { - using namespace zs; - auto zstets = get_input("ZSParticles"); - auto gravity = zeno::vec<3,T>(0); - if(has_input("gravity")) - gravity = get_input("gravity")->get>(); 
- auto armijo = get_param("armijo"); - auto curvature = get_param("wolfe"); - auto cg_res = get_param("cg_res"); - auto btl_res = get_param("btl_res"); - auto models = zstets->getModel(); - auto& verts = zstets->getParticles(); - auto& eles = zstets->getQuadraturePoints(); - auto zsbones = get_input("driven_bones"); - auto tag = get_param("driven_tag"); - auto muscle_id_tag = get_param("muscle_id_tag"); - auto bone_driven_weight = get_param("bone_driven_weight"); - auto newton_res = get_param("newton_res"); - - auto volf = vec3::from_array(gravity * models.density); - - // auto nm_acts = get_input("Acts")->arr.size(); - // fmt::print("number of activations : {}\n",nm_acts); - - std::vector act_; - std::size_t nm_acts = 0; - // auto nm_acts_ = zstets->get().get("NM_MUSCLES"); - // std::cout << "nm_acts_ : " << std::endl; - - if(has_input("Acts")) { - act_ = get_input("Acts")->getLiterial(); - nm_acts = act_.size(); - } - // auto act_ = get_input("Acts")->getLiterial(); - // initialize on host qs[i] = qs_[i]->get(); - - constexpr auto host_space = zs::execspace_e::openmp; - auto ompExec = zs::omp_exec(); - auto act_buffer = dtiles_t{{{"act",1}},nm_acts,zs::memsrc_e::host}; - ompExec(range(act_buffer.size()), - [act_buffer = proxy({},act_buffer),act_] (int i) mutable{ - act_buffer("act",i) = act_[i]; - // fmt::print("act<{}> : {}\n",i,act_buffer("act",i)); - }); - act_buffer = act_buffer.clone({zs::memsrc_e::device, 0}); - - static dtiles_t vtemp{verts.get_allocator(), - {{"grad", 3}, - {"P", 9}, - {"bou_tag",1}, - {"dir", 3}, - {"xn", 3}, - {"xn0", 3}, - {"temp", 3}, - {"r", 3}, - {"p", 3}, - {"q", 3}}, - verts.size()}; - static dtiles_t etemp{eles.get_allocator(), {{"He", 12 * 12},{"inds",4},{"ActInv",3*3},{"muscle_ID",1},{"fiber",3}}, eles.size()}; - vtemp.resize(verts.size()); - etemp.resize(eles.size()); - - FEMQuasiStaticSystem A{verts,eles,(*zstets)[tag],zsbones->getParticles(),bone_driven_weight,volf}; - - constexpr auto space = execspace_e::cuda; - auto 
cudaPol = cuda_exec().sync(false); - - TILEVEC_OPS::copy<4>(cudaPol,eles,"inds",etemp,"inds"); - - - if(!eles.hasProperty("fiber")){ - // fmt::print("The input flesh have no fiber orientations, use the default setting\n"); - TILEVEC_OPS::fill<3>(cudaPol,etemp,"fiber",{1.,0.,0.}); - // throw std::runtime_error("The input flesh should have fiber orientations"); - - }else { - if(eles.getPropertySize("fiber") != 3){ - fmt::print("The input fiber has wrong channel size\n"); - throw std::runtime_error("The input fiber has wrong channel size"); - } - TILEVEC_OPS::copy<3>(cudaPol,eles,"fiber",etemp,"fiber"); - } - if(!eles.hasProperty(muscle_id_tag)) { - // if((!eles.hasProperty(muscle_id_tag)) || (eles.getPropertySize(muscle_id_tag) != 1)){ - // fmt::print("the quadrature has no muscle id tag : {} {}\n",muscle_id_tag,eles.getPropertySize(muscle_id_tag)); - // throw std::runtime_error("the quadrature has no muscle id tag"); - // } - // fmt::print("The input flesh have no mosucle_id specified, use the default setting"); - TILEVEC_OPS::fill(cudaPol,etemp,"muscle_ID",-1); - }else { - TILEVEC_OPS::copy(cudaPol,eles,muscle_id_tag,etemp,"muscle_ID"); - } - - // apply muscle activation - cudaPol(range(etemp.size()), - [etemp = proxy({},etemp),act_buffer = proxy({},act_buffer),muscle_id_tag = SmallString(muscle_id_tag),nm_acts] ZS_LAMBDA(int ei) mutable { - // auto act = eles.template pack<3>("act",ei); - auto fiber = etemp.template pack<3>("fiber",ei); - - vec3 act{0}; - - auto nfiber = fiber.norm(); - auto ID = etemp("muscle_ID",ei); - if(nfiber < 0.5 || ID < -1e-6 || nm_acts == 0){ // if there is no local fiber orientaion, use the default act and fiber - fiber = vec3{1.0,0.0,0.0}; - act = vec3{1.0,1.0,1.0}; - }else{ - // a test - int id = (int)ID; - float a = 1. 
- act_buffer("act",id); - act = vec3{1,zs::sqrt(1./a),zs::sqrt(1./a)}; - fiber /= nfiber;// in case there is some floating-point error - - // printf("use act[%d] : %f\n",id,(float)a); - } - - vec3 dir[3]; - dir[0] = fiber; - auto tmp = vec3{1.0,0.0,0.0}; - dir[1] = dir[0].cross(tmp); - if(dir[1].length() < 1e-3) { - tmp = vec3{0.0,1.0,0.0}; - dir[1] = dir[0].cross(tmp); - } - - dir[1] = dir[1] / dir[1].length(); - dir[2] = dir[0].cross(dir[1]); - - auto R = mat3{}; - for(int i = 0;i < 3;++i) - for(int j = 0;j < 3;++j) - R(i,j) = dir[j][i]; - - auto Act = mat3::zeros(); - Act(0,0) = act[0]; - Act(1,1) = act[1]; - Act(2,2) = act[2]; - - Act = R * Act * R.transpose(); - - // if(ei == 0) { - // printf("Act : \n%f\t%f\t%f\n%f\t%f\t%f\n%f\t%f\t%f\n", - // (float)Act(0,0),(float)Act(0,1),(float)Act(0,2), - // (float)Act(1,0),(float)Act(1,1),(float)Act(1,2), - // (float)Act(2,0),(float)Act(2,1),(float)Act(2,2)); - // } - - - etemp.template tuple<9>("ActInv",ei) = zs::inverse(Act); - - // if(ei == 0) { - // Act = etemp.template pack<3,3>("ActInv",ei); - // printf("Act : \n%f\t%f\t%f\n%f\t%f\t%f\n%f\t%f\t%f\n", - // (float)Act(0,0),(float)Act(0,1),(float)Act(0,2), - // (float)Act(1,0),(float)Act(1,1),(float)Act(1,2), - // (float)Act(2,0),(float)Act(2,1),(float)Act(2,2)); - - // // auto dFActdF = dFAdF(eles.template pack<3,3>("ActInv",ei)); - // // printf("dFActdF : \n%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\") - - // } - - }); - - // setup initial guess - TILEVEC_OPS::copy<3>(cudaPol,verts,verts.hasProperty("init_x") ? 
"init_x" : "x",vtemp,"xn"); - TILEVEC_OPS::fill<1>(cudaPol,vtemp,"bou_tag",zs::vec::zeros()); - - for(int newtonIter = 0;newtonIter != 1000;++newtonIter){ - match([&](auto &elasticModel) { - A.computeGradientAndHessian(cudaPol, elasticModel,"xn",vtemp,etemp); - })(models.getElasticModel()); - - // auto Hn = TILEVEC_OPS::dot<144>(cudaPol,etemp,"He","He"); - // fmt::print("Hn : {}\n",(float)Hn); - - // break; - - // Prepare Preconditioning - PCG::prepare_block_diagonal_preconditioner<4,3>(cudaPol,"He",etemp,"P",vtemp); - - // if the grad is too small, return the result - // Solve equation using PCG - TILEVEC_OPS::fill<3>(cudaPol,vtemp,"dir",zs::vec::zeros()); - PCG::pcg_with_fixed_sol_solve<3,4>(cudaPol,vtemp,etemp,"dir","bou_tag","grad","P","inds","He",cg_res,1000,50); - PCG::project<3>(cudaPol,vtemp,"dir","bou_tag"); - PCG::project<3>(cudaPol,vtemp,"grad","bou_tag"); - T res = TILEVEC_OPS::inf_norm<3>(cudaPol, vtemp, "dir");// this norm is independent of descriterization - - if (res < newton_res) { - fmt::print("\t# newton optimizer reach desired resolution in {} iters with residual {}\n", - newtonIter, res); - break; - } - T dg = TILEVEC_OPS::dot<3>(cudaPol,vtemp,"grad","dir"); - if(fabs(dg) < btl_res){ - // fmt::print("\t# newton optimizer reach stagnation point in {} iters with residual {}\n",newtonIter, res); - break; - } - if(dg < 0){ - T gradn = std::sqrt(TILEVEC_OPS::dot<3>(cudaPol,vtemp,"grad","grad")); - T dirn = std::sqrt(TILEVEC_OPS::dot<3>(cudaPol,vtemp,"dir","dir")); - fmt::print("invalid dg = {} grad = {} dir = {}\n",dg,gradn,dirn); - throw std::runtime_error("INVALID DESCENT DIRECTION"); - } - T alpha = 1.; - TILEVEC_OPS::copy<3>(cudaPol,vtemp,"xn",vtemp,"xn0"); - T E0; - match([&](auto &elasticModel) { - E0 = A.energy(cudaPol, elasticModel, "xn0",vtemp,etemp); - })(models.getElasticModel()); - - dg = -dg; - - T E{E0}; - // Backtracking Linesearch - int max_line_search = 10; - int line_search = 0; - std::vector armijo_buffer(max_line_search); - do { 
- TILEVEC_OPS::add<3>(cudaPol,vtemp,"xn0",(T)1.0,"dir",alpha,"xn"); - match([&](auto &elasticModel) { - E = A.energy(cudaPol, elasticModel, "xn",vtemp,etemp); - })(models.getElasticModel()); - armijo_buffer[line_search] = (E - E0)/alpha; - // test Armojo condition - if (((double)E - (double)E0) < (double)armijo * (double)dg * (double)alpha) - break; - alpha /= 2; - ++line_search; - } while (line_search < max_line_search); - if(line_search == max_line_search){ - fmt::print("LINE_SEARCH_EXCEED: %f\n",dg); - // for(size_t i = 0;i != max_line_search;++i) - // fmt::print("AB[{}]\t = {} dg = {}\n",i,armijo_buffer[i],dg); - } - - cudaPol(zs::range(vtemp.size()), [vtemp = proxy({}, vtemp), - alpha] __device__(int i) mutable { - vtemp.tuple<3>("xn", i) = - vtemp.pack<3>("xn0", i) + alpha * vtemp.pack<3>("dir", i); - }); - - } - - cudaPol(zs::range(verts.size()), - [vtemp = proxy({}, vtemp), verts = proxy({}, verts)] __device__(int vi) mutable { - auto newX = vtemp.pack<3>("xn", vi); - verts.tuple<3>("x", vi) = newX; - }); - - cudaPol.syncCtx(); - - // write back muscle activation - auto output_act = get_param("output_act"); - if(output_act) { - auto ActTag = get_param("actTag"); - if(!eles.hasProperty(ActTag)) - eles.append_channels(cudaPol,{{ActTag,1}}); - TILEVEC_OPS::fill(cudaPol,eles,ActTag,0); - if(nm_acts > 0) { - cudaPol(zs::range(eles.size()), - [eles = proxy({},eles),muscle_id_tag = zs::SmallString{muscle_id_tag}, - act_buffer = proxy({},act_buffer),ActTag = zs::SmallString{ActTag}] __device__(int ei) mutable { - auto ID = eles(muscle_id_tag,ei); - int id = (int)ID; - eles(ActTag,ei) = id > -1 ? act_buffer("act",id) : 0; - // eles(ActTag,ei) = id > -1 ? 
0.5 : 0; - }); - } - } - - cudaPol.syncCtx(); - - set_output("ZSParticles", zstets); - } -}; - -ZENDEFNODE(FleshQuasiStaticStepping, {{"ZSParticles","driven_bones","gravity","Acts"}, - {"ZSParticles"}, - {{"float","armijo","0.1"},{"float","wolfe","0.9"}, - {"float","cg_res","0.1"},{"float","btl_res","0.0001"},{"float","newton_res","0.001"}, - {"string","driven_tag","bone_bw"},{"float","bone_driven_weight","0.0"}, - {"string","muscle_id_tag","ms_id_tag"},{"int","output_act","0"},{"string","actTag","Act"} - }, - {"FEM"}}); - -} \ No newline at end of file diff --git a/projects/CuLagrange/fem/QuasiStaticStepping.cu b/projects/CuLagrange/fem/QuasiStaticStepping.cu deleted file mode 100644 index 8521806090..0000000000 --- a/projects/CuLagrange/fem/QuasiStaticStepping.cu +++ /dev/null @@ -1,349 +0,0 @@ -#include "Structures.hpp" -#include "zensim/Logger.hpp" -#include "zensim/cuda/execution/ExecutionPolicy.cuh" -#include "zensim/geometry/PoissonDisk.hpp" -#include "zensim/geometry/VdbLevelSet.h" -#include "zensim/geometry/VdbSampler.h" -#include "zensim/io/MeshIO.hpp" -#include "zensim/math/bit/Bits.h" -#include "zensim/types/Property.h" -#include -#include -#include -#include -#include -#include - -#include "../geometry/linear_system/mfcg.hpp" - -namespace zeno { -struct QuasiStaticStepping : INode { - using T = float; - using dtiles_t = zs::TileVector; - using tiles_t = typename ZenoParticles::particles_t; - using vec3 = zs::vec; - using mat3 = zs::vec; - struct FEMSystem { - template - T energy(Pol &pol, const Model &model, const zs::SmallString tag, dtiles_t& vtemp) { - using namespace zs; - constexpr auto space = execspace_e::cuda; - Vector res{verts.get_allocator(), 1}; - res.setVal(0); - // elastic potential - pol(range(eles.size()), [verts = proxy({}, verts), - eles = proxy({}, eles), - vtemp = proxy({}, vtemp), - res = proxy(res), tag, model = model,volf = volf] - ZS_LAMBDA (int ei) mutable { - auto DmInv = eles.template pack<3, 3>("IB", ei); - auto inds = 
eles.template pack<4>("inds", ei).template reinterpret_bits(); - vec3 xs[4] = {vtemp.pack<3>(tag, inds[0]), vtemp.pack<3>(tag, inds[1]), - vtemp.pack<3>(tag, inds[2]), vtemp.pack<3>(tag, inds[3])}; - mat3 F{}; - { - auto x1x0 = xs[1] - xs[0]; - auto x2x0 = xs[2] - xs[0]; - auto x3x0 = xs[3] - xs[0]; - auto Ds = mat3{x1x0[0], x2x0[0], x3x0[0], x1x0[1], x2x0[1], - x3x0[1], x1x0[2], x2x0[2], x3x0[2]}; - F = Ds * DmInv; - } - auto psi = model.psi(F); - auto vole = eles("vol", ei); - - T gpsi = 0; - for(int i = 0;i != 4;++i) - gpsi += (-volf.dot(xs[i])/4); - - atomic_add(exec_cuda, &res[0], (T)(vole * (psi + gpsi))); - }); -// Bone Driven Potential Energy - T lambda = model.lam; - T mu = model.mu; - auto nmEmbedVerts = b_verts.size(); - if(b_bcws.size() != b_verts.size()){ - fmt::print("B_BCWS_SIZE = {}\t B_VERTS_SIZE = {}\n",b_bcws.size(),b_verts.size()); - throw std::runtime_error("B_BCWS SIZE AND B_VERTS SIZE NOT MATCH"); - } - pol(range(nmEmbedVerts), [vtemp = proxy({},vtemp), - eles = proxy({},eles), - b_verts = proxy({},b_verts), - bcws = proxy({},b_bcws),lambda,mu,tag,res = proxy(res),bone_driven_weight = bone_driven_weight] - ZS_LAMBDA(int vi) mutable { - auto ei = reinterpret_bits(bcws("inds",vi)); - if(ei < 0) - return; - auto inds = eles.pack<4>("inds",ei).reinterpret_bits(); - auto w = bcws.pack<4>("w",vi); - - auto tpos = vec3::zeros(); - for(size_t i = 0;i != 4;++i) - tpos += w[i] * vtemp.pack<3>(tag,inds[i]); - auto pdiff = tpos - b_verts.pack<3>("x",vi); - - T stiffness = 2.0066 * mu + 1.0122 * lambda; - // if(eles("vol",ei) < 0) - // printf("WARNING INVERT TET DETECTED<%d> %f\n",ei,(float)eles("vol",ei)); - T bpsi = (0.5 * bcws("cnorm",vi) * stiffness * bone_driven_weight * eles("vol",ei)) * pdiff.l2NormSqr(); - // bpsi = (0.5 * bcws("cnorm",vi) * lambda * bone_driven_weight) * pdiff.dot(pdiff); -// the cnorm here should be the allocated volume of point in embeded tet - atomic_add(exec_cuda, &res[0], (T)bpsi); - }); - - return res.getVal(); - } - - 
template - void computeGradientAndHessian(zs::CudaExecutionPolicy& cudaPol, - const Model& model, - const zs::SmallString tag, - dtiles_t& vtemp, - dtiles_t& etemp) { - using namespace zs; - constexpr auto space = execspace_e::cuda; - // fmt::print("check here 0"); - TILEVEC_OPS::fill<3>(cudaPol,vtemp,"grad",zs::vec::zeros()); - TILEVEC_OPS::fill<144>(cudaPol,etemp,"He",zs::vec::zeros()); - cudaPol(zs::range(eles.size()), [vtemp = proxy({}, vtemp), - etemp = proxy({}, etemp), - bcws = proxy({},b_bcws), - b_verts = proxy({},b_verts), - verts = proxy({}, verts), - eles = proxy({}, eles),tag, model, volf = volf] ZS_LAMBDA (int ei) mutable { - auto DmInv = eles.template pack<3, 3>("IB", ei); - auto dFdX = dFdXMatrix(DmInv); - auto inds = eles.template pack<4>("inds", ei).template reinterpret_bits(); - vec3 xs[4] = {vtemp.pack<3>(tag, inds[0]), vtemp.pack<3>(tag, inds[1]), - vtemp.pack<3>(tag, inds[2]), vtemp.pack<3>(tag, inds[3])}; - mat3 F{}; - { - auto x1x0 = xs[1] - xs[0]; - auto x2x0 = xs[2] - xs[0]; - auto x3x0 = xs[3] - xs[0]; - auto Ds = mat3{x1x0[0], x2x0[0], x3x0[0], x1x0[1], x2x0[1], - x3x0[1], x1x0[2], x2x0[2], x3x0[2]}; - F = Ds * DmInv; - } - auto P = model.first_piola(F); - auto vole = eles("vol", ei); - auto vecP = flatten(P); - auto dFdXT = dFdX.transpose(); - auto vf = -vole * (dFdXT * vecP); - - auto mg = volf * vole / 4; - for (int i = 0; i != 4; ++i) { - auto vi = inds[i]; - for (int d = 0; d != 3; ++d) - atomic_add(exec_cuda, &vtemp("grad", d, vi), vf(i * 3 + d) + mg(d)); - } - - auto Hq = model.first_piola_derivative(F, true_c); - auto H = dFdXT * Hq * dFdX * vole; - - etemp.tuple<12 * 12>("He", ei) = H; - - }); - - - // fmt::print("check here 1\n"); - T lambda = model.lam; - T mu = model.mu; - if(b_bcws.size() != b_verts.size()){ - fmt::print("B_BCWS_SIZE = {}\t B_VERTS_SIZE = {}\n",b_bcws.size(),b_verts.size()); - throw std::runtime_error("B_BCWS SIZE AND B_VERTS SIZE NOT MATCH"); - } - - // fmt::print("check here 2\n"); - - auto nmEmbedVerts = 
b_verts.size(); - cudaPol(zs::range(nmEmbedVerts), - [bcws = proxy({},b_bcws),b_verts = proxy({},b_verts),vtemp = proxy({},vtemp),etemp = proxy({},etemp), - eles = proxy({},eles),lambda,mu,tag,bone_driven_weight = bone_driven_weight] ZS_LAMBDA(int vi) mutable { - auto ei = reinterpret_bits(bcws("inds",vi)); - if(ei < 0) - return; - auto inds = eles.pack<4>("inds",ei).reinterpret_bits(); - auto w = bcws.pack<4>("w",vi); - auto tpos = vec3::zeros(); - for(size_t i = 0;i != 4;++i) - tpos += w[i] * vtemp.pack<3>(tag,inds[i]); - auto pdiff = tpos - b_verts.pack<3>("x",vi); - - T stiffness = 2.0066 * mu + 1.0122 * lambda; - - for(size_t i = 0;i != 4;++i){ - auto tmp = pdiff * (-stiffness * bcws("cnorm",vi) * bone_driven_weight * w[i] * eles("vol",ei)); - // tmp = pdiff * (-lambda * bcws("cnorm",vi) * bone_driven_weight * w[i]); - for(size_t d = 0;d != 3;++d) - atomic_add(exec_cuda,&vtemp("grad",d,inds[i]),(T)tmp[d]); - } - for(int i = 0;i != 4;++i) - for(int j = 0;j != 4;++j){ - T alpha = stiffness * bone_driven_weight * w[i] * w[j] * bcws("cnorm",vi) * eles("vol",ei); - // alpha = lambda * bone_driven_weight * w[i] * w[j] * bcws("cnorm",vi); - // if(ei == 11221) - // if(i == 3 && j == 3) - // printf("alpha : %f\n",alpha); - for(int d = 0;d != 3;++d){ - // etemp("He",(i * 3 + d) * 12 + j * 3 + d,ei) += alpha; - if(isnan(alpha)){ - printf("nan alpha<%d,%d,%d> %f %f %f %f %f\n",vi,i,j,(float)lambda,(float)bone_driven_weight,(float)w[i],(float)w[j],(float)bcws("cnorm",vi)); - } - atomic_add(exec_cuda,&etemp("He",(i * 3 + d) * 12 + j * 3 + d,ei),alpha); - } - } - - }); - - } - - FEMSystem(const tiles_t &verts, const tiles_t &eles, const tiles_t &b_bcws, const tiles_t& b_verts,T bone_driven_weight,vec3 volf) - : verts{verts}, eles{eles}, b_bcws{b_bcws}, b_verts{b_verts}, bone_driven_weight{bone_driven_weight},volf{volf}{} - - const tiles_t &verts; - const tiles_t &eles; - const tiles_t &b_bcws; // the barycentric interpolation of embeded bones - const tiles_t &b_verts; // the 
position of embeded bones - - T bone_driven_weight; - vec3 volf; - }; - - void apply() override { - using namespace zs; - auto zstets = get_input("ZSParticles"); - auto gravity = get_input("gravity")->get>(); - auto armijo = get_param("armijo"); - auto curvature = get_param("wolfe"); - auto cg_res = get_param("cg_res"); - auto btl_res = get_param("btl_res"); - auto models = zstets->getModel(); - auto& verts = zstets->getParticles(); - auto& eles = zstets->getQuadraturePoints(); - auto zsbones = get_input("driven_bones"); - auto tag = get_param("driven_tag"); - auto bone_driven_weight = get_param("bone_driven_weight"); - auto newton_res = get_param("newton_res"); - - auto volf = vec3::from_array(gravity * models.density); - - static dtiles_t vtemp{verts.get_allocator(), - {{"grad", 3}, - {"P", 9}, - {"bou_tag",1}, - {"dir", 3}, - {"xn", 3}, - {"xn0", 3}, - {"temp", 3}, - {"r", 3}, - {"p", 3}, - {"q", 3}}, - verts.size()}; - static dtiles_t etemp{eles.get_allocator(), {{"He", 12 * 12},{"inds",4}}, eles.size()}; - vtemp.resize(verts.size()); - etemp.resize(eles.size()); - - FEMSystem A{verts,eles,(*zstets)[tag],zsbones->getParticles(),bone_driven_weight,volf}; - - constexpr auto space = execspace_e::cuda; - auto cudaPol = cuda_exec(); - - TILEVEC_OPS::copy<4>(cudaPol,eles,"inds",etemp,"inds"); - - // setup initial guess - TILEVEC_OPS::copy<3>(cudaPol,verts,verts.hasProperty("init_x") ? 
"init_x" : "x",vtemp,"xn"); - TILEVEC_OPS::fill<1>(cudaPol,vtemp,"bou_tag",zs::vec::zeros()); - - for(int newtonIter = 0;newtonIter != 1000;++newtonIter){ - match([&](auto &elasticModel) { - A.computeGradientAndHessian(cudaPol, elasticModel,"xn",vtemp,etemp); - })(models.getElasticModel()); - - // Prepare Preconditioning - PCG::prepare_block_diagonal_preconditioner<4,3>(cudaPol,"He",etemp,"P",vtemp); - - // if the grad is too small, return the result - // Solve equation using PCG - TILEVEC_OPS::fill<3>(cudaPol,vtemp,"dir",zs::vec::zeros()); - PCG::pcg_with_fixed_sol_solve<3,4>(cudaPol,vtemp,etemp,"dir","bou_tag","grad","P","inds","He",cg_res,1000,50); - PCG::project<3>(cudaPol,vtemp,"dir","bou_tag"); - PCG::project<3>(cudaPol,vtemp,"grad","bou_tag"); - T res = TILEVEC_OPS::inf_norm<3>(cudaPol, vtemp, "dir");// this norm is independent of descriterization - - if (res < newton_res) { - fmt::print("\t# newton optimizer reach desired resolution in {} iters with residual {}\n", - newtonIter, res); - break; - } - T dg = TILEVEC_OPS::dot<3>(cudaPol,vtemp,"grad","dir"); - if(fabs(dg) < btl_res){ - fmt::print("\t# newton optimizer reach stagnation point in {} iters with residual {}\n", - newtonIter, res); - break; - } - if(dg < 0){ - T gradn = std::sqrt(TILEVEC_OPS::dot<3>(cudaPol,vtemp,"grad","grad")); - T dirn = std::sqrt(TILEVEC_OPS::dot<3>(cudaPol,vtemp,"dir","dir")); - fmt::print("invalid dg = {} grad = {} dir = {}\n",dg,gradn,dirn); - throw std::runtime_error("INVALID DESCENT DIRECTION"); - } - T alpha = 1.; - TILEVEC_OPS::copy<3>(cudaPol,vtemp,"xn",vtemp,"xn0"); - T E0; - match([&](auto &elasticModel) { - E0 = A.energy(cudaPol, elasticModel, "xn0",vtemp); - })(models.getElasticModel()); - - dg = -dg; - - T E{E0}; - // Backtracking Linesearch - int max_line_search = 10; - int line_search = 0; - std::vector armijo_buffer(max_line_search); - do { - TILEVEC_OPS::add<3>(cudaPol,vtemp,"xn0",(T)1.0,"dir",alpha,"xn"); - match([&](auto &elasticModel) { - E = A.energy(cudaPol, 
elasticModel, "xn",vtemp); - })(models.getElasticModel()); - armijo_buffer[line_search] = (E - E0)/alpha; - // test Armojo condition - if (((double)E - (double)E0) < (double)armijo * (double)dg * (double)alpha) - break; - alpha /= 2; - ++line_search; - } while (line_search < max_line_search); - if(line_search == max_line_search){ - fmt::print("LINE_SEARCH_EXCEED: %f\n",dg); - for(size_t i = 0;i != max_line_search;++i) - fmt::print("AB[{}]\t = {} dg = {}\n",i,armijo_buffer[i],dg); - } - - cudaPol(zs::range(vtemp.size()), [vtemp = proxy({}, vtemp), - alpha] __device__(int i) mutable { - vtemp.tuple<3>("xn", i) = - vtemp.pack<3>("xn0", i) + alpha * vtemp.pack<3>("dir", i); - }); - - } - - cudaPol(zs::range(verts.size()), - [vtemp = proxy({}, vtemp), verts = proxy({}, verts)] __device__(int vi) mutable { - auto newX = vtemp.pack<3>("xn", vi); - verts.tuple<3>("x", vi) = newX; - }); - - - set_output("ZSParticles", zstets); - } -}; - -ZENDEFNODE(QuasiStaticStepping, {{"ZSParticles","driven_bones","gravity"}, - {"ZSParticles"}, - {{"float","armijo","0.1"},{"float","wolfe","0.9"}, - {"float","cg_res","0.1"},{"float","btl_res","0.0001"},{"float","newton_res","0.001"}, - {"string","driven_tag","bone_bw"},{"float","bone_driven_weight","0.0"}}, - {"FEM"}}); - -} \ No newline at end of file diff --git a/projects/CuLagrange/fem/collision_energy/collision_utils.hpp b/projects/CuLagrange/fem/collision_energy/collision_utils.hpp index 9756a527b5..df631817d7 100644 --- a/projects/CuLagrange/fem/collision_energy/collision_utils.hpp +++ b/projects/CuLagrange/fem/collision_energy/collision_utils.hpp @@ -5,6 +5,8 @@ #include "zensim/math/VecInterface.hpp" +#include "../../geometry/kernel/geo_math.hpp" + namespace zeno { namespace COLLISION_UTILS { @@ -449,134 +451,6 @@ namespace COLLISION_UTILS { return contracted + product + product.transpose(); } - /////////////////////////////////////////////////////////////////////// - // get the linear interpolation coordinates from v0 to the line 
segment - // between v1 and v2 - /////////////////////////////////////////////////////////////////////// - constexpr VECTOR2 getLerp(const VECTOR3 v0, const VECTOR3& v1, const VECTOR3& v2) - { - const VECTOR3 e0 = v0 - v1; - const VECTOR3 e1 = v2 - v1; - const VECTOR3 e1hat = e1 / e1.norm(); - const REAL projection = e0.dot(e1hat); - - if (projection < 0.0) - return VECTOR2(1.0, 0.0); - - if (projection >= e1.norm()) - return VECTOR2(0.0, 1.0); - - const REAL ratio = projection / e1.norm(); - return VECTOR2(1.0 - ratio, ratio); - } - - - /////////////////////////////////////////////////////////////////////// - // find the distance from a line segment (v1, v2) to a point (v0) - /////////////////////////////////////////////////////////////////////// - constexpr REAL pointLineDistance(const VECTOR3 v0, const VECTOR3& v1, const VECTOR3& v2) - { - const VECTOR3 e0 = v0 - v1; - const VECTOR3 e1 = v2 - v1; - const VECTOR3 e1hat = e1 / e1.norm(); - const REAL projection = e0.dot(e1hat); - - // if it projects onto the line segment, use that length - if (projection > 0.0 && projection < e1.norm()) - { - const VECTOR3 normal = e0 - projection * e1hat; - return normal.norm(); - } - - // if it doesn't, find the point-point distances - const REAL diff01 = (v0 - v1).norm(); - const REAL diff02 = (v0 - v2).norm(); - - return (diff01 < diff02) ? 
diff01 : diff02; - } - - - /////////////////////////////////////////////////////////////////////// - // get the barycentric coordinate of the projection of v[0] onto the triangle - // formed by v[1], v[2], v[3] - /////////////////////////////////////////////////////////////////////// - constexpr VECTOR3 getBarycentricCoordinates(const VECTOR3 vertices[4]) - { - const VECTOR3 v0 = vertices[1]; - const VECTOR3 v1 = vertices[2]; - const VECTOR3 v2 = vertices[3]; - - const VECTOR3 e1 = v1 - v0; - const VECTOR3 e2 = v2 - v0; - const VECTOR3 n = e1.cross(e2); - const VECTOR3 nHat = n / n.norm(); - const VECTOR3 v = vertices[0] - (nHat.dot(vertices[0] - v0)) * nHat; - - // get the barycentric coordinates - const VECTOR3 na = (v2 - v1).cross(v - v1); - const VECTOR3 nb = (v0 - v2).cross(v - v2); - const VECTOR3 nc = (v1 - v0).cross(v - v0); - const VECTOR3 barycentric(n.dot(na) / n.l2NormSqr(), - n.dot(nb) / n.l2NormSqr(), - n.dot(nc) / n.l2NormSqr()); - - return barycentric; - } - - - /////////////////////////////////////////////////////////////////////// - // get the barycentric coordinate of the projection of v[0] onto the triangle - // formed by v[1], v[2], v[3] - // - // but, if the projection is actually outside, project to all of the - // edges and find the closest point that's still inside the triangle - /////////////////////////////////////////////////////////////////////// - constexpr VECTOR3 getInsideBarycentricCoordinates(const VECTOR3 vertices[4]) - { - VECTOR3 barycentric = getBarycentricCoordinates(vertices); - - // if it's already inside, we're all done - if (barycentric[0] >= 0.0 && - barycentric[1] >= 0.0 && - barycentric[2] >= 0.0) - return barycentric; - - // find distance to all the line segments - // - // there's lots of redundant computation between here and getLerp, - // but let's get it working and see if it fixes the actual - // artifact before optimizing - REAL distance12 = pointLineDistance(vertices[0], vertices[1], vertices[2]); - REAL 
distance23 = pointLineDistance(vertices[0], vertices[2], vertices[3]); - REAL distance31 = pointLineDistance(vertices[0], vertices[3], vertices[1]); - - // less than or equal is important here, otherwise fallthrough breaks - if (distance12 <= distance23 && distance12 <= distance31) - { - VECTOR2 lerp = getLerp(vertices[0], vertices[1], vertices[2]); - barycentric[0] = lerp[0]; - barycentric[1] = lerp[1]; - barycentric[2] = 0.0; - return barycentric; - } - - // less than or equal is important here, otherwise fallthrough breaks - if (distance23 <= distance12 && distance23 <= distance31) - { - VECTOR2 lerp = getLerp(vertices[0], vertices[2], vertices[3]); - barycentric[0] = 0.0; - barycentric[1] = lerp[0]; - barycentric[2] = lerp[1]; - return barycentric; - } - - // else it must be the 31 case - VECTOR2 lerp = getLerp(vertices[0], vertices[3], vertices[1]); - barycentric[0] = lerp[1]; - barycentric[1] = 0.0; - barycentric[2] = lerp[0]; - return barycentric; - } /////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////// @@ -660,182 +534,452 @@ namespace COLLISION_UTILS { } -/////////////////////////////////////////////////////////////////////// -// compute distance between a point and triangle -/////////////////////////////////////////////////////////////////////// - constexpr REAL pointTriangleDistance(const VECTOR3& v0, const VECTOR3& v1, - const VECTOR3& v2, const VECTOR3& v) + + #define FMAX(a,b) ((a) > (b) ? (a) : (b)) + #define FMIN(a,b) ((a) > (b) ? (b) : (a)) + #define FABS(a) ((a) < 0.0f ? -(a) : (a)) + #define OUT_OF_RANGE(a) ((a) < 0.0f || (a) > 1.f) + + + /************************************************************************** + | + | Method: FindNearestPointOnLineSegment + | + | Purpose: Given a line (segment) and a point in 3-dimensional space, + | find the point on the line (segment) that is closest to the + | point. 
+ | + | Parameters: Input: + | ------ + | A1x, A1y, A1z - Coordinates of first defining point of the line/segment + | Lx, Ly, Lz - Vector from (A1x, A1y, A1z) to the second defining point + | of the line/segment. + | Bx, By, Bz - Coordinates of the point + | infinite_line - set to true if lines are to be treated as infinite + | epsilon_squared - tolerance value to be used to check for degenerate + | and parallel lines, and to check for true intersection. + | + | Output: + | ------- + | NearestPointX, - Point on line/segment that is closest to (Bx, By, Bz) + | NearestPointY, + | NearestPointZ + | parameter - Parametric coordinate of the nearest point along the + | line/segment. parameter = 0 at (A1x, A1y, A1z) and + | parameter = 1 at the second defining point of the line/ + | segment + **************************************************************************/ + constexpr void FindNearestPointOnLineSegment(const REAL A1x, const REAL A1y, const REAL A1z, + const REAL Lx, const REAL Ly, const REAL Lz, + const REAL Bx, const REAL By, const REAL Bz, + bool infinite_line, REAL epsilon_squared, REAL &NearestPointX, + REAL &NearestPointY, REAL &NearestPointZ, + REAL &parameter) { - // get the barycentric coordinates - const VECTOR3 e1 = v1 - v0; - const VECTOR3 e2 = v2 - v0; - const VECTOR3 n = e1.cross(e2); - const VECTOR3 na = (v2 - v1).cross(v - v1); - const VECTOR3 nb = (v0 - v2).cross(v - v2); - const VECTOR3 nc = (v1 - v0).cross(v - v0); - const VECTOR3 barycentric(n.dot(na) / n.l2NormSqr(), - n.dot(nb) / n.l2NormSqr(), - n.dot(nc) / n.l2NormSqr()); - - const REAL barySum = zs::abs(barycentric[0]) + zs::abs(barycentric[1]) + zs::abs(barycentric[2]); - - // if the point projects to inside the triangle, it should sum to 1 - if (zs::abs(barySum - 1.0) < 1e-6) + // Line/Segment is degenerate --- special case #1 + REAL D = Lx * Lx + Ly * Ly + Lz * Lz; + if (D < epsilon_squared) { - const VECTOR3 nHat = n / n.norm(); - const REAL normalDistance = (nHat.dot(v - v0)); - return 
zs::abs(normalDistance); + NearestPointX = A1x; + NearestPointY = A1y; + NearestPointZ = A1z; + return; } - // project onto each edge, find the distance to each edge - const VECTOR3 e3 = v2 - v1; - const VECTOR3 ev = v - v0; - const VECTOR3 ev3 = v - v1; - const VECTOR3 e1Hat = e1 / e1.norm(); - const VECTOR3 e2Hat = e2 / e2.norm(); - const VECTOR3 e3Hat = e3 / e3.norm(); - VECTOR3 edgeDistances(1e8, 1e8, 1e8); - - // see if it projects onto the interval of the edge - // if it doesn't, then the vertex distance will be smaller, - // so we can skip computing anything - const REAL e1dot = e1Hat.dot(ev); - if (e1dot > 0.0 && e1dot < e1.norm()) + REAL ABx = Bx - A1x; + REAL ABy = By - A1y; + REAL ABz = Bz - A1z; + + // parameter is computed from Equation (20). + parameter = (Lx * ABx + Ly * ABy + Lz * ABz) / D; + + if (false == infinite_line) parameter = (REAL)FMAX(0.0, FMIN(1.0, parameter)); + + NearestPointX = A1x + parameter * Lx; + NearestPointY = A1y + parameter * Ly; + NearestPointZ = A1z + parameter * Lz; + return; + } + + + /************************************************************************** + | + | Method: AdjustNearestPoints + | + | Purpose: Given nearest point information for two infinite lines, adjust + | to model finite line segments. + | + | Parameters: Input: + | ------ + | A1x, A1y, A1z - Coordinates of first defining point of line/segment A + | Lax, Lay, Laz - Vector from (A1x, A1y, A1z) to the (A2x, A2y, A2z). + | B1x, B1y, B1z - Coordinates of first defining point of line/segment B + | Lbx, Lby, Lbz - Vector from (B1x, B1y, B1z) to the (B2x, B2y, B2z). + | epsilon_squared - tolerance value to be used to check for degenerate + | and parallel lines, and to check for true intersection. + | s - parameter representing nearest point on infinite line A + | t - parameter representing nearest point on infinite line B + | + | Output: + | ------- + | PointOnSegAx, - Coordinates of the point on segment A that are nearest + | PointOnSegAy, to segment B. 
This corresponds to point C in the text. + | PointOnSegAz + | PointOnSegBx, - Coordinates of the point on segment B that are nearest + | PointOnSegBy, to segment A. This corresponds to point D in the text. + | PointOnSegBz + **************************************************************************/ + constexpr void AdjustNearestPoints(REAL A1x, REAL A1y, REAL A1z, + REAL Lax, REAL Lay, REAL Laz, + REAL B1x, REAL B1y, REAL B1z, + REAL Lbx, REAL Lby, REAL Lbz, + REAL epsilon_squared, REAL s, REAL t, + REAL &PointOnSegAx, REAL &PointOnSegAy, REAL &PointOnSegAz, + REAL &PointOnSegBx, REAL &PointOnSegBy, REAL &PointOnSegBz) + { + // handle the case where both parameter s and t are out of range + if (OUT_OF_RANGE(s) && OUT_OF_RANGE(t)) + { + s = FMAX((REAL)0.0, FMIN((REAL)1.0, s)); + PointOnSegAx = (A1x + s * Lax); + PointOnSegAy = (A1y + s * Lay); + PointOnSegAz = (A1z + s * Laz); + FindNearestPointOnLineSegment(B1x, B1y, B1z, Lbx, Lby, Lbz, PointOnSegAx, + PointOnSegAy, PointOnSegAz, true, epsilon_squared, + PointOnSegBx, PointOnSegBy, PointOnSegBz, t); + if (OUT_OF_RANGE(t)) + { + t = FMAX((REAL)0.0, FMIN((REAL)1.0, t)); + PointOnSegBx = (B1x + t * Lbx); + PointOnSegBy = (B1y + t * Lby); + PointOnSegBz = (B1z + t * Lbz); + FindNearestPointOnLineSegment(A1x, A1y, A1z, Lax, Lay, Laz, PointOnSegBx, + PointOnSegBy, PointOnSegBz, false, epsilon_squared, + PointOnSegAx, PointOnSegAy, PointOnSegAz, s); + FindNearestPointOnLineSegment(B1x, B1y, B1z, Lbx, Lby, Lbz, PointOnSegAx, + PointOnSegAy, PointOnSegAz, false, epsilon_squared, + PointOnSegBx, PointOnSegBy, PointOnSegBz, t); + } + } + // otherwise, handle the case where the parameter for only one segment is + // out of range + else if (OUT_OF_RANGE(s)) { - const VECTOR3 projected = v0 + e1Hat * e1dot; - edgeDistances[0] = (v - projected).norm(); + s = FMAX((REAL)0.0, FMIN((REAL)1.0, s)); + PointOnSegAx = (A1x + s * Lax); + PointOnSegAy = (A1y + s * Lay); + PointOnSegAz = (A1z + s * Laz); + 
FindNearestPointOnLineSegment(B1x, B1y, B1z, Lbx, Lby, Lbz, PointOnSegAx, + PointOnSegAy, PointOnSegAz, false, epsilon_squared, + PointOnSegBx, PointOnSegBy, PointOnSegBz, t); } - const REAL e2dot = e2Hat.dot(ev); - if (e2dot > 0.0 && e2dot < e2.norm()) + else if (OUT_OF_RANGE(t)) { - const VECTOR3 projected = v0 + e2Hat * e2dot; - edgeDistances[1] = (v - projected).norm(); + t = FMAX((REAL)0.0, FMIN((REAL)1.0, t)); + PointOnSegBx = (B1x + t * Lbx); + PointOnSegBy = (B1y + t * Lby); + PointOnSegBz = (B1z + t * Lbz); + FindNearestPointOnLineSegment(A1x, A1y, A1z, Lax, Lay, Laz, PointOnSegBx, + PointOnSegBy, PointOnSegBz, false, epsilon_squared, + PointOnSegAx, PointOnSegAy, PointOnSegAz, s); } - const REAL e3dot = e3Hat.dot(ev3); - if (e3dot > 0.0 && e3dot < e3.norm()) + } + + + /************************************************************************** + | + | Method: FindNearestPointOfParallelLineSegments + | + | Purpose: Given two lines (segments) that are known to be parallel, find + | a representative point on each that is nearest to the other. If + | the lines are considered to be finite then it is possible that there + | is one true point on each line that is nearest to the other. This + | code properly handles this case. + | + | This is the most difficult line intersection case to handle, since + | there is potentially a family, or locus of points on each line/segment + | that are nearest to the other. + | Parameters: Input: + | ------ + | A1x, A1y, A1z - Coordinates of first defining point of line/segment A + | A2x, A2y, A2z - Coordinates of second defining point of line/segment A + | Lax, Lay, Laz - Vector from (A1x, A1y, A1z) to the (A2x, A2y, A2z). + | B1x, B1y, B1z - Coordinates of first defining point of line/segment B + | B2x, B2y, B2z - Coordinates of second defining point of line/segment B + | Lbx, Lby, Lbz - Vector from (B1x, B1y, B1z) to the (B2x, B2y, B2z). 
+ | infinite_lines - set to true if lines are to be treated as infinite + | epsilon_squared - tolerance value to be used to check for degenerate + | and parallel lines, and to check for true intersection. + | + | Output: + | ------- + | PointOnSegAx, - Coordinates of the point on segment A that are nearest + | PointOnSegAy, to segment B. This corresponds to point C in the text. + | PointOnSegAz + | PointOnSegBx, - Coordinates of the point on segment B that are nearest + | PointOnSegBy, to segment A. This corresponds to point D in the text. + | PointOnSegBz + + **************************************************************************/ + constexpr void FindNearestPointOfParallelLineSegments(REAL A1x, REAL A1y, REAL A1z, + REAL A2x, REAL A2y, REAL A2z, + REAL Lax, REAL Lay, REAL Laz, + REAL B1x, REAL B1y, REAL B1z, + REAL B2x, REAL B2y, REAL B2z, + REAL Lbx, REAL Lby, REAL Lbz, + bool infinite_lines, REAL epsilon_squared, + REAL &PointOnSegAx, REAL &PointOnSegAy, REAL &PointOnSegAz, + REAL &PointOnSegBx, REAL &PointOnSegBy, REAL &PointOnSegBz) + { + REAL s[2] = {0, 0}; + REAL temp{}; + FindNearestPointOnLineSegment(A1x, A1y, A1z, Lax, Lay, Laz, B1x, B1y, B1z, + true, epsilon_squared, PointOnSegAx, PointOnSegAy, PointOnSegAz, s[0]); + if (true == infinite_lines) { - const VECTOR3 projected = v1 + e3Hat * e3dot; - edgeDistances[2] = (v - projected).norm(); + PointOnSegBx = B1x; + PointOnSegBy = B1y; + PointOnSegBz = B1z; } - - // get the distance to each vertex - const VECTOR3 vertexDistances((v - v0).norm(), - (v - v1).norm(), - (v - v2).norm()); - - // get the smallest of both the edge and vertex distances - REAL vertexMin = 1e8; - REAL edgeMin = 1e8; - for(int i = 0;i < 3;++i){ - vertexMin = vertexMin > vertexDistances[i] ? vertexDistances[i] : vertexMin; - edgeMin = edgeMin > edgeDistances[i] ? 
edgeDistances[i] : edgeMin; + else + { + REAL tp[3] = {}; + FindNearestPointOnLineSegment(A1x, A1y, A1z, Lax, Lay, Laz, B2x, B2y, B2z, + true, epsilon_squared, tp[0], tp[1], tp[2], s[1]); + if (s[0] < 0.0 && s[1] < 0.0) + { + PointOnSegAx = A1x; + PointOnSegAy = A1y; + PointOnSegAz = A1z; + if (s[0] < s[1]) + { + PointOnSegBx = B2x; + PointOnSegBy = B2y; + PointOnSegBz = B2z; + } + else + { + PointOnSegBx = B1x; + PointOnSegBy = B1y; + PointOnSegBz = B1z; + } + } + else if (s[0] > (REAL)1.0 && s[1] > (REAL)1.0) + { + PointOnSegAx = A2x; + PointOnSegAy = A2y; + PointOnSegAz = A2z; + if (s[0] < s[1]) + { + PointOnSegBx = B1x; + PointOnSegBy = B1y; + PointOnSegBz = B1z; + } + else + { + PointOnSegBx = B2x; + PointOnSegBy = B2y; + PointOnSegBz = B2z; + } + } + else + { + temp = (REAL)0.5*(FMAX((REAL)0.0, FMIN((REAL)1.0, s[0])) + FMAX((REAL)0.0, FMIN((REAL)1.0, s[1]))); + PointOnSegAx = (A1x + temp * Lax); + PointOnSegAy = (A1y + temp * Lay); + PointOnSegAz = (A1z + temp * Laz); + FindNearestPointOnLineSegment(B1x, B1y, B1z, Lbx, Lby, Lbz, + PointOnSegAx, PointOnSegAy, PointOnSegAz, true, + epsilon_squared, PointOnSegBx, PointOnSegBy, PointOnSegBz, temp); + } } - // return the smallest of those - return (vertexMin < edgeMin) ? 
vertexMin : edgeMin; } -constexpr REAL pointTriangleDistance(const VECTOR3& v0, const VECTOR3& v1, - const VECTOR3& v2, const VECTOR3& v,REAL& barySum) - { - // get the barycentric coordinates - const VECTOR3 e1 = v1 - v0; - const VECTOR3 e2 = v2 - v0; - const VECTOR3 n = e1.cross(e2); - const VECTOR3 na = (v2 - v1).cross(v - v1); - const VECTOR3 nb = (v0 - v2).cross(v - v2); - const VECTOR3 nc = (v1 - v0).cross(v - v0); - const VECTOR3 barycentric(n.dot(na) / n.l2NormSqr(), - n.dot(nb) / n.l2NormSqr(), - n.dot(nc) / n.l2NormSqr()); - - barySum = zs::abs(barycentric[0]) + zs::abs(barycentric[1]) + zs::abs(barycentric[2]); - - // if the point projects to inside the triangle, it should sum to 1 - if (zs::abs(barySum - 1.0) < 1e-6) - { - const VECTOR3 nHat = n / n.norm(); - const REAL normalDistance = (nHat.dot(v - v0)); - return zs::abs(normalDistance); - } - // project onto each edge, find the distance to each edge - const VECTOR3 e3 = v2 - v1; - const VECTOR3 ev = v - v0; - const VECTOR3 ev3 = v - v1; - const VECTOR3 e1Hat = e1 / e1.norm(); - const VECTOR3 e2Hat = e2 / e2.norm(); - const VECTOR3 e3Hat = e3 / e3.norm(); - VECTOR3 edgeDistances(1e8, 1e8, 1e8); - - // see if it projects onto the interval of the edge - // if it doesn't, then the vertex distance will be smaller, - // so we can skip computing anything - const REAL e1dot = e1Hat.dot(ev); - if (e1dot > 0.0 && e1dot < e1.norm()) + /************************************************************************** + | + | Method: IntersectLineSegments + | + | Purpose: Find the nearest point between two finite length line segments + | or two infinite lines in 3-dimensional space. The function calculates + | the point on each line/line segment that is closest to the other + | line/line segment, the midpoint between the nearest points, and + | the vector between these two points. If the two nearest points + | are close within a tolerance, a flag is set indicating the lines + | have a "true" intersection. 
+ | + | Parameters: Input: + | ------ + | A1x, A1y, A1z - Coordinates of first defining point of line/segment A + | A2x, A2y, A2z - Coordinates of second defining point of line/segment A + | B1x, B1y, B1z - Coordinates of first defining point of line/segment B + | B2x, B2y, B2z - Coordinates of second defining point of line/segment B + | infinite_lines - set to true if lines are to be treated as infinite + | epsilon - tolerance value to be used to check for degenerate + | and parallel lines, and to check for true intersection. + | + | Output: + | ------- + | PointOnSegAx, - Coordinates of the point on segment A that are nearest + | PointOnSegAy, to segment B. This corresponds to point C in the text. + | PointOnSegAz + | PointOnSegBx, - Coordinates of the point on segment B that are nearest + | PointOnSegBy, to segment A. This corresponds to point D in the text. + | PointOnSegBz + | NearestPointX, - Midpoint between the two nearest points. This can be + | NearestPointY, treated as *the* intersection point if nearest points + | NearestPointZ are sufficiently close. This corresponds to point P + | in the text. + | NearestVectorX, - Vector between the nearest point on A to the nearest + | point on segment B. This vector is normal to both + | lines if the lines are infinite, but is not guaranteed + | to be normal to both lines if both lines are finite + | length. + | true_intersection - true if the nearest points are close within a small + | tolerance. 
+ **************************************************************************/ + constexpr void IntersectLineSegments(const REAL A1x, const REAL A1y, const REAL A1z, + const REAL A2x, const REAL A2y, const REAL A2z, + const REAL B1x, const REAL B1y, const REAL B1z, + const REAL B2x, const REAL B2y, const REAL B2z, + bool infinite_lines, REAL epsilon, REAL &PointOnSegAx, + REAL &PointOnSegAy, REAL &PointOnSegAz, REAL &PointOnSegBx, + REAL &PointOnSegBy, REAL &PointOnSegBz, REAL &NearestPointX, + REAL &NearestPointY, REAL &NearestPointZ, REAL &NearestVectorX, + REAL &NearestVectorY, REAL &NearestVectorZ, bool &true_intersection) + { + REAL temp = (REAL)0.0; + REAL epsilon_squared = epsilon * epsilon; + + // Compute parameters from Equations (1) and (2) in the text + REAL Lax = A2x - A1x; + REAL Lay = A2y - A1y; + REAL Laz = A2z - A1z; + REAL Lbx = B2x - B1x; + REAL Lby = B2y - B1y; + REAL Lbz = B2z - B1z; + // From Equation (15) + REAL L11 = (Lax * Lax) + (Lay * Lay) + (Laz * Laz); + REAL L22 = (Lbx * Lbx) + (Lby * Lby) + (Lbz * Lbz); + + // Line/Segment A is degenerate ---- Special Case #1 + if (L11 < epsilon_squared) { - const VECTOR3 projected = v0 + e1Hat * e1dot; - edgeDistances[0] = (v - projected).norm(); + PointOnSegAx = A1x; + PointOnSegAy = A1y; + PointOnSegAz = A1z; + FindNearestPointOnLineSegment(B1x, B1y, B1z, Lbx, Lby, Lbz, A1x, A1y, A1z, + infinite_lines, epsilon, PointOnSegBx, PointOnSegBy, + PointOnSegBz, temp); } - const REAL e2dot = e2Hat.dot(ev); - if (e2dot > 0.0 && e2dot < e2.norm()) + // Line/Segment B is degenerate ---- Special Case #1 + else if (L22 < epsilon_squared) { - const VECTOR3 projected = v0 + e2Hat * e2dot; - edgeDistances[1] = (v - projected).norm(); + PointOnSegBx = B1x; + PointOnSegBy = B1y; + PointOnSegBz = B1z; + FindNearestPointOnLineSegment(A1x, A1y, A1z, Lax, Lay, Laz, B1x, B1y, B1z, + infinite_lines, epsilon, PointOnSegAx, PointOnSegAy, + PointOnSegAz, temp); } - const REAL e3dot = e3Hat.dot(ev3); - if (e3dot > 0.0 && e3dot 
< e3.norm()) + // Neither line/segment is degenerate + else { - const VECTOR3 projected = v1 + e3Hat * e3dot; - edgeDistances[2] = (v - projected).norm(); - } + // Compute more parameters from Equation (3) in the text. + REAL ABx = B1x - A1x; + REAL ABy = B1y - A1y; + REAL ABz = B1z - A1z; + + // and from Equation (15). + REAL L12 = -(Lax * Lbx) - (Lay * Lby) - (Laz * Lbz); - // get the distance to each vertex - const VECTOR3 vertexDistances((v - v0).norm(), - (v - v1).norm(), - (v - v2).norm()); - - // get the smallest of both the edge and vertex distances - REAL vertexMin = 1e8; - REAL edgeMin = 1e8; - for(int i = 0;i < 3;++i){ - vertexMin = vertexMin > vertexDistances[i] ? vertexDistances[i] : vertexMin; - edgeMin = edgeMin > edgeDistances[i] ? edgeDistances[i] : edgeMin; + REAL DetL = L11 * L22 - L12 * L12; + // Lines/Segments A and B are parallel ---- special case #2. + if (FABS(DetL) < epsilon) + { + FindNearestPointOfParallelLineSegments(A1x, A1y, A1z, A2x, A2y, A2z, + Lax, Lay, Laz, + B1x, B1y, B1z, B2x, B2y, B2z, + Lbx, Lby, Lbz, + infinite_lines, epsilon, + PointOnSegAx, PointOnSegAy, PointOnSegAz, + PointOnSegBx, PointOnSegBy, PointOnSegBz); + } + // The general case + else + { + // from Equation (15) + REAL ra = Lax * ABx + Lay * ABy + Laz * ABz; + REAL rb = -Lbx * ABx - Lby * ABy - Lbz * ABz; + + REAL t = (L11 * rb - ra * L12)/DetL; // Equation (12) + + #ifdef USE_CRAMERS_RULE + REAL s = (L22 * ra - rb * L12)/DetL; + #else + REAL s = (ra-L12*t)/L11; // Equation (13) + #endif // USE_CRAMERS_RULE + + #ifdef CHECK_ANSWERS + REAL check_ra = s*L11 + t*L12; + REAL check_rb = s*L12 + t*L22; + // assert(FABS(check_ra-ra) < epsilon); + // assert(FABS(check_rb-rb) < epsilon); + #endif // CHECK_ANSWERS + + // if we are dealing with infinite lines or if parameters s and t both + // lie in the range [0,1] then just compute the points using Equations + // (1) and (2) from the text. 
+ PointOnSegAx = (A1x + s * Lax); + PointOnSegAy = (A1y + s * Lay); + PointOnSegAz = (A1z + s * Laz); + PointOnSegBx = (B1x + t * Lbx); + PointOnSegBy = (B1y + t * Lby); + PointOnSegBz = (B1z + t * Lbz); + // otherwise, at least one of s and t is outside of [0,1] and we have to + // handle this case. + if (false == infinite_lines && (OUT_OF_RANGE(s) || OUT_OF_RANGE(t))) + { + AdjustNearestPoints(A1x, A1y, A1z, Lax, Lay, Laz, + B1x, B1y, B1z, Lbx, Lby, Lbz, + epsilon, s, t, + PointOnSegAx, PointOnSegAy, PointOnSegAz, + PointOnSegBx, PointOnSegBy, PointOnSegBz); + } + } } - // return the smallest of those - return (vertexMin < edgeMin) ? vertexMin : edgeMin; - } + NearestPointX = (REAL)0.5 * (PointOnSegAx + PointOnSegBx); + NearestPointY = (REAL)0.5 * (PointOnSegAy + PointOnSegBy); + NearestPointZ = (REAL)0.5 * (PointOnSegAz + PointOnSegBz); - /////////////////////////////////////////////////////////////////////// - // see if the projection of v onto the plane of v0,v1,v2 is inside - // the triangle formed by v0,v1,v2 - /////////////////////////////////////////////////////////////////////// - constexpr bool pointProjectsInsideTriangle(const VECTOR3& v0, const VECTOR3& v1, - const VECTOR3& v2, const VECTOR3& v){ - // get the barycentric coordinates - const VECTOR3 e1 = v1 - v0; - const VECTOR3 e2 = v2 - v0; - const VECTOR3 n = e1.cross(e2); - const VECTOR3 na = (v2 - v1).cross(v - v1); - const VECTOR3 nb = (v0 - v2).cross(v - v2); - const VECTOR3 nc = (v1 - v0).cross(v - v0); - const VECTOR3 barycentric(n.dot(na) / n.l2NormSqr(), - n.dot(nb) / n.l2NormSqr(), - n.dot(nc) / n.l2NormSqr()); - - const REAL barySum = zs::abs(barycentric[0]) + zs::abs(barycentric[1]) + zs::abs(barycentric[2]); - - // if the point projects to inside the triangle, it should sum to 1 - if (zs::abs(barySum - 1.0) < 1e-6) - return true; + NearestVectorX = PointOnSegBx - PointOnSegAx; + NearestVectorY = PointOnSegBy - PointOnSegAy; + NearestVectorZ = PointOnSegBz - PointOnSegAz; - return false; + 
// optional check to indicate if the lines truly intersect + true_intersection = (FABS(NearestVectorX) + + FABS(NearestVectorY) + + FABS(NearestVectorZ)) < epsilon ? true : false; } - - + constexpr void IntersectLineSegments(const VECTOR3& a0, const VECTOR3& a1, + const VECTOR3& b0, const VECTOR3& b1, + VECTOR3& aPoint, VECTOR3& bPoint) + { + VECTOR3 midpoint{}; + VECTOR3 normal{}; + bool intersect{}; + IntersectLineSegments(a0[0], a0[1], a0[2], a1[0], a1[1], a1[2], + b0[0], b0[1], b0[2], b1[0], b1[1], b1[2], + false, 1e-6, + aPoint[0], aPoint[1], aPoint[2], + bPoint[0], bPoint[1], bPoint[2], + midpoint[0], midpoint[1], midpoint[2], + normal[0], normal[1], normal[2], intersect); + } }; }; \ No newline at end of file diff --git a/projects/CuLagrange/fem/collision_energy/edge_edge_collision.hpp b/projects/CuLagrange/fem/collision_energy/edge_edge_collision.hpp index 7a0546606b..b661822939 100644 --- a/projects/CuLagrange/fem/collision_energy/edge_edge_collision.hpp +++ b/projects/CuLagrange/fem/collision_energy/edge_edge_collision.hpp @@ -231,8 +231,10 @@ namespace EDGE_EDGE_COLLISION { // ndotHessian = ndot_hessian(x); const MATRIX12 springLengthH = springLengthHessian(e,n,diff,a,b); - return (REAL)2.0 * _mu * (dyadic_prod(springLengthGrad,springLengthGrad) + - springLength * springLengthH); + //return 2.0 * _mu * (springLengthGrad * springLengthGrad.transpose() + + // springLength * springLengthH); + + return (REAL)2.0 * _mu * dyadic_prod(springLengthGrad,springLengthGrad); } /////////////////////////////////////////////////////////////////////// @@ -288,8 +290,11 @@ namespace EDGE_EDGE_COLLISION { //return 2.0 * _mu * (springLengthGrad * springLengthGrad.transpose() + // springLength * springLengthH); - return (REAL)-2.0 * _mu * (springLength * springLengthH - - zs::dyadic_prod(springLengthGrad,springLengthGrad)); + // return (REAL)-2.0 * _mu * (springLength * springLengthH - + // zs::dyadic_prod(springLengthGrad,springLengthGrad)); + + return (REAL)2.0 * _mu * 
zs::dyadic_prod(springLengthGrad,springLengthGrad); + } }; diff --git a/projects/CuLagrange/fem/collision_energy/edge_edge_sqrt_collision.hpp b/projects/CuLagrange/fem/collision_energy/edge_edge_sqrt_collision.hpp deleted file mode 100644 index 359742184a..0000000000 --- a/projects/CuLagrange/fem/collision_energy/edge_edge_sqrt_collision.hpp +++ /dev/null @@ -1,227 +0,0 @@ -#pragma once - -#include "collision_utils.hpp" - -namespace zeno { -namespace EDGE_EDGE_SQRT_COLLISION { - using namespace COLLISION_UTILS; - -/////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////// -constexpr REAL psi(const VECTOR3 v[4], - const VECTOR2& a, - const VECTOR2& b, - const REAL& _mu, - const REAL& _nu, - const REAL& _eps, - const REAL& _tooSmall) -{ - // convert to vertices and edges - VECTOR3 e[3] = {}; - e[0] = v[3] - v[2]; - e[1] = v[0] - v[2]; - e[2] = v[1] - v[2]; - - // get the interpolated vertices - const VECTOR3 va = (a[0] * v[0] + a[1] * v[1]); - const VECTOR3 vb = (b[0] * v[2] + b[1] * v[3]); - if ((vb - va).norm() < _tooSmall) - return 0.0; - - // there is not sign switch operation - const REAL springLength = _eps - (vb - va).norm(); - return _mu * springLength * springLength; -} - -/////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////// -constexpr REAL psiNegated(const VECTOR3 v[4], - const VECTOR2& a, - const VECTOR2& b, - const REAL& _mu, - const REAL& _nu, - const REAL& _eps, - const REAL& _tooSmall) -{ - // convert to vertices and edges - VECTOR3 e[3] = {}; - e[0] = v[3] - v[2]; - e[1] = v[0] - v[2]; - e[2] = v[1] - v[2]; - - // get the interpolated vertices - const VECTOR3 va = (a[0] * v[0] + a[1] * v[1]); - const VECTOR3 vb = (b[0] * v[2] + b[1] * v[3]); - if ((vb - va).norm() < _tooSmall) - return 0.0; - - const REAL springLength = _eps + (vb - va).norm(); - return _mu * springLength * 
springLength; -} - -/////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////// -constexpr VECTOR12 gradient(const VECTOR3 v[4], - const VECTOR2& a, - const VECTOR2& b, - const REAL& _mu, - const REAL& _nu, - const REAL& _eps, - const REAL& _tooSmall) -{ - // convert to vertices and edges - VECTOR3 e[3] = {}; - e[0] = v[3] - v[2]; - e[1] = v[0] - v[2]; - e[2] = v[1] - v[2]; - - // get the interpolated vertices - const VECTOR3 va = (a[0] * v[0] + a[1] * v[1]); - const VECTOR3 vb = (b[0] * v[2] + b[1] * v[3]); - const VECTOR3 diff = vb - va; - - // if the two are co-linear, give up - // should probably fall back to cross-product formula here - // (see EDGE_HYBRID_COLLISION) - if (diff.norm() < _tooSmall) - return VECTOR12::zeros(); - - // get the normal - VECTOR3 n = diff; - n = n / n.norm(); - - const REAL springLength = _eps - diff.norm(); - return (REAL)-2.0 * _mu * springLength * (vDiffPartial(a,b).transpose() * n); -} - - -/////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////// -constexpr VECTOR12 gradientNegated(const VECTOR3 v[4], - const VECTOR2& a, - const VECTOR2& b, - const REAL& _mu, - const REAL& _nu, - const REAL& _eps, - const REAL& _tooSmall) -{ - // convert to vertices and edges - VECTOR3 e[3] = {}; - e[0] = v[3] - v[2]; - e[1] = v[0] - v[2]; - e[2] = v[1] - v[2]; - - - // get the interpolated vertices - const VECTOR3 va = (a[0] * v[0] + a[1] * v[1]); - const VECTOR3 vb = (b[0] * v[2] + b[1] * v[3]); - const VECTOR3 diff = vb - va; - - // if the two are co-linear, give up - // should probably fall back to cross-product formula here - // (see EDGE_HYBRID_COLLISION) - if (diff.norm() < _tooSmall) - return VECTOR12::zeros(); - - // get the direction - VECTOR3 d = diff; - d = d / d.norm(); - - const REAL springLength = _eps + diff.norm(); - const MATRIX3x12 vPartial = 
vDiffPartial(a,b); - - return (REAL)2.0 * _mu * springLength * (vPartial.transpose() * d); -} - -/////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////// -constexpr MATRIX12 hessian(const VECTOR3 v[4], - const VECTOR2& a, - const VECTOR2& b, - const REAL& _mu, - const REAL& _nu, - const REAL& _eps, - const REAL& _tooSmall) -{ - // convert to vertices and edges - VECTOR3 e[3] = {}; - e[0] = v[3] - v[2]; - e[1] = v[0] - v[2]; - e[2] = v[1] - v[2]; - - // get the interpolated vertices - const VECTOR3 va = (a[0] * v[0] + a[1] * v[1]); - const VECTOR3 vb = (b[0] * v[2] + b[1] * v[3]); - const VECTOR3 diff = vb - va; - const REAL diffNorm = diff.norm(); - - // if the two are co-linear, give up - // should probably fall back to cross-product formula here - // (see EDGE_HYBRID_COLLISION) - if (diffNorm < _tooSmall) - return MATRIX12::zeros(); - - // get the normal - VECTOR3 d = diff; - d = d / d.norm(); - - const MATRIX3x12 vPartial = vDiffPartial(a,b); - const REAL invNorm = (diffNorm >= 1e-8) ? 
1.0 / diffNorm : 1.0; - const REAL invNorm3 = invNorm * invNorm * invNorm; - - const VECTOR12 normPartial = -invNorm * (vPartial.transpose() * diff); - const MATRIX3x12 dGrad = invNorm * vPartial - - invNorm3 * zs::dyadic_prod(diff,(vPartial.transpose() * diff)); - - return (REAL)-2.0 * _mu * ((_eps - diffNorm) * (vPartial.transpose() * dGrad) + - zs::dyadic_prod(normPartial,vPartial.transpose() * d)); -} - -/////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////// -constexpr MATRIX12 hessianNegated(const VECTOR3 v[4], - const VECTOR2& a, - const VECTOR2& b, - const REAL& _mu, - const REAL& _nu, - const REAL& _eps, - const REAL& _tooSmall) -{ - // convert to vertices and edges - VECTOR3 e[3] = {}; - e[0] = v[3] - v[2]; - e[1] = v[0] - v[2]; - e[2] = v[1] - v[2]; - - // get the interpolated vertices - const VECTOR3 va = (a[0] * v[0] + a[1] * v[1]); - const VECTOR3 vb = (b[0] * v[2] + b[1] * v[3]); - const VECTOR3 diff = vb - va; - const REAL diffNorm = diff.norm(); - const REAL diffNorm3 = diffNorm * diffNorm * diffNorm; - - // if the two are co-linear, give up - // should probably fall back to cross-product formula here - // (see EDGE_HYBRID_COLLISION) - if (diffNorm < _tooSmall) - return MATRIX12::zeros(); - - // get the normal - VECTOR3 n = diff; - n = n / n.norm(); - - const MATRIX3x12 vPartial = vDiffPartial(a,b); - const VECTOR12 normPartial = ((REAL)-1.0 / diffNorm) * (vPartial.transpose() * diff); - - const MATRIX3x12 nGrad = ((REAL)1.0 / diffNorm) * vPartial - - ((REAL)1.0 / diffNorm3) * zs::dyadic_prod(diff, (vPartial.transpose() * diff)); - - // this is the energetically consistent one - return (REAL)2.0 * _mu * ((_eps + diffNorm) * (vPartial.transpose() * nGrad) - - zs::dyadic_prod(normPartial,vPartial.transpose() * n)); -} - - - -}; -}; \ No newline at end of file diff --git a/projects/CuLagrange/fem/collision_energy/evaluate_collision.hpp 
b/projects/CuLagrange/fem/collision_energy/evaluate_collision.hpp index a59d0ea97f..ed1851af50 100644 --- a/projects/CuLagrange/fem/collision_energy/evaluate_collision.hpp +++ b/projects/CuLagrange/fem/collision_energy/evaluate_collision.hpp @@ -29,8 +29,8 @@ #include "vertex_face_sqrt_collision.hpp" #include "vertex_face_collision.hpp" -#include "edge_edge_sqrt_collision.hpp" -#include "edge_edge_collision.hpp" +// #include "edge_edge_sqrt_collision.hpp" +// #include "edge_edge_collision.hpp" namespace zeno { namespace COLLISION_UTILS { @@ -55,7 +55,7 @@ void do_facet_point_collision_detection(Pol& cudaPol, const SurfTriTileVec& tris, SurfTriNrmVec& sttemp, SurfLineNrmVec& setemp, - FPCollisionBuffer& cptemp, + FPCollisionBuffer& fp_collision_buffer, // const bvh_t& stBvh, T in_collisionEps,T out_collisionEps) { using namespace zs; @@ -68,6 +68,7 @@ void do_facet_point_collision_detection(Pol& cudaPol, auto avgl = compute_average_edge_length(cudaPol,verts,xtag,tris); auto bvh_thickness = 5 * avgl; + if(!calculate_facet_normal(cudaPol,verts,xtag,tris,sttemp,"nrm")){ throw std::runtime_error("fail updating facet normal"); } @@ -79,8 +80,8 @@ void do_facet_point_collision_detection(Pol& cudaPol, setemp,"nrm")){ throw std::runtime_error("fail calculate cell bisector normal"); } - TILEVEC_OPS::fill<4>(cudaPol,cptemp,"inds",zs::vec::uniform(-1).template reinterpret_bits()); - TILEVEC_OPS::fill(cudaPol,cptemp,"inverted",reinterpret_bits((int)0)); + TILEVEC_OPS::fill<4>(cudaPol,fp_collision_buffer,"inds",zs::vec::uniform(-1).template reinterpret_bits()); + TILEVEC_OPS::fill(cudaPol,fp_collision_buffer,"inverted",reinterpret_bits((int)0)); cudaPol(zs::range(points.size()),[in_collisionEps = in_collisionEps, out_collisionEps = out_collisionEps, verts = proxy({},verts),xtag, @@ -89,12 +90,21 @@ void do_facet_point_collision_detection(Pol& cudaPol, points = proxy({},points), lines = proxy({},lines), tris = proxy({},tris), - cptemp = proxy({},cptemp), + fp_collision_buffer = 
proxy({},fp_collision_buffer), stbvh = proxy(stBvh),thickness = bvh_thickness] ZS_LAMBDA(int svi) mutable { auto vi = reinterpret_bits(points("inds",svi)); auto active = verts("active",vi); - if(active < 1e-6) + bool is_active_vert = true; + if(active < 1e-6){ + is_active_vert = false; return; + } + + if(verts.hasProperty("is_verted")) { + auto is_inverted =reinterpret_bits(verts("is_inverted",vi)); + if(is_inverted) + return; + } auto p = verts.template pack<3>(xtag,vi); auto bv = bv_t{get_bounding_box(p - thickness, p + thickness)}; @@ -108,12 +118,19 @@ void do_facet_point_collision_detection(Pol& cudaPol, if(tri[0] == vi || tri[1] == vi || tri[2] == vi) return; + + if(verts.hasProperty("is_verted")) { + + for(int i = 0;i != 3;++i) + if(reinterpret_bits(verts("is_inverted",tri[i]))) + return; + + } + bool is_active_tri = true; for(int i = 0;i != 3;++i) if(verts("active",tri[i]) < 1e-6) - is_active_tri = false; - if(!is_active_tri) - return; + return; T dist = (T)0.0; @@ -133,22 +150,6 @@ void do_facet_point_collision_detection(Pol& cudaPol, if(areaDeform < 1e-1) return; - // if(COLLISION_UTILS::is_inside_the_cell(verts,xtag, - // lines,tris, - // sttemp,"nrm", - // setemp,"nrm", - // stI,p,in_collisionEps,out_collisionEps,dist)) { - // // cptemp.template tuple<4>("inds",svi * MAX_FP_COLLISION_PAIRS + nm_collision_pairs) = zs::vec(vi,tri[0],tri[1],tri[2]).template reinterpret_bits(); - // // auto vertexFaceCollisionAreas = tris("area",stI) + points("area",svi); - // // cptemp("area",svi * MAX_FP_COLLISION_PAIRS + nm_collision_pairs) = vertexFaceCollisionAreas; - // // if(vertexFaceCollisionAreas < 0) - // // printf("negative face area detected\n"); - // // int is_inverted = dist > (T)0.0 ? 
1 : 0; - // // cptemp("inverted",svi * MAX_FP_COLLISION_PAIRS + nm_collision_pairs) = reinterpret_bits(is_inverted); - // // nm_collision_pairs++; - - // } - auto nrm = sttemp.template pack<3>("nrm",stI); auto seg = p - verts.template pack<3>(xtag,tri[0]); @@ -165,12 +166,12 @@ void do_facet_point_collision_detection(Pol& cudaPol, // auto avge = (e01 + e02 + e12)/(T)3.0; T barySum = (T)1.0; - T distance = COLLISION_UTILS::pointTriangleDistance(t0,t1,t2,p,barySum); + T distance = LSL_GEO::pointTriangleDistance(t0,t1,t2,p,barySum); // auto max_ratio = inset_ratio > outset_ratio ? inset_ratio : outset_ratio; // collisionEps = avge * max_ratio; auto collisionEps = seg.dot(nrm) > 0 ? out_collisionEps : in_collisionEps; - if(barySum > 2) + if(barySum > 5) return; if(distance > collisionEps) @@ -180,7 +181,7 @@ void do_facet_point_collision_detection(Pol& cudaPol, // return; // if the triangle cell is too degenerate - if(!pointProjectsInsideTriangle(t0,t1,t2,p)) + if(!LSL_GEO::pointProjectsInsideTriangle(t0,t1,t2,p)) for(int i = 0;i != 3;++i) { auto bisector_normal = get_bisector_orient(lines,tris,setemp,"nrm",stI,i); // auto test = bisector_normal.cross(nrm).norm() < 1e-2; @@ -193,13 +194,13 @@ void do_facet_point_collision_detection(Pol& cudaPol, // now the points is inside the cell - cptemp.template tuple<4>("inds",svi * MAX_FP_COLLISION_PAIRS + nm_collision_pairs) = zs::vec(vi,tri[0],tri[1],tri[2]).template reinterpret_bits(); + fp_collision_buffer.template tuple<4>("inds",svi * MAX_FP_COLLISION_PAIRS + nm_collision_pairs) = zs::vec(vi,tri[0],tri[1],tri[2]).template reinterpret_bits(); auto vertexFaceCollisionAreas = tris("area",stI) + points("area",svi); - cptemp("area",svi * MAX_FP_COLLISION_PAIRS + nm_collision_pairs) = vertexFaceCollisionAreas; + fp_collision_buffer("area",svi * MAX_FP_COLLISION_PAIRS + nm_collision_pairs) = vertexFaceCollisionAreas; if(vertexFaceCollisionAreas < 0) printf("negative face area detected\n"); int is_inverted = dist > (T)0.0 ? 
1 : 0; - cptemp("inverted",svi * MAX_FP_COLLISION_PAIRS + nm_collision_pairs) = reinterpret_bits(is_inverted); + fp_collision_buffer("inverted",svi * MAX_FP_COLLISION_PAIRS + nm_collision_pairs) = reinterpret_bits(is_inverted); nm_collision_pairs++; }; @@ -207,258 +208,716 @@ void do_facet_point_collision_detection(Pol& cudaPol, }); } -// template +void do_kinematic_point_collision_detection(Pol& cudaPol, + PosTileVec& verts,const zs::SmallString& xtag, + const SurfPointTileVec& points, + SurfLineTileVec& lines, + SurfTriTileVec& tris, + SurfLineNrmTileVec& nrmLines, + SurfTriNrmTileVec& nrmTris, + const KPosTileVec& kverts, + KCollisionBuffer& kc_buffer, + T in_collisionEps,T out_collisionEps,bool update_normal = true) { + using namespace zs; + constexpr auto space = execspace_e::cuda; + + auto stBvh = bvh_t{}; + auto bvs = retrieve_bounding_volumes(cudaPol,verts,tris,wrapv<3>{},(T)0.0,xtag); + stBvh.build(cudaPol,bvs); + + auto avgl = compute_average_edge_length(cudaPol,verts,xtag,tris); + auto bvh_thickness = 5 * avgl; + + if(update_normal) { + if(!calculate_facet_normal(cudaPol,verts,xtag,tris,nrmTris,"nrm")){ + throw std::runtime_error("fail updating kinematic facet normal"); + } + if(!COLLISION_UTILS::calculate_cell_bisector_normal(cudaPol, + verts,xtag, + lines, + tris, + nrmTris,"nrm", + nrmLines,"nrm")){ + throw std::runtime_error("fail calculate cell bisector normal"); + } + } + + TILEVEC_OPS::fill<2>(cudaPol,kc_buffer,"inds",zs::vec::uniform(-1).template reinterpret_bits()); + TILEVEC_OPS::fill(cudaPol,kc_buffer,"inverted",reinterpret_bits((int)0)); + + cudaPol(zs::range(kverts.size()),[in_collisionEps = in_collisionEps, + out_collisionEps = out_collisionEps, + verts = proxy({},verts),xtag, + lines = proxy({},lines), + tris = proxy({},tris), + nrmTris = proxy({},nrmTris), + nrmLines = proxy({},nrmLines), + kverts = proxy({},kverts), + kc_buffer = proxy({},kc_buffer), + stBvh = proxy(stBvh),thickness = bvh_thickness] ZS_LAMBDA(int kvi) mutable { + + auto 
kp = kverts.pack(dim_c<3>,"x",kvi); + auto bv = bv_t{get_bounding_box(kp - thickness,kp + thickness)}; + + int nm_collision_pairs = 0; + auto process_kinematic_vertex_face_collision_pairs = [&](int stI) { + if(nm_collision_pairs >= MAX_KINEMATIC_COLLISION_PAIRS) + return; + auto tri = tris.pack(dim_c<3>,"inds",stI).reinterpret_bits(int_c); + for(int i = 0;i != 3;++i) + if(verts("k_active",tri[i]) < 1e-6) + return; + + auto average_thickness = (T)0.0; + if(verts.hasProperty("k_thickness")){ + // average_thickness = (T)0.0; + for(int i = 0;i != 3;++i) + average_thickness += verts("k_thickness",tri[i])/(T)3.0; + } + + + + if(verts.hasProperty("is_verted")) { + + for(int i = 0;i != 3;++i) + if(reinterpret_bits(verts("is_inverted",tri[i]))) + return; + + } + + T dist = (T)0.0; + + // if(tri[0] > 5326 || tri[1] > 5326 || tri[2] > 5326){ + // printf("invalid tri detected : %d %d %d\n",tri[0],tri[1],tri[2]); + // return; + // } + + auto nrm = nrmTris.pack(dim_c<3>,"nrm",stI); + auto seg = kp - verts.pack(dim_c<3>,xtag,tri[0]); + + + auto t0 = verts.pack(dim_c<3>,xtag,tri[0]); + auto t1 = verts.pack(dim_c<3>,xtag,tri[1]); + auto t2 = verts.pack(dim_c<3>,xtag,tri[2]); + + auto e01 = (t0 - t1).norm(); + auto e02 = (t0 - t2).norm(); + auto e12 = (t1 - t2).norm(); + + T barySum = (T)1.0; + T distance = LSL_GEO::pointTriangleDistance(t0,t1,t2,kp,barySum); + + dist = seg.dot(nrm); + // increase the stability, the tri must already in collided in the previous frame before been penerated in the current frame + // if(dist > 0 && tris("collide",stI) < 0.5) + // return; + + auto collisionEps = dist < 0 ? 
out_collisionEps * ((T)1.0 + average_thickness) : in_collisionEps; + + if(barySum > 1.1) + return; + + if(distance > collisionEps) + return; + + // if(dist < -(avge * inset_ratio + 1e-6) || dist > (outset_ratio * avge + 1e-6)) + // return; + + // if the triangle cell is too degenerate + if(!LSL_GEO::pointProjectsInsideTriangle(t0,t1,t2,kp)) + for(int i = 0;i != 3;++i) { + auto bisector_normal = get_bisector_orient(lines,tris,nrmLines,"nrm",stI,i); + // auto test = bisector_normal.cross(nrm).norm() < 1e-2; + seg = kp - verts.pack(dim_c<3>,xtag,tri[i]); + if(bisector_normal.dot(seg) < 0) + return; + } + + kc_buffer.template tuple<2>("inds",kvi * MAX_KINEMATIC_COLLISION_PAIRS + nm_collision_pairs) = zs::vec(kvi,stI).template reinterpret_bits(); + auto vertexFaceCollisionAreas = /*tris("area",stI) + */kverts("area",kvi); + kc_buffer("area",kvi * MAX_KINEMATIC_COLLISION_PAIRS + nm_collision_pairs) = vertexFaceCollisionAreas; + // if(vertexFaceCollisionAreas < 0) + // printf("negative face area detected\n"); + int is_inverted = dist > (T)0.0 ? 
1 : 0; + kc_buffer("inverted",kvi * MAX_KINEMATIC_COLLISION_PAIRS + nm_collision_pairs) = reinterpret_bits(is_inverted); + nm_collision_pairs++; + }; + stBvh.iter_neighbors(bv,process_kinematic_vertex_face_collision_pairs); + }); +} + + + +// template // void do_edge_edge_collision_detection(Pol& cudaPol, // const PosTileVec& verts,const zs::SmallString& xtag, // const SurfPointTileVec& points, // const SurfLineTileVec& lines, // const SurfTriTileVec& tris, -// SurfTriNrmVec& sttemp, -// SurfLineNrmVec& setemp, -// EECollisionBuffer& eetemp, -// const PointNeighHash& pphash, +// SurfTriNrmVec& sttemp,SurfLineNrmVec& setemp, +// EECollisionBuffer& ee_collision_buffer, +// // const PointNeighHash& pphash,// we might need an one-ring neighbor removal tech // T in_collisionEps,T out_collisionEps) { // using namespace zs; // constexpr auto space = execspace_e::cuda; // auto seBvh = bvh_t{}; // auto bvs = retrieve_bounding_volumes(cudaPol,verts,lines,wrapv<2>{},(T)0.0,xtag); +// seBvh.build(cudaPol,bvs); // auto avgl = compute_average_edge_length(cudaPol,verts,xtag,lines); // auto bvh_thickness = 5 * avgl; -// if(!calculate_facet_normal(cudaPol,verts,xtag,sttemp,"nrm")) -// throw std::runtime_error("fail updating facet normal"); +// if(!sttemp.hasProperty("nrm") || sttemp.getChannelSize("nrm") != 3) +// throw std::runtime_error("do_edge_edge_collision_detection::invalid sttemp's \"nrm\" channel"); + +// if(!setemp.hasProperty("nrm") || setemp.getChannelSize("nrm") != 3) +// throw std::runtime_error("do_edge_edge_collision_detection::invalid setemp's \"nrm\" channel"); + +// if(setemp.size() != lines.size()) +// throw std::runtime_error("setemp.size() != lines.size()"); +// if(sttemp.size() != tris.size()) +// throw std::runtime_error("sttemp.size() != tris.size()"); + +// // std::cout << "do edge edge collision detection" << std::endl; +// if(!calculate_facet_normal(cudaPol,verts,xtag,tris,sttemp,"nrm")) +// throw 
std::runtime_error("do_edge_edge_collision_detection::fail updating facet normal"); + + +// // std::cout << "calculate edge normal" << std::endl; + +// if(!calculate_edge_normal_from_facet_normal(cudaPol,sttemp,"nrm",setemp,"nrm",lines)) +// throw std::runtime_error("do_edge_edge_collision_detection::fail updating edge normal"); + +// if(ee_collision_buffer.size() != lines.size()) +// throw std::runtime_error("do_edge_edge_collision_detection::invalid ee_colliision_buffer size"); + +// if(!ee_collision_buffer.hasProperty("inds") || ee_collision_buffer.getChannelSize("inds") != 4) +// throw std::runtime_error("do_edge_edge_collision_detection::invalid ee_colliision_buffer's \"inds\" channel"); + +// if(!ee_collision_buffer.hasProperty("inverted") || ee_collision_buffer.getChannelSize("inverted") != 1) +// throw std::runtime_error("do_edge_edge_collision_detection::invalid ee_colliision_buffer's \"inverted\" channel"); + +// if(!ee_collision_buffer.hasProperty("abary") || ee_collision_buffer.getChannelSize("abary") != 2) +// throw std::runtime_error("do_edge_edge_collision_detection::invalid ee_colliision_buffer's \"abary\" channel"); + +// if(!ee_collision_buffer.hasProperty("bbary") || ee_collision_buffer.getChannelSize("bbary") != 2) +// throw std::runtime_error("do_edge_edge_collision_detection::invalid ee_colliision_buffer's \"bbary\" channel"); + +// if(!ee_collision_buffer.hasProperty("area") || ee_collision_buffer.getChannelSize("area") != 1) +// throw std::runtime_error("do_edge_edge_collision_detection::invalid ee_colliision_buffer's \"area\" channel"); + +// if(!lines.hasProperty("area") || lines.getChannelSize("area") != 1) +// throw std::runtime_error("do_edge_edge_collision_detection::invalid lines's \"area\" channel"); + +// TILEVEC_OPS::fill<4>(cudaPol,ee_collision_buffer,"inds",zs::vec::uniform(-1).template reinterpret_bits()); +// TILEVEC_OPS::fill(cudaPol,ee_collision_buffer,"inverted",reinterpret_bits((int)0)); +// // 
TILEVEC_OPS::fill(cudaPol,ee_collision_buffer,"abary",(T)0.0); +// // TILEVEC_OPS::fill(cudaPol,ee_collision_buffer,"bbary",(T)0.0); + +// if(!verts.hasProperty("active") || verts.getChannelSize("active") != 1) +// throw std::runtime_error("do_edge_edge_collision_detection::invalid verts' \"active\" channel"); +// if(!verts.hasProperty(xtag) || verts.getChannelSize(xtag) != 3) +// throw std::runtime_error("do_edge_edge_collision_detection::invalid verts' \"xtag\" channel"); + +// cudaPol(zs::range(lines.size()),[in_collisionEps = in_collisionEps, +// out_collisionEps = out_collisionEps, +// verts = proxy({},verts),xtag = xtag, +// points = proxy({},points), +// lines = proxy({},lines), +// tris = proxy({},tris), +// sttemp = proxy({},sttemp), +// setemp = proxy({},setemp), +// ee_collision_buffer = proxy({},ee_collision_buffer), +// seBvh = proxy(seBvh), +// thickness = bvh_thickness] ZS_LAMBDA(int sei) mutable { +// auto einds = lines.template pack<2>("inds",sei).reinterpret_bits(int_c); +// auto id0 = einds[0]; +// auto id1 = einds[1]; +// auto is_active0 = verts("active",id0) > 1e-6; +// auto is_active1 = verts("active",id1) > 1e-6; + +// if(!is_active0 || !is_active1){ +// // printf("skip inactive edge %d\n",sei); +// return; +// } + +// auto a0 = verts.template pack<3>(xtag,id0); +// auto a1 = verts.template pack<3>(xtag,id1); + +// auto ac = (a0 + a1) / (T)2.0; +// auto bv = bv_t{get_bounding_box(ac - thickness,ac + thickness)}; + +// // int nm_collision_pairs = 0; +// int closestEdge = -1; +// T closestDistance = 1e8; + +// zs::vec aClosest{}; +// zs::vec bClosest{}; +// zs::vec aClosestPoint{}; +// zs::vec bClosestPoint{}; + +// auto aNrm = setemp.template pack<3>("nrm",sei); + +// auto process_edge_edge_collision_pairs = [&](int nseI) { +// // printf("check edge pairs : %d %d\n",sei,nseI); + +// zs::vec aPoint{}; +// zs::vec bPoint{}; +// // zs::vec bNrm{}; +// zs::vec a{},b{}; + +// auto nedge = lines.pack(dim_c<2>,"inds",nseI).reinterpret_bits(int_c); + 
+// if(nedge[0] == id0 || nedge[1] == id0 || nedge[0] == id1 || nedge[1] == id1){ +// // printf("skip neighbor pairs : %d %d\n",sei,nseI); +// return; +// } + +// auto is_active0_nei = verts("active",nedge[0]) > 1e-6; +// auto is_active1_nei = verts("active",nedge[1]) > 1e-6; + +// if(!is_active0_nei || !is_active1_nei){ +// // printf("skip inactive nedge %d\n",sei); +// return; +// } + + +// // // the two edges should orient in different directions +// auto bNrm = setemp.template pack<3>("nrm",nseI); +// auto orient = bNrm.dot(aNrm); +// if(orient > 0.2){ +// // printf("skip pairs : %d %d due to orient problem %f %f %f\n",sei,nseI,(float)orient,(float)bNrm.norm(),(float)aNrm.norm()); +// return; +// } + +// auto nid0 = nedge[0]; +// auto nid1 = nedge[1]; + +// auto b0 = verts.template pack<3>(xtag,nid0); +// auto b1 = verts.template pack<3>(xtag,nid1); + +// COLLISION_UTILS::IntersectLineSegments(a0,a1,b0,b1,aPoint,bPoint); +// auto distance = (aPoint - bPoint).norm(); + +// if(distance > closestDistance){ +// // printf("skip pairs : %d %d due to distance %f %f\n",sei,nseI,(float)distance,(float)closestDistance); +// return; +// } + +// zs::vec ea = a1 - a0; +// zs::vec eb = b1 - b0; + +// a[1] = (aPoint - a0).norm() / ea.norm(); +// a[0] = (T)1.0 - a[1]; + +// b[1] = (bPoint - b0).norm() / eb.norm(); +// b[0] = (T)1.0 - b[1]; + +// T skipEps = 1e-4; +// if ((a[0] < skipEps) || (a[0] > 1.0 - skipEps)) return; +// if ((a[1] < skipEps) || (a[1] > 1.0 - skipEps)) return; +// if ((b[0] < skipEps) || (b[0] > 1.0 - skipEps)) return; +// if ((b[1] < skipEps) || (b[1] > 1.0 - skipEps)) return; + +// closestDistance = distance; +// closestEdge = nseI; + +// aClosest = a; +// bClosest = b; +// aClosestPoint = aPoint; +// bClosestPoint = bPoint; +// }; +// seBvh.iter_neighbors(bv,process_edge_edge_collision_pairs); + + + +// if(closestEdge == -1) return; + +// // printf("find closest pairs : %d -> %d\n",sei,closestEdge); + + +// if(closestEdge >= lines.size()){ +// 
printf("closestEdge bigger than lines size\n"); +// return; +// } + +// if(lines.size() != setemp.size()){ +// printf("lines size and setemp size not match\n"); +// return; +// } +// if(!setemp.hasProperty("nrm")){ +// printf("setemp has no nrm channel"); +// return; +// } + +// auto innerEdge = lines.pack(dim_c<2>,"inds",closestEdge).reinterpret_bits(int_c); + +// // return; + +// // // skip the one-ring neighbor_check +// // bool insideOneRing = false; + +// // for (int j = 0; j < 2; j++) +// // { +// // pair lookup; +// // lookup.first = outerEdge[j]; +// // for (int i = 0; i < 2; i++) +// // { +// // lookup.second = innerEdge[i]; +// // if (_insideSurfaceVertexOneRing.find(lookup) != _insideSurfaceVertexOneRing.end()) +// // insideOneRing = true; +// // } +// // } +// // if (insideOneRing) return; +// auto a2b = bClosestPoint - aClosestPoint; +// auto bNrm = setemp.template pack<3>("nrm",closestEdge); + + +// // auto avgNrm = (bNrm - aNrm).normalized(); +// bool is_penertrating = a2b.dot(aNrm) < 0 && a2b.dot(bNrm) > 0; + +// auto collisionEps = is_penertrating ? in_collisionEps : out_collisionEps; + +// // then there is edge edge collision +// if(closestDistance > collisionEps) +// return; + +// // if(is_penertrating) +// // printf("find penertrating pair %d %d %d %d\n",einds[0],einds[1],innerEdge[0],innerEdge[1]); + + +// ee_collision_buffer.template tuple<4>("inds",sei) = zs::vec(einds[0],einds[1],innerEdge[0],innerEdge[1]).template reinterpret_bits(); +// auto edgeEdgeCollsionAreas = lines("area",sei) + lines("area",closestEdge); +// ee_collision_buffer("area",sei) = edgeEdgeCollsionAreas; + +// int is_inverted = is_penertrating ? 
1 : 0; +// ee_collision_buffer("inverted",sei) = reinterpret_bits(is_inverted); + +// ee_collision_buffer.template tuple<4>("bary",sei) = zs::vec(aClosest[0],aClosest[1],bClosest[0],bClosest[1]); + +// // return; + +// ee_collision_buffer.template tuple<2>("abary",sei) = aClosest; + +// // ee_collision_buffer("abary",0,sei) = (T)0.0; +// // ee_collision_buffer("abary",1,sei) = (T)0.0; +// // // return; +// ee_collision_buffer.template tuple<2>("bbary",sei) = bClosest; +// // ee_collision_buffer("bbary",0,sei) = (T)0.0; +// // ee_collision_buffer("bbary",1,sei) = (T)0.0; +// }); // } -template -void evaluate_collision_grad_and_hessian(Pol& cudaPol, - const PosTileVec& verts,const zs::SmallString& xtag, - FPCollisionBuffer& cptemp, + typename FPCollisionBuffer, + typename GradHessianTileVec> +void evaluate_fp_collision_grad_and_hessian( + Pol& cudaPol, + const PosTileVec& verts,const zs::SmallString& xtag,const zs::SmallString& vtag,T dt, + const FPCollisionBuffer& fp_collision_buffer,// recording all the fp collision pairs + GradHessianTileVec& gh_buffer,int offset, T in_collisionEps,T out_collisionEps, T collisionStiffness, - T mu,T lambda) { + T mu,T lambda,T kd_theta) { using namespace zs; constexpr auto space = execspace_e::cuda; - TILEVEC_OPS::fill<12*12>(cudaPol,cptemp,"H",zs::vec::zeros()); - TILEVEC_OPS::fill<12>(cudaPol,cptemp,"grad",zs::vec::zeros()); - // TILEVEC_OPS::fill(cudaPol,cptemp,"area",(T)0.0); - -#if 0 - int nm_points = cptemp.size() / MAX_FP_COLLISION_PAIRS; - cudaPol(zs::range(nm_points), - [verts = proxy({},verts),xtag, - cptemp = proxy({},cptemp), + + int start = offset; + int fp_size = fp_collision_buffer.size(); + + TILEVEC_OPS::fill_range(cudaPol,gh_buffer,"H",(T)0.0,start,fp_size); + TILEVEC_OPS::fill_range(cudaPol,gh_buffer,"grad",(T)0.0,start,fp_size); + + // std::cout << "inds size compair : " << fp_collision_buffer.getChannelSize("inds") << "\t" << gh_buffer.getChannelSize("inds") << std::endl; + + 
TILEVEC_OPS::copy(cudaPol,fp_collision_buffer,"inds",gh_buffer,"inds",start); + + cudaPol(zs::range(fp_size), + [verts = proxy({},verts),xtag,vtag,dt,kd_theta, + fp_collision_buffer = proxy({},fp_collision_buffer), + gh_buffer = proxy({},gh_buffer), in_collisionEps = in_collisionEps, out_collisionEps = out_collisionEps, stiffness = collisionStiffness, - mu = mu,lam = lambda] ZS_LAMBDA(int pi) mutable { - for(int i = 0;i != MAX_FP_COLLISION_PAIRS;++i) { - auto inds = cptemp.template pack<4>("inds",pi * MAX_FP_COLLISION_PAIRS + i).reinterpret_bits(int_c); + mu = mu,lam = lambda,start = start] ZS_LAMBDA(int cpi) mutable { + auto inds = fp_collision_buffer.template pack<4>("inds",cpi).reinterpret_bits(int_c); for(int j = 0;j != 4;++j) if(inds[j] < 0) return; - - for(int j = 0;j != 4;++j){ - auto active = verts("active",inds[j]); - if(active < 1e-6) - return; - } vec3 cv[4] = {}; for(int j = 0;j != 4;++j) cv[j] = verts.template pack<3>(xtag,inds[j]); - + // auto is_inverted = reinterpret_bits(fp_collision_buffer("inverted",cpi)); + // auto ceps = is_inverted ? in_collisionEps : out_collisionEps; - auto is_inverted = reinterpret_bits(cptemp("inverted",pi * MAX_FP_COLLISION_PAIRS + i)); - auto ceps = is_inverted ? 
in_collisionEps : out_collisionEps; + auto ceps = out_collisionEps; + // ceps += (T)1e-2 * ceps; auto alpha = stiffness; - auto beta = cptemp("area",pi * MAX_FP_COLLISION_PAIRS + i); - cptemp.template tuple<12>("grad",pi * MAX_FP_COLLISION_PAIRS + i) = alpha * beta * VERTEX_FACE_SQRT_COLLISION::gradient(cv,mu,lam,ceps); - cptemp.template tuple<12*12>("H",pi * MAX_FP_COLLISION_PAIRS + i) = alpha * beta * VERTEX_FACE_SQRT_COLLISION::hessian(cv,mu,lam,ceps); - } + auto beta = fp_collision_buffer("area",cpi); + + auto cforce = -alpha * beta * VERTEX_FACE_SQRT_COLLISION::gradient(cv,mu,lam,ceps); + auto K = alpha * beta * VERTEX_FACE_SQRT_COLLISION::hessian(cv,mu,lam,ceps); + + // gh_buffer.template tuple<12>("grad",cpi + start) = -alpha * beta * VERTEX_FACE_SQRT_COLLISION::gradient(cv,mu,lam,ceps); + // gh_buffer.template tuple<12*12>("H",cpi + start) = alpha * beta * VERTEX_FACE_SQRT_COLLISION::hessian(cv,mu,lam,ceps); + + + // adding rayleigh damping term + vec3 v0[4] = {verts.pack(dim_c<3>,vtag, inds[0]), + verts.pack(dim_c<3>,vtag, inds[1]), + verts.pack(dim_c<3>,vtag, inds[2]), + verts.pack(dim_c<3>,vtag, inds[3])}; + auto vel = COLLISION_UTILS::flatten(v0); + + auto C = K * kd_theta; + auto dforce = -C * vel; + gh_buffer.template tuple<12>("grad",cpi + start) = cforce + dforce; + gh_buffer.template tuple<12*12>("H",cpi + start) = K + C/dt; }); -#else - cudaPol(zs::range(cptemp.size()), - [verts = proxy({},verts),xtag, - cptemp = proxy({},cptemp), + +} + +// TODO: add damping collision term +template +void evaluate_kinematic_fp_collision_grad_and_hessian( + Pol& cudaPol, + const TetTileVec& eles, + const PosTileVec& verts,const zs::SmallString& xtag,const zs::SmallString& vtag,T dt, + const SurfTriTileVec& tris, + const PosTileVec& kverts, + const FPCollisionBuffer& kc_buffer, + GradHessianTileVec& gh_buffer,int offset, + T in_collisionEps,T out_collisionEps, + T collisionStiffness, + T mu,T lambda,T kd_theta) { + using namespace zs; + constexpr auto space = 
execspace_e::cuda; + + int start = offset; + int fp_size = kc_buffer.size(); + + // TILEVEC_OPS::fill_range(cudaPol,gh_buffer,"H",(T)0.0,start,fp_size); + // TILEVEC_OPS::fill_range(cudaPol,gh_buffer,"grad",(T)0.0,start,fp_size); + + // get only the dynamic object's dofs + // TILEVEC_OPS::copy(cudaPol,kc_buffer,"inds",gh_buffer,"inds",start); + // cudaPol(zs::range(fp_size), + // [gh_buffer = proxy({},gh_buffer),start = start] ZS_LAMBDA(int fpi) mutable { + // gh_buffer("inds",0,start + fpi) = gh_buffer("inds",1,start + fpi); + // auto tmp = gh_buffer("inds",2,start + fpi); + // gh_buffer("inds",2,start + fpi) = gh_buffer("inds",3,start + fpi); + // gh_buffer("inds",3,start + fpi) = tmp; + // }); + + + cudaPol(zs::range(fp_size), + [verts = proxy({},verts),xtag,vtag,dt,kd_theta, + eles = proxy({},eles), + tris = proxy({},tris), + kverts = proxy({},kverts), + kc_buffer = proxy({},kc_buffer), + gh_buffer = proxy({},gh_buffer),start, in_collisionEps = in_collisionEps, out_collisionEps = out_collisionEps, stiffness = collisionStiffness, mu = mu,lam = lambda] ZS_LAMBDA(int cpi) mutable { - auto inds = cptemp.template pack<4>("inds",cpi).reinterpret_bits(int_c); - for(int j = 0;j != 4;++j) - if(inds[j] < 0) + auto inds = kc_buffer.pack(dim_c<2>,"inds",cpi).reinterpret_bits(int_c); + // auto oinds = kc_buffer.pack(dim_c<4>,"inds",cpi).reinterpret_bits(int_c); + for(int i = 0;i != 2;++i) + if(inds[i] < 0) return; vec3 cv[4] = {}; - for(int j = 0;j != 4;++j) - cv[j] = verts.template pack<3>(xtag,inds[j]); - - auto is_inverted = reinterpret_bits(cptemp("inverted",cpi)); - // auto ceps = is_inverted ? 
in_collisionEps : out_collisionEps; + cv[0] = kverts.pack(dim_c<3>,"x",inds[0]); + auto tri = tris.pack(dim_c<3>,"inds",inds[1]).reinterpret_bits(int_c); + for(int j = 1;j != 4;++j) + cv[j] = verts.template pack<3>(xtag,tri[j-1]); + + // vec3 cvel[4] = {}; + // cvel[0] = vec3::zeros(); + // for(int j = 1;j != 4;++j) + // cvel[j] = verts.template pack<3>(vel_tag,inds[j]); + + // auto is_inverted = reinterpret_bits(kc_buffer("inverted",cpi)); + auto average_thickness = (T)0.0; + if(verts.hasProperty("k_thickness")){ + // average_thickness = (T)0.0; + for(int i = 0;i != 3;++i) + average_thickness += verts("k_thickness",tri[i])/(T)3.0; + } - auto ceps = out_collisionEps; - // ceps += (T)1e-2 * ceps; + auto ceps = out_collisionEps * ((T)1.0 + average_thickness); auto alpha = stiffness; - auto beta = cptemp("area",cpi); - -#if 0 - cptemp.template tuple<12>("grad",cpi) = alpha * beta * VERTEX_FACE_COLLISION::gradient(cv,mu,lam,out_collisionEps); - cptemp.template tuple<12*12>("H",cpi) = alpha * beta * VERTEX_FACE_COLLISION::hessian(cv,mu,lam,out_collisionEps); -#else - cptemp.template tuple<12>("grad",cpi) = -alpha * beta * VERTEX_FACE_SQRT_COLLISION::gradient(cv,mu,lam,ceps); - cptemp.template tuple<12*12>("H",cpi) = alpha * beta * VERTEX_FACE_SQRT_COLLISION::hessian(cv,mu,lam,ceps); -#endif + auto beta = kc_buffer("area",cpi); + + // change the + + auto cgrad = -alpha * beta * VERTEX_FACE_SQRT_COLLISION::gradient(cv,mu,lam,ceps,true); + auto cH = alpha * beta * VERTEX_FACE_SQRT_COLLISION::hessian(cv,mu,lam,ceps,true); + + auto ei = reinterpret_bits(tris("ft_inds",inds[1])); + // auto cp = gh_buffer.pack(dim_c<2>,"inds",ei).reinterpret_bits(int_c); + // auto pidx = cp[0]; + // auto tri = tris.pack(dim_c<3>,"inds",cp[1]).reinterpret_bits(int_c); + auto tet = eles.pack(dim_c<4>,"inds",ei).reinterpret_bits(int_c); + auto inds_reorder = zs::vec::zeros(); + for(int i = 0;i != 3;++i){ + auto idx = tri[i]; + for(int j = 0;j != 4;++j) + if(idx == tet[j]) + inds_reorder[i] = j; + 
} - // printf("cpi[%d] : %f %f %f\n",cpi,(float)alpha,(float)beta,(float)cptemp.template pack<12>("grad",cpi).norm()); + vec3 v0[4] = {zs::vec::zeros(), + verts.pack(dim_c<3>,vtag, tri[0]), + verts.pack(dim_c<3>,vtag, tri[1]), + verts.pack(dim_c<3>,vtag, tri[2])}; + auto vel = COLLISION_UTILS::flatten(v0); + + auto C = cH * kd_theta; + auto dforce = -C * vel; + + cgrad += dforce; + cH += C/dt; + + // gh_buffer.template tuple<12>("grad",cpi + start) = cforce + dforce; + // gh_buffer.template tuple<12*12>("H",cpi + start) = K + C/dt; + + for(int i = 3;i != 12;++i){ + int d0 = i % 3; + int row = inds_reorder[i/3 - 1]*3 + d0; + atomic_add(exec_cuda,&gh_buffer("grad",row,ei),cgrad[i]); + for(int j = 3;j != 12;++j){ + int d1 = j % 3; + int col = inds_reorder[j/3 - 1]*3 + d1; + if(row >= 12 || col >= 12){ + printf("invalid row = %d and col = %d %d %d detected %d %d %d\n",row,col,i/3,j/3, + inds_reorder[0], + inds_reorder[1], + inds_reorder[2]); + } + atomic_add(exec_cuda,&gh_buffer("H",row*12 + col,ei),cH(i,j)); + } + } + // for(int i = 1;i != 4;++i){ + // auto idx = inds[i]; + // for(int j = 0;j != 4;++j){ + // if(idx == tet[j]) { + // for(int d = 0;d != 3;++d) + // atomic_add(exec_cuda,&gh_buffer("grad",j*3 + d,ei),cgrad[i * 3 + d]); + // } + // } + + // gh_buffer("grad",i,cpi + start) = cgrad[i]; + // } + // for(int i = 3;i != 12;++i) + // for(int j = 3;j != 12;++j) + // gh_buffer("H",i * 12 + j,cpi + start) = cH(i,j); + // auto test_ind = gh_buffer.pack(dim_c<4>,"inds",start + cpi).reinterpret_bits(int_c); + // auto cgrad_norm = cgrad.norm(); + // auto cH_norm = cH.norm(); + // printf("find_kinematic_collision[%d %d %d %d] : %f %f\n",inds[0],inds[1],inds[2],inds[3],(float)alpha,(float)beta); }); +} -#endif - - } - - -// template -// void evaluate_collision_grad_and_hessian(Pol& cudaPol, -// const PosTileVec& verts, -// const zs::SmallString& xtag, -// const SurfPointTileVec& points, -// const SurfLineTileVec& lines, -// const SurfTriTileVec& tris, -// 
CellPointTileVec& sptemp, -// CellBisectorTileVec& setemp, -// CellTriTileVec& sttemp, -// FPCollisionBuffer& cptemp, -// T cellBvhThickness, -// T collisionEps, +// template +// void evaluate_ee_collision_grad_and_hessian(Pol& cudaPol, +// const PosTileVec& verts,const zs::SmallString& xtag, +// const EECollisionBuffer& ee_collision_buffer, +// GradHessianTileVec& gh_buffer,int offset, +// T in_collisionEps,T out_collisionEps, // T collisionStiffness, // T mu,T lambda) { // using namespace zs; // constexpr auto space = execspace_e::cuda; -// TILEVEC_OPS::fill<12*12>(cudaPol,cptemp,"H",zs::vec::zeros()); -// TILEVEC_OPS::fill<3>(cudaPol,sttemp,"grad",zs::vec::zeros()); -// TILEVEC_OPS::fill<3>(cudaPol,sptemp,"grad",zs::vec::zeros()); - -// cudaPol(zs::range(points.size()), -// [ collisionEps = collisionEps, -// cellBvhThickness = cellBvhThickness, -// verts = proxy({},verts), -// sttemp = proxy({},sttemp), -// setemp = proxy({},setemp), -// sptemp = proxy({},sptemp), -// cptemp = proxy({},cptemp), -// points = proxy({},points), -// lines = proxy({},lines), -// tris = proxy({},tris), -// stbvh = proxy(stbvh),xtag, -// collisionStiffness = collisionStiffness, -// mu = mu,lambda = lambda] ZS_LAMBDA(int pi) mutable { - -// auto vi = reinterpret_bits(points("inds",pi)); -// auto p = verts.template pack<3>(xtag,vi); -// auto bv = bv_t{get_bounding_box(p - cellBvhThickness, p + cellBvhThickness)}; - -// vec3 collision_verts[4] = {}; -// collision_verts[0] = p; - -// int nm_collision_pairs = 0; -// auto process_vertex_face_collision_pairs = [&](int stI) { -// if(nm_collision_pairs >= MAX_FP_COLLISION_PAIRS) -// return; - -// auto tri = tris.pack(dim_c<3>, "inds",stI).reinterpret_bits(int_c); -// if(tri[0] == vi || tri[1] == vi || tri[2] == vi) -// return; - -// collision_verts[1] = verts.template pack<3>(xtag,tri[0]); -// collision_verts[2] = verts.template pack<3>(xtag,tri[1]); -// collision_verts[3] = verts.template pack<3>(xtag,tri[2]); - -// // check whether the 
triangle is degenerate -// auto restArea = tris("area",stI); -// const auto e10 = collision_verts[2] - collision_verts[1]; -// const auto e20 = collision_verts[3] - collision_verts[1]; -// auto deformedArea = (T)0.5 * e10.cross(e20).norm(); -// const T degeneracyEps = 1e-4; -// // skip the degenerate triangles -// const T relativeArea = deformedArea / (restArea + (T)1e-6); -// if(relativeArea < degeneracyEps) -// return; - -// bool collide = false; - -// if(COLLISION_UTILS::is_inside_the_cell(verts,xtag, -// lines,tris, -// sttemp,"nrm", -// setemp,"nrm", -// stI,p,collisionEps)) { -// collide = true; +// int start = offset; +// int ee_size = ee_collision_buffer.size(); + +// TILEVEC_OPS::fill_range(cudaPol,gh_buffer,"H",(T)0.0,start,ee_size); +// TILEVEC_OPS::fill_range(cudaPol,gh_buffer,"grad",(T)0.0,start,ee_size); +// TILEVEC_OPS::copy(cudaPol,ee_collision_buffer,"inds",gh_buffer,"inds",start); + +// cudaPol(zs::range(ee_size),[ +// verts = proxy({},verts),xtag, +// in_collisionEps,out_collisionEps, +// ee_collision_buffer = proxy({},ee_collision_buffer), +// gh_buffer = proxy({},gh_buffer), +// start = start, +// stiffness = collisionStiffness,mu = mu,lam = lambda] ZS_LAMBDA(int eei) mutable { +// auto inds = ee_collision_buffer.template pack<4>("inds",eei).reinterpret_bits(int_c); +// for(int i = 0;i != 4;++i) +// if(inds[i] < 0) +// return; +// for(int j = 0;j != 4;++j){ +// auto active = verts("active",inds[j]); +// if(active < 1e-6) +// return; +// } +// vec3 cv[4] = {}; +// for(int j = 0;j != 4;++j) +// cv[j] = verts.template pack<3>(xtag,inds[j]); + +// auto is_inverted = reinterpret_bits(ee_collision_buffer("inverted",eei)); +// auto ceps = is_inverted ? 
in_collisionEps : out_collisionEps; + +// auto alpha = stiffness; +// auto beta = ee_collision_buffer("area",eei); + +// auto a = ee_collision_buffer.template pack<2>("abary",eei); +// auto b = ee_collision_buffer.template pack<2>("bbary",eei); + +// const T tooSmall = (T)1e-6; + +// if(is_inverted) { +// gh_buffer.template tuple<12>("grad",eei + start) = -alpha * beta * EDGE_EDGE_SQRT_COLLISION::gradientNegated(cv,a,b,mu,lam,ceps,tooSmall); +// gh_buffer.template tuple<12*12>("H",eei + start) = alpha * beta * EDGE_EDGE_SQRT_COLLISION::hessianNegated(cv,a,b,mu,lam,ceps,tooSmall); +// // gh_buffer.template tuple<12>("grad",eei + start) = -alpha * beta * EDGE_EDGE_COLLISION::gradientNegated(cv,a,b,mu,lam,ceps); +// // gh_buffer.template tuple<12*12>("H",eei + start) = alpha * beta * EDGE_EDGE_COLLISION::hessianNegated(cv,a,b,mu,lam,ceps); +// }else { +// gh_buffer.template tuple<12>("grad",eei + start) = -alpha * beta * EDGE_EDGE_SQRT_COLLISION::gradient(cv,a,b,mu,lam,ceps,tooSmall); +// gh_buffer.template tuple<12*12>("H",eei + start) = alpha * beta * EDGE_EDGE_SQRT_COLLISION::hessian(cv,a,b,mu,lam,ceps,tooSmall); +// // gh_buffer.template tuple<12>("grad",eei + start) = -alpha * beta * EDGE_EDGE_COLLISION::gradient(cv,a,b,mu,lam,ceps); +// // gh_buffer.template tuple<12*12>("H",eei + start) = alpha * beta * EDGE_EDGE_COLLISION::hessian(cv,a,b,mu,lam,ceps); // } - -// if(!collide) -// return; - -// auto vertexFaceCollisionAreas = tris("area",stI) + points("area",pi); - -// auto grad = collisionStiffness * VERTEX_FACE_SQRT_COLLISION::gradient(collision_verts,mu,lambda,collisionEps) * vertexFaceCollisionAreas; -// auto hessian = collisionStiffness * VERTEX_FACE_SQRT_COLLISION::hessian(collision_verts,mu,lambda,collisionEps) * vertexFaceCollisionAreas; - - -// cptemp.template tuple<4>("inds",pi * MAX_FP_COLLISION_PAIRS + nm_collision_pairs) = zs::vec(vi,tri[0],tri[1],tri[2]).template reinterpret_bits(); -// cptemp.template tuple<12*12>("H",pi * MAX_FP_COLLISION_PAIRS + 
nm_collision_pairs) = hessian; -// // auto pf = zs::vec{grad[0],grad[1],grad[2]}; -// zs::vec tf[3] = {}; -// for(int j = 0;j != 3;++j) -// tf[j] = zs::vec{grad[j * 3 + 3 + 0],grad[j * 3 + 3 + 1],grad[j * 3 + 3 + 2]}; - -// // auto avgtf = (tf[0] + tf[1] + tf[2])/(T)3.0; -// auto avgtf = (tf[0] + tf[1] + tf[2]); -// for(int j = 0;j != 3;++j) -// atomic_add(exec_cuda,&sttemp("grad",j,stI),avgtf[j]); - - -// auto fp_inds = tris.template pack<3>("fp_inds",stI).reinterpret_bits(int_c); -// for(int j = 0;j != 3;++j){ -// atomic_add(exec_cuda,&sptemp("grad",j,pi),grad[j]); -// for(int k = 0;k != 3;++k) { -// auto fp_idx = fp_inds[k]; -// atomic_add(exec_cuda,&sptemp("grad",j,fp_idx),tf[k][j]); -// } -// } - -// nm_collision_pairs++; -// }; -// stbvh.iter_neighbors(bv,process_vertex_face_collision_pairs); // }); // } + }; }; \ No newline at end of file diff --git a/projects/CuLagrange/fem/collision_energy/vertex_face_sqrt_collision.hpp b/projects/CuLagrange/fem/collision_energy/vertex_face_sqrt_collision.hpp index 6e77d354cf..cf398350fc 100644 --- a/projects/CuLagrange/fem/collision_energy/vertex_face_sqrt_collision.hpp +++ b/projects/CuLagrange/fem/collision_energy/vertex_face_sqrt_collision.hpp @@ -35,14 +35,14 @@ namespace VERTEX_FACE_SQRT_COLLISION { /////////////////////////////////////////////////////////////////////// constexpr REAL psi(const VECTOR3 v[4],const REAL& _mu,const REAL& _nu,const REAL& _eps) { - const VECTOR3 bary = getInsideBarycentricCoordinates(v); + const VECTOR3 bary = LSL_GEO::getInsideBarycentricCoordinates(v); return psi(v,bary,_mu,_nu,_eps); } /////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////// // our normal pointing outward - constexpr VECTOR12 gradient(const VECTOR3 v[4], const VECTOR3& bary,const REAL& _mu,const REAL& _nu,const REAL& _eps) + constexpr VECTOR12 gradient(const VECTOR3 v[4], const VECTOR3& bary,const REAL& _mu,const REAL& _nu,const 
REAL& _eps,bool collide_from_inside = false) { // REAL _inverseEps = 1e-6; using DREAL = double; @@ -51,7 +51,9 @@ namespace VERTEX_FACE_SQRT_COLLISION { e[0] = v[3] - v[2]; e[1] = v[0] - v[2]; e[2] = v[1] - v[2]; - const bool reversal = !reverse(v,e); + bool reversal = !reverse(v,e); + if(collide_from_inside) + reversal = !reversal; // remember we had to reorder vertices in a wonky way const VECTOR3 xs = bary[0] * v[1] + bary[1] * v[2] + bary[2] * v[3]; @@ -95,15 +97,15 @@ namespace VERTEX_FACE_SQRT_COLLISION { /////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////// - constexpr VECTOR12 gradient(const VECTOR3 v[4],const REAL& _mu,const REAL& _nu,const REAL& _eps) + constexpr VECTOR12 gradient(const VECTOR3 v[4],const REAL& _mu,const REAL& _nu,const REAL& _eps,bool collide_from_inside = false) { - const VECTOR3 bary = getInsideBarycentricCoordinates(v); - return gradient(v, bary, _mu, _nu, _eps); + const VECTOR3 bary = LSL_GEO::getInsideBarycentricCoordinates(v); + return gradient(v, bary, _mu, _nu, _eps,collide_from_inside); } /////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////// - constexpr MATRIX12 hessian(const VECTOR3 v[4], const VECTOR3& bary,const REAL& _mu,const REAL& _nu,const REAL& _eps) + constexpr MATRIX12 hessian(const VECTOR3 v[4], const VECTOR3& bary,const REAL& _mu,const REAL& _nu,const REAL& _eps,bool collide_from_inside = false) { // REAL _inverseEps = 1e-6; @@ -112,8 +114,12 @@ namespace VERTEX_FACE_SQRT_COLLISION { e[0] = v[3] - v[2]; e[1] = v[0] - v[2]; e[2] = v[1] - v[2]; - const bool reversal = !reverse(v,e); - + bool reversal = !reverse(v,e); + if(collide_from_inside) + reversal = !reversal; + +#if 0 + using DREAL = double; // remember we had to reorder vertices in a wonky way @@ -142,14 +148,17 @@ namespace VERTEX_FACE_SQRT_COLLISION { alpha = alpha > 0 ? 
alpha : 0; beta = beta > 0 ? beta : 0; + return (REAL)2.0 * _mu * ((REAL)alpha * (zs::dyadic_prod(productn,productn)) + (REAL)beta * tDiff.transpose() * tDiff); + // auto H = (REAL)2.0 * _mu * ((REAL)alpha * (zs::dyadic_prod(productn,productn)) + (REAL)beta * tDiff.transpose() * tDiff); + // make_pd(H); + // return H; + // return (REAL)2.0 * _mu * (((REAL)1.0 / tDott - springDiff / (tDott * tMagnitude)) * (zs::dyadic_prod(product,product)) + // (springDiff / tMagnitude) * tDiff.transpose() * tDiff); // return (REAL)2.0 * _mu * (alpha * (zs::dyadic_prod(product,product)) + beta * tDiff.transpose() * tDiff); - return (REAL)2.0 * _mu * ((REAL)alpha * (zs::dyadic_prod(productn,productn)) + (REAL)beta * tDiff.transpose() * tDiff); - // could instead try to trap all the inverses and hand back something fixed up, // but consistency is not guaranteed, so let's just zero it out at the first // sign of trouble @@ -157,16 +166,85 @@ namespace VERTEX_FACE_SQRT_COLLISION { //const REAL tDottInv = (zs::abs(tDott) > _inverseEps) ? 1.0 / tDott : 1.0; //return 2.0 * _mu * ((tDottInv - springDiff / (tDott * tMagnitude)) * (product * product.transpose()) + // (springDiff * tMagnitudeInv) * tDiff.transpose() * tDiff); + +#else + const VECTOR3 xs = bary[0] * v[1] + bary[1] * v[2] + bary[2] * v[3]; + const VECTOR3 t = v[0] - xs; + + const REAL tDott = t.dot(t); + const REAL tMagnitude = zs::sqrt(tDott); + + + const REAL springDiff = (reversal) ? 
tMagnitude + _eps : tMagnitude - _eps; + const MATRIX3x12 tDiff = tDiffPartial(bary); + + // get the spring length, non-zero rest-length + const VECTOR12 product = tDiff.transpose() * t; + + auto res = (REAL)2.0 * _mu * (((REAL)1.0 / tDott - springDiff / (tDott * tMagnitude)) * (zs::dyadic_prod(product,product)) + (springDiff / tMagnitude) * tDiff.transpose() * tDiff); + make_pd(res); + return res; + +#endif } /////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////// - constexpr MATRIX12 hessian(const VECTOR3 v[4],const REAL& _mu,const REAL& _nu,const REAL& _eps) + constexpr MATRIX12 hessian(const VECTOR3 v[4],const REAL& _mu,const REAL& _nu,const REAL& _eps,bool collide_from_inside = false) { - const VECTOR3 bary = getInsideBarycentricCoordinates(v); - return hessian(v, bary,_mu,_nu,_eps); + const VECTOR3 bary = LSL_GEO::getInsideBarycentricCoordinates(v); + return hessian(v, bary,_mu,_nu,_eps,collide_from_inside); } + // constexpr VECTOR12 damp_gradient(const VECTOR v[4],const VECTOR vp[4],const REAL& _dt, const VECTOR3& bary,const REAL& _kd,const REAL& _mu,const REAL& _nu,const REAL& eps){ + // using DREAL = double; + + // // const VECTOR3 vs = bary[0] * v[1] + bary[1] * v[2] + bary[2] * v[3]; + // const VECTOR3 t = v[0] - (bary[0] * v[1] + bary[1] * v[2] + bary[2] * v[3]);// relative position + // // const VECTOR3 vps = bary[0] * vp[1] + bary[1] * vp[2] + bary[2] * vp[3]; + // const VECTOR3 tp = vp[0] - (bary[0] * vp[1] + bary[1] * vp[2] + bary[2] * vp[3]);// previous relative position + // const VECTOR3 vel_t = (t - tp) / _dt;// relative velocity + + // const MATRIX3x12 tDiff = tDiffPartial(bary); + // const auto tn = t.template cast().normalized().template cast(); + + // const DREAL project_vel_t = vel_t.dot(tn); + // return (REAL)2.0 * _mu * _kd * (REAL)project_vel_t * tDiff.transpose() * tn; + // } + + // constexpr VECTOR12 damp_gradient(const VECTOR v[4],const VECTOR 
vp[4],const REAL& _dt,const REAL& _kd,const REAL& _mu,const REAL& _nu,const REAL& eps) + // { + // const VECTOR3 bary = LSL_GEO::getInsideBarycentricCoordinates(v); + // return damp_gradient(v, vp,_dt,bary,_kd, _mu,_nu,eps); + // } + + // const MATRIX12 damp_hessian(const VECTOR v[4],const VECTOR vp[4],const REAL& _dt, const VECTOR3& bary,const REAL& _kd,const REAL& _mu,const REAL& _nu,const REAL& eps) { + // using DREAL = double; + + // // const VECTOR3 vs = bary[0] * v[1] + bary[1] * v[2] + bary[2] * v[3]; + // const VECTOR3 t = v[0] - (bary[0] * v[1] + bary[1] * v[2] + bary[2] * v[3]);// relative position + // // const VECTOR3 vps = bary[0] * vp[1] + bary[1] * vp[2] + bary[2] * vp[3]; + // const VECTOR3 tp = vp[0] - (bary[0] * vp[1] + bary[1] * vp[2] + bary[2] * vp[3]);// previous relative position + // const VECTOR3 vel_t = (t - tp) / _dt;// relative velocity + + // const MATRIX3x12 tDiff = tDiffPartial(bary); + // const REAL tDott = t.dot(t); + // const REAL tMagnitude = zs::sqrt(tDott); + + // const VECTOR12 product = tDiff.transpose() * t; + // const VECTOR12 vproduct = tDiff.transpose() * vel_t; + + // const DREAL project_vel_t = vel_t.dot(tn); + + // return (T)2.0 * mu * kd * ( + // ((T)1.0/_dt/tDott - (T)2.0*project_vel_t/tMagnitude/tDott)*zs::dyadic_prod(product,product) + + // project_vel_t/tDott * zs::dyadic_prod(tDiff) + + // (T)1.0/tDott * zs::dyadic_prod(product,product) + // ); + + // } + + }; diff --git a/projects/CuLagrange/geometry/BaryCentricInterpolator.cu b/projects/CuLagrange/geometry/BaryCentricInterpolator.cu index 68b240ff9c..c930cb7c2b 100644 --- a/projects/CuLagrange/geometry/BaryCentricInterpolator.cu +++ b/projects/CuLagrange/geometry/BaryCentricInterpolator.cu @@ -9,6 +9,9 @@ #include #include +#include "zensim/container/Bcht.hpp" +#include "kernel/tiled_vector_ops.hpp" + #include namespace zeno{ @@ -19,6 +22,18 @@ using vec4 = zs::vec; using mat3 = zs::vec; using mat4 = zs::vec; + +// 给定一个四面网格与一组点,计算每个点在四面体网格单元中的质心坐标 +struct 
ZSComputeBaryCentricWeights2 : INode { + void apply() override { + using namespace zs; + + + } +}; + + + struct ZSComputeBaryCentricWeights : INode { void apply() override { using namespace zs; @@ -28,48 +43,50 @@ struct ZSComputeBaryCentricWeights : INode { auto zsvolume = get_input("zsvolume"); auto zssurf = get_input("zssurf"); + auto mark_embed_elm = get_input2("mark_elm"); // the bvh of zstets // auto lbvh = get_input("lbvh"); auto thickness = get_param("bvh_thickness"); auto fitting_in = get_param("fitting_in"); auto bvh_channel = get_param("bvh_channel"); - auto tag = get_param("tag"); + auto tag = get_input2("tag"); - const auto& verts = zsvolume->getParticles(); - const auto& eles = zsvolume->getQuadraturePoints(); + auto& verts = zsvolume->getParticles(); + auto& eles = zsvolume->getQuadraturePoints(); const auto& everts = zssurf->getParticles(); - const auto& e_eles = zssurf->getQuadraturePoints(); + // const auto& e_eles = zssurf->getQuadraturePoints(); auto &bcw = (*zsvolume)[tag]; - bcw = typename ZenoParticles::particles_t({{"inds",1},{"w",4},{"cnorm",1}},everts.size(),zs::memsrc_e::device,0); + + bcw = typename ZenoParticles::particles_t({ + {"X",3}, + {"inds",1}, + {"w",4}, + {"strength",1}, + {"cnorm",1}},everts.size(),zs::memsrc_e::device,0); + + + + + // auto topo_tag = tag + std::string("_topo"); + // auto &bcw_topo = (*zsvolume)[topo_tag]; + + // auto e_dim = e_eles.getPropertySize("inds"); + // bcw_topo = typename ZenoParticles::particles_t({{"inds",e_dim}},e_eles.size(),zs::memsrc_e::device,0); + auto cudaExec = zs::cuda_exec(); const auto numFEMVerts = verts.size(); const auto numFEMEles = eles.size(); const auto numEmbedVerts = bcw.size(); - const auto numEmbedEles = e_eles.size(); + // const auto numEmbedEles = e_eles.size(); constexpr auto space = zs::execspace_e::cuda; - // fmt::print("TRY COMPUTE BARYCENTRIC WEIGHTS\n"); - - // std::cout << "TRY COMPUTE BARYCENTRIC WEIGHTS" << std::endl; - - - // cudaExec(zs::range(eles.size()), - // 
[eles = proxy({},eles)] __device__(int ei) mutable { - // auto quad = eles.template pack<4>("inds", ei).template reinterpret_bits(); - // if(quad[0] < 0 || quad[1] < 0 || quad[2] < 0 || quad[3] < 0) - // printf("invalid quad : %d %d %d %d\n",quad[0],quad[1],quad[2],quad[3]); - // if(quad[0] > 13572 || quad[1] > 13572 || quad[2] > 13572 || quad[3] > 13572) - // printf("invalid quad : %d %d %d %d\n",quad[0],quad[1],quad[2],quad[3]); - // }); + TILEVEC_OPS::copy<3>(cudaExec,everts,"x",bcw,"X"); compute_barycentric_weights(cudaExec,verts,eles,everts,"x",bcw,"inds","w",thickness,fitting_in); - // set_output("zsvolume", zsvolume);return; - - // fmt::print("FINISH COMPUTING BARYCENTRIC WEIGHTS\n"); cudaExec(zs::range(numEmbedVerts), [bcw = proxy({},bcw),fitting_in] ZS_LAMBDA(int vi) mutable { @@ -79,7 +96,13 @@ struct ZSComputeBaryCentricWeights : INode { } ); - auto e_dim = e_eles.getPropertySize("inds"); + + // cudaExec(zs::range(e_eles.size()),[e_dim = e_dim, + // e_eles = proxy({},e_eles),bcw_topo = proxy({},bcw_topo)] ZS_LAMBDA(int ei) mutable { + // for(int i = 0;i != e_dim;++i) + // bcw_topo("inds",i,ei) = e_eles("inds",i,ei); + // }); + cudaExec(zs::range(numEmbedVerts), [bcw = proxy({},bcw)] ZS_LAMBDA (int vi) mutable { @@ -94,41 +117,143 @@ struct ZSComputeBaryCentricWeights : INode { nmEmbedVerts[ei] = (T)0.; }); - if(e_dim !=3 && e_dim !=4) { - throw std::runtime_error("INVALID EMBEDDED PRIM TOPO"); + // if(e_dim !=3 && e_dim !=4) { + // throw std::runtime_error("INVALID EMBEDDED PRIM TOPO"); + // } + + if(mark_embed_elm && everts.hasProperty("tag")){ + eles.append_channels(cudaExec,{{"nmBones",1},{"bdw",1}}); + + cudaExec(zs::range(eles.size()), + [eles = proxy({},eles)] ZS_LAMBDA(int elm_id) mutable{ + eles("nmBones",elm_id) = (T)0.0; + eles("bdw",elm_id) = (T)1.0; + }); + + + auto nmBones = get_input2("nmCpns"); + using vec2i = zs::vec; + using vec3i = zs::vec; + bcht, 32> ebtab{eles.get_allocator(), eles.size() * nmBones}; + cudaExec(zs::range(bcw.size()), 
+ [bcw = proxy({},bcw),ebtab = proxy(ebtab),everts = proxy({},everts)] + ZS_LAMBDA(int vi) mutable{ + auto ei = reinterpret_bits(bcw("inds",vi)); + if(ei < 0) + return; + else{ + int tag = (int)everts("tag",vi); + ebtab.insert(vec2i{ei,tag}); + } + }); + + cudaExec(zs::range(eles.size()), + [eles = proxy({},eles),ebtab = proxy(ebtab),nmBones] ZS_LAMBDA(int ei) mutable { + for(int i = 0;i != nmBones;++i) { + auto res = ebtab.query(vec2i{ei,i}); + if(res < 0) + continue; + eles("nmBones",ei) += (T)1.0; + } + // if(eles("nmBones",ei) > 0) + // printf("nmEmbedCmps[%d] : [%d]\n",ei,(int)eles("nmBones",ei)); + }); + }else { + eles.append_channels(cudaExec,{{"nmBones",1},{"bdw",1}}); + cudaExec(zs::range(eles.size()),[ + eles = proxy({},eles)] ZS_LAMBDA(int ei) mutable { + eles("bdw",ei) = (T)1.0; + eles("nmBones",ei) = (T)1.0; + }); } cudaExec(zs::range(bcw.size()), - [everts = proxy({},everts),bcw = proxy({},bcw),execTag = wrapv{},nmEmbedVerts = proxy(nmEmbedVerts)] + [everts = proxy({},everts), + bcw = proxy({},bcw), + execTag = wrapv{}, + nmEmbedVerts = proxy(nmEmbedVerts), + eles = proxy({},eles), + verts = proxy({},verts)] ZS_LAMBDA (int vi) mutable { using T = typename RM_CVREF_T(bcw)::value_type; auto ei = reinterpret_bits(bcw("inds",vi)); if(ei < 0) return; + auto tet = eles.pack(dim_c<3>,"inds",ei).reinterpret_bits(int_c); atomic_add(execTag,&nmEmbedVerts[ei],(T)1.0); }); cudaExec(zs::range(bcw.size()), - [bcw = proxy({},bcw),nmEmbedVerts = proxy(nmEmbedVerts)] + [bcw = proxy({},bcw),nmEmbedVerts = proxy(nmEmbedVerts),eles = proxy({},eles),everts = proxy({},everts)] ZS_LAMBDA(int vi) mutable{ auto ei = reinterpret_bits(bcw("inds",vi)); - if(ei < 0) - bcw("cnorm",vi) = (T)0.0; + if(everts.hasProperty("strength")) + bcw("strength",vi) = everts("strength",vi); else - bcw("cnorm",vi) = (T)1.0/(T)nmEmbedVerts[ei]; + bcw("strength",vi) = (T)1.0; + if(ei >= 0){ + auto alpha = (T)1.0/(T)nmEmbedVerts[ei]; + bcw("cnorm",vi) = (T)alpha; + if(eles("nmBones",ei) > (T)1.5) + 
eles("bdw",ei) = (T)0.0; + } + + // if(ei < 0 || eles("nmBones",ei) > (T)1.5){ + // // bcw("strength",vi) = (T)0.0; + // bcw("cnorm",vi) = (T)0.0; + // if(ei >= 0) + // eles("bdw",ei) = (T)0.0; + // } + // else{ + + // // bcw("cnorm",vi) = (T)1.0; + // } }); + + // we might also do some smoothing on cnorm set_output("zsvolume", zsvolume); } }; -ZENDEFNODE(ZSComputeBaryCentricWeights, {{{"interpolator","zsvolume"}, {"embed surf", "zssurf"}}, +ZENDEFNODE(ZSComputeBaryCentricWeights, {{{"interpolator","zsvolume"}, {"embed surf", "zssurf"},{"int","mark_elm","0"},{"int","nmCpns","1"},{"string","tag","skin"}}, {{"interpolator on gpu", "zsvolume"}}, - {{"float","bvh_thickness","0"},{"int","fitting_in","1"},{"string","tag","skin_bw"},{"string","bvh_channel","x"}}, + {{"float","bvh_thickness","0"},{"int","fitting_in","1"},{"string","bvh_channel","x"}}, {"ZSGeometry"}}); +struct VisualizeInterpolator : zeno::INode { + void apply() override { + using namespace zs; + auto zsvolume = get_input("zsvolume"); + auto tag = get_input2("interpolator_name"); + const auto& bcw = (*zsvolume)[tag].clone({zs::memsrc_e::host}); + auto topo_tag = tag + std::string("_topo"); + const auto &bcw_topo = (*zsvolume)[topo_tag].clone({zs::memsrc_e::host}); + + auto bcw_vis = std::make_shared(); + bcw_vis->resize(bcw.size()); + auto& bcw_X = bcw_vis->verts; + auto& bcw_cnorm = bcw_vis->add_attr("cnorm"); + auto& bcw_strength = bcw_vis->add_attr("strength"); + + auto ompPol = omp_exec(); + constexpr auto omp_space = execspace_e::openmp; + ompPol(zs::range(bcw.size()), + [&bcw_X,&bcw_cnorm,&bcw_strength,bcw = proxy({},bcw)] (int vi) mutable { + bcw_X[vi] = bcw.pack(dim_c<3>,"X",vi).to_array(); + bcw_cnorm[vi] = bcw("cnorm",vi); + bcw_strength[vi] = bcw("strength",vi); + }); + set_output("bcw_vis",std::move(bcw_vis)); + } +}; + +ZENDEFNODE(VisualizeInterpolator, {{{"interpolator","zsvolume"},{"string","interpolator_name","skin"}}, + {{"visual bcw", "bcw_vis"}}, + {}, + {"ZSGeometry"}}); struct 
ZSSampleEmbedVectorField : zeno::INode { void apply() override { @@ -322,7 +447,7 @@ struct ZSInterpolateEmbedAttr : zeno::INode { auto srcAttr = get_param("srcAttr"); auto dstAttr = get_param("dstAttr"); - auto bcw_tag = get_param("bcw_tag"); + auto bcw_tag = get_input2("bcw_tag"); auto strategy = get_param("strategy"); const auto& bcw = (*source)[bcw_tag]; auto& dest_pars = dest->getParticles(); @@ -340,20 +465,20 @@ struct ZSInterpolateEmbedAttr : zeno::INode { fmt::print("the source have no {} channel\n",srcAttr); throw std::runtime_error("the source have no specified channel"); } - if(topo.getPropertySize("inds") != 4) { + if(topo.getChannelSize("inds") != 4) { fmt::print("only support tetrahedra mesh as source\n"); throw std::runtime_error("only support tetrahedra mesh as source"); } - if(dest_pars.hasProperty(dstAttr) && dest_pars.getPropertySize(dstAttr) != source_pars.getPropertySize(srcAttr)){ + if(dest_pars.hasProperty(dstAttr) && dest_pars.getChannelSize(dstAttr) != source_pars.getChannelSize(srcAttr)){ fmt::print("the dest attr_{} and source attr_{} not match in size\n",dstAttr,srcAttr); throw std::runtime_error("the dest attr and source attr not match in size"); } - if(source_pars.getPropertySize(srcAttr) == 1) + if(source_pars.getChannelSize(srcAttr) == 1) interpolate_p2p_imp<1>(srcAttr,dstAttr,source_pars,dest_pars,topo,bcw); - if(source_pars.getPropertySize(srcAttr) == 2) + if(source_pars.getChannelSize(srcAttr) == 2) interpolate_p2p_imp<2>(srcAttr,dstAttr,source_pars,dest_pars,topo,bcw); - if(source_pars.getPropertySize(srcAttr) == 3) + if(source_pars.getChannelSize(srcAttr) == 3) interpolate_p2p_imp<3>(srcAttr,dstAttr,source_pars,dest_pars,topo,bcw); }else if(strategy == "q2p") { const auto& source_quads = source->getQuadraturePoints(); @@ -361,16 +486,16 @@ struct ZSInterpolateEmbedAttr : zeno::INode { fmt::print("the source have no {} channel\n",srcAttr); throw std::runtime_error("the source have no specified channel"); } - 
if(dest_pars.hasProperty(dstAttr) && dest_pars.getPropertySize(dstAttr) != source_quads.getPropertySize(srcAttr)){ + if(dest_pars.hasProperty(dstAttr) && dest_pars.getChannelSize(dstAttr) != source_quads.getChannelSize(srcAttr)){ fmt::print("the dest attr_{} and source attr_{} not match in size\n",dstAttr,srcAttr); throw std::runtime_error("the dest attr and source attr not match in size"); } - if(source_quads.getPropertySize(srcAttr) == 1) + if(source_quads.getChannelSize(srcAttr) == 1) interpolate_q2p_imp<1>(srcAttr,dstAttr,source_quads,dest_pars,bcw); - if(source_quads.getPropertySize(srcAttr) == 2) + if(source_quads.getChannelSize(srcAttr) == 2) interpolate_q2p_imp<2>(srcAttr,dstAttr,source_quads,dest_pars,bcw); - if(source_quads.getPropertySize(srcAttr) == 3) + if(source_quads.getChannelSize(srcAttr) == 3) interpolate_q2p_imp<3>(srcAttr,dstAttr,source_quads,dest_pars,bcw); } set_output("dest",dest); @@ -378,12 +503,11 @@ struct ZSInterpolateEmbedAttr : zeno::INode { }; -ZENDEFNODE(ZSInterpolateEmbedAttr, {{{"source"}, {"dest"}}, +ZENDEFNODE(ZSInterpolateEmbedAttr, {{{"source"}, {"dest"},{"string","bcw_tag","skin_bw"}}, {{"dest"}}, { {"string","srcAttr","x"}, {"string","dstAttr","x"}, - {"string","bcw_tag","skin_bw"}, {"enum p2p q2p","strategy","p2p"} }, @@ -484,12 +608,12 @@ struct ZSInterpolateEmbedPrim : zeno::INode { auto idx = inds[i]; everts.tuple<3>(outAttr,vi) = everts.pack<3>(outAttr,vi) + w[i] * verts.pack<3>(inAttr, idx); } -#if 0 - if(vi == 100){ - auto vert = everts.pack<3>(outAttr,vi); - printf("V<%d>->E<%d>(%f,%f,%f,%f) :\t%f\t%f\t%f\n",vi,ei,w[0],w[1],w[2],w[3],vert[0],vert[1],vert[2]); - } -#endif +// #if 0 +// if(vi == 100){ +// auto vert = everts.pack<3>(outAttr,vi); +// printf("V<%d>->E<%d>(%f,%f,%f,%f) :\t%f\t%f\t%f\n",vi,ei,w[0],w[1],w[2],w[3],vert[0],vert[1],vert[2]); +// } +// #endif // } }); diff --git a/projects/CuLagrange/geometry/BiharmonicBoundedWeight.cu b/projects/CuLagrange/geometry/BiharmonicBoundedWeight.cu index 
47d30a4a98..2b95c3f952 100644 --- a/projects/CuLagrange/geometry/BiharmonicBoundedWeight.cu +++ b/projects/CuLagrange/geometry/BiharmonicBoundedWeight.cu @@ -13,7 +13,7 @@ #include #include -#include "kernel/laplace_matrix.hpp" +#include "kernel/laplacian.hpp" #include "linear_system/active_set.hpp" namespace zeno { diff --git a/projects/CuLagrange/geometry/CollisionVis.cu b/projects/CuLagrange/geometry/CollisionVis.cu index b56478057a..dffb0801e4 100644 --- a/projects/CuLagrange/geometry/CollisionVis.cu +++ b/projects/CuLagrange/geometry/CollisionVis.cu @@ -7,7 +7,6 @@ #include #include -#include "TopoUtils.hpp" #include "zensim/omp/execution/ExecutionPolicy.hpp" #include "kernel/calculate_facet_normal.hpp" @@ -15,6 +14,7 @@ #include "kernel/compute_characteristic_length.hpp" #include "kernel/calculate_bisector_normal.hpp" #include "kernel/tiled_vector_ops.hpp" +#include "kernel/calculate_edge_normal.hpp" #include "../fem/collision_energy/evaluate_collision.hpp" @@ -34,6 +34,7 @@ namespace zeno { using mat3 = zs::vec; using mat4 = zs::vec; // using vec2i = zs::vec; + // using vec2i = zs::vec; // using vec3i = zs::vec; // using vec4i = zs::vec; @@ -42,6 +43,365 @@ namespace zeno { // TODO: build a half edge structure struct ZSInitSurfaceTopoConnect : INode { + // void compute_surface_neighbors(zs::CudaExecutionPolicy &pol, typename ZenoParticles::particles_t &sfs, + // typename ZenoParticles::particles_t &ses, typename ZenoParticles::particles_t &svs) { + // using namespace zs; + // constexpr auto space = execspace_e::cuda; + // using vec2i = zs::vec; + // using vec3i = zs::vec; + // sfs.append_channels(pol, {{"ff_inds", 3}, {"fe_inds", 3}, {"fp_inds", 3}}); + // ses.append_channels(pol, {{"fe_inds", 2},{"ep_inds",2}}); + + // fmt::print("sfs size: {}, ses size: {}, svs size: {}\n", sfs.size(), ses.size(), svs.size()); + + // bcht, 32> etab{sfs.get_allocator(), sfs.size() * 3}; + // Vector sfi{sfs.get_allocator(), sfs.size() * 3}; // surftri indices corresponding 
to edges in the table + + // bcht,32> ptab(svs.get_allocator(),svs.size()); + // Vector spi{svs.get_allocator(),svs.size()}; + + // /// @brief compute hash table + // { + // // compute directed edge to triangle idx hash table + // pol(range(sfs.size()), [etab = proxy(etab), sfs = proxy({}, sfs), + // sfi = proxy(sfi)] __device__(int ti) mutable { + // auto tri = sfs.pack(dim_c<3>, "inds", ti).reinterpret_bits(int_c); + // for (int i = 0; i != 3; ++i) + // if (auto no = etab.insert(vec2i{tri[i], tri[(i + 1) % 3]}); no >= 0) { + // sfi[no] = ti; + // } else { + // auto oti = sfi[etab.query(vec2i{tri[i], tri[(i + 1) % 3]})]; + // auto otri = sfs.pack(dim_c<3>, "inds", oti).reinterpret_bits(int_c); + // printf("the same directed edge <%d, %d> has been inserted twice! original sfi %d <%d, %d, %d>, cur " + // "%d <%d, %d, %d>\n", + // tri[i], tri[(i + 1) % 3], oti, otri[0], otri[1], otri[2], ti, tri[0], tri[1], tri[2]); + // } + // }); + // // // compute surface point to vert hash table + // // pol(range(svs.size()),[ptab = proxy(ptab),svs = proxy({},svs), + // // spi = proxy(spi)] __device__(int pi) mutable { + // // auto pidx = reinterpret_bits(svs("inds",pi)); + // // if(auto no = ptab.insert(pidx); no >= 0) + // // spi[no] = pi; + // // else { + // // auto opi = spi[ptab.query(pidx)]; + // // auto opidx = reinterpret_bits(svs("inds",opi)); + // // printf("the same surface point <%d> has been inserted twice! 
origin svi %d <%d>, cur " + // // "%d <%d>\n", + // // pidx,opi,opidx,pi,pidx); + // // } + // // }); + // } + // /// @brief compute ep neighbors + // // { + // // pol(range(ses.size()),[ptab = proxy(ptab),ses = proxy({},ses), + // // svs = proxy({},svs),spi = proxy(spi)] __device__(int ei) mutable { + // // auto neighpIds = vec2i::uniform(-1); + // // auto edge = ses.pack(dim_c<2>,"inds",ei).reinterpret_bits(int_c); + // // for(int i = 0;i != 2;++i) + // // if(auto no = ptab.query(edge[i]);no >= 0) { + // // neighpIds[i] = spi[no]; + // // } + // // ses.tuple(dim_c<2>,"ep_inds",ei) = neighpIds.reinterpret_bits(float_c); + // // }); + // // } + + // /// @brief compute ff neighbors + // { + // pol(range(sfs.size()), [etab = proxy(etab), sfs = proxy({}, sfs), + // sfi = proxy(sfi)] __device__(int ti) mutable { + // auto neighborIds = vec3i::uniform(-1); + // auto tri = sfs.pack(dim_c<3>, "inds", ti).reinterpret_bits(int_c); + // for (int i = 0; i != 3; ++i) + // if (auto no = etab.query(vec2i{tri[(i + 1) % 3], tri[i]}); no >= 0) { + // neighborIds[i] = sfi[no]; + // } + // sfs.tuple(dim_c<3>, "ff_inds", ti) = neighborIds.reinterpret_bits(float_c); + // }); + // } + // /// @brief compute fe neighbors + // { + // auto sfindsOffset = sfs.getPropertyOffset("inds"); + // auto sfFeIndsOffset = sfs.getPropertyOffset("fe_inds"); + // auto seFeIndsOffset = ses.getPropertyOffset("fe_inds"); + // pol(range(ses.size()), + // [etab = proxy(etab), sfs = proxy({}, sfs), ses = proxy({}, ses), + // sfi = proxy(sfi), sfindsOffset, sfFeIndsOffset, seFeIndsOffset] __device__(int li) mutable { + // auto findLineIdInTri = [](const auto &tri, int v0, int v1) -> int { + // for (int loc = 0; loc < 3; ++loc) + // if (tri[loc] == v0 && tri[(loc + 1) % 3] == v1) + // return loc; + // return -1; + // }; + // auto neighborTris = vec2i::uniform(-1); + // auto line = ses.pack(dim_c<2>, "inds", li).reinterpret_bits(int_c); + + // { + // if (auto no = etab.query(line); no >= 0) { + // // tri + // 
auto triNo = sfi[no]; + // auto tri = sfs.pack(dim_c<3>, sfindsOffset, triNo).reinterpret_bits(int_c); + // auto loc = findLineIdInTri(tri, line[0], line[1]); + // if (loc == -1) { + // printf("ridiculous, this edge <%d, %d> does not belong to tri <%d, %d, %d>\n", line[0], + // line[1], tri[0], tri[1], tri[2]); + // return; + // } + // sfs(sfFeIndsOffset + loc, triNo) = li; + // // edge + // neighborTris[0] = triNo; + // } + // } + // vec2i rline{line[1], line[0]}; + // { + // if (auto no = etab.query(rline); no >= 0) { + // // tri + // auto triNo = sfi[no]; + // auto tri = sfs.pack(dim_c<3>, sfindsOffset, triNo).reinterpret_bits(int_c); + // auto loc = findLineIdInTri(tri, rline[0], rline[1]); + // if (loc == -1) { + // printf("ridiculous, this edge <%d, %d> does not belong to tri <%d, %d, %d>\n", rline[0], + // rline[1], tri[0], tri[1], tri[2]); + // return; + // } + // sfs(sfFeIndsOffset + loc, triNo) = li; + // // edge + // neighborTris[1] = triNo; + // } + // } + // ses.tuple(dim_c<2>, seFeIndsOffset, li) = neighborTris.reinterpret_bits(float_c); + // }); + // } + // /// @brief compute fp neighbors + // /// @note surface vertex index is not necessarily consecutive, thus hashing + // { + // bcht, 32> vtab{svs.get_allocator(), svs.size()}; + // Vector svi{etab.get_allocator(), svs.size()}; // surftri indices corresponding to edges in the table + // // svs + // pol(range(svs.size()), [vtab = proxy(vtab), svs = proxy({}, svs), + // svi = proxy(svi)] __device__(int vi) mutable { + // int vert = reinterpret_bits(svs("inds", vi)); + // if (auto no = vtab.insert(vert); no >= 0) + // svi[no] = vi; + // }); + // // + // pol(range(sfs.size()), [vtab = proxy(vtab), sfs = proxy({}, sfs), + // svi = proxy(svi)] __device__(int ti) mutable { + // auto neighborIds = vec3i::uniform(-1); + // auto tri = sfs.pack(dim_c<3>, "inds", ti).reinterpret_bits(int_c); + // for (int i = 0; i != 3; ++i) + // if (auto no = vtab.query(tri[i]); no >= 0) { + // neighborIds[i] = svi[no]; + // } 
+ // sfs.tuple(dim_c<3>, "fp_inds", ti) = neighborIds.reinterpret_bits(float_c); + // }); + // } + // } + + + // void compute_surface_neighbors(zs::CudaExecutionPolicy &pol, ZenoParticles::particles_t &sfs, + // ZenoParticles::particles_t &ses, ZenoParticles::particles_t &svs) { + // using namespace zs; + // constexpr auto space = execspace_e::cuda; + // using vec2i = zs::vec; + // using vec3i = zs::vec; + // sfs.append_channels(pol, {{"ff_inds", 3}, {"fe_inds", 3}, {"fp_inds", 3}}); + // ses.append_channels(pol, {{"fe_inds", 2},{"ep_inds",2}}); + + // // fmt::print("sfs size: {}, ses size: {}, svs size: {}\n", sfs.size(), ses.size(), svs.size()); + + // bcht, 32> etab{sfs.get_allocator(), sfs.size() * 3}; + // Vector sfi{sfs.get_allocator(), sfs.size() * 3}; // surftri indices corresponding to edges in the table + // bcht,32> ptab(svs.get_allocator(),svs.size()); + // Vector spi{svs.get_allocator(),svs.size()}; + + // pol(range(sfi.size()), + // [sfi = proxy(sfi)] __device__(int i) mutable { + // sfi[i] = -1; + // }); + + // /// @brief compute space hash + // { + // pol(range(sfs.size()), [etab = proxy(etab), sfs = proxy({}, sfs), + // sfi = proxy(sfi)] __device__(int ti) mutable { + // auto tri = sfs.pack(dim_c<3>, "inds", ti).reinterpret_bits(int_c); + // for (int i = 0; i != 3; ++i) + // if (auto no = etab.insert(vec2i{tri[i], tri[(i + 1) % 3]}); no >= 0) { + // sfi[no] = ti; + // } else { + // int pid = etab.query(vec2i{tri[i], tri[(i + 1) % 3]}); + // int oti = sfi[pid]; + // // auto otri = sfs.pack(dim_c<3>, "inds", oti).reinterpret_bits(int_c); + // printf("the same directed edge <%d, %d> has been inserted twice! 
original sfi[%d,%d]= %d, cur " + // "%d <%d, %d, %d>\n", + // tri[i], tri[(i + 1) % 3],no , pid, oti, ti, tri[0], tri[1], tri[2]); + // } + // }); + + // std::cout << "output svs's channel and channel size :" << std::endl; + // for(auto tag : svs.getPropertyTags()) { + // std::cout << tag.name << "\t:\t" << tag.numChannels << std::endl; + // } + + // // if(svs.hasProperty("inds")) + // // fmt::print(fg(fmt::color::red),"svs has \"inds\" channel\n"); + // auto svsIndsOffset = svs.getPropertyOffset("inds"); + // // std::cout << "svdIndsOffset : " << svsIndsOffset << std::endl; + // pol(range(spi.size()), + // [spi = proxy(spi)] ZS_LAMBDA(int pi) mutable { + // spi[pi] = -1; + // }); + // pol(range(svs.size()),[ptab = proxy(ptab),svs = proxy({},svs,"filling_in_ptab"), + // spi = proxy(spi),svsIndsOffset] __device__(int pi) mutable { + // // auto numChannels = svs.propertySize("inds"); + // // if(pi == 0){ + // // printf("svdInds[\"inds\"][%d] : %d %d\n",(int)numChannels,(int)svsIndsOffset,(int)pi); + + // auto pidx = reinterpret_bits(svs("inds",pi)); + + // // } + // // auto no = ptab.insert(pidx); + // // if(no >=0 && no >= spi.size()) + // // printf("ptab overflow %d %d %d\n",(int)pidx,(int)no,(int)spi.size()); + // // if(no < 0) + // // printf("negative ptab : %d\n",(int)no); + // // auto no = ptab.insert(pidx); + // // duplicate of pi and inds + // if(auto no = ptab.insert(pidx);no >= 0) + // spi[no] = pi; + // else { + // // printf("invalid ptab insertion\n"); + // auto opi = spi[ptab.query(pidx)]; + // auto opidx = reinterpret_bits(svs(svsIndsOffset,opi)); + // printf("the same surface point <%d> has been inserted twice! 
origin svi %d <%d>, cur " + // "%d <%d>\n", + // pidx,opi,opidx,pi,pidx); + // } + // }); + + // pol(range(spi.size()), + // [spi = proxy(spi)] ZS_LAMBDA(int pi) mutable { + // if(spi[pi] < 0) + // printf("invalid spi[%d] = %d\n",pi,spi[pi]); + // }); + + + // } + // /// @brief compute ep neighbors + // { + // if(!ses.hasProperty("inds") || ses.getChannelSize("inds") != 2) + // throw std::runtime_error("ses has no valid inds"); + + // if(!ses.hasProperty("ep_inds") || ses.getChannelSize("ep_inds") != 2) + // throw std::runtime_error("ses has no valid ep_inds"); + // pol(range(ses.size()),[ptab = proxy(ptab),ses = proxy({},ses,"ses:retrieve_inds_set_ep_inds"), + // svs = proxy({},svs),spi = proxy(spi)] __device__(int ei) mutable { + // auto neighpIds = vec2i::uniform(-1); + // auto edge = ses.pack(dim_c<2>,"inds",ei).reinterpret_bits(int_c); + // for(int i = 0;i != 2;++i){ + // if(auto no = ptab.query(edge[i]);no >= 0) { + // neighpIds[i] = spi[no]; + // } + // } + // ses.tuple(dim_c<2>,"ep_inds",ei) = neighpIds.reinterpret_bits(float_c); + // }); + // } + + // /// @brief compute ff neighbors + // { + + // pol(range(sfs.size()), [etab = proxy(etab), sfs = proxy({}, sfs), + // sfi = proxy(sfi)] __device__(int ti) mutable { + // auto neighborIds = vec3i::uniform(-1); + // auto tri = sfs.pack(dim_c<3>, "inds", ti).reinterpret_bits(int_c); + // for (int i = 0; i != 3; ++i) + // if (auto no = etab.query(vec2i{tri[(i + 1) % 3], tri[i]}); no >= 0) { + // neighborIds[i] = sfi[no]; + // } + // sfs.tuple(dim_c<3>, "ff_inds", ti) = neighborIds.reinterpret_bits(float_c); + // sfs.tuple(dim_c<3>, "fe_inds", ti) = vec3i::uniform(-1); // default initialization + // }); + // } + // /// @brief compute fe neighbors + // { + // auto sfindsOffset = sfs.getPropertyOffset("inds"); + // auto sfFeIndsOffset = sfs.getPropertyOffset("fe_inds"); + // auto seFeIndsOffset = ses.getPropertyOffset("fe_inds"); + // pol(range(ses.size()), + // [etab = proxy(etab), sfs = proxy({}, sfs), ses = 
proxy({}, ses), + // sfi = proxy(sfi), sfindsOffset, sfFeIndsOffset, seFeIndsOffset] __device__(int li) mutable { + // auto findLineIdInTri = [](const auto &tri, int v0, int v1) -> int { + // for (int loc = 0; loc < 3; ++loc) + // if (tri[loc] == v0 && tri[(loc + 1) % 3] == v1) + // return loc; + // return -1; + // }; + // auto neighborTris = vec2i::uniform(-1); + // auto line = ses.pack(dim_c<2>, "inds", li).reinterpret_bits(int_c); + + // { + // if (auto no = etab.query(line); no >= 0) { + // // tri + // auto triNo = sfi[no]; + // auto tri = sfs.pack(dim_c<3>, sfindsOffset, triNo).reinterpret_bits(int_c); + // auto loc = findLineIdInTri(tri, line[0], line[1]); + // if (loc == -1) { + // printf("ridiculous, this edge <%d, %d> does not belong to tri <%d, %d, %d>\n", line[0], + // line[1], tri[0], tri[1], tri[2]); + // } else { + // sfs(sfFeIndsOffset + loc, triNo) = reinterpret_bits(li); + // // edge + // neighborTris[0] = triNo; + // } + // } + // } + // vec2i rline{line[1], line[0]}; + // { + // if (auto no = etab.query(rline); no >= 0) { + // // tri + // auto triNo = sfi[no]; + // auto tri = sfs.pack(dim_c<3>, sfindsOffset, triNo).reinterpret_bits(int_c); + // auto loc = findLineIdInTri(tri, rline[0], rline[1]); + // if (loc == -1) { + // printf("ridiculous, this edge <%d, %d> does not belong to tri <%d, %d, %d>\n", rline[0], + // rline[1], tri[0], tri[1], tri[2]); + // } else { + // sfs(sfFeIndsOffset + loc, triNo) = reinterpret_bits(li); + // // edge + // neighborTris[1] = triNo; + // } + // } + // } + // ses.tuple(dim_c<2>, seFeIndsOffset, li) = neighborTris.reinterpret_bits(float_c); + // }); + // } + // /// @brief compute fp neighbors + // /// @note surface vertex index is not necessarily consecutive, thus hashing + // { + // bcht, 32> vtab{svs.get_allocator(), svs.size()}; + // Vector svi{etab.get_allocator(), svs.size()}; // surftri indices corresponding to edges in the table + // // svs + // pol(range(svs.size()), [vtab = proxy(vtab), svs = proxy({}, 
svs), + // svi = proxy(svi)] __device__(int vi) mutable { + // int vert = reinterpret_bits(svs("inds", vi)); + // if (auto no = vtab.insert(vert); no >= 0) + // svi[no] = vi; + // }); + // // + // pol(range(sfs.size()), [vtab = proxy(vtab), sfs = proxy({}, sfs), + // svi = proxy(svi)] __device__(int ti) mutable { + // auto neighborIds = vec3i::uniform(-1); + // auto tri = sfs.pack(dim_c<3>, "inds", ti).reinterpret_bits(int_c); + // for (int i = 0; i != 3; ++i) + // if (auto no = vtab.query(tri[i]); no >= 0) { + // neighborIds[i] = svi[no]; + // } + // sfs.tuple(dim_c<3>, "fp_inds", ti) = neighborIds.reinterpret_bits(float_c); + // }); + // } + // } + + void apply() override { using namespace zs; @@ -56,15 +416,16 @@ namespace zeno { auto& tris = (*surf)[ZenoParticles::s_surfTriTag]; auto& lines = (*surf)[ZenoParticles::s_surfEdgeTag]; auto& points = (*surf)[ZenoParticles::s_surfVertTag]; + auto& tets = surf->getQuadraturePoints(); - if(!tris.hasProperty("inds") || tris.getPropertySize("inds") != 3){ + if(!tris.hasProperty("inds") || tris.getChannelSize("inds") != 3){ throw std::runtime_error("the tris has no inds channel"); } - if(!lines.hasProperty("inds") || lines.getPropertySize("inds") != 2) { + if(!lines.hasProperty("inds") || lines.getChannelSize("inds") != 2) { throw std::runtime_error("the line has no inds channel"); } - if(!points.hasProperty("inds") || points.getPropertySize("inds") != 1) { + if(!points.hasProperty("inds") || points.getChannelSize("inds") != 1) { throw std::runtime_error("the point has no inds channel"); } @@ -79,28 +440,64 @@ namespace zeno { // printf("line[%d] : %d %d\n",(int)li,(int)inds[0],(int)inds[1]); // }); - auto bvh_thickness = (T)3 * compute_average_edge_length(cudaExec,verts,"x",tris); +#if 1 - // std::cout << "bvh_thickness : " << bvh_thickness << std::endl; + auto bvh_thickness = (T)2 * compute_average_edge_length(cudaExec,verts,"x",tris); + + // std::cout << "bvh_thickness : " << bvh_thickness << std::endl; - // 
compute_surface_neighbors(cudaExec, tris, lines, points); -#if 1 tris.append_channels(cudaExec,{{"ff_inds",3},{"fe_inds",3},{"fp_inds",3}}); - lines.append_channels(cudaExec,{{"fe_inds",2}}); + lines.append_channels(cudaExec,{{"fe_inds",2},{"ep_inds",2}}); + if(tets.getChannelSize("inds") == 4){ + tris.append_channels(cudaExec,{{"ft_inds",1}}); + if(!compute_ft_neigh_topo(cudaExec,verts,tris,tets,"ft_inds",bvh_thickness)) + throw std::runtime_error("ZSInitTopoConnect::compute_face_tet_neigh_topo fail"); + } if(!compute_ff_neigh_topo(cudaExec,verts,tris,"ff_inds",bvh_thickness)) throw std::runtime_error("ZSInitTopoConnect::compute_face_neigh_topo fail"); if(!compute_fe_neigh_topo(cudaExec,verts,lines,tris,"fe_inds",bvh_thickness)) throw std::runtime_error("ZSInitTopoConnect::compute_face_neigh_topo fail"); if(!compute_fp_neigh_topo(cudaExec,verts,points,tris,"fp_inds",bvh_thickness)) throw std::runtime_error("ZSInitTopoConnect::compute_face_point_neigh_topo fail"); +#else + compute_surface_neighbors(cudaExec,tris,lines,points); #endif + auto fbuffer = typename ZenoParticles::particles_t({{"non_manifold",1},{"inds",3}},tris.size(),zs::memsrc_e::device,0); + auto vbuffer = typename ZenoParticles::particles_t({{"x",3}},verts.size(),zs::memsrc_e::device,0); + TILEVEC_OPS::copy(cudaExec,tris,"non_manifold",fbuffer,"non_manifold"); + TILEVEC_OPS::copy(cudaExec,tris,"inds",fbuffer,"inds"); + TILEVEC_OPS::copy(cudaExec,verts,"x",vbuffer,"x"); + + fbuffer = fbuffer.clone({zs::memsrc_e::host}); + vbuffer = vbuffer.clone({zs::memsrc_e::host}); + + constexpr auto omp_space = execspace_e::openmp; + auto ompPol = omp_exec(); + + auto nmf_prim = std::make_shared(); + auto& nmf_verts = nmf_prim->verts; + nmf_verts.resize(tris.size() * 3); + auto& nmf_tris = nmf_prim->tris; + nmf_tris.resize(tris.size()); + ompPol(range(nmf_tris.size()), + [&nmf_tris,&nmf_verts,fbuffer = proxy({},fbuffer),vbuffer = proxy({},vbuffer)] (int ti) mutable { + auto inds = 
fbuffer.pack(dim_c<3>,"inds",ti).reinterpret_bits(int_c); + for(int i = 0;i != 3;++i) + nmf_verts[ti * 3 + i] = vbuffer.pack(dim_c<3>,"x",inds[i]).to_array(); + if(fbuffer("non_manifold",ti) > 0) { + nmf_tris[ti] = zeno::vec3i(ti * 3 + 0,ti * 3 + 1,ti * 3 + 2); + }else{ + nmf_tris[ti] = zeno::vec3i(0,0,0); + } + }); + set_output("non_manifold_facets",std::move(nmf_prim)); set_output("zssurf",surf); } }; ZENDEFNODE(ZSInitSurfaceTopoConnect, {{{"zssurf"}}, - {{"zssurf"}}, + {{"zssurf"},{"non_manifold_facets"}}, {}, {"ZSGeometry"}}); @@ -108,7 +505,7 @@ namespace zeno { template constexpr vec3 eval_center(const VTILEVEC& verts,const zs::vec& tet) { auto res = vec3::zeros(); - for(int i = 0;i < 4;++i) + for(int i = 0;i != 4;++i) res += verts.template pack<3>("x",tet[i]) / (T)4.0; return res; } @@ -144,6 +541,7 @@ namespace zeno { throw std::runtime_error("the input zsparticles has no surface points"); } + const auto& tets = zsparticles->getQuadraturePoints(); auto& tris = (*zsparticles)[ZenoParticles::s_surfTriTag]; auto& lines = (*zsparticles)[ZenoParticles::s_surfEdgeTag]; auto& points = (*zsparticles)[ZenoParticles::s_surfVertTag]; @@ -164,11 +562,14 @@ namespace zeno { std::vector tags{{"x",3}}; int nm_tris = tris.size(); + int nm_lines = lines.size(); // output ff topo first auto ff_topo = typename ZenoParticles::particles_t(tags,nm_tris * 4,zs::memsrc_e::device,0); auto fe_topo = typename ZenoParticles::particles_t(tags,nm_tris * 4,zs::memsrc_e::device,0); auto fp_topo = typename ZenoParticles::particles_t(tags,nm_tris * 4,zs::memsrc_e::device,0); + auto ep_topo = typename ZenoParticles::particles_t(tags,nm_lines * 2,zs::memsrc_e::device,0); + auto ft_topo = typename ZenoParticles::particles_t(tags,nm_tris * 2,zs::memsrc_e::device,0); // transfer the data from gpu to cpu constexpr auto cuda_space = execspace_e::cuda; @@ -177,11 +578,15 @@ namespace zeno { [ff_topo = proxy({},ff_topo), fe_topo = proxy({},fe_topo), fp_topo = proxy({},fp_topo), + ft_topo = 
proxy({},ft_topo), + tets = proxy({},tets), tris = proxy({},tris), lines = proxy({},lines), points = proxy({},points), verts = proxy({},verts)] ZS_LAMBDA(int ti) mutable { auto tri = tris.template pack<3>("inds",ti).reinterpret_bits(int_c); + auto tet_id = reinterpret_bits(tris("ft_inds",ti)); + auto tet = tets.template pack<4>("inds",tet_id).reinterpret_bits(int_c); auto ff_inds = tris.template pack<3>("ff_inds",ti).reinterpret_bits(int_c); auto fe_inds = tris.template pack<3>("fe_inds",ti).reinterpret_bits(int_c); auto fp_inds = tris.template pack<3>("fp_inds",ti).reinterpret_bits(int_c); @@ -190,6 +595,11 @@ namespace zeno { ff_topo.template tuple<3>("x",ti * 4 + 0) = center; fe_topo.template tuple<3>("x",ti * 4 + 0) = center; fp_topo.template tuple<3>("x",ti * 4 + 0) = center; + auto tcenter = eval_center(verts,tet); + + ft_topo.template tuple<3>("x",ti * 2 + 0) = center; + ft_topo.template tuple<3>("x",ti * 2 + 1) = tcenter; + for(int i = 0;i != 3;++i) { auto nti = ff_inds[i]; auto ntri = tris.template pack<3>("inds",nti).reinterpret_bits(int_c); @@ -208,13 +618,30 @@ namespace zeno { }); + cudaPol(zs::range(nm_lines), + [ep_topo = proxy({},ep_topo), + verts = proxy({},verts), + points = proxy({},points), + lines = proxy({},lines)] ZS_LAMBDA(int li) mutable { + auto ep_inds = lines.template pack<2>("ep_inds",li).reinterpret_bits(int_c); + for(int i = 0;i != 2;++i) { + auto pidx = ep_inds[i]; + auto vidx = reinterpret_bits(points("inds",pidx)); + ep_topo.template tuple<3>("x",li * 2 + i) = verts.template pack<3>("x",vidx); + } + }); + ff_topo = ff_topo.clone({zs::memsrc_e::host}); fe_topo = fe_topo.clone({zs::memsrc_e::host}); fp_topo = fp_topo.clone({zs::memsrc_e::host}); + ep_topo = ep_topo.clone({zs::memsrc_e::host}); + ft_topo = ft_topo.clone({zs::memsrc_e::host}); int ff_size = ff_topo.size(); int fe_size = fe_topo.size(); int fp_size = fp_topo.size(); + int ep_size = ep_topo.size(); + int ft_size = ft_topo.size(); constexpr auto omp_space = 
execspace_e::openmp; auto ompPol = omp_exec(); @@ -222,6 +649,8 @@ namespace zeno { auto ff_prim = std::make_shared(); auto fe_prim = std::make_shared(); auto fp_prim = std::make_shared(); + auto ep_prim = std::make_shared(); + auto ft_prim = std::make_shared(); auto& ff_verts = ff_prim->verts; auto& ff_lines = ff_prim->lines; @@ -232,9 +661,17 @@ namespace zeno { auto& fp_verts = fp_prim->verts; auto& fp_lines = fp_prim->lines; + auto& ep_verts = ep_prim->verts; + auto& ep_lines = ep_prim->lines; + + auto& ft_verts = ft_prim->verts; + auto& ft_lines = ft_prim->lines; + int ff_pair_count = nm_tris * 3; int fe_pair_count = nm_tris * 3; int fp_pair_count = nm_tris * 3; + int ep_pair_count = nm_lines * 1; + int ft_pair_count = nm_tris; ff_verts.resize(ff_size); ff_lines.resize(ff_pair_count); @@ -242,6 +679,18 @@ namespace zeno { fe_lines.resize(fe_pair_count); fp_verts.resize(fp_size); fp_lines.resize(fp_pair_count); + ep_verts.resize(ep_size); + ep_lines.resize(ep_pair_count); + ft_verts.resize(ft_size); + ft_lines.resize(ft_pair_count); + + ompPol(zs::range(nm_tris), + [&ft_verts,&ft_lines,ft_topo = proxy({},ft_topo)] (int fi) mutable { + ft_verts[fi * 2 + 0] = ft_topo.template pack<3>("x",fi * 2 + 0).to_array(); + ft_verts[fi * 2 + 1] = ft_topo.template pack<3>("x",fi * 2 + 1).to_array(); + // ft_verts[fi * 2 + 1] = zeno::vec3f(0.0,0.0,0.0); + ft_lines[fi] = zeno::vec2i(fi * 2 + 0,fi * 2 + 1); + }); ompPol(zs::range(nm_tris), [&ff_verts,&ff_lines,ff_topo = proxy({},ff_topo)] (int fi) mutable { @@ -276,17 +725,42 @@ namespace zeno { } }); + ompPol(zs::range(nm_lines), + [&ep_verts,&ep_lines,ep_topo = proxy({},ep_topo)] (int li) mutable { + for(int i = 0;i != 2;++i) + ep_verts[li * 2 + i] = ep_topo.template pack<3>("x",li * 2 + i).to_array(); + ep_lines[li] = zeno::vec2i(li * 2 + 0,li * 2 + 1); + }); + // for(int i = 0;i < fe_lines.size();++i) // std::cout << "fe_line<" << i << "> : \t" << fe_lines[i][0] << "\t" << fe_lines[i][1] << std::endl; + 
set_output("ft_topo",std::move(ft_prim)); set_output("fp_topo",std::move(fp_prim)); set_output("ff_topo",std::move(ff_prim)); set_output("fe_topo",std::move(fe_prim)); + set_output("ep_topo",std::move(ep_prim)); } }; ZENDEFNODE(VisualizeTopology, {{{"ZSParticles"}}, - {{"ff_topo"},{"fe_topo"},{"fp_topo"}}, + {{"ft_topo"},{"ff_topo"},{"fe_topo"},{"fp_topo"},{"ep_topo"}}, + {}, + {"ZSGeometry"}}); + + + struct CopyShape : INode { + virtual void apply() override { + auto prim1 = get_input("prim1"); + auto prim2 = get_input("prim2"); + auto& nx = prim1->add_attr("npos"); + for(int i = 0;i != prim1->size();++i) + nx[i] = prim2->verts[i]; + set_output("prim1",prim1); + } + }; + ZENDEFNODE(CopyShape, {{{"prim1"},{"prim2"}}, + {{"prim1"}}, {}, {"ZSGeometry"}}); @@ -309,7 +783,7 @@ namespace zeno { const auto& points = (*zsparticles)[ZenoParticles::s_surfVertTag]; const auto& verts = zsparticles->getParticles(); - if(!tris.hasProperty("fp_inds") || tris.getPropertySize("fp_inds") != 3) { + if(!tris.hasProperty("fp_inds") || tris.getChannelSize("fp_inds") != 3) { throw std::runtime_error("call ZSInitSurfaceTopology first before VisualizeSurfaceMesh"); } @@ -466,10 +940,13 @@ namespace zeno { // std::cout << "CALCULATE SURFACE NORMAL" << std::endl; if(!calculate_facet_normal(cudaExec,verts,"x",tris,tris,"nrm")) - throw std::runtime_error("ZSCalNormal::calculate_facet_normal fail"); + throw std::runtime_error("VisualizeSurfaceEdgeNormal::calculate_facet_normal fail"); auto buffer = typename ZenoParticles::particles_t({{"nrm",3},{"x",3}},lines.size(),zs::memsrc_e::device,0); + if(!calculate_edge_normal_from_facet_normal(cudaExec,tris,"nrm",buffer,"nrm",lines)) + throw std::runtime_error("VisualizeSurfaceEdgeNormal::calculate_edge_normal_from_facet_normal fail"); + cudaExec(zs::range(lines.size()),[ buffer = proxy({},buffer), @@ -485,7 +962,7 @@ namespace zeno { auto v0 = verts.template pack<3>("x",linds[0]); auto v1 = verts.template pack<3>("x",linds[1]); - buffer.template 
tuple<3>("nrm",ei) = (n0 + n1).normalized(); + // buffer.template tuple<3>("nrm",ei) = (n0 + n1).normalized(); // buffer.template tuple<3>("nrm",ei) = lines.template pack<3>("nrm",ei); buffer.template tuple<3>("x",ei) = (v0 + v1) / (T)2.0; }); @@ -579,16 +1056,13 @@ namespace zeno { // lines.template tuple<3>(ceNrmTag,ei) = e10.cross(ne).normalized(); // }); -#if 1 - update_surface_cell_normals(cudaExec, const_cast(verts), "x", 0, const_cast(tris), "nrm", lines, ceNrmTag); -#else + COLLISION_UTILS::calculate_cell_bisector_normal(cudaExec, verts,"x", lines, tris, tris,"nrm", lines,ceNrmTag); -#endif set_output("ZSParticles",zsparticles); @@ -903,310 +1377,463 @@ namespace zeno { - struct VisualizeFacetPointIntersection : zeno::INode { - using T = float; - using Ti = int; - using dtiles_t = zs::TileVector; - using tiles_t = typename ZenoParticles::particles_t; - using bvh_t = zs::LBvh<3,int,T>; - using bv_t = zs::AABBBox<3, T>; - using vec3 = zs::vec; - - virtual void apply() override { - using namespace zs; - - auto zsparticles = get_input("ZSParticles"); - - if(!zsparticles->hasAuxData(ZenoParticles::s_surfTriTag)) - throw std::runtime_error("the input zsparticles has no surface tris"); - if(!zsparticles->hasAuxData(ZenoParticles::s_surfEdgeTag)) - throw std::runtime_error("the input zsparticles has no surface lines"); - if(!zsparticles->hasAuxData(ZenoParticles::s_surfVertTag)) - throw std::runtime_error("the input zsparticles has no surface points"); - // if(!zsparticles->hasBvh(ZenoParticles::s_surfTriTag)) { - // throw std::runtime_error("the input zsparticles has no surface tris's spacial structure"); - // } - // if(!zsparticles->hasBvh(ZenoParticles::s_surfEdgeTag)) { - // throw std::runtime_error("the input zsparticles has no surface edge's spacial structure"); - // } - // if(!zsparticles->hasBvh(ZenoParticles::s_surfVertTag)) { - // throw std::runtime_error("the input zsparticles has no surface vert's spacial structure"); - // } - - const auto& verts = 
zsparticles->getParticles(); +// struct VisualizeFacetPointIntersection : zeno::INode { +// using T = float; +// using Ti = int; +// using dtiles_t = zs::TileVector; +// using tiles_t = typename ZenoParticles::particles_t; +// using bvh_t = zs::LBvh<3,int,T>; +// using bv_t = zs::AABBBox<3, T>; +// using vec3 = zs::vec; + +// virtual void apply() override { +// using namespace zs; + +// auto zsparticles = get_input("ZSParticles"); + +// if(!zsparticles->hasAuxData(ZenoParticles::s_surfTriTag)) +// throw std::runtime_error("the input zsparticles has no surface tris"); +// if(!zsparticles->hasAuxData(ZenoParticles::s_surfEdgeTag)) +// throw std::runtime_error("the input zsparticles has no surface lines"); +// if(!zsparticles->hasAuxData(ZenoParticles::s_surfVertTag)) +// throw std::runtime_error("the input zsparticles has no surface points"); +// // if(!zsparticles->hasBvh(ZenoParticles::s_surfTriTag)) { +// // throw std::runtime_error("the input zsparticles has no surface tris's spacial structure"); +// // } +// // if(!zsparticles->hasBvh(ZenoParticles::s_surfEdgeTag)) { +// // throw std::runtime_error("the input zsparticles has no surface edge's spacial structure"); +// // } +// // if(!zsparticles->hasBvh(ZenoParticles::s_surfVertTag)) { +// // throw std::runtime_error("the input zsparticles has no surface vert's spacial structure"); +// // } + +// const auto& verts = zsparticles->getParticles(); + +// auto& tris = (*zsparticles)[ZenoParticles::s_surfTriTag]; +// auto& lines = (*zsparticles)[ZenoParticles::s_surfEdgeTag]; +// auto& points = (*zsparticles)[ZenoParticles::s_surfVertTag]; + +// // auto& stBvh = zsparticles->bvh(ZenoParticles::s_surfTriTag); +// // auto& seBvh = zsparticles->bvh(ZenoParticles::s_surfEdgeTag); + +// auto in_collisionEps = get_input2("in_collisionEps"); +// auto out_collisionEps = get_input2("out_collisionEps"); + +// dtiles_t sttemp(tris.get_allocator(), +// { +// {"nrm",3} +// },tris.size() +// ); +// dtiles_t 
setemp(lines.get_allocator(), +// { +// {"nrm",3} +// },lines.size() +// ); + +// dtiles_t cptemp(points.get_allocator(), +// { +// {"inds",4}, +// {"area",1}, +// {"inverted",1} +// },points.size() * MAX_FP_COLLISION_PAIRS); - auto& tris = (*zsparticles)[ZenoParticles::s_surfTriTag]; - auto& lines = (*zsparticles)[ZenoParticles::s_surfEdgeTag]; - auto& points = (*zsparticles)[ZenoParticles::s_surfVertTag]; - // auto& stBvh = zsparticles->bvh(ZenoParticles::s_surfTriTag); - // auto& seBvh = zsparticles->bvh(ZenoParticles::s_surfEdgeTag); - - auto in_collisionEps = get_input2("in_collisionEps"); - auto out_collisionEps = get_input2("out_collisionEps"); - - dtiles_t sttemp(tris.get_allocator(), - { - {"nrm",3} - },tris.size() - ); - dtiles_t setemp(lines.get_allocator(), - { - {"nrm",3} - },lines.size() - ); +// constexpr auto space = execspace_e::cuda; +// auto cudaPol = cuda_exec(); + +// std::vector cv_tags{{"xs",3},{"xe",3}}; +// auto cv_buffer = typename ZenoParticles::particles_t(cv_tags,points.size() * MAX_FP_COLLISION_PAIRS,zs::memsrc_e::device,0); +// std::vector cv_pt_tags{{"p",3},{"t0",3},{"t1",3},{"t2",3}}; +// auto cv_pt_buffer = typename ZenoParticles::particles_t(cv_pt_tags,points.size() * MAX_FP_COLLISION_PAIRS,zs::memsrc_e::device,0); + +// #if 0 + +// if(!calculate_facet_normal(cudaPol,verts,"x",tris,sttemp,"nrm")){ +// throw std::runtime_error("fail updating facet normal"); +// } + + +// // TILEVEC_OPS::copy<4>(cudaPol,eles,"inds",etemp,"inds"); + + + +// if(!COLLISION_UTILS::calculate_cell_bisector_normal(cudaPol, +// verts,"x", +// lines, +// tris, +// sttemp,"nrm", +// setemp,"nrm")){ +// throw std::runtime_error("fail calculate cell bisector normal"); +// } + +// auto stbvs = retrieve_bounding_volumes(cudaPol,verts,tris,wrapv<3>{},(T)0.0,"x"); +// auto sebvs = retrieve_bounding_volumes(cudaPol,verts,lines,wrapv<2>{},(T)0.0,"x"); +// stBvh.refit(cudaPol,stbvs); +// seBvh.refit(cudaPol,sebvs); + +// auto avgl = 
compute_average_edge_length(cudaPol,verts,"x",tris); +// auto bvh_thickness = 5 * avgl; + +// TILEVEC_OPS::fill(cudaPol,sptemp,"fp_collision_pairs",zs::vec::uniform(-1).template reinterpret_bits()); +// cudaPol(zs::range(points.size()),[collisionEps = collisionEps, +// verts = proxy({},verts), +// sttemp = proxy({},sttemp), +// setemp = proxy({},setemp), +// sptemp = proxy({},sptemp), +// points = proxy({},points), +// lines = proxy({},lines), +// tris = proxy({},tris), +// stbvh = proxy(stBvh),thickness = bvh_thickness] ZS_LAMBDA(int svi) mutable { + + +// auto vi = reinterpret_bits(points("inds",svi)); +// // auto is_vertex_inverted = reinterpret_bits(verts("is_inverted",vi)); +// // if(is_vertex_inverted) +// // return; + +// auto p = verts.template pack<3>("x",vi); +// auto bv = bv_t{get_bounding_box(p - thickness, p + thickness)}; + +// int nm_collision_pairs = 0; +// auto process_vertex_face_collision_pairs = [&](int stI) { +// auto tri = tris.pack(dim_c<3>, "inds",stI).reinterpret_bits(int_c); +// if(tri[0] == vi || tri[1] == vi || tri[2] == vi) +// return; + +// zs::vec t[3] = {}; +// t[0] = verts.template pack<3>("x",tri[0]); +// t[1] = verts.template pack<3>("x",tri[1]); +// t[2] = verts.template pack<3>("x",tri[2]); + +// bool collide = false; + +// if(COLLISION_UTILS::is_inside_the_cell(verts,"x", +// lines,tris, +// sttemp,"nrm", +// setemp,"nrm", +// stI,p,collisionEps)) { +// collide = true; +// } + + +// if(!collide) +// return; + +// if(nm_collision_pairs < MAX_FP_COLLISION_PAIRS) { +// sptemp("fp_collision_pairs",nm_collision_pairs++,svi) = reinterpret_bits(stI); +// } +// }; +// stbvh.iter_neighbors(bv,process_vertex_face_collision_pairs); +// }); + + +// cudaPol(zs::range(points.size()), +// [cv_buffer = proxy({},cv_buffer),cv_pt_buffer = proxy({},cv_pt_buffer), +// sptemp = proxy({},sptemp),verts = proxy({},verts),points = proxy({},points),tris = proxy({},tris)] ZS_LAMBDA(int pi) mutable { +// auto collision_pairs = sptemp.template 
pack("fp_collision_pairs",pi).reinterpret_bits(int_c); +// auto vi = reinterpret_bits(points("inds",pi)); +// auto pvert = verts.template pack<3>("x",vi); + +// for(int i = 0;i != MAX_FP_COLLISION_PAIRS;++i){ +// auto sti = collision_pairs[i]; +// if(sti < 0){ +// cv_buffer.template tuple<3>("xs",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; +// cv_buffer.template tuple<3>("xe",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; + +// cv_pt_buffer.template tuple<3>("p",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; +// cv_pt_buffer.template tuple<3>("t0",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; +// cv_pt_buffer.template tuple<3>("t1",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; +// cv_pt_buffer.template tuple<3>("t2",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; + +// }else { +// auto tri = tris.template pack<3>("inds",sti).reinterpret_bits(int_c); +// auto t0 = verts.template pack<3>("x",tri[0]); +// auto t1 = verts.template pack<3>("x",tri[1]); +// auto t2 = verts.template pack<3>("x",tri[2]); +// auto center = (t0 + t1 + t2) / (T)3.0; + +// cv_buffer.template tuple<3>("xs",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; +// cv_buffer.template tuple<3>("xe",MAX_FP_COLLISION_PAIRS * pi + i) = center; + +// cv_pt_buffer.template tuple<3>("p",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; +// cv_pt_buffer.template tuple<3>("t0",MAX_FP_COLLISION_PAIRS * pi + i) = t0; +// cv_pt_buffer.template tuple<3>("t1",MAX_FP_COLLISION_PAIRS * pi + i) = t1; +// cv_pt_buffer.template tuple<3>("t2",MAX_FP_COLLISION_PAIRS * pi + i) = t2; + +// } +// } +// }); + +// #else +// // auto stbvs = retrieve_bounding_volumes(cudaPol,verts,tris,wrapv<3>{},(T)0.0,"x"); +// // stBvh.refit(cudaPol,stbvs); + +// COLLISION_UTILS::do_facet_point_collision_detection(cudaPol, +// verts,"x", +// points, +// lines, +// tris, +// sttemp, +// setemp, +// cptemp, +// // stBvh, +// in_collisionEps,out_collisionEps); + + + +// cudaPol(zs::range(points.size()), +// [cptemp = proxy({},cptemp),verts = proxy({},verts), +// cv_buffer = 
proxy({},cv_buffer), +// cv_pt_buffer = proxy({},cv_pt_buffer), +// points = proxy({},points)] ZS_LAMBDA(int pi) mutable { +// for(int i = 0;i != MAX_FP_COLLISION_PAIRS;++i) { +// auto inds = cptemp.template pack<4>("inds",pi * MAX_FP_COLLISION_PAIRS + i).reinterpret_bits(int_c); +// bool contact = true; +// auto pvert = zs::vec::zeros(); +// for(int j = 0;j != 4;++j) +// if(inds[j] < 0) +// contact = false; +// if(contact) { +// pvert = verts.template pack<3>("x",inds[0]); +// auto t0 = verts.template pack<3>("x",inds[1]); +// auto t1 = verts.template pack<3>("x",inds[2]); +// auto t2 = verts.template pack<3>("x",inds[3]); +// auto center = (t0 + t1 + t2) / (T)3.0; + +// cv_buffer.template tuple<3>("xs",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; +// cv_buffer.template tuple<3>("xe",MAX_FP_COLLISION_PAIRS * pi + i) = center; + +// cv_pt_buffer.template tuple<3>("p",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; +// cv_pt_buffer.template tuple<3>("t0",MAX_FP_COLLISION_PAIRS * pi + i) = t0; +// cv_pt_buffer.template tuple<3>("t1",MAX_FP_COLLISION_PAIRS * pi + i) = t1; +// cv_pt_buffer.template tuple<3>("t2",MAX_FP_COLLISION_PAIRS * pi + i) = t2; +// }else{ +// cv_buffer.template tuple<3>("xs",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; +// cv_buffer.template tuple<3>("xe",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; + +// cv_pt_buffer.template tuple<3>("p",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; +// cv_pt_buffer.template tuple<3>("t0",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; +// cv_pt_buffer.template tuple<3>("t1",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; +// cv_pt_buffer.template tuple<3>("t2",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; +// } +// } +// }); - dtiles_t cptemp(points.get_allocator(), - { - {"inds",4}, - {"area",1}, - {"inverted",1} - },points.size() * MAX_FP_COLLISION_PAIRS); +// #endif +// // cudaPol.syncCtx(); - constexpr auto space = execspace_e::cuda; - auto cudaPol = cuda_exec(); - std::vector cv_tags{{"xs",3},{"xe",3}}; - auto cv_buffer = typename 
ZenoParticles::particles_t(cv_tags,points.size() * MAX_FP_COLLISION_PAIRS,zs::memsrc_e::device,0); - std::vector cv_pt_tags{{"p",3},{"t0",3},{"t1",3},{"t2",3}}; - auto cv_pt_buffer = typename ZenoParticles::particles_t(cv_pt_tags,points.size() * MAX_FP_COLLISION_PAIRS,zs::memsrc_e::device,0); +// cv_buffer = cv_buffer.clone({zs::memsrc_e::host}); +// auto collisionFacetVis = std::make_shared(); +// auto& cv_verts = collisionFacetVis->verts; +// auto& cv_lines = collisionFacetVis->lines; +// cv_verts.resize(points.size() * 2 * MAX_FP_COLLISION_PAIRS); +// cv_lines.resize(points.size() * MAX_FP_COLLISION_PAIRS); -#if 0 +// auto ompPol = omp_exec(); +// constexpr auto omp_space = execspace_e::openmp; - if(!calculate_facet_normal(cudaPol,verts,"x",tris,sttemp,"nrm")){ - throw std::runtime_error("fail updating facet normal"); - } +// ompPol(zs::range(cv_buffer.size()), +// [cv_buffer = proxy({},cv_buffer),&cv_verts,&cv_lines] (int pi) mutable { +// auto xs = cv_buffer.template pack<3>("xs",pi); +// auto xe = cv_buffer.template pack<3>("xe",pi); +// cv_verts[pi * 2 + 0] = zeno::vec3f(xs[0],xs[1],xs[2]); +// cv_verts[pi * 2 + 1] = zeno::vec3f(xe[0],xe[1],xe[2]); +// cv_lines[pi] = zeno::vec2i(pi * 2 + 0,pi * 2 + 1); +// }); +// set_output("collisionFacetVis",std::move(collisionFacetVis)); - // TILEVEC_OPS::copy<4>(cudaPol,eles,"inds",etemp,"inds"); +// cv_pt_buffer = cv_pt_buffer.clone({zs::memsrc_e::host}); +// auto colPointFacetPairVis = std::make_shared(); +// auto& cv_pt_verts = colPointFacetPairVis->verts; +// auto& cv_pt_tris = colPointFacetPairVis->tris; - if(!COLLISION_UTILS::calculate_cell_bisector_normal(cudaPol, - verts,"x", - lines, - tris, - sttemp,"nrm", - setemp,"nrm")){ - throw std::runtime_error("fail calculate cell bisector normal"); - } - - auto stbvs = retrieve_bounding_volumes(cudaPol,verts,tris,wrapv<3>{},(T)0.0,"x"); - auto sebvs = retrieve_bounding_volumes(cudaPol,verts,lines,wrapv<2>{},(T)0.0,"x"); - stBvh.refit(cudaPol,stbvs); - 
seBvh.refit(cudaPol,sebvs); - - auto avgl = compute_average_edge_length(cudaPol,verts,"x",tris); - auto bvh_thickness = 5 * avgl; - - TILEVEC_OPS::fill(cudaPol,sptemp,"fp_collision_pairs",zs::vec::uniform(-1).template reinterpret_bits()); - cudaPol(zs::range(points.size()),[collisionEps = collisionEps, - verts = proxy({},verts), - sttemp = proxy({},sttemp), - setemp = proxy({},setemp), - sptemp = proxy({},sptemp), - points = proxy({},points), - lines = proxy({},lines), - tris = proxy({},tris), - stbvh = proxy(stBvh),thickness = bvh_thickness] ZS_LAMBDA(int svi) mutable { - - - auto vi = reinterpret_bits(points("inds",svi)); - // auto is_vertex_inverted = reinterpret_bits(verts("is_inverted",vi)); - // if(is_vertex_inverted) - // return; - - auto p = verts.template pack<3>("x",vi); - auto bv = bv_t{get_bounding_box(p - thickness, p + thickness)}; - - int nm_collision_pairs = 0; - auto process_vertex_face_collision_pairs = [&](int stI) { - auto tri = tris.pack(dim_c<3>, "inds",stI).reinterpret_bits(int_c); - if(tri[0] == vi || tri[1] == vi || tri[2] == vi) - return; - - zs::vec t[3] = {}; - t[0] = verts.template pack<3>("x",tri[0]); - t[1] = verts.template pack<3>("x",tri[1]); - t[2] = verts.template pack<3>("x",tri[2]); - - bool collide = false; - - if(COLLISION_UTILS::is_inside_the_cell(verts,"x", - lines,tris, - sttemp,"nrm", - setemp,"nrm", - stI,p,collisionEps)) { - collide = true; - } +// cv_pt_verts.resize(cv_pt_buffer.size() * 4); +// cv_pt_tris.resize(cv_pt_buffer.size()); +// ompPol(zs::range(cv_pt_buffer.size()), +// [&cv_pt_verts,&cv_pt_tris,cv_pt_buffer = proxy({},cv_pt_buffer)] (int pi) mutable { +// cv_pt_verts[pi * 4 + 0] = cv_pt_buffer.template pack<3>("p",pi).to_array(); +// cv_pt_verts[pi * 4 + 1] = cv_pt_buffer.template pack<3>("t0",pi).to_array(); +// cv_pt_verts[pi * 4 + 2] = cv_pt_buffer.template pack<3>("t1",pi).to_array(); +// cv_pt_verts[pi * 4 + 3] = cv_pt_buffer.template pack<3>("t2",pi).to_array(); - if(!collide) - return; +// 
cv_pt_tris[pi] = zeno::vec3i(pi * 4 + 1,pi * 4 + 2,pi * 4 + 3); +// }); - if(nm_collision_pairs < MAX_FP_COLLISION_PAIRS) { - sptemp("fp_collision_pairs",nm_collision_pairs++,svi) = reinterpret_bits(stI); - } - }; - stbvh.iter_neighbors(bv,process_vertex_face_collision_pairs); - }); +// set_output("colPointFacetPairVis",std::move(colPointFacetPairVis)); - cudaPol(zs::range(points.size()), - [cv_buffer = proxy({},cv_buffer),cv_pt_buffer = proxy({},cv_pt_buffer), - sptemp = proxy({},sptemp),verts = proxy({},verts),points = proxy({},points),tris = proxy({},tris)] ZS_LAMBDA(int pi) mutable { - auto collision_pairs = sptemp.template pack("fp_collision_pairs",pi).reinterpret_bits(int_c); - auto vi = reinterpret_bits(points("inds",pi)); - auto pvert = verts.template pack<3>("x",vi); +// } +// }; - for(int i = 0;i != MAX_FP_COLLISION_PAIRS;++i){ - auto sti = collision_pairs[i]; - if(sti < 0){ - cv_buffer.template tuple<3>("xs",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; - cv_buffer.template tuple<3>("xe",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; - - cv_pt_buffer.template tuple<3>("p",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; - cv_pt_buffer.template tuple<3>("t0",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; - cv_pt_buffer.template tuple<3>("t1",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; - cv_pt_buffer.template tuple<3>("t2",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; - }else { - auto tri = tris.template pack<3>("inds",sti).reinterpret_bits(int_c); - auto t0 = verts.template pack<3>("x",tri[0]); - auto t1 = verts.template pack<3>("x",tri[1]); - auto t2 = verts.template pack<3>("x",tri[2]); - auto center = (t0 + t1 + t2) / (T)3.0; +// ZENDEFNODE(VisualizeFacetPointIntersection, {{"ZSParticles",{"float","in_collisionEps","0.01"},{"float","out_collisionEps","0.01"}}, +// {"collisionFacetVis","colPointFacetPairVis"}, +// { +// }, +// {"ZSGeometry"}}); - cv_buffer.template tuple<3>("xs",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; - cv_buffer.template tuple<3>("xe",MAX_FP_COLLISION_PAIRS * 
pi + i) = center; - cv_pt_buffer.template tuple<3>("p",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; - cv_pt_buffer.template tuple<3>("t0",MAX_FP_COLLISION_PAIRS * pi + i) = t0; - cv_pt_buffer.template tuple<3>("t1",MAX_FP_COLLISION_PAIRS * pi + i) = t1; - cv_pt_buffer.template tuple<3>("t2",MAX_FP_COLLISION_PAIRS * pi + i) = t2; - } - } - }); +// struct VisualizeEdgeEdgeIntersection : zeno::INode { +// using T = float; +// using Ti = int; +// using dtiles_t = zs::TileVector; +// using tiles_t = typename ZenoParticles::particles_t; -#else - // auto stbvs = retrieve_bounding_volumes(cudaPol,verts,tris,wrapv<3>{},(T)0.0,"x"); - // stBvh.refit(cudaPol,stbvs); +// virtual void apply() override { +// using namespace zs; +// auto zsparticles = get_input("ZSParticles"); - COLLISION_UTILS::do_facet_point_collision_detection(cudaPol, - verts,"x", - points, - lines, - tris, - sttemp, - setemp, - cptemp, - // stBvh, - in_collisionEps,out_collisionEps); - - - - cudaPol(zs::range(points.size()), - [cptemp = proxy({},cptemp),verts = proxy({},verts), - cv_buffer = proxy({},cv_buffer), - cv_pt_buffer = proxy({},cv_pt_buffer), - points = proxy({},points)] ZS_LAMBDA(int pi) mutable { - for(int i = 0;i != MAX_FP_COLLISION_PAIRS;++i) { - auto inds = cptemp.template pack<4>("inds",pi * MAX_FP_COLLISION_PAIRS + i).reinterpret_bits(int_c); - bool contact = true; - auto pvert = zs::vec::zeros(); - for(int j = 0;j != 4;++j) - if(inds[j] < 0) - contact = false; - if(contact) { - pvert = verts.template pack<3>("x",inds[0]); - auto t0 = verts.template pack<3>("x",inds[1]); - auto t1 = verts.template pack<3>("x",inds[2]); - auto t2 = verts.template pack<3>("x",inds[3]); - auto center = (t0 + t1 + t2) / (T)3.0; - - cv_buffer.template tuple<3>("xs",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; - cv_buffer.template tuple<3>("xe",MAX_FP_COLLISION_PAIRS * pi + i) = center; - - cv_pt_buffer.template tuple<3>("p",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; - cv_pt_buffer.template 
tuple<3>("t0",MAX_FP_COLLISION_PAIRS * pi + i) = t0; - cv_pt_buffer.template tuple<3>("t1",MAX_FP_COLLISION_PAIRS * pi + i) = t1; - cv_pt_buffer.template tuple<3>("t2",MAX_FP_COLLISION_PAIRS * pi + i) = t2; - }else{ - cv_buffer.template tuple<3>("xs",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; - cv_buffer.template tuple<3>("xe",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; - - cv_pt_buffer.template tuple<3>("p",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; - cv_pt_buffer.template tuple<3>("t0",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; - cv_pt_buffer.template tuple<3>("t1",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; - cv_pt_buffer.template tuple<3>("t2",MAX_FP_COLLISION_PAIRS * pi + i) = pvert; - } - } - }); - - -#endif - // cudaPol.syncCtx(); +// if(!zsparticles->hasAuxData(ZenoParticles::s_surfTriTag)) +// throw std::runtime_error("the input zsparticles has no surface tris"); +// if(!zsparticles->hasAuxData(ZenoParticles::s_surfEdgeTag)) +// throw std::runtime_error("the input zsparticles has no surface lines"); +// if(!zsparticles->hasAuxData(ZenoParticles::s_surfVertTag)) +// throw std::runtime_error("the input zsparticles has no surface points"); +// const auto& verts = zsparticles->getParticles(); +// auto& tris = (*zsparticles)[ZenoParticles::s_surfTriTag]; +// auto& lines = (*zsparticles)[ZenoParticles::s_surfEdgeTag]; +// auto& points = (*zsparticles)[ZenoParticles::s_surfVertTag]; - cv_buffer = cv_buffer.clone({zs::memsrc_e::host}); - auto collisionFacetVis = std::make_shared(); - auto& cv_verts = collisionFacetVis->verts; - auto& cv_lines = collisionFacetVis->lines; - cv_verts.resize(points.size() * 2 * MAX_FP_COLLISION_PAIRS); - cv_lines.resize(points.size() * MAX_FP_COLLISION_PAIRS); +// auto in_collisionEps = get_input2("in_collisionEps"); +// auto out_collisionEps = get_input2("out_collisionEps"); - auto ompPol = omp_exec(); - constexpr auto omp_space = execspace_e::openmp; - ompPol(zs::range(cv_buffer.size()), - [cv_buffer = 
proxy({},cv_buffer),&cv_verts,&cv_lines] (int pi) mutable { - auto xs = cv_buffer.template pack<3>("xs",pi); - auto xe = cv_buffer.template pack<3>("xe",pi); - cv_verts[pi * 2 + 0] = zeno::vec3f(xs[0],xs[1],xs[2]); - cv_verts[pi * 2 + 1] = zeno::vec3f(xe[0],xe[1],xe[2]); - cv_lines[pi] = zeno::vec2i(pi * 2 + 0,pi * 2 + 1); - }); +// dtiles_t sttemp(tris.get_allocator(), +// { +// {"nrm",3} +// },tris.size() +// ); +// dtiles_t setemp(lines.get_allocator(), +// { +// {"nrm",3}, +// {"inds",4}, +// {"area",1}, +// {"inverted",1}, +// {"abary",2}, +// {"bbary",2} +// },lines.size() +// ); + +// constexpr auto space = execspace_e::cuda; +// auto cudaPol = cuda_exec(); - set_output("collisionFacetVis",std::move(collisionFacetVis)); +// std::cout << "before do edge edge collision detection" << std::endl; +// COLLISION_UTILS::do_edge_edge_collision_detection(cudaPol, +// verts,"x", +// points,lines,tris, +// sttemp,setemp, +// setemp, +// in_collisionEps,out_collisionEps); + +// // std::vector cv_tags{{"xs",3},{"xe",3}}; +// // auto cv_buffer = typename ZenoParticles::particles_t(cv_tags,setemp.size(),zs::memsrc_e::device,0); +// std::vector cv_ee_tags{{"a0",3},{"a1",3},{"b0",3},{"b1",3},{"abary",2},{"bbary",2}}; +// auto cv_ee_buffer = typename ZenoParticles::particles_t(cv_ee_tags,setemp.size(),zs::memsrc_e::device,0); + +// cudaPol(zs::range(setemp.size()), +// [setemp = proxy({},setemp),verts = proxy({},verts), +// cv_ee_buffer = proxy({},cv_ee_buffer)] ZS_LAMBDA(int ei) mutable { +// auto inds = setemp.template pack<4>("inds",ei).reinterpret_bits(int_c); +// bool collide = true; +// if(inds[0] < 0 || inds[1] < 0 || inds[2] < 0 || inds[3] < 0) +// collide = false; +// if(collide) { +// auto abary = setemp.template pack<2>("abary",ei); +// auto bbary = setemp.template pack<2>("bbary",ei); +// printf("find collision pairs : %d %d %d %d with bary %f %f %f %f\n",inds[0],inds[1],inds[2],inds[3], +// (float)abary[0],(float)abary[1],(float)bbary[0],(float)bbary[1]); +// 
cv_ee_buffer.template tuple<3>("a0",ei) = verts.template pack<3>("x",inds[0]); +// cv_ee_buffer.template tuple<3>("a1",ei) = verts.template pack<3>("x",inds[1]); +// cv_ee_buffer.template tuple<3>("b0",ei) = verts.template pack<3>("x",inds[2]); +// cv_ee_buffer.template tuple<3>("b1",ei) = verts.template pack<3>("x",inds[3]); +// cv_ee_buffer.template tuple<2>("abary",ei) = abary; +// cv_ee_buffer.template tuple<2>("bbary",ei) = bbary; +// }else { +// cv_ee_buffer.template tuple<3>("a0",ei) = zs::vec::zeros(); +// cv_ee_buffer.template tuple<3>("a1",ei) = zs::vec::zeros(); +// cv_ee_buffer.template tuple<3>("b0",ei) = zs::vec::zeros(); +// cv_ee_buffer.template tuple<3>("b1",ei) = zs::vec::zeros(); +// cv_ee_buffer.template tuple<2>("abary",ei) = zs::vec((T)1.0,0.0); +// cv_ee_buffer.template tuple<2>("bbary",ei) = zs::vec((T)1.0,0.0); +// } +// }); + +// cv_ee_buffer = cv_ee_buffer.clone({zs::memsrc_e::host}); + + +// auto ompPol = omp_exec(); +// constexpr auto omp_space = execspace_e::openmp; + +// auto collisionEdgeVis = std::make_shared(); +// auto& ee_verts = collisionEdgeVis->verts; +// auto& ee_lines = collisionEdgeVis->lines; +// ee_verts.resize(cv_ee_buffer.size() * 2); +// ee_lines.resize(cv_ee_buffer.size()); + + +// ompPol(zs::range(cv_ee_buffer.size()), +// [cv_ee_buffer = proxy({},cv_ee_buffer),&ee_verts,&ee_lines] (int eei) mutable { +// auto a0 = cv_ee_buffer.template pack<3>("a0",eei); +// auto a1 = cv_ee_buffer.template pack<3>("a1",eei); +// auto b0 = cv_ee_buffer.template pack<3>("b0",eei); +// auto b1 = cv_ee_buffer.template pack<3>("b1",eei); + +// auto abary = cv_ee_buffer.template pack<2>("abary",eei); +// auto bbary = cv_ee_buffer.template pack<2>("bbary",eei); +// // auto ac = (a0 + a1) / (T)2.0; +// // auto bc = (b0 + b1) / (T)2.0; - cv_pt_buffer = cv_pt_buffer.clone({zs::memsrc_e::host}); - auto colPointFacetPairVis = std::make_shared(); - auto& cv_pt_verts = colPointFacetPairVis->verts; - auto& cv_pt_tris = colPointFacetPairVis->tris; 
+// auto ac = abary[0] * a0 + abary[1] * a1; +// auto bc = bbary[0] * b0 + bbary[1] * b1; - cv_pt_verts.resize(cv_pt_buffer.size() * 4); - cv_pt_tris.resize(cv_pt_buffer.size()); +// ee_verts[eei * 2 + 0] = zeno::vec3f(ac[0],ac[1],ac[2]); +// ee_verts[eei * 2 + 1] = zeno::vec3f(bc[0],bc[1],bc[2]); +// ee_lines[eei] = zeno::vec2i(eei * 2 + 0,eei * 2 + 1); +// }); - ompPol(zs::range(cv_pt_buffer.size()), - [&cv_pt_verts,&cv_pt_tris,cv_pt_buffer = proxy({},cv_pt_buffer)] (int pi) mutable { - cv_pt_verts[pi * 4 + 0] = cv_pt_buffer.template pack<3>("p",pi).to_array(); - cv_pt_verts[pi * 4 + 1] = cv_pt_buffer.template pack<3>("t0",pi).to_array(); - cv_pt_verts[pi * 4 + 2] = cv_pt_buffer.template pack<3>("t1",pi).to_array(); - cv_pt_verts[pi * 4 + 3] = cv_pt_buffer.template pack<3>("t2",pi).to_array(); +// set_output("collisionEdgeVis",std::move(collisionEdgeVis)); - cv_pt_tris[pi] = zeno::vec3i(pi * 4 + 1,pi * 4 + 2,pi * 4 + 3); - }); +// auto colEdgetPairVis = std::make_shared(); +// auto& cv_ee_verts = colEdgetPairVis->verts; +// auto& cv_ee_lines = colEdgetPairVis->lines; +// cv_ee_verts.resize(cv_ee_buffer.size() * 4); +// cv_ee_lines.resize(cv_ee_buffer.size() * 2); - set_output("colPointFacetPairVis",std::move(colPointFacetPairVis)); +// ompPol(zs::range(cv_ee_buffer.size()), +// [&cv_ee_verts,&cv_ee_lines,cv_ee_buffer = proxy({},cv_ee_buffer)] (int eei) mutable { +// cv_ee_verts[eei * 4 + 0] = cv_ee_buffer.template pack<3>("a0",eei).to_array(); +// cv_ee_verts[eei * 4 + 1] = cv_ee_buffer.template pack<3>("a1",eei).to_array(); +// cv_ee_verts[eei * 4 + 2] = cv_ee_buffer.template pack<3>("b0",eei).to_array(); +// cv_ee_verts[eei * 4 + 3] = cv_ee_buffer.template pack<3>("b1",eei).to_array(); - } - }; +// cv_ee_lines[eei * 2 + 0] = zeno::vec2i(eei * 4 + 0,eei * 4 + 1); +// cv_ee_lines[eei * 2 + 1] = zeno::vec2i(eei * 4 + 2,eei * 4 + 3); +// }); -ZENDEFNODE(VisualizeFacetPointIntersection, 
{{"ZSParticles",{"float","in_collisionEps","0.01"},{"float","out_collisionEps","0.01"}}, - {"collisionFacetVis","colPointFacetPairVis"}, - { - }, - {"ZSGeometry"}}); +// set_output("colEdgetPairVis",std::move(colEdgetPairVis)); +// } +// }; +// ZENDEFNODE(VisualizeEdgeEdgeIntersection, {{"ZSParticles",{"float","in_collisionEps","0.01"},{"float","out_collisionEps","0.01"}}, +// {"collisionEdgeVis","colEdgetPairVis"}, +// { +// }, +// {"ZSGeometry"}}); -struct VisualizeCollisionForce : zeno::INode { +struct VisualizeKineCollision : zeno::INode { using T = float; using Ti = int; using dtiles_t = zs::TileVector; @@ -1215,71 +1842,67 @@ struct VisualizeCollisionForce : zeno::INode { using bv_t = zs::AABBBox<3, T>; using vec3 = zs::vec; - virtual void apply() override { using namespace zs; - auto zsparticles = get_input("ZSParticles"); - if(!zsparticles->hasAuxData(ZenoParticles::s_surfTriTag)) throw std::runtime_error("the input zsparticles has no surface tris"); if(!zsparticles->hasAuxData(ZenoParticles::s_surfEdgeTag)) throw std::runtime_error("the input zsparticles has no surface lines"); if(!zsparticles->hasAuxData(ZenoParticles::s_surfVertTag)) throw std::runtime_error("the input zsparticles has no surface points"); - // if(!zsparticles->hasBvh(ZenoParticles::s_surfTriTag)) { - // throw std::runtime_error("the input zsparticles has no surface tris's spacial structure"); - // } - // if(!zsparticles->hasBvh(ZenoParticles::s_surfEdgeTag)) { - // throw std::runtime_error("the input zsparticles has no surface edge's spacial structure"); - // } - // if(!zsparticles->hasBvh(ZenoParticles::s_surfVertTag)) { - // throw std::runtime_error("the input zsparticles has no surface vert's spacial structure"); - // } - + + const auto& eles = zsparticles->getQuadraturePoints(); const auto& verts = zsparticles->getParticles(); - - auto& tris = (*zsparticles)[ZenoParticles::s_surfTriTag]; + auto& tris = (*zsparticles)[ZenoParticles::s_surfTriTag]; auto& lines = 
(*zsparticles)[ZenoParticles::s_surfEdgeTag]; auto& points = (*zsparticles)[ZenoParticles::s_surfVertTag]; - // auto& stBvh = zsparticles->bvh(ZenoParticles::s_surfTriTag); - // auto& seBvh = zsparticles->bvh(ZenoParticles::s_surfEdgeTag); - + // ksurf should be a surface tris + auto ksurf = get_input("KinematicSurf"); + auto kverts = ksurf->getParticles(); + // if(!kverts.hasProperty("nrm")) { + // fmt::print(fg(fmt::color::red),"KinematicSurf has no surface normal\n"); + // throw std::runtime_error("the Kinematic surf has no surface normal"); + // } + dtiles_t sttemp(tris.get_allocator(), { - {"nrm",3}, - {"x",3} + {"nrm",3} },tris.size() ); dtiles_t setemp(lines.get_allocator(), { + // {"inds",4}, + // {"area",1}, + // {"inverted",1}, + // {"abary",2}, + // {"bbary",2}, {"nrm",3} + // {"grad",12}, + // {"H",12*12} },lines.size() ); - dtiles_t sptemp(points.get_allocator(), { - {"nrm",3}, - {"x",3} + {"nrm",3} },points.size() ); - dtiles_t cptemp(points.get_allocator(), + + dtiles_t fp_buffer(kverts.get_allocator(), { - {"inds",4}, + {"inds",2}, {"area",1}, - {"grad",12}, - {"H",12 * 12}, {"inverted",1} - },points.size() * MAX_FP_COLLISION_PAIRS); - + },kverts.size() * MAX_FP_COLLISION_PAIRS); - dtiles_t vtemp(verts.get_allocator(), + dtiles_t gh_buffer(points.get_allocator(), { - {"x",3}, - {"dir",3}, - },verts.size()); + {"inds",4}, + {"H",12*12}, + {"grad",12} + },eles.size()); auto in_collisionEps = get_input2("in_collisionEps"); @@ -1287,193 +1910,272 @@ struct VisualizeCollisionForce : zeno::INode { constexpr auto space = execspace_e::cuda; auto cudaPol = cuda_exec(); - - // if(!calculate_facet_normal(cudaPol,verts,"x",tris,sttemp,"nrm")){ - // throw std::runtime_error("fail updating facet normal"); - // } + auto kverts_ = typename ZenoParticles::particles_t({ + {"x",3}, + {"area",1}},kverts.size(),zs::memsrc_e::device,0); + TILEVEC_OPS::copy<3>(cudaPol,kverts,"x",kverts_,"x"); + TILEVEC_OPS::fill(cudaPol,kverts_,"area",(T)1.0); + 
TILEVEC_OPS::copy<4>(cudaPol,eles,"inds",gh_buffer,"inds"); - // auto avgl = compute_average_edge_length(cudaPol,verts,"x",tris); - // auto bvh_thickness = 5 * avgl; - -#if 0 - if(!COLLISION_UTILS::calculate_cell_bisector_normal(cudaPol, + COLLISION_UTILS::do_kinematic_point_collision_detection(cudaPol, verts,"x", + points, lines, tris, - sttemp,"nrm", - setemp,"nrm")){ - throw std::runtime_error("fail calculate cell bisector normal"); - } - - auto stbvs = retrieve_bounding_volumes(cudaPol,verts,tris,wrapv<3>{},(T)0.0,"x"); - auto sebvs = retrieve_bounding_volumes(cudaPol,verts,lines,wrapv<2>{},(T)0.0,"x"); - stBvh.refit(cudaPol,stbvs); - seBvh.refit(cudaPol,sebvs); - - - if(!calculate_facet_normal(cudaPol,verts,"x",tris,sttemp,"nrm")){ - throw std::runtime_error("fail updating facet normal"); - } - - auto collisionEps = get_input2("collisionEps"); - - TILEVEC_OPS::fill<3>(cudaPol,sttemp,"cf",zs::vec::zeros()); - TILEVEC_OPS::fill<3>(cudaPol,sptemp,"cf",zs::vec::zeros()); - // TILEVEC_OPS::copy<3>(cudaPol,verts,"x",sptemp,"x"); - cudaPol(zs::range(sptemp.size()), - [sptemp = proxy({},sptemp),verts = proxy({},verts),points = proxy({},points)] ZS_LAMBDA(int pi) mutable { - auto pidx = reinterpret_bits(points("inds",pi)); - sptemp.template tuple<3>("x",pi) = verts.template pack<3>("x",pidx); - }); - - // evaluate the center of tris - cudaPol(zs::range(tris.size()), - [verts = proxy({},verts),tris = proxy({},tris),sttemp = proxy({},sttemp)] ZS_LAMBDA(int ti) mutable { - sttemp.template tuple<3>("x",ti) = zs::vec::zeros(); - auto inds = tris.template pack<3>("inds",ti).reinterpret_bits(int_c); - for(int i = 0;i != 3;++i) - sttemp.template tuple<3>("x",ti) = sttemp.template pack<3>("x",ti) + verts.template pack<3>("x",inds[i]) / (T)3.0; + setemp, + sttemp, + kverts_, + fp_buffer, + in_collisionEps,out_collisionEps); + + std::vector cv_tags{{"xp",3},{"xt",3},{"t0",3},{"t1",3},{"t2",3}}; + auto cv_buffer = typename 
ZenoParticles::particles_t(cv_tags,fp_buffer.size(),zs::memsrc_e::device,0); + + cudaPol(zs::range(fp_buffer.size()), + [fp_buffer = proxy({},fp_buffer), + verts = proxy({},verts), + tris = proxy({},tris), + kverts = proxy({},kverts), + cv_buffer = proxy({},cv_buffer)] ZS_LAMBDA(int ci) mutable { + auto cp = fp_buffer.pack(dim_c<2>,"inds",ci).reinterpret_bits(int_c); + + auto contact = true; + for(int i = 0;i != 2;++i) + if(cp[i] < 0){ + contact = false; + break; + } + auto pvert = zs::vec::zeros(); + if(contact) { + // auto pidx = cp[0]; + auto tri = tris.pack(dim_c<3>,"inds",cp[1]).reinterpret_bits(int_c); + pvert = kverts.pack(dim_c<3>,"x",cp[0]); + auto t0 = verts.pack(dim_c<3>,"x",tri[0]); + auto t1 = verts.pack(dim_c<3>,"x",tri[1]); + auto t2 = verts.pack(dim_c<3>,"x",tri[2]); + + auto tc = (t0 + t1 + t2)/(T)3.0; + + cv_buffer.template tuple<3>("xp",ci) = pvert; + cv_buffer.template tuple<3>("xt",ci) = tc; + cv_buffer.template tuple<3>("t0",ci) = t0; + cv_buffer.template tuple<3>("t1",ci) = t1; + cv_buffer.template tuple<3>("t2",ci) = t2; + } else { + cv_buffer.template tuple<3>("xp",ci) = pvert; + cv_buffer.template tuple<3>("xt",ci) = pvert; + cv_buffer.template tuple<3>("t0",ci) = pvert; + cv_buffer.template tuple<3>("t1",ci) = pvert; + cv_buffer.template tuple<3>("t2",ci) = pvert; + } + }); - // evaluate the collision force - cudaPol(zs::range(points.size()),[collisionEps = collisionEps, - verts = proxy({},verts), - sttemp = proxy({},sttemp), - setemp = proxy({},setemp), - sptemp = proxy({},sptemp), - points = proxy({},points), - lines = proxy({},lines), - tris = proxy({},tris), - stbvh = proxy(stBvh),thickness = bvh_thickness] ZS_LAMBDA(int svi) mutable { + auto ompPol = omp_exec(); + constexpr auto omp_space = execspace_e::openmp; + cv_buffer = cv_buffer.clone({zs::memsrc_e::host}); + auto colPointTriPairVis = std::make_shared(); + auto& cv_pt_verts = colPointTriPairVis->verts; + auto& cv_pt_tris = colPointTriPairVis->tris; - auto vi = 
reinterpret_bits(points("inds",svi)); - // auto is_vertex_inverted = reinterpret_bits(verts("is_inverted",vi)); - // if(is_vertex_inverted) - // return; + cv_pt_verts.resize(cv_buffer.size() * 4); + cv_pt_tris.resize(cv_buffer.size()); - auto p = verts.template pack<3>("x",vi); - auto bv = bv_t{get_bounding_box(p - thickness, p + thickness)}; + ompPol(zs::range(cv_buffer.size()), + [&cv_pt_verts,&cv_pt_tris,cv_buffer = proxy({},cv_buffer)] (int ci) mutable { + cv_pt_verts[ci * 4 + 0] = cv_buffer.pack(dim_c<3>,"xp",ci).to_array(); + cv_pt_verts[ci * 4 + 1] = cv_buffer.pack(dim_c<3>,"t0",ci).to_array(); + cv_pt_verts[ci * 4 + 2] = cv_buffer.pack(dim_c<3>,"t1",ci).to_array(); + cv_pt_verts[ci * 4 + 3] = cv_buffer.pack(dim_c<3>,"t2",ci).to_array(); + + cv_pt_tris[ci] = zeno::vec3i(ci * 4 + 1,ci * 4 + 2,ci * 4 + 3); + }); - vec3 collision_verts[4] = {}; - collision_verts[0] = p; + set_output("colPointFacePairVis",std::move(colPointTriPairVis)); + auto colCenterLineVis = std::make_shared(); + auto& cv_cl_verts = colCenterLineVis->verts; + auto& cv_cl_lines = colCenterLineVis->lines; + + cv_cl_verts.resize(cv_buffer.size() * 2); + cv_cl_lines.resize(cv_buffer.size()); - auto process_vertex_face_collision_pairs = [&](int stI) { - auto tri = tris.pack(dim_c<3>, "inds",stI).reinterpret_bits(int_c); - if(tri[0] == vi || tri[1] == vi || tri[2] == vi) - return; + ompPol(zs::range(cv_buffer.size()), + [cv_buffer = proxy({},cv_buffer),&cv_cl_verts,&cv_cl_lines] (int ci) mutable { + cv_cl_verts[ci * 2 + 0] = cv_buffer.pack(dim_c<3>,"xp",ci).to_array(); + cv_cl_verts[ci * 2 + 1] = cv_buffer.pack(dim_c<3>,"xt",ci).to_array(); + cv_cl_lines[ci] = zeno::vec2i(ci * 2 + 0,ci * 2 + 1); + }); - bool collide = false; + set_output("colConnVis",std::move(colCenterLineVis)); - if(COLLISION_UTILS::is_inside_the_cell(verts,"x", - lines,tris, - sttemp,"nrm", - setemp,"nrm", - stI,p,collisionEps)) { - collide = true; - } + COLLISION_UTILS::evaluate_kinematic_fp_collision_grad_and_hessian( + 
cudaPol, + eles, + verts,"x","v",(T)1.0, + tris, + kverts_, + fp_buffer, + gh_buffer,0, + in_collisionEps,out_collisionEps, + (T)1.0, + (T)1.0,(T)1.0,(T)0.01); - if(!collide) - return; + dtiles_t vtemp(verts.get_allocator(), + { + {"x",3}, + {"dir",3}, + },verts.size()); + TILEVEC_OPS::copy<3>(cudaPol,verts,"x",vtemp,"x"); + TILEVEC_OPS::fill<3>(cudaPol,vtemp,"dir",zs::vec::zeros()); - collision_verts[1] = verts.template pack<3>("x",tri[0]); - collision_verts[2] = verts.template pack<3>("x",tri[1]); - collision_verts[3] = verts.template pack<3>("x",tri[2]); + TILEVEC_OPS::assemble_range(cudaPol,gh_buffer,"grad","inds",vtemp,"dir",0,gh_buffer.size()); + vtemp = vtemp.clone({zs::memsrc_e::host}); - auto vertexFaceCollisionAreas = tris("area",stI) + points("area",svi); + auto nodalForceVis = std::make_shared(); + auto& spverts = nodalForceVis->verts; + spverts.resize(vtemp.size() * 2); + auto& splines = nodalForceVis->lines; + splines.resize(vtemp.size()); - auto grad = (T)1.0 * VERTEX_FACE_SQRT_COLLISION::gradient(collision_verts,1,1,collisionEps) * vertexFaceCollisionAreas; + auto scale = get_input2("scale"); + ompPol(zs::range(vtemp.size()), + [vtemp = proxy({},vtemp),&spverts,&splines,scale] (int vi) mutable { + auto xs = vtemp.template pack<3>("x",vi); + auto dir = vtemp.template pack<3>("dir",vi); - // auto pf = zs::vec{grad[0],grad[1],grad[2]}; - zs::vec tf[3] = {}; - for(int j = 0;j != 3;++j) - tf[j] = zs::vec{grad[j * 3 + 3 + 0],grad[j * 3 + 3 + 1],grad[j * 3 + 3 + 2]}; + auto xe = xs + scale * dir; - // auto avgtf = (tf[0] + tf[1] + tf[2])/(T)3.0; - auto avgtf = (tf[0] + tf[1] + tf[2]); - for(int j = 0;j != 3;++j) - atomic_add(exec_cuda,&sttemp("cf",j,stI),avgtf[j]); + spverts[vi * 2 + 0] = xs.to_array(); + spverts[vi * 2 + 1] = xe.to_array(); + splines[vi] = zeno::vec2i(vi * 2 + 0,vi * 2 + 1); + }); + set_output("FPNodalForceVis",std::move(nodalForceVis)); - auto fp_inds = tris.template pack<3>("fp_inds",stI).reinterpret_bits(int_c); - for(int j = 0;j != 
3;++j){ - atomic_add(exec_cuda,&sptemp("cf",j,svi),grad[j]); - // for(int k = 0;k != 3;++k) { - // auto fp_idx = fp_inds[k]; - // atomic_add(exec_cuda,&sptemp("cf",j,fp_idx),tf[k][j]); - // } - } - }; - stbvh.iter_neighbors(bv,process_vertex_face_collision_pairs); - }); + } +}; - cudaPol.syncCtx(); +ZENDEFNODE(VisualizeKineCollision, {{"ZSParticles","KinematicSurf",{"float","in_collisionEps"},{"float","out_collisionEps"},{"float","scale"}}, + { + "colPointFacePairVis", + "colConnVis", + "FPNodalForceVis" + }, + { + }, + {"ZSGeometry"}}); - auto scale = get_input2("scale"); - auto ompPol = omp_exec(); - constexpr auto omp_space = execspace_e::openmp; - - sptemp = sptemp.clone({zs::memsrc_e::host}); - // sttemp = sttemp.clone({zs::memsrc_e::host}); +struct VisualizeCollision : zeno::INode { - auto nodalForceVis = std::make_shared(); - auto& spverts = nodalForceVis->verts; - spverts.resize(sptemp.size() * 2); - auto& splines = nodalForceVis->lines; - splines.resize(sptemp.size()); + using T = float; + using Ti = int; + using dtiles_t = zs::TileVector; + using tiles_t = typename ZenoParticles::particles_t; + using bvh_t = zs::LBvh<3,int,T>; + using bv_t = zs::AABBBox<3, T>; + using vec3 = zs::vec; - ompPol(zs::range(sptemp.size()), - [sptemp = proxy({},sptemp),&spverts,&splines,scale] (int pi) mutable { - auto xs = sptemp.template pack<3>("x",pi); - auto dir = sptemp.template pack<3>("cf",pi); - // auto dir = zs::vec{1.0,0.0,0.0}; - auto xe = xs + dir * scale; + virtual void apply() override { + using namespace zs; - spverts[pi * 2 + 0] = xs.to_array(); - spverts[pi * 2 + 1] = xe.to_array(); - splines[pi] = zeno::vec2i(pi * 2 + 0,pi * 2 + 1); - }); + auto zsparticles = get_input("ZSParticles"); - set_output("nodalForceVis",std::move(nodalForceVis)); + if(!zsparticles->hasAuxData(ZenoParticles::s_surfTriTag)) + throw std::runtime_error("the input zsparticles has no surface tris"); + if(!zsparticles->hasAuxData(ZenoParticles::s_surfEdgeTag)) + throw 
std::runtime_error("the input zsparticles has no surface lines"); + if(!zsparticles->hasAuxData(ZenoParticles::s_surfVertTag)) + throw std::runtime_error("the input zsparticles has no surface points"); + // if(!zsparticles->hasBvh(ZenoParticles::s_surfTriTag)) { + // throw std::runtime_error("the input zsparticles has no surface tris's spacial structure"); + // } + // if(!zsparticles->hasBvh(ZenoParticles::s_surfEdgeTag)) { + // throw std::runtime_error("the input zsparticles has no surface edge's spacial structure"); + // } + // if(!zsparticles->hasBvh(ZenoParticles::s_surfVertTag)) { + // throw std::runtime_error("the input zsparticles has no surface vert's spacial structure"); + // } + const auto& verts = zsparticles->getParticles(); + auto& tris = (*zsparticles)[ZenoParticles::s_surfTriTag]; + auto& lines = (*zsparticles)[ZenoParticles::s_surfEdgeTag]; + auto& points = (*zsparticles)[ZenoParticles::s_surfVertTag]; - sttemp = sttemp.clone({zs::memsrc_e::host}); - auto facetForceVis = std::make_shared(); - auto& stverts = facetForceVis->verts; - auto& stlines = facetForceVis->lines; + // auto& stBvh = zsparticles->bvh(ZenoParticles::s_surfTriTag); + // auto& seBvh = zsparticles->bvh(ZenoParticles::s_surfEdgeTag); - stverts.resize(sttemp.size() * 2); - stlines.resize(sttemp.size()); + dtiles_t sttemp(tris.get_allocator(), + { + {"nrm",3}, + {"x",3} + },tris.size() + ); + dtiles_t setemp(lines.get_allocator(), + { + // {"inds",4}, + // {"area",1}, + // {"inverted",1}, + // {"abary",2}, + // {"bbary",2}, + {"nrm",3} + // {"grad",12}, + // {"H",12*12} + },lines.size() + ); + dtiles_t sptemp(points.get_allocator(), + { + {"nrm",3}, + {"x",3} + },points.size() + ); - ompPol(zs::range(sttemp.size()), - [&stverts,&stlines,sttemp = proxy({},sttemp),scale] (int ti) mutable { - auto xs = sttemp.template pack<3>("x",ti); - auto dir = sttemp.template pack<3>("cf",ti); - auto xe = xs + dir * scale; + dtiles_t fp_buffer(points.get_allocator(), + { + {"inds",4}, + {"area",1}, + 
{"inverted",1} + },points.size() * MAX_FP_COLLISION_PAIRS); + dtiles_t ee_buffer(lines.get_allocator(), + { + {"inds",4}, + {"area",1}, + {"inverted",1}, + {"abary",2}, + {"bbary",2} + },lines.size()); - stverts[ti * 2 + 0] = xs.to_array(); - stverts[ti * 2 + 1] = xe.to_array(); + dtiles_t gh_buffer(points.get_allocator(), + { + {"inds",4}, + {"H",12*12}, + {"grad",12} + },points.size() * MAX_FP_COLLISION_PAIRS + lines.size()); - stlines[ti] = zeno::vec2i(ti * 2 + 0,ti * 2 + 1); - }); + dtiles_t vtemp(verts.get_allocator(), + { + {"x",3}, + {"dir",3}, + },verts.size()); - set_output("facetForceVis",std::move(facetForceVis)); -#else + auto in_collisionEps = get_input2("in_collisionEps"); + auto out_collisionEps = get_input2("out_collisionEps"); - // auto stbvs = retrieve_bounding_volumes(cudaPol,verts,tris,wrapv<3>{},(T)0.0,"x"); - // stBvh.refit(cudaPol,stbvs); + constexpr auto space = execspace_e::cuda; + auto cudaPol = cuda_exec(); + // calculate facet-point collision pairs and force COLLISION_UTILS::do_facet_point_collision_detection(cudaPol, verts,"x", @@ -1482,12 +2184,10 @@ struct VisualizeCollisionForce : zeno::INode { tris, sttemp, setemp, - cptemp, - // stBvh, + fp_buffer, in_collisionEps,out_collisionEps); - std::vector cv_tags{{"xs",3},{"xe",3}}; auto cv_buffer = typename ZenoParticles::particles_t(cv_tags,points.size() * MAX_FP_COLLISION_PAIRS,zs::memsrc_e::device,0); std::vector cv_pt_tags{{"p",3},{"t0",3},{"t1",3},{"t2",3}}; @@ -1495,12 +2195,12 @@ struct VisualizeCollisionForce : zeno::INode { cudaPol(zs::range(points.size()), - [cptemp = proxy({},cptemp),verts = proxy({},verts), + [fp_buffer = proxy({},fp_buffer),verts = proxy({},verts), cv_buffer = proxy({},cv_buffer), cv_pt_buffer = proxy({},cv_pt_buffer), points = proxy({},points)] ZS_LAMBDA(int pi) mutable { for(int i = 0;i != MAX_FP_COLLISION_PAIRS;++i) { - auto inds = cptemp.template pack<4>("inds",pi * MAX_FP_COLLISION_PAIRS + i).reinterpret_bits(int_c); + auto inds = fp_buffer.template 
pack<4>("inds",pi * MAX_FP_COLLISION_PAIRS + i).reinterpret_bits(int_c); bool contact = true; auto pvert = zs::vec::zeros(); for(int j = 0;j != 4;++j) @@ -1575,56 +2275,21 @@ struct VisualizeCollisionForce : zeno::INode { set_output("colPointFacetPairVis",std::move(colPointFacetPairVis)); - - - COLLISION_UTILS::evaluate_collision_grad_and_hessian(cudaPol, - verts,"x", - cptemp, - in_collisionEps, - out_collisionEps, + COLLISION_UTILS::evaluate_fp_collision_grad_and_hessian( + cudaPol, + verts,"x","v",(T)1.0, + fp_buffer, + gh_buffer,0, + in_collisionEps,out_collisionEps, (T)1.0, - (T)1.0,(T)1.0); + (T)1.0,(T)1.0,(T)0.0); TILEVEC_OPS::copy<3>(cudaPol,verts,"x",vtemp,"x"); TILEVEC_OPS::fill<3>(cudaPol,vtemp,"dir",zs::vec::zeros()); - - // TILEVEC_OPS::fill<12>(cudaPol,cptemp,"grad",zs::vec{1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0}); - // auto gradN = TILEVEC_OPS::inf_norm<12>(cudaPol,cptemp,"grad"); - - // cudaPol(zs::range(cptemp.size()), - // [cptemp = proxy({},cptemp),verts = proxy({},verts)] ZS_LAMBDA(int cpi) mutable { - // auto inds = cptemp.template pack<4>("inds",cpi).reinterpret_bits(int_c); - // bool in_active = false; - // for(int i = 0;i != 4;++i) { - // if(inds[i] < 0) - // in_active = false; - // else{ - // auto active = verts("active",inds[i]); - // if(active < 1e-6) - // in_active = false; - // } - // } - // if(in_active) - // cptemp.template tuple<12>("grad",cpi) = zs::vec::zeros(); - // }); - - - TILEVEC_OPS::assemble<3,4>(cudaPol,cptemp,"grad",vtemp,"dir"); - - // cudaPol(zs::range(verts.size()), - // [verts = proxy({},verts),vtemp = proxy({},vtemp)] ZS_LAMBDA(int vi) mutable { - // auto active = verts("active",vi); - // if(active < 1e-6) - // vtemp.template tuple<3>("dir",vi) = zs::vec::zeros(); - // }); - - // cudaPol.syncCtx(); - // fmt::print(fg(fmt::color::dark_cyan), - // "gradN = {}\n",gradN); + TILEVEC_OPS::assemble_range(cudaPol,gh_buffer,"grad","inds",vtemp,"dir",0,fp_buffer.size()); - - auto scale = get_input2("scale"); + auto 
scale = get_input2("fp_scale"); // auto ompPol = omp_exec(); // constexpr auto omp_space = execspace_e::openmp; @@ -1650,29 +2315,166 @@ struct VisualizeCollisionForce : zeno::INode { splines[vi] = zeno::vec2i(vi * 2 + 0,vi * 2 + 1); }); - set_output("nodalForceVis",std::move(nodalForceVis)); + set_output("FPNodalForceVis",std::move(nodalForceVis)); + + // calculate edge edge collision pairs and face + // COLLISION_UTILS::do_edge_edge_collision_detection(cudaPol, + // verts,"x", + // points,lines,tris, + // sttemp,setemp, + // ee_buffer, + // in_collisionEps,out_collisionEps); + // std::vector cv_ee_tags{{"a0",3},{"a1",3},{"b0",3},{"b1",3},{"abary",2},{"bbary",2}}; + // auto cv_ee_buffer = typename ZenoParticles::particles_t(cv_ee_tags,setemp.size(),zs::memsrc_e::device,0); + + // cudaPol(zs::range(ee_buffer.size()), + // [ee_buffer = proxy({},ee_buffer),verts = proxy({},verts), + // cv_ee_buffer = proxy({},cv_ee_buffer)] ZS_LAMBDA(int ei) mutable { + // auto inds = ee_buffer.template pack<4>("inds",ei).reinterpret_bits(int_c); + // bool collide = true; + // if(inds[0] < 0 || inds[1] < 0 || inds[2] < 0 || inds[3] < 0) + // collide = false; + // if(collide) { + // auto abary = ee_buffer.template pack<2>("abary",ei); + // auto bbary = ee_buffer.template pack<2>("bbary",ei); + + // // printf("Found edge collision pair %d %d %d %d %f %f %f %f\n",inds[0],inds[1],inds[2],inds[3], + // // (float)abary[0],(float)abary[1],(float)bbary[0],(float)bbary[1]); + + // // printf("find collision pairs : %d %d %d %d with bary %f %f %f %f\n",inds[0],inds[1],inds[2],inds[3], + // // (float)abary[0],(float)abary[1],(float)bbary[0],(float)bbary[1]); + // cv_ee_buffer.template tuple<3>("a0",ei) = verts.template pack<3>("x",inds[0]); + // cv_ee_buffer.template tuple<3>("a1",ei) = verts.template pack<3>("x",inds[1]); + // cv_ee_buffer.template tuple<3>("b0",ei) = verts.template pack<3>("x",inds[2]); + // cv_ee_buffer.template tuple<3>("b1",ei) = verts.template pack<3>("x",inds[3]); + // 
cv_ee_buffer.template tuple<2>("abary",ei) = abary; + // cv_ee_buffer.template tuple<2>("bbary",ei) = bbary; + // }else { + // cv_ee_buffer.template tuple<3>("a0",ei) = zs::vec::zeros(); + // cv_ee_buffer.template tuple<3>("a1",ei) = zs::vec::zeros(); + // cv_ee_buffer.template tuple<3>("b0",ei) = zs::vec::zeros(); + // cv_ee_buffer.template tuple<3>("b1",ei) = zs::vec::zeros(); + // cv_ee_buffer.template tuple<2>("abary",ei) = zs::vec((T)1.0,0.0); + // cv_ee_buffer.template tuple<2>("bbary",ei) = zs::vec((T)1.0,0.0); + // } + // }); + + // cv_ee_buffer = cv_ee_buffer.clone({zs::memsrc_e::host}); - auto facetForceVis = std::make_shared(); - auto& stverts = facetForceVis->verts; - auto& stlines = facetForceVis->lines; + // // auto ompPol = omp_exec(); + // // constexpr auto omp_space = execspace_e::openmp; - stverts.resize(0); - stlines.resize(0); + // auto collisionEdgeVis = std::make_shared(); + // auto& ee_verts = collisionEdgeVis->verts; + // auto& ee_lines = collisionEdgeVis->lines; + // ee_verts.resize(cv_ee_buffer.size() * 2); + // ee_lines.resize(cv_ee_buffer.size()); - set_output("facetForceVis",std::move(facetForceVis)); -#endif + // ompPol(zs::range(cv_ee_buffer.size()), + // [cv_ee_buffer = proxy({},cv_ee_buffer),&ee_verts,&ee_lines] (int eei) mutable { + // auto a0 = cv_ee_buffer.template pack<3>("a0",eei); + // auto a1 = cv_ee_buffer.template pack<3>("a1",eei); + // auto b0 = cv_ee_buffer.template pack<3>("b0",eei); + // auto b1 = cv_ee_buffer.template pack<3>("b1",eei); + + // auto abary = cv_ee_buffer.template pack<2>("abary",eei); + // auto bbary = cv_ee_buffer.template pack<2>("bbary",eei); + + // // auto ac = (a0 + a1) / (T)2.0; + // // auto bc = (b0 + b1) / (T)2.0; + + // auto ac = abary[0] * a0 + abary[1] * a1; + // auto bc = bbary[0] * b0 + bbary[1] * b1; + + // ee_verts[eei * 2 + 0] = zeno::vec3f(ac[0],ac[1],ac[2]); + // ee_verts[eei * 2 + 1] = zeno::vec3f(bc[0],bc[1],bc[2]); + // ee_lines[eei] = zeno::vec2i(eei * 2 + 0,eei * 2 + 1); + // }); 
+ // set_output("collisionEdgeVis",std::move(collisionEdgeVis)); + + // auto colEdgetPairVis = std::make_shared(); + // auto& cv_ee_verts = colEdgetPairVis->verts; + // auto& cv_ee_lines = colEdgetPairVis->lines; + + // cv_ee_verts.resize(cv_ee_buffer.size() * 4); + // cv_ee_lines.resize(cv_ee_buffer.size() * 2); + + // ompPol(zs::range(cv_ee_buffer.size()), + // [&cv_ee_verts,&cv_ee_lines,cv_ee_buffer = proxy({},cv_ee_buffer)] (int eei) mutable { + // cv_ee_verts[eei * 4 + 0] = cv_ee_buffer.template pack<3>("a0",eei).to_array(); + // cv_ee_verts[eei * 4 + 1] = cv_ee_buffer.template pack<3>("a1",eei).to_array(); + // cv_ee_verts[eei * 4 + 2] = cv_ee_buffer.template pack<3>("b0",eei).to_array(); + // cv_ee_verts[eei * 4 + 3] = cv_ee_buffer.template pack<3>("b1",eei).to_array(); + + // cv_ee_lines[eei * 2 + 0] = zeno::vec2i(eei * 4 + 0,eei * 4 + 1); + // cv_ee_lines[eei * 2 + 1] = zeno::vec2i(eei * 4 + 2,eei * 4 + 3); + // }); + + + // set_output("colEdgePairVis",std::move(colEdgetPairVis)); + + + // dtiles_t ee_vtemp(verts.get_allocator(), + // { + // {"x",3}, + // {"dir",3}, + // },verts.size()); + + // COLLISION_UTILS::evaluate_ee_collision_grad_and_hessian(cudaPol, + // verts,"x", + // ee_buffer, + // gh_buffer,fp_buffer.size(), + // in_collisionEps,out_collisionEps, + // 1.0, + // 1.0,1.0); + + // TILEVEC_OPS::copy<3>(cudaPol,verts,"x",ee_vtemp,"x"); + // TILEVEC_OPS::fill(cudaPol,ee_vtemp,"dir",(T)0.0); + // TILEVEC_OPS::assemble_range(cudaPol,gh_buffer,"grad","inds",ee_vtemp,"dir",fp_buffer.size(),ee_buffer.size()); + + // auto EENodalForceVis = std::make_shared(); + // auto& ee_spverts = EENodalForceVis->verts; + // ee_spverts.resize(ee_vtemp.size() * 2); + // auto& ee_splines = EENodalForceVis->lines; + // ee_splines.resize(ee_vtemp.size()); + + // scale = get_input2("ee_scale"); + + // ee_vtemp = ee_vtemp.clone({zs::memsrc_e::host}); + // ompPol(zs::range(ee_vtemp.size()), + // [ee_vtemp = proxy({},ee_vtemp),&ee_spverts,&ee_splines,scale] (int vi) mutable { 
+ // auto xs = ee_vtemp.template pack<3>("x",vi); + // auto dir = ee_vtemp.template pack<3>("dir",vi); + + // auto xe = xs + scale * dir; + + // ee_spverts[vi * 2 + 0] = xs.to_array(); + // ee_spverts[vi * 2 + 1] = xe.to_array(); + // ee_splines[vi] = zeno::vec2i(vi * 2 + 0,vi * 2 + 1); + // }); + + // set_output("EENodalForceVis",std::move(EENodalForceVis)); } }; -ZENDEFNODE(VisualizeCollisionForce, {{"ZSParticles",{"float","scale","1.0"},{"float","in_collisionEps"},{"float","out_collisionEps"}}, - {"nodalForceVis","facetForceVis","collisionFacetVis","colPointFacetPairVis"}, +ZENDEFNODE(VisualizeCollision, {{"ZSParticles",{"float","fp_scale","1.0"},{"float","ee_scale","1.0"},{"float","in_collisionEps"},{"float","out_collisionEps"}}, + { + "collisionFacetVis", + "colPointFacetPairVis", + "FPNodalForceVis", + // "collisionEdgeVis", + // "colEdgePairVis", + // "EENodalForceVis", + }, { }, {"ZSGeometry"}}); + + } \ No newline at end of file diff --git a/projects/CuLagrange/geometry/DeformationField.cu b/projects/CuLagrange/geometry/DeformationField.cu index 93aa101d75..18f1d3da6d 100644 --- a/projects/CuLagrange/geometry/DeformationField.cu +++ b/projects/CuLagrange/geometry/DeformationField.cu @@ -20,8 +20,8 @@ struct ZSIsotropicTensionField : INode { auto& verts = zssurf->getParticles(); auto& tris = zssurf->getQuadraturePoints(); - if(tris.getPropertySize("inds") != 3) { - fmt::print("ZSCalcSurfaceTenssionField only supports triangle surface mesh {}\n",tris.getPropertySize("inds")); + if(tris.getChannelSize("inds") != 3) { + fmt::print("ZSCalcSurfaceTenssionField only supports triangle surface mesh {}\n",tris.getChannelSize("inds")); throw std::runtime_error("ZSCalcSurfaceTenssionField only supports triangle surface mesh"); } if(!verts.hasProperty(ref_channel)){ @@ -158,7 +158,7 @@ struct ZSEvalDeformationGradient : zeno::INode { } auto& quads = zsvolume->getQuadraturePoints(); - if(quads.getPropertySize("inds") != 4) { + if(quads.getChannelSize("inds") != 4) { 
fmt::print("the input zsvolume should be a tetrahedra mesh\n"); throw std::runtime_error("the input zsvolume should be a tetrahedra mesh"); } @@ -173,7 +173,7 @@ struct ZSEvalDeformationGradient : zeno::INode { if(!quads.hasProperty(gradientTag)) { quads.append_channels(cudaExec,{{gradientTag,9}}); - }else if(quads.getPropertySize(gradientTag) != 9) { + }else if(quads.getChannelSize(gradientTag) != 9) { fmt::print("the size of F channel {} is not 9\n",gradientTag); throw std::runtime_error("the size of F channel is not 9"); } diff --git a/projects/CuLagrange/geometry/SolveLaplacian.cu b/projects/CuLagrange/geometry/SolveLaplacian.cu index 15facc5009..22c41ff33f 100644 --- a/projects/CuLagrange/geometry/SolveLaplacian.cu +++ b/projects/CuLagrange/geometry/SolveLaplacian.cu @@ -13,7 +13,7 @@ #include #include -#include "kernel/laplace_matrix.hpp" +#include "kernel/laplacian.hpp" #include "linear_system/mfcg.hpp" namespace zeno { diff --git a/projects/CuLagrange/geometry/Topology.cu b/projects/CuLagrange/geometry/Topology.cu index b317c2f996..dbdd2e4bc0 100644 --- a/projects/CuLagrange/geometry/Topology.cu +++ b/projects/CuLagrange/geometry/Topology.cu @@ -7,43 +7,316 @@ #include #include +#include "kernel/tiled_vector_ops.hpp" +#include "zensim/container/Bcht.hpp" + +#include "zensim/cuda/execution/ExecutionPolicy.cuh" +#include "zensim/omp/execution/ExecutionPolicy.hpp" + +#include "kernel/topology.hpp" + namespace zeno { -struct FilterTopology : INode { - void apply() override { - auto prim = get_input("prim"); - auto filTopo = get_param("topo"); - - auto primOut = std::static_pointer_cast(prim->clone()); - if (filTopo == "lines") { - primOut->tris.resize(0); - primOut->quads.resize(0); - } - if (filTopo == "tris") { - primOut->lines.resize(0); - primOut->quads.resize(0); - } - if (filTopo == "quads") { - primOut->lines.resize(0); - primOut->tris.resize(0); - } - - set_output("primOut", std::move(primOut)); - } +struct BuildSurfaceHalfEdgeStructure : zeno::INode { 
+ using T = float; + + virtual void apply() override { + using namespace zs; + using vec2i = zs::vec; + using vec3i = zs::vec; + + auto zsparticles = get_input("zsparticles"); + if(!zsparticles->hasAuxData(ZenoParticles::s_surfTriTag)) + throw std::runtime_error("the input zsparticles has no surface tris"); + if(!zsparticles->hasAuxData(ZenoParticles::s_surfEdgeTag)) + throw std::runtime_error("the input zsparticles has no surface lines"); + if(!zsparticles->hasAuxData(ZenoParticles::s_surfVertTag)) + throw std::runtime_error("the input zsparticles has no surface lines"); + + auto& tris = (*zsparticles)[ZenoParticles::s_surfTriTag]; + auto& lines = (*zsparticles)[ZenoParticles::s_surfEdgeTag]; + auto& points = (*zsparticles)[ZenoParticles::s_surfVertTag]; + + auto& halfEdge = (*zsparticles)[ZenoParticles::s_surfHalfEdgeTag]; + halfEdge = typename ZenoParticles::particles_t({{"to_vertex",1},{"face",1},{"edge",1},{"opposite_he",1},{"next_he",1}}, + tris.size() * 3,zs::memsrc_e::device,0); + + auto cudaPol = zs::cuda_exec(); + + + points.append_channels(cudaPol,{{"he_inds",1}}); + lines.append_channels(cudaPol,{{"he_inds",1}}); + tris.append_channels(cudaPol,{{"he_inds",1}}); + +#if 0 + + constexpr auto space = zs::execspace_e::cuda; + + TILEVEC_OPS::fill(cudaPol,halfEdge,"to_vertex",reinterpret_bits((int)-1)); + TILEVEC_OPS::fill(cudaPol,halfEdge,"face",reinterpret_bits((int)-1)); + TILEVEC_OPS::fill(cudaPol,halfEdge,"edge",reinterpret_bits((int)-1)); + TILEVEC_OPS::fill(cudaPol,halfEdge,"opposite_he",reinterpret_bits((int)-1)); + TILEVEC_OPS::fill(cudaPol,halfEdge,"next_he",reinterpret_bits((int)-1)); + + // we might also need a space hash structure here, map from [i1,i2]->[ej] + bcht,32> de2fi{halfEdge.get_allocator(),halfEdge.size()}; + + cudaPol(zs::range(tris.size()), [ + tris = proxy({},tris),de2fi = proxy(de2fi),halfEdge = proxy({},halfEdge)] ZS_LAMBDA(int ti) mutable { + auto fe_inds = tris.pack(dim_c<3>,"fe_inds",ti).reinterpret_bits(int_c); + auto tri = 
tris.pack(dim_c<3>,"fp_inds",ti).reinterpret_bits(int_c); + + vec3i nos{}; + for(int i = 0;i != 3;++i) { + if(auto no = de2fi.insert(vec2i{tri[i],tri[(i+1) % 3]});no >= 0){ + nos[i] = no; + halfEdge("to_vertex",no) = reinterpret_bits(tri[i]); + halfEdge("face",no) = reinterpret_bits(ti); + halfEdge("edge",no) = reinterpret_bits(fe_inds[i]); + // halfEdge("next_he",no) = ti * 3 + (i+1) % 3; + } else { + // some error happen + + } + } + for(int i = 0;i != 3;++i) + halfEdge("next_he",nos[i]) = reinterpret_bits(nos[(i+1) % 3]); + }); + cudaPol(zs::range(halfEdge.size()), + [halfEdge = proxy({},halfEdge),de2fi = proxy(de2fi)] ZS_LAMBDA(int hei) mutable { + auto idx0 = reinterpret_bits(halfEdge("to_vertex",hei)); + auto nexthei = reinterpret_bits(halfEdge("next_he",hei)); + auto idx1 = reinterpret_bits(halfEdge("to_vertex",nexthei)); + if(auto no = de2fi.query(vec2i{idx1,idx0});no >= 0) + halfEdge("opposite_he",hei) = reinterpret_bits(no); + else + halfEdge("opposite_he",hei) = reinterpret_bits((int)-1); + }); + + points.append_channels(cudaPol,{{"he_inds",1}}); + lines.append_channels(cudaPol,{{"he_inds",1}}); + tris.append_channels(cudaPol,{{"he_inds",1}}); + + cudaPol(zs::range(lines.size()),[ + lines = proxy({},lines),de2fi = proxy(de2fi)] ZS_LAMBDA(int li) mutable { + auto linds = lines.pack(dim_c<2>,"ep_inds",li).reinterpret_bits(int_c); + if(auto no = de2fi.query(vec2i{linds[0],linds[1]});no >= 0){ + lines("he_inds",li) = reinterpret_bits((int)no); + }else { + // some algorithm bug + } + }); + + cudaPol(zs::range(tris.size()),[ + points = proxy({},points),tris = proxy({},tris),de2fi = proxy(de2fi)] ZS_LAMBDA(int ti) mutable { + auto tinds = tris.pack(dim_c<3>,"fp_inds",ti).reinterpret_bits(int_c); + if(auto no = de2fi.query(vec2i{tinds[0],tinds[1]});no >= 0){ + tris("he_inds",ti) = reinterpret_bits((int)no); + }else { + // some algorithm bug + } + + for(int i = 0;i != 3;++i) { + if(auto no = de2fi.query(vec2i{tinds[i],tinds[(i+1) % 3]});no >= 0){ + 
points("he_inds",tinds[i]) = reinterpret_bits((int)no); + }else { + // some algorithm bug + } + } + }); +#else + if(!build_surf_half_edge(cudaPol,tris,lines,points,halfEdge)) + throw std::runtime_error("fail building surf half edge"); +#endif + + set_output("zsparticles",zsparticles); + // zsparticles->setMeta("de2fi",std::move()) + } + +}; + + +ZENDEFNODE(BuildSurfaceHalfEdgeStructure, {{{"zsparticles"}}, + {{"zsparticles"}}, + {}, + {"ZSGeometry"}}); + + +// visualize the one-ring points, lines, and tris +struct VisualizeOneRingNeighbors : zeno::INode { + using T = float; + virtual void apply() override { + using namespace zs; + auto zsparticles = get_input("zsparticles"); + constexpr int MAX_NEIGHS = 8; + + if(!zsparticles->hasAuxData(ZenoParticles::s_surfTriTag)) + throw std::runtime_error("the input zsparticles has no surface tris"); + if(!zsparticles->hasAuxData(ZenoParticles::s_surfEdgeTag)) + throw std::runtime_error("the input zsparticles has no surface lines"); + if(!zsparticles->hasAuxData(ZenoParticles::s_surfVertTag)) + throw std::runtime_error("the input zsparticles has no surface lines"); + if(!zsparticles->hasAuxData(ZenoParticles::s_surfHalfEdgeTag)) + throw std::runtime_error("the input zsparticles has no half edges"); + + const auto& verts = zsparticles->getParticles(); + const auto& tris = (*zsparticles)[ZenoParticles::s_surfTriTag]; + const auto& lines = (*zsparticles)[ZenoParticles::s_surfEdgeTag]; + const auto& points = (*zsparticles)[ZenoParticles::s_surfVertTag]; + const auto& half_edges = (*zsparticles)[ZenoParticles::s_surfHalfEdgeTag]; + + + auto cudaPol = zs::cuda_exec(); + constexpr auto space = zs::execspace_e::cuda; + + auto one_ring_points = typename ZenoParticles::particles_t({{"x",3},{"active",1}},points.size() * (MAX_NEIGHS + 1),zs::memsrc_e::device,0); + TILEVEC_OPS::fill(cudaPol,one_ring_points,"active",(T)0); + + auto one_ring_lines = typename ZenoParticles::particles_t({{"x",3},{"active",1}},points.size() * (MAX_NEIGHS * 
2),zs::memsrc_e::device,0); + TILEVEC_OPS::fill(cudaPol,one_ring_lines,"active",(T)0); + + // auto one_ring_tris = typename ZenoParticles::particles_t({{"x",3},{"active",1}},points.size() * (MAX_NEIGHS + 1),zs::memsrc_e::device,0); + + + // auto one_ring_lines = typename ZenoParticles::particles_t({{"x",3},{"active",1}},points.size() * (MAX_NEIGHS + 1)); + // auto one_ring_tris = typename ZenoParticles::particles_t({{"x",3},{"active",1}},points.size() * (MAX_NEIGHS + 1)); + + // cudaPol(zs::range(lines.size()), + // [lines = proxy({},lines)] ZS_LAMBDA(int li) { + // auto ep_inds = lines.pack(dim_c<2>,"ep_inds",li).reinterpret_bits(int_c); + // printf("ep_inds[%d] : %d %d\n",li,ep_inds[0],ep_inds[1]); + // }); + + // cudaPol(zs::range(half_edges.size()), + // [half_edges = proxy({},half_edges)] ZS_LAMBDA(int hei) { + // auto id0 = reinterpret_bits(half_edges("to_vertex",hei)); + // auto nhei = get_next_half_edge(hei,half_edges,1,false); + // auto id1 = reinterpret_bits(half_edges("to_vertex",nhei)); + // auto rhei = reinterpret_bits(half_edges("opposite_he",hei)); + // auto rid0 = reinterpret_bits(half_edges("to_vertex",rhei)); + // auto nrhei = get_next_half_edge(rhei,half_edges,1,false); + // auto rid1 = reinterpret_bits(half_edges("to_vertex",nrhei)); + // printf("half_edge[%d] : %d %d \t <-> half_edge[%d] : %d %d\n",hei,id0,id1,rhei,rid0,rid1); + // }); + + cudaPol(zs::range(points.size()),[ + verts = proxy({},verts), + one_ring_points = proxy({},one_ring_points), + // one_ring_lines = proxy({},one_ring_lines), + // one_ring_tris = proxy({},one_ring_tris), + points = proxy({},points), + lines = proxy({},lines), + tris = proxy({},tris), + half_edges = proxy({},half_edges)] ZS_LAMBDA(int pi) mutable { + // calculate one-ring neighbored points + one_ring_points("active",pi * (MAX_NEIGHS+1) + 0) = (T)1.0; + auto pidx = reinterpret_bits(points("inds",pi)); + one_ring_points.tuple(dim_c<3>,"x",pi * (MAX_NEIGHS+1) + 0) = verts.pack(dim_c<3>,"x",pidx); + + auto he_idx = 
reinterpret_bits(points("he_inds",pi)); + + zs::vec pneighs = get_one_ring_neigh_points(he_idx,half_edges); + // printf("one_ring_neighbors[%d] : %d %d %d %d %d %d\n",(int)pi, + // (int)pneighs[0],(int)pneighs[1],(int)pneighs[2],(int)pneighs[3],(int)pneighs[4],(int)pneighs[5]); + for(int i = 0;i != MAX_NEIGHS;++i){ + if(pneighs[i] < 0) + break; + auto npidx = reinterpret_bits(points("inds",pneighs[i])); + one_ring_points("active",pi * (MAX_NEIGHS+1) + i + 1) = (T)1.0; + one_ring_points.tuple(dim_c<3>,"x",pi * (MAX_NEIGHS+1) + i + 1) = verts.pack(dim_c<3>,"x",npidx); + } + + }); + + cudaPol(zs::range(points.size()),[ + verts = proxy({},verts), + one_ring_lines = proxy({},one_ring_lines), + points = proxy({},points), + lines = proxy({},lines), + half_edges = proxy({},half_edges)] ZS_LAMBDA(int pi) mutable { + auto he_idx = reinterpret_bits(points("he_inds",pi)); + zs::vec pneighs = get_one_ring_neigh_edges(he_idx,half_edges); + // printf("one_ring_line_neighbors[%d] : %d %d %d %d %d %d\n",(int)pi, + // (int)pneighs[0],(int)pneighs[1],(int)pneighs[2],(int)pneighs[3],(int)pneighs[4],(int)pneighs[5]); + for(int i = 0;i != MAX_NEIGHS;++i) { + if(pneighs[i] < 0) + break; + one_ring_lines("active",pi * (2 * MAX_NEIGHS) + 2 * i + 0) = (T)1.0; + one_ring_lines("active",pi * (2 * MAX_NEIGHS) + 2 * i + 1) = (T)1.0; + auto ne = lines.pack(dim_c<2>,"inds",pneighs[i]).reinterpret_bits(int_c); + one_ring_lines.tuple(dim_c<3>,"x",pi * (2 * MAX_NEIGHS) + 2 * i + 0) = verts.pack(dim_c<3>,"x",ne[0]); + one_ring_lines.tuple(dim_c<3>,"x",pi * (2 * MAX_NEIGHS) + 2 * i + 1) = verts.pack(dim_c<3>,"x",ne[1]); + } + }); + + one_ring_points = one_ring_points.clone({zs::memsrc_e::host}); + auto pn_prim = std::make_shared(); + auto& pn_verts = pn_prim->verts; + auto& pn_lines = pn_prim->lines; + + pn_verts.resize(points.size() * (MAX_NEIGHS + 1)); + pn_lines.resize(points.size() * MAX_NEIGHS); + constexpr auto omp_space = execspace_e::openmp; + auto ompPol = omp_exec(); + + 
ompPol(zs::range(points.size()), + [one_ring_points = proxy({},one_ring_points),&pn_verts,&pn_lines] (int pi) { + int nm_active = 0; + for(int i = 0;i != MAX_NEIGHS + 1;++i) { + if(one_ring_points("active",pi * (MAX_NEIGHS+1) + i) > 0) + nm_active++; + else + break; + pn_verts[pi * (MAX_NEIGHS+1) + i] = one_ring_points.pack(dim_c<3>,"x",pi * (MAX_NEIGHS+1) + i).to_array(); + // if(i > 0) { + // auto diff = pn_verts[pi * (MAX_NEIGHS+1) + i] - pn_verts[pi * (MAX_NEIGHS+1) + 0]; + // pn_verts[pi * (MAX_NEIGHS+1) + i] = pn_verts[pi * (MAX_NEIGHS+1) + 0] + diff * 0.9; + // } + } + for(int i = 0;i < nm_active-1;++i) + pn_lines[pi * MAX_NEIGHS + i] = zeno::vec2i(pi * (MAX_NEIGHS + 1) + 0,pi * (MAX_NEIGHS + 1) + i + 1); + }); + + // for(int i = 0;i != pn_lines.size();++i) + // std::cout << "pn_lines[" << i << "] : " << pn_lines[i][0] << "\t" << pn_lines[i][1] << std::endl; + + set_output("pn_prim",std::move(pn_prim)); + + + one_ring_lines = one_ring_lines.clone({zs::memsrc_e::host}); + auto en_prim = std::make_shared(); + auto& en_verts = en_prim->verts; + auto& en_lines = en_prim->lines; + + en_verts.resize(points.size() * (MAX_NEIGHS * 2)); + en_lines.resize(points.size() * MAX_NEIGHS); + + ompPol(zs::range(points.size()), + [one_ring_lines = proxy({},one_ring_lines),&en_verts,&en_lines] (int pi) { + int nm_active = 0; + for(int i = 0;i != 2*MAX_NEIGHS;++i) { + if(one_ring_lines("active",pi * MAX_NEIGHS * 2 + i) > 0) + nm_active++; + else + break; + en_verts[pi * MAX_NEIGHS * 2 + i] = one_ring_lines.pack(dim_c<3>,"x",pi * MAX_NEIGHS * 2 + i).to_array(); + } + int nm_active_edges = nm_active / 2; + for(int i = 0;i != nm_active_edges;++i) + en_lines[pi * MAX_NEIGHS + i] = zeno::vec2i(pi * MAX_NEIGHS * 2 + i * 2 + 0,pi * MAX_NEIGHS * 2 + i * 2 + 1); + }); + + set_output("en_prim",std::move(en_prim)); + } }; -ZENDEFNODE(FilterTopology, {/* inputs: */ { - {"prim"}, - }, - /* outputs: */ - { - {"primOut"}, - }, - /* params: */ - {{"enum lines tris quads", "topo", "tris"}}, - 
/* category: */ - { - "ZSGEOMETRY", - }}); - -}; // namespace zeno \ No newline at end of file + +ZENDEFNODE(VisualizeOneRingNeighbors, {{{"zsparticles"}}, + {{"pn_prim"} + ,{"en_prim"} + }, + {}, + {"ZSGeometry"}}); + + +}; \ No newline at end of file diff --git a/projects/CuLagrange/geometry/VectorField.cu b/projects/CuLagrange/geometry/VectorField.cu index 22d6f07fda..111cc1edb9 100644 --- a/projects/CuLagrange/geometry/VectorField.cu +++ b/projects/CuLagrange/geometry/VectorField.cu @@ -308,8 +308,8 @@ struct ZSSampleQuadratureAttr2Vert : zeno::INode { if(!verts.hasProperty(attr)) { fmt::print("append new nodal attribute {}[{}]\n",attr,attr_dim); verts.append_channels(cudaPol,{{attr,attr_dim}}); - }else if(verts.getPropertySize(attr) != attr_dim){ - fmt::print("the verts' {} attr[{}] and quads' {} attr[{}] not matched\n",attr,verts.getPropertySize(attr),attr,attr_dim); + }else if(verts.getChannelSize(attr) != attr_dim){ + fmt::print("the verts' {} attr[{}] and quads' {} attr[{}] not matched\n",attr,verts.getChannelSize(attr),attr,attr_dim); } cudaPol(range(verts.size()), [verts = proxy({},verts),attr_dim,attr = SmallString(attr)] @@ -335,16 +335,17 @@ struct ZSSampleQuadratureAttr2Vert : zeno::INode { // if(ei == 0) // printf("w : %f\n",(float)w); // w = 1.0;// cancel out the specified weight info + // printf("quads[%s][%d] : %f\n",attr.asChars(),ei,(float)quads(attr,0,ei)); for(int i = 0;i != simplex_size;++i){ auto idx = reinterpret_bits(quads("inds",i,ei)); if(skip_bou && verts(bou_tag,idx) > 1e-6) continue; + auto alpha = w; for(int j = 0;j != attr_dim;++j) { // verts(attr,j,idx) += w * quads(attr,j,ei) / (float)simplex_size; - auto alpha = w / (float)simplex_size; atomic_add(execTag,&verts(attr,j,idx),alpha * quads(attr,j,ei)); - atomic_add(execTag,&vtemp("wsum",idx),alpha); } + atomic_add(execTag,&vtemp("wsum",idx),alpha); } }); @@ -389,6 +390,17 @@ struct ZSSampleVertAttr2Quadrature : zeno::INode { auto& verts = field->getParticles(); auto& quads = 
field->getQuadraturePoints(); + + + // auto skip_bou = get_param("skip_bou"); + // auto bou_tag = get_param("bou_tag"); + + // if(skip_bou && !quads.hasProperty(bou_tag)) { + // fmt::print("the input vertices have no {} boudary tag when skip bou is on\n",bou_tag); + // throw std::runtime_error("the input vertices have no boudary tag when skip bou is on"); + // } + + auto attr = get_param("attr"); if(!verts.hasProperty(attr)){ fmt::print("the input verts have no specified channel : {}\n",attr); @@ -401,13 +413,14 @@ struct ZSSampleVertAttr2Quadrature : zeno::INode { // throw std::runtime_error("the input vertices have no specified weight channel"); // } + int simplex_size = quads.getPropertySize("inds"); int attr_dim = verts.getPropertySize(attr); if(!quads.hasProperty(attr)) quads.append_channels(cudaPol,{{attr,attr_dim}}); - else if(quads.getPropertySize(attr) != attr_dim) { - fmt::print("the size of channel {} V[{}] and Q[{}] not match\n",attr,attr_dim,quads.getPropertySize(attr)); + else if(quads.getChannelSize(attr) != attr_dim) { + fmt::print("the size of channel {} V[{}] and Q[{}] not match\n",attr,attr_dim,quads.getChannelSize(attr)); throw std::runtime_error("the size of channel does not match"); } @@ -566,6 +579,7 @@ struct ZSGaussianNeighborQuadatureSampler : zeno::INode { auto radius_shrink = get_input2("radius"); auto mark = get_input2("mark"); auto mark_tag = get_param("mark_tag"); + auto weight_tag = get_param("weight_tag"); // auto bvh_thickness = get_param("bvh_thickness"); @@ -627,24 +641,24 @@ struct ZSGaussianNeighborQuadatureSampler : zeno::INode { [ dst_quads = proxy({},dst_quads),src_quads = proxy({},src_quads), dst_verts = proxy({},dst_verts),src_verts = proxy({},src_verts), src_centers = proxy(src_centers),dst_centers = proxy(dst_centers), - attr = SmallString(attr),xtag = SmallString(xtag),simplex_size,attr_dim, + attr = SmallString(attr),xtag = SmallString(xtag),simplex_size,attr_dim,weight_tag = zs::SmallString(weight_tag), bvh = 
proxy(quadsBvh),sigma,this,use_append,radius_shrink,mark_tag = SmallString(mark_tag),mark] __device__(int di) mutable { - if(!use_append) - for(int i = 0;i != attr_dim;++i) - dst_quads(attr,i,di) = 0.0; - else{ - float field_norm = 0.f; - for(int i = 0;i != attr_dim;++i) - field_norm += dst_quads(attr,i,di) * dst_quads(attr,i,di); - field_norm = zs::sqrt(field_norm); - if(field_norm > 1e-6) - return; - } + // if(!use_append) + // for(int i = 0;i != attr_dim;++i) + // dst_quads(attr,i,di) = 0.0; + // else{ + // float field_norm = 0.f; + // for(int i = 0;i != attr_dim;++i) + // field_norm += dst_quads(attr,i,di) * dst_quads(attr,i,di); + // field_norm = zs::sqrt(field_norm); + // if(field_norm > 1e-6) + // return; + // } // compute the center of the src tet auto dst_ct = dst_centers[di]; float radius = 0; - float w_sum = 0; + // float w_sum = 0; // automatically detected the approapiate radius size for(int i = 0; i != simplex_size;++i){ @@ -661,20 +675,38 @@ struct ZSGaussianNeighborQuadatureSampler : zeno::INode { // } auto dst_bv = bv_t{get_bounding_box(dst_ct - radius, dst_ct + radius)}; + bool first_iter = true; + bool has_been_sampled = false; bvh.iter_neighbors(dst_bv,[&](int si){ auto src_ct = src_centers[si]; auto dist = (src_ct - dst_ct).norm(); + if(dist > radius * 2) + return; + auto w = gauss_kernel(dist,sigma); + if(w < 1e-4) + return; + + has_been_sampled = true; + if(first_iter && !use_append){ + for(int i = 0;i != attr_dim;++i) + dst_quads(attr,i,di) = 0.0; + first_iter = false; + } + // float distds = dist/sigma; // float beta = zs::exp(-0.5 * distds * distds); // w = 1/(sigma /* zs::sqrt(2*zs::g_pi)*/) * zs::exp(-0.5 * distds * distds); - w_sum += w; + // w_sum += w; + dst_quads(weight_tag,di) += w; // printf("sample neighbor : %d->%d %f %f %f\n",si,di,(float)w,(float)alpha,(float)zs::g_pi); for(int i = 0;i != attr_dim;++i) dst_quads(attr,i,di) += w * src_quads(attr,i,si); + // if(attr_dim == 1) + // printf("dst_quads[%s][%d] sample 
src_quads[%s][%d] : %f\n",attr.asChars(),di,attr.asChars(),si,src_quads(attr,0,si)); dst_quads(mark_tag,di) = mark; }); @@ -682,8 +714,9 @@ struct ZSGaussianNeighborQuadatureSampler : zeno::INode { // if(w_sum < 1e-6){ // printf("lost element %d\n",di); // } - for(int i = 0;i != attr_dim;++i) - dst_quads(attr,i,di) /= (w_sum + 1e-6); + // if(has_been_sampled) + // for(int i = 0;i != attr_dim;++i) + // dst_quads(attr,i,di) /= (w_sum + 1e-6); }); @@ -697,6 +730,7 @@ ZENDEFNODE(ZSGaussianNeighborQuadatureSampler,{ {"source","dest",{"int","use_append","0"},{"float","radius","1"},{"float","mark","-1.0"}}, {"dest"}, { + {"string","weight_tag","weight_tag"}, {"string","mark_tag","mark_tag"}, {"string","attr","attr"}, {"string","xtag","x"}, diff --git a/projects/CuLagrange/geometry/file_parser/read_vtk_mesh.hpp b/projects/CuLagrange/geometry/file_parser/read_vtk_mesh.hpp index 66dd49d938..85c96a40c1 100644 --- a/projects/CuLagrange/geometry/file_parser/read_vtk_mesh.hpp +++ b/projects/CuLagrange/geometry/file_parser/read_vtk_mesh.hpp @@ -329,6 +329,13 @@ namespace zeno { for(int array_id = 0;array_id != nm_arrays;++array_id){ int nm_components,nm_tuples; bufferp = readline(buffer,fp,&line_count); + sscanf(bufferp,"%s",array_name); + if(!strcmp(array_name,"METADATA")){ + printf("skip_line : %s\n",bufferp); + bufferp = readline(buffer,fp,&line_count); + printf("skip_line : %s\n",bufferp); + bufferp = readline(buffer,fp,&line_count); + } sscanf(bufferp,"%s %d %d %s",array_name,&nm_components,&nm_tuples,dummy_str); printf("array_name : %s | nm_components %d | nm_tuples : %d | type : %s at %d\n", array_name,nm_components,nm_tuples,dummy_str,line_count); diff --git a/projects/CuLagrange/geometry/kernel/bary_centric_weights.hpp b/projects/CuLagrange/geometry/kernel/bary_centric_weights.hpp index 20eacf57a8..a26cbe4059 100644 --- a/projects/CuLagrange/geometry/kernel/bary_centric_weights.hpp +++ b/projects/CuLagrange/geometry/kernel/bary_centric_weights.hpp @@ -10,6 +10,8 @@ 
namespace zeno { + + template zs::Vector> get_bounding_volumes(zs::CudaExecutionPolicy &pol, const TileVecT &vtemp, @@ -40,33 +42,6 @@ namespace zeno { return ret; } - template - constexpr T compute_dist_2_facet(const zs::vec& vp,const zs::vec& v0,const zs::vec& v1,const zs::vec& v2){ - auto v012 = (v0 + v1 + v2) / 3; - auto v01 = (v0 + v1) / 2; - auto v02 = (v0 + v2) / 2; - auto v12 = (v1 + v2) / 2; - - T dist = 1e6; - T tdist = (v012 - vp).norm(); - dist = tdist < dist ? tdist : dist; - tdist = (v01 - vp).norm(); - dist = tdist < dist ? tdist : dist; - tdist = (v02 - vp).norm(); - dist = tdist < dist ? tdist : dist; - tdist = (v12 - vp).norm(); - dist = tdist < dist ? tdist : dist; - - tdist = (v0 - vp).norm(); - dist = tdist < dist ? tdist : dist; - tdist = (v1 - vp).norm(); - dist = tdist < dist ? tdist : dist; - tdist = (v2 - vp).norm(); - dist = tdist < dist ? tdist : dist; - - return dist; - } - template constexpr T volume( const zs::vec& p0, @@ -170,7 +145,15 @@ namespace zeno { // return; auto bvs = retrieve_bounding_volumes(pol,verts,quads,wrapv<4>{},bvh_thickness,x_tag); - // std::cout << "TRY BUILDING TETS BVH" << std::endl; + // std::cout << "sizeof bvs : " << bvs.size() << std::endl; + // // std::cout << "TRY BUILDING TETS BVH" << std::endl; + // pol(zs::range(bvs.size()),[ + // bvs = proxy(bvs)] ZS_LAMBDA(int bi) mutable { + // printf("bv[%d] : min(%f %f %f); max(%f %f %f)\n",bi, + // (float)bvs[bi]._min[0],(float)bvs[bi]._min[1],(float)bvs[bi]._min[2], + // (float)bvs[bi]._max[0],(float)bvs[bi]._max[1],(float)bvs[bi]._max[2]); + // }); + auto tetsBvh = LBvh<3, int,T>{}; @@ -192,14 +175,14 @@ namespace zeno { T closest_dist = 1e6; bool found = false; // if(vi == 10820) - // printf("check to locate vert %d using bvh\n",vi); + // printf("check to locate vert %d using bvh with pos = %f %f %f\n",vi,(float)p[0],(float)p[1],(float)p[2]); // auto dst_bv = bv_t{get_bounding_box(dst )} tetsBvh.iter_neighbors(p,[&](int ei){ + // printf("test %d v's neighbor 
element %d ei\n",vi,ei); if(found) return; // if(vi == 10820) - // printf("test neighbor element %d ei\n",ei); auto inds = eles.template pack<4>(elm_tag, ei).template reinterpret_bits(); auto p0 = verts.template pack<3>(x_tag,inds[0]); auto p1 = verts.template pack<3>(x_tag,inds[1]); @@ -217,9 +200,11 @@ namespace zeno { } if(!fitting_in) return; + zs::vec bary{}; if(ws[0] < 0){ - T dist = compute_dist_2_facet(p,p1,p2,p3); + // T dist = compute_dist_2_facet(p,p1,p2,p3); + T dist = LSL_GEO::pointTriangleDistance(p1,p2,p3,p,bary); if(dist < closest_dist){ closest_dist = dist; bcw(elm_tag,vi) = reinterpret_bits(ei); @@ -227,7 +212,7 @@ namespace zeno { } } if(ws[1] < 0){ - T dist = compute_dist_2_facet(p,p0,p2,p3); + T dist = LSL_GEO::pointTriangleDistance(p0,p2,p3,p,bary); if(dist < closest_dist){ closest_dist = dist; bcw(elm_tag,vi) = reinterpret_bits(ei); @@ -235,7 +220,7 @@ namespace zeno { } } if(ws[2] < 0){ - T dist = compute_dist_2_facet(p,p0,p1,p3); + T dist = LSL_GEO::pointTriangleDistance(p0,p1,p3,p,bary); if(dist < closest_dist){ closest_dist = dist; bcw(elm_tag,vi) = reinterpret_bits(ei); @@ -243,17 +228,58 @@ namespace zeno { } } if(ws[3] < 0){ - T dist = compute_dist_2_facet(p,p0,p1,p2); + T dist = LSL_GEO::pointTriangleDistance(p0,p1,p2,p,bary); if(dist < closest_dist){ closest_dist = dist; bcw(elm_tag,vi) = reinterpret_bits(ei); bcw.template tuple<4>(weight_tag,vi) = ws; } } + + // if(ws[0] < 0){ + // T dist = compute_dist_2_facet(p,p1,p2,p3); + // if(dist < closest_dist){ + // closest_dist = dist; + // bcw(elm_tag,vi) = reinterpret_bits(ei); + // bcw.template tuple<4>(weight_tag,vi) = ws; + // } + // } + // if(ws[1] < 0){ + // T dist = compute_dist_2_facet(p,p0,p2,p3); + // if(dist < closest_dist){ + // closest_dist = dist; + // bcw(elm_tag,vi) = reinterpret_bits(ei); + // bcw.template tuple<4>(weight_tag,vi) = ws; + // } + // } + // if(ws[2] < 0){ + // T dist = compute_dist_2_facet(p,p0,p1,p3); + // if(dist < closest_dist){ + // closest_dist = dist; 
+ // bcw(elm_tag,vi) = reinterpret_bits(ei); + // bcw.template tuple<4>(weight_tag,vi) = ws; + // } + // } + // if(ws[3] < 0){ + // T dist = compute_dist_2_facet(p,p0,p1,p2); + // if(dist < closest_dist){ + // closest_dist = dist; + // bcw(elm_tag,vi) = reinterpret_bits(ei); + // bcw.template tuple<4>(weight_tag,vi) = ws; + // } + // } + + if(!fitting_in){ + printf("bind vert %d to %d under non-fitting-in mode\n",vi,ei); + // return; + } + + });// finish iter the neighbor tets }); } + }; diff --git a/projects/CuLagrange/geometry/kernel/calculate_bisector_normal.hpp b/projects/CuLagrange/geometry/kernel/calculate_bisector_normal.hpp index 8ce2b8550f..038b06993d 100644 --- a/projects/CuLagrange/geometry/kernel/calculate_bisector_normal.hpp +++ b/projects/CuLagrange/geometry/kernel/calculate_bisector_normal.hpp @@ -101,7 +101,7 @@ namespace zeno { namespace COLLISION_UTILS { // // auto avge = (e01 + e02 + e12)/(T)3.0; // T barySum = (T)1.0; - // T distance = COLLISION_UTILS::pointTriangleDistance(t0,t1,t2,p,barySum); + // T distance = pointTriangleDistance(t0,t1,t2,p,barySum); // // auto max_ratio = inset_ratio > outset_ratio ? inset_ratio : outset_ratio; // // collisionEps = avge * max_ratio; // auto collisionEps = seg.dot(nrm) > 0 ? 
out_collisionEps : in_collisionEps; diff --git a/projects/CuLagrange/geometry/kernel/calculate_edge_normal.hpp b/projects/CuLagrange/geometry/kernel/calculate_edge_normal.hpp index 13ccd5dd86..310b15509b 100644 --- a/projects/CuLagrange/geometry/kernel/calculate_edge_normal.hpp +++ b/projects/CuLagrange/geometry/kernel/calculate_edge_normal.hpp @@ -10,22 +10,44 @@ namespace zeno { using T = float; - template - bool calculate_edge_normal_from_facet_normal(Pol& pol,const LineTileVec& lines, + template + bool calculate_edge_normal_from_facet_normal(Pol& pol, const SurfTriNrmTileVec& ttemp,const zs::SmallString& srcTag, - SurfLineNrmTileVec& etemp,const zs::SmallString& dstTag) { + SurfLineNrmTileVec& etemp,const zs::SmallString& dstTag, + const SurfTriTopoTileVec& ltopo) { using namespace zs; - if(!ttemp.hasProperty(srcTag) || ttemp.getPropertySize(srcTag) != 3){ + + if(!ttemp.hasProperty(srcTag) || ttemp.getChannelSize(srcTag) != 3){ fmt::print(fg(fmt::color::red),"the input triNrmTileVec has no valid {} normal channel\n",srcTag); return false; } - if(!etemp.hasProperty(dstTag) || etemp.getPropertySize(dstTag) != 3) { + if(!etemp.hasProperty(dstTag) || etemp.getChannelSize(dstTag) != 3) { fmt::print(fg(fmt::color::red),"the input lineNrmTileVec has no valid {} normal channel\n",dstTag); return false; } + if(!ltopo.hasProperty("fe_inds") || ltopo.getChannelSize("fe_inds") != 2){ + fmt::print(fg(fmt::color::red),"the input ltopo has no \"fe_inds\" channel\n"); + return false; + } + + // std::cout << "doing assemble" << std::endl; + + // constexpr auto space = execspace_e::cuda; + // auto cudaPol = cuda_exec(); + // cudaPol(zs::range(ltopo.size()), + // [ltopo = proxy({},ltopo)] ZS_LAMBDA(int li) mutable { + // auto inds = ltopo.template pack<2>("fe_inds",li).reinterpret_bits(int_c); + // printf("ltopo<%d> : %d %d\n",li,inds[0],inds[1]); + // }); + - TILEVEC_OPS::assemble_from<3,2>(pol,ttemp,srcTag,etemp,dstTag,"fe_inds"); + // 
TILEVEC_OPS::fill<3>(pol,etemp,dstTag,zs::vec::zeros()); + TILEVEC_OPS::fill(pol,etemp,dstTag,(T)0.0); + TILEVEC_OPS::assemble_from(pol,ttemp,srcTag,etemp,dstTag,ltopo,"fe_inds"); + // std::cout << "finish assemble" << std::endl; TILEVEC_OPS::normalized_channel<3>(pol,etemp,dstTag); + // std::cout << "finish normalize" << std::endl; + return true; } }; \ No newline at end of file diff --git a/projects/CuLagrange/geometry/kernel/calculate_facet_center.hpp b/projects/CuLagrange/geometry/kernel/calculate_facet_center.hpp index 3c00dfb21f..57fcfd806d 100644 --- a/projects/CuLagrange/geometry/kernel/calculate_facet_center.hpp +++ b/projects/CuLagrange/geometry/kernel/calculate_facet_center.hpp @@ -11,11 +11,11 @@ namespace zeno { template bool calculate_facet_center(Pol& pol,const PosTileVec& verts,const zs::SmallString& xTag,SurfTriTileVec& tris,SurfCenterTileVec& tri_center_buffer,const zs::SmallString& centerTag) { using namespace zs; - if(!tris.hasProperty("inds") || tris.getPropertySize("inds") != 3) { + if(!tris.hasProperty("inds") || tris.getChannelSize("inds") != 3) { if(!tris.hasProperty("inds")) fmt::print(fg(fmt::color::red),"the tris has no 'inds' channel\n"); - else if(tris.getPropertySize("inds") != 3) - fmt::print(fg(fmt::color::red),"the tris has invalid 'inds' channel size {}\n",tris.getPropertySize("inds")); + else if(tris.getChannelSize("inds") != 3) + fmt::print(fg(fmt::color::red),"the tris has invalid 'inds' channel size {}\n",tris.getChannelSize("inds")); return false; } if(tris.size() != tri_center_buffer.size()) { diff --git a/projects/CuLagrange/geometry/kernel/calculate_facet_normal.hpp b/projects/CuLagrange/geometry/kernel/calculate_facet_normal.hpp index 121e6cb946..b9c654ac9c 100644 --- a/projects/CuLagrange/geometry/kernel/calculate_facet_normal.hpp +++ b/projects/CuLagrange/geometry/kernel/calculate_facet_normal.hpp @@ -9,21 +9,41 @@ namespace zeno { using T = float; template - bool calculate_facet_normal(Pol& pol,const PosTileVec& 
verts,const zs::SmallString& xTag,SurfTriTileVec& tris,SurfNrmTileVec& tri_nrm_buffer,const zs::SmallString& nrmTag) { + bool calculate_facet_normal(Pol& pol,const PosTileVec& verts,const zs::SmallString& xTag,const SurfTriTileVec& tris,SurfNrmTileVec& tri_nrm_buffer,const zs::SmallString& nrmTag) { + // std::cout << "calculate facet normal" << std::endl; + using namespace zs; - if(!tris.hasProperty("inds") || tris.getPropertySize("inds") != 3) { - if(!tris.hasProperty("inds")) - fmt::print(fg(fmt::color::red),"the tris has no 'inds' channel\n"); - else if(tris.getPropertySize("inds") != 3) - fmt::print(fg(fmt::color::red),"the tris has invalid 'inds' channel size {}\n",tris.getPropertySize("inds")); + + if(!tris.hasProperty("inds")){ + std::cout << "the tris has no 'inds' channel\n" << std::endl; + fmt::print(fg(fmt::color::red),"the tris has no 'inds' channel\n"); + return false; + } + if(tris.getChannelSize("inds") != 3){ + std::cout << "the tris has invalid 'inds' channel size {}\n" << std::endl; + fmt::print(fg(fmt::color::red),"the tris has invalid 'inds' channel size {}\n",tris.getChannelSize("inds")); return false; } if(tris.size() != tri_nrm_buffer.size()) { + std::cout << "invalid tris and triNrms" << std::endl; fmt::print(fg(fmt::color::red),"the tris's size {} does not match that of tri_nrm_buffer {}\n", tris.size(),tri_nrm_buffer.size()); return false; } + if(!tri_nrm_buffer.hasProperty(nrmTag)) { + // std::cout << "the tri_nrm_buffer has no " << nrmTag << " channel" << std::endl; + fmt::print(fg(fmt::color::red),"the tri_nrm_buffer has no {} channel\n",nrmTag); + return false; + } + + if(tri_nrm_buffer.getChannelSize(nrmTag) != 3) { + // std::cout << "the tri_nrm_buffer has no " << nrmTag << " channel" << std::endl; + fmt::print(fg(fmt::color::red),"the tri_nrm_buffer has invalid {} channel, which should be vec3\n",nrmTag); + return false; + } + + constexpr auto space = execspace_e::cuda; pol(zs::range(tris.size()), [verts = proxy({},verts),tris = 
proxy({},tris),tri_nrm_buffer = proxy({},tri_nrm_buffer),xTag,nrmTag] ZS_LAMBDA(int ti) mutable { @@ -52,7 +72,7 @@ namespace zeno { // template // constexpr bool calculate_point_normal(Pol& pol,const VTileVec& verts,const TTileVec& tris,const zs::SmallString& nrmTag) { // using namespace zs; - // if(!tris.hasProperty("inds") || tris.getPropertySize("inds") != 3) + // if(!tris.hasProperty("inds") || tris.getChannelSize("inds") != 3) // return false; // constexpr auto space = execspace_e::cuda; diff --git a/projects/CuLagrange/geometry/kernel/compute_characteristic_length.hpp b/projects/CuLagrange/geometry/kernel/compute_characteristic_length.hpp index a63d31c637..f84038eec6 100644 --- a/projects/CuLagrange/geometry/kernel/compute_characteristic_length.hpp +++ b/projects/CuLagrange/geometry/kernel/compute_characteristic_length.hpp @@ -17,12 +17,14 @@ namespace zeno { if(!verts.hasProperty(xTag)) throw std::runtime_error("compute_average_edge_length::verts contain no specified \"xTag\" channel"); + if(!elms.hasProperty("inds")) + throw std::runtime_error("compute_average_edge_length::elms contain no \"inds\" channel"); constexpr auto space = execspace_e::cuda; Vector length_sum{verts.get_allocator(),1}; length_sum.setVal((T)0); - auto elm_dim = elms.getPropertySize("inds"); + auto elm_dim = elms.getChannelSize("inds"); auto nm_elms = elms.size(); auto nm_edges = (elm_dim * nm_elms); diff --git a/projects/CuLagrange/geometry/kernel/geo_math.hpp b/projects/CuLagrange/geometry/kernel/geo_math.hpp index 7eebaf3a6d..5bf657dc2c 100644 --- a/projects/CuLagrange/geometry/kernel/geo_math.hpp +++ b/projects/CuLagrange/geometry/kernel/geo_math.hpp @@ -5,6 +5,14 @@ namespace zeno { namespace LSL_GEO { + using REAL = float; + using VECTOR12 = typename zs::vec; + using VECTOR4 = typename zs::vec; + using VECTOR3 = typename zs::vec; + using VECTOR2 = typename zs::vec; + using MATRIX3x12 = typename zs::vec; + using MATRIX12 = typename zs::vec; + template= 2 && simplex_size <= 4)> = 
0> constexpr zs::vec ordered_edges() { if constexpr (simplex_size == 4) @@ -188,5 +196,383 @@ namespace zeno { namespace LSL_GEO { b = x0 - F * X0; } + + /////////////////////////////////////////////////////////////////////// + // get the linear interpolation coordinates from v0 to the line segment + // between v1 and v2 + /////////////////////////////////////////////////////////////////////// + constexpr VECTOR2 getLerp(const VECTOR3 v0, const VECTOR3& v1, const VECTOR3& v2) + { + const VECTOR3 e0 = v0 - v1; + const VECTOR3 e1 = v2 - v1; + const VECTOR3 e1hat = e1 / e1.norm(); + const REAL projection = e0.dot(e1hat); + + if (projection < 0.0) + return VECTOR2(1.0, 0.0); + + if (projection >= e1.norm()) + return VECTOR2(0.0, 1.0); + + const REAL ratio = projection / e1.norm(); + return VECTOR2(1.0 - ratio, ratio); + } + + + /////////////////////////////////////////////////////////////////////// + // find the distance from a line segment (v1, v2) to a point (v0) + /////////////////////////////////////////////////////////////////////// + constexpr REAL pointLineDistance(const VECTOR3 v0, const VECTOR3& v1, const VECTOR3& v2) + { + const VECTOR3 e0 = v0 - v1; + const VECTOR3 e1 = v2 - v1; + const VECTOR3 e1hat = e1 / e1.norm(); + const REAL projection = e0.dot(e1hat); + + // if it projects onto the line segment, use that length + if (projection > 0.0 && projection < e1.norm()) + { + const VECTOR3 normal = e0 - projection * e1hat; + return normal.norm(); + } + + // if it doesn't, find the point-point distances + const REAL diff01 = (v0 - v1).norm(); + const REAL diff02 = (v0 - v2).norm(); + + return (diff01 < diff02) ? 
diff01 : diff02; + } + + + /////////////////////////////////////////////////////////////////////// + // get the barycentric coordinate of the projection of v[0] onto the triangle + // formed by v[1], v[2], v[3] + /////////////////////////////////////////////////////////////////////// + constexpr VECTOR3 getBarycentricCoordinates(const VECTOR3 vertices[4]) + { + const VECTOR3 v0 = vertices[1]; + const VECTOR3 v1 = vertices[2]; + const VECTOR3 v2 = vertices[3]; + + const VECTOR3 e1 = v1 - v0; + const VECTOR3 e2 = v2 - v0; + const VECTOR3 n = e1.cross(e2); + const VECTOR3 nHat = n / n.norm(); + const VECTOR3 v = vertices[0] - (nHat.dot(vertices[0] - v0)) * nHat; + + // get the barycentric coordinates + const VECTOR3 na = (v2 - v1).cross(v - v1); + const VECTOR3 nb = (v0 - v2).cross(v - v2); + const VECTOR3 nc = (v1 - v0).cross(v - v0); + const VECTOR3 barycentric(n.dot(na) / n.l2NormSqr(), + n.dot(nb) / n.l2NormSqr(), + n.dot(nc) / n.l2NormSqr()); + + return barycentric; + } + + + /////////////////////////////////////////////////////////////////////// + // get the barycentric coordinate of the projection of v[0] onto the triangle + // formed by v[1], v[2], v[3] + // + // but, if the projection is actually outside, project to all of the + // edges and find the closest point that's still inside the triangle + /////////////////////////////////////////////////////////////////////// + constexpr VECTOR3 getInsideBarycentricCoordinates(const VECTOR3 vertices[4]) + { + VECTOR3 barycentric = getBarycentricCoordinates(vertices); + + // if it's already inside, we're all done + if (barycentric[0] >= 0.0 && + barycentric[1] >= 0.0 && + barycentric[2] >= 0.0) + return barycentric; + + // find distance to all the line segments + // + // there's lots of redundant computation between here and getLerp, + // but let's get it working and see if it fixes the actual + // artifact before optimizing + REAL distance12 = pointLineDistance(vertices[0], vertices[1], vertices[2]); + REAL 
distance23 = pointLineDistance(vertices[0], vertices[2], vertices[3]); + REAL distance31 = pointLineDistance(vertices[0], vertices[3], vertices[1]); + + // less than or equal is important here, otherwise fallthrough breaks + if (distance12 <= distance23 && distance12 <= distance31) + { + VECTOR2 lerp = getLerp(vertices[0], vertices[1], vertices[2]); + barycentric[0] = lerp[0]; + barycentric[1] = lerp[1]; + barycentric[2] = 0.0; + return barycentric; + } + + // less than or equal is important here, otherwise fallthrough breaks + if (distance23 <= distance12 && distance23 <= distance31) + { + VECTOR2 lerp = getLerp(vertices[0], vertices[2], vertices[3]); + barycentric[0] = 0.0; + barycentric[1] = lerp[0]; + barycentric[2] = lerp[1]; + return barycentric; + } + + // else it must be the 31 case + VECTOR2 lerp = getLerp(vertices[0], vertices[3], vertices[1]); + barycentric[0] = lerp[1]; + barycentric[1] = 0.0; + barycentric[2] = lerp[0]; + return barycentric; + } + + +/////////////////////////////////////////////////////////////////////// +// compute distance between a point and triangle +/////////////////////////////////////////////////////////////////////// + constexpr REAL pointTriangleDistance(const VECTOR3& v0, const VECTOR3& v1, + const VECTOR3& v2, const VECTOR3& v,VECTOR3& barycentric) + { + // get the barycentric coordinates + const VECTOR3 e1 = v1 - v0; + const VECTOR3 e2 = v2 - v0; + const VECTOR3 n = e1.cross(e2); + const VECTOR3 na = (v2 - v1).cross(v - v1); + const VECTOR3 nb = (v0 - v2).cross(v - v2); + const VECTOR3 nc = (v1 - v0).cross(v - v0); + barycentric = VECTOR3(n.dot(na) / n.l2NormSqr(), + n.dot(nb) / n.l2NormSqr(), + n.dot(nc) / n.l2NormSqr()); + + const REAL barySum = zs::abs(barycentric[0]) + zs::abs(barycentric[1]) + zs::abs(barycentric[2]); + + // if the point projects to inside the triangle, it should sum to 1 + if (zs::abs(barySum - 1.0) < 1e-6) + { + const VECTOR3 nHat = n / n.norm(); + const REAL normalDistance = (nHat.dot(v - v0)); + 
return zs::abs(normalDistance); + } + + // project onto each edge, find the distance to each edge + const VECTOR3 e3 = v2 - v1; + const VECTOR3 ev = v - v0; + const VECTOR3 ev3 = v - v1; + const VECTOR3 e1Hat = e1 / e1.norm(); + const VECTOR3 e2Hat = e2 / e2.norm(); + const VECTOR3 e3Hat = e3 / e3.norm(); + VECTOR3 edgeDistances(1e8, 1e8, 1e8); + + // see if it projects onto the interval of the edge + // if it doesn't, then the vertex distance will be smaller, + // so we can skip computing anything + const REAL e1dot = e1Hat.dot(ev); + if (e1dot > 0.0 && e1dot < e1.norm()) + { + const VECTOR3 projected = v0 + e1Hat * e1dot; + edgeDistances[0] = (v - projected).norm(); + } + const REAL e2dot = e2Hat.dot(ev); + if (e2dot > 0.0 && e2dot < e2.norm()) + { + const VECTOR3 projected = v0 + e2Hat * e2dot; + edgeDistances[1] = (v - projected).norm(); + } + const REAL e3dot = e3Hat.dot(ev3); + if (e3dot > 0.0 && e3dot < e3.norm()) + { + const VECTOR3 projected = v1 + e3Hat * e3dot; + edgeDistances[2] = (v - projected).norm(); + } + + // get the distance to each vertex + const VECTOR3 vertexDistances((v - v0).norm(), + (v - v1).norm(), + (v - v2).norm()); + + // get the smallest of both the edge and vertex distances + REAL vertexMin = 1e8; + REAL edgeMin = 1e8; + for(int i = 0;i < 3;++i){ + vertexMin = vertexMin > vertexDistances[i] ? vertexDistances[i] : vertexMin; + edgeMin = edgeMin > edgeDistances[i] ? edgeDistances[i] : edgeMin; + } + // return the smallest of those + return (vertexMin < edgeMin) ? 
vertexMin : edgeMin; + } + + constexpr REAL pointTriangleDistance(const VECTOR3& v0, const VECTOR3& v1, + const VECTOR3& v2, const VECTOR3& v) + { + // // get the barycentric coordinates + // const VECTOR3 e1 = v1 - v0; + // const VECTOR3 e2 = v2 - v0; + // const VECTOR3 n = e1.cross(e2); + // const VECTOR3 na = (v2 - v1).cross(v - v1); + // const VECTOR3 nb = (v0 - v2).cross(v - v2); + // const VECTOR3 nc = (v1 - v0).cross(v - v0); + // const VECTOR3 barycentric(n.dot(na) / n.l2NormSqr(), + // n.dot(nb) / n.l2NormSqr(), + // n.dot(nc) / n.l2NormSqr()); + + // const REAL barySum = zs::abs(barycentric[0]) + zs::abs(barycentric[1]) + zs::abs(barycentric[2]); + + // // if the point projects to inside the triangle, it should sum to 1 + // if (zs::abs(barySum - 1.0) < 1e-6) + // { + // const VECTOR3 nHat = n / n.norm(); + // const REAL normalDistance = (nHat.dot(v - v0)); + // return zs::abs(normalDistance); + // } + + // // project onto each edge, find the distance to each edge + // const VECTOR3 e3 = v2 - v1; + // const VECTOR3 ev = v - v0; + // const VECTOR3 ev3 = v - v1; + // const VECTOR3 e1Hat = e1 / e1.norm(); + // const VECTOR3 e2Hat = e2 / e2.norm(); + // const VECTOR3 e3Hat = e3 / e3.norm(); + // VECTOR3 edgeDistances(1e8, 1e8, 1e8); + + // // see if it projects onto the interval of the edge + // // if it doesn't, then the vertex distance will be smaller, + // // so we can skip computing anything + // const REAL e1dot = e1Hat.dot(ev); + // if (e1dot > 0.0 && e1dot < e1.norm()) + // { + // const VECTOR3 projected = v0 + e1Hat * e1dot; + // edgeDistances[0] = (v - projected).norm(); + // } + // const REAL e2dot = e2Hat.dot(ev); + // if (e2dot > 0.0 && e2dot < e2.norm()) + // { + // const VECTOR3 projected = v0 + e2Hat * e2dot; + // edgeDistances[1] = (v - projected).norm(); + // } + // const REAL e3dot = e3Hat.dot(ev3); + // if (e3dot > 0.0 && e3dot < e3.norm()) + // { + // const VECTOR3 projected = v1 + e3Hat * e3dot; + // edgeDistances[2] = (v - 
projected).norm(); + // } + + // // get the distance to each vertex + // const VECTOR3 vertexDistances((v - v0).norm(), + // (v - v1).norm(), + // (v - v2).norm()); + + // // get the smallest of both the edge and vertex distances + // REAL vertexMin = 1e8; + // REAL edgeMin = 1e8; + // for(int i = 0;i < 3;++i){ + // vertexMin = vertexMin > vertexDistances[i] ? vertexDistances[i] : vertexMin; + // edgeMin = edgeMin > edgeDistances[i] ? edgeDistances[i] : edgeMin; + // } + // // return the smallest of those + // return (vertexMin < edgeMin) ? vertexMin : edgeMin; + VECTOR3 barycentric{}; + return pointTriangleDistance(v0,v1,v2,v,barycentric); + } + + + + constexpr REAL pointTriangleDistance(const VECTOR3& v0, const VECTOR3& v1, + const VECTOR3& v2, const VECTOR3& v,REAL& barySum) + { + // get the barycentric coordinates + const VECTOR3 e1 = v1 - v0; + const VECTOR3 e2 = v2 - v0; + const VECTOR3 n = e1.cross(e2); + const VECTOR3 na = (v2 - v1).cross(v - v1); + const VECTOR3 nb = (v0 - v2).cross(v - v2); + const VECTOR3 nc = (v1 - v0).cross(v - v0); + const VECTOR3 barycentric(n.dot(na) / n.l2NormSqr(), + n.dot(nb) / n.l2NormSqr(), + n.dot(nc) / n.l2NormSqr()); + + barySum = zs::abs(barycentric[0]) + zs::abs(barycentric[1]) + zs::abs(barycentric[2]); + + // if the point projects to inside the triangle, it should sum to 1 + if (zs::abs(barySum - 1.0) < 1e-6) + { + const VECTOR3 nHat = n / n.norm(); + const REAL normalDistance = (nHat.dot(v - v0)); + return zs::abs(normalDistance); + } + + // project onto each edge, find the distance to each edge + const VECTOR3 e3 = v2 - v1; + const VECTOR3 ev = v - v0; + const VECTOR3 ev3 = v - v1; + const VECTOR3 e1Hat = e1 / e1.norm(); + const VECTOR3 e2Hat = e2 / e2.norm(); + const VECTOR3 e3Hat = e3 / e3.norm(); + VECTOR3 edgeDistances(1e8, 1e8, 1e8); + + // see if it projects onto the interval of the edge + // if it doesn't, then the vertex distance will be smaller, + // so we can skip computing anything + const REAL e1dot = 
e1Hat.dot(ev); + if (e1dot > 0.0 && e1dot < e1.norm()) + { + const VECTOR3 projected = v0 + e1Hat * e1dot; + edgeDistances[0] = (v - projected).norm(); + } + const REAL e2dot = e2Hat.dot(ev); + if (e2dot > 0.0 && e2dot < e2.norm()) + { + const VECTOR3 projected = v0 + e2Hat * e2dot; + edgeDistances[1] = (v - projected).norm(); + } + const REAL e3dot = e3Hat.dot(ev3); + if (e3dot > 0.0 && e3dot < e3.norm()) + { + const VECTOR3 projected = v1 + e3Hat * e3dot; + edgeDistances[2] = (v - projected).norm(); + } + + // get the distance to each vertex + const VECTOR3 vertexDistances((v - v0).norm(), + (v - v1).norm(), + (v - v2).norm()); + + // get the smallest of both the edge and vertex distances + REAL vertexMin = 1e8; + REAL edgeMin = 1e8; + for(int i = 0;i < 3;++i){ + vertexMin = vertexMin > vertexDistances[i] ? vertexDistances[i] : vertexMin; + edgeMin = edgeMin > edgeDistances[i] ? edgeDistances[i] : edgeMin; + } + // return the smallest of those + return (vertexMin < edgeMin) ? vertexMin : edgeMin; + } + + + /////////////////////////////////////////////////////////////////////// + // see if the projection of v onto the plane of v0,v1,v2 is inside + // the triangle formed by v0,v1,v2 + /////////////////////////////////////////////////////////////////////// + constexpr bool pointProjectsInsideTriangle(const VECTOR3& v0, const VECTOR3& v1, + const VECTOR3& v2, const VECTOR3& v){ + // get the barycentric coordinates + const VECTOR3 e1 = v1 - v0; + const VECTOR3 e2 = v2 - v0; + const VECTOR3 n = e1.cross(e2); + const VECTOR3 na = (v2 - v1).cross(v - v1); + const VECTOR3 nb = (v0 - v2).cross(v - v2); + const VECTOR3 nc = (v1 - v0).cross(v - v0); + const VECTOR3 barycentric(n.dot(na) / n.l2NormSqr(), + n.dot(nb) / n.l2NormSqr(), + n.dot(nc) / n.l2NormSqr()); + + const REAL barySum = zs::abs(barycentric[0]) + zs::abs(barycentric[1]) + zs::abs(barycentric[2]); + + // if the point projects to inside the triangle, it should sum to 1 + if (zs::abs(barySum - 1.0) < 1e-6) + 
return true; + + return false; + } + }; }; \ No newline at end of file diff --git a/projects/CuLagrange/geometry/kernel/laplacian.hpp b/projects/CuLagrange/geometry/kernel/laplacian.hpp new file mode 100644 index 0000000000..433271f003 --- /dev/null +++ b/projects/CuLagrange/geometry/kernel/laplacian.hpp @@ -0,0 +1,351 @@ +#pragma once + +#include "Structures.hpp" +#include "topology.hpp" + +namespace zeno { + template + constexpr T doublearea(T a,T b,T c) { + T s = (a + b + c)/2; + return 2*zs::sqrt(s*(s-a)*(s-b)*(s-c)); + } + + template + constexpr T area(T a,T b,T c) { + return doublearea(a,b,c)/2; + } + + template + constexpr T volume(zs::vec l) { + T u = l(0); + T v = l(1); + T w = l(2); + T U = l(3); + T V = l(4); + T W = l(5); + T X = (w - U + v)*(U + v + w); + T x = (U - v + w)*(v - w + U); + T Y = (u - V + w)*(V + w + u); + T y = (V - w + u)*(w - u + V); + T Z = (v - W + u)*(W + u + v); + T z = (W - u + v)*(u - v + W); + T a = zs::sqrt(x*Y*Z); + T b = zs::sqrt(y*Z*X); + T c = zs::sqrt(z*X*Y); + T d = zs::sqrt(x*y*z); + T vol = zs::sqrt( + (-a + b + c + d)* + ( a - b + c + d)* + ( a + b - c + d)* + ( a + b + c - d))/ + (192.*u*v*w); + + return vol; + } + + template + constexpr void dihedral_angle_intrinsic(const zs::vec& l,const zs::vec& s,zs::vec& theta,zs::vec& cos_theta) { + zs::vec H_sqr{}; + H_sqr[0] = (1./16.) * (4.*l(3)*l(3)*l(0)*l(0) - zs::sqr((l(1)*l(1) + l(4)*l(4)) - (l(2)*l(2) + l(5)*l(5)))); + H_sqr[1] = (1./16.) * (4.*l(4)*l(4)*l(1)*l(1) - zs::sqr((l(2)*l(2) + l(5)*l(5)) - (l(3)*l(3) + l(0)*l(0)))); + H_sqr[2] = (1./16.) * (4.*l(5)*l(5)*l(2)*l(2) - zs::sqr((l(3)*l(3) + l(0)*l(0)) - (l(4)*l(4) + l(1)*l(1)))); + H_sqr[3] = (1./16.) * (4.*l(0)*l(0)*l(3)*l(3) - zs::sqr((l(4)*l(4) + l(1)*l(1)) - (l(5)*l(5) + l(2)*l(2)))); + H_sqr[4] = (1./16.) * (4.*l(1)*l(1)*l(4)*l(4) - zs::sqr((l(5)*l(5) + l(2)*l(2)) - (l(0)*l(0) + l(3)*l(3)))); + H_sqr[5] = (1./16.) 
* (4.*l(2)*l(2)*l(5)*l(5) - zs::sqr((l(0)*l(0) + l(3)*l(3)) - (l(1)*l(1) + l(4)*l(4)))); + + cos_theta(0) = (H_sqr(0) - s(1)*s(1) - s(2)*s(2)) / (-2.*s(1) * s(2)); + cos_theta(1) = (H_sqr(1) - s(2)*s(2) - s(0)*s(0)) / (-2.*s(2) * s(0)); + cos_theta(2) = (H_sqr(2) - s(0)*s(0) - s(1)*s(1)) / (-2.*s(0) * s(1)); + cos_theta(3) = (H_sqr(3) - s(3)*s(3) - s(0)*s(0)) / (-2.*s(3) * s(0)); + cos_theta(4) = (H_sqr(4) - s(3)*s(3) - s(1)*s(1)) / (-2.*s(3) * s(1)); + cos_theta(5) = (H_sqr(5) - s(3)*s(3) - s(2)*s(2)) / (-2.*s(3) * s(2)); + + //TODO the theta here might be invalid, might be a hidden bug + theta(0) = zs::acos(cos_theta(0)); + theta(1) = zs::acos(cos_theta(1)); + theta(2) = zs::acos(cos_theta(2)); + theta(3) = zs::acos(cos_theta(3)); + theta(4) = zs::acos(cos_theta(4)); + theta(5) = zs::acos(cos_theta(5)); + } + + + template + void compute_smooth_laplacian(Pol& pol, + const PosTileVec& verts,const zs::SmallString& xTag, + const SrcTileVec& src,const zs::SmallString& srcTag, + const HalfEdgeTileVec& halfEdges, + const PointTileVec& points, + const EdgeTileVec& edges, + const TriTileVec& tris, + DstTileVec& dst,const zs::SmallString& dstTag) { + using T = typename SrcTileVec::value_type; + using namespace zs; + constexpr auto space = Pol::exec_tag::value; + int space_dim = src.getPropertySize(srcTag); + + pol(range(points.size()),[ + verts = proxy({},verts),xTag, + src = proxy({},src),srcTag, + half_edges = proxy({},halfEdges), + points = proxy({},points), + edges = proxy({},edges), + tris = proxy({},tris), + dst = proxy({},dst),dstTag,space_dim] + ZS_LAMBDA(int pi) mutable { + auto vidx = reinterpret_bits(points("inds",pi)); + auto he_idx = reinterpret_bits(points("he_inds",pi)); + zs::vec pneighs = get_one_ring_neigh_points(he_idx,half_edges); + zs::vec eneighs = get_one_ring_neigh_edges(he_idx,half_edges); + T ws = (T)0.0; + for(int i = 0;i != MAX_NEIGHS;++i) { + auto npi = pneighs[i]; + if(npi < 0) + break; + auto nvidx = reinterpret_bits(points("inds",npi)); + 
auto w = (T)0.0; + // compute cotangent weight + { + auto li = eneighs[i]; + auto ne = edges.pack(dim_c<2>,"inds",li).reinterpret_bits(int_c); + auto fe_inds = edges.pack(dim_c<2>,"fe_inds",li).reinterpret_bits(int_c); + + auto t0 = fe_inds[0]; + auto t1 = fe_inds[1]; + + zs::vec l{}; + zs::vec l2{}; + zs::vec vs[3] = {}; + + for(int j = 0;j != 2;++j) { + if(fe_inds[j] < 0) + break; + auto tri = tris.pack(dim_c<3>,"inds",fe_inds[j]).reinterpret_bits(int_c); + int k = 0; + for(k = 0;k != 3;++k) { + if((tri[k] == ne[0] && tri[(k+1)%3] == ne[1]) || (tri[k] == ne[1] && tri[(k+1)%3] == ne[0])) + break; + } + if(k == 3) { + printf("invalid fe_inds detected"); + }else{ + for(int d = 0;d != 3;++d) + vs[d] = verts.pack(dim_c<3>,xTag,tri[(k + d) % 3]); + for(int d = 0;d != 3;++d){ + l2[d] = (vs[d] - vs[(d+1) % 3]).l2Norm(); + l[d] = zs::sqrt(l2[d]); + } + + auto dblA = doublearea(l[0],l[1],l[2]); + auto C = (l2[2] + l2[1] - l2[0])/dblA/(T)4.0; + w += C; + } + + } + } + ws += w; + for(int i = 0;i != space_dim;++i) + dst(dstTag,i,pi) += src(srcTag,i,pi) * w; + } + for(int i = 0;i != space_dim;++i) + dst(dstTag,i,pi) /= ws; + }); + } + + template + void compute_smooth(Pol& pol, + const SrcTileVec& src,const zs::SmallString& srcTag, + const HalfEdgeTileVec& halfEdges, + const PointTileVec& points, + const EdgeTileVec& edges, + const TriTileVec& tris, + DstTileVec& dst,const zs::SmallString& dstTag) { + using T = typename SrcTileVec::value_type; + using namespace zs; + constexpr auto space = Pol::exec_tag::value; + int space_dim = src.getPropertySize(srcTag); + pol(range(points.size()),[ + src = proxy({},src),srcTag, + half_edges = proxy({},halfEdges), + points = proxy({},points), + edges = proxy({},edges), + tris = proxy({},tris), + dst = proxy({},dst),dstTag,space_dim] + ZS_LAMBDA(int pi) mutable { + auto vidx = reinterpret_bits(points("inds",pi)); + auto he_idx = reinterpret_bits(points("he_inds",pi)); + zs::vec pneighs = get_one_ring_neigh_points(he_idx,half_edges); + T ws = 
(T)0.0; + + for(int i = 0;i != MAX_NEIGHS;++i) { + auto npi = pneighs[i]; + if(npi < 0) + break; + auto nvidx = reinterpret_bits(points("inds",npi)); + auto w = (T)1.0; + ws += w; + for(int d = 0;d != space_dim;++d) + dst(dstTag,d,pi) += w * src(srcTag,d,pi); + } + for(int d = 0;d != space_dim;++d) + dst(dstTag,d,pi) /= ws; + }); + } + + template + void compute_smooth_corrective(Pol& pol, + const SrcTileVec& src,const zs::SmallString& srcTag, + const HalfEdgeTileVec& halfEdges, + const PointTileVec& points, + const EdgeTileVec& edges, + const TriTileVec& tris, + DstTileVec& dst,const zs::SmallString& dstTag) { + + } + + + template + void compute_cotmatrix(Pol &pol,const ETileVec &eles, + const VTileVec &verts, const zs::SmallString& xTag, + ETmpTileVec& etemp, const zs::SmallString& HTag) { + + static_assert(zs::is_same_v,"precision not match"); + static_assert(zs::is_same_v,"precision not match"); + + using T = typename VTileVec::value_type; + + using namespace zs; + static_assert(simplex_size >= 3 && simplex_size <=4, "invalid co-dimension!\n"); + constexpr auto space = Pol::exec_tag::value; + + #if ZS_ENABLE_CUDA && defined(__CUDACC__) + static_assert(space == execspace_e::cuda, + "specified policy and compiler not match"); + #else + static_assert(space != execspace_e::cuda, + "specified policy and compiler not match"); + #endif + + if(!verts.hasProperty(xTag)){ + printf("the verts buffer does not contain specified channel\n"); + } + + // if(!etemp.hasProperty(HTag)){ + // printf("the etemp buffer does not contain specified channel\n"); + // } + + etemp.append_channels(pol,{{HTag,simplex_size*simplex_size}}); + + // zs::Vector C{eles.get_allocator(),eles.size()*simplex_size*(simplex_size-1)/2}; + + // compute cotangent entries + // fmt::print("COMPUTE COTANGENT ENTRIES\n"); + int nm_elms = etemp.size(); + pol(zs::range(etemp.size()), + [eles = proxy({},eles),verts = proxy({},verts), + etemp = proxy({},etemp),xTag,HTag,nm_elms] ZS_LAMBDA(int ei) mutable { + 
constexpr int ne = simplex_size*(simplex_size-1)/2; + auto inds = eles.template pack("inds",ei).template reinterpret_bits(); + + using IV = zs::vec; + using TV = zs::vec; + + TV C; + IV edges; + // printf("check_0\n"); + // compute the cotangent entris + if constexpr (simplex_size == 3){ + edges = IV{1,2,2,0,0,1}; + zs::vec l; + zs::vec l2; + for(size_t i = 0;i != ne;++i) { + l[i] = (verts.pack<3>(xTag,inds[edges[i*2+0]]) - verts.pack<3>(xTag,inds[edges[i*2+1]])).norm(); + l2[i] = l[i] * l[i]; + } + auto dblA = doublearea(l[0],l[1],l[2]);// check here, double area + for(size_t i = 0;i != ne;++i) + C[i] = (l2[edges[2*i+0]] + l2[edges[2*i+1]] - l2[3 - edges[2*i+0] - edges[2*i+1]])/dblA/4.0; + } + if constexpr (simplex_size == 4){ + // printf("check_1\n"); + edges = IV{1,2,2,0,0,1,3,0,3,1,3,2}; + zs::vec l{}; + l[0] = (verts.pack<3>(xTag,inds[3]) - verts.pack<3>(xTag,inds[0])).length(); + l[1] = (verts.pack<3>(xTag,inds[3]) - verts.pack<3>(xTag,inds[1])).length(); + l[2] = (verts.pack<3>(xTag,inds[3]) - verts.pack<3>(xTag,inds[2])).length(); + l[3] = (verts.pack<3>(xTag,inds[1]) - verts.pack<3>(xTag,inds[2])).length(); + l[4] = (verts.pack<3>(xTag,inds[2]) - verts.pack<3>(xTag,inds[0])).length(); + l[5] = (verts.pack<3>(xTag,inds[0]) - verts.pack<3>(xTag,inds[1])).length(); + // for(int i = 0;i != ne;++i) + // l[i] = (verts.pack<3>(xTag,inds[edges[i*2+0]]) - verts.pack<3>(xTag,inds[edges[i*2+1]])).norm(); + // printf("check_2\n"); + zs::vec s{ + area(l[1],l[2],l[3]), + area(l[0],l[2],l[4]), + area(l[0],l[1],l[5]), + area(l[3],l[4],l[5])}; + // printf("check_3\n"); + zs::vec cos_theta{},theta{}; + dihedral_angle_intrinsic(l,s,theta,cos_theta); + // printf("check_4\n"); + T vol = eles("vol",ei); + // T vol_cmp = volume(l); + // if(fabs(vol_cmp - vol) > 1e-6) + // printf("VOL_ERROR<%d> : %f\n",ei,(float)fabs(vol_cmp - vol)); + zs::vec sin_theta{}; + #if 0 + sin_theta(0) = vol / ((2./(3.*l(0))) * s(1) * s(2)); + sin_theta(1) = vol / ((2./(3.*l(1))) * s(2) * s(0)); + 
sin_theta(2) = vol / ((2./(3.*l(2))) * s(0) * s(1)); + sin_theta(3) = vol / ((2./(3.*l(3))) * s(3) * s(0)); + sin_theta(4) = vol / ((2./(3.*l(4))) * s(3) * s(1)); + sin_theta(5) = vol / ((2./(3.*l(5))) * s(3) * s(2)); + #else + for(size_t i = 0;i !=ne; ++i) + sin_theta(i) = zs::sin(theta(i)); + #endif + C = (1./6.) * l * cos_theta / sin_theta; + } + + constexpr int simplex_size2 = simplex_size*simplex_size; + etemp.template tuple(HTag,ei) = zs::vec::zeros(); + + + for(size_t i = 0;i != ne;++i){ + int source = edges(i*2 + 0); + int dest = edges(i*2 + 1); + etemp(HTag,simplex_size*source + dest,ei) -= C(i); + etemp(HTag,simplex_size*dest + source,ei) -= C(i); + etemp(HTag,simplex_size*source + source,ei) += C(i); + etemp(HTag,simplex_size*dest + dest,ei) += C(i); + } + + auto L = etemp.template pack(HTag,ei); + }); + + // fmt::print("FINISH COMPUTING COTANGENT ENTRIES\n"); + + } +}; \ No newline at end of file diff --git a/projects/CuLagrange/geometry/kernel/tiled_vector_ops.hpp b/projects/CuLagrange/geometry/kernel/tiled_vector_ops.hpp index 7c5d07fdba..2641ca9099 100644 --- a/projects/CuLagrange/geometry/kernel/tiled_vector_ops.hpp +++ b/projects/CuLagrange/geometry/kernel/tiled_vector_ops.hpp @@ -8,27 +8,41 @@ namespace zeno { namespace TILEVEC_OPS { using T = float; template - void copy(Pol& pol,const SrcTileVec& src,const zs::SmallString& src_tag,DstTileVec& dst,const zs::SmallString& dst_tag) { + void copy(Pol& pol,const SrcTileVec& src,const zs::SmallString& src_tag,DstTileVec& dst,const zs::SmallString& dst_tag,int offset = 0) { using namespace zs; constexpr auto space = execspace_e::cuda; // if(src.size() != dst.size()) // throw std::runtime_error("copy_ops_error::the size of src and dst not match"); pol(zs::range(src.size()), - [src = proxy({},src),src_tag,dst = proxy({},dst),dst_tag] __device__(int vi) mutable { - dst.template tuple(dst_tag,vi) = src.template pack(src_tag,vi); + [src = proxy({},src),src_tag,dst = proxy({},dst),dst_tag,offset] 
__device__(int vi) mutable { + dst.template tuple(dst_tag,vi + offset) = src.template pack(src_tag,vi); }); } template - void copy(Pol& pol,const SrcTileVec& src,const zs::SmallString& src_tag,DstTileVec& dst,const zs::SmallString& dst_tag) { + void copy(Pol& pol,const SrcTileVec& src,const zs::SmallString& src_tag,DstTileVec& dst,const zs::SmallString& dst_tag,int offset = 0) { using namespace zs; constexpr auto space = execspace_e::cuda; // if(src.size() != dst.size()) // throw std::runtime_error("copy_ops_error::the size of src and dst not match"); + if(!src.hasProperty(src_tag)){ + fmt::print(fg(fmt::color::red),"copy_ops_error::the src has no specified channel {}\n",src_tag); + throw std::runtime_error("copy_ops_error::the src has no specified channel"); + } + if(!dst.hasProperty(dst_tag)){ + fmt::print(fg(fmt::color::red),"copy_ops_error::the dst has no specified channel {}\n",dst_tag); + throw std::runtime_error("copy_ops_error::the dst has no specified channel"); + } + auto space_dim = src.getChannelSize(src_tag); + if(dst.getChannelSize(dst_tag) != space_dim){ + // std::cout << "invalid channel size : " << space_dim << "\t" << dst.getChannelSize(dst_tag) << std::endl; + throw std::runtime_error("copy_ops_error::the channel size of src and dst not match"); + } pol(zs::range(src.size()), - [src = proxy({},src),src_tag,dst = proxy({},dst),dst_tag] __device__(int vi) mutable { - dst(dst_tag,vi) = src(src_tag,vi); + [src = proxy({},src),src_tag,dst = proxy({},dst),dst_tag,offset,space_dim] __device__(int vi) mutable { + for(int i = 0;i != space_dim;++i) + dst(dst_tag,i,vi + offset) = src(src_tag,i,vi); }); } @@ -60,49 +74,133 @@ namespace zeno { namespace TILEVEC_OPS { constexpr auto space = execspace_e::cuda; pol(range(vtemp.size()), [vtemp = proxy({},vtemp),tag,value] __device__(int vi) mutable { - vtemp.template tuple(tag,vi) = value; + vtemp.tuple(dim_c,tag,vi) = value; }); } + template void fill(Pol& pol,VTileVec& vtemp,const zs::SmallString& tag,const T& 
value) { using namespace zs; constexpr auto space = execspace_e::cuda; + int space_dim = vtemp.getChannelSize(tag); pol(range(vtemp.size()), - [vtemp = proxy({},vtemp),tag,value] __device__(int vi) mutable { - vtemp(tag,vi) = value; + [vtemp = proxy({},vtemp),tag,value,space_dim] __device__(int vi) mutable { + for(int i= 0;i != space_dim;++i) + vtemp(tag,i,vi) = value; }); } - template + template + void fill_range(Pol& pol,VTileVec& vtemp,const zs::SmallString& tag,const zs::vec& value,int start,int length) { + using namespace zs; + constexpr auto space = execspace_e::cuda; + pol(range(length), + [vtemp = proxy({},vtemp),tag,value,start] __device__(int vi) mutable { + vtemp.template tuple(tag,vi + start) = value; + }); + } + + + template + void fill_range(Pol& pol,VTileVec& vtemp,const zs::SmallString& tag,const T& value,int start,int length) { + using namespace zs; + constexpr auto space = execspace_e::cuda; + int space_dim = vtemp.getChannelSize(tag); + pol(range(length), + [vtemp = proxy({},vtemp),tag,value,space_dim,start] __device__(int vi) mutable { + for(int i= 0;i != space_dim;++i) + vtemp(tag,i,vi + start) = value; + }); + } + + + + template void assemble(Pol& pol, const SrcTileVec& src,const zs::SmallString& srcTag,const zs::SmallString& srcTopoTag, DstTileVec& dst,const zs::SmallString& dstTag) { using namespace zs; constexpr auto space = execspace_e::cuda; - if(!src.hasProperty(srcTopoTag) || src.getPropertySize(srcTopoTag) != simplex_size) + if(!src.hasProperty(srcTopoTag)) throw std::runtime_error("tiledvec_ops::assemble::invalid src's topo channel"); if(!src.hasProperty(srcTag)) throw std::runtime_error("tiledvec_ops::assemble::src has no 'srcTag' channel"); if(!dst.hasProperty(dstTag)) throw std::runtime_error("tiledvec_ops::assemble::dst has no 'dstTag' channel"); + int simplex_size = src.getChannelSize(srcTopoTag); + int src_space_dim = src.getChannelSize(srcTag); + int dst_space_dim = dst.getChannelSize(dstTag); + + if(dst_space_dim * 
simplex_size != src_space_dim) + throw std::runtime_error("tiledvec_ops::assemble::src_space_dim and dst_space_dim not match"); + + // std::cout << "simplex_size : " << simplex_size << std::endl; + // std::cout << "space_dim : " << space_dim << std::endl; + // std::cout << "src_size : " << src.size() << std::endl; + // std::cout << "dst_size : " << dst.size() << std::endl; + + pol(range(src.size()), - [src = proxy({},src),dst = proxy({},dst),srcTag,srcTopoTag,dstTag] __device__(int si) mutable { - auto inds = src.template pack(srcTopoTag,si).reinterpret_bits(int_c); - for(int i = 0;i != simplex_size;++i) - if(inds[i] < 0) + [src = proxy({},src),dst = proxy({},dst),srcTag,srcTopoTag,dstTag,simplex_size,src_space_dim,dst_space_dim] __device__(int si) mutable { + for(int i = 0;i != simplex_size;++i){ + auto idx = reinterpret_bits(src(srcTopoTag,i,si)); + if(idx < 0) return; - auto data = src.template pack(srcTag,si); - for(int i = 0;i != simplex_size;++i) - for(int d = 0;d != space_dim;++d) - atomic_add(exec_cuda,&dst(dstTag,d,inds[i]),data[i*space_dim + d]); + } + + for(int i = 0;i != simplex_size;++i){ + auto idx = reinterpret_bits(src(srcTopoTag,i,si)); + for(int d = 0;d != dst_space_dim;++d){ + atomic_add(exec_cuda,&dst(dstTag,d,idx),src(srcTag,i * dst_space_dim + d,si)); + } + } }); } + template + void assemble_range(Pol& pol, + const SrcTileVec& src,const zs::SmallString& srcTag,const zs::SmallString& srcTopoTag, + DstTileVec& dst,const zs::SmallString& dstTag,int start,int alen) { + using namespace zs; + constexpr auto space = execspace_e::cuda; - template + if(!src.hasProperty(srcTopoTag)) + throw std::runtime_error("tiledvec_ops::assemble::invalid src's topo channel"); + if(!src.hasProperty(srcTag)) + throw std::runtime_error("tiledvec_ops::assemble::src has no 'srcTag' channel"); + if(!dst.hasProperty(dstTag)) + throw std::runtime_error("tiledvec_ops::assemble::dst has no 'dstTag' channel"); + + int simplex_size = src.getChannelSize(srcTopoTag); + int 
src_space_dim = src.getChannelSize(srcTag); + int dst_space_dim = dst.getChannelSize(dstTag); + + + if(dst_space_dim * simplex_size != src_space_dim) + throw std::runtime_error("tiledvec_ops::assemble::src_space_dim and dst_space_dim not match"); + + pol(range(alen), + [src = proxy({},src),dst = proxy({},dst),srcTag,srcTopoTag,dstTag,start,simplex_size,space_dim = dst_space_dim] __device__(int si) mutable { + for(int i = 0;i != simplex_size;++i){ + auto idx = reinterpret_bits(src(srcTopoTag,i,si + start)); + if(idx < 0) + return; + } + for(int i = 0;i != simplex_size;++i){ + auto idx = reinterpret_bits(src(srcTopoTag,i,si + start)); + for(int d = 0;d != space_dim;++d){ + atomic_add(exec_cuda,&dst(dstTag,d,idx),src(srcTag,i * space_dim + d,si + start)); + } + } + }); + } + + + + template void assemble(Pol& pol, const SrcTileVec& src,const zs::SmallString& srcTag, DstTileVec& dst,const zs::SmallString& dstTag) { @@ -111,7 +209,7 @@ namespace zeno { namespace TILEVEC_OPS { // TILEVEC_OPS::fill(pol,dst,"dir",zs::vec::uniform((T)0.0)); - // if(!src.hasProperty("inds") || src.getPropertySize("inds") != simplex_size) + // if(!src.hasProperty("inds") || src.getChannelSize("inds") != simplex_size) // throw std::runtime_error("tiledvec_ops::assemble::invalid src's topo channel inds"); // pol(range(src.size()), @@ -126,34 +224,97 @@ namespace zeno { namespace TILEVEC_OPS { // atomic_add(exec_cuda,&dst(dst_tag,d,inds[i]),data[i*space_dim + d]); // }); - assemble(pol,src,srcTag,"inds",dst,dstTag); + assemble(pol,src,srcTag,"inds",dst,dstTag); } - - template + template void assemble_from(Pol& pol, const SrcTileVec& src,const zs::SmallString& srcTag, - DstTileVec& dst,const zs::SmallString& dstTag,const zs::SmallString& dstTopoTag) { + DstTileVec& dst,const zs::SmallString& dstTag, + const DstTopoTileVec& topo,const zs::SmallString& dstTopoTag) { using namespace zs; constexpr auto space = execspace_e::cuda; - if(!dst.hasProperty(dstTopoTag) || dst.getPropertySize(dstTopoTag) != 
simplex_size) + if(!topo.hasProperty(dstTopoTag)) throw std::runtime_error("tiledvec_ops::assemble_from::invalid dst's topo channel"); if(!src.hasProperty(srcTag)) throw std::runtime_error("tiledvec_ops::assemble::src has no 'srcTag' channel"); if(!dst.hasProperty(dstTag)) throw std::runtime_error("tiledvec_ops::assemble::dst has no 'dstTag' channel"); + if(dst.size() != topo.size()) + throw std::runtime_error("tiledvec_ops::assemble::dst and topo size not match"); + + int simplex_size = topo.getChannelSize(dstTopoTag); + int space_dim = src.getChannelSize(srcTag); pol(zs::range(dst.size()), - [dst = proxy({},dst),src = proxy({},src),srcTag,dstTag,dstTopoTag] __device__(int di) mutable { - auto inds = dst.template pack(dstTopoTag,di).reinterpret_bits(int_c); - for(int i = 0;i != simplex_size;++i) - dst.template tuple(dstTag,di) += src.template pack(srcTag,inds[i]); + [dst = proxy({},dst),src = proxy({},src),srcTag,dstTag,topo = proxy({},topo),dstTopoTag,simplex_size,space_dim] __device__(int di) mutable { + for(int i = 0;i != simplex_size;++i){ + auto idx = reinterpret_bits(topo(dstTopoTag,i,di)); + for(int d = 0;d != space_dim;++d) + dst(dstTag,d,di) += src(srcTag,d,idx); + } }); } + template + void concatenate_two_tiled_vecs(Pol& pol, + const SrcTileVec0& src0, + const SrcTileVec1& src1, + DstTileVec& dst, + const std::vector& tags) { + using namespace zs; + constexpr auto space = execspace_e::cuda; + + for(int i = 0;i != tags.size();++i){ + auto name = tags[i].name; + auto numChannels = tags[i].numChannels; + + if(!src0.hasProperty(name) || src0.getChannelSize() != numChannels) + throw std::runtime_error("concatenate_two_tiled_vecs::src0's channels not aligned with specified tags"); + if(!src1.hasProperty(name) || src1.getChannelSize() != numChannels) + throw std::runtime_error("concatenate_two_tiled_vecs::src1's channels not aligned with specified tags"); + if(!dst.hasProperty(name) || dst.getChannelSize() != numChannels) + throw 
std::runtime_error("concatenate_two_tiled_vecs::dst's channels not aligned with specified tags"); + if(dst.size() != (src0.size() + src1.size())) + throw std::runtime_error("concatenate_two_tiled_vecs::dst.size() != src0.size() + src1.size()"); + } + + for(int i = 0;i != tags.size();++i) { + auto name = tags[i].name; + auto numChannels = tags[i].numChannels; + copy(pol,src0,name,dst,name,0); + copy(pol,src1,name,dst,name,src0.size()); + } + } + + + template + void assemble_from(Pol& pol, + const SrcTileVec& src,const zs::SmallString& srcTag, + DstTileVec& dst,const zs::SmallString& dstTag,const zs::SmallString& dstTopoTag) { + // using namespace zs; + // constexpr auto space = execspace_e::cuda; + + // if(!dst.hasProperty(dstTopoTag) || dst.getChannelSize(dstTopoTag) != simplex_size) + // throw std::runtime_error("tiledvec_ops::assemble_from::invalid dst's topo channel"); + // if(!src.hasProperty(srcTag)) + // throw std::runtime_error("tiledvec_ops::assemble::src has no 'srcTag' channel"); + // if(!dst.hasProperty(dstTag)) + // throw std::runtime_error("tiledvec_ops::assemble::dst has no 'dstTag' channel"); + + // pol(zs::range(dst.size()), + // [dst = proxy({},dst),src = proxy({},src),srcTag,dstTag,dstTopoTag] __device__(int di) mutable { + // auto inds = dst.template pack(dstTopoTag,di).reinterpret_bits(int_c); + // for(int i = 0;i != simplex_size;++i) + // dst.template tuple(dstTag,di) = dst.template pack(dstTag,di) + src.template pack(srcTag,inds[i]); + // }); + assemble_from(pol,src,srcTag,dst,dstTag,dst,dstTopoTag); + + } + // maybe we also need a weighted assemble func template @@ -165,7 +326,7 @@ namespace zeno { namespace TILEVEC_OPS { [vtemp = proxy({},vtemp),tag,eps] __device__(int vi) mutable { auto d = vtemp.template pack(tag,vi); auto dn = d.norm(); - d = dn < eps ? d/dn : zs::vec::zeros(); + d = dn > eps ? 
d/dn : zs::vec::zeros(); vtemp.template tuple(tag,vi) = d; }); } diff --git a/projects/CuLagrange/geometry/kernel/topology.hpp b/projects/CuLagrange/geometry/kernel/topology.hpp index 20b46e5f22..9590d57eb1 100644 --- a/projects/CuLagrange/geometry/kernel/topology.hpp +++ b/projects/CuLagrange/geometry/kernel/topology.hpp @@ -5,7 +5,9 @@ #include "zensim/cuda/execution/ExecutionPolicy.cuh" #include "zensim/omp/execution/ExecutionPolicy.hpp" #include "zensim/container/Bvh.hpp" - +#include "zensim/container/Bcht.hpp" +#include "zensim/zpc_tpls/fmt/format.h" +#include "tiled_vector_ops.hpp" namespace zeno { @@ -38,13 +40,70 @@ namespace zeno { return -1; } + template + bool compute_ft_neigh_topo(Pol& pol,const VTileVec& verts,TriTileVec& tris,const TetTileVec& tets,const zs::SmallString& neighTag,float bvh_thickness) { + using namespace zs; + using T = typename VTileVec::value_type; + using bv_t = AABBBox<3,T>; + + if(!tris.hasProperty(neighTag) || tris.getChannelSize(neighTag) != 1) + return false; + + constexpr auto space = zs::execspace_e::cuda; + auto tetsBvh = LBvh<3,int,T>{}; + + auto bvs = retrieve_bounding_volumes(pol,verts,tets,wrapv<4>{},bvh_thickness,"x"); + tetsBvh.build(pol,bvs); + + size_t nmTris = tris.size(); + pol(zs::range(nmTris), + [tets = proxy({},tets), + verts = proxy({},verts), + tris = proxy({},tris), + tetsBvh = proxy(tetsBvh), + neighTag] ZS_LAMBDA(int ti) mutable { + auto tri = tris.pack(dim_c<3>,"inds",ti).reinterpret_bits(int_c); + tris(neighTag,ti) = zs::reinterpret_bits((int)-1); + int nm_found = 0; + auto cv = zs::vec::zeros(); + for(int i = 0;i != 3;++i) + cv += verts.pack(dim_c<3>,"x",tri[i])/(T)3.0; + tetsBvh.iter_neighbors(cv,[&](int ntet) { + // if(ti == 0) + // printf("test tet[%d] and tri[%d]\n",ntet,ti); + if(nm_found > 0) + return; + auto tet = tets.pack(dim_c<4>,"inds",ntet).reinterpret_bits(int_c); + for(int i = 0;i != 3;++i){ + bool found_idx = false; + for(int j = 0;j != 4;++j) + if(tet[j] == tri[i]){ + found_idx = true; + 
break; + } + if(!found_idx) + return; + } + + nm_found++; + tris(neighTag,ti) = reinterpret_bits(ntet); + }); + + if(nm_found == 0) + printf("found no neighbored tet for tri[%d]\n",ti); + + }); + + return true; + } + template bool compute_ff_neigh_topo(Pol& pol,const VTileVec& verts,TTileVec& tris,const zs::SmallString neighTag,float bvh_thickness) { using namespace zs; using T = typename VTileVec::value_type; using bv_t = AABBBox<3,T>; - if(!tris.hasProperty(neighTag) || (tris.getPropertySize(neighTag) != 3)){ + if(!tris.hasProperty(neighTag) || (tris.getChannelSize(neighTag) != 3)){ return false; } @@ -55,12 +114,16 @@ namespace zeno { size_t nmTris = tris.size(); // std::cout << "CALCULATE INCIDENT TRIS " << nmTris << std::endl; + if(!tris.hasProperty("non_manifold")) // probe the same channel name that is appended below and written in the kernel (was misspelled "non_manfold", so the append ran on every call) + tris.append_channels(pol,{{"non_manifold",1}}); + pol(zs::range(nmTris), [tris = proxy({},tris), verts = proxy({},verts), trisBvh = proxy(trisBvh), neighTag] ZS_LAMBDA(int ti) mutable { + tris("non_manifold",ti) = (T)0; auto tri = tris.template pack<3>("inds",ti).template reinterpret_bits(); tris.template tuple<3>(neighTag,ti) = zs::vec{-1,-1,-1}.template reinterpret_bits(); for(int i = 0;i < 3; ++i) { @@ -98,6 +161,7 @@ namespace zeno { }); if(nm_found > 1) { printf("found a non-manifold facet %d %d\n",ti,nm_found); + tris("non_manifold",ti) = (T)1.0; } if(nm_found == 0) { printf("found boundary facet %d\n",ti); @@ -115,7 +179,7 @@ namespace zeno { using T = typename VTileVec::value_type; using bv_t = AABBBox<3,T>; - if(!tris.hasProperty(neighTag) || tris.getPropertySize(neighTag) != 3) + if(!tris.hasProperty(neighTag) || tris.getChannelSize(neighTag) != 3) return false; constexpr auto space = zs::execspace_e::cuda; @@ -146,6 +210,34 @@ namespace zeno { return true; } + // template + // bool compute_ep_neigh_topo(Pol& pol,const VTileVec& verts,PTileVec& points,ETileVec& edges,const zs::SmallString& neighTag,float bvh_thickness) { + // using namespace zs; + // using T = typename
VTileVec::value_type; + // using bv_t = AABBBox<3,T>; + + // if(!edges.hasProperty(neighTag) || edges.getChannelSize(neighTag) != 2) + // return false; + + // constexpr auto space = zs::execspace_e::cuda; + // auto edgesBvh LBvh<3,int,T>{}; + // auto bvs = retrieve_bounding_volumes(pol,verts,tris,wrapv<2>{},bvh_thickness,"x"); + // edgesBvh.build(pol,bvs); + + // pol(range(points.size()),[ + // verts = proxy({},verts), + // points = proxy({},points), + // edges = proxy({},edges), + // edgesBvh = proxy(edgesBvh), + // neighTag,thickness = bvh_thickness] ZS_LAMBDA(int pi) mutable { + // auto pidx = reinterpret_bits(points("inds",pi)); + // auto v = verts.pack(dim_c<3>,"x",pidx); + + + // }); + + // } + template bool compute_fe_neigh_topo(Pol& pol,const VTileVec& verts,ETileVec& edges,TTileVec& tris,const zs::SmallString& neighTag,float bvh_thickness) { @@ -153,10 +245,10 @@ namespace zeno { using T = typename VTileVec::value_type; using bv_t = AABBBox<3,T>; - if(!edges.hasProperty(neighTag) || edges.getPropertySize(neighTag) != 2) + if(!edges.hasProperty(neighTag) || edges.getChannelSize(neighTag) != 2) return false; - if(!tris.hasProperty(neighTag) || tris.getPropertySize(neighTag) != 3) + if(!tris.hasProperty(neighTag) || tris.getChannelSize(neighTag) != 3) return false; constexpr auto space = zs::execspace_e::cuda; @@ -231,4 +323,295 @@ namespace zeno { return true; } + + // void c + + // the input mesh should be a manifold + template + bool build_surf_half_edge(Pol& cudaPol,SurfTriTileVec& tris,SurfEdgeTileVec& lines,SurfPointTileVec& points,HalfEdgeTileVec& halfEdge) { + using namespace zs; + using vec2i = zs::vec; + using vec3i = zs::vec; + using T = typename SurfTriTileVec::value_type; + + constexpr auto space = zs::execspace_e::cuda; + + TILEVEC_OPS::fill(cudaPol,halfEdge,"to_vertex",reinterpret_bits((int)-1)); + TILEVEC_OPS::fill(cudaPol,halfEdge,"to_face",reinterpret_bits((int)-1)); + // TILEVEC_OPS::fill(cudaPol,halfEdge,"edge",reinterpret_bits((int)-1)); 
+ TILEVEC_OPS::fill(cudaPol,halfEdge,"opposite_he",reinterpret_bits((int)-1)); + TILEVEC_OPS::fill(cudaPol,halfEdge,"next_he",reinterpret_bits((int)-1)); + // we might also need a space hash structure here, map from [i1,i2]->[ej] + + // surface tri edges' indexing the halfedge list + bcht,32> hetab{tris.get_allocator(),tris.size() * 3}; + // bcht,32> etab{lines.get_allocator(),lines.size()}; + Vector sfi{tris.get_allocator(),tris.size() * 3}; + // surface points' indexing one of the connected half-edge + bcht,32> ptab{points.get_allocator(),points.size()}; + Vector spi{points.get_allocator(),points.size()}; + + bcht,32> de2fi{halfEdge.get_allocator(),halfEdge.size()}; + Vector sei(lines.get_allocator(),lines.size()); + + cudaPol(range(points.size()), + [ptab = proxy(ptab),points = proxy({},points),spi = proxy(spi)] ZS_LAMBDA(int pi) mutable { + auto pidx = reinterpret_bits(points("inds",pi)); + if(int no = ptab.insert(pidx);no >= 0) + spi[no] = pi; + }); + // cudaPol(range(lines.size()), + // [estab = proxy(estab),lines = proxy({},lines),sei = proxy(sei)] ZS_LAMBDA(int li) mutable { + // auto l = lines.pack(dim_c<2>,"inds",li).reinterpret_bits(int_c); + // if(no = estab.insert(vec2i{l[0],l[1]});no >= 0) + // sei[no] = li; + // }); + // initialize surface tri <-> halfedge connectivity + cudaPol(range(tris.size()), + [hetab = proxy(hetab), + ptab = proxy(ptab), + spi = proxy(spi), + points = proxy({},points), + halfEdge = proxy({},halfEdge), + sfi = proxy(sfi), + tris = proxy({},tris)] ZS_LAMBDA(int ti) mutable { + auto tri = tris.pack(dim_c<3>,"inds",ti).reinterpret_bits(int_c); + vec3i hinds{}; + for(int i = 0;i != 3;++i){ + if(hinds[i] = hetab.insert(vec2i{tri[i],tri[(i+1)%3]});hinds[i] >= 0){ + auto no = hinds[i]; + if(i == 0) + tris("he_inds",ti) = reinterpret_bits(no); + auto pno = ptab.query(tri[i]); + halfEdge("to_vertex",no) = reinterpret_bits(spi[pno]); + halfEdge("to_face",no) = reinterpret_bits(ti); sfi[no] = ti; // record which tri inserted this directed edge: the duplicate-edge diagnostic in the else branch reads sfi[...] and it was never written + points("he_inds",spi[pno]) = reinterpret_bits(no); +
}else { + auto no = hinds[i]; + int pid = hetab.query(vec2i{tri[i],tri[(i+1)%3]}); + int oti = sfi[pid]; + printf("the same directed edge <%d %d> has been inserted twice! original sfi[%d %d] = %d, cur: %d <%d %d %d>\n", + tri[i],tri[(i+1)%3],no,pid,oti,ti,tri[0],tri[1],tri[2]); + } + } + + for(int i = 0;i != 3;++i) + if(hinds[i] >= 0) halfEdge("next_he",hinds[i]) = reinterpret_bits<T>(hinds[(i+1) % 3]); // skip failed insertions (negative slot) and store the index bit-cast, as every reader bit-casts it back to int + }); + + cudaPol(range(halfEdge.size()), + [halfEdge = proxy({},halfEdge),hetab = proxy(hetab),points = proxy<space>({},points)] ZS_LAMBDA(int hi) mutable { + auto curPIdx = reinterpret_bits(halfEdge("to_vertex",hi)); + auto nxtHalfEdgeIdx = reinterpret_bits(halfEdge("next_he",hi)); if(curPIdx < 0 || nxtHalfEdgeIdx < 0) return; // unpopulated slot: keep the -1 written to "opposite_he" by the initial fill + auto nxtPIdx = reinterpret_bits(halfEdge("to_vertex",nxtHalfEdgeIdx)); // to_vertex of the next half-edge; the former nested lookup used a point index as a half-edge index + auto key = vec2i{reinterpret_bits<int>(points("inds",nxtPIdx)),reinterpret_bits<int>(points("inds",curPIdx))}; // hetab is keyed by tri "inds" vertex ids, whereas to_vertex stores points-array indices + + if(auto hno = hetab.query(key);hno >= 0) { + halfEdge("opposite_he",hi) = reinterpret_bits(hno); + }else { + halfEdge("opposite_he",hi) = reinterpret_bits(-1); + } + + }); + + + + // // building the de2fi hash map + // cudaPol(zs::range(tris.size()), [ + // tris = proxy({},tris,"tris_access_fe_fp_inds"),de2fi = proxy(de2fi),halfEdge = proxy({},halfEdge)] ZS_LAMBDA(int ti) mutable { + // auto fe_inds = tris.pack(dim_c<3>,"fe_inds",ti).reinterpret_bits(int_c); + // auto fp_inds = tris.pack(dim_c<3>,"fp_inds",ti).reinterpret_bits(int_c); + + // vec3i nos{}; + // for(int i = 0;i != 3;++i) { + // if(auto no = de2fi.insert(vec2i{fp_inds[i],fp_inds[(i+1) % 3]});no >= 0 && no < halfEdge.size()){ + // nos[i] = no; + // halfEdge("to_vertex",no) = reinterpret_bits(fp_inds[i]); + // halfEdge("face",no) = reinterpret_bits(ti); + // halfEdge("edge",no) = reinterpret_bits(fe_inds[i]); + // // halfEdge("next_he",no) = ti * 3 + (i+1) % 3; + // } else + // printf("invalid de2fi query : %d\n",no); + // } + // for(int i = 0;i != 3;++i){ + // if(nos[i] >= 0 && nos[i] < halfEdge.size()) + // halfEdge("next_he",nos[i]) = reinterpret_bits(nos[(i+1) % 3]); + // else + // printf("invalid de2fi query : %d\n",nos[i]); + // } + // });
+ // fmt::print("build success state: {}\n", de2fi._buildSuccess.getVal()); + // cudaPol(zs::range(halfEdge.size()), + // [halfEdge = proxy({},halfEdge),de2fi = proxy(de2fi)] ZS_LAMBDA(int hei) mutable { + // auto idx0 = reinterpret_bits(halfEdge("to_vertex",hei)); + // auto nexthei = reinterpret_bits(halfEdge("next_he",hei)); + // auto idx1 = reinterpret_bits(halfEdge("to_vertex",nexthei)); + // if(auto no = de2fi.query(vec2i{idx1,idx0});no >= 0) + // halfEdge("opposite_he",hei) = reinterpret_bits(no); + // else{ + // printf("detected boundary half edge : he[%d] : %d %d\n",hei,idx0,idx1); + // halfEdge("opposite_he",hei) = reinterpret_bits((int)-1); + // } + // }); + + // cudaPol(zs::range(lines.size()),[ + // lines = proxy({},lines,"halfedge::line_set_he_inds"),de2fi = proxy(de2fi)] ZS_LAMBDA(int li) mutable { + // auto ep_inds = lines.pack(dim_c<2>,"ep_inds",li).reinterpret_bits(int_c); + // if(auto no = de2fi.query(vec2i{ep_inds[0],ep_inds[1]});no >= 0){ + // lines("he_inds",li) = reinterpret_bits((int)no); + // }else { + // // some algorithm bug + // } + // }); + + // // std::cout << "problematic_fp_inds_size : " << tris.getPropertySize("fp_inds") << std::endl; + + // cudaPol(zs::range(tris.size()),[ + // points = proxy({},points),tris = proxy({},tris,"tris_access_fp_inds"),de2fi = proxy(de2fi)] __device__(int ti) mutable { + // auto fp_inds = tris.pack(dim_c<3>,"fp_inds",ti).reinterpret_bits(int_c); + // // if(auto no = de2fi.query(vec2i{fp_inds[0],fp_inds[1]});no >= 0){ + // // tris("he_inds",ti) = reinterpret_bits((int)no); + // // }else { + // // // some algorithm bug + // // printf("invalid de2fi query %d\n",no); + // // return; + // // } + + // // for(int i = 0;i != 3;++i) { + // // if(auto no = de2fi.query(vec2i{fp_inds[i],fp_inds[(i+1) % 3]});no >= 0){ + // // // if(fp_inds[i] >= 0 && fp_inds[i] < points.size()){ + // // // // points("he_inds",fp_inds[i]) = reinterpret_bits((int)no); + // // // }else + // // // printf("invalid fp_inds[%d] = %d with 
points.size() = %d\n",i,fp_inds[i],(int)points.size()); + // // }else { + // // // // some algorithm bug + // // } + // // } + + // // { + // // auto tmp = vec2i{fp_inds[0],fp_inds[1]}; + // // auto no_test = de2fi.query(tmp); + // // } + // // { + // for(int i = 0;i != 3;++i) { + // if(auto no = de2fi.query(vec2i{fp_inds[i],fp_inds[(i+1) % 3]});no >= 0){ + // if(i == 0) { + // tris("he_inds",ti) = reinterpret_bits((int)no); + // } + // if(fp_inds[i] >= 0 && fp_inds[i] < points.size()){ + // points("he_inds",fp_inds[i]) = reinterpret_bits((int)no); + // }else + // printf("invalid fp_inds[%d] = %d with points.size() = %d\n",i,fp_inds[i],(int)points.size()); + + // }else { + + // } + // } + // // } + // }); + + // // handle the boundary points + // cudaPol(zs::range(halfEdge.size()), + // [points = proxy({},points),halfEdge = proxy({},halfEdge)] ZS_LAMBDA(int hei) mutable { + // auto opposite_idx = reinterpret_bits(halfEdge("opposite_he",hei)); + // if(opposite_idx >= 0) + // return; + // // now the halfEdge is a boundary edge + // auto v_idx = reinterpret_bits(halfEdge("to_vertex",hei)); + // points("he_inds",v_idx) = reinterpret_bits((int)hei); + // }); + + return true; + + } + + template + constexpr int get_next_half_edge(int hei,const HalfEdgeTileVec& half_edges,int step = 1,bool reverse = false) { + using namespace zs; + for(int i = 0;i != step;++i) + hei = reinterpret_bits(half_edges("next_he",hei)); + if(reverse) + hei = reinterpret_bits(half_edges("opposite_he",hei)); + return hei; + } + + template + constexpr int half_edge_get_another_vertex(int hei,const HalfEdgeTileVec& half_edges) { + using namespace zs; + // hei = reinterpret_bits(half_edges("next_he",hei)); + hei = get_next_half_edge(hei,half_edges,1,false); + return reinterpret_bits(half_edges("to_vertex",hei)); + } + + // some operation with half edge structure + template + constexpr zs::vec get_one_ring_neigh_points(int hei,const HalfEdgeTileVec& half_edges) { + using namespace zs; + auto res = 
zs::vec::uniform(-1); + auto hei0 = hei; + int i = 0; + // res[0] = half_edge_get_another_vertex(hei,half_edges); + for(i = 0;i != MAX_NEIGHS;++i) { + res[i] = half_edge_get_another_vertex(hei,half_edges); + auto nhei = get_next_half_edge(hei,half_edges,2,true); + if(nhei == hei0) + break; + if(nhei < 0 && (i+1) < MAX_NEIGHS) { + nhei = get_next_half_edge(hei,half_edges,2,false); + if(nhei >= 0){ // half-edge index 0 is valid; "> 0" skipped it and kept walking from the wrong edge + res[i + 1] = reinterpret_bits(half_edges("to_vertex",nhei)); + break; + } + } + hei = nhei; + } + if(i == MAX_NEIGHS) + printf("the max_one_ring_neighbor limit exceeds"); + + return res; + } + + template + constexpr zs::vec get_one_ring_neigh_edges(int hei,const HalfEdgeTileVec& half_edges) { + using namespace zs; + auto res = zs::vec::uniform(-1); + auto hei0 = hei; + auto nhei = hei; + int i = 0; + for(i = 0;i != MAX_NEIGHS;++i) { + res[i] = reinterpret_bits(half_edges("edge",hei)); // NOTE(review): build_surf_half_edge no longer fills an "edge" channel (that fill is commented out) - confirm the channel exists + nhei = get_next_half_edge(hei,half_edges,2,true); + if(hei0 == nhei || nhei == -1) + break; + hei = nhei; + } + if(i < MAX_NEIGHS-1 && nhei == -1) { + ++i; + hei = get_next_half_edge(hei,half_edges,2,false); + res[i] = reinterpret_bits(half_edges("edge",hei)); + } + return res; + } + + template + constexpr zs::vec get_one_ring_neigh_tris(int hei,const HalfEdgeTileVec& half_edges) { + using namespace zs; + auto res = zs::vec::uniform(-1); + auto hei0 = hei; + int i = 0; + res[0] = reinterpret_bits(half_edges("face",hei)); // NOTE(review): build_surf_half_edge writes "to_face", not "face" - confirm which channel the half-edge container declares + for(i = 1;i != MAX_NEIGHS;++i) { // reuse the outer counter: a loop-local "int i" shadowed it, so the overflow check below could never fire + hei = get_next_half_edge(hei,half_edges,1,true); + if(hei == hei0 || hei < 0) + break; + res[i] = reinterpret_bits(half_edges("face",hei)); + } + + if(i == MAX_NEIGHS) + printf("the max_one_ring_neighbor limit exceeds"); + + return res; + + } + }; \ No newline at end of file diff --git a/projects/CuLagrange/geometry/linear_system/mfcg.hpp b/projects/CuLagrange/geometry/linear_system/mfcg.hpp index 6527176417..225d5b022d 100644 --- a/projects/CuLagrange/geometry/linear_system/mfcg.hpp +++
b/projects/CuLagrange/geometry/linear_system/mfcg.hpp @@ -105,16 +105,22 @@ namespace zeno { namespace PCG { void prepare_block_diagonal_preconditioner(Pol &pol,const zs::SmallString& HTag,const EBufTileVec& etemp,const zs::SmallString& PTag,VBufTileVec& vtemp,bool use_block = true) { using namespace zs; constexpr auto space = execspace_e::cuda; - pol(zs::range(vtemp.size()), - [vtemp = proxy({}, vtemp),PTag] ZS_LAMBDA (int vi) mutable { - constexpr int block_size = space_dim * space_dim; - vtemp.template tuple(PTag, vi) = zs::vec::zeros(); - }); + // pol(zs::range(vtemp.size()), + // [vtemp = proxy({}, vtemp),PTag] ZS_LAMBDA (int vi) mutable { + // constexpr int block_size = space_dim * space_dim; + // vtemp.template tuple(PTag, vi) = zs::vec::zeros(); + // }); + TILEVEC_OPS::fill(pol,vtemp,PTag,(T)0.0); + pol(zs::range(etemp.size()), [vtemp = proxy({},vtemp),etemp = proxy({},etemp),HTag,PTag,use_block] ZS_LAMBDA(int ei) mutable{ constexpr int h_width = space_dim * simplex_dim; auto inds = etemp.template pack("inds",ei).template reinterpret_bits(); + for(int i = 0;i != simplex_dim;++i) + if(inds[i] < 0) + return; + auto H = etemp.template pack(HTag,ei); for(int vi = 0;vi != simplex_dim;++vi) @@ -660,6 +666,11 @@ namespace zeno { namespace PCG { fmt::print(fg(fmt::color::dark_cyan),"negative zTrk detected = {}\n",zTrk); throw std::runtime_error("negative zTrk detected"); } + if(std::isnan(zTrk)) { + std::cout << "nan zTrk detected = " << zTrk << std::endl; + fmt::print(fg(fmt::color::dark_cyan),"nan zTrk detected = {}\n",zTrk); + throw std::runtime_error("nan zTrk detected"); + } if(residualPreconditionedNorm < localTol) break; // H * p -> tmp