From 656aab972d01fabd174f74e9911e69967cbea6aa Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Mon, 4 Oct 2021 10:52:44 +0200 Subject: [PATCH] Use llama for particle frame and shared memory DataBox layout Also support LLAMA frames in the IO. --- .gitmodules | 3 + include/picongpu/algorithms/Set.hpp | 4 +- include/picongpu/algorithms/Velocity.hpp | 2 +- include/picongpu/fields/FieldJ.kernel | 3 +- include/picongpu/fields/FieldTmp.kernel | 4 +- .../MaxwellSolver/AddCurrentDensity.kernel | 6 +- .../fields/MaxwellSolver/FDTD/FDTDBase.kernel | 4 +- .../fields/currentDeposition/Cache.hpp | 11 +- .../picongpu/fields/incidentField/Solver.hpp | 5 +- include/picongpu/param/memory.param | 15 ++ include/picongpu/particles/Particles.hpp | 5 +- include/picongpu/particles/Particles.kernel | 6 +- include/picongpu/particles/Particles.tpp | 20 +- .../ThomasFermi/ThomasFermi_Impl.hpp | 21 +- .../ionization/byField/ADK/ADK_Impl.hpp | 14 +- .../ionization/byField/BSI/BSI_Impl.hpp | 8 +- .../byField/Keldysh/Keldysh_Impl.hpp | 14 +- .../plugins/PhaseSpace/PhaseSpaceFunctors.hpp | 2 +- .../picongpu/plugins/openPMD/WriteSpecies.hpp | 36 ++-- .../LoadParticleAttributesFromOpenPMD.hpp | 13 +- .../plugins/openPMD/restart/LoadSpecies.hpp | 31 ++- .../openPMD/writer/ParticleAttribute.hpp | 11 +- .../plugins/output/WriteSpeciesCommon.hpp | 96 ++++----- .../picongpu/plugins/radiation/Radiation.hpp | 2 +- include/pmacc/PMaccConfig.cmake | 8 + include/pmacc/debug/VerboseLog.hpp | 6 +- include/pmacc/math/operation/Add.hpp | 8 +- include/pmacc/math/operation/Assign.hpp | 8 +- include/pmacc/math/vector/TwistComponents.hpp | 17 +- include/pmacc/math/vector/Vector.hpp | 84 +++++++- include/pmacc/math/vector/Vector.tpp | 12 +- include/pmacc/memory/boxes/CachedBox.hpp | 41 ++-- include/pmacc/memory/boxes/DataBox.hpp | 104 +++++++++- include/pmacc/memory/boxes/SharedBox.hpp | 18 +- .../pmacc/particles/ParticleDescription.hpp | 9 +- include/pmacc/particles/ParticlesBase.hpp | 3 +- include/pmacc/particles/ParticlesBase.tpp | 23 ++- .../memory/buffers/ParticlesBuffer.hpp | 38 ++-- .../particles/memory/dataTypes/Particle.hpp | 12 +- .../pmacc/particles/memory/frames/Frame.hpp | 195 +++++++++++++++--- .../include/picongpu/param/memory.param | 15 ++ .../include/picongpu/param/memory.param | 15 ++ .../include/picongpu/param/memory.param | 15 ++ .../include/picongpu/param/memory.param | 15 ++ .../include/picongpu/param/memory.param | 15 ++ .../include/picongpu/param/memory.param | 15 ++ .../include/picongpu/param/memory.param | 15 ++ .../include/picongpu/param/memory.param | 14 ++ .../include/picongpu/param/memory.param | 15 ++ .../include/picongpu/param/memory.param | 15 ++ .../include/picongpu/param/memory.param | 23 +++ .../include/picongpu/param/memory.param | 15 ++ .../gameOfLife2D/include/Evolution.hpp | 2 +- thirdParty/llama | 1 + 54 files changed, 852 insertions(+), 260 deletions(-) create mode 100644 .gitmodules create mode 160000 thirdParty/llama diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000000..2dfb55f01bc --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "thirdParty/llama"] + path = thirdParty/llama + url = https://github.com/alpaka-group/llama diff --git a/include/picongpu/algorithms/Set.hpp b/include/picongpu/algorithms/Set.hpp index 985501fe983..2476f44d722 100644 --- a/include/picongpu/algorithms/Set.hpp +++ b/include/picongpu/algorithms/Set.hpp @@ -33,9 +33,9 @@ namespace picongpu } template - HDINLINE void operator()(T_Worker const&, Dst& dst) const + HDINLINE void operator()(T_Worker const&, Dst&& dst) const { - dst = value; + std::forward(dst) = value; } private: diff --git a/include/picongpu/algorithms/Velocity.hpp b/include/picongpu/algorithms/Velocity.hpp index 7d72eee0ffb..ba4ab645345 100644 --- a/include/picongpu/algorithms/Velocity.hpp +++ b/include/picongpu/algorithms/Velocity.hpp @@ -28,7 +28,7 @@ namespace picongpu struct Velocity { template - HDINLINE MomType operator()(const MomType mom, const MassType mass0) + HDINLINE auto operator()(const MomType mom, const MassType mass0) { const float_X rc2 = MUE0_EPS0; const float_X m0_2 = mass0 * mass0; diff --git a/include/picongpu/fields/FieldJ.kernel b/include/picongpu/fields/FieldJ.kernel index dd3e2ef8bc5..fadaf6a9cd4 100644 --- a/include/picongpu/fields/FieldJ.kernel +++ b/include/picongpu/fields/FieldJ.kernel @@ -131,7 +131,8 @@ namespace picongpu // The rest uses normal weighting const float_X weighting = particle[weighting_]; Velocity velocity; - const float3_X vel = velocity(particle[momentum_], attribute::getMass(weighting, particle)); + const float3_X vel + = velocity(static_cast(particle[momentum_]), attribute::getMass(weighting, particle)); auto fieldJShiftToParticle = jBox.shift(localCell); ParticleAlgo perParticle; perParticle(worker, fieldJShiftToParticle, pos, vel, charge, m_deltaTime); diff --git a/include/picongpu/fields/FieldTmp.kernel b/include/picongpu/fields/FieldTmp.kernel index 554a3aca8ce..476426a922f 100644 --- a/include/picongpu/fields/FieldTmp.kernel +++ b/include/picongpu/fields/FieldTmp.kernel @@ -94,7 +94,9 @@ namespace picongpu if(!forEachParticle.hasParticles()) return; - auto cachedVal = CachedBox::create<0, typename T_TmpBox::ValueType>(worker, T_BlockDescription{}); + auto cachedVal = CachedBox::create<0, SharedDataBoxMemoryLayout, typename T_TmpBox::ValueType>( + worker, + T_BlockDescription{}); Set set(float_X(0.0)); auto collective = makeThreadCollective(); diff --git a/include/picongpu/fields/MaxwellSolver/AddCurrentDensity.kernel b/include/picongpu/fields/MaxwellSolver/AddCurrentDensity.kernel index 0c49a36e908..d20f4a9ed44 100644 --- a/include/picongpu/fields/MaxwellSolver/AddCurrentDensity.kernel +++ b/include/picongpu/fields/MaxwellSolver/AddCurrentDensity.kernel @@ -22,6 +22,8 @@ #include "picongpu/simulation_defines.hpp" +#include "picongpu/param/memory.param" + #include #include #include @@ -68,7 +70,9 @@ namespace picongpu::fields::maxwellSolver constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume::type::value; - auto cachedJ = CachedBox::create<0, typename FieldJ::DataBoxType::ValueType>(worker, BlockArea()); + auto cachedJ = CachedBox::create<0, SharedDataBoxMemoryLayout, typename FieldJ::DataBoxType::ValueType>( + worker, + BlockArea()); pmacc::math::operation::Assign assign; DataSpace const block( diff --git a/include/picongpu/fields/MaxwellSolver/FDTD/FDTDBase.kernel b/include/picongpu/fields/MaxwellSolver/FDTD/FDTDBase.kernel index cdcaa5ecdcf..0756f4e9f97 100644 --- a/include/picongpu/fields/MaxwellSolver/FDTD/FDTDBase.kernel +++ b/include/picongpu/fields/MaxwellSolver/FDTD/FDTDBase.kernel @@ -179,7 +179,9 @@ namespace picongpu auto srcFieldBlock = srcField.shift(beginCellIdx); auto cacheStencilArea = makeThreadCollective(); auto cachedSrcField - = CachedBox::create<0u, typename T_SrcBox::ValueType>(worker, StencilCfg{}); + = CachedBox::create<0u, SharedDataBoxMemoryLayout, typename T_SrcBox::ValueType>( + worker, + StencilCfg{}); cacheStencilArea(worker, assign, cachedSrcField, srcFieldBlock); worker.sync(); diff --git a/include/picongpu/fields/currentDeposition/Cache.hpp b/include/picongpu/fields/currentDeposition/Cache.hpp index ae39c5f0580..4757a53ac90 100644 --- a/include/picongpu/fields/currentDeposition/Cache.hpp +++ b/include/picongpu/fields/currentDeposition/Cache.hpp @@ -48,15 +48,11 @@ namespace picongpu */ template DINLINE static auto create(T_Worker const& worker, T_FieldBox const& fieldBox) -#if(!BOOST_COMP_CLANG) - -> decltype(CachedBox::create<0u, typename T_FieldBox::ValueType>( - worker, - std::declval())) -#endif { using ValueType = typename T_FieldBox::ValueType; /* this memory is used by all virtual blocks */ - auto cache = CachedBox::create<0u, ValueType>(worker, T_BlockDescription{}); + auto cache + = CachedBox::create<0u, SharedDataBoxMemoryLayout, ValueType>(worker, T_BlockDescription{}); Set set(ValueType::create(0.0_X)); auto collectiveFill = makeThreadCollective(); @@ -90,9 +86,6 @@ namespace picongpu */ template DINLINE static auto create([[maybe_unused]] T_Worker const& worker, T_FieldBox const& fieldBox) -#if(!BOOST_COMP_CLANG) - -> T_FieldBox -#endif { return fieldBox; } diff --git a/include/picongpu/fields/incidentField/Solver.hpp b/include/picongpu/fields/incidentField/Solver.hpp index 6886ed9fd9c..d87bdc129dc 100644 --- a/include/picongpu/fields/incidentField/Solver.hpp +++ b/include/picongpu/fields/incidentField/Solver.hpp @@ -283,8 +283,9 @@ namespace picongpu using IntVector = pmacc::math::Vector; auto const beginLocalUserIdx = Index{math::max(IntVector{beginUserIdx - totalCellOffset}, IntVector::create(0))}; - auto const endLocalUserIdx - = Index{math::min(IntVector{endUserIdx - totalCellOffset}, IntVector{localDomain.size})}; + auto const endLocalUserIdx = Index{math::min( + IntVector{endUserIdx - totalCellOffset}, + static_cast(localDomain.size))}; // Check if we have any active cells in the local domain bool areAnyCellsInLocalDomain = true; diff --git a/include/picongpu/param/memory.param b/include/picongpu/param/memory.param index 06f8e0d7bda..1bfff7dd7f1 100644 --- a/include/picongpu/param/memory.param +++ b/include/picongpu/param/memory.param @@ -114,4 +114,19 @@ namespace picongpu */ constexpr bool fieldTmpSupportGatherCommunication = true; + struct ParticleFrameMemoryLayout + : llama::mapping::BindSoA + { + inline static constexpr bool splitVector = false; + }; + + struct ParticleFrameMemoryLayoutOpenPMD : llama::mapping::BindSoA + { + inline static constexpr bool splitVector = false; + }; + + struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<> + { + inline static constexpr bool splitVector = false; + }; } // namespace picongpu diff --git a/include/picongpu/particles/Particles.hpp b/include/picongpu/particles/Particles.hpp index a61c03a7a41..533bb2005e4 100644 --- a/include/picongpu/particles/Particles.hpp +++ b/include/picongpu/particles/Particles.hpp @@ -22,6 +22,7 @@ #include "picongpu/fields/Fields.def" #include "picongpu/fields/Fields.hpp" +#include "picongpu/param/memory.param" #include "picongpu/particles/boundary/Description.hpp" #include "picongpu/particles/boundary/Utility.hpp" #include "picongpu/particles/manipulators/manipulators.def" @@ -89,6 +90,7 @@ namespace picongpu pmacc::HandleGuardRegion< pmacc::particles::policies::ExchangeParticles, pmacc::particles::policies::DoNothing>>>, + ParticleFrameMemoryLayout, MappingDesc, DeviceHeap> , public ISimulationData @@ -108,7 +110,8 @@ namespace picongpu pmacc::HandleGuardRegion< pmacc::particles::policies::ExchangeParticles, pmacc::particles::policies::DoNothing>>>; - using ParticlesBaseType = ParticlesBase; + using ParticlesBaseType + = ParticlesBase; using FrameType = typename ParticlesBaseType::FrameType; using FrameTypeBorder = typename ParticlesBaseType::FrameTypeBorder; using ParticlesBoxType = typename ParticlesBaseType::ParticlesBoxType; diff --git a/include/picongpu/particles/Particles.kernel b/include/picongpu/particles/Particles.kernel index ac46ad6c7b0..54daf736536 100644 --- a/include/picongpu/particles/Particles.kernel +++ b/include/picongpu/particles/Particles.kernel @@ -222,8 +222,10 @@ namespace picongpu onlyMaster([&]() { mustShiftSupercell = 0; }); - auto cachedB = CachedBox::create<0, typename T_BBox::ValueType>(worker, T_DataDomain()); - auto cachedE = CachedBox::create<1, typename T_EBox::ValueType>(worker, T_DataDomain()); + auto cachedB + = CachedBox::create<0, SharedDataBoxMemoryLayout, typename T_BBox::ValueType>(worker, T_DataDomain()); + auto cachedE + = CachedBox::create<1, SharedDataBoxMemoryLayout, typename T_EBox::ValueType>(worker, T_DataDomain()); worker.sync(); diff --git a/include/picongpu/particles/Particles.tpp b/include/picongpu/particles/Particles.tpp index 65c683458b7..c5766ffc036 100644 --- a/include/picongpu/particles/Particles.tpp +++ b/include/picongpu/particles/Particles.tpp @@ -43,6 +43,7 @@ #include #include +#include #include #include #include @@ -197,7 +198,9 @@ namespace picongpu const std::shared_ptr& heap, picongpu::MappingDesc cellDescription, SimulationDataId datasetID) - : ParticlesBase(heap, cellDescription) + : ParticlesBase( + heap, + cellDescription) , m_datasetID(datasetID) { constexpr bool particleHasShape = pmacc::traits::HasIdentifier>::type::value; @@ -212,6 +215,21 @@ namespace picongpu size_t sizeOfExchanges = 0u; +#if __has_include() + // dump the data layout of the particle frames + if constexpr(PIConGPUVerbose::log_level & picLog::MEMORY::lvl) + { + log( + "Dumping LLAMA memory layout for frame and border into llama_frame.* and llama_border_fream.*"); + auto fm = typename decltype(FrameType::view)::Mapping{}; + std::ofstream{"llama_frame.html"} << llama::toHtml(fm); + std::ofstream{"llama_frame.svg"} << llama::toSvg(fm); + auto bfm = typename decltype(FrameTypeBorder::view)::Mapping{}; + std::ofstream{"llama_border_frame.html"} << llama::toHtml(bfm); + std::ofstream{"llama_border_frame.svg"} << llama::toSvg(bfm); + } +#endif + const uint32_t commTag = pmacc::traits::getUniqueId(); log("communication tag for species %1%: %2%") % FrameType::getName() % commTag; diff --git a/include/picongpu/particles/ionization/byCollision/ThomasFermi/ThomasFermi_Impl.hpp b/include/picongpu/particles/ionization/byCollision/ThomasFermi/ThomasFermi_Impl.hpp index 0edcb9c8244..f3a45ec8ef8 100644 --- a/include/picongpu/particles/ionization/byCollision/ThomasFermi/ThomasFermi_Impl.hpp +++ b/include/picongpu/particles/ionization/byCollision/ThomasFermi/ThomasFermi_Impl.hpp @@ -23,6 +23,7 @@ #include "picongpu/fields/CellType.hpp" #include "picongpu/fields/FieldTmp.hpp" +#include "picongpu/param/memory.param" #include "picongpu/particles/atomicPhysics/SetChargeState.hpp" #include "picongpu/particles/ionization/byCollision/ThomasFermi/AlgorithmThomasFermi.hpp" #include "picongpu/particles/ionization/byCollision/ThomasFermi/ThomasFermi.def" @@ -104,8 +105,20 @@ namespace picongpu PMACC_ALIGN(eneBox, FieldTmp::DataBoxType); /* shared memory EM-field device databoxes */ - PMACC_ALIGN(cachedRho, DataBox>); - PMACC_ALIGN(cachedEne, DataBox>); + PMACC_ALIGN( + cachedRho, + DataBox>); + PMACC_ALIGN( + cachedEne, + DataBox>); public: /* host constructor initializing member : random number generator */ @@ -185,8 +198,8 @@ namespace picongpu DINLINE void collectiveInit(const T_Worker& worker, const DataSpace& blockCell) { /* caching of density and "temperature" fields */ - cachedRho = CachedBox::create<0, ValueType_Rho>(worker, BlockArea()); - cachedEne = CachedBox::create<1, ValueType_Ene>(worker, BlockArea()); + cachedRho = CachedBox::create<0, SharedDataBoxMemoryLayout, ValueType_Rho>(worker, BlockArea()); + cachedEne = CachedBox::create<1, SharedDataBoxMemoryLayout, ValueType_Ene>(worker, BlockArea()); /* instance of nvidia assignment operator */ pmacc::math::operation::Assign assign; diff --git a/include/picongpu/particles/ionization/byField/ADK/ADK_Impl.hpp b/include/picongpu/particles/ionization/byField/ADK/ADK_Impl.hpp index 9dc24c7ebe9..a3e560f912b 100644 --- a/include/picongpu/particles/ionization/byField/ADK/ADK_Impl.hpp +++ b/include/picongpu/particles/ionization/byField/ADK/ADK_Impl.hpp @@ -102,8 +102,14 @@ namespace picongpu PMACC_ALIGN(bBox, FieldB::DataBoxType); PMACC_ALIGN(jBox, FieldJ::DataBoxType); /* shared memory EM-field device databoxes */ - PMACC_ALIGN(cachedE, DataBox>); - PMACC_ALIGN(cachedB, DataBox>); + PMACC_ALIGN( + cachedE, + DataBox< + SharedBox>); + PMACC_ALIGN( + cachedB, + DataBox< + SharedBox>); public: /* host constructor initializing member : random number generator */ @@ -137,8 +143,8 @@ namespace picongpu jBox = jBox.shift(blockCell); /* caching of E and B fields */ - cachedB = CachedBox::create<0, ValueType_B>(worker, BlockArea()); - cachedE = CachedBox::create<1, ValueType_E>(worker, BlockArea()); + cachedB = CachedBox::create<0, SharedDataBoxMemoryLayout, ValueType_B>(worker, BlockArea()); + cachedE = CachedBox::create<1, SharedDataBoxMemoryLayout, ValueType_E>(worker, BlockArea()); /* instance of nvidia assignment operator */ pmacc::math::operation::Assign assign; diff --git a/include/picongpu/particles/ionization/byField/BSI/BSI_Impl.hpp b/include/picongpu/particles/ionization/byField/BSI/BSI_Impl.hpp index a41c0d0c310..33f40a159db 100644 --- a/include/picongpu/particles/ionization/byField/BSI/BSI_Impl.hpp +++ b/include/picongpu/particles/ionization/byField/BSI/BSI_Impl.hpp @@ -25,6 +25,7 @@ #include "picongpu/fields/FieldB.hpp" #include "picongpu/fields/FieldE.hpp" #include "picongpu/fields/FieldJ.hpp" +#include "picongpu/param/memory.param" #include "picongpu/particles/ParticlesFunctors.hpp" #include "picongpu/particles/atomicPhysics/SetChargeState.hpp" #include "picongpu/particles/ionization/byField/BSI/AlgorithmBSI.hpp" @@ -93,7 +94,10 @@ namespace picongpu FieldE::DataBoxType eBox; FieldJ::DataBoxType jBox; /* shared memory EM-field device databoxes */ - PMACC_ALIGN(cachedE, DataBox>); + PMACC_ALIGN( + cachedE, + DataBox< + SharedBox>); public: /* host constructor */ @@ -125,7 +129,7 @@ namespace picongpu jBox = jBox.shift(blockCell); /* caching of E field */ - cachedE = CachedBox::create<1, ValueType_E>(worker, BlockArea()); + cachedE = CachedBox::create<1, SharedDataBoxMemoryLayout, ValueType_E>(worker, BlockArea()); /* instance of nvidia assignment operator */ pmacc::math::operation::Assign assign; diff --git a/include/picongpu/particles/ionization/byField/Keldysh/Keldysh_Impl.hpp b/include/picongpu/particles/ionization/byField/Keldysh/Keldysh_Impl.hpp index b48b027e812..a9ca275787c 100644 --- a/include/picongpu/particles/ionization/byField/Keldysh/Keldysh_Impl.hpp +++ b/include/picongpu/particles/ionization/byField/Keldysh/Keldysh_Impl.hpp @@ -102,8 +102,14 @@ namespace picongpu PMACC_ALIGN(bBox, FieldB::DataBoxType); PMACC_ALIGN(jBox, FieldJ::DataBoxType); /* shared memory EM-field device databoxes */ - PMACC_ALIGN(cachedE, DataBox>); - PMACC_ALIGN(cachedB, DataBox>); + PMACC_ALIGN( + cachedE, + DataBox< + SharedBox>); + PMACC_ALIGN( + cachedB, + DataBox< + SharedBox>); public: /* host constructor initializing member : random number generator */ @@ -137,8 +143,8 @@ namespace picongpu jBox = jBox.shift(blockCell); /* caching of E and B fields */ - cachedB = CachedBox::create<0, ValueType_B>(worker, BlockArea()); - cachedE = CachedBox::create<1, ValueType_E>(worker, BlockArea()); + cachedB = CachedBox::create<0, SharedDataBoxMemoryLayout, ValueType_B>(worker, BlockArea()); + cachedE = CachedBox::create<1, SharedDataBoxMemoryLayout, ValueType_E>(worker, BlockArea()); /* instance of nvidia assignment operator */ pmacc::math::operation::Assign assign; diff --git a/include/picongpu/plugins/PhaseSpace/PhaseSpaceFunctors.hpp b/include/picongpu/plugins/PhaseSpace/PhaseSpaceFunctors.hpp index c6508b1de55..393c57afe85 100644 --- a/include/picongpu/plugins/PhaseSpace/PhaseSpaceFunctors.hpp +++ b/include/picongpu/plugins/PhaseSpace/PhaseSpaceFunctors.hpp @@ -160,7 +160,7 @@ namespace picongpu /* create shared mem */ constexpr int blockCellsInDir = SuperCellSize::template at::type::value; using SharedMemSize = SuperCellDescription>; - auto sharedMemHist = CachedBox::create<0u, float_PS>(worker, SharedMemSize{}); + auto sharedMemHist = CachedBox::create<0u, SharedDataBoxMemoryLayout, float_PS>(worker, SharedMemSize{}); Set set(float_PS{0.0}); auto collectiveOnSharedHistogram = makeThreadCollective(); diff --git a/include/picongpu/plugins/openPMD/WriteSpecies.hpp b/include/picongpu/plugins/openPMD/WriteSpecies.hpp index 4a747863e79..87d97383b63 100644 --- a/include/picongpu/plugins/openPMD/WriteSpecies.hpp +++ b/include/picongpu/plugins/openPMD/WriteSpecies.hpp @@ -44,8 +44,10 @@ #include #include +#include #include +#include #include // std::remove_reference_t namespace picongpu @@ -112,15 +114,13 @@ namespace picongpu { /* malloc host memory */ log("openPMD: (begin) malloc host memory: %1%") % name; - meta::ForEach> mallocMem; - mallocMem(hostFrame, myNumParticles); + mallocFrameMemory(hostFrame); log("openPMD: ( end ) malloc host memory: %1%") % name; } void free(openPMDFrameType& hostFrame) override { - meta::ForEach> freeMem; - freeMem(hostFrame); + freeFrameMemory(hostFrame); } @@ -182,16 +182,13 @@ namespace picongpu void malloc(std::string name, openPMDFrameType& mappedFrame, uint64_cu const myNumParticles) override { log("openPMD: (begin) malloc mapped memory: %1%") % name; - /*malloc mapped memory*/ - meta::ForEach> mallocMem; - mallocMem(mappedFrame, myNumParticles); + mallocMappedFrameMemory(mappedFrame); log("openPMD: ( end ) malloc mapped memory: %1%") % name; } void free(openPMDFrameType& mappedFrame) override { - meta::ForEach> freeMem; - freeMem(mappedFrame); + freeMappedFrameMemory(mappedFrame); } void prepare(uint32_t currentStep, std::string name, openPMDFrameType& mappedFrame, RunParameters rp) @@ -248,7 +245,7 @@ namespace picongpu using NewParticleDescription = typename ReplaceValueTypeSeq::type; - using openPMDFrameType = Frame; + using openPMDFrameType = Frame; void setParticleAttributes( ::openPMD::ParticleSpecies& record, @@ -360,14 +357,12 @@ namespace picongpu { case WriteSpeciesStrategy::ADIOS: { - using type = StrategyADIOS; - strategy = std::unique_ptr(dynamic_cast(new type)); + strategy = std::make_unique>(); break; } case WriteSpeciesStrategy::HDF5: { - using type = StrategyHDF5; - strategy = std::unique_ptr(dynamic_cast(new type)); + strategy = std::make_unique>(); break; } } @@ -409,7 +404,18 @@ namespace picongpu ::openPMD::ParticleSpecies& particleSpecies = iteration.particles[speciesGroup]; // copy over particles to host - openPMDFrameType hostFrame; + openPMDFrameType hostFrame{boost::numeric_cast(myNumParticles)}; +#if __has_include() + if constexpr(PIConGPUVerbose::log_level & picLog::INPUT_OUTPUT::lvl) + { + log( + "Dumping LLAMA memory layout for openPMD frame into llama_openPMD_write_frame.*"); + auto m = typename openPMDFrameType::Mapping{ + llama::ArrayExtentsDynamic{std::min(hostFrame.view.mapping().extents()[0], 1024)}}; + std::ofstream{"llama_openPMD_write_frame.svg"} << llama::toSvg(m); + std::ofstream{"llama_openPMD_write_frame.html"} << llama::toHtml(m); + } +#endif strategy->malloc(T_SpeciesFilter::getName(), hostFrame, myNumParticles); RunParameters_T runParameters( diff --git a/include/picongpu/plugins/openPMD/restart/LoadParticleAttributesFromOpenPMD.hpp b/include/picongpu/plugins/openPMD/restart/LoadParticleAttributesFromOpenPMD.hpp index b2031490339..58b55abf296 100644 --- a/include/picongpu/plugins/openPMD/restart/LoadParticleAttributesFromOpenPMD.hpp +++ b/include/picongpu/plugins/openPMD/restart/LoadParticleAttributesFromOpenPMD.hpp @@ -68,7 +68,7 @@ namespace picongpu { using Identifier = T_Identifier; using ValueType = typename pmacc::traits::Resolve::type::type; - const uint32_t components = GetNComponents::value; + constexpr uint32_t components = GetNComponents::value; using ComponentType = typename GetComponentsType::type; OpenPMDName openPMDName; @@ -76,6 +76,7 @@ namespace picongpu const std::string name_lookup[] = {"x", "y", "z"}; + // TODO(bgruber): make this a std::shared_ptr with openPMD 0.15 std::shared_ptr loadBfr; if(elements > 0) { @@ -90,7 +91,6 @@ namespace picongpu ::openPMD::RecordComponent rc = components > 1 ? record[name_lookup[n]] : record[::openPMD::RecordComponent::SCALAR]; - ValueType* dataPtr = frame.getIdentifier(Identifier()).getPointer(); if(elements > 0) { @@ -119,12 +119,15 @@ namespace picongpu "%3%") % elements % globalNumElements % openPMDName(); -/* copy component from temporary array to array of structs */ + /* copy component from temporary array to array of structs */ #pragma omp parallel for simd for(size_t i = 0; i < elements; ++i) { - ComponentType* ref = &reinterpret_cast(dataPtr)[i * components + n]; - *ref = loadBfr.get()[i]; + auto& attrib = frame[i][Identifier{}]; + if constexpr(components == 1) + attrib = loadBfr.get()[i]; + else + reinterpret_cast(&attrib)[n] = loadBfr.get()[i]; } } diff --git a/include/picongpu/plugins/openPMD/restart/LoadSpecies.hpp b/include/picongpu/plugins/openPMD/restart/LoadSpecies.hpp index 4d1e343d801..815120c3120 100644 --- a/include/picongpu/plugins/openPMD/restart/LoadSpecies.hpp +++ b/include/picongpu/plugins/openPMD/restart/LoadSpecies.hpp @@ -33,8 +33,10 @@ #include #include +#include #include +#include #include #include @@ -69,7 +71,7 @@ namespace picongpu using NewParticleDescription = typename ReplaceValueTypeSeq::type; - using openPMDFrameType = Frame; + using openPMDFrameType = Frame; /** Load species from openPMD checkpoint storage * @@ -130,15 +132,24 @@ namespace picongpu % (long long unsigned) totalNumParticles % (long long unsigned) particleOffset; // memory is visible on host and device - openPMDFrameType mappedFrame; + openPMDFrameType mappedFrame{boost::numeric_cast(totalNumParticles)}; +#if __has_include() + if constexpr(PIConGPUVerbose::log_level & picLog::INPUT_OUTPUT::lvl) + { + log( + "Dumping LLAMA memory layout for openPMD frame into llama_openPMD_load_frame.*"); + auto m = typename openPMDFrameType::Mapping{ + llama::ArrayExtentsDynamic{std::min(mappedFrame.view.mapping().extents()[0], 1024)}}; + std::ofstream{"llama_openPMD_load_frame.svg"} << llama::toSvg(m); + std::ofstream{"llama_openPMD_load_frame.html"} << llama::toHtml(m); + } +#endif log("openPMD: malloc mapped memory: %1%") % speciesName; - /*malloc mapped memory*/ - meta::ForEach> mallocMem; - mallocMem(mappedFrame, totalNumParticles); - meta:: - ForEach> - loadAttributes; + mallocMappedFrameMemory(mappedFrame); + + meta::ForEach> + loadAttributes; loadAttributes(params, mappedFrame, particleSpecies, particleOffset, totalNumParticles); if(totalNumParticles != 0) @@ -153,9 +164,7 @@ namespace picongpu *(params->cellDescription), picLog::INPUT_OUTPUT()); - /*free host memory*/ - meta::ForEach> freeMem; - freeMem(mappedFrame); + freeMappedFrameMemory(mappedFrame); } log("openPMD: ( end ) load species: %1%") % speciesName; } diff --git a/include/picongpu/plugins/openPMD/writer/ParticleAttribute.hpp b/include/picongpu/plugins/openPMD/writer/ParticleAttribute.hpp index cbc968fe966..3f4c35c8178 100644 --- a/include/picongpu/plugins/openPMD/writer/ParticleAttribute.hpp +++ b/include/picongpu/plugins/openPMD/writer/ParticleAttribute.hpp @@ -64,7 +64,7 @@ namespace picongpu { using Identifier = T_Identifier; using ValueType = typename pmacc::traits::Resolve::type::type; - const uint32_t components = GetNComponents::value; + constexpr uint32_t components = GetNComponents::value; using ComponentType = typename GetComponentsType::type; OpenPMDName openPMDName; @@ -93,7 +93,7 @@ namespace picongpu ::openPMD::RecordComponent recordComponent = components > 1 ? record[name_lookup[d]] : record[::openPMD::MeshRecordComponent::SCALAR]; - std::string datasetName = components > 1 ? baseName + "/" + name_lookup[d] : baseName; + const std::string datasetName = components > 1 ? baseName + "/" + name_lookup[d] : baseName; params->initDataset(recordComponent, openPMDType, {globalElements}, datasetName); if(unit.size() >= (d + 1)) @@ -107,7 +107,6 @@ namespace picongpu continue; } - ValueType* dataPtr = frame.getIdentifier(Identifier()).getPointer(); // can be moved up? // ask openPMD to create a buffer for us // in some backends (ADIOS2), this allows avoiding memcopies auto span = storeChunkSpan( @@ -132,7 +131,11 @@ namespace picongpu #pragma omp parallel for simd for(size_t i = 0; i < elements; ++i) { - span[i] = reinterpret_cast(dataPtr)[d + i * components]; + const auto attrib = frame[i][Identifier{}]; + if constexpr(components == 1) + span[i] = attrib; + else + span[i] = reinterpret_cast(&attrib)[d]; } flushSeries(*params->openPMDSeries, PreferredFlushTarget::Disk); diff --git a/include/picongpu/plugins/output/WriteSpeciesCommon.hpp b/include/picongpu/plugins/output/WriteSpeciesCommon.hpp index 683fe1767ed..178775d4af0 100644 --- a/include/picongpu/plugins/output/WriteSpeciesCommon.hpp +++ b/include/picongpu/plugins/output/WriteSpeciesCommon.hpp @@ -28,51 +28,50 @@ namespace picongpu { using namespace pmacc; - - template - struct MallocMappedMemory + template + void mallocMappedFrameMemory(Frame& frame) { - template - HINLINE void operator()(ValueType& v1, const size_t size) const - { - using type = typename pmacc::traits::Resolve::type::type; - - bool isMappedMemorySupported = alpaka::hasMappedBufSupport<::alpaka::Platform>; + constexpr bool isMappedMemorySupported = alpaka::hasMappedBufSupport<::alpaka::Platform>; + PMACC_VERIFY_MSG(isMappedMemorySupported, "Device must support mapped memory!"); - PMACC_VERIFY_MSG(isMappedMemorySupported, "Device must support mapped memory!"); - - type* ptr = nullptr; + int i = 0; + for(std::byte*& ptr : frame.blobs()) + { + const auto size = frame.blobSize(i); if(size != 0) { // Memory is automatically mapped to the device if supported. - CUDA_CHECK(cuplaMallocHost((void**) &ptr, size * sizeof(type))); + CUDA_CHECK(cuplaMallocHost((void**) &ptr, size)); } - v1.getIdentifier(T_Type()) = VectorDataBox(ptr); + else + ptr = nullptr; + log("openPMD: blob %1%, size: %2%, ptr: %3%") % i % size % ptr; + i++; } - }; + } /** allocate memory on host * * This functor use `new[]` to allocate memory */ - template - struct MallocHostMemory + template + void mallocFrameMemory(Frame& frame) { - template - HINLINE void operator()(ValueType& v1, const size_t size) const - { - using Attribute = T_Attribute; - using type = typename pmacc::traits::Resolve::type::type; + constexpr bool isMappedMemorySupported = alpaka::hasMappedBufSupport<::alpaka::Pltf>; + PMACC_VERIFY_MSG(isMappedMemorySupported, "Device must support mapped memory!"); - type* ptr = nullptr; + int i = 0; + for(std::byte*& ptr : frame.blobs()) + { + const auto size = frame.blobSize(i); if(size != 0) - { - ptr = new type[size]; - } - v1.getIdentifier(Attribute()) = VectorDataBox(ptr); + ptr = new std::byte[size]; + else + ptr = nullptr; + log("openPMD: blob %1%, size: %2%, ptr: %3%") % i % size % ptr; + i++; } - }; - + } /** copy species to host memory * @@ -92,43 +91,24 @@ namespace picongpu } }; - template - struct FreeMappedMemory + template + void freeMappedFrameMemory(Frame& frame) { - template - HINLINE void operator()(ValueType& value) const - { - auto* ptr = value.getIdentifier(T_Type()).getPointer(); + for(std::byte*& ptr : frame.blobs()) if(ptr != nullptr) { CUDA_CHECK(cuplaFreeHost(ptr)); + ptr = nullptr; } - } - }; + } - //! Free memory - template - struct FreeHostMemory + template + void freeFrameMemory(Frame& frame) { - template - HINLINE void operator()(ValueType& value) const + for(auto* ptr : frame.blobs()) { - using Attribute = T_Attribute; - - auto* ptr = value.getIdentifier(Attribute()).getPointer(); delete[] ptr; + ptr = nullptr; } - }; - - /*functor to create a pair for a MapTuple map*/ - struct OperatorCreateVectorBox - { - template - struct apply - { - using type - = pmacc::meta::Pair::type::type>>; - }; - }; - + } } // namespace picongpu diff --git a/include/picongpu/plugins/radiation/Radiation.hpp b/include/picongpu/plugins/radiation/Radiation.hpp index 5f6ddaf28b7..fc9d7fb64d4 100644 --- a/include/picongpu/plugins/radiation/Radiation.hpp +++ b/include/picongpu/plugins/radiation/Radiation.hpp @@ -285,7 +285,7 @@ namespace picongpu } - void checkpoint(uint32_t timeStep, const std::string restartDirectory) + void checkpoint(uint32_t timeStep, const std::string restartDirectory) override { // only write backup if radiation is calculated: if(notifyPeriod.empty()) diff --git a/include/pmacc/PMaccConfig.cmake b/include/pmacc/PMaccConfig.cmake index fb34f4cf01e..61a84bca778 100644 --- a/include/pmacc/PMaccConfig.cmake +++ b/include/pmacc/PMaccConfig.cmake @@ -384,3 +384,11 @@ endif(PMACC_BLOCKING_KERNEL) set(PMACC_VERBOSE "0" CACHE STRING "set verbose level for PMacc") target_compile_definitions(pmacc PUBLIC "-DPMACC_VERBOSE_LVL=${PMACC_VERBOSE}") + +################################################################################ +# LLAMA +################################################################################ + +add_subdirectory(${PMacc_DIR}/../../thirdParty/llama ${CMAKE_BINARY_DIR}/llama EXCLUDE_FROM_ALL) +target_link_libraries(pmacc PUBLIC llama::llama) + diff --git a/include/pmacc/debug/VerboseLog.hpp b/include/pmacc/debug/VerboseLog.hpp index 674f98b95df..243bdf047de 100644 --- a/include/pmacc/debug/VerboseLog.hpp +++ b/include/pmacc/debug/VerboseLog.hpp @@ -100,7 +100,7 @@ namespace pmacc * If you get an linker error in the next two lines you have not used * DEFINE_LOGLVL makro to define a named logLvl */ - if(logLvl & LogParent::log_level) /*compile-time check*/ + if constexpr((logLvl & LogParent::log_level) != 0) { std::cout << LogParent::getName() << " " << getLogName(LogClass()) << "(" << (logLvl & LogParent::log_level) << ")" @@ -109,9 +109,9 @@ namespace pmacc } template - VerboseLog& operator%(T value) + VerboseLog& operator%([[maybe_unused]] T value) { - if(logLvl & LogParent::log_level) /*compile-time check*/ + if constexpr((logLvl & LogParent::log_level) != 0) fmt % value; return *this; } diff --git a/include/pmacc/math/operation/Add.hpp b/include/pmacc/math/operation/Add.hpp index 3b02143d16a..079e3bfc87d 100644 --- a/include/pmacc/math/operation/Add.hpp +++ b/include/pmacc/math/operation/Add.hpp @@ -33,15 +33,15 @@ namespace pmacc struct Add { template - HDINLINE void operator()(Dst& dst, const Src& src) const + HDINLINE void operator()(Dst&& dst, const Src& src) const { - dst += src; + std::forward(dst) += src; } template - HDINLINE void operator()(const T_Worker&, Dst& dst, const Src& src) const + HDINLINE void operator()(const T_Worker&, Dst&& dst, const Src& src) const { - dst += src; + std::forward(dst) += src; } }; } // namespace operation diff --git a/include/pmacc/math/operation/Assign.hpp b/include/pmacc/math/operation/Assign.hpp index 847163311fd..bfe30b343d8 100644 --- a/include/pmacc/math/operation/Assign.hpp +++ b/include/pmacc/math/operation/Assign.hpp @@ -32,15 +32,15 @@ namespace pmacc struct Assign { template - HDINLINE void operator()(Dst& dst, const Src& src) const + HDINLINE void operator()(Dst&& dst, const Src& src) const { - dst = src; + std::forward(dst) = src; } template - HDINLINE void operator()(const T_Worker&, Dst& dst, const Src& src) const + HDINLINE void operator()(const T_Worker&, Dst&& dst, const Src& src) const { - dst = src; + std::forward(dst) = src; } }; } // namespace operation diff --git a/include/pmacc/math/vector/TwistComponents.hpp b/include/pmacc/math/vector/TwistComponents.hpp index 2ed3c0d5e41..df727884c8c 100644 --- a/include/pmacc/math/vector/TwistComponents.hpp +++ b/include/pmacc/math/vector/TwistComponents.hpp @@ -51,9 +51,8 @@ namespace pmacc T_dim, T_Accessor, math::StackedNavigator>, - T_Storage>&; + T_Storage>; }; - } // namespace detail /** Returns a reference of vector with twisted axes. @@ -69,13 +68,17 @@ namespace pmacc * @return reference of the input vector with twisted axes. */ template - HDINLINE auto twistComponents(T_Vector& vector) + HDINLINE auto& twistComponents(T_Vector& vector) { - /* The reinterpret_cast is valid because the target type is the same as the - * input type except its navigator policy which does not occupy any memory though. - */ - return reinterpret_cast::type>(vector); + // cast to reference with new navigator + return reinterpret_cast::type&>(vector); } + template + HDINLINE auto twistComponents(T_Vector&& vector) + { + // construct new vector (with reference semantic) with new navigator + return typename detail::TwistComponents::type(std::move(vector)); + } } // namespace math } // namespace pmacc diff --git a/include/pmacc/math/vector/Vector.hpp b/include/pmacc/math/vector/Vector.hpp index 3c7999d933f..fc891538565 100644 --- a/include/pmacc/math/vector/Vector.hpp +++ b/include/pmacc/math/vector/Vector.hpp @@ -34,6 +34,8 @@ #include #include +#include + namespace pmacc { namespace math @@ -74,6 +76,36 @@ namespace pmacc } }; + template + struct VectorLlamaRecordRefStorage + { + static_assert(llama::isRecordRef); + + inline static constexpr bool isConst = false; + inline static constexpr int dim = T_Dim; + using type = T_Type; + + RecordRef rr; + + HDINLINE + type& operator[](const int idx) + { + return mp_with_index( + idx, + [&](auto ic) LLAMA_LAMBDA_INLINE -> decltype(auto) + { return rr(llama::RecordCoord{}); }); + } + + HDINLINE + const type& operator[](const int idx) const + { + return mp_with_index( + idx, + [&](auto ic) LLAMA_LAMBDA_INLINE -> decltype(auto) + { return rr(llama::RecordCoord{}); }); + } + }; + } // namespace detail namespace tag @@ -108,6 +140,11 @@ namespace pmacc { } + HDINLINE + constexpr explicit Vector(Storage s) : Storage{std::move(s)} + { + } + HDINLINE constexpr Vector(const type x) { @@ -135,9 +172,14 @@ namespace pmacc HDINLINE constexpr Vector(const Vector& other) = default; + template + HDINLINE Vector(const Vector& other) + : Storage{static_cast(other)} + { + } + template HDINLINE Vector(const Vector& other) - : Storage{} { for(uint32_t i = 0u; i < dim; i++) (*this)[i] = other[i]; @@ -531,6 +573,12 @@ namespace pmacc } }; + template + inline constexpr bool isVector = false; + + template + inline constexpr bool isVector> = true; + template std::ostream& operator<<(std::ostream& s, const Vector& vec) { @@ -763,6 +811,40 @@ namespace pmacc template HDINLINE T_Vector basisVector(); + /** Creates a \ref pmacc::math::Vector backed by a LLAMA RecordRef as storage. All other properties of the + * Vector are taken from ProtoVec. + */ + template + HDINLINE auto makeVectorWithLlamaStorage(RecordRef rr) + { + return Vector< + typename ProtoVec::type, + ProtoVec::dim, + typename ProtoVec::Accessor, + typename ProtoVec::Navigator, + detail::VectorLlamaRecordRefStorage>{{rr}}; + } + + namespace detail + { + template + struct ReplaceVectorByArrayImpl + { + using type = T; + }; + + template + struct ReplaceVectorByArrayImpl> + { + using type = T_Type[T_dim]; + }; + } // namespace detail + + /** If T is a \ref pmacc::math::Vector, replaced it by an equally sized and typed array. Otherwise, just passes + * the type through. + */ + template + using ReplaceVectorByArray = typename detail::ReplaceVectorByArrayImpl::type; } // namespace math } // namespace pmacc diff --git a/include/pmacc/math/vector/Vector.tpp b/include/pmacc/math/vector/Vector.tpp index 7f5ec3d2ae2..e55c6913974 100644 --- a/include/pmacc/math/vector/Vector.tpp +++ b/include/pmacc/math/vector/Vector.tpp @@ -108,8 +108,8 @@ namespace pmacc /** specialize l2norm2 algorithm */ - template - struct L2norm2<::pmacc::math::Vector> + template + struct L2norm2<::pmacc::math::Vector> { using result = typename ::pmacc::math::Vector::type; @@ -124,12 +124,12 @@ namespace pmacc /** specialize l2norm algorithm */ - template - struct L2norm<::pmacc::math::Vector> + template + struct L2norm<::pmacc::math::Vector> { - using result = typename ::pmacc::math::Vector::type; + using result = typename ::pmacc::math::Vector::type; - HDINLINE result operator()(const ::pmacc::math::Vector& vector) + HDINLINE result operator()(const ::pmacc::math::Vector& vector) { result tmp = pmacc::math::l2norm2(vector); return cupla::math::sqrt(tmp); diff --git a/include/pmacc/memory/boxes/CachedBox.hpp b/include/pmacc/memory/boxes/CachedBox.hpp index 02cc92edc67..34e6a691594 100644 --- a/include/pmacc/memory/boxes/CachedBox.hpp +++ b/include/pmacc/memory/boxes/CachedBox.hpp @@ -26,23 +26,30 @@ #include "pmacc/memory/boxes/SharedBox.hpp" #include "pmacc/types.hpp" - -namespace pmacc +namespace pmacc::CachedBox { - namespace CachedBox + template< + uint32_t Id_, + typename T_MemoryMapping, + typename ValueType_, + typename BlockDescription_, + typename T_Worker> + DINLINE auto create(T_Worker const& worker, const BlockDescription_ block) { - template - DINLINE auto create(T_Worker const& worker, const BlockDescription_ block) - { - using OffsetOrigin = typename BlockDescription_::OffsetOrigin; - using Type = DataBox>; - return Type{Type::init(worker)}.shift(DataSpace{OffsetOrigin::toRT()}); - } + using OffsetOrigin = typename BlockDescription_::OffsetOrigin; + using Type + = DataBox>; + return Type{Type::init(worker)}.shift(DataSpace{OffsetOrigin::toRT()}); + } - template - DINLINE auto create(T_Worker const& worker, const ValueType_& value, const BlockDescription_ block) - { - return create(worker); - } - } // namespace CachedBox -} // namespace pmacc + template< + uint32_t Id_, + typename T_MemoryMapping, + typename ValueType_, + typename BlockDescription_, + typename T_Worker> + DINLINE auto create(T_Worker const& worker, const ValueType_& value, const BlockDescription_ block) + { + return create(worker); + } +} // namespace pmacc::CachedBox diff --git a/include/pmacc/memory/boxes/DataBox.hpp b/include/pmacc/memory/boxes/DataBox.hpp index 5641e09dd42..ebda218e9fb 100644 --- a/include/pmacc/memory/boxes/DataBox.hpp +++ b/include/pmacc/memory/boxes/DataBox.hpp @@ -22,8 +22,12 @@ #pragma once +#include "SharedBox.hpp" #include "pmacc/attribute/FunctionSpecifier.hpp" #include "pmacc/dimensions/DataSpace.hpp" +#include "pmacc/memory/shared/Allocate.hpp" + +#include namespace pmacc { @@ -48,7 +52,7 @@ namespace pmacc } } // namespace detail - template + template struct DataBox : Base { HDINLINE DataBox() = default; @@ -61,7 +65,8 @@ namespace pmacc HDINLINE decltype(auto) operator()(DataSpace const& idx = {}) const { - ///@todo(bgruber): inline and replace this by if constexpr in C++17 + /// @TODO(bgruber): inline and replace this by if constexpr in C++17 at some point. however, nvcc generates + /// worse code with if constexpr. Ask Rene about it. return detail::access(*this, idx); } @@ -72,4 +77,97 @@ namespace pmacc return result; } }; -} // namespace pmacc + + namespace internal + { + template + HDINLINE constexpr auto toArrayExtents(math::CT::Vector) + { + using V = math::CT::Vector; + using IndexType = typename math::CT::Vector::type; + if constexpr(V::dim == 1) + { + return llama::ArrayExtents{}; + } + else if constexpr(V::dim == 2) + { + return llama::ArrayExtents{}; + } + else if constexpr(V::dim == 3) + { + return llama::ArrayExtents{}; + } + else + { + static_assert(sizeof(IndexType) == 0, "Vector dimension must be 1, 2 or 3"); + } + } + + template + HDINLINE auto toArrayIndex(DataSpace idx) + { + llama::ArrayIndex::type, Dim> ai; + for(int i = 0; i < Dim; i++) + ai[i] = idx[Dim - 1 - i]; + return ai; + } + } // namespace internal + + // handle DataBox wrapping SharedBox with LLAMA + template + struct DataBox< + SharedBox, + std::enable_if_t>> + { + using SharedBoxBase = SharedBox; + + inline static constexpr std::uint32_t Dim = T_dim; + using ValueType = T_TYPE; + using Size = T_SizeVector; + + using SplitRecordDim = llama::TransformLeaves; + using RecordDim = std::conditional_t; + using ArrayExtents = decltype(internal::toArrayExtents(T_SizeVector{})); + using Mapping = typename T_MemoryMapping::template fn; + using View = llama::View; + + View view; + DataSpace offset{}; + + HDINLINE DataBox() = default; + + HDINLINE DataBox(SharedBoxBase sb) + : view{ + Mapping{{}}, + llama::Array{ + const_cast(reinterpret_cast(sb.fixedPointer))}} + { + } + + HDINLINE decltype(auto) operator()(DataSpace idx = {}) const + { + auto&& ref = const_cast(view)(internal::toArrayIndex(DataSpace{idx + offset})); + if constexpr(math::isVector && llama::isRecordRef>) + return math::makeVectorWithLlamaStorage(ref); + else + return ref; + } + + HDINLINE DataBox shift(const DataSpace& offset) const + { + // TODO(bgruber): can we enhance LLAMA to make this smarter than just keeping the offset? + DataBox result(*this); + result.offset += offset; + return result; + } + + template + static DINLINE SharedBoxBase init(T_Worker const& worker) + { + auto& mem_sh + = memory::shared::allocate::type::value>>( + worker); + return {mem_sh.data()}; + } + }; +} // namespace pmacc \ No newline at end of file diff --git a/include/pmacc/memory/boxes/SharedBox.hpp b/include/pmacc/memory/boxes/SharedBox.hpp index 18b4d618e67..ec4e55b2c9a 100644 --- a/include/pmacc/memory/boxes/SharedBox.hpp +++ b/include/pmacc/memory/boxes/SharedBox.hpp @@ -21,11 +21,14 @@ #pragma once +#include "pmacc/mappings/kernel/MappingDescription.hpp" #include "pmacc/math/Vector.hpp" #include "pmacc/memory/Array.hpp" #include "pmacc/memory/shared/Allocate.hpp" #include "pmacc/types.hpp" +#include + namespace pmacc { namespace detail @@ -49,7 +52,7 @@ namespace pmacc } } // namespace detail - /** create shared memory on gpu + /** A shared memory on gpu. Used in conjunction with \ref pmacc::DataBox. * * @tparam T_TYPE type of memory objects * @tparam T_Vector CT::Vector with size description (per dimension) @@ -57,7 +60,12 @@ namespace pmacc * (is needed if more than one instance of shared memory in one kernel is used) * @tparam T_dim dimension of the memory (supports DIM1,DIM2 and DIM3) */ - template + template< + typename T_TYPE, + typename T_Vector, + uint32_t T_id, + typename T_MemoryMapping, + uint32_t T_dim = T_Vector::dim> struct SharedBox { static constexpr std::uint32_t Dim = T_dim; @@ -74,8 +82,8 @@ namespace pmacc HDINLINE SharedBox(SharedBox const&) = default; using ReducedType1D = T_TYPE&; - using ReducedType2D = SharedBox::type, T_id>; - using ReducedType3D = SharedBox::type, T_id>; + using ReducedType2D = SharedBox::type, T_id, T_MemoryMapping>; + using ReducedType3D = SharedBox::type, T_id, T_MemoryMapping>; using ReducedType = std::conditional_t>; @@ -116,7 +124,7 @@ namespace pmacc return {mem_sh.data()}; } - protected: + public: PMACC_ALIGN(fixedPointer, ValueType*); }; } // namespace pmacc diff --git a/include/pmacc/particles/ParticleDescription.hpp b/include/pmacc/particles/ParticleDescription.hpp index 2e0bc8dc0b7..4f79522a78a 100644 --- a/include/pmacc/particles/ParticleDescription.hpp +++ b/include/pmacc/particles/ParticleDescription.hpp @@ -68,14 +68,7 @@ namespace pmacc using HandleGuardRegion = T_HandleGuardRegion; using MethodsList = ToSeq; using FrameExtensionList = ToSeq; - using ThisType = ParticleDescription< - Name, - SuperCellSize, - ValueTypeSeq, - FlagsList, - HandleGuardRegion, - MethodsList, - FrameExtensionList>; + using ThisType = ParticleDescription; // Compile-time check uniqueness of attributes and flags PMACC_CASSERT_MSG( diff --git a/include/pmacc/particles/ParticlesBase.hpp b/include/pmacc/particles/ParticlesBase.hpp index 71ece0b21ef..45b12870a58 100644 --- a/include/pmacc/particles/ParticlesBase.hpp +++ b/include/pmacc/particles/ParticlesBase.hpp @@ -41,7 +41,7 @@ namespace pmacc /* Tag used for marking particle types */ struct ParticlesTag; - template + template class ParticlesBase : public SimulationFieldHelper { using ParticleDescription = T_ParticleDescription; @@ -53,6 +53,7 @@ namespace pmacc */ using BufferType = ParticlesBuffer< ParticleDescription, + T_FrameMemoryLayout, typename MappingDesc::SuperCellSize, T_DeviceHeap, MappingDesc::Dim>; diff --git a/include/pmacc/particles/ParticlesBase.tpp b/include/pmacc/particles/ParticlesBase.tpp index 48836c6ce7d..9620ecbac62 100644 --- a/include/pmacc/particles/ParticlesBase.tpp +++ b/include/pmacc/particles/ParticlesBase.tpp @@ -32,8 +32,9 @@ namespace pmacc { - template - void ParticlesBase::deleteGuardParticles(uint32_t exchangeType) + template + void ParticlesBase::deleteGuardParticles( + uint32_t exchangeType) { ExchangeMapping mapper(this->cellDescription, exchangeType); @@ -43,9 +44,9 @@ namespace pmacc (mapper.getGridDim())(particlesBuffer->getDeviceParticleBox(), mapper); } - template + template template - void ParticlesBase::deleteParticlesInArea() + void ParticlesBase::deleteParticlesInArea() { auto const mapper = makeAreaMapper(this->cellDescription); @@ -55,15 +56,16 @@ namespace pmacc (mapper.getGridDim())(particlesBuffer->getDeviceParticleBox(), mapper); } - template - void ParticlesBase::reset(uint32_t) + template + void ParticlesBase::reset(uint32_t) { deleteParticlesInArea(); particlesBuffer->reset(); } - template - void ParticlesBase::copyGuardToExchange(uint32_t exchangeType) + template + void ParticlesBase::copyGuardToExchange( + uint32_t exchangeType) { if(particlesBuffer->hasSendExchange(exchangeType)) { @@ -81,8 +83,9 @@ namespace pmacc } } - template - void ParticlesBase::insertParticles(uint32_t exchangeType) + template + void ParticlesBase::insertParticles( + uint32_t exchangeType) { if(particlesBuffer->hasReceiveExchange(exchangeType)) { diff --git a/include/pmacc/particles/memory/buffers/ParticlesBuffer.hpp b/include/pmacc/particles/memory/buffers/ParticlesBuffer.hpp index 665561742a9..31f68752e92 100644 --- a/include/pmacc/particles/memory/buffers/ParticlesBuffer.hpp +++ b/include/pmacc/particles/memory/buffers/ParticlesBuffer.hpp @@ -43,8 +43,22 @@ namespace pmacc { - namespace detail + /** + * Describes DIM-dimensional buffer for particles data on the host. + * + * @tParam T_ParticleDescription Object which describe a frame @see ParticleDescription.hpp + * @tparam SuperCellSize_ TVec which descripe size of a superce + * @tparam DIM dimension of the buffer (1-3) + */ + template< + typename T_ParticleDescription, + typename T_FrameMemoryLayout, + class SuperCellSize_, + typename T_DeviceHeap, + unsigned DIM> + class ParticlesBuffer { + public: /** create static array */ template @@ -53,24 +67,9 @@ namespace pmacc template struct apply { - using type = meta::Pair< - X, - StaticArray::type::type, std::integral_constant>>; }; }; - } // namespace detail - /** - * Describes DIM-dimensional buffer for particles data on the host. - * - * @tParam T_ParticleDescription Object which describe a frame @see ParticleDescription.hpp - * @tparam SuperCellSize_ TVec which descripe size of a superce - * @tparam DIM dimension of the buffer (1-3) - */ - template - class ParticlesBuffer - { - public: /** type of the border frame management object * * contains: @@ -100,9 +99,8 @@ namespace pmacc * * a group of particles is stored as frame */ - using FrameType = Frame< - detail::OperatorCreatePairStaticArray::type::value>, - FrameDescription>; + using FrameType + = Frame::type::value, FrameDescription, T_FrameMemoryLayout>; using FrameDescriptionBorder = typename ReplaceValueTypeSeq::type; @@ -112,7 +110,7 @@ namespace pmacc * - each frame contains only one particle * - local administration attributes of a particle are removed */ - using FrameTypeBorder = Frame, FrameDescriptionBorder>; + using FrameTypeBorder = Frame<1, FrameDescriptionBorder, llama::mapping::BindOne<>>; using SuperCellType = SuperCell; diff --git a/include/pmacc/particles/memory/dataTypes/Particle.hpp b/include/pmacc/particles/memory/dataTypes/Particle.hpp index 67b94bb5f80..b93f13617f3 100644 --- a/include/pmacc/particles/memory/dataTypes/Particle.hpp +++ b/include/pmacc/particles/memory/dataTypes/Particle.hpp @@ -41,6 +41,8 @@ #include +#include + namespace pmacc { /** A single particle of a @see Frame @@ -119,20 +121,18 @@ namespace pmacc * @return result of operator[] of the Frame */ template - HDINLINE auto& operator[](const T_Key key) + HDINLINE decltype(auto) operator[](const T_Key key) { PMACC_CASSERT_MSG_TYPE(key_not_available, T_Key, traits::HasIdentifier::type::value); - - return frame->getIdentifier(key)[idx]; + return frame->get(idx, key); } /** const version of method operator(const T_Key) */ template - HDINLINE const auto& operator[](const T_Key key) const + HDINLINE decltype(auto) operator[](const T_Key key) const { PMACC_CASSERT_MSG_TYPE(key_not_available, T_Key, traits::HasIdentifier::type::value); - - return frame->getIdentifier(key)[idx]; + return frame->get(idx, key); } HDINLINE diff --git a/include/pmacc/particles/memory/frames/Frame.hpp b/include/pmacc/particles/memory/frames/Frame.hpp index 678c7cbbe67..e64ca9e8c30 100644 --- a/include/pmacc/particles/memory/frames/Frame.hpp +++ b/include/pmacc/particles/memory/frames/Frame.hpp @@ -34,36 +34,145 @@ #include "pmacc/traits/HasIdentifier.hpp" #include "pmacc/types.hpp" +#include +#include #include +#include + namespace pmacc { namespace pmath = pmacc::math; + namespace detail + { + template + using MakeLlamaField = llama::Field::type::type>; + + template + using RecordDimFromValueTypeSeq = mp_rename, llama::Record>; + + template + inline constexpr bool splitVector = false; + + template + inline constexpr bool splitVector> + = T_MemoryLayout::splitVector; + + template + struct ViewHolder + { + private: + using IndexType = int; // TODO(bgruber): where do I get this type from? + inline static constexpr IndexType particlesPerFrame + = (T_size == llama::dyn) ? static_cast(llama::dyn) : static_cast(T_size); + using RawRecordDim = RecordDimFromValueTypeSeq; + using SplitRecordDim = llama::TransformLeaves; + + public: + using RecordDim = std::conditional_t, SplitRecordDim, RawRecordDim>; + using ArrayExtents = llama::ArrayExtents; + using Mapping = typename T_MemoryLayout::template fn; + static_assert( + particlesPerFrame == llama::dyn || Mapping::blobCount == 1, + "For statically sizes frames, only mappings with a single blob are supported"); + using BlobType = std::conditional_t< + particlesPerFrame == llama::dyn, + std::byte*, + llama::Array>; + using View = llama::View; + + private: + inline static constexpr std::size_t alignment + = particlesPerFrame == llama::dyn ? alignof(std::byte*) : llama::alignOf; + + public: + alignas(alignment) View view; + + ViewHolder() = default; + + HDINLINE ViewHolder(IndexType size) : view{Mapping{ArrayExtents{size}}} + { + } + + HDINLINE auto& blobs() + { + return view.blobs(); + } + + HDINLINE auto blobSize(int i) + { + return view.mapping().blobSize(i); + } + }; + + /** Proxy reference for particle attributes which are backed by a LLAMA RecordRef. This could become obsolete + * when LLAMA's RecordRef supports operator= from TupleLike objects. Ask bgruber about it every now and then. + */ + template + struct LlamaParticleAttribute + { + template + auto operator=(const LlamaParticleAttribute& lpa) -> LlamaParticleAttribute& + { + rr = lpa.rr; + return *this; + } + + template + auto operator=(LlamaParticleAttribute&& lpa) -> LlamaParticleAttribute& + { + rr = lpa.rr; + return *this; + } + + template + auto operator=(T&& t) -> LlamaParticleAttribute& + { + rr.store(std::forward(t)); + return *this; + } + + template + operator T() const + { + return rr.template loadAs(); + } + + RecordRef rr; + }; + } // namespace detail + /** Frame is a storage for arbitrary number >0 of Particles with attributes * - * @tparam T_CreatePairOperator unary template operator to create a boost pair - * from single type ( pair ) - * @see MapTupel + * @tparam T_size Static number of particles this frame stores, or llama::dyn for dynamic size * @tparam T_ValueTypeSeq sequence with value_identifier * @tparam T_MethodsList sequence of classes with particle methods * (e.g. calculate mass, gamma, ...) * @tparam T_Flags sequence with identifiers to add flags on a frame * (e.g. useSolverXY, calcRadiation, ...) + * @tparam T_MemoryLayout Memory layout to be used for the particle attribute data. */ - template + template struct Frame; - template + template struct Frame - : protected pmath::MapTuple< - typename SeqToMap::type> + : public detail::ViewHolder , public InheritLinearly>>::type>> + boost::mpl::apply1>>::type>> { + using ViewHolder = detail::ViewHolder; + static_assert( + T_size == llama::dyn + || sizeof(ViewHolder) == + typename ViewHolder::Mapping{llama::ArrayExtents(T_size)>{}}.blobSize(0)); + + using ViewHolder::ViewHolder; + using ParticleDescription = T_ParticleDescription; using Name = typename ParticleDescription::Name; using SuperCellSize = typename ParticleDescription::SuperCellSize; @@ -71,9 +180,7 @@ namespace pmacc using MethodsList = typename ParticleDescription::MethodsList; using FlagList = typename ParticleDescription::FlagsList; using FrameExtensionList = typename ParticleDescription::FrameExtensionList; - using ThisType = Frame; - /* definition of the MapTupel where we inherit from*/ - using BaseType = pmath::MapTuple::type>; + using ThisType = Frame; /* type of a single particle*/ using ParticleType = pmacc::Particle; @@ -90,25 +197,35 @@ namespace pmacc return ParticleType(*this, idx); } - /** access attribute with a identifier - * - * @param T_Key instance of identifier type - * (can be an alias, value_identifier or any other class) - * @return result of operator[] of MapTupel - */ - template - HDINLINE auto& getIdentifier(const T_Key) + private: + template + static HDINLINE decltype(auto) at(Frame& f, uint32_t i, const T_Key key) { using Key = typename GetKeyFromAlias::type; - return BaseType::operator[](Key()); + auto&& ref = f.view(i)(Key{}); + + using OldDstType = typename traits::Resolve::type::type; + using RefType = std::remove_reference_t; + + if constexpr(pmath::isVector && llama::isRecordRef) + return pmath::makeVectorWithLlamaStorage(ref); + else if constexpr(llama::isRecordRef) + return detail::LlamaParticleAttribute{ref}; + else + return ref; } - /** const version of method getIdentifier(const T_Key) */ + public: template - HDINLINE const auto& getIdentifier(const T_Key) const + HDINLINE decltype(auto) get(uint32_t i, const T_Key) { - using Key = typename GetKeyFromAlias::type; - return BaseType::operator[](Key()); + return at(*this, i, T_Key{}); + } + + template + HDINLINE decltype(auto) get(uint32_t i, const T_Key) const + { + return at(*this, i, T_Key{}); } HINLINE static std::string getName() @@ -119,11 +236,15 @@ namespace pmacc namespace traits { - template - struct HasIdentifier, T_IdentifierName> + template< + typename T_IdentifierName, + std::size_t T_size, + typename T_ParticleDescription, + typename T_MemoryLayout> + struct HasIdentifier, T_IdentifierName> { private: - using FrameType = pmacc::Frame; + using FrameType = pmacc::Frame; public: using ValueTypeSeq = typename FrameType::ValueTypeSeq; @@ -136,11 +257,15 @@ namespace pmacc // needed because of nvcc 11.0 bug }; - template - struct HasFlag, T_IdentifierName> + template< + typename T_IdentifierName, + std::size_t T_size, + typename T_ParticleDescription, + typename T_MemoryLayout> + struct HasFlag, T_IdentifierName> { private: - using FrameType = pmacc::Frame; + using FrameType = pmacc::Frame; using SolvedAliasName = typename pmacc::traits::GetFlagType::type; using FlagList = typename FrameType::FlagList; @@ -148,11 +273,15 @@ namespace pmacc using type = mp_contains; }; - template - struct GetFlagType, T_IdentifierName> + template< + typename T_IdentifierName, + std::size_t T_size, + typename T_ParticleDescription, + typename T_MemoryLayout> + struct GetFlagType, T_IdentifierName> { private: - using FrameType = pmacc::Frame; + using FrameType = pmacc::Frame; using FlagList = typename FrameType::FlagList; public: diff --git a/share/picongpu/benchmarks/TWEAC-FOM/include/picongpu/param/memory.param b/share/picongpu/benchmarks/TWEAC-FOM/include/picongpu/param/memory.param index 42718ab30d9..cdc48e1deb5 100644 --- a/share/picongpu/benchmarks/TWEAC-FOM/include/picongpu/param/memory.param +++ b/share/picongpu/benchmarks/TWEAC-FOM/include/picongpu/param/memory.param @@ -114,4 +114,19 @@ namespace picongpu */ constexpr bool fieldTmpSupportGatherCommunication = true; + struct ParticleFrameMemoryLayout + : llama::mapping::BindSoA + { + inline static constexpr bool splitVector = false; + }; + + struct ParticleFrameMemoryLayoutOpenPMD : llama::mapping::BindSoA + { + inline static constexpr bool splitVector = false; + }; + + struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<> + { + inline static constexpr bool splitVector = false; + }; } // namespace picongpu diff --git a/share/picongpu/benchmarks/Thermal/include/picongpu/param/memory.param b/share/picongpu/benchmarks/Thermal/include/picongpu/param/memory.param index 38fae0c61e9..af35db5b358 100644 --- a/share/picongpu/benchmarks/Thermal/include/picongpu/param/memory.param +++ b/share/picongpu/benchmarks/Thermal/include/picongpu/param/memory.param @@ -112,4 +112,19 @@ namespace picongpu */ constexpr bool fieldTmpSupportGatherCommunication = false; + struct ParticleFrameMemoryLayout + : llama::mapping::BindSoA + { + inline static constexpr bool splitVector = false; + }; + + struct ParticleFrameMemoryLayoutOpenPMD : llama::mapping::BindSoA + { + inline static constexpr bool splitVector = false; + }; + + struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<> + { + inline static constexpr bool splitVector = false; + }; } // namespace picongpu diff --git a/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/memory.param b/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/memory.param index ffb1f234f6d..d3b093da5b1 100644 --- a/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/memory.param +++ b/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/memory.param @@ -100,4 +100,19 @@ namespace picongpu */ constexpr bool fieldTmpSupportGatherCommunication = true; + struct ParticleFrameMemoryLayout + : llama::mapping::BindSoA + { + inline static constexpr bool splitVector = false; + }; + + struct ParticleFrameMemoryLayoutOpenPMD : llama::mapping::BindSoA + { + inline static constexpr bool splitVector = false; + }; + + struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<> + { + inline static constexpr bool splitVector = false; + }; } // namespace picongpu diff --git a/share/picongpu/examples/FoilLCT/include/picongpu/param/memory.param b/share/picongpu/examples/FoilLCT/include/picongpu/param/memory.param index bbea879ac50..1e774267cab 100644 --- a/share/picongpu/examples/FoilLCT/include/picongpu/param/memory.param +++ b/share/picongpu/examples/FoilLCT/include/picongpu/param/memory.param @@ -114,4 +114,19 @@ namespace picongpu */ constexpr bool fieldTmpSupportGatherCommunication = true; + struct ParticleFrameMemoryLayout + : llama::mapping::BindSoA + { + inline static constexpr bool splitVector = false; + }; + + struct ParticleFrameMemoryLayoutOpenPMD : llama::mapping::BindSoA + { + inline static constexpr bool splitVector = false; + }; + + struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<> + { + inline static constexpr bool splitVector = false; + }; } // namespace picongpu diff --git a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/memory.param b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/memory.param index 5eb280f01e6..a630167f568 100644 --- a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/memory.param +++ b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/memory.param @@ -114,4 +114,19 @@ namespace picongpu */ constexpr bool fieldTmpSupportGatherCommunication = true; + struct ParticleFrameMemoryLayout + : llama::mapping::BindSoA + { + inline static constexpr bool splitVector = false; + }; + + struct ParticleFrameMemoryLayoutOpenPMD : llama::mapping::BindSoA + { + inline static constexpr bool splitVector = false; + }; + + struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<> + { + inline static constexpr bool splitVector = false; + }; } // namespace picongpu diff --git a/share/picongpu/examples/WeibelTransverse/include/picongpu/param/memory.param b/share/picongpu/examples/WeibelTransverse/include/picongpu/param/memory.param index 257fc0b9b00..734df0f3434 100644 --- a/share/picongpu/examples/WeibelTransverse/include/picongpu/param/memory.param +++ b/share/picongpu/examples/WeibelTransverse/include/picongpu/param/memory.param @@ -114,4 +114,19 @@ namespace picongpu */ constexpr bool fieldTmpSupportGatherCommunication = true; + struct ParticleFrameMemoryLayout + : llama::mapping::BindSoA + { + inline static constexpr bool splitVector = false; + }; + + struct ParticleFrameMemoryLayoutOpenPMD : llama::mapping::BindSoA + { + inline static constexpr bool splitVector = false; + }; + + struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<> + { + inline static constexpr bool splitVector = false; + }; } // namespace picongpu diff --git a/share/picongpu/examples/atomicPhysics/include/picongpu/param/memory.param b/share/picongpu/examples/atomicPhysics/include/picongpu/param/memory.param index 1a5af5925c2..b7e58d30790 100644 --- a/share/picongpu/examples/atomicPhysics/include/picongpu/param/memory.param +++ b/share/picongpu/examples/atomicPhysics/include/picongpu/param/memory.param @@ -114,4 +114,19 @@ namespace picongpu */ constexpr bool fieldTmpSupportGatherCommunication = true; + struct ParticleFrameMemoryLayout + : llama::mapping::BindSoA + { + inline static constexpr bool splitVector = false; + }; + + struct ParticleFrameMemoryLayoutOpenPMD : llama::mapping::BindSoA + { + inline static constexpr bool splitVector = false; + }; + + struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<> + { + inline static constexpr bool splitVector = false; + }; } // namespace picongpu diff --git a/share/picongpu/pypicongpu/template/include/picongpu/param/memory.param b/share/picongpu/pypicongpu/template/include/picongpu/param/memory.param index f966b7ffa80..0f87144675e 100644 --- a/share/picongpu/pypicongpu/template/include/picongpu/param/memory.param +++ b/share/picongpu/pypicongpu/template/include/picongpu/param/memory.param @@ -114,4 +114,18 @@ namespace picongpu */ constexpr bool fieldTmpSupportGatherCommunication = true; + struct ParticleFrameMemoryLayout : llama::mapping::BindSoA + { + inline static constexpr bool splitVector = false; + }; + + struct ParticleFrameMemoryLayoutOpenPMD : llama::mapping::BindSoA + { + inline static constexpr bool splitVector = false; + }; + + struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<> + { + inline static constexpr bool splitVector = false; + }; } // namespace picongpu diff --git a/share/picongpu/tests/CollisionsBeamRelaxation/include/picongpu/param/memory.param b/share/picongpu/tests/CollisionsBeamRelaxation/include/picongpu/param/memory.param index 4ef4235ef26..52522663950 100644 --- a/share/picongpu/tests/CollisionsBeamRelaxation/include/picongpu/param/memory.param +++ b/share/picongpu/tests/CollisionsBeamRelaxation/include/picongpu/param/memory.param @@ -114,4 +114,19 @@ namespace picongpu */ constexpr bool fieldTmpSupportGatherCommunication = true; + struct ParticleFrameMemoryLayout + : llama::mapping::BindSoA + { + inline static constexpr bool splitVector = false; + }; + + struct ParticleFrameMemoryLayoutOpenPMD : llama::mapping::BindSoA + { + inline static constexpr bool splitVector = false; + }; + + struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<> + { + inline static constexpr bool splitVector = false; + }; } // namespace picongpu diff --git a/share/picongpu/tests/CollisionsThermalisation/include/picongpu/param/memory.param b/share/picongpu/tests/CollisionsThermalisation/include/picongpu/param/memory.param index 4ef4235ef26..52522663950 100644 --- a/share/picongpu/tests/CollisionsThermalisation/include/picongpu/param/memory.param +++ b/share/picongpu/tests/CollisionsThermalisation/include/picongpu/param/memory.param @@ -114,4 +114,19 @@ namespace picongpu */ constexpr bool fieldTmpSupportGatherCommunication = true; + struct ParticleFrameMemoryLayout + : llama::mapping::BindSoA + { + inline static constexpr bool splitVector = false; + }; + + struct ParticleFrameMemoryLayoutOpenPMD : llama::mapping::BindSoA + { + inline static constexpr bool splitVector = false; + }; + + struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<> + { + inline static constexpr bool splitVector = false; + }; } // namespace picongpu diff --git a/share/picongpu/tests/KHI_growthRate/include/picongpu/param/memory.param b/share/picongpu/tests/KHI_growthRate/include/picongpu/param/memory.param index 5eb280f01e6..77baf7380fb 100644 --- a/share/picongpu/tests/KHI_growthRate/include/picongpu/param/memory.param +++ b/share/picongpu/tests/KHI_growthRate/include/picongpu/param/memory.param @@ -114,4 +114,27 @@ namespace picongpu */ constexpr bool fieldTmpSupportGatherCommunication = true; + // struct ParticleFrameMemoryLayout : llama::mapping::BindAoS + struct ParticleFrameMemoryLayout + : llama::mapping::BindSoA + // struct ParticleFrameMemoryLayout : llama::mapping::BindAoSoA<16> + // struct ParticleFrameMemoryLayout : llama::mapping::BindAoSoA<32> + // struct ParticleFrameMemoryLayout : llama::mapping::BindAoSoA<64> + { + inline static constexpr bool splitVector = false; + }; + + struct ParticleFrameMemoryLayoutOpenPMD : llama::mapping::BindSoA + { + inline static constexpr bool splitVector = false; + }; + + // struct SharedDataBoxMemoryLayout : llama::mapping::BindSoA + // struct SharedDataBoxMemoryLayout : llama::mapping::BindSoA struct SharedDataBoxMemoryLayout : llama::mapping::BindAoSoA<32> + struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<> + // struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS + { + inline static constexpr bool splitVector = false; + }; } // namespace picongpu diff --git a/share/picongpu/tests/compileCombinedAttributes/include/picongpu/param/memory.param b/share/picongpu/tests/compileCombinedAttributes/include/picongpu/param/memory.param index 9f88b040b7d..19f999657cf 100644 --- a/share/picongpu/tests/compileCombinedAttributes/include/picongpu/param/memory.param +++ b/share/picongpu/tests/compileCombinedAttributes/include/picongpu/param/memory.param @@ -114,4 +114,19 @@ namespace picongpu */ constexpr bool fieldTmpSupportGatherCommunication = true; + struct ParticleFrameMemoryLayout + : llama::mapping::BindSoA + { + inline static constexpr bool splitVector = false; + }; + + struct ParticleFrameMemoryLayoutOpenPMD : llama::mapping::BindSoA + { + inline static constexpr bool splitVector = false; + }; + + struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<> + { + inline static constexpr bool splitVector = false; + }; } // namespace picongpu diff --git a/share/pmacc/examples/gameOfLife2D/include/Evolution.hpp b/share/pmacc/examples/gameOfLife2D/include/Evolution.hpp index 1b94c9144f2..518f8094b85 100644 --- a/share/pmacc/examples/gameOfLife2D/include/Evolution.hpp +++ b/share/pmacc/examples/gameOfLife2D/include/Evolution.hpp @@ -71,7 +71,7 @@ namespace gol using Type = typename T_BoxReadOnly::ValueType; using SuperCellSize = typename T_Mapping::SuperCellSize; using BlockArea = SuperCellDescription, math::CT::Int<1, 1>>; - auto cache = CachedBox::create<0, Type>(worker, BlockArea()); + auto cache = CachedBox::create<0, SharedDataBoxMemoryLayout, Type>(worker, BlockArea()); Space const block(mapper.getSuperCellIndex(Space(cupla::blockIdx(worker.getAcc())))); Space const blockCell = block * T_Mapping::SuperCellSize::toRT(); diff --git a/thirdParty/llama b/thirdParty/llama new file mode 160000 index 00000000000..415009123fd --- /dev/null +++ b/thirdParty/llama @@ -0,0 +1 @@ +Subproject commit 415009123fd00b955bb2f63be30c7733c2fb006d