From 656aab972d01fabd174f74e9911e69967cbea6aa Mon Sep 17 00:00:00 2001
From: Bernhard Manfred Gruber <bernhardmgruber@gmail.com>
Date: Mon, 4 Oct 2021 10:52:44 +0200
Subject: [PATCH] Use llama for particle frame and shared memory DataBox layout

Also support LLAMA frames in the IO.
---
 .gitmodules                                   |   3 +
 include/picongpu/algorithms/Set.hpp           |   4 +-
 include/picongpu/algorithms/Velocity.hpp      |   2 +-
 include/picongpu/fields/FieldJ.kernel         |   3 +-
 include/picongpu/fields/FieldTmp.kernel       |   4 +-
 .../MaxwellSolver/AddCurrentDensity.kernel    |   6 +-
 .../fields/MaxwellSolver/FDTD/FDTDBase.kernel |   4 +-
 .../fields/currentDeposition/Cache.hpp        |  11 +-
 .../picongpu/fields/incidentField/Solver.hpp  |   5 +-
 include/picongpu/param/memory.param           |  15 ++
 include/picongpu/particles/Particles.hpp      |   5 +-
 include/picongpu/particles/Particles.kernel   |   6 +-
 include/picongpu/particles/Particles.tpp      |  20 +-
 .../ThomasFermi/ThomasFermi_Impl.hpp          |  21 +-
 .../ionization/byField/ADK/ADK_Impl.hpp       |  14 +-
 .../ionization/byField/BSI/BSI_Impl.hpp       |   8 +-
 .../byField/Keldysh/Keldysh_Impl.hpp          |  14 +-
 .../plugins/PhaseSpace/PhaseSpaceFunctors.hpp |   2 +-
 .../picongpu/plugins/openPMD/WriteSpecies.hpp |  36 ++--
 .../LoadParticleAttributesFromOpenPMD.hpp     |  13 +-
 .../plugins/openPMD/restart/LoadSpecies.hpp   |  31 ++-
 .../openPMD/writer/ParticleAttribute.hpp      |  11 +-
 .../plugins/output/WriteSpeciesCommon.hpp     |  96 ++++-----
 .../picongpu/plugins/radiation/Radiation.hpp  |   2 +-
 include/pmacc/PMaccConfig.cmake               |   8 +
 include/pmacc/debug/VerboseLog.hpp            |   6 +-
 include/pmacc/math/operation/Add.hpp          |   8 +-
 include/pmacc/math/operation/Assign.hpp       |   8 +-
 include/pmacc/math/vector/TwistComponents.hpp |  17 +-
 include/pmacc/math/vector/Vector.hpp          |  84 +++++++-
 include/pmacc/math/vector/Vector.tpp          |  12 +-
 include/pmacc/memory/boxes/CachedBox.hpp      |  41 ++--
 include/pmacc/memory/boxes/DataBox.hpp        | 104 +++++++++-
 include/pmacc/memory/boxes/SharedBox.hpp      |  18 +-
 .../pmacc/particles/ParticleDescription.hpp   |   9 +-
 include/pmacc/particles/ParticlesBase.hpp     |   3 +-
 include/pmacc/particles/ParticlesBase.tpp     |  23 ++-
 .../memory/buffers/ParticlesBuffer.hpp        |  38 ++--
 .../particles/memory/dataTypes/Particle.hpp   |  12 +-
 .../pmacc/particles/memory/frames/Frame.hpp   | 195 +++++++++++++++---
 .../include/picongpu/param/memory.param       |  15 ++
 .../include/picongpu/param/memory.param       |  15 ++
 .../include/picongpu/param/memory.param       |  15 ++
 .../include/picongpu/param/memory.param       |  15 ++
 .../include/picongpu/param/memory.param       |  15 ++
 .../include/picongpu/param/memory.param       |  15 ++
 .../include/picongpu/param/memory.param       |  15 ++
 .../include/picongpu/param/memory.param       |  14 ++
 .../include/picongpu/param/memory.param       |  15 ++
 .../include/picongpu/param/memory.param       |  15 ++
 .../include/picongpu/param/memory.param       |  23 +++
 .../include/picongpu/param/memory.param       |  15 ++
 .../gameOfLife2D/include/Evolution.hpp        |   2 +-
 thirdParty/llama                              |   1 +
 54 files changed, 852 insertions(+), 260 deletions(-)
 create mode 100644 .gitmodules
 create mode 160000 thirdParty/llama
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 00000000000..2dfb55f01bc
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "thirdParty/llama"]
+	path = thirdParty/llama
+	url = https://github.com/alpaka-group/llama
diff --git a/include/picongpu/algorithms/Set.hpp b/include/picongpu/algorithms/Set.hpp
index 985501fe983..2476f44d722 100644
--- a/include/picongpu/algorithms/Set.hpp
+++ b/include/picongpu/algorithms/Set.hpp
@@ -33,9 +33,9 @@ namespace picongpu
         }
 
         template<typename Dst, typename T_Worker>
-        HDINLINE void operator()(T_Worker const&, Dst& dst) const
+        HDINLINE void operator()(T_Worker const&, Dst&& dst) const
         {
-            dst = value;
+            std::forward<Dst>(dst) = value;
         }
 
     private:
diff --git a/include/picongpu/algorithms/Velocity.hpp b/include/picongpu/algorithms/Velocity.hpp
index 7d72eee0ffb..ba4ab645345 100644
--- a/include/picongpu/algorithms/Velocity.hpp
+++ b/include/picongpu/algorithms/Velocity.hpp
@@ -28,7 +28,7 @@ namespace picongpu
     struct Velocity
     {
         template<typename MomType, typename MassType>
-        HDINLINE MomType operator()(const MomType mom, const MassType mass0)
+        HDINLINE auto operator()(const MomType mom, const MassType mass0)
         {
             const float_X rc2 = MUE0_EPS0;
             const float_X m0_2 = mass0 * mass0;
diff --git a/include/picongpu/fields/FieldJ.kernel b/include/picongpu/fields/FieldJ.kernel
index dd3e2ef8bc5..fadaf6a9cd4 100644
--- a/include/picongpu/fields/FieldJ.kernel
+++ b/include/picongpu/fields/FieldJ.kernel
@@ -131,7 +131,8 @@ namespace picongpu
                 // The rest uses normal weighting
                 const float_X weighting = particle[weighting_];
                 Velocity velocity;
-                const float3_X vel = velocity(particle[momentum_], attribute::getMass(weighting, particle));
+                const float3_X vel
+                    = velocity(static_cast<float3_X>(particle[momentum_]), attribute::getMass(weighting, particle));
                 auto fieldJShiftToParticle = jBox.shift(localCell);
                 ParticleAlgo perParticle;
                 perParticle(worker, fieldJShiftToParticle, pos, vel, charge, m_deltaTime);
diff --git a/include/picongpu/fields/FieldTmp.kernel b/include/picongpu/fields/FieldTmp.kernel
index 554a3aca8ce..476426a922f 100644
--- a/include/picongpu/fields/FieldTmp.kernel
+++ b/include/picongpu/fields/FieldTmp.kernel
@@ -94,7 +94,9 @@ namespace picongpu
             if(!forEachParticle.hasParticles())
                 return;
 
-            auto cachedVal = CachedBox::create<0, typename T_TmpBox::ValueType>(worker, T_BlockDescription{});
+            auto cachedVal = CachedBox::create<0, SharedDataBoxMemoryLayout, typename T_TmpBox::ValueType>(
+                worker,
+                T_BlockDescription{});
             Set<typename T_TmpBox::ValueType> set(float_X(0.0));
 
             auto collective = makeThreadCollective<T_BlockDescription>();
diff --git a/include/picongpu/fields/MaxwellSolver/AddCurrentDensity.kernel b/include/picongpu/fields/MaxwellSolver/AddCurrentDensity.kernel
index 0c49a36e908..d20f4a9ed44 100644
--- a/include/picongpu/fields/MaxwellSolver/AddCurrentDensity.kernel
+++ b/include/picongpu/fields/MaxwellSolver/AddCurrentDensity.kernel
@@ -22,6 +22,8 @@
 
 #include "picongpu/simulation_defines.hpp"
 
+#include "picongpu/param/memory.param"
+
 #include <pmacc/dimensions/SuperCellDescription.hpp>
 #include <pmacc/lockstep.hpp>
 #include <pmacc/mappings/threads/ThreadCollective.hpp>
@@ -68,7 +70,9 @@ namespace picongpu::fields::maxwellSolver
 
             constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume<SuperCellSize>::type::value;
 
-            auto cachedJ = CachedBox::create<0, typename FieldJ::DataBoxType::ValueType>(worker, BlockArea());
+            auto cachedJ = CachedBox::create<0, SharedDataBoxMemoryLayout, typename FieldJ::DataBoxType::ValueType>(
+                worker,
+                BlockArea());
 
             pmacc::math::operation::Assign assign;
             DataSpace<simDim> const block(
diff --git a/include/picongpu/fields/MaxwellSolver/FDTD/FDTDBase.kernel b/include/picongpu/fields/MaxwellSolver/FDTD/FDTDBase.kernel
index cdcaa5ecdcf..0756f4e9f97 100644
--- a/include/picongpu/fields/MaxwellSolver/FDTD/FDTDBase.kernel
+++ b/include/picongpu/fields/MaxwellSolver/FDTD/FDTDBase.kernel
@@ -179,7 +179,9 @@ namespace picongpu
                         auto srcFieldBlock = srcField.shift(beginCellIdx);
                         auto cacheStencilArea = makeThreadCollective<StencilCfg>();
                         auto cachedSrcField
-                            = CachedBox::create<0u, typename T_SrcBox::ValueType>(worker, StencilCfg{});
+                            = CachedBox::create<0u, SharedDataBoxMemoryLayout, typename T_SrcBox::ValueType>(
+                                worker,
+                                StencilCfg{});
                         cacheStencilArea(worker, assign, cachedSrcField, srcFieldBlock);
 
                         worker.sync();
diff --git a/include/picongpu/fields/currentDeposition/Cache.hpp b/include/picongpu/fields/currentDeposition/Cache.hpp
index ae39c5f0580..4757a53ac90 100644
--- a/include/picongpu/fields/currentDeposition/Cache.hpp
+++ b/include/picongpu/fields/currentDeposition/Cache.hpp
@@ -48,15 +48,11 @@ namespace picongpu
                  */
                 template<typename T_BlockDescription, typename T_Worker, typename T_FieldBox>
                 DINLINE static auto create(T_Worker const& worker, T_FieldBox const& fieldBox)
-#if(!BOOST_COMP_CLANG)
-                    -> decltype(CachedBox::create<0u, typename T_FieldBox::ValueType>(
-                        worker,
-                        std::declval<T_BlockDescription>()))
-#endif
                 {
                     using ValueType = typename T_FieldBox::ValueType;
                     /* this memory is used by all virtual blocks */
-                    auto cache = CachedBox::create<0u, ValueType>(worker, T_BlockDescription{});
+                    auto cache
+                        = CachedBox::create<0u, SharedDataBoxMemoryLayout, ValueType>(worker, T_BlockDescription{});
 
                     Set<ValueType> set(ValueType::create(0.0_X));
                     auto collectiveFill = makeThreadCollective<T_BlockDescription>();
@@ -90,9 +86,6 @@ namespace picongpu
                  */
                 template<typename T_BlockDescription, typename T_Worker, typename T_FieldBox>
                 DINLINE static auto create([[maybe_unused]] T_Worker const& worker, T_FieldBox const& fieldBox)
-#if(!BOOST_COMP_CLANG)
-                    -> T_FieldBox
-#endif
                 {
                     return fieldBox;
                 }
diff --git a/include/picongpu/fields/incidentField/Solver.hpp b/include/picongpu/fields/incidentField/Solver.hpp
index 6886ed9fd9c..d87bdc129dc 100644
--- a/include/picongpu/fields/incidentField/Solver.hpp
+++ b/include/picongpu/fields/incidentField/Solver.hpp
@@ -283,8 +283,9 @@ namespace picongpu
                     using IntVector = pmacc::math::Vector<int, simDim>;
                     auto const beginLocalUserIdx
                         = Index{math::max(IntVector{beginUserIdx - totalCellOffset}, IntVector::create(0))};
-                    auto const endLocalUserIdx
-                        = Index{math::min(IntVector{endUserIdx - totalCellOffset}, IntVector{localDomain.size})};
+                    auto const endLocalUserIdx = Index{math::min(
+                        IntVector{endUserIdx - totalCellOffset},
+                        static_cast<const IntVector&>(localDomain.size))};
 
                     // Check if we have any active cells in the local domain
                     bool areAnyCellsInLocalDomain = true;
diff --git a/include/picongpu/param/memory.param b/include/picongpu/param/memory.param
index 06f8e0d7bda..1bfff7dd7f1 100644
--- a/include/picongpu/param/memory.param
+++ b/include/picongpu/param/memory.param
@@ -114,4 +114,19 @@ namespace picongpu
      */
     constexpr bool fieldTmpSupportGatherCommunication = true;
 
+    struct ParticleFrameMemoryLayout
+        : llama::mapping::BindSoA<llama::mapping::Blobs::Single, llama::mapping::SubArrayAlignment::Align>
+    {
+        inline static constexpr bool splitVector = false;
+    };
+
+    struct ParticleFrameMemoryLayoutOpenPMD : llama::mapping::BindSoA<llama::mapping::Blobs::OnePerField>
+    {
+        inline static constexpr bool splitVector = false;
+    };
+
+    struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<>
+    {
+        inline static constexpr bool splitVector = false;
+    };
 } // namespace picongpu
diff --git a/include/picongpu/particles/Particles.hpp b/include/picongpu/particles/Particles.hpp
index a61c03a7a41..533bb2005e4 100644
--- a/include/picongpu/particles/Particles.hpp
+++ b/include/picongpu/particles/Particles.hpp
@@ -22,6 +22,7 @@
 
 #include "picongpu/fields/Fields.def"
 #include "picongpu/fields/Fields.hpp"
+#include "picongpu/param/memory.param"
 #include "picongpu/particles/boundary/Description.hpp"
 #include "picongpu/particles/boundary/Utility.hpp"
 #include "picongpu/particles/manipulators/manipulators.def"
@@ -89,6 +90,7 @@ namespace picongpu
                       pmacc::HandleGuardRegion<
                           pmacc::particles::policies::ExchangeParticles,
                           pmacc::particles::policies::DoNothing>>>,
+              ParticleFrameMemoryLayout,
               MappingDesc,
               DeviceHeap>
         , public ISimulationData
@@ -108,7 +110,8 @@ namespace picongpu
                 pmacc::HandleGuardRegion<
                     pmacc::particles::policies::ExchangeParticles,
                     pmacc::particles::policies::DoNothing>>>;
-        using ParticlesBaseType = ParticlesBase<SpeciesParticleDescription, picongpu::MappingDesc, DeviceHeap>;
+        using ParticlesBaseType
+            = ParticlesBase<SpeciesParticleDescription, ParticleFrameMemoryLayout, picongpu::MappingDesc, DeviceHeap>;
         using FrameType = typename ParticlesBaseType::FrameType;
         using FrameTypeBorder = typename ParticlesBaseType::FrameTypeBorder;
         using ParticlesBoxType = typename ParticlesBaseType::ParticlesBoxType;
diff --git a/include/picongpu/particles/Particles.kernel b/include/picongpu/particles/Particles.kernel
index ac46ad6c7b0..54daf736536 100644
--- a/include/picongpu/particles/Particles.kernel
+++ b/include/picongpu/particles/Particles.kernel
@@ -222,8 +222,10 @@ namespace picongpu
 
             onlyMaster([&]() { mustShiftSupercell = 0; });
 
-            auto cachedB = CachedBox::create<0, typename T_BBox::ValueType>(worker, T_DataDomain());
-            auto cachedE = CachedBox::create<1, typename T_EBox::ValueType>(worker, T_DataDomain());
+            auto cachedB
+                = CachedBox::create<0, SharedDataBoxMemoryLayout, typename T_BBox::ValueType>(worker, T_DataDomain());
+            auto cachedE
+                = CachedBox::create<1, SharedDataBoxMemoryLayout, typename T_EBox::ValueType>(worker, T_DataDomain());
 
             worker.sync();
 
diff --git a/include/picongpu/particles/Particles.tpp b/include/picongpu/particles/Particles.tpp
index 65c683458b7..c5766ffc036 100644
--- a/include/picongpu/particles/Particles.tpp
+++ b/include/picongpu/particles/Particles.tpp
@@ -43,6 +43,7 @@
 #include <pmacc/traits/Resolve.hpp>
 
 #include <algorithm>
+#include <fstream>
 #include <iostream>
 #include <limits>
 #include <memory>
@@ -197,7 +198,9 @@ namespace picongpu
         const std::shared_ptr<DeviceHeap>& heap,
         picongpu::MappingDesc cellDescription,
         SimulationDataId datasetID)
-        : ParticlesBase<SpeciesParticleDescription, picongpu::MappingDesc, DeviceHeap>(heap, cellDescription)
+        : ParticlesBase<SpeciesParticleDescription, ParticleFrameMemoryLayout, picongpu::MappingDesc, DeviceHeap>(
+            heap,
+            cellDescription)
         , m_datasetID(datasetID)
     {
         constexpr bool particleHasShape = pmacc::traits::HasIdentifier<FrameType, shape<>>::type::value;
@@ -212,6 +215,21 @@ namespace picongpu
 
         size_t sizeOfExchanges = 0u;
 
+#if __has_include(<fmt/format.h>)
+        // dump the data layout of the particle frames
+        if constexpr(PIConGPUVerbose::log_level & picLog::MEMORY::lvl)
+        {
+            log<picLog::MEMORY>(
+                "Dumping LLAMA memory layout for frame and border into llama_frame.* and llama_border_fream.*");
+            auto fm = typename decltype(FrameType::view)::Mapping{};
+            std::ofstream{"llama_frame.html"} << llama::toHtml(fm);
+            std::ofstream{"llama_frame.svg"} << llama::toSvg(fm);
+            auto bfm = typename decltype(FrameTypeBorder::view)::Mapping{};
+            std::ofstream{"llama_border_frame.html"} << llama::toHtml(bfm);
+            std::ofstream{"llama_border_frame.svg"} << llama::toSvg(bfm);
+        }
+#endif
+
         const uint32_t commTag = pmacc::traits::getUniqueId();
         log<picLog::MEMORY>("communication tag for species %1%: %2%") % FrameType::getName() % commTag;
 
diff --git a/include/picongpu/particles/ionization/byCollision/ThomasFermi/ThomasFermi_Impl.hpp b/include/picongpu/particles/ionization/byCollision/ThomasFermi/ThomasFermi_Impl.hpp
index 0edcb9c8244..f3a45ec8ef8 100644
--- a/include/picongpu/particles/ionization/byCollision/ThomasFermi/ThomasFermi_Impl.hpp
+++ b/include/picongpu/particles/ionization/byCollision/ThomasFermi/ThomasFermi_Impl.hpp
@@ -23,6 +23,7 @@
 
 #include "picongpu/fields/CellType.hpp"
 #include "picongpu/fields/FieldTmp.hpp"
+#include "picongpu/param/memory.param"
 #include "picongpu/particles/atomicPhysics/SetChargeState.hpp"
 #include "picongpu/particles/ionization/byCollision/ThomasFermi/AlgorithmThomasFermi.hpp"
 #include "picongpu/particles/ionization/byCollision/ThomasFermi/ThomasFermi.def"
@@ -104,8 +105,20 @@ namespace picongpu
                 PMACC_ALIGN(eneBox, FieldTmp::DataBoxType);
 
                 /* shared memory EM-field device databoxes */
-                PMACC_ALIGN(cachedRho, DataBox<SharedBox<ValueType_Rho, typename BlockArea::FullSuperCellSize, 0>>);
-                PMACC_ALIGN(cachedEne, DataBox<SharedBox<ValueType_Ene, typename BlockArea::FullSuperCellSize, 1>>);
+                PMACC_ALIGN(
+                    cachedRho,
+                    DataBox<SharedBox<
+                        ValueType_Rho,
+                        typename BlockArea::FullSuperCellSize,
+                        0,
+                        SharedDataBoxMemoryLayout>>);
+                PMACC_ALIGN(
+                    cachedEne,
+                    DataBox<SharedBox<
+                        ValueType_Ene,
+                        typename BlockArea::FullSuperCellSize,
+                        1,
+                        SharedDataBoxMemoryLayout>>);
 
             public:
                 /* host constructor initializing member : random number generator */
@@ -185,8 +198,8 @@ namespace picongpu
                 DINLINE void collectiveInit(const T_Worker& worker, const DataSpace<simDim>& blockCell)
                 {
                     /* caching of density and "temperature" fields */
-                    cachedRho = CachedBox::create<0, ValueType_Rho>(worker, BlockArea());
-                    cachedEne = CachedBox::create<1, ValueType_Ene>(worker, BlockArea());
+                    cachedRho = CachedBox::create<0, SharedDataBoxMemoryLayout, ValueType_Rho>(worker, BlockArea());
+                    cachedEne = CachedBox::create<1, SharedDataBoxMemoryLayout, ValueType_Ene>(worker, BlockArea());
 
                     /* instance of nvidia assignment operator */
                     pmacc::math::operation::Assign assign;
diff --git a/include/picongpu/particles/ionization/byField/ADK/ADK_Impl.hpp b/include/picongpu/particles/ionization/byField/ADK/ADK_Impl.hpp
index 9dc24c7ebe9..a3e560f912b 100644
--- a/include/picongpu/particles/ionization/byField/ADK/ADK_Impl.hpp
+++ b/include/picongpu/particles/ionization/byField/ADK/ADK_Impl.hpp
@@ -102,8 +102,14 @@ namespace picongpu
                 PMACC_ALIGN(bBox, FieldB::DataBoxType);
                 PMACC_ALIGN(jBox, FieldJ::DataBoxType);
                 /* shared memory EM-field device databoxes */
-                PMACC_ALIGN(cachedE, DataBox<SharedBox<ValueType_E, typename BlockArea::FullSuperCellSize, 1>>);
-                PMACC_ALIGN(cachedB, DataBox<SharedBox<ValueType_B, typename BlockArea::FullSuperCellSize, 0>>);
+                PMACC_ALIGN(
+                    cachedE,
+                    DataBox<
+                        SharedBox<ValueType_E, typename BlockArea::FullSuperCellSize, 1, SharedDataBoxMemoryLayout>>);
+                PMACC_ALIGN(
+                    cachedB,
+                    DataBox<
+                        SharedBox<ValueType_B, typename BlockArea::FullSuperCellSize, 0, SharedDataBoxMemoryLayout>>);
 
             public:
                 /* host constructor initializing member : random number generator */
@@ -137,8 +143,8 @@ namespace picongpu
                     jBox = jBox.shift(blockCell);
 
                     /* caching of E and B fields */
-                    cachedB = CachedBox::create<0, ValueType_B>(worker, BlockArea());
-                    cachedE = CachedBox::create<1, ValueType_E>(worker, BlockArea());
+                    cachedB = CachedBox::create<0, SharedDataBoxMemoryLayout, ValueType_B>(worker, BlockArea());
+                    cachedE = CachedBox::create<1, SharedDataBoxMemoryLayout, ValueType_E>(worker, BlockArea());
 
                     /* instance of nvidia assignment operator */
                     pmacc::math::operation::Assign assign;
diff --git a/include/picongpu/particles/ionization/byField/BSI/BSI_Impl.hpp b/include/picongpu/particles/ionization/byField/BSI/BSI_Impl.hpp
index a41c0d0c310..33f40a159db 100644
--- a/include/picongpu/particles/ionization/byField/BSI/BSI_Impl.hpp
+++ b/include/picongpu/particles/ionization/byField/BSI/BSI_Impl.hpp
@@ -25,6 +25,7 @@
 #include "picongpu/fields/FieldB.hpp"
 #include "picongpu/fields/FieldE.hpp"
 #include "picongpu/fields/FieldJ.hpp"
+#include "picongpu/param/memory.param"
 #include "picongpu/particles/ParticlesFunctors.hpp"
 #include "picongpu/particles/atomicPhysics/SetChargeState.hpp"
 #include "picongpu/particles/ionization/byField/BSI/AlgorithmBSI.hpp"
@@ -93,7 +94,10 @@ namespace picongpu
                 FieldE::DataBoxType eBox;
                 FieldJ::DataBoxType jBox;
                 /* shared memory EM-field device databoxes */
-                PMACC_ALIGN(cachedE, DataBox<SharedBox<ValueType_E, typename BlockArea::FullSuperCellSize, 1>>);
+                PMACC_ALIGN(
+                    cachedE,
+                    DataBox<
+                        SharedBox<ValueType_E, typename BlockArea::FullSuperCellSize, 1, SharedDataBoxMemoryLayout>>);
 
             public:
                 /* host constructor */
@@ -125,7 +129,7 @@ namespace picongpu
                     jBox = jBox.shift(blockCell);
 
                     /* caching of E field */
-                    cachedE = CachedBox::create<1, ValueType_E>(worker, BlockArea());
+                    cachedE = CachedBox::create<1, SharedDataBoxMemoryLayout, ValueType_E>(worker, BlockArea());
 
                     /* instance of nvidia assignment operator */
                     pmacc::math::operation::Assign assign;
diff --git a/include/picongpu/particles/ionization/byField/Keldysh/Keldysh_Impl.hpp b/include/picongpu/particles/ionization/byField/Keldysh/Keldysh_Impl.hpp
index b48b027e812..a9ca275787c 100644
--- a/include/picongpu/particles/ionization/byField/Keldysh/Keldysh_Impl.hpp
+++ b/include/picongpu/particles/ionization/byField/Keldysh/Keldysh_Impl.hpp
@@ -102,8 +102,14 @@ namespace picongpu
                 PMACC_ALIGN(bBox, FieldB::DataBoxType);
                 PMACC_ALIGN(jBox, FieldJ::DataBoxType);
                 /* shared memory EM-field device databoxes */
-                PMACC_ALIGN(cachedE, DataBox<SharedBox<ValueType_E, typename BlockArea::FullSuperCellSize, 1>>);
-                PMACC_ALIGN(cachedB, DataBox<SharedBox<ValueType_B, typename BlockArea::FullSuperCellSize, 0>>);
+                PMACC_ALIGN(
+                    cachedE,
+                    DataBox<
+                        SharedBox<ValueType_E, typename BlockArea::FullSuperCellSize, 1, SharedDataBoxMemoryLayout>>);
+                PMACC_ALIGN(
+                    cachedB,
+                    DataBox<
+                        SharedBox<ValueType_B, typename BlockArea::FullSuperCellSize, 0, SharedDataBoxMemoryLayout>>);
 
             public:
                 /* host constructor initializing member : random number generator */
@@ -137,8 +143,8 @@ namespace picongpu
                     jBox = jBox.shift(blockCell);
 
                     /* caching of E and B fields */
-                    cachedB = CachedBox::create<0, ValueType_B>(worker, BlockArea());
-                    cachedE = CachedBox::create<1, ValueType_E>(worker, BlockArea());
+                    cachedB = CachedBox::create<0, SharedDataBoxMemoryLayout, ValueType_B>(worker, BlockArea());
+                    cachedE = CachedBox::create<1, SharedDataBoxMemoryLayout, ValueType_E>(worker, BlockArea());
 
                     /* instance of nvidia assignment operator */
                     pmacc::math::operation::Assign assign;
diff --git a/include/picongpu/plugins/PhaseSpace/PhaseSpaceFunctors.hpp b/include/picongpu/plugins/PhaseSpace/PhaseSpaceFunctors.hpp
index c6508b1de55..393c57afe85 100644
--- a/include/picongpu/plugins/PhaseSpace/PhaseSpaceFunctors.hpp
+++ b/include/picongpu/plugins/PhaseSpace/PhaseSpaceFunctors.hpp
@@ -160,7 +160,7 @@ namespace picongpu
             /* create shared mem */
             constexpr int blockCellsInDir = SuperCellSize::template at<r_dir>::type::value;
             using SharedMemSize = SuperCellDescription<pmacc::math::CT::Int<num_pbins, blockCellsInDir>>;
-            auto sharedMemHist = CachedBox::create<0u, float_PS>(worker, SharedMemSize{});
+            auto sharedMemHist = CachedBox::create<0u, SharedDataBoxMemoryLayout, float_PS>(worker, SharedMemSize{});
 
             Set<float_PS> set(float_PS{0.0});
             auto collectiveOnSharedHistogram = makeThreadCollective<SharedMemSize>();
diff --git a/include/picongpu/plugins/openPMD/WriteSpecies.hpp b/include/picongpu/plugins/openPMD/WriteSpecies.hpp
index 4a747863e79..87d97383b63 100644
--- a/include/picongpu/plugins/openPMD/WriteSpecies.hpp
+++ b/include/picongpu/plugins/openPMD/WriteSpecies.hpp
@@ -44,8 +44,10 @@
 #include <pmacc/particles/particleFilter/PositionFilter.hpp>
 
 #include <boost/mpl/placeholders.hpp>
+#include <boost/numeric/conversion/cast.hpp>
 
 #include <algorithm>
+#include <fstream>
 #include <type_traits> // std::remove_reference_t
 
 namespace picongpu
@@ -112,15 +114,13 @@ namespace picongpu
             {
                 /* malloc host memory */
                 log<picLog::INPUT_OUTPUT>("openPMD:   (begin) malloc host memory: %1%") % name;
-                meta::ForEach<typename openPMDFrameType::ValueTypeSeq, MallocHostMemory<boost::mpl::_1>> mallocMem;
-                mallocMem(hostFrame, myNumParticles);
+                mallocFrameMemory(hostFrame);
                 log<picLog::INPUT_OUTPUT>("openPMD:   ( end ) malloc host memory: %1%") % name;
             }
 
             void free(openPMDFrameType& hostFrame) override
             {
-                meta::ForEach<typename openPMDFrameType::ValueTypeSeq, FreeHostMemory<boost::mpl::_1>> freeMem;
-                freeMem(hostFrame);
+                freeFrameMemory(hostFrame);
             }
 
 
@@ -182,16 +182,13 @@ namespace picongpu
             void malloc(std::string name, openPMDFrameType& mappedFrame, uint64_cu const myNumParticles) override
             {
                 log<picLog::INPUT_OUTPUT>("openPMD:  (begin) malloc mapped memory: %1%") % name;
-                /*malloc mapped memory*/
-                meta::ForEach<typename openPMDFrameType::ValueTypeSeq, MallocMappedMemory<boost::mpl::_1>> mallocMem;
-                mallocMem(mappedFrame, myNumParticles);
+                mallocMappedFrameMemory(mappedFrame);
                 log<picLog::INPUT_OUTPUT>("openPMD:  ( end ) malloc mapped memory: %1%") % name;
             }
 
             void free(openPMDFrameType& mappedFrame) override
             {
-                meta::ForEach<typename openPMDFrameType::ValueTypeSeq, FreeMappedMemory<boost::mpl::_1>> freeMem;
-                freeMem(mappedFrame);
+                freeMappedFrameMemory(mappedFrame);
             }
 
             void prepare(uint32_t currentStep, std::string name, openPMDFrameType& mappedFrame, RunParameters rp)
@@ -248,7 +245,7 @@ namespace picongpu
             using NewParticleDescription =
                 typename ReplaceValueTypeSeq<ParticleDescription, ParticleNewAttributeList>::type;
 
-            using openPMDFrameType = Frame<OperatorCreateVectorBox, NewParticleDescription>;
+            using openPMDFrameType = Frame<llama::dyn, NewParticleDescription, ParticleFrameMemoryLayoutOpenPMD>;
 
             void setParticleAttributes(
                 ::openPMD::ParticleSpecies& record,
@@ -360,14 +357,12 @@ namespace picongpu
                 {
                 case WriteSpeciesStrategy::ADIOS:
                     {
-                        using type = StrategyADIOS<openPMDFrameType, RunParameters_T>;
-                        strategy = std::unique_ptr<AStrategy>(dynamic_cast<AStrategy*>(new type));
+                        strategy = std::make_unique<StrategyADIOS<openPMDFrameType, RunParameters_T>>();
                         break;
                     }
                 case WriteSpeciesStrategy::HDF5:
                     {
-                        using type = StrategyHDF5<openPMDFrameType, RunParameters_T>;
-                        strategy = std::unique_ptr<AStrategy>(dynamic_cast<AStrategy*>(new type));
+                        strategy = std::make_unique<StrategyHDF5<openPMDFrameType, RunParameters_T>>();
                         break;
                     }
                 }
@@ -409,7 +404,18 @@ namespace picongpu
                 ::openPMD::ParticleSpecies& particleSpecies = iteration.particles[speciesGroup];
 
                 // copy over particles to host
-                openPMDFrameType hostFrame;
+                openPMDFrameType hostFrame{boost::numeric_cast<int>(myNumParticles)};
+#if __has_include(<fmt/format.h>)
+                if constexpr(PIConGPUVerbose::log_level & picLog::INPUT_OUTPUT::lvl)
+                {
+                    log<picLog::INPUT_OUTPUT>(
+                        "Dumping LLAMA memory layout for openPMD frame into llama_openPMD_write_frame.*");
+                    auto m = typename openPMDFrameType::Mapping{
+                        llama::ArrayExtentsDynamic<int, 1>{std::min(hostFrame.view.mapping().extents()[0], 1024)}};
+                    std::ofstream{"llama_openPMD_write_frame.svg"} << llama::toSvg(m);
+                    std::ofstream{"llama_openPMD_write_frame.html"} << llama::toHtml(m);
+                }
+#endif
 
                 strategy->malloc(T_SpeciesFilter::getName(), hostFrame, myNumParticles);
                 RunParameters_T runParameters(
diff --git a/include/picongpu/plugins/openPMD/restart/LoadParticleAttributesFromOpenPMD.hpp b/include/picongpu/plugins/openPMD/restart/LoadParticleAttributesFromOpenPMD.hpp
index b2031490339..58b55abf296 100644
--- a/include/picongpu/plugins/openPMD/restart/LoadParticleAttributesFromOpenPMD.hpp
+++ b/include/picongpu/plugins/openPMD/restart/LoadParticleAttributesFromOpenPMD.hpp
@@ -68,7 +68,7 @@ namespace picongpu
             {
                 using Identifier = T_Identifier;
                 using ValueType = typename pmacc::traits::Resolve<Identifier>::type::type;
-                const uint32_t components = GetNComponents<ValueType>::value;
+                constexpr uint32_t components = GetNComponents<ValueType>::value;
                 using ComponentType = typename GetComponentsType<ValueType>::type;
                 OpenPMDName<Identifier> openPMDName;
 
@@ -76,6 +76,7 @@ namespace picongpu
 
                 const std::string name_lookup[] = {"x", "y", "z"};
 
+                // TODO(bgruber): make this a std::shared_ptr<ComponentType[]> with openPMD 0.15
                 std::shared_ptr<ComponentType> loadBfr;
                 if(elements > 0)
                 {
@@ -90,7 +91,6 @@ namespace picongpu
                     ::openPMD::RecordComponent rc
                         = components > 1 ? record[name_lookup[n]] : record[::openPMD::RecordComponent::SCALAR];
 
-                    ValueType* dataPtr = frame.getIdentifier(Identifier()).getPointer();
 
                     if(elements > 0)
                     {
@@ -119,12 +119,15 @@ namespace picongpu
                                               "%3%")
                         % elements % globalNumElements % openPMDName();
 
-/* copy component from temporary array to array of structs */
+                    /* copy component from temporary array to array of structs */
 #pragma omp parallel for simd
                     for(size_t i = 0; i < elements; ++i)
                     {
-                        ComponentType* ref = &reinterpret_cast<ComponentType*>(dataPtr)[i * components + n];
-                        *ref = loadBfr.get()[i];
+                        auto& attrib = frame[i][Identifier{}];
+                        if constexpr(components == 1)
+                            attrib = loadBfr.get()[i];
+                        else
+                            reinterpret_cast<ComponentType*>(&attrib)[n] = loadBfr.get()[i];
                     }
                 }
 
diff --git a/include/picongpu/plugins/openPMD/restart/LoadSpecies.hpp b/include/picongpu/plugins/openPMD/restart/LoadSpecies.hpp
index 4d1e343d801..815120c3120 100644
--- a/include/picongpu/plugins/openPMD/restart/LoadSpecies.hpp
+++ b/include/picongpu/plugins/openPMD/restart/LoadSpecies.hpp
@@ -33,8 +33,10 @@
 #include <pmacc/particles/operations/splitIntoListOfFrames.kernel>
 
 #include <boost/mpl/placeholders.hpp>
+#include <boost/numeric/conversion/cast.hpp>
 
 #include <cassert>
+#include <fstream>
 #include <stdexcept>
 
 #include <openPMD/openPMD.hpp>
@@ -69,7 +71,7 @@ namespace picongpu
             using NewParticleDescription =
                 typename ReplaceValueTypeSeq<ParticleDescription, ParticleNewAttributeList>::type;
 
-            using openPMDFrameType = Frame<OperatorCreateVectorBox, NewParticleDescription>;
+            using openPMDFrameType = Frame<llama::dyn, NewParticleDescription, ParticleFrameMemoryLayoutOpenPMD>;
 
             /** Load species from openPMD checkpoint storage
              *
@@ -130,15 +132,24 @@ namespace picongpu
                     % (long long unsigned) totalNumParticles % (long long unsigned) particleOffset;
 
                 // memory is visible on host and device
-                openPMDFrameType mappedFrame;
+                openPMDFrameType mappedFrame{boost::numeric_cast<int>(totalNumParticles)};
+#if __has_include(<fmt/format.h>)
+                if constexpr(PIConGPUVerbose::log_level & picLog::INPUT_OUTPUT::lvl)
+                {
+                    log<picLog::INPUT_OUTPUT>(
+                        "Dumping LLAMA memory layout for openPMD frame into llama_openPMD_load_frame.*");
+                    auto m = typename openPMDFrameType::Mapping{
+                        llama::ArrayExtentsDynamic<int, 1>{std::min(mappedFrame.view.mapping().extents()[0], 1024)}};
+                    std::ofstream{"llama_openPMD_load_frame.svg"} << llama::toSvg(m);
+                    std::ofstream{"llama_openPMD_load_frame.html"} << llama::toHtml(m);
+                }
+#endif
                 log<picLog::INPUT_OUTPUT>("openPMD: malloc mapped memory: %1%") % speciesName;
-                /*malloc mapped memory*/
-                meta::ForEach<typename openPMDFrameType::ValueTypeSeq, MallocMappedMemory<boost::mpl::_1>> mallocMem;
-                mallocMem(mappedFrame, totalNumParticles);
 
-                meta::
-                    ForEach<typename openPMDFrameType::ValueTypeSeq, LoadParticleAttributesFromOpenPMD<boost::mpl::_1>>
-                        loadAttributes;
+                mallocMappedFrameMemory(mappedFrame);
+
+                meta::ForEach<typename openPMDFrameType::ValueTypeSeq, LoadParticleAttributesFromOpenPMD<boost::mpl::_1>>
+                    loadAttributes;
                 loadAttributes(params, mappedFrame, particleSpecies, particleOffset, totalNumParticles);
 
                 if(totalNumParticles != 0)
@@ -153,9 +164,7 @@ namespace picongpu
                         *(params->cellDescription),
                         picLog::INPUT_OUTPUT());
 
-                    /*free host memory*/
-                    meta::ForEach<typename openPMDFrameType::ValueTypeSeq, FreeMappedMemory<boost::mpl::_1>> freeMem;
-                    freeMem(mappedFrame);
+                    freeMappedFrameMemory(mappedFrame);
                 }
                 log<picLog::INPUT_OUTPUT>("openPMD: ( end ) load species: %1%") % speciesName;
             }
diff --git a/include/picongpu/plugins/openPMD/writer/ParticleAttribute.hpp b/include/picongpu/plugins/openPMD/writer/ParticleAttribute.hpp
index cbc968fe966..3f4c35c8178 100644
--- a/include/picongpu/plugins/openPMD/writer/ParticleAttribute.hpp
+++ b/include/picongpu/plugins/openPMD/writer/ParticleAttribute.hpp
@@ -64,7 +64,7 @@ namespace picongpu
             {
                 using Identifier = T_Identifier;
                 using ValueType = typename pmacc::traits::Resolve<Identifier>::type::type;
-                const uint32_t components = GetNComponents<ValueType>::value;
+                constexpr uint32_t components = GetNComponents<ValueType>::value;
                 using ComponentType = typename GetComponentsType<ValueType>::type;
 
                 OpenPMDName<T_Identifier> openPMDName;
@@ -93,7 +93,7 @@ namespace picongpu
                     ::openPMD::RecordComponent recordComponent
                         = components > 1 ? record[name_lookup[d]] : record[::openPMD::MeshRecordComponent::SCALAR];
 
-                    std::string datasetName = components > 1 ? baseName + "/" + name_lookup[d] : baseName;
+                    const std::string datasetName = components > 1 ? baseName + "/" + name_lookup[d] : baseName;
                     params->initDataset<DIM1>(recordComponent, openPMDType, {globalElements}, datasetName);
 
                     if(unit.size() >= (d + 1))
@@ -107,7 +107,6 @@ namespace picongpu
                         continue;
                     }
 
-                    ValueType* dataPtr = frame.getIdentifier(Identifier()).getPointer(); // can be moved up?
                     // ask openPMD to create a buffer for us
                     // in some backends (ADIOS2), this allows avoiding memcopies
                     auto span = storeChunkSpan<ComponentType>(
@@ -132,7 +131,11 @@ namespace picongpu
 #pragma omp parallel for simd
                     for(size_t i = 0; i < elements; ++i)
                     {
-                        span[i] = reinterpret_cast<ComponentType*>(dataPtr)[d + i * components];
+                        const auto attrib = frame[i][Identifier{}];
+                        if constexpr(components == 1)
+                            span[i] = attrib;
+                        else
+                            span[i] = reinterpret_cast<const ComponentType*>(&attrib)[d];
                     }
 
                     flushSeries(*params->openPMDSeries, PreferredFlushTarget::Disk);
diff --git a/include/picongpu/plugins/output/WriteSpeciesCommon.hpp b/include/picongpu/plugins/output/WriteSpeciesCommon.hpp
index 683fe1767ed..178775d4af0 100644
--- a/include/picongpu/plugins/output/WriteSpeciesCommon.hpp
+++ b/include/picongpu/plugins/output/WriteSpeciesCommon.hpp
@@ -28,51 +28,50 @@ namespace picongpu
 {
     using namespace pmacc;
 
-
-    template<typename T_Type>
-    struct MallocMappedMemory
+    template<typename Frame>
+    void mallocMappedFrameMemory(Frame& frame)
     {
-        template<typename ValueType>
-        HINLINE void operator()(ValueType& v1, const size_t size) const
-        {
-            using type = typename pmacc::traits::Resolve<T_Type>::type::type;
-
-            bool isMappedMemorySupported = alpaka::hasMappedBufSupport<::alpaka::Platform<cupla::AccDev>>;
+        constexpr bool isMappedMemorySupported = alpaka::hasMappedBufSupport<::alpaka::Platform<cupla::AccDev>>;
+        PMACC_VERIFY_MSG(isMappedMemorySupported, "Device must support mapped memory!");
 
-            PMACC_VERIFY_MSG(isMappedMemorySupported, "Device must support mapped memory!");
-
-            type* ptr = nullptr;
+        int i = 0;
+        for(std::byte*& ptr : frame.blobs())
+        {
+            const auto size = frame.blobSize(i);
             if(size != 0)
             {
                 // Memory is automatically mapped to the device if supported.
-                CUDA_CHECK(cuplaMallocHost((void**) &ptr, size * sizeof(type)));
+                CUDA_CHECK(cuplaMallocHost((void**) &ptr, size));
             }
-            v1.getIdentifier(T_Type()) = VectorDataBox<type>(ptr);
+            else
+                ptr = nullptr;
+            log<picLog::INPUT_OUTPUT>("openPMD:     blob %1%, size: %2%, ptr: %3%") % i % size % ptr;
+            i++;
         }
-    };
+    }
 
     /** allocate memory on host
      *
      * This functor use `new[]` to allocate memory
      */
-    template<typename T_Attribute>
-    struct MallocHostMemory
+    template<typename Frame>
+    void mallocFrameMemory(Frame& frame)
     {
-        template<typename ValueType>
-        HINLINE void operator()(ValueType& v1, const size_t size) const
-        {
-            using Attribute = T_Attribute;
-            using type = typename pmacc::traits::Resolve<Attribute>::type::type;
+        constexpr bool isMappedMemorySupported = alpaka::hasMappedBufSupport<::alpaka::Pltf<cupla::AccDev>>;
+        PMACC_VERIFY_MSG(isMappedMemorySupported, "Device must support mapped memory!");
 
-            type* ptr = nullptr;
+        int i = 0;
+        for(std::byte*& ptr : frame.blobs())
+        {
+            const auto size = frame.blobSize(i);
             if(size != 0)
-            {
-                ptr = new type[size];
-            }
-            v1.getIdentifier(Attribute()) = VectorDataBox<type>(ptr);
+                ptr = new std::byte[size];
+            else
+                ptr = nullptr;
+            log<picLog::INPUT_OUTPUT>("openPMD:     blob %1%, size: %2%, ptr: %3%") % i % size % ptr;
+            i++;
         }
-    };
-
+    }
 
     /** copy species to host memory
      *
@@ -92,43 +91,24 @@ namespace picongpu
         }
     };
 
-    template<typename T_Type>
-    struct FreeMappedMemory
+    template<typename Frame>
+    void freeMappedFrameMemory(Frame& frame)
     {
-        template<typename ValueType>
-        HINLINE void operator()(ValueType& value) const
-        {
-            auto* ptr = value.getIdentifier(T_Type()).getPointer();
+        for(std::byte*& ptr : frame.blobs())
             if(ptr != nullptr)
             {
                 CUDA_CHECK(cuplaFreeHost(ptr));
+                ptr = nullptr;
             }
-        }
-    };
+    }
 
-    //! Free memory
-    template<typename T_Attribute>
-    struct FreeHostMemory
+    template<typename Frame>
+    void freeFrameMemory(Frame& frame)
     {
-        template<typename ValueType>
-        HINLINE void operator()(ValueType& value) const
+        for(auto* ptr : frame.blobs())
         {
-            using Attribute = T_Attribute;
-
-            auto* ptr = value.getIdentifier(Attribute()).getPointer();
             delete[] ptr;
+            ptr = nullptr;
         }
-    };
-
-    /*functor to create a pair for a MapTuple map*/
-    struct OperatorCreateVectorBox
-    {
-        template<typename InType>
-        struct apply
-        {
-            using type
-                = pmacc::meta::Pair<InType, pmacc::VectorDataBox<typename pmacc::traits::Resolve<InType>::type::type>>;
-        };
-    };
-
+    }
 } // namespace picongpu
diff --git a/include/picongpu/plugins/radiation/Radiation.hpp b/include/picongpu/plugins/radiation/Radiation.hpp
index 5f6ddaf28b7..fc9d7fb64d4 100644
--- a/include/picongpu/plugins/radiation/Radiation.hpp
+++ b/include/picongpu/plugins/radiation/Radiation.hpp
@@ -285,7 +285,7 @@ namespace picongpu
                 }
 
 
-                void checkpoint(uint32_t timeStep, const std::string restartDirectory)
+                void checkpoint(uint32_t timeStep, const std::string restartDirectory) override
                 {
                     // only write backup if radiation is calculated:
                     if(notifyPeriod.empty())
diff --git a/include/pmacc/PMaccConfig.cmake b/include/pmacc/PMaccConfig.cmake
index fb34f4cf01e..61a84bca778 100644
--- a/include/pmacc/PMaccConfig.cmake
+++ b/include/pmacc/PMaccConfig.cmake
@@ -384,3 +384,11 @@ endif(PMACC_BLOCKING_KERNEL)
 
 set(PMACC_VERBOSE "0" CACHE STRING "set verbose level for PMacc")
 target_compile_definitions(pmacc PUBLIC "-DPMACC_VERBOSE_LVL=${PMACC_VERBOSE}")
+
+################################################################################
+# LLAMA
+################################################################################
+
+add_subdirectory(${PMacc_DIR}/../../thirdParty/llama ${CMAKE_BINARY_DIR}/llama EXCLUDE_FROM_ALL)
+target_link_libraries(pmacc PUBLIC llama::llama)
+
diff --git a/include/pmacc/debug/VerboseLog.hpp b/include/pmacc/debug/VerboseLog.hpp
index 674f98b95df..243bdf047de 100644
--- a/include/pmacc/debug/VerboseLog.hpp
+++ b/include/pmacc/debug/VerboseLog.hpp
@@ -100,7 +100,7 @@ namespace pmacc
                  * If you get an linker error in the next two lines you have not used
                  * DEFINE_LOGLVL makro to define a named logLvl
                  */
-                if(logLvl & LogParent::log_level) /*compile-time check*/
+                if constexpr((logLvl & LogParent::log_level) != 0)
                 {
                     std::cout << LogParent::getName() << " " << getLogName(LogClass()) << "("
                               << (logLvl & LogParent::log_level) << ")"
@@ -109,9 +109,9 @@ namespace pmacc
             }
 
             template<typename T>
-            VerboseLog& operator%(T value)
+            VerboseLog& operator%([[maybe_unused]] T value)
             {
-                if(logLvl & LogParent::log_level) /*compile-time check*/
+                if constexpr((logLvl & LogParent::log_level) != 0)
                     fmt % value;
                 return *this;
             }
diff --git a/include/pmacc/math/operation/Add.hpp b/include/pmacc/math/operation/Add.hpp
index 3b02143d16a..079e3bfc87d 100644
--- a/include/pmacc/math/operation/Add.hpp
+++ b/include/pmacc/math/operation/Add.hpp
@@ -33,15 +33,15 @@ namespace pmacc
             struct Add
             {
                 template<typename Dst, typename Src>
-                HDINLINE void operator()(Dst& dst, const Src& src) const
+                HDINLINE void operator()(Dst&& dst, const Src& src) const
                 {
-                    dst += src;
+                    std::forward<Dst>(dst) += src;
                 }
 
                 template<typename Dst, typename Src, typename T_Worker>
-                HDINLINE void operator()(const T_Worker&, Dst& dst, const Src& src) const
+                HDINLINE void operator()(const T_Worker&, Dst&& dst, const Src& src) const
                 {
-                    dst += src;
+                    std::forward<Dst>(dst) += src;
                 }
             };
         } // namespace operation
diff --git a/include/pmacc/math/operation/Assign.hpp b/include/pmacc/math/operation/Assign.hpp
index 847163311fd..bfe30b343d8 100644
--- a/include/pmacc/math/operation/Assign.hpp
+++ b/include/pmacc/math/operation/Assign.hpp
@@ -32,15 +32,15 @@ namespace pmacc
             struct Assign
             {
                 template<typename Dst, typename Src>
-                HDINLINE void operator()(Dst& dst, const Src& src) const
+                HDINLINE void operator()(Dst&& dst, const Src& src) const
                 {
-                    dst = src;
+                    std::forward<Dst>(dst) = src;
                 }
 
                 template<typename Dst, typename Src, typename T_Worker>
-                HDINLINE void operator()(const T_Worker&, Dst& dst, const Src& src) const
+                HDINLINE void operator()(const T_Worker&, Dst&& dst, const Src& src) const
                 {
-                    dst = src;
+                    std::forward<Dst>(dst) = src;
                 }
             };
         } // namespace operation
diff --git a/include/pmacc/math/vector/TwistComponents.hpp b/include/pmacc/math/vector/TwistComponents.hpp
index 2ed3c0d5e41..df727884c8c 100644
--- a/include/pmacc/math/vector/TwistComponents.hpp
+++ b/include/pmacc/math/vector/TwistComponents.hpp
@@ -51,9 +51,8 @@ namespace pmacc
                     T_dim,
                     T_Accessor,
                     math::StackedNavigator<T_Navigator, math::PermutedNavigator<T_Axes>>,
-                    T_Storage>&;
+                    T_Storage>;
             };
-
         } // namespace detail
 
         /** Returns a reference of vector with twisted axes.
@@ -69,13 +68,17 @@ namespace pmacc
          * @return reference of the input vector with twisted axes.
          */
         template<typename T_Axes, typename T_Vector>
-        HDINLINE auto twistComponents(T_Vector& vector)
+        HDINLINE auto& twistComponents(T_Vector& vector)
         {
-            /* The reinterpret_cast is valid because the target type is the same as the
-             * input type except its navigator policy which does not occupy any memory though.
-             */
-            return reinterpret_cast<typename detail::TwistComponents<T_Axes, T_Vector>::type>(vector);
+            // cast to reference with new navigator
+            return reinterpret_cast<typename detail::TwistComponents<T_Axes, T_Vector>::type&>(vector);
         }
 
+        template<typename T_Axes, typename T_Vector>
+        HDINLINE auto twistComponents(T_Vector&& vector)
+        {
+            // construct new vector (with reference semantic) with new navigator
+            return typename detail::TwistComponents<T_Axes, T_Vector>::type(std::move(vector));
+        }
     } // namespace math
 } // namespace pmacc
diff --git a/include/pmacc/math/vector/Vector.hpp b/include/pmacc/math/vector/Vector.hpp
index 3c7999d933f..fc891538565 100644
--- a/include/pmacc/math/vector/Vector.hpp
+++ b/include/pmacc/math/vector/Vector.hpp
@@ -34,6 +34,8 @@
 #include <iostream>
 #include <type_traits>
 
+#include <llama/llama.hpp>
+
 namespace pmacc
 {
     namespace math
@@ -74,6 +76,36 @@ namespace pmacc
                 }
             };
 
+            template<typename T_Type, int T_Dim, typename RecordRef>
+            struct VectorLlamaRecordRefStorage
+            {
+                static_assert(llama::isRecordRef<RecordRef>);
+
+                inline static constexpr bool isConst = false;
+                inline static constexpr int dim = T_Dim;
+                using type = T_Type;
+
+                RecordRef rr;
+
+                HDINLINE
+                type& operator[](const int idx)
+                {
+                    return mp_with_index<T_Dim>(
+                        idx,
+                        [&](auto ic) LLAMA_LAMBDA_INLINE -> decltype(auto)
+                        { return rr(llama::RecordCoord<decltype(ic)::value>{}); });
+                }
+
+                HDINLINE
+                const type& operator[](const int idx) const
+                {
+                    return mp_with_index<T_Dim>(
+                        idx,
+                        [&](auto ic) LLAMA_LAMBDA_INLINE -> decltype(auto)
+                        { return rr(llama::RecordCoord<decltype(ic)::value>{}); });
+                }
+            };
+
         } // namespace detail
 
         namespace tag
@@ -108,6 +140,11 @@ namespace pmacc
             {
             }
 
+            HDINLINE
+            constexpr explicit Vector(Storage s) : Storage{std::move(s)}
+            {
+            }
+
             HDINLINE
             constexpr Vector(const type x)
             {
@@ -135,9 +172,14 @@ namespace pmacc
             HDINLINE
             constexpr Vector(const Vector& other) = default;
 
+            template<typename T_OtherAccessor, typename T_OtherNavigator>
+            HDINLINE Vector(const Vector<T_Type, dim, T_OtherAccessor, T_OtherNavigator, Storage>& other)
+                : Storage{static_cast<const Storage&>(other)}
+            {
+            }
+
             template<typename T_OtherAccessor, typename T_OtherNavigator, typename T_OtherStorage>
             HDINLINE Vector(const Vector<T_Type, dim, T_OtherAccessor, T_OtherNavigator, T_OtherStorage>& other)
-                : Storage{}
             {
                 for(uint32_t i = 0u; i < dim; i++)
                     (*this)[i] = other[i];
@@ -531,6 +573,12 @@ namespace pmacc
             }
         };
 
+        template<typename T>
+        inline constexpr bool isVector = false;
+
+        template<typename T_Type, int T_dim, typename T_Accessor, typename T_Navigator, typename T_Storage>
+        inline constexpr bool isVector<pmacc::math::Vector<T_Type, T_dim, T_Accessor, T_Navigator, T_Storage>> = true;
+
         template<typename Type, uint32_t dim, typename Accessor, typename Navigator>
         std::ostream& operator<<(std::ostream& s, const Vector<Type, dim, Accessor, Navigator>& vec)
         {
@@ -763,6 +811,40 @@ namespace pmacc
         template<typename T_Vector, uint32_t T_direction>
         HDINLINE T_Vector basisVector();
 
+        /** Creates a \ref pmacc::math::Vector backed by a LLAMA RecordRef as storage. All other properties of the
+         * Vector are taken from ProtoVec.
+         */
+        template<typename ProtoVec, typename RecordRef>
+        HDINLINE auto makeVectorWithLlamaStorage(RecordRef rr)
+        {
+            return Vector<
+                typename ProtoVec::type,
+                ProtoVec::dim,
+                typename ProtoVec::Accessor,
+                typename ProtoVec::Navigator,
+                detail::VectorLlamaRecordRefStorage<typename ProtoVec::type, ProtoVec::dim, RecordRef>>{{rr}};
+        }
+
+        namespace detail
+        {
+            template<typename T>
+            struct ReplaceVectorByArrayImpl
+            {
+                using type = T;
+            };
+
+            template<typename T_Type, int T_dim, typename T_Accessor, typename T_Navigator, typename T_Storage>
+            struct ReplaceVectorByArrayImpl<Vector<T_Type, T_dim, T_Accessor, T_Navigator, T_Storage>>
+            {
+                using type = T_Type[T_dim];
+            };
+        } // namespace detail
+
+        /** If T is a \ref pmacc::math::Vector, replaced it by an equally sized and typed array. Otherwise, just passes
+         * the type through.
+         */
+        template<typename T>
+        using ReplaceVectorByArray = typename detail::ReplaceVectorByArrayImpl<T>::type;
     } // namespace math
 } // namespace pmacc
 
diff --git a/include/pmacc/math/vector/Vector.tpp b/include/pmacc/math/vector/Vector.tpp
index 7f5ec3d2ae2..e55c6913974 100644
--- a/include/pmacc/math/vector/Vector.tpp
+++ b/include/pmacc/math/vector/Vector.tpp
@@ -108,8 +108,8 @@ namespace pmacc
 
         /** specialize l2norm2 algorithm
          */
-        template<typename Type, uint32_t dim>
-        struct L2norm2<::pmacc::math::Vector<Type, dim>>
+        template<typename Type, uint32_t dim, typename Storage>
+        struct L2norm2<::pmacc::math::Vector<Type, dim, StandardAccessor, StandardNavigator, Storage>>
         {
             using result = typename ::pmacc::math::Vector<Type, dim>::type;
 
@@ -124,12 +124,12 @@ namespace pmacc
 
         /** specialize l2norm algorithm
          */
-        template<typename Type, uint32_t dim>
-        struct L2norm<::pmacc::math::Vector<Type, dim>>
+        template<typename Type, uint32_t dim, typename Storage>
+        struct L2norm<::pmacc::math::Vector<Type, dim, StandardAccessor, StandardNavigator, Storage>>
         {
-            using result = typename ::pmacc::math::Vector<Type, dim>::type;
+            using result = typename ::pmacc::math::Vector<Type, dim, StandardAccessor, StandardNavigator, Storage>::type;
 
-            HDINLINE result operator()(const ::pmacc::math::Vector<Type, dim>& vector)
+            HDINLINE result operator()(const ::pmacc::math::Vector<Type, dim, StandardAccessor, StandardNavigator, Storage>& vector)
             {
                 result tmp = pmacc::math::l2norm2(vector);
                 return cupla::math::sqrt(tmp);
diff --git a/include/pmacc/memory/boxes/CachedBox.hpp b/include/pmacc/memory/boxes/CachedBox.hpp
index 02cc92edc67..34e6a691594 100644
--- a/include/pmacc/memory/boxes/CachedBox.hpp
+++ b/include/pmacc/memory/boxes/CachedBox.hpp
@@ -26,23 +26,30 @@
 #include "pmacc/memory/boxes/SharedBox.hpp"
 #include "pmacc/types.hpp"
 
-
-namespace pmacc
+namespace pmacc::CachedBox
 {
-    namespace CachedBox
+    template<
+        uint32_t Id_,
+        typename T_MemoryMapping,
+        typename ValueType_,
+        typename BlockDescription_,
+        typename T_Worker>
+    DINLINE auto create(T_Worker const& worker, const BlockDescription_ block)
     {
-        template<uint32_t Id_, typename ValueType_, class BlockDescription_, typename T_Worker>
-        DINLINE auto create(T_Worker const& worker, const BlockDescription_ block)
-        {
-            using OffsetOrigin = typename BlockDescription_::OffsetOrigin;
-            using Type = DataBox<SharedBox<ValueType_, typename BlockDescription_::FullSuperCellSize, Id_>>;
-            return Type{Type::init(worker)}.shift(DataSpace<OffsetOrigin::dim>{OffsetOrigin::toRT()});
-        }
+        using OffsetOrigin = typename BlockDescription_::OffsetOrigin;
+        using Type
+            = DataBox<SharedBox<ValueType_, typename BlockDescription_::FullSuperCellSize, Id_, T_MemoryMapping>>;
+        return Type{Type::init(worker)}.shift(DataSpace<OffsetOrigin::dim>{OffsetOrigin::toRT()});
+    }
 
-        template<uint32_t Id_, typename ValueType_, class BlockDescription_, typename T_Worker>
-        DINLINE auto create(T_Worker const& worker, const ValueType_& value, const BlockDescription_ block)
-        {
-            return create<Id_, ValueType_, BlockDescription_>(worker);
-        }
-    } // namespace CachedBox
-} // namespace pmacc
+    template<
+        uint32_t Id_,
+        typename T_MemoryMapping,
+        typename ValueType_,
+        typename BlockDescription_,
+        typename T_Worker>
+    DINLINE auto create(T_Worker const& worker, const ValueType_& value, const BlockDescription_ block)
+    {
+        return create<Id_, ValueType_, BlockDescription_, T_MemoryMapping>(worker);
+    }
+} // namespace pmacc::CachedBox
diff --git a/include/pmacc/memory/boxes/DataBox.hpp b/include/pmacc/memory/boxes/DataBox.hpp
index 5641e09dd42..ebda218e9fb 100644
--- a/include/pmacc/memory/boxes/DataBox.hpp
+++ b/include/pmacc/memory/boxes/DataBox.hpp
@@ -22,8 +22,12 @@
 
 #pragma once
 
+#include "SharedBox.hpp"
 #include "pmacc/attribute/FunctionSpecifier.hpp"
 #include "pmacc/dimensions/DataSpace.hpp"
+#include "pmacc/memory/shared/Allocate.hpp"
+
+#include <llama/llama.hpp>
 
 namespace pmacc
 {
@@ -48,7 +52,7 @@ namespace pmacc
         }
     } // namespace detail
 
-    template<typename Base>
+    template<typename Base, typename SFINAE = void>
     struct DataBox : Base
     {
         HDINLINE DataBox() = default;
@@ -61,7 +65,8 @@ namespace pmacc
 
         HDINLINE decltype(auto) operator()(DataSpace<Base::Dim> const& idx = {}) const
         {
-            ///@todo(bgruber): inline and replace this by if constexpr in C++17
+            /// @TODO(bgruber): inline and replace this by if constexpr in C++17 at some point. however, nvcc generates
+            /// worse code with if constexpr. Ask Rene about it.
             return detail::access(*this, idx);
         }
 
@@ -72,4 +77,97 @@ namespace pmacc
             return result;
         }
     };
-} // namespace pmacc
+
+    namespace internal
+    {
+        template<typename... Sizes>
+        HDINLINE constexpr auto toArrayExtents(math::CT::Vector<Sizes...>)
+        {
+            using V = math::CT::Vector<Sizes...>;
+            using IndexType = typename math::CT::Vector<Sizes...>::type;
+            if constexpr(V::dim == 1)
+            {
+                return llama::ArrayExtents<IndexType, V::x::value>{};
+            }
+            else if constexpr(V::dim == 2)
+            {
+                return llama::ArrayExtents<IndexType, V::y::value, V::x::value>{};
+            }
+            else if constexpr(V::dim == 3)
+            {
+                return llama::ArrayExtents<IndexType, V::z::value, V::y::value, V::x::value>{};
+            }
+            else
+            {
+                static_assert(sizeof(IndexType) == 0, "Vector dimension must be 1, 2 or 3");
+            }
+        }
+
+        template<unsigned Dim>
+        HDINLINE auto toArrayIndex(DataSpace<Dim> idx)
+        {
+            llama::ArrayIndex<typename DataSpace<Dim>::type, Dim> ai;
+            for(int i = 0; i < Dim; i++)
+                ai[i] = idx[Dim - 1 - i];
+            return ai;
+        }
+    } // namespace internal
+
+    // handle DataBox wrapping SharedBox with LLAMA
+    template<typename T_TYPE, class T_SizeVector, typename T_MemoryMapping, uint32_t T_id, uint32_t T_dim>
+    struct DataBox<
+        SharedBox<T_TYPE, T_SizeVector, T_id, T_MemoryMapping, T_dim>,
+        std::enable_if_t<!std::is_void_v<T_MemoryMapping>>>
+    {
+        using SharedBoxBase = SharedBox<T_TYPE, T_SizeVector, T_id, T_MemoryMapping, T_dim>;
+
+        inline static constexpr std::uint32_t Dim = T_dim;
+        using ValueType = T_TYPE;
+        using Size = T_SizeVector;
+
+        using SplitRecordDim = llama::TransformLeaves<T_TYPE, math::ReplaceVectorByArray>;
+        using RecordDim = std::conditional_t<T_MemoryMapping::splitVector, SplitRecordDim, T_TYPE>;
+        using ArrayExtents = decltype(internal::toArrayExtents(T_SizeVector{}));
+        using Mapping = typename T_MemoryMapping::template fn<ArrayExtents, RecordDim>;
+        using View = llama::View<Mapping, std::byte*>;
+
+        View view;
+        DataSpace<T_dim> offset{};
+
+        HDINLINE DataBox() = default;
+
+        HDINLINE DataBox(SharedBoxBase sb)
+            : view{
+                Mapping{{}},
+                llama::Array<std::byte*, 1>{
+                    const_cast<std::byte*>(reinterpret_cast<const std::byte*>(sb.fixedPointer))}}
+        {
+        }
+
+        HDINLINE decltype(auto) operator()(DataSpace<T_dim> idx = {}) const
+        {
+            auto&& ref = const_cast<View&>(view)(internal::toArrayIndex(DataSpace<T_dim>{idx + offset}));
+            if constexpr(math::isVector<T_TYPE> && llama::isRecordRef<std::remove_reference_t<decltype(ref)>>)
+                return math::makeVectorWithLlamaStorage<T_TYPE>(ref);
+            else
+                return ref;
+        }
+
+        HDINLINE DataBox shift(const DataSpace<T_dim>& offset) const
+        {
+            // TODO(bgruber): can we enhance LLAMA to make this smarter than just keeping the offset?
+            DataBox result(*this);
+            result.offset += offset;
+            return result;
+        }
+
+        template<typename T_Worker>
+        static DINLINE SharedBoxBase init(T_Worker const& worker)
+        {
+            auto& mem_sh
+                = memory::shared::allocate<T_id, memory::Array<ValueType, math::CT::volume<Size>::type::value>>(
+                    worker);
+            return {mem_sh.data()};
+        }
+    };
+} // namespace pmacc
\ No newline at end of file
diff --git a/include/pmacc/memory/boxes/SharedBox.hpp b/include/pmacc/memory/boxes/SharedBox.hpp
index 18b4d618e67..ec4e55b2c9a 100644
--- a/include/pmacc/memory/boxes/SharedBox.hpp
+++ b/include/pmacc/memory/boxes/SharedBox.hpp
@@ -21,11 +21,14 @@
 
 #pragma once
 
+#include "pmacc/mappings/kernel/MappingDescription.hpp"
 #include "pmacc/math/Vector.hpp"
 #include "pmacc/memory/Array.hpp"
 #include "pmacc/memory/shared/Allocate.hpp"
 #include "pmacc/types.hpp"
 
+#include <cstdint>
+
 namespace pmacc
 {
     namespace detail
@@ -49,7 +52,7 @@ namespace pmacc
         }
     } // namespace detail
 
-    /** create shared memory on gpu
+    /** A shared memory on gpu. Used in conjunction with \ref pmacc::DataBox.
      *
      * @tparam T_TYPE type of memory objects
      * @tparam T_Vector CT::Vector with size description (per dimension)
@@ -57,7 +60,12 @@ namespace pmacc
      *              (is needed if more than one instance of shared memory in one kernel is used)
      * @tparam T_dim dimension of the memory (supports DIM1,DIM2 and DIM3)
      */
-    template<typename T_TYPE, typename T_Vector, uint32_t T_id = 0, uint32_t T_dim = T_Vector::dim>
+    template<
+        typename T_TYPE,
+        typename T_Vector,
+        uint32_t T_id,
+        typename T_MemoryMapping,
+        uint32_t T_dim = T_Vector::dim>
     struct SharedBox
     {
         static constexpr std::uint32_t Dim = T_dim;
@@ -74,8 +82,8 @@ namespace pmacc
         HDINLINE SharedBox(SharedBox const&) = default;
 
         using ReducedType1D = T_TYPE&;
-        using ReducedType2D = SharedBox<T_TYPE, typename math::CT::shrinkTo<T_Vector, 1>::type, T_id>;
-        using ReducedType3D = SharedBox<T_TYPE, typename math::CT::shrinkTo<T_Vector, 2>::type, T_id>;
+        using ReducedType2D = SharedBox<T_TYPE, typename math::CT::shrinkTo<T_Vector, 1>::type, T_id, T_MemoryMapping>;
+        using ReducedType3D = SharedBox<T_TYPE, typename math::CT::shrinkTo<T_Vector, 2>::type, T_id, T_MemoryMapping>;
         using ReducedType
             = std::conditional_t<Dim == 1, ReducedType1D, std::conditional_t<Dim == 2, ReducedType2D, ReducedType3D>>;
 
@@ -116,7 +124,7 @@ namespace pmacc
             return {mem_sh.data()};
         }
 
-    protected:
+    public:
         PMACC_ALIGN(fixedPointer, ValueType*);
     };
 } // namespace pmacc
diff --git a/include/pmacc/particles/ParticleDescription.hpp b/include/pmacc/particles/ParticleDescription.hpp
index 2e0bc8dc0b7..4f79522a78a 100644
--- a/include/pmacc/particles/ParticleDescription.hpp
+++ b/include/pmacc/particles/ParticleDescription.hpp
@@ -68,14 +68,7 @@ namespace pmacc
         using HandleGuardRegion = T_HandleGuardRegion;
         using MethodsList = ToSeq<T_MethodsList>;
         using FrameExtensionList = ToSeq<T_FrameExtensionList>;
-        using ThisType = ParticleDescription<
-            Name,
-            SuperCellSize,
-            ValueTypeSeq,
-            FlagsList,
-            HandleGuardRegion,
-            MethodsList,
-            FrameExtensionList>;
+        using ThisType = ParticleDescription;
 
         // Compile-time check uniqueness of attributes and flags
         PMACC_CASSERT_MSG(
diff --git a/include/pmacc/particles/ParticlesBase.hpp b/include/pmacc/particles/ParticlesBase.hpp
index 71ece0b21ef..45b12870a58 100644
--- a/include/pmacc/particles/ParticlesBase.hpp
+++ b/include/pmacc/particles/ParticlesBase.hpp
@@ -41,7 +41,7 @@ namespace pmacc
     /* Tag used for marking particle types */
     struct ParticlesTag;
 
-    template<typename T_ParticleDescription, class T_MappingDesc, typename T_DeviceHeap>
+    template<typename T_ParticleDescription, typename T_FrameMemoryLayout, class T_MappingDesc, typename T_DeviceHeap>
     class ParticlesBase : public SimulationFieldHelper<T_MappingDesc>
     {
         using ParticleDescription = T_ParticleDescription;
@@ -53,6 +53,7 @@ namespace pmacc
          */
         using BufferType = ParticlesBuffer<
             ParticleDescription,
+            T_FrameMemoryLayout,
             typename MappingDesc::SuperCellSize,
             T_DeviceHeap,
             MappingDesc::Dim>;
diff --git a/include/pmacc/particles/ParticlesBase.tpp b/include/pmacc/particles/ParticlesBase.tpp
index 48836c6ce7d..9620ecbac62 100644
--- a/include/pmacc/particles/ParticlesBase.tpp
+++ b/include/pmacc/particles/ParticlesBase.tpp
@@ -32,8 +32,9 @@
 
 namespace pmacc
 {
-    template<typename T_ParticleDescription, class MappingDesc, typename T_DeviceHeap>
-    void ParticlesBase<T_ParticleDescription, MappingDesc, T_DeviceHeap>::deleteGuardParticles(uint32_t exchangeType)
+    template<typename T_ParticleDescription, typename T_FrameMemoryLayout, class MappingDesc, typename T_DeviceHeap>
+    void ParticlesBase<T_ParticleDescription, T_FrameMemoryLayout, MappingDesc, T_DeviceHeap>::deleteGuardParticles(
+        uint32_t exchangeType)
     {
         ExchangeMapping<GUARD, MappingDesc> mapper(this->cellDescription, exchangeType);
 
@@ -43,9 +44,9 @@ namespace pmacc
         (mapper.getGridDim())(particlesBuffer->getDeviceParticleBox(), mapper);
     }
 
-    template<typename T_ParticleDescription, class MappingDesc, typename T_DeviceHeap>
+    template<typename T_ParticleDescription, typename T_FrameMemoryLayout, class MappingDesc, typename T_DeviceHeap>
     template<uint32_t T_area>
-    void ParticlesBase<T_ParticleDescription, MappingDesc, T_DeviceHeap>::deleteParticlesInArea()
+    void ParticlesBase<T_ParticleDescription, T_FrameMemoryLayout, MappingDesc, T_DeviceHeap>::deleteParticlesInArea()
     {
         auto const mapper = makeAreaMapper<T_area>(this->cellDescription);
 
@@ -55,15 +56,16 @@ namespace pmacc
         (mapper.getGridDim())(particlesBuffer->getDeviceParticleBox(), mapper);
     }
 
-    template<typename T_ParticleDescription, class MappingDesc, typename T_DeviceHeap>
-    void ParticlesBase<T_ParticleDescription, MappingDesc, T_DeviceHeap>::reset(uint32_t)
+    template<typename T_ParticleDescription, typename T_FrameMemoryLayout, class MappingDesc, typename T_DeviceHeap>
+    void ParticlesBase<T_ParticleDescription, T_FrameMemoryLayout, MappingDesc, T_DeviceHeap>::reset(uint32_t)
     {
         deleteParticlesInArea<CORE + BORDER + GUARD>();
         particlesBuffer->reset();
     }
 
-    template<typename T_ParticleDescription, class MappingDesc, typename T_DeviceHeap>
-    void ParticlesBase<T_ParticleDescription, MappingDesc, T_DeviceHeap>::copyGuardToExchange(uint32_t exchangeType)
+    template<typename T_ParticleDescription, typename T_FrameMemoryLayout, class MappingDesc, typename T_DeviceHeap>
+    void ParticlesBase<T_ParticleDescription, T_FrameMemoryLayout, MappingDesc, T_DeviceHeap>::copyGuardToExchange(
+        uint32_t exchangeType)
     {
         if(particlesBuffer->hasSendExchange(exchangeType))
         {
@@ -81,8 +83,9 @@ namespace pmacc
         }
     }
 
-    template<typename T_ParticleDescription, class MappingDesc, typename T_DeviceHeap>
-    void ParticlesBase<T_ParticleDescription, MappingDesc, T_DeviceHeap>::insertParticles(uint32_t exchangeType)
+    template<typename T_ParticleDescription, typename T_FrameMemoryLayout, class MappingDesc, typename T_DeviceHeap>
+    void ParticlesBase<T_ParticleDescription, T_FrameMemoryLayout, MappingDesc, T_DeviceHeap>::insertParticles(
+        uint32_t exchangeType)
     {
         if(particlesBuffer->hasReceiveExchange(exchangeType))
         {
diff --git a/include/pmacc/particles/memory/buffers/ParticlesBuffer.hpp b/include/pmacc/particles/memory/buffers/ParticlesBuffer.hpp
index 665561742a9..31f68752e92 100644
--- a/include/pmacc/particles/memory/buffers/ParticlesBuffer.hpp
+++ b/include/pmacc/particles/memory/buffers/ParticlesBuffer.hpp
@@ -43,8 +43,22 @@
 
 namespace pmacc
 {
-    namespace detail
+    /**
+     * Describes DIM-dimensional buffer for particles data on the host.
+     *
+     * @tParam T_ParticleDescription Object which describe a frame @see ParticleDescription.hpp
+     * @tparam SuperCellSize_ TVec which descripe size of a superce
+     * @tparam DIM dimension of the buffer (1-3)
+     */
+    template<
+        typename T_ParticleDescription,
+        typename T_FrameMemoryLayout,
+        class SuperCellSize_,
+        typename T_DeviceHeap,
+        unsigned DIM>
+    class ParticlesBuffer
     {
+    public:
         /** create static array
          */
         template<uint32_t T_size>
@@ -53,24 +67,9 @@ namespace pmacc
             template<typename X>
             struct apply
             {
-                using type = meta::Pair<
-                    X,
-                    StaticArray<typename traits::Resolve<X>::type::type, std::integral_constant<uint32_t, T_size>>>;
             };
         };
-    } // namespace detail
 
-    /**
-     * Describes DIM-dimensional buffer for particles data on the host.
-     *
-     * @tParam T_ParticleDescription Object which describe a frame @see ParticleDescription.hpp
-     * @tparam SuperCellSize_ TVec which descripe size of a superce
-     * @tparam DIM dimension of the buffer (1-3)
-     */
-    template<typename T_ParticleDescription, class SuperCellSize_, typename T_DeviceHeap, unsigned DIM>
-    class ParticlesBuffer
-    {
-    public:
         /** type of the border frame management object
          *
          * contains:
@@ -100,9 +99,8 @@ namespace pmacc
          *
          * a group of particles is stored as frame
          */
-        using FrameType = Frame<
-            detail::OperatorCreatePairStaticArray<pmacc::math::CT::volume<SuperCellSize>::type::value>,
-            FrameDescription>;
+        using FrameType
+            = Frame<pmacc::math::CT::volume<SuperCellSize>::type::value, FrameDescription, T_FrameMemoryLayout>;
 
         using FrameDescriptionBorder =
             typename ReplaceValueTypeSeq<T_ParticleDescription, ParticleAttributeListBorder>::type;
@@ -112,7 +110,7 @@ namespace pmacc
          * - each frame contains only one particle
          * - local administration attributes of a particle are removed
          */
-        using FrameTypeBorder = Frame<detail::OperatorCreatePairStaticArray<1U>, FrameDescriptionBorder>;
+        using FrameTypeBorder = Frame<1, FrameDescriptionBorder, llama::mapping::BindOne<>>;
 
         using SuperCellType = SuperCell<FrameType>;
 
diff --git a/include/pmacc/particles/memory/dataTypes/Particle.hpp b/include/pmacc/particles/memory/dataTypes/Particle.hpp
index 67b94bb5f80..b93f13617f3 100644
--- a/include/pmacc/particles/memory/dataTypes/Particle.hpp
+++ b/include/pmacc/particles/memory/dataTypes/Particle.hpp
@@ -41,6 +41,8 @@
 
 #include <type_traits>
 
+#include <llama/llama.hpp>
+
 namespace pmacc
 {
     /** A single particle of a @see Frame
@@ -119,20 +121,18 @@ namespace pmacc
          * @return result of operator[] of the Frame
          */
         template<typename T_Key>
-        HDINLINE auto& operator[](const T_Key key)
+        HDINLINE decltype(auto) operator[](const T_Key key)
         {
             PMACC_CASSERT_MSG_TYPE(key_not_available, T_Key, traits::HasIdentifier<Particle, T_Key>::type::value);
-
-            return frame->getIdentifier(key)[idx];
+            return frame->get(idx, key);
         }
 
         /** const version of method operator(const T_Key) */
         template<typename T_Key>
-        HDINLINE const auto& operator[](const T_Key key) const
+        HDINLINE decltype(auto) operator[](const T_Key key) const
         {
             PMACC_CASSERT_MSG_TYPE(key_not_available, T_Key, traits::HasIdentifier<Particle, T_Key>::type::value);
-
-            return frame->getIdentifier(key)[idx];
+            return frame->get(idx, key);
         }
 
         HDINLINE
diff --git a/include/pmacc/particles/memory/frames/Frame.hpp b/include/pmacc/particles/memory/frames/Frame.hpp
index 678c7cbbe67..e64ca9e8c30 100644
--- a/include/pmacc/particles/memory/frames/Frame.hpp
+++ b/include/pmacc/particles/memory/frames/Frame.hpp
@@ -34,36 +34,145 @@
 #include "pmacc/traits/HasIdentifier.hpp"
 #include "pmacc/types.hpp"
 
+#include <boost/mp11.hpp>
+#include <boost/mp11/mpl.hpp>
 #include <boost/mpl/apply.hpp>
 
+#include <llama/llama.hpp>
+
 namespace pmacc
 {
     namespace pmath = pmacc::math;
 
+    namespace detail
+    {
+        template<typename PICValueType>
+        using MakeLlamaField = llama::Field<PICValueType, typename traits::Resolve<PICValueType>::type::type>;
+
+        template<typename ValueTypeSeq>
+        using RecordDimFromValueTypeSeq = mp_rename<mp_transform<MakeLlamaField, ValueTypeSeq>, llama::Record>;
+
+        template<typename T_MemoryLayout, typename SFINAE = void>
+        inline constexpr bool splitVector = false;
+
+        template<typename T_MemoryLayout>
+        inline constexpr bool splitVector<T_MemoryLayout, std::void_t<decltype(T_MemoryLayout::splitVector)>>
+            = T_MemoryLayout::splitVector;
+
+        template<std::size_t T_size, typename T_ParticleDescription, typename T_MemoryLayout>
+        struct ViewHolder
+        {
+        private:
+            using IndexType = int; // TODO(bgruber): where do I get this type from?
+            inline static constexpr IndexType particlesPerFrame
+                = (T_size == llama::dyn) ? static_cast<IndexType>(llama::dyn) : static_cast<IndexType>(T_size);
+            using RawRecordDim = RecordDimFromValueTypeSeq<typename T_ParticleDescription::ValueTypeSeq>;
+            using SplitRecordDim = llama::TransformLeaves<RawRecordDim, pmath::ReplaceVectorByArray>;
+
+        public:
+            using RecordDim = std::conditional_t<splitVector<T_MemoryLayout>, SplitRecordDim, RawRecordDim>;
+            using ArrayExtents = llama::ArrayExtents<IndexType, particlesPerFrame>;
+            using Mapping = typename T_MemoryLayout::template fn<ArrayExtents, RecordDim>;
+            static_assert(
+                particlesPerFrame == llama::dyn || Mapping::blobCount == 1,
+                "For statically sizes frames, only mappings with a single blob are supported");
+            using BlobType = std::conditional_t<
+                particlesPerFrame == llama::dyn,
+                std::byte*,
+                llama::Array<std::byte, Mapping{ArrayExtents{}}.blobSize(0)>>;
+            using View = llama::View<Mapping, BlobType>;
+
+        private:
+            inline static constexpr std::size_t alignment
+                = particlesPerFrame == llama::dyn ? alignof(std::byte*) : llama::alignOf<RecordDim>;
+
+        public:
+            alignas(alignment) View view;
+
+            ViewHolder() = default;
+
+            HDINLINE ViewHolder(IndexType size) : view{Mapping{ArrayExtents{size}}}
+            {
+            }
+
+            HDINLINE auto& blobs()
+            {
+                return view.blobs();
+            }
+
+            HDINLINE auto blobSize(int i)
+            {
+                return view.mapping().blobSize(i);
+            }
+        };
+
+        /** Proxy reference for particle attributes which are backed by a LLAMA RecordRef. This could become obsolete
+         * when LLAMA's RecordRef supports operator= from TupleLike objects. Ask bgruber about it every now and then.
+         */
+        template<typename RecordRef>
+        struct LlamaParticleAttribute
+        {
+            template<typename OtherRecordRef>
+            auto operator=(const LlamaParticleAttribute<OtherRecordRef>& lpa) -> LlamaParticleAttribute&
+            {
+                rr = lpa.rr;
+                return *this;
+            }
+
+            template<typename OtherRecordRef>
+            auto operator=(LlamaParticleAttribute<OtherRecordRef>&& lpa) -> LlamaParticleAttribute&
+            {
+                rr = lpa.rr;
+                return *this;
+            }
+
+            template<typename T>
+            auto operator=(T&& t) -> LlamaParticleAttribute&
+            {
+                rr.store(std::forward<T>(t));
+                return *this;
+            }
+
+            template<typename T>
+            operator T() const
+            {
+                return rr.template loadAs<T>();
+            }
+
+            RecordRef rr;
+        };
+    } // namespace detail
+
     /** Frame is a storage for arbitrary number >0 of Particles with attributes
      *
-     * @tparam T_CreatePairOperator unary template operator to create a boost pair
-     *                              from single type ( pair<name,dataType> )
-     *                              @see MapTupel
+     * @tparam T_size Static number of particles this frame stores, or llama::dyn for dynamic size
      * @tparam T_ValueTypeSeq sequence with value_identifier
      * @tparam T_MethodsList sequence of classes with particle methods
      *                       (e.g. calculate mass, gamma, ...)
      * @tparam T_Flags sequence with identifiers to add flags on a frame
      *                 (e.g. useSolverXY, calcRadiation, ...)
+     * @tparam T_MemoryLayout Memory layout to be used for the particle attribute data.
      */
-    template<typename T_CreatePairOperator, typename T_ParticleDescription>
+    template<std::size_t T_size, typename T_ParticleDescription, typename T_MemoryLayout>
     struct Frame;
 
-    template<typename T_CreatePairOperator, typename T_ParticleDescription>
+    template<std::size_t T_size, typename T_ParticleDescription, typename T_MemoryLayout>
     struct Frame
-        : protected pmath::MapTuple<
-              typename SeqToMap<typename T_ParticleDescription::ValueTypeSeq, T_CreatePairOperator>::type>
+        : public detail::ViewHolder<T_size, T_ParticleDescription, T_MemoryLayout>
         , public InheritLinearly<mp_append<
               typename T_ParticleDescription::MethodsList,
               typename OperateOnSeq<
                   typename T_ParticleDescription::FrameExtensionList,
-                  boost::mpl::apply1<boost::mpl::_1, Frame<T_CreatePairOperator, T_ParticleDescription>>>::type>>
+                  boost::mpl::apply1<boost::mpl::_1, Frame<T_size, T_ParticleDescription, T_MemoryLayout>>>::type>>
     {
+        using ViewHolder = detail::ViewHolder<T_size, T_ParticleDescription, T_MemoryLayout>;
+        static_assert(
+            T_size == llama::dyn
+            || sizeof(ViewHolder) ==
+                typename ViewHolder::Mapping{llama::ArrayExtents<int, static_cast<int>(T_size)>{}}.blobSize(0));
+
+        using ViewHolder::ViewHolder;
+
         using ParticleDescription = T_ParticleDescription;
         using Name = typename ParticleDescription::Name;
         using SuperCellSize = typename ParticleDescription::SuperCellSize;
@@ -71,9 +180,7 @@ namespace pmacc
         using MethodsList = typename ParticleDescription::MethodsList;
         using FlagList = typename ParticleDescription::FlagsList;
         using FrameExtensionList = typename ParticleDescription::FrameExtensionList;
-        using ThisType = Frame<T_CreatePairOperator, ParticleDescription>;
-        /* definition of the MapTupel where we inherit from*/
-        using BaseType = pmath::MapTuple<typename SeqToMap<ValueTypeSeq, T_CreatePairOperator>::type>;
+        using ThisType = Frame;
 
         /* type of a single particle*/
         using ParticleType = pmacc::Particle<ThisType>;
@@ -90,25 +197,35 @@ namespace pmacc
             return ParticleType(*this, idx);
         }
 
-        /** access attribute with a identifier
-         *
-         * @param T_Key instance of identifier type
-         *              (can be an alias, value_identifier or any other class)
-         * @return result of operator[] of MapTupel
-         */
-        template<typename T_Key>
-        HDINLINE auto& getIdentifier(const T_Key)
+    private:
+        template<typename Frame, typename T_Key>
+        static HDINLINE decltype(auto) at(Frame& f, uint32_t i, const T_Key key)
         {
             using Key = typename GetKeyFromAlias<ValueTypeSeq, T_Key, errorHandlerPolicies::ThrowValueNotFound>::type;
-            return BaseType::operator[](Key());
+            auto&& ref = f.view(i)(Key{});
+
+            using OldDstType = typename traits::Resolve<Key>::type::type;
+            using RefType = std::remove_reference_t<decltype(ref)>;
+
+            if constexpr(pmath::isVector<OldDstType> && llama::isRecordRef<RefType>)
+                return pmath::makeVectorWithLlamaStorage<OldDstType>(ref);
+            else if constexpr(llama::isRecordRef<RefType>)
+                return detail::LlamaParticleAttribute<RefType>{ref};
+            else
+                return ref;
         }
 
-        /** const version of method getIdentifier(const T_Key) */
+    public:
         template<typename T_Key>
-        HDINLINE const auto& getIdentifier(const T_Key) const
+        HDINLINE decltype(auto) get(uint32_t i, const T_Key)
         {
-            using Key = typename GetKeyFromAlias<ValueTypeSeq, T_Key, errorHandlerPolicies::ThrowValueNotFound>::type;
-            return BaseType::operator[](Key());
+            return at(*this, i, T_Key{});
+        }
+
+        template<typename T_Key>
+        HDINLINE decltype(auto) get(uint32_t i, const T_Key) const
+        {
+            return at(*this, i, T_Key{});
         }
 
         HINLINE static std::string getName()
@@ -119,11 +236,15 @@ namespace pmacc
 
     namespace traits
     {
-        template<typename T_IdentifierName, typename T_CreatePairOperator, typename T_ParticleDescription>
-        struct HasIdentifier<pmacc::Frame<T_CreatePairOperator, T_ParticleDescription>, T_IdentifierName>
+        template<
+            typename T_IdentifierName,
+            std::size_t T_size,
+            typename T_ParticleDescription,
+            typename T_MemoryLayout>
+        struct HasIdentifier<pmacc::Frame<T_size, T_ParticleDescription, T_MemoryLayout>, T_IdentifierName>
         {
         private:
-            using FrameType = pmacc::Frame<T_CreatePairOperator, T_ParticleDescription>;
+            using FrameType = pmacc::Frame<T_size, T_ParticleDescription, T_MemoryLayout>;
 
         public:
             using ValueTypeSeq = typename FrameType::ValueTypeSeq;
@@ -136,11 +257,15 @@ namespace pmacc
                                                                                   // needed because of nvcc 11.0 bug
         };
 
-        template<typename T_IdentifierName, typename T_CreatePairOperator, typename T_ParticleDescription>
-        struct HasFlag<pmacc::Frame<T_CreatePairOperator, T_ParticleDescription>, T_IdentifierName>
+        template<
+            typename T_IdentifierName,
+            std::size_t T_size,
+            typename T_ParticleDescription,
+            typename T_MemoryLayout>
+        struct HasFlag<pmacc::Frame<T_size, T_ParticleDescription, T_MemoryLayout>, T_IdentifierName>
         {
         private:
-            using FrameType = pmacc::Frame<T_CreatePairOperator, T_ParticleDescription>;
+            using FrameType = pmacc::Frame<T_size, T_ParticleDescription, T_MemoryLayout>;
             using SolvedAliasName = typename pmacc::traits::GetFlagType<FrameType, T_IdentifierName>::type;
             using FlagList = typename FrameType::FlagList;
 
@@ -148,11 +273,15 @@ namespace pmacc
             using type = mp_contains<FlagList, SolvedAliasName>;
         };
 
-        template<typename T_IdentifierName, typename T_CreatePairOperator, typename T_ParticleDescription>
-        struct GetFlagType<pmacc::Frame<T_CreatePairOperator, T_ParticleDescription>, T_IdentifierName>
+        template<
+            typename T_IdentifierName,
+            std::size_t T_size,
+            typename T_ParticleDescription,
+            typename T_MemoryLayout>
+        struct GetFlagType<pmacc::Frame<T_size, T_ParticleDescription, T_MemoryLayout>, T_IdentifierName>
         {
         private:
-            using FrameType = pmacc::Frame<T_CreatePairOperator, T_ParticleDescription>;
+            using FrameType = pmacc::Frame<T_size, T_ParticleDescription, T_MemoryLayout>;
             using FlagList = typename FrameType::FlagList;
 
         public:
diff --git a/share/picongpu/benchmarks/TWEAC-FOM/include/picongpu/param/memory.param b/share/picongpu/benchmarks/TWEAC-FOM/include/picongpu/param/memory.param
index 42718ab30d9..cdc48e1deb5 100644
--- a/share/picongpu/benchmarks/TWEAC-FOM/include/picongpu/param/memory.param
+++ b/share/picongpu/benchmarks/TWEAC-FOM/include/picongpu/param/memory.param
@@ -114,4 +114,19 @@ namespace picongpu
      */
     constexpr bool fieldTmpSupportGatherCommunication = true;
 
+    struct ParticleFrameMemoryLayout
+        : llama::mapping::BindSoA<llama::mapping::Blobs::Single, llama::mapping::SubArrayAlignment::Align>
+    {
+        inline static constexpr bool splitVector = false;
+    };
+
+    struct ParticleFrameMemoryLayoutOpenPMD : llama::mapping::BindSoA<llama::mapping::Blobs::OnePerField>
+    {
+        inline static constexpr bool splitVector = false;
+    };
+
+    struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<>
+    {
+        inline static constexpr bool splitVector = false;
+    };
 } // namespace picongpu
diff --git a/share/picongpu/benchmarks/Thermal/include/picongpu/param/memory.param b/share/picongpu/benchmarks/Thermal/include/picongpu/param/memory.param
index 38fae0c61e9..af35db5b358 100644
--- a/share/picongpu/benchmarks/Thermal/include/picongpu/param/memory.param
+++ b/share/picongpu/benchmarks/Thermal/include/picongpu/param/memory.param
@@ -112,4 +112,19 @@ namespace picongpu
      */
     constexpr bool fieldTmpSupportGatherCommunication = false;
 
+    struct ParticleFrameMemoryLayout
+        : llama::mapping::BindSoA<llama::mapping::Blobs::Single, llama::mapping::SubArrayAlignment::Align>
+    {
+        inline static constexpr bool splitVector = false;
+    };
+
+    struct ParticleFrameMemoryLayoutOpenPMD : llama::mapping::BindSoA<llama::mapping::Blobs::OnePerField>
+    {
+        inline static constexpr bool splitVector = false;
+    };
+
+    struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<>
+    {
+        inline static constexpr bool splitVector = false;
+    };
 } // namespace picongpu
diff --git a/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/memory.param b/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/memory.param
index ffb1f234f6d..d3b093da5b1 100644
--- a/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/memory.param
+++ b/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/memory.param
@@ -100,4 +100,19 @@ namespace picongpu
      */
     constexpr bool fieldTmpSupportGatherCommunication = true;
 
+    struct ParticleFrameMemoryLayout
+        : llama::mapping::BindSoA<llama::mapping::Blobs::Single, llama::mapping::SubArrayAlignment::Align>
+    {
+        inline static constexpr bool splitVector = false;
+    };
+
+    struct ParticleFrameMemoryLayoutOpenPMD : llama::mapping::BindSoA<llama::mapping::Blobs::OnePerField>
+    {
+        inline static constexpr bool splitVector = false;
+    };
+
+    struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<>
+    {
+        inline static constexpr bool splitVector = false;
+    };
 } // namespace picongpu
diff --git a/share/picongpu/examples/FoilLCT/include/picongpu/param/memory.param b/share/picongpu/examples/FoilLCT/include/picongpu/param/memory.param
index bbea879ac50..1e774267cab 100644
--- a/share/picongpu/examples/FoilLCT/include/picongpu/param/memory.param
+++ b/share/picongpu/examples/FoilLCT/include/picongpu/param/memory.param
@@ -114,4 +114,19 @@ namespace picongpu
      */
     constexpr bool fieldTmpSupportGatherCommunication = true;
 
+    struct ParticleFrameMemoryLayout
+        : llama::mapping::BindSoA<llama::mapping::Blobs::Single, llama::mapping::SubArrayAlignment::Align>
+    {
+        inline static constexpr bool splitVector = false;
+    };
+
+    struct ParticleFrameMemoryLayoutOpenPMD : llama::mapping::BindSoA<llama::mapping::Blobs::OnePerField>
+    {
+        inline static constexpr bool splitVector = false;
+    };
+
+    struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<>
+    {
+        inline static constexpr bool splitVector = false;
+    };
 } // namespace picongpu
diff --git a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/memory.param b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/memory.param
index 5eb280f01e6..a630167f568 100644
--- a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/memory.param
+++ b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/memory.param
@@ -114,4 +114,19 @@ namespace picongpu
      */
     constexpr bool fieldTmpSupportGatherCommunication = true;
 
+    struct ParticleFrameMemoryLayout
+        : llama::mapping::BindSoA<llama::mapping::Blobs::Single, llama::mapping::SubArrayAlignment::Align>
+    {
+        inline static constexpr bool splitVector = false;
+    };
+
+    struct ParticleFrameMemoryLayoutOpenPMD : llama::mapping::BindSoA<llama::mapping::Blobs::OnePerField>
+    {
+        inline static constexpr bool splitVector = false;
+    };
+
+    struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<>
+    {
+        inline static constexpr bool splitVector = false;
+    };
 } // namespace picongpu
diff --git a/share/picongpu/examples/WeibelTransverse/include/picongpu/param/memory.param b/share/picongpu/examples/WeibelTransverse/include/picongpu/param/memory.param
index 257fc0b9b00..734df0f3434 100644
--- a/share/picongpu/examples/WeibelTransverse/include/picongpu/param/memory.param
+++ b/share/picongpu/examples/WeibelTransverse/include/picongpu/param/memory.param
@@ -114,4 +114,19 @@ namespace picongpu
      */
     constexpr bool fieldTmpSupportGatherCommunication = true;
 
+    struct ParticleFrameMemoryLayout
+        : llama::mapping::BindSoA<llama::mapping::Blobs::Single, llama::mapping::SubArrayAlignment::Align>
+    {
+        inline static constexpr bool splitVector = false;
+    };
+
+    struct ParticleFrameMemoryLayoutOpenPMD : llama::mapping::BindSoA<llama::mapping::Blobs::OnePerField>
+    {
+        inline static constexpr bool splitVector = false;
+    };
+
+    struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<>
+    {
+        inline static constexpr bool splitVector = false;
+    };
 } // namespace picongpu
diff --git a/share/picongpu/examples/atomicPhysics/include/picongpu/param/memory.param b/share/picongpu/examples/atomicPhysics/include/picongpu/param/memory.param
index 1a5af5925c2..b7e58d30790 100644
--- a/share/picongpu/examples/atomicPhysics/include/picongpu/param/memory.param
+++ b/share/picongpu/examples/atomicPhysics/include/picongpu/param/memory.param
@@ -114,4 +114,19 @@ namespace picongpu
      */
     constexpr bool fieldTmpSupportGatherCommunication = true;
 
+    struct ParticleFrameMemoryLayout
+        : llama::mapping::BindSoA<llama::mapping::Blobs::Single, llama::mapping::SubArrayAlignment::Align>
+    {
+        inline static constexpr bool splitVector = false;
+    };
+
+    struct ParticleFrameMemoryLayoutOpenPMD : llama::mapping::BindSoA<llama::mapping::Blobs::OnePerField>
+    {
+        inline static constexpr bool splitVector = false;
+    };
+
+    struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<>
+    {
+        inline static constexpr bool splitVector = false;
+    };
 } // namespace picongpu
diff --git a/share/picongpu/pypicongpu/template/include/picongpu/param/memory.param b/share/picongpu/pypicongpu/template/include/picongpu/param/memory.param
index f966b7ffa80..0f87144675e 100644
--- a/share/picongpu/pypicongpu/template/include/picongpu/param/memory.param
+++ b/share/picongpu/pypicongpu/template/include/picongpu/param/memory.param
@@ -114,4 +114,18 @@ namespace picongpu
      */
     constexpr bool fieldTmpSupportGatherCommunication = true;
 
+    struct ParticleFrameMemoryLayout : llama::mapping::BindSoA<false, true>
+    {
+        inline static constexpr bool splitVector = false;
+    };
+
+    struct ParticleFrameMemoryLayoutOpenPMD : llama::mapping::BindSoA<true>
+    {
+        inline static constexpr bool splitVector = false;
+    };
+
+    struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<>
+    {
+        inline static constexpr bool splitVector = false;
+    };
 } // namespace picongpu
diff --git a/share/picongpu/tests/CollisionsBeamRelaxation/include/picongpu/param/memory.param b/share/picongpu/tests/CollisionsBeamRelaxation/include/picongpu/param/memory.param
index 4ef4235ef26..52522663950 100644
--- a/share/picongpu/tests/CollisionsBeamRelaxation/include/picongpu/param/memory.param
+++ b/share/picongpu/tests/CollisionsBeamRelaxation/include/picongpu/param/memory.param
@@ -114,4 +114,19 @@ namespace picongpu
      */
     constexpr bool fieldTmpSupportGatherCommunication = true;
 
+    struct ParticleFrameMemoryLayout
+        : llama::mapping::BindSoA<llama::mapping::Blobs::Single, llama::mapping::SubArrayAlignment::Align>
+    {
+        inline static constexpr bool splitVector = false;
+    };
+
+    struct ParticleFrameMemoryLayoutOpenPMD : llama::mapping::BindSoA<llama::mapping::Blobs::OnePerField>
+    {
+        inline static constexpr bool splitVector = false;
+    };
+
+    struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<>
+    {
+        inline static constexpr bool splitVector = false;
+    };
 } // namespace picongpu
diff --git a/share/picongpu/tests/CollisionsThermalisation/include/picongpu/param/memory.param b/share/picongpu/tests/CollisionsThermalisation/include/picongpu/param/memory.param
index 4ef4235ef26..52522663950 100644
--- a/share/picongpu/tests/CollisionsThermalisation/include/picongpu/param/memory.param
+++ b/share/picongpu/tests/CollisionsThermalisation/include/picongpu/param/memory.param
@@ -114,4 +114,19 @@ namespace picongpu
      */
     constexpr bool fieldTmpSupportGatherCommunication = true;
 
+    struct ParticleFrameMemoryLayout
+        : llama::mapping::BindSoA<llama::mapping::Blobs::Single, llama::mapping::SubArrayAlignment::Align>
+    {
+        inline static constexpr bool splitVector = false;
+    };
+
+    struct ParticleFrameMemoryLayoutOpenPMD : llama::mapping::BindSoA<llama::mapping::Blobs::OnePerField>
+    {
+        inline static constexpr bool splitVector = false;
+    };
+
+    struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<>
+    {
+        inline static constexpr bool splitVector = false;
+    };
 } // namespace picongpu
diff --git a/share/picongpu/tests/KHI_growthRate/include/picongpu/param/memory.param b/share/picongpu/tests/KHI_growthRate/include/picongpu/param/memory.param
index 5eb280f01e6..77baf7380fb 100644
--- a/share/picongpu/tests/KHI_growthRate/include/picongpu/param/memory.param
+++ b/share/picongpu/tests/KHI_growthRate/include/picongpu/param/memory.param
@@ -114,4 +114,27 @@ namespace picongpu
      */
     constexpr bool fieldTmpSupportGatherCommunication = true;
 
+    // struct ParticleFrameMemoryLayout : llama::mapping::BindAoS<false>
+    struct ParticleFrameMemoryLayout
+        : llama::mapping::BindSoA<llama::mapping::Blobs::Single, llama::mapping::SubArrayAlignment::Align>
+    // struct ParticleFrameMemoryLayout : llama::mapping::BindAoSoA<16>
+    // struct ParticleFrameMemoryLayout : llama::mapping::BindAoSoA<32>
+    // struct ParticleFrameMemoryLayout : llama::mapping::BindAoSoA<64>
+    {
+        inline static constexpr bool splitVector = false;
+    };
+
+    struct ParticleFrameMemoryLayoutOpenPMD : llama::mapping::BindSoA<llama::mapping::Blobs::OnePerField>
+    {
+        inline static constexpr bool splitVector = false;
+    };
+
+    // struct SharedDataBoxMemoryLayout : llama::mapping::BindSoA<false>
+    // struct SharedDataBoxMemoryLayout : llama::mapping::BindSoA<false, false,
+    // llama::mapping::LinearizeArrayDimsFortran> struct SharedDataBoxMemoryLayout : llama::mapping::BindAoSoA<32>
+    struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<>
+    // struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<true, llama::mapping::LinearizeArrayDimsFortran>
+    {
+        inline static constexpr bool splitVector = false;
+    };
 } // namespace picongpu
diff --git a/share/picongpu/tests/compileCombinedAttributes/include/picongpu/param/memory.param b/share/picongpu/tests/compileCombinedAttributes/include/picongpu/param/memory.param
index 9f88b040b7d..19f999657cf 100644
--- a/share/picongpu/tests/compileCombinedAttributes/include/picongpu/param/memory.param
+++ b/share/picongpu/tests/compileCombinedAttributes/include/picongpu/param/memory.param
@@ -114,4 +114,19 @@ namespace picongpu
      */
     constexpr bool fieldTmpSupportGatherCommunication = true;
 
+    struct ParticleFrameMemoryLayout
+        : llama::mapping::BindSoA<llama::mapping::Blobs::Single, llama::mapping::SubArrayAlignment::Align>
+    {
+        inline static constexpr bool splitVector = false;
+    };
+
+    struct ParticleFrameMemoryLayoutOpenPMD : llama::mapping::BindSoA<llama::mapping::Blobs::OnePerField>
+    {
+        inline static constexpr bool splitVector = false;
+    };
+
+    struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<>
+    {
+        inline static constexpr bool splitVector = false;
+    };
 } // namespace picongpu
diff --git a/share/pmacc/examples/gameOfLife2D/include/Evolution.hpp b/share/pmacc/examples/gameOfLife2D/include/Evolution.hpp
index 1b94c9144f2..518f8094b85 100644
--- a/share/pmacc/examples/gameOfLife2D/include/Evolution.hpp
+++ b/share/pmacc/examples/gameOfLife2D/include/Evolution.hpp
@@ -71,7 +71,7 @@ namespace gol
                 using Type = typename T_BoxReadOnly::ValueType;
                 using SuperCellSize = typename T_Mapping::SuperCellSize;
                 using BlockArea = SuperCellDescription<SuperCellSize, math::CT::Int<1, 1>, math::CT::Int<1, 1>>;
-                auto cache = CachedBox::create<0, Type>(worker, BlockArea());
+                auto cache = CachedBox::create<0, SharedDataBoxMemoryLayout, Type>(worker, BlockArea());
 
                 Space const block(mapper.getSuperCellIndex(Space(cupla::blockIdx(worker.getAcc()))));
                 Space const blockCell = block * T_Mapping::SuperCellSize::toRT();
diff --git a/thirdParty/llama b/thirdParty/llama
new file mode 160000
index 00000000000..415009123fd
--- /dev/null
+++ b/thirdParty/llama
@@ -0,0 +1 @@
+Subproject commit 415009123fd00b955bb2f63be30c7733c2fb006d