Skip to content

Commit

Permalink
Use llama for particle frame and shared memory DataBox layout
Browse files Browse the repository at this point in the history
  • Loading branch information
bernhardmgruber committed Dec 1, 2022
1 parent c319769 commit 158f295
Show file tree
Hide file tree
Showing 41 changed files with 550 additions and 193 deletions.
4 changes: 2 additions & 2 deletions include/picongpu/algorithms/Set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ namespace picongpu
}

template<typename Dst, typename T_Worker>
HDINLINE void operator()(T_Worker const&, Dst& dst) const
HDINLINE void operator()(T_Worker const&, Dst&& dst) const
{
dst = value;
std::forward<Dst>(dst) = value;
}

private:
Expand Down
3 changes: 2 additions & 1 deletion include/picongpu/fields/FieldJ.kernel
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,8 @@ namespace picongpu
// The rest uses normal weighting
const float_X weighting = particle[weighting_];
Velocity velocity;
const float3_X vel = velocity(particle[momentum_], attribute::getMass(weighting, particle));
const float3_X vel
= velocity(static_cast<float3_X>(particle[momentum_]), attribute::getMass(weighting, particle));
auto fieldJShiftToParticle = jBox.shift(localCell);
ParticleAlgo perParticle;
perParticle(worker, fieldJShiftToParticle, pos, vel, charge, m_deltaTime);
Expand Down
4 changes: 3 additions & 1 deletion include/picongpu/fields/FieldTmp.kernel
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,9 @@ namespace picongpu
if(!forEachParticle.hasParticles())
return;

auto cachedVal = CachedBox::create<0, typename T_TmpBox::ValueType>(worker, T_BlockDescription{});
auto cachedVal = CachedBox::create<0, SharedDataBoxMapping, typename T_TmpBox::ValueType>(
worker,
T_BlockDescription{});
Set<typename T_TmpBox::ValueType> set(float_X(0.0));

auto collective = makeThreadCollective<T_BlockDescription>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@

#include "picongpu/simulation_defines.hpp"

#include "picongpu/param/memory.param"

#include <pmacc/dimensions/SuperCellDescription.hpp>
#include <pmacc/lockstep.hpp>
#include <pmacc/mappings/threads/ThreadCollective.hpp>
Expand Down Expand Up @@ -68,7 +70,9 @@ namespace picongpu::fields::maxwellSolver

constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume<SuperCellSize>::type::value;

auto cachedJ = CachedBox::create<0, typename FieldJ::DataBoxType::ValueType>(worker, BlockArea());
auto cachedJ = CachedBox::create<0, SharedDataBoxMapping, typename FieldJ::DataBoxType::ValueType>(
worker,
BlockArea());

pmacc::math::operation::Assign assign;
DataSpace<simDim> const block(
Expand Down
4 changes: 3 additions & 1 deletion include/picongpu/fields/MaxwellSolver/FDTD/FDTDBase.kernel
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,9 @@ namespace picongpu
auto srcFieldBlock = srcField.shift(beginCellIdx);
auto cacheStencilArea = makeThreadCollective<StencilCfg>();
auto cachedSrcField
= CachedBox::create<0u, typename T_SrcBox::ValueType>(worker, StencilCfg{});
= CachedBox::create<0u, SharedDataBoxMapping, typename T_SrcBox::ValueType>(
worker,
StencilCfg{});
cacheStencilArea(worker, assign, cachedSrcField, srcFieldBlock);

worker.sync();
Expand Down
10 changes: 1 addition & 9 deletions include/picongpu/fields/currentDeposition/Cache.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,10 @@ namespace picongpu
*/
template<typename T_BlockDescription, typename T_Worker, typename T_FieldBox>
DINLINE static auto create(T_Worker const& worker, T_FieldBox const& fieldBox)
#if(!BOOST_COMP_CLANG)
-> decltype(CachedBox::create<0u, typename T_FieldBox::ValueType>(
worker,
std::declval<T_BlockDescription>()))
#endif
{
using ValueType = typename T_FieldBox::ValueType;
/* this memory is used by all virtual blocks */
auto cache = CachedBox::create<0u, ValueType>(worker, T_BlockDescription{});
auto cache = CachedBox::create<0u, SharedDataBoxMapping, ValueType>(worker, T_BlockDescription{});

Set<ValueType> set(ValueType::create(0.0_X));
auto collectiveFill = makeThreadCollective<T_BlockDescription>();
Expand Down Expand Up @@ -90,9 +85,6 @@ namespace picongpu
*/
template<typename T_BlockDescription, typename T_Worker, typename T_FieldBox>
DINLINE static auto create([[maybe_unused]] T_Worker const& worker, T_FieldBox const& fieldBox)
#if(!BOOST_COMP_CLANG)
-> T_FieldBox
#endif
{
return fieldBox;
}
Expand Down
2 changes: 2 additions & 0 deletions include/picongpu/param/memory.param
Original file line number Diff line number Diff line change
Expand Up @@ -114,4 +114,6 @@ namespace picongpu
*/
constexpr bool fieldTmpSupportGatherCommunication = true;

/** LLAMA mapping used for the memory layout of particle frames
 *
 * Passed as the mapping template argument of the species' particle description
 * (see picongpu/particles/Particles.hpp).
 *
 * NOTE(review): presumably selects a struct-of-arrays layout; the meaning of the
 * `false` template argument is not visible here - confirm against the LLAMA version in use.
 */
using ParticleFrameMapping = llama::mapping::BindSoA<false>;
/** LLAMA mapping used for the memory layout of shared memory DataBoxes
 *
 * Passed as template argument to `CachedBox::create` and `SharedBox` wherever
 * fields are cached per block.
 *
 * NOTE(review): presumably selects an array-of-structs layout; the meaning of the
 * `false` template argument is not visible here - confirm against the LLAMA version in use.
 */
using SharedDataBoxMapping = llama::mapping::BindAoS<false>;
} // namespace picongpu
11 changes: 9 additions & 2 deletions include/picongpu/particles/Particles.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#include "picongpu/fields/Fields.def"
#include "picongpu/fields/Fields.hpp"
#include "picongpu/param/memory.param"
#include "picongpu/particles/boundary/Description.hpp"
#include "picongpu/particles/boundary/Utility.hpp"
#include "picongpu/particles/manipulators/manipulators.def"
Expand Down Expand Up @@ -91,7 +92,10 @@ namespace picongpu
// fallback if the species has not defined the alias boundaryCondition
pmacc::HandleGuardRegion<
pmacc::particles::policies::ExchangeParticles,
pmacc::particles::policies::DoNothing>>::type>,
pmacc::particles::policies::DoNothing>>::type,
bmpl::vector0<>,
bmpl::vector0<>,
picongpu::ParticleFrameMapping>,
MappingDesc,
DeviceHeap>
, public ISimulationData
Expand All @@ -110,7 +114,10 @@ namespace picongpu
// fallback if the species has not defined the alias boundaryCondition
pmacc::HandleGuardRegion<
pmacc::particles::policies::ExchangeParticles,
pmacc::particles::policies::DoNothing>>::type>;
pmacc::particles::policies::DoNothing>>::type,
bmpl::vector0<>,
bmpl::vector0<>,
picongpu::ParticleFrameMapping>;
using ParticlesBaseType = ParticlesBase<SpeciesParticleDescription, picongpu::MappingDesc, DeviceHeap>;
using FrameType = typename ParticlesBaseType::FrameType;
using FrameTypeBorder = typename ParticlesBaseType::FrameTypeBorder;
Expand Down
6 changes: 4 additions & 2 deletions include/picongpu/particles/Particles.kernel
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,10 @@ namespace picongpu

onlyMaster([&]() { mustShiftSupercell = 0; });

auto cachedB = CachedBox::create<0, typename T_BBox::ValueType>(worker, T_DataDomain());
auto cachedE = CachedBox::create<1, typename T_EBox::ValueType>(worker, T_DataDomain());
auto cachedB
= CachedBox::create<0, SharedDataBoxMapping, typename T_BBox::ValueType>(worker, T_DataDomain());
auto cachedE
= CachedBox::create<1, SharedDataBoxMapping, typename T_EBox::ValueType>(worker, T_DataDomain());

worker.sync();

Expand Down
29 changes: 29 additions & 0 deletions include/picongpu/particles/Particles.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#include <pmacc/traits/Resolve.hpp>

#include <algorithm>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
Expand Down Expand Up @@ -214,6 +215,34 @@ namespace picongpu

size_t sizeOfExchanges = 0u;

// std::cout << "Frame:\n\tSize: " << sizeof(FrameType) << "\n\tLLAMA view offset: " << offsetof(FrameType,
// view) << "\n\tLLAMA view size: " << sizeof(FrameType::view) << std::endl;

{
using View = decltype(FrameType::view);
using M = typename View::Mapping;
auto m = M{};
// View view;
// std::cout << "view begin " << &view << "\n";
// std::cout << "storage begin " << &view.storageBlobs[0][0] << "\n";
// for(auto ai : llama::ArrayIndexRange{m.extents()})
//{
// llama::forEachLeafCoord<typename M::RecordDim>(
// [&](auto rc)
// {
// auto& e = view(ai)(rc);
// std::cout << "ai " << ai << " rc " << rc << " addr " << (void*) &e << "\n";
// });
// }
// std::cout << "view end " << (&view + 1) << "\n";

std::ofstream{"llama_frame.html"} << llama::toHtml(m);
std::ofstream{"llama_frame.svg"} << llama::toSvg(m);

// PMACC_VERIFY(
// reinterpret_cast<std::byte*>(&view) == reinterpret_cast<std::byte*>(&view.storageBlobs[0][0]));
}

const uint32_t commTag = pmacc::traits::GetUniqueTypeId<FrameType, uint32_t>::uid();
log<picLog::MEMORY>("communication tag for species %1%: %2%") % FrameType::getName() % commTag;

Expand Down
6 changes: 5 additions & 1 deletion include/picongpu/particles/bremsstrahlung/Bremsstrahlung.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,11 @@ namespace picongpu
/* shared memory ion density device databoxes */
PMACC_ALIGN(
cachedIonDensity,
DataBox<SharedBox<ValueTypeIonDensity, typename BlockArea::FullSuperCellSize, 0>>);
DataBox<SharedBox<
ValueTypeIonDensity,
typename BlockArea::FullSuperCellSize,
0,
SharedDataBoxMapping>>);

PMACC_ALIGN(scaledSpectrumFunctor, ScaledSpectrum::LookupTableFunctor);
PMACC_ALIGN(stoppingPowerFunctor, ScaledSpectrum::LookupTableFunctor);
Expand Down
3 changes: 2 additions & 1 deletion include/picongpu/particles/bremsstrahlung/Bremsstrahlung.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ namespace picongpu
const DataSpace<simDim>& blockCell)
{
/* caching of ion density field */
cachedIonDensity = CachedBox::create<0, ValueTypeIonDensity>(worker, BlockArea());
cachedIonDensity
= CachedBox::create<0, SharedDataBoxMapping, ValueTypeIonDensity>(worker, BlockArea());

/* instance of nvidia assignment operator */
pmacc::math::operation::Assign assign;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

#include "picongpu/fields/CellType.hpp"
#include "picongpu/fields/FieldTmp.hpp"
#include "picongpu/param/memory.param"
#include "picongpu/particles/atomicPhysics/SetToAtomicGroundStateForChargeState.hpp"
#include "picongpu/particles/ionization/byCollision/ThomasFermi/AlgorithmThomasFermi.hpp"
#include "picongpu/particles/ionization/byCollision/ThomasFermi/ThomasFermi.def"
Expand Down Expand Up @@ -104,8 +105,12 @@ namespace picongpu
PMACC_ALIGN(eneBox, FieldTmp::DataBoxType);

/* shared memory EM-field device databoxes */
PMACC_ALIGN(cachedRho, DataBox<SharedBox<ValueType_Rho, typename BlockArea::FullSuperCellSize, 0>>);
PMACC_ALIGN(cachedEne, DataBox<SharedBox<ValueType_Ene, typename BlockArea::FullSuperCellSize, 1>>);
PMACC_ALIGN(
cachedRho,
DataBox<SharedBox<ValueType_Rho, typename BlockArea::FullSuperCellSize, 0, SharedDataBoxMapping>>);
PMACC_ALIGN(
cachedEne,
DataBox<SharedBox<ValueType_Ene, typename BlockArea::FullSuperCellSize, 1, SharedDataBoxMapping>>);

public:
/* host constructor initializing member : random number generator */
Expand Down Expand Up @@ -185,8 +190,8 @@ namespace picongpu
DINLINE void collectiveInit(const T_Worker& worker, const DataSpace<simDim>& blockCell)
{
/* caching of density and "temperature" fields */
cachedRho = CachedBox::create<0, ValueType_Rho>(worker, BlockArea());
cachedEne = CachedBox::create<1, ValueType_Ene>(worker, BlockArea());
cachedRho = CachedBox::create<0, SharedDataBoxMapping, ValueType_Rho>(worker, BlockArea());
cachedEne = CachedBox::create<1, SharedDataBoxMapping, ValueType_Ene>(worker, BlockArea());

/* instance of nvidia assignment operator */
pmacc::math::operation::Assign assign;
Expand Down
12 changes: 8 additions & 4 deletions include/picongpu/particles/ionization/byField/ADK/ADK_Impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,12 @@ namespace picongpu
PMACC_ALIGN(bBox, FieldB::DataBoxType);
PMACC_ALIGN(jBox, FieldJ::DataBoxType);
/* shared memory EM-field device databoxes */
PMACC_ALIGN(cachedE, DataBox<SharedBox<ValueType_E, typename BlockArea::FullSuperCellSize, 1>>);
PMACC_ALIGN(cachedB, DataBox<SharedBox<ValueType_B, typename BlockArea::FullSuperCellSize, 0>>);
PMACC_ALIGN(
cachedE,
DataBox<SharedBox<ValueType_E, typename BlockArea::FullSuperCellSize, 1, SharedDataBoxMapping>>);
PMACC_ALIGN(
cachedB,
DataBox<SharedBox<ValueType_B, typename BlockArea::FullSuperCellSize, 0, SharedDataBoxMapping>>);

public:
/* host constructor initializing member : random number generator */
Expand Down Expand Up @@ -137,8 +141,8 @@ namespace picongpu
jBox = jBox.shift(blockCell);

/* caching of E and B fields */
cachedB = CachedBox::create<0, ValueType_B>(worker, BlockArea());
cachedE = CachedBox::create<1, ValueType_E>(worker, BlockArea());
cachedB = CachedBox::create<0, SharedDataBoxMapping, ValueType_B>(worker, BlockArea());
cachedE = CachedBox::create<1, SharedDataBoxMapping, ValueType_E>(worker, BlockArea());

/* instance of nvidia assignment operator */
pmacc::math::operation::Assign assign;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "picongpu/fields/FieldB.hpp"
#include "picongpu/fields/FieldE.hpp"
#include "picongpu/fields/FieldJ.hpp"
#include "picongpu/param/memory.param"
#include "picongpu/particles/ParticlesFunctors.hpp"
#include "picongpu/particles/atomicPhysics/SetToAtomicGroundStateForChargeState.hpp"
#include "picongpu/particles/ionization/byField/BSI/AlgorithmBSI.hpp"
Expand Down Expand Up @@ -93,7 +94,9 @@ namespace picongpu
FieldE::DataBoxType eBox;
FieldJ::DataBoxType jBox;
/* shared memory EM-field device databoxes */
PMACC_ALIGN(cachedE, DataBox<SharedBox<ValueType_E, typename BlockArea::FullSuperCellSize, 1>>);
PMACC_ALIGN(
cachedE,
DataBox<SharedBox<ValueType_E, typename BlockArea::FullSuperCellSize, 1, SharedDataBoxMapping>>);

public:
/* host constructor */
Expand Down Expand Up @@ -125,7 +128,7 @@ namespace picongpu
jBox = jBox.shift(blockCell);

/* caching of E field */
cachedE = CachedBox::create<1, ValueType_E>(worker, BlockArea());
cachedE = CachedBox::create<1, SharedDataBoxMapping, ValueType_E>(worker, BlockArea());

/* instance of nvidia assignment operator */
pmacc::math::operation::Assign assign;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,12 @@ namespace picongpu
PMACC_ALIGN(bBox, FieldB::DataBoxType);
PMACC_ALIGN(jBox, FieldJ::DataBoxType);
/* shared memory EM-field device databoxes */
PMACC_ALIGN(cachedE, DataBox<SharedBox<ValueType_E, typename BlockArea::FullSuperCellSize, 1>>);
PMACC_ALIGN(cachedB, DataBox<SharedBox<ValueType_B, typename BlockArea::FullSuperCellSize, 0>>);
PMACC_ALIGN(
cachedE,
DataBox<SharedBox<ValueType_E, typename BlockArea::FullSuperCellSize, 1, SharedDataBoxMapping>>);
PMACC_ALIGN(
cachedB,
DataBox<SharedBox<ValueType_B, typename BlockArea::FullSuperCellSize, 0, SharedDataBoxMapping>>);

public:
/* host constructor initializing member : random number generator */
Expand Down Expand Up @@ -137,8 +141,8 @@ namespace picongpu
jBox = jBox.shift(blockCell);

/* caching of E and B fields */
cachedB = CachedBox::create<0, ValueType_B>(worker, BlockArea());
cachedE = CachedBox::create<1, ValueType_E>(worker, BlockArea());
cachedB = CachedBox::create<0, SharedDataBoxMapping, ValueType_B>(worker, BlockArea());
cachedE = CachedBox::create<1, SharedDataBoxMapping, ValueType_E>(worker, BlockArea());

/* instance of nvidia assignment operator */
pmacc::math::operation::Assign assign;
Expand Down
12 changes: 8 additions & 4 deletions include/picongpu/particles/synchrotronPhotons/PhotonCreator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,12 @@ namespace picongpu
PMACC_ALIGN(eBox, FieldE::DataBoxType);
PMACC_ALIGN(bBox, FieldB::DataBoxType);
/* shared memory EM-field device databoxes */
PMACC_ALIGN(cachedE, DataBox<SharedBox<ValueType_E, typename BlockArea::FullSuperCellSize, 1>>);
PMACC_ALIGN(cachedB, DataBox<SharedBox<ValueType_B, typename BlockArea::FullSuperCellSize, 0>>);
PMACC_ALIGN(
cachedE,
DataBox<SharedBox<ValueType_E, typename BlockArea::FullSuperCellSize, 1, SharedDataBoxMapping>>);
PMACC_ALIGN(
cachedB,
DataBox<SharedBox<ValueType_B, typename BlockArea::FullSuperCellSize, 0, SharedDataBoxMapping>>);

PMACC_ALIGN(curF_1, SynchrotronFunctions::SyncFuncCursor);
PMACC_ALIGN(curF_2, SynchrotronFunctions::SyncFuncCursor);
Expand Down Expand Up @@ -143,8 +147,8 @@ namespace picongpu
DINLINE void collectiveInit(const T_Worker& worker, const DataSpace<simDim>& blockCell)
{
/* caching of E and B fields */
cachedB = CachedBox::create<0, ValueType_B>(worker, BlockArea());
cachedE = CachedBox::create<1, ValueType_E>(worker, BlockArea());
cachedB = CachedBox::create<0, SharedDataBoxMapping, ValueType_B>(worker, BlockArea());
cachedE = CachedBox::create<1, SharedDataBoxMapping, ValueType_E>(worker, BlockArea());

/* instance of nvidia assignment operator */
pmacc::math::operation::Assign assign;
Expand Down
8 changes: 4 additions & 4 deletions include/pmacc/cuSTL/algorithm/functor/GetComponent.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,15 @@ namespace pmacc
}

template<typename Array, typename T_Worker>
HDINLINE Type& operator()(T_Worker const&, Array& array) const
HDINLINE Type& operator()(T_Worker const&, Array&& array) const
{
return array[m_component];
return std::forward<Array>(array)[m_component];
}

template<typename Array>
HDINLINE Type& operator()(Array& array) const
HDINLINE Type& operator()(Array&& array) const
{
return array[m_component];
return std::forward<Array>(array)[m_component];
}
};

Expand Down
17 changes: 16 additions & 1 deletion include/pmacc/cuSTL/cursor/Cursor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,21 @@ namespace pmacc
{
namespace cursor
{
namespace detail
{
    /** Deduces the value type belonging to an accessor policy.
     *
     * Fallback case: the accessor declares no nested `ValueType`, so the value type
     * is its `Reference` type with the reference removed (cv-qualifiers are kept).
     *
     * @tparam T_Accessor accessor policy, must declare a nested `Reference` type
     * @tparam T_Enable detection slot, must be left at its default
     */
    template<class T_Accessor, class T_Enable = void>
    struct ValueType
    {
        using type = typename std::remove_reference<typename T_Accessor::Reference>::type;
    };

    /** Preferred case: the accessor declares a nested `ValueType`, which is taken as is.
     *
     * Chosen over the fallback whenever `T_Accessor::ValueType` is a valid type.
     */
    template<class T_Accessor>
    struct ValueType<T_Accessor, std::void_t<typename T_Accessor::ValueType>>
    {
        using type = typename T_Accessor::ValueType;
    };
} // namespace detail

/** A cursor is used to access a single datum and to jump to another one.
* It is always located at a certain datum. Think of a generalized iterator.
* @tparam _Accessor Policy functor class that is called inside operator*().
Expand All @@ -51,7 +66,7 @@ namespace pmacc
{
public:
using Reference = typename _Accessor::Reference;
using ValueType = std::remove_reference_t<Reference>;
using ValueType = typename detail::ValueType<_Accessor>::type;
using Accessor = _Accessor;
using Navigator = _Navigator;
using Marker = _Marker;
Expand Down
Loading

0 comments on commit 158f295

Please sign in to comment.