From badced127851859a4827e0c6751661220bcd6d23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Bignonnet?= Date: Tue, 23 May 2023 15:59:30 +0200 Subject: [PATCH 01/22] allocate ManyPencilArrayInplaceRFFT for RFFT! plan --- src/allocate.jl | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/allocate.jl b/src/allocate.jl index 5a23ba19..3642234b 100644 --- a/src/allocate.jl +++ b/src/allocate.jl @@ -12,9 +12,13 @@ size `dims`, and a tuple of `N` `PencilArray`s. !!! note "In-place plans" - If `p` is an in-place plan, a + If `p` is an in-place real-to-real or complex-to-complex plan, a [`ManyPencilArray`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.ManyPencilArray) - is allocated. This + is allocated. If `p` is an in-place real-to-complex plan, a + [`ManyPencilArrayInplaceRFFT`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.ManyPencilArray) + is allocated. + + This type holds `PencilArray` wrappers for the input and output transforms (as well as for intermediate transforms) which share the same space in memory. The input and output `PencilArray`s should be respectively accessed by @@ -61,6 +65,18 @@ function allocate_input(p::PencilFFTPlan{T,N,true} where {T,N}) ManyPencilArray{T}(undef, pencils...; extra_dims=p.extra_dims) end +# In-place version for real-to-complex RFFT! +function allocate_input(plan::PencilFFTPlan{T,N,true, + Nt, # number of transformed dimensions + Nd, # number of decomposed dimensions + Ne, # number of extra dimensions + G } + ; unsafe::Bool = true, fill_with_zeros::Bool = false) where {N,Nd,Ne,G<:PencilFFTs.GlobalFFTParams{T,Nt,true,TR}} where {T<:FFTReal,Nt,TR<:Tuple{Transforms.RFFT!,Vararg{Transforms.FFT!}}} + plans = plan.plans + pencils = (first(plans).pencil_in, first(plans).pencil_out, map(pp -> pp.pencil_in, plans[2:end])...) + ManyPencilArrayInplaceRFFT{T}(undef, pencils...; extra_dims=plan.extra_dims, unsafe=unsafe, fill_with_zeros = fill_with_zeros) +end + allocate_input(p::PencilFFTPlan, dims...) = _allocate_many(allocate_input, p, dims...) From 5df2f7b26e5451a1f325a0127b3a190b425aab47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Bignonnet?= Date: Tue, 23 May 2023 16:02:05 +0200 Subject: [PATCH 02/22] definition of ManyPencilArrayInplaceRFFT for RFFT! --- src/multiarrays_r2c.jl | 155 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 src/multiarrays_r2c.jl diff --git a/src/multiarrays_r2c.jl b/src/multiarrays_r2c.jl new file mode 100644 index 00000000..a39c25af --- /dev/null +++ b/src/multiarrays_r2c.jl @@ -0,0 +1,155 @@ +# copied and modified from https://github.com/jipolanco/PencilArrays.jl/blob/master/src/multiarrays.jl + +""" + ManyPencilArrayInplaceRFFT{T,N,M} + +Container holding `M` different [`PencilArray`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.PencilArray) views to the same +underlying data buffer. All views share the same and dimensionality `N`. +The element type `T` of the first view is real, that of subsequent views is +`Complex{T}`. + +This can be used to perform in-place real-to-complex plan, see also[`Tranforms.RFFT!`](@ref). +It is used internally for such transforms by [`allocate_input`](@ref) and should not be constructed directly. + +--- + + ManyPencilArrayInplaceRFFT{T}(undef, pencils...; extra_dims=()) + +Create a `ManyPencilArrayInplaceRFFT` container that can hold data of type `T` and `Complex{T}` associated +to all the given [`Pencil`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.Pencil)s. + +The optional `extra_dims` argument is the same as for [`PencilArray`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.PencilArray). + +See also [`ManyPencilArray`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.ManyPencilArray) +""" +struct ManyPencilArrayInplaceRFFT{ + T, # element type of real array + N, # number of dimensions of each array (including extra_dims) + M, # number of arrays + Arrays <: Tuple{Vararg{PencilArray,M}}, + DataVector <: AbstractVector{T}, + DataVectorComplex <: AbstractVector{Complex{T}}, + } + data :: DataVector + data_complex :: DataVectorComplex + arrays :: Arrays + + function ManyPencilArrayInplaceRFFT{T}( + init, real_pencil::Pencil{Np}, complex_pencils::Vararg{Pencil{Np}}; + extra_dims::Dims=() + ) where {Np,T} + # real_pencil is a Pencil with dimensions `dims` of a real array with no padding and no permutation + # the padded dimensions are (2*(dims[1] ÷ 2 + 1), dims[2:end]...) + # first(complex_pencils) is a Pencil with dimensions of a complex array (dims[1] ÷ 2 + 1, dims[2:end]...) and no permutation + pencils = (real_pencil, complex_pencils...) + BufType = PencilArrays.typeof_array(real_pencil) + @assert all(p -> PencilArrays.typeof_array(p) === BufType, complex_pencils) + # TODO : checks dims of pencils + data_length = max(2 .* length.(complex_pencils)...) * prod(extra_dims) + data_real = BufType{T}(init, data_length) + fill_with_zeros && fill!(data_real, 0.0) + # we don't use data_complex = reinterpret(Complex{T}, data_real) + # since there is an issue with StridedView of ReinterpretArray, called by _permutedims in PencilArrays.Transpositions + ptr_complex = convert(Ptr{Complex{T}}, pointer(data_real)) + data_complex = unsafe_wrap(BufType, ptr_complex, data_length ÷ 2) + array_real = _make_real_array(data_real, extra_dims, real_pencil) + arrays_complex = _make_arrays(data_complex, extra_dims, complex_pencils...) + arrays = (array_real, arrays_complex...) + N = Np + length(extra_dims) + M = length(pencils) + new{T, N, M, typeof(arrays), typeof(data_real), typeof(data_complex)}(data_real, data_complex, arrays) + end +end + +function _make_real_array(data, extra_dims, p) + dims_space_local = size_local(p, MemoryOrder()) + dims_padded_local = (2*(dims_space_local[1] ÷ 2 + 1), dims_space_local[2:end]...) + dims = (dims_padded_local..., extra_dims...) + axes_local = (Base.OneTo.(dims_space_local)..., Base.OneTo.(extra_dims)...) + n = prod(dims) + vec = view(data, Base.OneTo(n)) + parent_arr = reshape(vec, dims) + arr = view(parent_arr, axes_local...) + PencilArray(p, arr) +end + +function _make_arrays(data::V, extra_dims::Dims, p::Pencil, + pens::Vararg{Pencil}) where {V<:Union{DV, Base.ReinterpretArray{Complex{T}, 1, T, DV}}} where {DV <: DenseVector{T}} where {T} + dims = (size_local(p, MemoryOrder())..., extra_dims...) + n = prod(dims) + @assert n == length_local(p) * prod(extra_dims) + vec = view(data, Base.OneTo(n)) + arr = reshape(vec, dims) + A = PencilArray(p, arr) + (A, _make_arrays(data, extra_dims, pens...)...) +end + +_make_arrays(::V, ::Dims) where {V<:Union{DV, Base.ReinterpretArray{Complex{T}, 1, T, DV}}} where {DV <: DenseVector{T}} where {T} = () + +Base.eltype(::ManyPencilArrayInplaceRFFT{T}) where {T} = T +Base.ndims(::ManyPencilArrayInplaceRFFT{T,N}) where {T,N} = N + +""" + Tuple(A::ManyPencilArrayInplaceRFFT) -> (u1, u2, …) + +Returns the [`PencilArray`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.PencilArray)s wrapped by `A`. + +This can be useful for iterating over all the wrapped arrays. +""" +@inline Base.Tuple(A::ManyPencilArrayInplaceRFFT) = A.arrays + +""" + length(A::ManyPencilArrayInplaceRFFT) + +Returns the number of [`PencilArray`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.PencilArray)s wrapped by `A`. +""" +Base.length(::ManyPencilArrayInplaceRFFT{T,N,M}) where {T,N,M} = M + +""" + first(A::ManyPencilArrayInplaceRFFT) + +Returns the first [`PencilArray`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.PencilArray) wrapped by `A`. +""" +Base.first(A::ManyPencilArrayInplaceRFFT) = first(A.arrays) + +""" + last(A::ManyPencilArrayInplaceRFFT) + +Returns the last [`PencilArray`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.PencilArray) wrapped by `A`. +""" +Base.last(A::ManyPencilArrayInplaceRFFT) = last(A.arrays) + +""" + getindex(A::ManyPencilArrayInplaceRFFT, ::Val{i}) + getindex(A::ManyPencilArrayInplaceRFFT, i::Integer) + +Returns the i-th [`PencilArray`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.PencilArray) wrapped by `A`. + +If possible, the `Val{i}` form should be preferred, as it ensures that the full +type of the returned `PencilArray` is known by the compiler. + +See also [`first(::ManyPencilArrayInplaceRFFT)`](@ref), [`last(::ManyPencilArrayInplaceRFFT)`](@ref). + +# Example + +```julia +A = ManyPencilArrayInplaceRFFT(pencil1, pencil2, pencil3) + +# Get the PencilArray associated to `pencil2`. +u2 = A[2] +u2 = A[Val(2)] +``` +""" +Base.getindex(A::ManyPencilArrayInplaceRFFT, ::Val{i}) where {i} = + _getindex(Val(i), A.arrays...) +@inline Base.getindex(A::ManyPencilArrayInplaceRFFT, i) = A[Val(i)] + +@inline function _getindex(::Val{i}, a, t::Vararg) where {i} + i :: Integer + i <= 0 && throw(BoundsError("index must be >= 1")) + i == 1 && return a + _getindex(Val(i - 1), t...) +end + +# This will happen if the index `i` intially passed is too large. +@inline _getindex(::Val) = throw(BoundsError("invalid index")) \ No newline at end of file From f4a153827c287109994eab7e8a034f0eb72a4e2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Bignonnet?= Date: Tue, 23 May 2023 16:03:10 +0200 Subject: [PATCH 03/22] inplace RFFT! mul! and ldiv! operations --- src/operations.jl | 55 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/src/operations.jl b/src/operations.jl index 4bd4e485..155d9ac9 100644 --- a/src/operations.jl +++ b/src/operations.jl @@ -2,9 +2,9 @@ const RealOrComplex{T} = Union{T, Complex{T}} where T <: FFTReal const PlanArrayPair{P,A} = Pair{P,A} where {P <: PencilPlan1D, A <: PencilArray} # Types of array over which a PencilFFTPlan can operate. -# PencilArray and ManyPencilArray are respectively for out-of-place and in-place +# PencilArray, ManyPencilArray and ManyPencilArrayInplaceRFFT are respectively for out-of-place, in-place and in-place rfft # transforms. -const FFTArray{T,N} = Union{PencilArray{T,N}, ManyPencilArray{T,N}} where {T,N} +const FFTArray{T,N} = Union{PencilArray{T,N}, ManyPencilArray{T,N}, ManyPencilArrayInplaceRFFT{T,N}} where {T,N} # Collections of FFTArray (e.g. for vector components), for broadcasting plans # to each array. These types are basically those returned by `allocate_input` @@ -12,6 +12,8 @@ const FFTArray{T,N} = Union{PencilArray{T,N}, ManyPencilArray{T,N}} where {T,N} const FFTArrayCollection = Union{Tuple{Vararg{A}}, AbstractArray{A}} where {A <: FFTArray} +const PencilMultiarray{T,N} = Union{ManyPencilArray{T,N}, ManyPencilArrayInplaceRFFT{T,N}} where {T,N} + # This allows to treat plans as scalars when broadcasting. # This means that, if u = (u1, u2, u3) is a tuple of PencilArrays # compatible with p, then p .* u does what one would expect, that is, it @@ -54,7 +56,7 @@ _maybe_allocate(allocator::Function, p::PencilFFTPlan{T,N,false} where {T,N}, # In-place version _maybe_allocate(::Function, ::PencilFFTPlan{T,N,true} where {T,N}, - src::ManyPencilArray) = src + src::PencilMultiarray) = src # Fallback case. function _maybe_allocate(::Function, p::PencilFFTPlan, src::A) where {A} @@ -76,7 +78,7 @@ end function _check_arrays( p::PencilFFTPlan{T,N,true} where {T,N}, - Ain::ManyPencilArray, Aout::ManyPencilArray, + Ain::PencilMultiarray, Aout::PencilMultiarray, ) if Ain !== Aout throw(ArgumentError( @@ -171,6 +173,45 @@ function _apply_plans!( A end +# In-place RFFT version +function _apply_plans!( + dir::Val, full_plan::PencilFFTPlan{T,N,true}, + A::ManyPencilArrayInplaceRFFT{T,N}, A_again::ManyPencilArrayInplaceRFFT{T,N}) where {T<:FFTReal,N} + @assert A === A_again + # pairs for 1D FFT! plans, RFFT! plan is treated separately + pairs = _make_pairs(full_plan.plans[2:end], A.arrays[3:end]) + + # Backward transforms are applied in reverse order. + pp = dir === Val(FFTW.BACKWARD) ? reverse(pairs) : pairs + + if dir === Val(FFTW.FORWARD) + # apply separately first transform (RFFT!) + _apply_rfft_plan_in_place!(dir, full_plan, A.arrays[2], first(full_plan.plans), A.arrays[1]) + # apply recursively all successive transforms (FFT!) + _apply_plans_in_place!(dir, full_plan, A.arrays[2], pp...) + elseif dir === Val(FFTW.BACKWARD) + # apply recursively all transforms but last (BFFT!) + _apply_plans_in_place!(dir, full_plan, nothing, pp...) + # transpose before last transform + t = if pp ==() + nothing + else + @assert Base.mightalias(A.arrays[3], A.arrays[2]) # they're aliased! + t = Transpositions.Transposition(A.arrays[2], A.arrays[3], + method=full_plan.transpose_method) + transpose!(t, waitall=false) + end + # apply separately last transform (BRFFT!) + _apply_rfft_plan_in_place!(dir, full_plan, A.arrays[1], first(full_plan.plans), A.arrays[2]) + + _wait_mpi_operations!(t, full_plan.timer) + # Scale transform. + first(A) ./= scale_factor(full_plan) + end + + A +end + function _apply_plans_out_of_place!( dir::Val, full_plan::PencilFFTPlan, y::PencilArray, x::PencilArray, plan::PencilPlan1D, next_plans::Vararg{PencilPlan1D}) @@ -240,6 +281,12 @@ end _apply_plans_in_place!(::Val, ::PencilFFTPlan, u_prev::PencilArray) = u_prev +function _apply_rfft_plan_in_place!(dir::Val, full_plan::PencilFFTPlan, A_out ::PencilArray{To,N}, p::PencilPlan1D{ti,to,Pi,Po,Tr}, A_in ::PencilArray{Ti,N}) where + {Ti<:RealOrComplex{T},To<:RealOrComplex{T},ti<:RealOrComplex{T},to<:RealOrComplex{T},Pi,Po,N,Tr<:Union{Transforms.RFFT!,Transforms.BRFFT!}} where T<:FFTReal + fft_plan = dir === Val(FFTW.FORWARD) ? p.fft_plan : p.bfft_plan + @timeit_debug full_plan.timer "FFT!" mul!(parent(A_out), fft_plan, parent(A_in)) #fft_plan * parent(A_in) # A_in_padded +end + _split_first(a, b...) = (a, b) # (x, y, z, w) -> (x, (y, z, w)) function _make_pairs(plans::Tuple{Vararg{PencilPlan1D,N}}, From 0079ee7505ba135998b78247f337ff6710d7cc6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Bignonnet?= Date: Tue, 23 May 2023 16:04:16 +0200 Subject: [PATCH 04/22] define RFFT! and BRFFT! + wrappers to FFTW plans --- src/Transforms/r2c.jl | 99 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 88 insertions(+), 11 deletions(-) diff --git a/src/Transforms/r2c.jl b/src/Transforms/r2c.jl index 27af8a78..1f32bc6e 100644 --- a/src/Transforms/r2c.jl +++ b/src/Transforms/r2c.jl @@ -10,6 +10,13 @@ See also """ struct RFFT <: AbstractTransform end +""" + RFFT!() + +In-place version of [`RFFT`](@ref). +""" +struct RFFT! <: AbstractTransform end + """ BRFFT(d::Integer) BRFFT((d1, d2, ..., dN)) @@ -40,23 +47,40 @@ struct BRFFT <: AbstractTransform even_output :: Bool end -_show_extra_info(io::IO, tr::BRFFT) = print(io, tr.even_output ? "{even}" : "{odd}") +""" + BRFFT!(d::Integer) + BRFFT!((d1, d2, ..., dN)) + +In-place version of [`BRFFT`](@ref). +""" +struct BRFFT! <: AbstractTransform + even_output :: Bool +end + +const TransformR2C = Union{RFFT, RFFT!} +const TransformC2R = Union{BRFFT, BRFFT!} + +_show_extra_info(io::IO, tr::TransformC2R) = print(io, tr.even_output ? "{even}" : "{odd}") BRFFT(d::Integer) = BRFFT(iseven(d)) BRFFT(ts::Tuple) = BRFFT(last(ts)) # c2r transform is applied along the **last** dimension (opposite of FFTW) +BRFFT!(d::Integer) = BRFFT!(iseven(d)) +BRFFT!(ts::Tuple) = BRFFT!(last(ts)) # c2r transform is applied along the **last** dimension (opposite of FFTW) is_inplace(::Union{RFFT, BRFFT}) = false +is_inplace(::Union{RFFT!, BRFFT!}) = true -length_output(::RFFT, length_in::Integer) = div(length_in, 2) + 1 -length_output(tr::BRFFT, length_in::Integer) = 2 * length_in - 1 - tr.even_output +length_output(::TransformR2C, length_in::Integer) = div(length_in, 2) + 1 +length_output(tr::TransformC2R, length_in::Integer) = 2 * length_in - 1 - tr.even_output -eltype_output(::RFFT, ::Type{T}) where {T <: FFTReal} = Complex{T} -eltype_output(::BRFFT, ::Type{Complex{T}}) where {T <: FFTReal} = T +eltype_output(::TransformR2C, ::Type{T}) where {T <: FFTReal} = Complex{T} +eltype_output(::TransformC2R, ::Type{Complex{T}}) where {T <: FFTReal} = T -eltype_input(::RFFT, ::Type{T}) where {T <: FFTReal} = T -eltype_input(::BRFFT, ::Type{T}) where {T <: FFTReal} = Complex{T} +eltype_input(::TransformR2C, ::Type{T}) where {T <: FFTReal} = T +eltype_input(::TransformC2R, ::Type{T}) where {T <: FFTReal} = Complex{T} plan(::RFFT, A::AbstractArray, args...; kwargs...) = FFTW.plan_rfft(A, args...; kwargs...) +plan(::RFFT!, A::AbstractArray, args...; kwargs...) = plan_rfft!(A, args...; kwargs...) # NOTE: unlike most FFTW plans, this function also requires the length `d` of # the transform output along the first transformed dimension. @@ -65,23 +89,76 @@ function plan(tr::BRFFT, A::AbstractArray, dims; kwargs...) d = length_output(tr, Nin) FFTW.plan_brfft(A, d, dims; kwargs...) end +function plan(tr::BRFFT!, A::AbstractArray, dims; kwargs...) + Nin = size(A, first(dims)) # input length along first dimension + d = length_output(tr, Nin) + plan_brfft!(A, d, dims; kwargs...) +end binv(::RFFT, d) = BRFFT(d) binv(::BRFFT, d) = RFFT() +binv(::RFFT!, d) = BRFFT!(d) +binv(::BRFFT!, d) = RFFT!() -function scale_factor(tr::BRFFT, A::ComplexArray, dims) +function scale_factor(tr::TransformC2R, A::ComplexArray, dims) prod(dims; init = one(Int)) do i n = size(A, i) i == last(dims) ? length_output(tr, n) : n end end -scale_factor(::RFFT, A::RealArray, dims) = _prod_dims(A, dims) +scale_factor(::TransformR2C, A::RealArray, dims) = _prod_dims(A, dims) # r2c along the first dimension, then c2c for the other dimensions. expand_dims(tr::RFFT, ::Val{N}) where {N} = N === 0 ? () : (tr, expand_dims(FFT(), Val(N - 1))...) +expand_dims(tr::RFFT!, ::Val{N}) where {N} = + N === 0 ? () : (tr, expand_dims(FFT!(), Val(N - 1))...) expand_dims(tr::BRFFT, ::Val{N}) where {N} = (BFFT(), expand_dims(tr, Val(N - 1))...) -expand_dims(tr::BRFFT, ::Val{1}) = (tr, ) -expand_dims(tr::BRFFT, ::Val{0}) = () +expand_dims(tr::BRFFT!, ::Val{N}) where {N} = (BFFT!(), expand_dims(tr, Val(N - 1))...) +expand_dims(tr::TransformC2R, ::Val{1}) = (tr, ) +expand_dims(tr::TransformC2R, ::Val{0}) = () + +## FFTW wrappers for inplace RFFT plans + +function plan_rfft!(X::StridedArray{T,N}, region; + flags::Integer=FFTW.ESTIMATE, + timelimit::Real=FFTW.NO_TIMELIMIT) where {T<:FFTW.fftwReal,N} + sz = size(X) # physical input size (real) + osize = FFTW.rfft_output_size(sz, region) # output size (complex) + isize = ntuple(i -> i == first(region) ? 2osize[i] : osize[i], Val(N)) # padded input size (real) + X_padded = flags&FFTW.ESTIMATE != 0 ? # is time measurement not required ? + FFTW.FakeArray{T,N}(sz, FFTW.colmajorstrides(isize)) : # fake allocation, only size and strides matter + view(Array{T}(undef, isize), Base.OneTo.(sz)...) # allocation + Y = flags&FFTW.ESTIMATE != 0 ? + FFTW.FakeArray{Complex{T}}(osize) : # fake allocation, only size and strides matter + Array{Complex{T}}(undef, osize) # allocation + return FFTW.rFFTWPlan{T,FFTW.FORWARD,true,N}(X_padded, Y, region, flags, timelimit) +end + +function plan_brfft!(X::StridedArray{Complex{T},N}, d, region; + flags::Integer=FFTW.ESTIMATE, + timelimit::Real=FFTW.NO_TIMELIMIT) where {T<:FFTW.fftwReal,N} + isize = size(X) # input size (complex) + osize = ntuple(i -> i == first(region) ? 2isize[i] : isize[i], Val(N)) # padded output size (real) + sz = FFTW.brfft_output_size(X, d, region) # physical output size (real) + # Y is padded + Y = flags&FFTW.ESTIMATE != 0 ? # is time measurement not required ? + FFTW.FakeArray{T,N}(sz, FFTW.colmajorstrides(osize)) : # fake allocation, only size and strides matter + view(Array{T}(undef, osize), Base.OneTo.(sz)...) # allocation + return FFTW.rFFTWPlan{Complex{T},FFTW.BACKWARD,true,N}(X, Y, region, flags, timelimit) +end +#= function Base.:*(p::FFTW.rFFTWPlan{T,FFTW.FORWARD,true,N}, X::StridedArray{T,N}) where {T<:FFTW.fftwReal,N} + ptr_complex = convert(Ptr{Complex{T}}, pointer(X)) + Y = unsafe_wrap(Array, ptr_complex, p.osz) # reinterpretation of X as a complex Array + mul!(Y, p, X) # assesses size, strides and memory alignment +end + +function Base.:*(p::FFTW.rFFTWPlan{Complex{T},FFTW.BACKWARD,true,N}, X::StridedArray{Complex{T},N}) where {T<:FFTW.fftwReal,N} + ptr_real = convert(Ptr{T}, pointer(X)) + osize = ntuple(i -> i == first(p.region) ? 2p.sz[i] : p.sz[i], Val(N)) # padded real size + Y_parent = unsafe_wrap(Array, ptr_real, osize) # reinterpretation of X as a real Array + Y = view(Y_parent, Base.OneTo.(p.osz)...) # skip padded values, view to physical real size only + mul!(Y, p, X) # assesses size, strides and memory alignment +end =# \ No newline at end of file From fd51debba80524ae4b8c17779b7e5885905578c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Bignonnet?= Date: Tue, 23 May 2023 16:04:47 +0200 Subject: [PATCH 05/22] updated tests for rfft! --- test/rfft.jl | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/test/rfft.jl b/test/rfft.jl index d955ca59..cec24ba9 100644 --- a/test/rfft.jl +++ b/test/rfft.jl @@ -5,6 +5,7 @@ import MPI using BenchmarkTools using LinearAlgebra +using FFTW using Printf using Random using Test @@ -190,6 +191,78 @@ function test_rfft(size_in; benchmark=true) MPI.Barrier(comm) end +function test_rfft!(size_in) + comm = MPI.COMM_WORLD + rank = MPI.Comm_rank(comm) + + rank == 0 && @info "Input data size: $size_in" + + # Test creating Pencil and creating plan. + pen = Pencil(size_in, comm) + + inplace_plan = @inferred PencilFFTPlan(pen, Transforms.RFFT!()) + outofplace_place = @inferred PencilFFTPlan(pen, Transforms.RFFT()) + + # Allocate and initialise scalar fields + u = allocate_input(inplace_plan) #, Val(3)) + x = first(u) ; x̂ = last(u) # Real and Complex views + + v = allocate_input(outofplace_place) + v̂ = allocate_output(outofplace_place) + + fill!(x, 0.0) ; rank == 0 && (x[1] = 1.0) ; rank == 0 && (x[2] = 2.0) ; + fill!(v, 0.0) ; rank == 0 && (v[1] = 1.0) ; rank == 0 && (v[2] = 2.0) ; + + @testset "RFFT! vs RFFT" begin + mul!(u, inplace_plan, u) + mul!(v̂, outofplace_place, v) + @test all(isapprox.(x̂, v̂, atol=1e-8)) + + ldiv!(u, inplace_plan, u) + ldiv!(v, outofplace_place, v̂) + @test all(isapprox.(x, v, atol=1e-8)) + @test all(isapprox.(x[1:3], [1.0, 2.0, 0.0], atol = 1e-8)) + end + + MPI.Barrier(comm) +end + +function test_1D_rfft!(size_in; flags=FFTW.ESTIMATE) + dims = (size_in,) ; + dims_padded = (2(dims[1] ÷ 2 + 1), dims[2:end]...) ; + dims_fourier = ((dims[1] ÷ 2 + 1), dims[2:end]...) + + A = zeros(Float64, dims_padded) ; + a = view(A, Base.OneTo.(dims)...) ; + â = reinterpret(Complex{Float64}, A) + + â2 = zeros(Complex{Float64}, dims_fourier) ; + a2 = zeros(Float64, dims) ; + + p = Transforms.plan_rfft!(a, 1, flags=flags) ; #display(p) + p2 = FFTW.plan_rfft(a2, 1, flags=flags) ; + bp = Transforms.plan_brfft!(â, dims[1], 1, flags=flags) ; + bp2 = FFTW.plan_brfft(â, dims[1], 1, flags=flags) ; + + fill!(a2, 0.0) ; a2[1] = 1 ; a2[2] = 2 ; + fill!(a, 0.0) ; a[1] = 1 ; a[2] = 2 ; + + @testset "1D RFFT! vs RFFT" begin + mul!(â, p, a) ; + mul!(â2, p2, a2) ; + @test all(isapprox.(â2, â, atol = 1e-8)) + + mul!(a, bp, â) ; + mul!(a2, bp2, â2) ; + @test all(isapprox.(a2, a, atol = 1e-8)) + + a /= size_in ; a2 /= size_in ; + @test all(isapprox.(a[1:3], [1.0, 2.0, 0.0], atol = 1e-8)) + end + + MPI.Barrier(comm) +end + MPI.Init() comm = MPI.COMM_WORLD rank = MPI.Comm_rank(comm) @@ -198,3 +271,11 @@ rank == 0 || redirect_stdout(devnull) test_rfft(DATA_DIMS_EVEN) println() test_rfft(DATA_DIMS_ODD, benchmark=false) + +test_1D_rfft!(first(DATA_DIMS_ODD)) +test_1D_rfft!(first(DATA_DIMS_EVEN)) +# test_1D_rfft!(first(DATA_DIMS_EVEN), flags = FFTW.MEASURE) # test fails for this specific dimension, possibly due to a FFTW bug + +test_rfft!(DATA_DIMS_ODD) +# test_rfft!(DATA_DIMS_EVEN) # test fails for this specific dimension, possibly due to a FFTW bug +test_rfft!(DATA_DIMS_EVEN .+ 2) From 1618d71ea24e31ec745eacbfbecb9059d9914906 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Bignonnet?= Date: Tue, 23 May 2023 16:05:23 +0200 Subject: [PATCH 06/22] include ManyPencilArrayInplaceRFFT --- src/PencilFFTs.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/PencilFFTs.jl b/src/PencilFFTs.jl index f2784cea..8c70555a 100644 --- a/src/PencilFFTs.jl +++ b/src/PencilFFTs.jl @@ -27,6 +27,7 @@ const AbstractTransformList{N} = NTuple{N, AbstractTransform} where N include("global_params.jl") include("plans.jl") +include("multiarrays_r2c.jl") include("allocate.jl") include("operations.jl") From a0ab500533623bb280ceaeb2dbb2c2d6070e10f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Bignonnet?= Date: Wed, 24 May 2023 13:23:47 +0200 Subject: [PATCH 07/22] changes to ManyPencilArrayRFFT! --- src/allocate.jl | 11 +++---- src/multiarrays_r2c.jl | 72 +++++++++++++++++++----------------------- 2 files changed, 38 insertions(+), 45 deletions(-) diff --git a/src/allocate.jl b/src/allocate.jl index 3642234b..e1698dcf 100644 --- a/src/allocate.jl +++ b/src/allocate.jl @@ -15,11 +15,11 @@ size `dims`, and a tuple of `N` `PencilArray`s. If `p` is an in-place real-to-real or complex-to-complex plan, a [`ManyPencilArray`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.ManyPencilArray) is allocated. If `p` is an in-place real-to-complex plan, a - [`ManyPencilArrayInplaceRFFT`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.ManyPencilArray) + []`ManyPencilArrayRFFT!`](@ref) is allocated. - This - type holds `PencilArray` wrappers for the input and output transforms (as + These + types hold `PencilArray` wrappers for the input and output transforms (as well as for intermediate transforms) which share the same space in memory. The input and output `PencilArray`s should be respectively accessed by calling [`first(::ManyPencilArray)`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#Base.first-Tuple{ManyPencilArray}) and @@ -70,11 +70,10 @@ function allocate_input(plan::PencilFFTPlan{T,N,true, Nt, # number of transformed dimensions Nd, # number of decomposed dimensions Ne, # number of extra dimensions - G } - ; unsafe::Bool = true, fill_with_zeros::Bool = false) where {N,Nd,Ne,G<:PencilFFTs.GlobalFFTParams{T,Nt,true,TR}} where {T<:FFTReal,Nt,TR<:Tuple{Transforms.RFFT!,Vararg{Transforms.FFT!}}} + G }) where {N,Nd,Ne,G<:PencilFFTs.GlobalFFTParams{T,Nt,true,TR}} where {T<:FFTReal,Nt,TR<:Tuple{Transforms.RFFT!,Vararg{Transforms.FFT!}}} plans = plan.plans pencils = (first(plans).pencil_in, first(plans).pencil_out, map(pp -> pp.pencil_in, plans[2:end])...) - ManyPencilArrayInplaceRFFT{T}(undef, pencils...; extra_dims=plan.extra_dims, unsafe=unsafe, fill_with_zeros = fill_with_zeros) + ManyPencilArrayRFFT!{T}(undef, pencils...; extra_dims=plan.extra_dims) end allocate_input(p::PencilFFTPlan, dims...) = diff --git a/src/multiarrays_r2c.jl b/src/multiarrays_r2c.jl index a39c25af..2a04de3c 100644 --- a/src/multiarrays_r2c.jl +++ b/src/multiarrays_r2c.jl @@ -1,7 +1,7 @@ # copied and modified from https://github.com/jipolanco/PencilArrays.jl/blob/master/src/multiarrays.jl - +import PencilArrays:ManyPencilArray, _make_arrays """ - ManyPencilArrayInplaceRFFT{T,N,M} + ManyPencilArrayRFFT!{T,N,M} Container holding `M` different [`PencilArray`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.PencilArray) views to the same underlying data buffer. All views share the same and dimensionality `N`. @@ -13,16 +13,16 @@ It is used internally for such transforms by [`allocate_input`](@ref) and should --- - ManyPencilArrayInplaceRFFT{T}(undef, pencils...; extra_dims=()) + ManyPencilArrayRFFT!{T}(undef, pencils...; extra_dims=()) -Create a `ManyPencilArrayInplaceRFFT` container that can hold data of type `T` and `Complex{T}` associated +Create a `ManyPencilArrayRFFT!` container that can hold data of type `T` and `Complex{T}` associated to all the given [`Pencil`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.Pencil)s. The optional `extra_dims` argument is the same as for [`PencilArray`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.PencilArray). See also [`ManyPencilArray`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.ManyPencilArray) """ -struct ManyPencilArrayInplaceRFFT{ +struct ManyPencilArrayRFFT!{ ## TODO: redefine PencilArrays.ManyPencilArray instead or creating new struct, with eltype RealOrComplex{T} T, # element type of real array N, # number of dimensions of each array (including extra_dims) M, # number of arrays @@ -34,27 +34,31 @@ struct ManyPencilArrayInplaceRFFT{ data_complex :: DataVectorComplex arrays :: Arrays - function ManyPencilArrayInplaceRFFT{T}( + function ManyPencilArrayRFFT!{T}( init, real_pencil::Pencil{Np}, complex_pencils::Vararg{Pencil{Np}}; extra_dims::Dims=() - ) where {Np,T} + ) where {Np,T<:FFTReal} # real_pencil is a Pencil with dimensions `dims` of a real array with no padding and no permutation # the padded dimensions are (2*(dims[1] ÷ 2 + 1), dims[2:end]...) # first(complex_pencils) is a Pencil with dimensions of a complex array (dims[1] ÷ 2 + 1, dims[2:end]...) and no permutation pencils = (real_pencil, complex_pencils...) BufType = PencilArrays.typeof_array(real_pencil) @assert all(p -> PencilArrays.typeof_array(p) === BufType, complex_pencils) - # TODO : checks dims of pencils + @assert size_global(real_pencil)[2:end] == size_global(first(complex_pencils))[2:end] + @assert first(size_global(real_pencil)) ÷ 2 + 1 == first(size_global(first(complex_pencils))) + data_length = max(2 .* length.(complex_pencils)...) * prod(extra_dims) data_real = BufType{T}(init, data_length) - fill_with_zeros && fill!(data_real, 0.0) + # we don't use data_complex = reinterpret(Complex{T}, data_real) # since there is an issue with StridedView of ReinterpretArray, called by _permutedims in PencilArrays.Transpositions ptr_complex = convert(Ptr{Complex{T}}, pointer(data_real)) data_complex = unsafe_wrap(BufType, ptr_complex, data_length ÷ 2) + array_real = _make_real_array(data_real, extra_dims, real_pencil) - arrays_complex = _make_arrays(data_complex, extra_dims, complex_pencils...) + arrays_complex = PencilArrays._make_arrays(data_complex, extra_dims, complex_pencils...) arrays = (array_real, arrays_complex...) + N = Np + length(extra_dims) M = length(pencils) new{T, N, M, typeof(arrays), typeof(data_real), typeof(data_complex)}(data_real, data_complex, arrays) @@ -73,76 +77,66 @@ function _make_real_array(data, extra_dims, p) PencilArray(p, arr) end -function _make_arrays(data::V, extra_dims::Dims, p::Pencil, - pens::Vararg{Pencil}) where {V<:Union{DV, Base.ReinterpretArray{Complex{T}, 1, T, DV}}} where {DV <: DenseVector{T}} where {T} - dims = (size_local(p, MemoryOrder())..., extra_dims...) - n = prod(dims) - @assert n == length_local(p) * prod(extra_dims) - vec = view(data, Base.OneTo(n)) - arr = reshape(vec, dims) - A = PencilArray(p, arr) - (A, _make_arrays(data, extra_dims, pens...)...) -end - -_make_arrays(::V, ::Dims) where {V<:Union{DV, Base.ReinterpretArray{Complex{T}, 1, T, DV}}} where {DV <: DenseVector{T}} where {T} = () - -Base.eltype(::ManyPencilArrayInplaceRFFT{T}) where {T} = T -Base.ndims(::ManyPencilArrayInplaceRFFT{T,N}) where {T,N} = N +# all methods redefinitions below could be avoided by using a +# ManyPencilArray{ComplexOrReal{T}} instead of a ManyPencilArrayRFFT!{T} +# or defining a supertype in PencilArrays/src/multiarrays.jl +Base.eltype(::ManyPencilArrayRFFT!{T}) where {T} = T +Base.ndims(::ManyPencilArrayRFFT!{T,N}) where {T,N} = N """ - Tuple(A::ManyPencilArrayInplaceRFFT) -> (u1, u2, …) + Tuple(A::ManyPencilArrayRFFT!) -> (u1, u2, …) Returns the [`PencilArray`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.PencilArray)s wrapped by `A`. This can be useful for iterating over all the wrapped arrays. """ -@inline Base.Tuple(A::ManyPencilArrayInplaceRFFT) = A.arrays +@inline Base.Tuple(A::ManyPencilArrayRFFT!) = A.arrays """ - length(A::ManyPencilArrayInplaceRFFT) + length(A::ManyPencilArrayRFFT!) Returns the number of [`PencilArray`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.PencilArray)s wrapped by `A`. """ -Base.length(::ManyPencilArrayInplaceRFFT{T,N,M}) where {T,N,M} = M +Base.length(::ManyPencilArrayRFFT!{T,N,M}) where {T,N,M} = M """ - first(A::ManyPencilArrayInplaceRFFT) + first(A::ManyPencilArrayRFFT!) Returns the first [`PencilArray`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.PencilArray) wrapped by `A`. """ -Base.first(A::ManyPencilArrayInplaceRFFT) = first(A.arrays) +Base.first(A::ManyPencilArrayRFFT!) = first(A.arrays) """ - last(A::ManyPencilArrayInplaceRFFT) + last(A::ManyPencilArrayRFFT!) Returns the last [`PencilArray`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.PencilArray) wrapped by `A`. """ -Base.last(A::ManyPencilArrayInplaceRFFT) = last(A.arrays) +Base.last(A::ManyPencilArrayRFFT!) = last(A.arrays) """ - getindex(A::ManyPencilArrayInplaceRFFT, ::Val{i}) - getindex(A::ManyPencilArrayInplaceRFFT, i::Integer) + getindex(A::ManyPencilArrayRFFT!, ::Val{i}) + getindex(A::ManyPencilArrayRFFT!, i::Integer) Returns the i-th [`PencilArray`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.PencilArray) wrapped by `A`. If possible, the `Val{i}` form should be preferred, as it ensures that the full type of the returned `PencilArray` is known by the compiler. -See also [`first(::ManyPencilArrayInplaceRFFT)`](@ref), [`last(::ManyPencilArrayInplaceRFFT)`](@ref). +See also [`first(::ManyPencilArrayRFFT!)`](@ref), [`last(::ManyPencilArrayRFFT!)`](@ref). # Example ```julia -A = ManyPencilArrayInplaceRFFT(pencil1, pencil2, pencil3) +A = ManyPencilArrayRFFT!(pencil1, pencil2, pencil3) # Get the PencilArray associated to `pencil2`. u2 = A[2] u2 = A[Val(2)] ``` """ -Base.getindex(A::ManyPencilArrayInplaceRFFT, ::Val{i}) where {i} = +Base.getindex(A::ManyPencilArrayRFFT!, ::Val{i}) where {i} = _getindex(Val(i), A.arrays...) -@inline Base.getindex(A::ManyPencilArrayInplaceRFFT, i) = A[Val(i)] +@inline Base.getindex(A::ManyPencilArrayRFFT!, i) = A[Val(i)] @inline function _getindex(::Val{i}, a, t::Vararg) where {i} i :: Integer From 8029a10487b77be6883207f357ace16988e8a868 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Bignonnet?= Date: Wed, 24 May 2023 13:25:02 +0200 Subject: [PATCH 08/22] separated scaling and bacward transform --- src/operations.jl | 47 +++++++++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/src/operations.jl b/src/operations.jl index 155d9ac9..39407e53 100644 --- a/src/operations.jl +++ b/src/operations.jl @@ -2,9 +2,9 @@ const RealOrComplex{T} = Union{T, Complex{T}} where T <: FFTReal const PlanArrayPair{P,A} = Pair{P,A} where {P <: PencilPlan1D, A <: PencilArray} # Types of array over which a PencilFFTPlan can operate. -# PencilArray, ManyPencilArray and ManyPencilArrayInplaceRFFT are respectively for out-of-place, in-place and in-place rfft +# PencilArray, ManyPencilArray and ManyPencilArrayRFFT! are respectively for out-of-place, in-place and in-place rfft # transforms. -const FFTArray{T,N} = Union{PencilArray{T,N}, ManyPencilArray{T,N}, ManyPencilArrayInplaceRFFT{T,N}} where {T,N} +const FFTArray{T,N} = Union{PencilArray{T,N}, ManyPencilArray{T,N}, ManyPencilArrayRFFT!{T,N}} where {T,N} # Collections of FFTArray (e.g. for vector components), for broadcasting plans # to each array. These types are basically those returned by `allocate_input` @@ -12,7 +12,7 @@ const FFTArray{T,N} = Union{PencilArray{T,N}, ManyPencilArray{T,N}, ManyPencilAr const FFTArrayCollection = Union{Tuple{Vararg{A}}, AbstractArray{A}} where {A <: FFTArray} -const PencilMultiarray{T,N} = Union{ManyPencilArray{T,N}, ManyPencilArrayInplaceRFFT{T,N}} where {T,N} +const PencilMultiarray{T,N} = Union{ManyPencilArray{T,N}, ManyPencilArrayRFFT!{T,N}} where {T,N} # This allows to treat plans as scalars when broadcasting. # This means that, if u = (u1, u2, u3) is a tuple of PencilArrays @@ -30,13 +30,24 @@ function LinearAlgebra.mul!( end end -# Backward transforms +# Backward transforms (unscaled) +function bmul!( + dst::FFTArray{T,N}, p::PencilFFTPlan{T,N}, src::FFTArray{Ti,N}, + ) where {T, N, Ti <: RealOrComplex} + @timeit_debug p.timer "PencilFFTs bmul!" begin + _check_arrays(p, dst, src) + _apply_plans!(Val(FFTW.BACKWARD), p, dst, src) + end +end + +# Inverse transforms (scaled) function LinearAlgebra.ldiv!( dst::FFTArray{T,N}, p::PencilFFTPlan{T,N}, src::FFTArray{Ti,N}, ) where {T, N, Ti <: RealOrComplex} @timeit_debug p.timer "PencilFFTs ldiv!" begin _check_arrays(p, dst, src) _apply_plans!(Val(FFTW.BACKWARD), p, dst, src) + _scale!(dst, inv(scale_factor(p))) end end @@ -50,6 +61,14 @@ function Base.:\(p::PencilFFTPlan, src::FFTArray) ldiv!(dst, p, src) end +function _scale!(dst::PencilArray{<:RealOrComplex{T},N}, inv_scale::T) where {T,N} + parent(dst) .*= inv_scale +end + +function _scale!(dst::PencilMultiarray{<:RealOrComplex{T},N}, inv_scale::T) where {T,N} + parent(first(dst)) .*= inv_scale +end + # Out-of-place version _maybe_allocate(allocator::Function, p::PencilFFTPlan{T,N,false} where {T,N}, ::PencilArray) = allocator(p) @@ -123,6 +142,10 @@ for f in (:mul!, :ldiv!) (check_compatible(dst, src); $f.(dst, p, src)) end +bmul!(dst::FFTArrayCollection, p::PencilFFTPlan, + src::FFTArrayCollection) = + (check_compatible(dst, src); bmul!.(dst, p, src)) + for f in (:*, :\) @eval Base.$f(p::PencilFFTPlan, src::FFTArrayCollection) = $f.(p, src) @@ -145,11 +168,6 @@ function _apply_plans!( _apply_plans_out_of_place!(dir, full_plan, y, x, plans...) - if dir === Val(FFTW.BACKWARD) - # Scale transform. - y ./= scale_factor(full_plan) - end - y end @@ -165,18 +183,13 @@ function _apply_plans!( _apply_plans_in_place!(dir, full_plan, nothing, pp...) - if dir === Val(FFTW.BACKWARD) - # Scale transform. - first(A) ./= scale_factor(full_plan) - end - A end # In-place RFFT version function _apply_plans!( dir::Val, full_plan::PencilFFTPlan{T,N,true}, - A::ManyPencilArrayInplaceRFFT{T,N}, A_again::ManyPencilArrayInplaceRFFT{T,N}) where {T<:FFTReal,N} + A::ManyPencilArrayRFFT!{T,N}, A_again::ManyPencilArrayRFFT!{T,N}) where {T<:FFTReal,N} @assert A === A_again # pairs for 1D FFT! plans, RFFT! plan is treated separately pairs = _make_pairs(full_plan.plans[2:end], A.arrays[3:end]) @@ -205,8 +218,6 @@ function _apply_plans!( _apply_rfft_plan_in_place!(dir, full_plan, A.arrays[1], first(full_plan.plans), A.arrays[2]) _wait_mpi_operations!(t, full_plan.timer) - # Scale transform. - first(A) ./= scale_factor(full_plan) end A @@ -284,7 +295,7 @@ _apply_plans_in_place!(::Val, ::PencilFFTPlan, u_prev::PencilArray) = u_prev function _apply_rfft_plan_in_place!(dir::Val, full_plan::PencilFFTPlan, A_out ::PencilArray{To,N}, p::PencilPlan1D{ti,to,Pi,Po,Tr}, A_in ::PencilArray{Ti,N}) where {Ti<:RealOrComplex{T},To<:RealOrComplex{T},ti<:RealOrComplex{T},to<:RealOrComplex{T},Pi,Po,N,Tr<:Union{Transforms.RFFT!,Transforms.BRFFT!}} where T<:FFTReal fft_plan = dir === Val(FFTW.FORWARD) ? p.fft_plan : p.bfft_plan - @timeit_debug full_plan.timer "FFT!" mul!(parent(A_out), fft_plan, parent(A_in)) #fft_plan * parent(A_in) # A_in_padded + @timeit_debug full_plan.timer "FFT!" mul!(parent(A_out), fft_plan, parent(A_in)) end _split_first(a, b...) = (a, b) # (x, y, z, w) -> (x, (y, z, w)) From 658bd5e1391bc72e427ed1b03b29376ce5b4eab5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Bignonnet?= Date: Wed, 24 May 2023 13:25:32 +0200 Subject: [PATCH 09/22] fixed rfft! and brff! plan creation --- src/Transforms/r2c.jl | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/src/Transforms/r2c.jl b/src/Transforms/r2c.jl index 1f32bc6e..a5fa4203 100644 --- a/src/Transforms/r2c.jl +++ b/src/Transforms/r2c.jl @@ -1,5 +1,5 @@ ## Real-to-complex and complex-to-real transforms. - +import FFTW: rFFTWPlan, FORWARD, BACKWARD, ESTIMATE, NO_TIMELIMIT, FakeArray, colmajorstrides, brfft_output_size """ RFFT() @@ -128,12 +128,14 @@ function plan_rfft!(X::StridedArray{T,N}, region; sz = size(X) # physical input size (real) osize = FFTW.rfft_output_size(sz, region) # output size (complex) isize = ntuple(i -> i == first(region) ? 2osize[i] : osize[i], Val(N)) # padded input size (real) - X_padded = flags&FFTW.ESTIMATE != 0 ? # is time measurement not required ? - FFTW.FakeArray{T,N}(sz, FFTW.colmajorstrides(isize)) : # fake allocation, only size and strides matter - view(Array{T}(undef, isize), Base.OneTo.(sz)...) # allocation - Y = flags&FFTW.ESTIMATE != 0 ? - FFTW.FakeArray{Complex{T}}(osize) : # fake allocation, only size and strides matter - Array{Complex{T}}(undef, osize) # allocation + if flags&FFTW.ESTIMATE != 0 # time measurement not required + X_padded = FFTW.FakeArray{T,N}(sz, FFTW.colmajorstrides(isize)) # fake allocation, only pointer, size and strides matter + Y = FFTW.FakeArray{Complex{T}}(osize) + else # need to allocate new array since size of X is too small... + data = Array{T}(undef, prod(isize)) + X_padded = view(reshape(data, isize), Base.OneTo.(sz)...) # allocation + Y = reshape(reinterpret(Complex{T}, data), osize) + end return FFTW.rFFTWPlan{T,FFTW.FORWARD,true,N}(X_padded, Y, region, flags, timelimit) end @@ -143,22 +145,7 @@ function plan_brfft!(X::StridedArray{Complex{T},N}, d, region; isize = size(X) # input size (complex) osize = ntuple(i -> i == first(region) ? 2isize[i] : isize[i], Val(N)) # padded output size (real) sz = FFTW.brfft_output_size(X, d, region) # physical output size (real) - # Y is padded - Y = flags&FFTW.ESTIMATE != 0 ? # is time measurement not required ? - FFTW.FakeArray{T,N}(sz, FFTW.colmajorstrides(osize)) : # fake allocation, only size and strides matter - view(Array{T}(undef, osize), Base.OneTo.(sz)...) # allocation + Yflat = reinterpret(T, reshape(X, prod(isize))) + Y = view(reshape(Yflat, osize), Base.OneTo.(sz)...) # Y is padded return FFTW.rFFTWPlan{Complex{T},FFTW.BACKWARD,true,N}(X, Y, region, flags, timelimit) -end -#= function Base.:*(p::FFTW.rFFTWPlan{T,FFTW.FORWARD,true,N}, X::StridedArray{T,N}) where {T<:FFTW.fftwReal,N} - ptr_complex = convert(Ptr{Complex{T}}, pointer(X)) - Y = unsafe_wrap(Array, ptr_complex, p.osz) # reinterpretation of X as a complex Array - mul!(Y, p, X) # assesses size, strides and memory alignment -end - -function Base.:*(p::FFTW.rFFTWPlan{Complex{T},FFTW.BACKWARD,true,N}, X::StridedArray{Complex{T},N}) where {T<:FFTW.fftwReal,N} - ptr_real = convert(Ptr{T}, pointer(X)) - osize = ntuple(i -> i == first(p.region) ? 2p.sz[i] : p.sz[i], Val(N)) # padded real size - Y_parent = unsafe_wrap(Array, ptr_real, osize) # reinterpretation of X as a real Array - Y = view(Y_parent, Base.OneTo.(p.osz)...) # skip padded values, view to physical real size only - mul!(Y, p, X) # assesses size, strides and memory alignment -end =# \ No newline at end of file +end \ No newline at end of file From 0d886128655b4943ddc86f57ab1065794aeb3d5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Bignonnet?= Date: Wed, 24 May 2023 13:26:01 +0200 Subject: [PATCH 10/22] further rfft! tests --- test/rfft.jl | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/test/rfft.jl b/test/rfft.jl index cec24ba9..fb5e25c1 100644 --- a/test/rfft.jl +++ b/test/rfft.jl @@ -191,7 +191,7 @@ function test_rfft(size_in; benchmark=true) MPI.Barrier(comm) end -function test_rfft!(size_in) +function test_rfft!(size_in; flags = FFTW.ESTIMATE, benchmark=true) comm = MPI.COMM_WORLD rank = MPI.Comm_rank(comm) @@ -200,8 +200,8 @@ function test_rfft!(size_in) # Test creating Pencil and creating plan. pen = Pencil(size_in, comm) - inplace_plan = @inferred PencilFFTPlan(pen, Transforms.RFFT!()) - outofplace_place = @inferred PencilFFTPlan(pen, Transforms.RFFT()) + inplace_plan = @inferred PencilFFTPlan(pen, Transforms.RFFT!(), fftw_flags=flags) + outofplace_place = @inferred PencilFFTPlan(pen, Transforms.RFFT(), fftw_flags=flags) # Allocate and initialise scalar fields u = allocate_input(inplace_plan) #, Val(3)) @@ -221,9 +221,26 @@ function test_rfft!(size_in) ldiv!(u, inplace_plan, u) ldiv!(v, outofplace_place, v̂) @test all(isapprox.(x, v, atol=1e-8)) - @test all(isapprox.(x[1:3], [1.0, 2.0, 0.0], atol = 1e-8)) - end + rank == 0 && @test all(isapprox.(x[1:3], [1.0, 2.0, 0.0], atol = 1e-8)) + + rng = MersenneTwister(42) + init_random_field!(x̂, rng) + copyto!(parent(v̂), parent(x̂)) + PencilFFTs.bmul!(u, inplace_plan, u) + PencilFFTs.bmul!(v, outofplace_place, v̂) + @test all(isapprox.(x, v, atol=1e-8)) + + mul!(u, inplace_plan, u) + mul!(v̂, outofplace_place, v) + @test all(isapprox.(x̂, v̂, atol=1e-8)) + end + if benchmark + println("micro-benchmarks: ") + println("- rfft!...\t") ; @time mul!(u, inplace_plan, u) ; @time mul!(u, inplace_plan, u) + println("- rfft...\t") ; @time mul!(v̂, outofplace_place, v) ; @time mul!(v̂, outofplace_place, v) + println("done ") + end MPI.Barrier(comm) end @@ -273,9 +290,9 @@ println() test_rfft(DATA_DIMS_ODD, benchmark=false) test_1D_rfft!(first(DATA_DIMS_ODD)) +test_1D_rfft!(first(DATA_DIMS_EVEN), flags = FFTW.MEASURE) test_1D_rfft!(first(DATA_DIMS_EVEN)) -# test_1D_rfft!(first(DATA_DIMS_EVEN), flags = FFTW.MEASURE) # test fails for this specific dimension, possibly due to a FFTW bug -test_rfft!(DATA_DIMS_ODD) -# test_rfft!(DATA_DIMS_EVEN) # test fails for this specific dimension, possibly due to a FFTW bug -test_rfft!(DATA_DIMS_EVEN .+ 2) +test_rfft!(DATA_DIMS_ODD, benchmark=false) +test_rfft!(DATA_DIMS_EVEN, benchmark=false) +# test_rfft!((256,256,256)) # similar execution times for large rfft and rfft! \ No newline at end of file From 936ecff42f44b55f3a3cb48056b963c44b4a0cc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Bignonnet?= Date: Wed, 24 May 2023 13:29:30 +0200 Subject: [PATCH 11/22] further rfft! tests --- test/rfft.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/rfft.jl b/test/rfft.jl index fb5e25c1..c8353159 100644 --- a/test/rfft.jl +++ b/test/rfft.jl @@ -204,7 +204,7 @@ function test_rfft!(size_in; flags = FFTW.ESTIMATE, benchmark=true) outofplace_place = @inferred PencilFFTPlan(pen, Transforms.RFFT(), fftw_flags=flags) # Allocate and initialise scalar fields - u = allocate_input(inplace_plan) #, Val(3)) + u = allocate_input(inplace_plan) x = first(u) ; x̂ = last(u) # Real and Complex views v = allocate_input(outofplace_place) @@ -256,7 +256,7 @@ function test_1D_rfft!(size_in; flags=FFTW.ESTIMATE) â2 = zeros(Complex{Float64}, dims_fourier) ; a2 = zeros(Float64, dims) ; - p = Transforms.plan_rfft!(a, 1, flags=flags) ; #display(p) + p = Transforms.plan_rfft!(a, 1, flags=flags) ; p2 = FFTW.plan_rfft(a2, 1, flags=flags) ; bp = Transforms.plan_brfft!(â, dims[1], 1, flags=flags) ; bp2 = FFTW.plan_brfft(â, dims[1], 1, flags=flags) ; From 00c6a10b5c8219f8be4bebe36ba5f3d3f521209c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Bignonnet?= Date: Wed, 24 May 2023 14:41:54 +0200 Subject: [PATCH 12/22] fixed docstring --- src/allocate.jl | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/allocate.jl b/src/allocate.jl index e1698dcf..24e19ec4 100644 --- a/src/allocate.jl +++ b/src/allocate.jl @@ -15,11 +15,9 @@ size `dims`, and a tuple of `N` `PencilArray`s. If `p` is an in-place real-to-real or complex-to-complex plan, a [`ManyPencilArray`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.ManyPencilArray) is allocated. If `p` is an in-place real-to-complex plan, a - []`ManyPencilArrayRFFT!`](@ref) - is allocated. + [`ManyPencilArrayRFFT!`](@ref) is allocated. - These - types hold `PencilArray` wrappers for the input and output transforms (as + These types hold `PencilArray` wrappers for the input and output transforms (as well as for intermediate transforms) which share the same space in memory. The input and output `PencilArray`s should be respectively accessed by calling [`first(::ManyPencilArray)`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#Base.first-Tuple{ManyPencilArray}) and From 254a7683a3289ff85dbba084db4bebf816668c3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Bignonnet?= Date: Wed, 24 May 2023 16:25:35 +0200 Subject: [PATCH 13/22] fixed output of _scale! --- src/operations.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/operations.jl b/src/operations.jl index 39407e53..32e4a0cb 100644 --- a/src/operations.jl +++ b/src/operations.jl @@ -61,12 +61,12 @@ function Base.:\(p::PencilFFTPlan, src::FFTArray) ldiv!(dst, p, src) end -function _scale!(dst::PencilArray{<:RealOrComplex{T},N}, inv_scale::T) where {T,N} - parent(dst) .*= inv_scale +function _scale!(dst::PencilArray{<:RealOrComplex{T},N}, inv_scale::Number) where {T,N} + dst .*= inv_scale end -function _scale!(dst::PencilMultiarray{<:RealOrComplex{T},N}, inv_scale::T) where {T,N} - parent(first(dst)) .*= inv_scale +function _scale!(dst::PencilMultiarray{<:RealOrComplex{T},N}, inv_scale::Number) where {T,N} + first(dst) .*= inv_scale end # Out-of-place version From 14eb06dd6837f7d2a1984dd67ae614d3ed9d0918 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Bignonnet?= Date: Wed, 24 May 2023 16:26:18 +0200 Subject: [PATCH 14/22] fixed triangular dispatch for expand_dims --- src/Transforms/r2c.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Transforms/r2c.jl b/src/Transforms/r2c.jl index a5fa4203..d9658ea7 100644 --- a/src/Transforms/r2c.jl +++ b/src/Transforms/r2c.jl @@ -117,8 +117,10 @@ expand_dims(tr::RFFT!, ::Val{N}) where {N} = expand_dims(tr::BRFFT, ::Val{N}) where {N} = (BFFT(), expand_dims(tr, Val(N - 1))...) expand_dims(tr::BRFFT!, ::Val{N}) where {N} = (BFFT!(), expand_dims(tr, Val(N - 1))...) -expand_dims(tr::TransformC2R, ::Val{1}) = (tr, ) -expand_dims(tr::TransformC2R, ::Val{0}) = () +expand_dims(tr::BRFFT, ::Val{1}) = (tr, ) +expand_dims(tr::BRFFT, ::Val{0}) = () +expand_dims(tr::BRFFT!, ::Val{1}) = (tr, ) +expand_dims(tr::BRFFT!, ::Val{0}) = () ## FFTW wrappers for inplace RFFT plans From 1f823243120049ce353d927ee9afd0bc2672f7dd Mon Sep 17 00:00:00 2001 From: Juan Ignacio Polanco Date: Thu, 25 May 2023 09:31:05 +0200 Subject: [PATCH 15/22] Test: use rtol so that commented 256^3 tests pass --- test/rfft.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/rfft.jl b/test/rfft.jl index c8353159..2e0ae895 100644 --- a/test/rfft.jl +++ b/test/rfft.jl @@ -233,7 +233,7 @@ function test_rfft!(size_in; flags = FFTW.ESTIMATE, benchmark=true) mul!(u, inplace_plan, u) mul!(v̂, outofplace_place, v) - @test all(isapprox.(x̂, v̂, atol=1e-8)) + @test all(isapprox.(x̂, v̂, rtol=1e-8)) end if benchmark println("micro-benchmarks: ") @@ -295,4 +295,4 @@ test_1D_rfft!(first(DATA_DIMS_EVEN)) test_rfft!(DATA_DIMS_ODD, benchmark=false) test_rfft!(DATA_DIMS_EVEN, benchmark=false) -# test_rfft!((256,256,256)) # similar execution times for large rfft and rfft! \ No newline at end of file +# test_rfft!((256,256,256)) # similar execution times for large rfft and rfft! From 6517407fae3fb955ce990cf97060336f10174e80 Mon Sep 17 00:00:00 2001 From: Juan Ignacio Polanco Date: Thu, 25 May 2023 09:44:50 +0200 Subject: [PATCH 16/22] Revert "Test: use rtol so that commented 256^3 tests pass" This reverts commit 1f823243120049ce353d927ee9afd0bc2672f7dd. --- test/rfft.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/rfft.jl b/test/rfft.jl index 2e0ae895..c8353159 100644 --- a/test/rfft.jl +++ b/test/rfft.jl @@ -233,7 +233,7 @@ function test_rfft!(size_in; flags = FFTW.ESTIMATE, benchmark=true) mul!(u, inplace_plan, u) mul!(v̂, outofplace_place, v) - @test all(isapprox.(x̂, v̂, rtol=1e-8)) + @test all(isapprox.(x̂, v̂, atol=1e-8)) end if benchmark println("micro-benchmarks: ") @@ -295,4 +295,4 @@ test_1D_rfft!(first(DATA_DIMS_EVEN)) test_rfft!(DATA_DIMS_ODD, benchmark=false) test_rfft!(DATA_DIMS_EVEN, benchmark=false) -# test_rfft!((256,256,256)) # similar execution times for large rfft and rfft! +# test_rfft!((256,256,256)) # similar execution times for large rfft and rfft! \ No newline at end of file From cf431908a38a95ad464482972bfe8d25b59fd264 Mon Sep 17 00:00:00 2001 From: Juan Ignacio Polanco Date: Thu, 25 May 2023 10:00:27 +0200 Subject: [PATCH 17/22] Formatting --- src/Transforms/r2c.jl | 5 ++-- test/rfft.jl | 63 ++++++++++++++++++++++++------------------- 2 files changed, 39 insertions(+), 29 deletions(-) diff --git a/src/Transforms/r2c.jl b/src/Transforms/r2c.jl index d9658ea7..4dd0cb30 100644 --- a/src/Transforms/r2c.jl +++ b/src/Transforms/r2c.jl @@ -1,5 +1,6 @@ ## Real-to-complex and complex-to-real transforms. -import FFTW: rFFTWPlan, FORWARD, BACKWARD, ESTIMATE, NO_TIMELIMIT, FakeArray, colmajorstrides, brfft_output_size +using FFTW: FFTW + """ RFFT() @@ -150,4 +151,4 @@ function plan_brfft!(X::StridedArray{Complex{T},N}, d, region; Yflat = reinterpret(T, reshape(X, prod(isize))) Y = view(reshape(Yflat, osize), Base.OneTo.(sz)...) # Y is padded return FFTW.rFFTWPlan{Complex{T},FFTW.BACKWARD,true,N}(X, Y, region, flags, timelimit) -end \ No newline at end of file +end diff --git a/test/rfft.jl b/test/rfft.jl index c8353159..e1212c14 100644 --- a/test/rfft.jl +++ b/test/rfft.jl @@ -204,14 +204,18 @@ function test_rfft!(size_in; flags = FFTW.ESTIMATE, benchmark=true) outofplace_place = @inferred PencilFFTPlan(pen, Transforms.RFFT(), fftw_flags=flags) # Allocate and initialise scalar fields - u = allocate_input(inplace_plan) - x = first(u) ; x̂ = last(u) # Real and Complex views + u = @inferred allocate_input(inplace_plan) + x = first(u); x̂ = last(u) # Real and Complex views - v = allocate_input(outofplace_place) - v̂ = allocate_output(outofplace_place) + v = @inferred allocate_input(outofplace_place) + v̂ = @inferred allocate_output(outofplace_place) - fill!(x, 0.0) ; rank == 0 && (x[1] = 1.0) ; rank == 0 && (x[2] = 2.0) ; - fill!(v, 0.0) ; rank == 0 && (v[1] = 1.0) ; rank == 0 && (v[2] = 2.0) ; + fill!(x, 0.0) + fill!(v, 0.0) + if rank == 0 + x[1] = 1.0; x[2] = 2.0 + v[1] = 1.0; v[2] = 2.0 + end @testset "RFFT! vs RFFT" begin mul!(u, inplace_plan, u) @@ -237,43 +241,48 @@ function test_rfft!(size_in; flags = FFTW.ESTIMATE, benchmark=true) end if benchmark println("micro-benchmarks: ") - println("- rfft!...\t") ; @time mul!(u, inplace_plan, u) ; @time mul!(u, inplace_plan, u) - println("- rfft...\t") ; @time mul!(v̂, outofplace_place, v) ; @time mul!(v̂, outofplace_place, v) + println("- rfft!...\t") + @time mul!(u, inplace_plan, u) + @time mul!(u, inplace_plan, u) + println("- rfft...\t") + @time mul!(v̂, outofplace_place, v) + @time mul!(v̂, outofplace_place, v) println("done ") end MPI.Barrier(comm) end function test_1D_rfft!(size_in; flags=FFTW.ESTIMATE) - dims = (size_in,) ; - dims_padded = (2(dims[1] ÷ 2 + 1), dims[2:end]...) ; + dims = (size_in,) + dims_padded = (2(dims[1] ÷ 2 + 1), dims[2:end]...) dims_fourier = ((dims[1] ÷ 2 + 1), dims[2:end]...) - A = zeros(Float64, dims_padded) ; - a = view(A, Base.OneTo.(dims)...) ; + A = zeros(Float64, dims_padded) + a = view(A, Base.OneTo.(dims)...) â = reinterpret(Complex{Float64}, A) - â2 = zeros(Complex{Float64}, dims_fourier) ; - a2 = zeros(Float64, dims) ; - - p = Transforms.plan_rfft!(a, 1, flags=flags) ; - p2 = FFTW.plan_rfft(a2, 1, flags=flags) ; - bp = Transforms.plan_brfft!(â, dims[1], 1, flags=flags) ; - bp2 = FFTW.plan_brfft(â, dims[1], 1, flags=flags) ; + â2 = zeros(Complex{Float64}, dims_fourier) + a2 = zeros(Float64, dims) + + p = Transforms.plan_rfft!(a, 1, flags=flags) + p2 = FFTW.plan_rfft(a2, 1, flags=flags) + bp = Transforms.plan_brfft!(â, dims[1], 1, flags=flags) + bp2 = FFTW.plan_brfft(â, dims[1], 1, flags=flags) - fill!(a2, 0.0) ; a2[1] = 1 ; a2[2] = 2 ; - fill!(a, 0.0) ; a[1] = 1 ; a[2] = 2 ; + fill!(a2, 0.0); a2[1] = 1; a2[2] = 2; + fill!(a, 0.0); a[1] = 1; a[2] = 2; @testset "1D RFFT! vs RFFT" begin - mul!(â, p, a) ; - mul!(â2, p2, a2) ; + mul!(â, p, a) + mul!(â2, p2, a2) @test all(isapprox.(â2, â, atol = 1e-8)) - mul!(a, bp, â) ; - mul!(a2, bp2, â2) ; + mul!(a, bp, â) + mul!(a2, bp2, â2) @test all(isapprox.(a2, a, atol = 1e-8)) - a /= size_in ; a2 /= size_in ; + a /= size_in + a2 /= size_in @test all(isapprox.(a[1:3], [1.0, 2.0, 0.0], atol = 1e-8)) end @@ -295,4 +304,4 @@ test_1D_rfft!(first(DATA_DIMS_EVEN)) test_rfft!(DATA_DIMS_ODD, benchmark=false) test_rfft!(DATA_DIMS_EVEN, benchmark=false) -# test_rfft!((256,256,256)) # similar execution times for large rfft and rfft! \ No newline at end of file +# test_rfft!((256,256,256)) # similar execution times for large rfft and rfft! From 0ca7322fbf89eff4348fd848ffe494614b70326e Mon Sep 17 00:00:00 2001 From: Juan Ignacio Polanco Date: Thu, 25 May 2023 10:11:44 +0200 Subject: [PATCH 18/22] Simplify dispatch in allocate_input --- src/allocate.jl | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/src/allocate.jl b/src/allocate.jl index 24e19ec4..ec0f18d2 100644 --- a/src/allocate.jl +++ b/src/allocate.jl @@ -41,17 +41,26 @@ size `dims`, and a tuple of `N` `PencilArray`s. # p * v_in # not allowed!! ``` """ -function allocate_input end +function allocate_input(p::PencilFFTPlan) + inplace = is_inplace(p) + _allocate_input(Val(inplace), p) +end # Out-of-place version -function allocate_input(p::PencilFFTPlan{T,N,false} where {T,N}) +function _allocate_input(inplace::Val{false}, p::PencilFFTPlan) T = eltype_input(p) pen = pencil_input(p) PencilArray{T}(undef, pen, p.extra_dims...) end # In-place version -function allocate_input(p::PencilFFTPlan{T,N,true} where {T,N}) +function _allocate_input(inplace::Val{true}, p::PencilFFTPlan) + (; transforms,) = p.global_params + _allocate_input(inplace, p, transforms...) +end + +# In-place: generic case +function _allocate_input(inplace::Val{true}, p::PencilFFTPlan, transforms...) pencils = map(pp -> pp.pencil_in, p.plans) # Note that for each 1D plan, the input and output pencils are the same. @@ -63,15 +72,14 @@ function allocate_input(p::PencilFFTPlan{T,N,true} where {T,N}) ManyPencilArray{T}(undef, pencils...; extra_dims=p.extra_dims) end -# In-place version for real-to-complex RFFT! -function allocate_input(plan::PencilFFTPlan{T,N,true, - Nt, # number of transformed dimensions - Nd, # number of decomposed dimensions - Ne, # number of extra dimensions - G }) where {N,Nd,Ne,G<:PencilFFTs.GlobalFFTParams{T,Nt,true,TR}} where {T<:FFTReal,Nt,TR<:Tuple{Transforms.RFFT!,Vararg{Transforms.FFT!}}} - plans = plan.plans +# In-place: specific case of RFFT! +function _allocate_input( + inplace::Val{true}, p::PencilFFTPlan{T}, + ::Transforms.RFFT!, ::Vararg{Transforms.FFT!}, + ) where {T} + plans = p.plans pencils = (first(plans).pencil_in, first(plans).pencil_out, map(pp -> pp.pencil_in, plans[2:end])...) - ManyPencilArrayRFFT!{T}(undef, pencils...; extra_dims=plan.extra_dims) + ManyPencilArrayRFFT!{T}(undef, pencils...; extra_dims=p.extra_dims) end allocate_input(p::PencilFFTPlan, dims...) = @@ -89,17 +97,20 @@ If `p` is an in-place plan, a [`ManyPencilArray`](https://jipolanco.github.io/Pe See [`allocate_input`](@ref) for details. """ -function allocate_output end +function allocate_output(p::PencilFFTPlan) + inplace = is_inplace(p) + _allocate_output(Val(inplace), p) +end # Out-of-place version. -function allocate_output(p::PencilFFTPlan{T,N,false} where {T,N}) +function _allocate_output(inplace::Val{false}, p::PencilFFTPlan) T = eltype_output(p) pen = pencil_output(p) PencilArray{T}(undef, pen, p.extra_dims...) end # For in-place plans, the output and input are the same ManyPencilArray. -allocate_output(p::PencilFFTPlan{T,N,true} where {T,N}) = allocate_input(p) +_allocate_output(inplace::Val{true}, p::PencilFFTPlan) = _allocate_input(inplace, p) allocate_output(p::PencilFFTPlan, dims...) = _allocate_many(allocate_output, p, dims...) From 3dee0a62d0f62b9b86b19e281c1364851856b621 Mon Sep 17 00:00:00 2001 From: Juan Ignacio Polanco Date: Thu, 25 May 2023 12:20:47 +0200 Subject: [PATCH 19/22] Use PencilArrays.AbstractManyPencilArray --- Project.toml | 2 +- src/multiarrays_r2c.jl | 80 +++--------------------------------------- src/operations.jl | 3 +- 3 files changed, 8 insertions(+), 77 deletions(-) diff --git a/Project.toml b/Project.toml index 92f7c81f..f6abdf62 100644 --- a/Project.toml +++ b/Project.toml @@ -16,7 +16,7 @@ TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" AbstractFFTs = "1" FFTW = "1.6" MPI = "0.19, 0.20" -PencilArrays = "0.17" +PencilArrays = "0.18" Reexport = "1" TimerOutputs = "0.5" julia = "1.7" diff --git a/src/multiarrays_r2c.jl b/src/multiarrays_r2c.jl index 2a04de3c..ad02c15d 100644 --- a/src/multiarrays_r2c.jl +++ b/src/multiarrays_r2c.jl @@ -1,7 +1,8 @@ # copied and modified from https://github.com/jipolanco/PencilArrays.jl/blob/master/src/multiarrays.jl -import PencilArrays:ManyPencilArray, _make_arrays +import PencilArrays: AbstractManyPencilArray, _make_arrays + """ - ManyPencilArrayRFFT!{T,N,M} + ManyPencilArrayRFFT!{T,N,M} <: AbstractManyPencilArray{N,M} Container holding `M` different [`PencilArray`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.PencilArray) views to the same underlying data buffer. All views share the same and dimensionality `N`. @@ -22,14 +23,14 @@ The optional `extra_dims` argument is the same as for [`PencilArray`](https://ji See also [`ManyPencilArray`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.ManyPencilArray) """ -struct ManyPencilArrayRFFT!{ ## TODO: redefine PencilArrays.ManyPencilArray instead or creating new struct, with eltype RealOrComplex{T} +struct ManyPencilArrayRFFT!{ T, # element type of real array N, # number of dimensions of each array (including extra_dims) M, # number of arrays Arrays <: Tuple{Vararg{PencilArray,M}}, DataVector <: AbstractVector{T}, DataVectorComplex <: AbstractVector{Complex{T}}, - } + } <: AbstractManyPencilArray{N, M} data :: DataVector data_complex :: DataVectorComplex arrays :: Arrays @@ -76,74 +77,3 @@ function _make_real_array(data, extra_dims, p) arr = view(parent_arr, axes_local...) PencilArray(p, arr) end - -# all methods redefinitions below could be avoided by using a -# ManyPencilArray{ComplexOrReal{T}} instead of a ManyPencilArrayRFFT!{T} -# or defining a supertype in PencilArrays/src/multiarrays.jl -Base.eltype(::ManyPencilArrayRFFT!{T}) where {T} = T -Base.ndims(::ManyPencilArrayRFFT!{T,N}) where {T,N} = N - -""" - Tuple(A::ManyPencilArrayRFFT!) -> (u1, u2, …) - -Returns the [`PencilArray`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.PencilArray)s wrapped by `A`. - -This can be useful for iterating over all the wrapped arrays. -""" -@inline Base.Tuple(A::ManyPencilArrayRFFT!) = A.arrays - -""" - length(A::ManyPencilArrayRFFT!) - -Returns the number of [`PencilArray`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.PencilArray)s wrapped by `A`. -""" -Base.length(::ManyPencilArrayRFFT!{T,N,M}) where {T,N,M} = M - -""" - first(A::ManyPencilArrayRFFT!) - -Returns the first [`PencilArray`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.PencilArray) wrapped by `A`. -""" -Base.first(A::ManyPencilArrayRFFT!) = first(A.arrays) - -""" - last(A::ManyPencilArrayRFFT!) - -Returns the last [`PencilArray`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.PencilArray) wrapped by `A`. -""" -Base.last(A::ManyPencilArrayRFFT!) = last(A.arrays) - -""" - getindex(A::ManyPencilArrayRFFT!, ::Val{i}) - getindex(A::ManyPencilArrayRFFT!, i::Integer) - -Returns the i-th [`PencilArray`](https://jipolanco.github.io/PencilArrays.jl/dev/PencilArrays/#PencilArrays.PencilArray) wrapped by `A`. - -If possible, the `Val{i}` form should be preferred, as it ensures that the full -type of the returned `PencilArray` is known by the compiler. - -See also [`first(::ManyPencilArrayRFFT!)`](@ref), [`last(::ManyPencilArrayRFFT!)`](@ref). - -# Example - -```julia -A = ManyPencilArrayRFFT!(pencil1, pencil2, pencil3) - -# Get the PencilArray associated to `pencil2`. -u2 = A[2] -u2 = A[Val(2)] -``` -""" -Base.getindex(A::ManyPencilArrayRFFT!, ::Val{i}) where {i} = - _getindex(Val(i), A.arrays...) -@inline Base.getindex(A::ManyPencilArrayRFFT!, i) = A[Val(i)] - -@inline function _getindex(::Val{i}, a, t::Vararg) where {i} - i :: Integer - i <= 0 && throw(BoundsError("index must be >= 1")) - i == 1 && return a - _getindex(Val(i - 1), t...) -end - -# This will happen if the index `i` intially passed is too large. -@inline _getindex(::Val) = throw(BoundsError("invalid index")) \ No newline at end of file diff --git a/src/operations.jl b/src/operations.jl index 32e4a0cb..8caab361 100644 --- a/src/operations.jl +++ b/src/operations.jl @@ -191,6 +191,7 @@ function _apply_plans!( dir::Val, full_plan::PencilFFTPlan{T,N,true}, A::ManyPencilArrayRFFT!{T,N}, A_again::ManyPencilArrayRFFT!{T,N}) where {T<:FFTReal,N} @assert A === A_again + # pairs for 1D FFT! plans, RFFT! plan is treated separately pairs = _make_pairs(full_plan.plans[2:end], A.arrays[3:end]) @@ -206,7 +207,7 @@ function _apply_plans!( # apply recursively all transforms but last (BFFT!) _apply_plans_in_place!(dir, full_plan, nothing, pp...) # transpose before last transform - t = if pp ==() + t = if pp == () nothing else @assert Base.mightalias(A.arrays[3], A.arrays[2]) # they're aliased! From 7ee3f883bbd0f4df0e3adf92a19e04429f7cea38 Mon Sep 17 00:00:00 2001 From: Juan Ignacio Polanco Date: Thu, 25 May 2023 13:54:59 +0200 Subject: [PATCH 20/22] Update version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index f6abdf62..95eae0c8 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "PencilFFTs" uuid = "4a48f351-57a6-4416-9ec4-c37015456aae" authors = ["Juan Ignacio Polanco "] -version = "0.14.4" +version = "0.15.0" [deps] AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c" From 3e1991b13a508b10acde12d462b276b75bb43dea Mon Sep 17 00:00:00 2001 From: Juan Ignacio Polanco Date: Thu, 25 May 2023 13:55:07 +0200 Subject: [PATCH 21/22] Don't test on Julia 1.8 --- .github/workflows/ci.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index daac645e..d2677881 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,8 +21,7 @@ jobs: experimental: [false] version: - '1.7' - - '1.8' - - '~1.9.0-0' + - '1.9' os: - ubuntu-latest arch: From c22f1df1d765510e3e599a4e08718c635e8e6eb9 Mon Sep 17 00:00:00 2001 From: Juan Ignacio Polanco Date: Thu, 25 May 2023 16:56:46 +0200 Subject: [PATCH 22/22] Update docs --- docs/src/PencilFFTs.md | 6 ++++++ docs/src/Transforms.md | 2 ++ src/multiarrays_r2c.jl | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/docs/src/PencilFFTs.md b/docs/src/PencilFFTs.md index cb03891b..803824ea 100644 --- a/docs/src/PencilFFTs.md +++ b/docs/src/PencilFFTs.md @@ -28,3 +28,9 @@ scale_factor(::PencilFFTPlan) timer(::PencilFFTPlan) is_inplace(::PencilFFTPlan) ``` + +## Internals + +```@docs +ManyPencilArrayRFFT! +``` diff --git a/docs/src/Transforms.md b/docs/src/Transforms.md index 3c2e2c10..d4fe4a42 100644 --- a/docs/src/Transforms.md +++ b/docs/src/Transforms.md @@ -17,7 +17,9 @@ BFFT BFFT! RFFT +RFFT! BRFFT +BRFFT! R2R R2R! diff --git a/src/multiarrays_r2c.jl b/src/multiarrays_r2c.jl index ad02c15d..0faf8c5b 100644 --- a/src/multiarrays_r2c.jl +++ b/src/multiarrays_r2c.jl @@ -9,7 +9,7 @@ underlying data buffer. All views share the same and dimensionality `N`. The element type `T` of the first view is real, that of subsequent views is `Complex{T}`. -This can be used to perform in-place real-to-complex plan, see also[`Tranforms.RFFT!`](@ref). +This can be used to perform in-place real-to-complex plan, see also[`Transforms.RFFT!`](@ref). It is used internally for such transforms by [`allocate_input`](@ref) and should not be constructed directly. ---