diff --git a/CHANGELOG.md b/CHANGELOG.md index bcad921d..61e6f532 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,34 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [0.4.0] - unreleased + +## Changed + +- Major refactoring in `PSparseMatrix` (and in `PVector` in a lesser extent). +The old code is still available (but deprecated), and can be recovered applying this renaming to your code-base: + - `PSparseMatrix -> OldPSparseMatrix` + - `psparse! -> old_psparse!` + - `pvector! -> old_pvector!` + - `trivial_partition -> old_trivial_partition` + +- The default parallel sparse matrix format is now split into 4 blocks corresponding to own/ghost columns/rows. +The previous "monolithic" storage is not implemented anymore for the new version of `PSparseMatrix`, but can be implemented in the new setup if needed. +- `emit` renamed to `multicast`. The former name is still available but deprecated. + +## Added + +- Efficient re-construction of `PSparseMatrix` and `PVector` objects. +- Functions `assemble` and `consistent` (allocating versions of `assemble!` and `consistent!` with a slightly different +treatment of the ghost rows). +- Function `consistent` for `PSparseMatrix`. +- Functions `repartition` and `repartition!` used to change the data partition of `PSparseMatrix` and `PVector` objects. +- Functions `psystem` and `psystem!` for generating a system matrix and vector at once. +- Function `trivial_partition`. +- Support for sub-assembled matrices in `PSparseMatrix`. 
+ + ## [0.3.4] - 2023-09-06 ### Added diff --git a/Project.toml b/Project.toml index 35e5e577..c26044fd 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "PartitionedArrays" uuid = "5a9dfac6-5c52-46f7-8278-5e2210713be9" authors = ["Francesc Verdugo and contributors"] -version = "0.3.4" +version = "0.4.0" [deps] CircularArrays = "7a955b69-7140-5f4e-a0ed-f168c5e2e749" diff --git a/docs/Manifest.toml b/docs/Manifest.toml index baa401af..f0ea8a0b 100644 --- a/docs/Manifest.toml +++ b/docs/Manifest.toml @@ -1,6 +1,6 @@ # This file is machine-generated - editing it directly is not advised -julia_version = "1.8.4" +julia_version = "1.10.0" manifest_format = "2.0" project_hash = "e37729e95710f39fa1cd840707ef95d2e3bbf22c" @@ -14,12 +14,6 @@ git-tree-sha1 = "faa260e4cb5aba097a73fab382dd4b5819d8ec8c" uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" version = "0.4.4" -[[deps.Adapt]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "0310e08cb19f5da31d08341c6120c047598f5b9c" -uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" -version = "3.5.0" - [[deps.ArgTools]] uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" version = "1.1.1" @@ -32,24 +26,32 @@ uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" [[deps.CircularArrays]] deps = ["OffsetArrays"] -git-tree-sha1 = "3587fdbecba8c44f7e7285a1957182711b95f580" +git-tree-sha1 = "3f7b8a37359ae592cfa7aca7f811da045deff222" uuid = "7a955b69-7140-5f4e-a0ed-f168c5e2e749" -version = "1.3.1" +version = "1.3.3" [[deps.CompilerSupportLibraries_jll]] deps = ["Artifacts", "Libdl"] uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" -version = "1.0.1+0" +version = "1.0.5+1" [[deps.Dates]] deps = ["Printf"] uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" [[deps.Distances]] -deps = ["LinearAlgebra", "SparseArrays", "Statistics", "StatsAPI"] -git-tree-sha1 = "3258d0659f812acde79e8a74b11f17ac06d0ca04" +deps = ["LinearAlgebra", "Statistics", "StatsAPI"] +git-tree-sha1 = "66c4c81f259586e8f002eacebc177e1fb06363b0" uuid = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" 
-version = "0.10.7" +version = "0.10.11" + + [deps.Distances.extensions] + DistancesChainRulesCoreExt = "ChainRulesCore" + DistancesSparseArraysExt = "SparseArrays" + + [deps.Distances.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [[deps.Distributed]] deps = ["Random", "Serialization", "Sockets"] @@ -62,32 +64,64 @@ uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" version = "0.9.3" [[deps.Documenter]] -deps = ["ANSIColoredPrinters", "AbstractTrees", "Base64", "Dates", "DocStringExtensions", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "MarkdownAST", "REPL", "Test", "Unicode"] -git-tree-sha1 = "26156affd5dcff3cf8c1f4d3562f43d43bd8397c" -repo-rev = "master" -repo-url = "https://github.com/JuliaDocs/Documenter.jl.git" +deps = ["ANSIColoredPrinters", "AbstractTrees", "Base64", "Dates", "DocStringExtensions", "Downloads", "Git", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "MarkdownAST", "Pkg", "PrecompileTools", "REPL", "RegistryInstances", "SHA", "Test", "Unicode"] +git-tree-sha1 = "2613dbec8f4748273bbe30ba71fd5cb369966bac" uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4" -version = "0.28.0-DEV" +version = "1.2.1" [[deps.Downloads]] deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" version = "1.6.0" +[[deps.Expat_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "4558ab818dcceaab612d1bb8c19cee87eda2b83c" +uuid = "2e619515-83b5-522b-bb60-26c02a35a201" +version = "2.5.0+0" + [[deps.FileWatching]] uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" [[deps.FillArrays]] -deps = ["LinearAlgebra", "Random", "SparseArrays", "Statistics"] -git-tree-sha1 = "d3ba08ab64bdfd27234d3f61956c966266757fe6" +deps = ["LinearAlgebra", "Random"] +git-tree-sha1 = "5b93957f6dcd33fc343044af3d48c215be2562f1" uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" -version = "0.13.7" +version = "1.9.3" 
+ + [deps.FillArrays.extensions] + FillArraysPDMatsExt = "PDMats" + FillArraysSparseArraysExt = "SparseArrays" + FillArraysStatisticsExt = "Statistics" + + [deps.FillArrays.weakdeps] + PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150" + SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + +[[deps.Git]] +deps = ["Git_jll"] +git-tree-sha1 = "51764e6c2e84c37055e846c516e9015b4a291c7d" +uuid = "d7ba0133-e1db-5d97-8f8c-041e4b3a1eb2" +version = "1.3.0" + +[[deps.Git_jll]] +deps = ["Artifacts", "Expat_jll", "JLLWrappers", "LibCURL_jll", "Libdl", "Libiconv_jll", "OpenSSL_jll", "PCRE2_jll", "Zlib_jll"] +git-tree-sha1 = "bb8f7cc77ec1152414b2af6db533d9471cfbb2d1" +uuid = "f8c6e375-362e-5223-8a59-34ff63f689eb" +version = "2.42.0+0" + +[[deps.Hwloc_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "ca0f6bf568b4bfc807e7537f081c81e35ceca114" +uuid = "e33a78d0-f292-5ffc-b300-72abe9b543c8" +version = "2.10.0+0" [[deps.IOCapture]] deps = ["Logging", "Random"] -git-tree-sha1 = "f7be53659ab06ddc986428d3a9dcc95f6fa6705a" +git-tree-sha1 = "d75853a0bdbfb1ac815478bacd89cd27b550ace6" uuid = "b5f81e59-6552-4d32-b1f0-c071b021bf89" -version = "0.2.2" +version = "0.2.3" [[deps.InteractiveUtils]] deps = ["Markdown"] @@ -95,21 +129,26 @@ uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" [[deps.IterativeSolvers]] deps = ["LinearAlgebra", "Printf", "Random", "RecipesBase", "SparseArrays"] -git-tree-sha1 = "1169632f425f79429f245113b775a0e3d121457c" +git-tree-sha1 = "b435d190ef8369cf4d79cc9dd5fba88ba0165307" uuid = "42fd0dbc-a981-5370-80f2-aaf504508153" -version = "0.9.2" +version = "0.9.3" [[deps.JLLWrappers]] -deps = ["Preferences"] -git-tree-sha1 = "abc9885a7ca2052a736a600f7fa66209f96506e1" +deps = ["Artifacts", "Preferences"] +git-tree-sha1 = "7e5d6779a1e09a36db2a7b6cff50942a0a7d0fca" uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" -version = "1.4.1" +version = "1.5.0" [[deps.JSON]] deps = ["Dates", "Mmap", "Parsers", "Unicode"] 
-git-tree-sha1 = "3c837543ddb02250ef42f4738347454f95079d4e" +git-tree-sha1 = "31e996f0a15c7b280ba9f76636b3ff9e2ae58c9a" uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" -version = "0.21.3" +version = "0.21.4" + +[[deps.LazilyInitializedFields]] +git-tree-sha1 = "8f7f3cabab0fd1800699663533b6d5cb3fc0e612" +uuid = "0e77f7df-68c5-4e49-93ce-4cd80f5598bf" +version = "1.2.2" [[deps.LazyArtifacts]] deps = ["Artifacts", "Pkg"] @@ -118,61 +157,80 @@ uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" [[deps.LibCURL]] deps = ["LibCURL_jll", "MozillaCACerts_jll"] uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" -version = "0.6.3" +version = "0.6.4" [[deps.LibCURL_jll]] deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" -version = "7.84.0+0" +version = "8.4.0+0" [[deps.LibGit2]] -deps = ["Base64", "NetworkOptions", "Printf", "SHA"] +deps = ["Base64", "LibGit2_jll", "NetworkOptions", "Printf", "SHA"] uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" +[[deps.LibGit2_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll"] +uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5" +version = "1.6.4+0" + [[deps.LibSSH2_jll]] deps = ["Artifacts", "Libdl", "MbedTLS_jll"] uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" -version = "1.10.2+0" +version = "1.11.0+1" [[deps.Libdl]] uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" +[[deps.Libiconv_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "f9557a255370125b405568f9767d6d195822a175" +uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531" +version = "1.17.0+0" + [[deps.LinearAlgebra]] -deps = ["Libdl", "libblastrampoline_jll"] +deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"] uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" [[deps.Literate]] deps = ["Base64", "IOCapture", "JSON", "REPL"] -git-tree-sha1 = "1c4418beaa6664041e0f9b48f0710f57bff2fcbe" +git-tree-sha1 = "bad26f1ccd99c553886ec0725e99a509589dcd11" uuid = "98b081ad-f1c9-55d3-8b20-4c87d4299306" -version = 
"2.14.0" +version = "2.16.1" [[deps.Logging]] uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" [[deps.MPI]] -deps = ["Distributed", "DocStringExtensions", "Libdl", "MPICH_jll", "MPIPreferences", "MPItrampoline_jll", "MicrosoftMPI_jll", "OpenMPI_jll", "Requires", "Serialization", "Sockets"] -git-tree-sha1 = "6d72bafd3960f9c119ceb8f034fef28346490fe5" +deps = ["Distributed", "DocStringExtensions", "Libdl", "MPICH_jll", "MPIPreferences", "MPItrampoline_jll", "MicrosoftMPI_jll", "OpenMPI_jll", "PkgVersion", "PrecompileTools", "Requires", "Serialization", "Sockets"] +git-tree-sha1 = "b4d8707e42b693720b54f0b3434abee6dd4d947a" uuid = "da04e1cc-30fd-572f-bb4f-1f8673147195" -version = "0.20.8" +version = "0.20.16" + + [deps.MPI.extensions] + AMDGPUExt = "AMDGPU" + CUDAExt = "CUDA" + + [deps.MPI.weakdeps] + AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" + CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" [[deps.MPICH_jll]] -deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "Pkg", "TOML"] -git-tree-sha1 = "6d4fa43afab4611d090b11617ecea1a144b21d35" +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "TOML"] +git-tree-sha1 = "2ee75365ca243c1a39d467e35ffd3d4d32eef11e" uuid = "7cb0a576-ebde-5e09-9194-50597f1243b4" -version = "4.0.2+5" +version = "4.1.2+1" [[deps.MPIPreferences]] deps = ["Libdl", "Preferences"] -git-tree-sha1 = "71f937129731a29eabe6969db2c90368a4408933" +git-tree-sha1 = "8f6af051b9e8ec597fa09d8885ed79fd582f33c9" uuid = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267" -version = "0.1.7" +version = "0.1.10" [[deps.MPItrampoline_jll]] -deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "Pkg", "TOML"] -git-tree-sha1 = "b3f9e42685b4ad614eca0b44bd863cd41b1c86ea" +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "TOML"] +git-tree-sha1 = 
"8eeb3c73bbc0ca203d0dc8dad4008350bbe5797b" uuid = "f1f71cc9-e9ae-5b93-9b94-4fe0e1ad3748" -version = "5.0.2+1" +version = "5.3.1+1" [[deps.Markdown]] deps = ["Base64"] @@ -180,71 +238,105 @@ uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" [[deps.MarkdownAST]] deps = ["AbstractTrees", "Markdown"] -git-tree-sha1 = "e8513266815200c0c8f522d6d44ffb5e9b366ae4" +git-tree-sha1 = "465a70f0fc7d443a00dcdc3267a497397b8a3899" uuid = "d0879d2d-cac2-40c8-9cee-1863dc0c7391" -version = "0.1.1" +version = "0.1.2" [[deps.MbedTLS_jll]] deps = ["Artifacts", "Libdl"] uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" -version = "2.28.0+0" +version = "2.28.2+1" [[deps.MicrosoftMPI_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "a16aa086d335ed7e0170c5265247db29172af2f9" +git-tree-sha1 = "b01beb91d20b0d1312a9471a36017b5b339d26de" uuid = "9237b28f-5490-5468-be7b-bb81f5f5e6cf" -version = "10.1.3+2" +version = "10.1.4+1" [[deps.Mmap]] uuid = "a63ad114-7e13-5084-954f-fe012c677804" [[deps.MozillaCACerts_jll]] uuid = "14a3606d-f60d-562e-9121-12d972cd8159" -version = "2022.2.1" +version = "2023.1.10" [[deps.NetworkOptions]] uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" version = "1.2.0" [[deps.OffsetArrays]] -deps = ["Adapt"] -git-tree-sha1 = "f71d8950b724e9ff6110fc948dff5a329f901d64" +git-tree-sha1 = "6a731f2b5c03157418a20c12195eb4b74c8f8621" uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" -version = "1.12.8" +version = "1.13.0" + + [deps.OffsetArrays.extensions] + OffsetArraysAdaptExt = "Adapt" + + [deps.OffsetArrays.weakdeps] + Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" [[deps.OpenBLAS_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" -version = "0.3.20+0" +version = "0.3.23+2" [[deps.OpenMPI_jll]] -deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "Pkg", "TOML"] -git-tree-sha1 = "346d6b357a480300ed7854dbc70e746ac52e10fd" +deps = ["Artifacts", 
"CompilerSupportLibraries_jll", "Hwloc_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "PMIx_jll", "TOML", "Zlib_jll", "libevent_jll", "prrte_jll"] +git-tree-sha1 = "1d1421618bab0e820bdc7ae1a2b46ce576981273" uuid = "fe0851c0-eecd-5654-98d4-656369965a5c" -version = "4.1.3+3" +version = "5.0.1+0" + +[[deps.OpenSSL_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "cc6e1927ac521b659af340e0ca45828a3ffc748f" +uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" +version = "3.0.12+0" + +[[deps.PCRE2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15" +version = "10.42.0+1" + +[[deps.PMIx_jll]] +deps = ["Artifacts", "Hwloc_jll", "JLLWrappers", "Libdl", "Zlib_jll", "libevent_jll"] +git-tree-sha1 = "8b3b19351fa24791f94d7ae85faf845ca1362541" +uuid = "32165bc3-0280-59bc-8c0b-c33b6203efab" +version = "4.2.7+0" [[deps.Parsers]] -deps = ["Dates", "SnoopPrecompile"] -git-tree-sha1 = "8175fc2b118a3755113c8e68084dc1a9e63c61ee" +deps = ["Dates", "PrecompileTools", "UUIDs"] +git-tree-sha1 = "8489905bcdbcfac64d1daa51ca07c0d8f0283821" uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" -version = "2.5.3" +version = "2.8.1" [[deps.PartitionedArrays]] deps = ["CircularArrays", "Distances", "FillArrays", "IterativeSolvers", "LinearAlgebra", "MPI", "Printf", "Random", "SparseArrays", "SparseMatricesCSR"] path = ".." 
uuid = "5a9dfac6-5c52-46f7-8278-5e2210713be9" -version = "0.3.0" +version = "0.3.4" [[deps.Pkg]] -deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] +deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" -version = "1.8.0" +version = "1.10.0" + +[[deps.PkgVersion]] +deps = ["Pkg"] +git-tree-sha1 = "f9501cc0430a26bc3d156ae1b5b0c1b47af4d6da" +uuid = "eebad327-c553-4316-9ea0-9fa01ccd7688" +version = "0.3.3" + +[[deps.PrecompileTools]] +deps = ["Preferences"] +git-tree-sha1 = "03b4c25b43cb84cee5c90aa9b5ea0a78fd848d2f" +uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a" +version = "1.2.0" [[deps.Preferences]] deps = ["TOML"] -git-tree-sha1 = "47e5f437cc0e7ef2ce8406ce1e7e24d44915f88d" +git-tree-sha1 = "00805cd429dcb4870060ff49ef443486c262e38e" uuid = "21216c6a-2e73-6563-6e65-726566657250" -version = "1.3.0" +version = "1.4.1" [[deps.Printf]] deps = ["Unicode"] @@ -255,14 +347,20 @@ deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" [[deps.Random]] -deps = ["SHA", "Serialization"] +deps = ["SHA"] uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" [[deps.RecipesBase]] -deps = ["SnoopPrecompile"] -git-tree-sha1 = "261dddd3b862bd2c940cf6ca4d1c8fe593e457c8" +deps = ["PrecompileTools"] +git-tree-sha1 = "5c3d09cc4f31f5fc6af001c250bf1278733100ff" uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" -version = "1.3.3" +version = "1.3.4" + +[[deps.RegistryInstances]] +deps = ["LazilyInitializedFields", "Pkg", "TOML", "Tar"] +git-tree-sha1 = "ffd19052caf598b8653b99404058fce14828be51" +uuid = "2792f1a3-b283-48e8-9a74-f99dce5104f3" +version = "0.1.0" [[deps.Requires]] deps = ["UUIDs"] @@ -277,18 +375,13 @@ version = "0.7.0" 
[[deps.Serialization]] uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" -[[deps.SnoopPrecompile]] -deps = ["Preferences"] -git-tree-sha1 = "e760a70afdcd461cf01a575947738d359234665c" -uuid = "66db9d55-30c0-4569-8b51-7e840670fc0c" -version = "1.0.3" - [[deps.Sockets]] uuid = "6462fe0b-24de-5631-8697-dd941f90decc" [[deps.SparseArrays]] -deps = ["LinearAlgebra", "Random"] +deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"] uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +version = "1.10.0" [[deps.SparseMatricesCSR]] deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"] @@ -299,26 +392,32 @@ version = "0.6.7" [[deps.Statistics]] deps = ["LinearAlgebra", "SparseArrays"] uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +version = "1.10.0" [[deps.StatsAPI]] deps = ["LinearAlgebra"] -git-tree-sha1 = "f9af7f195fb13589dd2e2d57fdb401717d2eb1f6" +git-tree-sha1 = "1ff449ad350c9c4cbc756624d6f8a8c3ef56d3ed" uuid = "82ae8749-77ed-4fe6-ae5f-f523153014b0" -version = "1.5.0" +version = "1.7.0" [[deps.SuiteSparse]] deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"] uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9" +[[deps.SuiteSparse_jll]] +deps = ["Artifacts", "Libdl", "libblastrampoline_jll"] +uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c" +version = "7.2.1+1" + [[deps.TOML]] deps = ["Dates"] uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" -version = "1.0.0" +version = "1.0.3" [[deps.Tar]] deps = ["ArgTools", "SHA"] uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" -version = "1.10.1" +version = "1.10.0" [[deps.Test]] deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] @@ -334,19 +433,31 @@ uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" [[deps.Zlib_jll]] deps = ["Libdl"] uuid = "83775a58-1f1d-513f-b197-d71354ab007a" -version = "1.2.12+3" +version = "1.2.13+1" [[deps.libblastrampoline_jll]] -deps = ["Artifacts", "Libdl", "OpenBLAS_jll"] +deps = ["Artifacts", "Libdl"] uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" -version = "5.1.1+0" +version = 
"5.8.0+1" + +[[deps.libevent_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "OpenSSL_jll"] +git-tree-sha1 = "f04ec6d9a186115fb38f858f05c0c4e1b7fc9dcb" +uuid = "1080aeaf-3a6a-583e-a51c-c537b09f60ec" +version = "2.1.13+1" [[deps.nghttp2_jll]] deps = ["Artifacts", "Libdl"] uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" -version = "1.48.0+0" +version = "1.52.0+1" [[deps.p7zip_jll]] deps = ["Artifacts", "Libdl"] uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" -version = "17.4.0+0" +version = "17.4.0+2" + +[[deps.prrte_jll]] +deps = ["Artifacts", "Hwloc_jll", "JLLWrappers", "Libdl", "PMIx_jll", "libevent_jll"] +git-tree-sha1 = "5adb2d7a18a30280feb66cad6f1a1dfdca2dc7b0" +uuid = "eb928a42-fffd-568d-ab9c-3f5d54fc65b9" +version = "3.0.2+0" diff --git a/docs/examples.jl b/docs/examples.jl index 66e303bc..bad730fb 100644 --- a/docs/examples.jl +++ b/docs/examples.jl @@ -144,8 +144,8 @@ IV = map(row_partition) do row_indices end I,V end -I,V = tuple_of_arrays(IV) -b = pvector!(I,V,row_partition) |> fetch +II,VV = tuple_of_arrays(IV) +b = pvector(II,VV,row_partition) |> fetch # Compute the system matrix @@ -172,7 +172,7 @@ IJV = map(row_partition) do row_indices end I,J,V = tuple_of_arrays(IJV) col_partition = row_partition -A = psparse!(I,J,V,row_partition,col_partition) |> fetch +A = psparse(I,J,V,row_partition,col_partition) |> fetch # Generate an initial guess that fulfills # the boundary conditions. @@ -183,6 +183,30 @@ IterativeSolvers.cg!(x,A,b) r = A*x - b norm(r) +# Imagine, that we want to create the system several times +# (e.g., in a nonlinear solver). We can use the key-word argument `reuse` +# to enable efficient re-construction of the matrix/vector. + +b,cacheb = pvector(II,VV,row_partition,reuse=true) |> fetch +A,cacheA= psparse(I,J,V,row_partition,col_partition,reuse=true) |> fetch; + +# Now modify the values. 
For example: + +V = map(i->2*i,V) + +# + +VV = map(i->2*i,VV) + +# Update the matrix and the vector accordingly: + +pvector!(b,VV,cacheb) |> wait +psparse!(A,V,cacheA) |> wait + +# The solution should be the same as before in this case. + +r = A*x - b +norm(r) diff --git a/docs/make.jl b/docs/make.jl index fec2a74b..a952afc0 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -7,7 +7,6 @@ src_dir = joinpath(@__DIR__,"src") Literate.markdown(examples_jl,src_dir) makedocs( - strict=true, sitename = "PartitionedArrays.jl", format = Documenter.HTML( assets = ["assets/custom.css", "assets/favicon.ico",], diff --git a/docs/src/reference/partition.md b/docs/src/reference/partition.md index 4f7ad290..6d751e8b 100644 --- a/docs/src/reference/partition.md +++ b/docs/src/reference/partition.md @@ -5,6 +5,7 @@ ```@docs uniform_partition variable_partition +trivial_partition partition_from_color ``` diff --git a/docs/src/reference/primitives.md b/docs/src/reference/primitives.md index aadee8f7..761f2ad6 100644 --- a/docs/src/reference/primitives.md +++ b/docs/src/reference/primitives.md @@ -19,9 +19,9 @@ allocate_scatter ## Emit ```@docs -emit -emit! -allocate_emit +multicast +multicast! +allocate_multicast ``` ## Scan diff --git a/docs/src/reference/psparsematrix.md b/docs/src/reference/psparsematrix.md index 331ff67c..27b2b0bd 100644 --- a/docs/src/reference/psparsematrix.md +++ b/docs/src/reference/psparsematrix.md @@ -10,20 +10,35 @@ PSparseMatrix ```@docs local_values(::PSparseMatrix) -own_values(::PSparseMatrix) -ghost_values(::PSparseMatrix) +own_own_values(::PSparseMatrix) own_ghost_values(::PSparseMatrix) ghost_own_values(::PSparseMatrix) +ghost_ghost_values(::PSparseMatrix) ``` ## Constructors ```@docs -PSparseMatrix(a,b,c) -psparse +PSparseMatrix(a,b,c,d) +psparse(f,b,c) +psparse(f,a,b,c,d,e) psparse! +psystem +psystem! 
``` ## Assembly ```@docs -assemble!(o,::PSparseMatrix) +assemble(::PSparseMatrix,rows) +assemble!(::PSparseMatrix,::PSparseMatrix,cache) +consistent(::PSparseMatrix,rows) +consistent!(::PSparseMatrix,::PSparseMatrix,cache) +``` + +## Re-partition + +```@docs +repartition(::PSparseMatrix,rows,cols) +repartition!(::PSparseMatrix,::PSparseMatrix,cache) +repartition(::PSparseMatrix,::PVector,rows,cols) +repartition!(::PSparseMatrix,::PVector,::PSparseMatrix,::PVector,cache) ``` diff --git a/docs/src/reference/pvector.md b/docs/src/reference/pvector.md index d96abb5c..7540fd1d 100644 --- a/docs/src/reference/pvector.md +++ b/docs/src/reference/pvector.md @@ -17,7 +17,8 @@ ghost_values(::PVector) ```@docs PVector(a,b) PVector{V}(::UndefInitializer,b) where V -pvector +pvector(f,a) +pvector(f,a,b,c) pvector! pfill pzeros @@ -29,5 +30,16 @@ prandn ```@docs assemble!(::PVector) +assemble(::PVector,rows) +assemble!(::PVector,::PVector,cache) consistent!(::PVector) +consistent(::PVector,cols) +consistent!(::PVector,::PVector,cache) +``` + +## Re-partition + +```@docs +repartition(::PVector,rows) +repartition!(::PVector,::PVector,cache) ``` diff --git a/src/PartitionedArrays.jl b/src/PartitionedArrays.jl index b2029575..82101776 100644 --- a/src/PartitionedArrays.jl +++ b/src/PartitionedArrays.jl @@ -34,6 +34,9 @@ export allocate_gather export scatter export scatter! export allocate_scatter +export multicast +export multicast! +export allocate_multicast export emit export emit! export allocate_emit @@ -62,6 +65,8 @@ export PRange export uniform_partition export variable_partition export partition_from_color +export trivial_partition +export renumber_partition export AbstractLocalIndices export OwnAndGhostIndices export LocalIndices @@ -88,6 +93,7 @@ export ghost_to_local export local_to_own export local_to_ghost export replace_ghost +export remove_ghost export union_ghost export find_owner export assemble! 
@@ -97,6 +103,12 @@ export partition export assembly_graph export assembly_neighbors export assembly_local_indices +export map_local_to_global! +export map_global_to_local! +export map_ghost_to_global! +export map_global_to_ghost! +export map_own_to_global! +export map_global_to_own! include("p_range.jl") export local_values @@ -105,6 +117,8 @@ export ghost_values export OwnAndGhostVectors export PVector export pvector +export old_pvector! +export pvector export pvector! export pfill export pzeros @@ -112,13 +126,27 @@ export pones export prand export prandn export consistent! +export assemble +export consistent +export repartition +export repartition! include("p_vector.jl") +export OldPSparseMatrix +export SplitMatrix export PSparseMatrix +export old_psparse export psparse export psparse! +export split_format +export split_format! +export old_psparse! export own_ghost_values export ghost_own_values +export own_own_values +export ghost_ghost_values +export psystem +export psystem! include("p_sparse_matrix.jl") export PTimer diff --git a/src/debug_array.jl b/src/debug_array.jl index c31f7961..cf7e4e7b 100644 --- a/src/debug_array.jl +++ b/src/debug_array.jl @@ -90,6 +90,9 @@ function Base.show(io::IO,k::MIME"text/plain",data::DebugArray) println(io,"$index = $(data.items[i])") end end +function Base.show(io::IO,data::DebugArray) + print(io,"DebugArray(…)") +end getany(a::DebugArray) = getany(a.items) function Base.similar(a::DebugArray,::Type{T},dims::Dims) where T @@ -110,6 +113,14 @@ function Base.map!(f,r::DebugArray,args::DebugArray...) 
r end +function Base.all(a::DebugArray) + reduce(&,a;init=true) +end +function Base.all(p::Function,a::DebugArray) + b = map(p,a) + all(b) +end + function gather_impl!( rcv::DebugArray, snd::DebugArray, destination, ::Type{T}) where T @@ -134,16 +145,16 @@ function scatter_impl!( scatter_impl!(rcv.items,snd.items,source,T) end -function emit_impl!( +function multicast_impl!( rcv::DebugArray,snd::DebugArray, source,::Type{T}) where T - emit_impl!(rcv.items,snd.items,source,T) + multicast_impl!(rcv.items,snd.items,source,T) end -function emit_impl!( +function multicast_impl!( rcv::DebugArray,snd::DebugArray, source,::Type{T}) where T<:AbstractVector - emit_impl!(rcv.items,snd.items,source,T) + multicast_impl!(rcv.items,snd.items,source,T) end Base.reduce(op,a::DebugArray;kwargs...) = reduce(op,a.items;kwargs...) @@ -162,7 +173,7 @@ function exchange_impl!( ::Type{T}) where T g = ExchangeGraph(graph.snd.items,graph.rcv.items) @async begin - sleep(0.2) # This is to make more likely to have errors if we don't wait + yield() # This is to make more likely to have errors if we don't wait exchange_impl!(rcv.items,snd.items,g,T) |> wait rcv end @@ -175,7 +186,7 @@ function exchange_impl!( ::Type{T}) where T <: AbstractVector g = ExchangeGraph(graph.snd.items,graph.rcv.items) @async begin - sleep(0.2) # This is to make more likely to have errors if we don't wait + yield() # This is to make more likely to have errors if we don't wait exchange_impl!(rcv.items,snd.items,g,T) |> wait rcv end diff --git a/src/mpi_array.jl b/src/mpi_array.jl index b5e9e560..6a2466b1 100644 --- a/src/mpi_array.jl +++ b/src/mpi_array.jl @@ -196,6 +196,11 @@ function Base.show(io::IO,k::MIME"text/plain",data::MPIArray) MPI.Barrier(data.comm) end end +function Base.show(io::IO,data::MPIArray) + if MPI.Comm_rank(data.comm) == 0 + print(io,"MPIArray(…)") + end +end getany(a::MPIArray) = a.item i_am_main(a::MPIArray) = MPI.Comm_rank(a.comm)+1 == MAIN @@ -365,7 +370,7 @@ function scatter_impl!( rcv end 
-function emit_impl!( +function multicast_impl!( rcv::MPIArray,snd::MPIArray, source,::Type{T}) where T @assert rcv.comm === snd.comm @@ -377,7 +382,7 @@ function emit_impl!( MPI.Bcast!(rcv.item_ref,root,comm) end -function emit_impl!( +function multicast_impl!( rcv::MPIArray,snd::MPIArray, source,::Type{T}) where T<:AbstractVector @assert rcv.comm === snd.comm @@ -463,6 +468,14 @@ function Base.collect(a::MPIArray) c.item end +function Base.all(a::MPIArray) + reduce(&,a;init=true) +end +function Base.all(p::Function,a::MPIArray) + b = map(p,a) + all(b) +end + function exchange_impl!( rcv::MPIArray, snd::MPIArray, @@ -577,7 +590,7 @@ function find_rcv_ids_ibarrier(snd_ids::MPIArray{<:AbstractVector{T}}) where T end rcv_ids=T[] done=false - barrier_emitted=false + barrier_multicastted=false all_sends_done=false barrier_req=nothing status = Ref(MPI.STATUS_ZERO) @@ -594,13 +607,13 @@ function find_rcv_ids_ibarrier(snd_ids::MPIArray{<:AbstractVector{T}}) where T @boundscheck @assert tag_rcv == tag "Inconsistent tag in ExchangeGraph_impl()!" 
end - if (barrier_emitted) + if (barrier_multicastted) done=MPI.Test(barrier_req) else all_sends_done = MPI.Testall(requests) if (all_sends_done) barrier_req=MPI.Ibarrier(comm) - barrier_emitted=true + barrier_multicastted=true end end end diff --git a/src/p_range.jl b/src/p_range.jl index 795f8351..0382e6e7 100644 --- a/src/p_range.jl +++ b/src/p_range.jl @@ -198,6 +198,10 @@ only makes sense if `indices` stores ghost ids in separate vectors like in """ function replace_ghost end +function remove_ghost(indices) + replace_ghost(indices,Int[],Int32[]) +end + function filter_ghost(indices,gids,owners) set = Set{Int}() part_owner = part_id(indices) @@ -274,6 +278,26 @@ function to_global!(I,indices) I end +map_global_to_local!(I,indices) = map_x_to_y!(global_to_local,I,indices) +map_global_to_ghost!(I,indices) = map_x_to_y!(global_to_ghost,I,indices) +map_global_to_own!(I,indices) = map_x_to_y!(global_to_own,I,indices) +map_local_to_global!(I,indices) = map_x_to_y!(local_to_global,I,indices) +map_local_to_ghost!(I,indices) = map_x_to_y!(local_to_ghost,I,indices) +map_local_to_own!(I,indices) = map_x_to_y!(local_to_own,I,indices) +map_own_to_global!(I,indices) = map_x_to_y!(own_to_global,I,indices) +map_own_to_local!(I,indices) = map_x_to_y!(own_to_local,I,indices) +map_ghost_to_global!(I,indices) = map_x_to_y!(ghost_to_global,I,indices) +map_ghost_to_local!(I,indices) = map_x_to_y!(ghost_to_local,I,indices) + +function map_x_to_y!(x_to_y,I,indices) + local_to_global_indices = x_to_y(indices) + for k in 1:length(I) + Ik = I[k] + I[k] = local_to_global_indices[Ik] + end + I +end + """ find_owner(index_partition,global_ids) @@ -718,7 +742,7 @@ The argument `global_to_color` is the usual output of such tools. 
""" function partition_from_color(ranks,global_to_color;multicast=false,source=MAIN) if multicast == true - global_to_owner = getany(emit(global_to_color;source)) + global_to_owner = getany(PartitionedArrays.multicast(global_to_color;source)) else global_to_owner = global_to_color end @@ -733,6 +757,41 @@ function partition_from_color(ranks,global_to_color;multicast=false,source=MAIN) end end +""" + trivial_partition(ranks,n;destination=MAIN) + +!!! warning + Document me! +""" +function trivial_partition(ranks,n;destination=MAIN) + n_own = map(ranks) do rank + rank == destination ? Int(n) : 0 + end + partition_in_main = variable_partition(n_own,n) + partition_in_main +end + +function renumber_partition(partition_in) + own_ids = map(own_to_global,partition_in) + if eltype(own_ids) <: BlockPartitionOwnToGlobal{1} + return partition_in + end + n_global = PartitionedArrays.getany(map(global_length,partition_in)) + n_own = map(own_length,partition_in) + new_gids = variable_partition(n_own,n_global) + v = PVector{Vector{Int}}(undef,partition_in) + map(own_values(v),new_gids) do own_v, new_gids + own_v .= own_to_global(new_gids) + end + consistent!(v) |> wait + I = ghost_values(v) + I_owner = map(ghost_to_owner,partition_in) + new_ids2 = map(union_ghost,new_gids,I,I_owner) + perm = map(PartitionedArrays.local_permutation,partition_in) + partition_out = map(permute_indices,new_ids2,perm) + partition_out +end + function local_range(p,np,n,ghost=false,periodic=false) l = n ÷ np offset = l * (p-1) @@ -1185,7 +1244,7 @@ assembly_cache(a::OwnAndGhostIndices) = a.assembly_cache local_permutation(a::OwnAndGhostIndices) = Int32(1):Int32(local_length(a)) function replace_ghost(a::OwnAndGhostIndices,ghost::GhostIndices) - OwnAndGhostIndices(a.own,ghost) + OwnAndGhostIndices(a.own,ghost,a.global_to_owner) end function find_owner(indices,global_ids,::Type{<:OwnAndGhostIndices{T}}) where T diff --git a/src/p_sparse_matrix.jl b/src/p_sparse_matrix.jl index 193fb442..4964e80e 100644 --- 
a/src/p_sparse_matrix.jl +++ b/src/p_sparse_matrix.jl @@ -20,14 +20,18 @@ function local_values(values,indices_rows,indices_cols) end function own_values(values,indices_rows,indices_cols) - subindices = (own_to_local(indices_rows),own_to_local(indices_cols)) - subindices_inv = (local_to_own(indices_rows),local_to_own(indices_cols)) - SubSparseMatrix(values,subindices,subindices_inv) + # TODO deprecate this one + own_own_values(values,indices_rows,indices_cols) end function ghost_values(values,indices_rows,indices_cols) - subindices = (ghost_to_local(indices_rows),ghost_to_local(indices_cols)) - subindices_inv = (local_to_ghost(indices_rows),local_to_ghost(indices_cols)) + # TODO deprecate this one + ghost_ghost_values(values,indices_rows,indices_cols) +end + +function own_own_values(values,indices_rows,indices_cols) + subindices = (own_to_local(indices_rows),own_to_local(indices_cols)) + subindices_inv = (local_to_own(indices_rows),local_to_own(indices_cols)) SubSparseMatrix(values,subindices,subindices_inv) end @@ -43,52 +47,18 @@ function ghost_own_values(values,indices_rows,indices_cols) SubSparseMatrix(values,subindices,subindices_inv) end -""" - struct PSparseMatrix{V,A,B,C,...} - -`PSparseMatrix` (partitioned sparse matrix) -is a type representing a matrix whose rows are -distributed (a.k.a. partitioned) over different parts for distributed-memory -parallel computations. Each part stores a subset of the rows of the matrix and their -corresponding non zero columns. - -This type overloads numerous array-like operations with corresponding -parallel implementations. - -# Properties - -- `matrix_partition::A` -- `row_partition::B` -- `col_partition::C` - -`matrix_partition[i]` contains a (sparse) matrix with the local rows and the -corresponding nonzero columns (the local columns) in the part number `i`. -`eltype(matrix_partition) == V`. 
-`row_partition[i]` and `col_partition[i]` contain information -about the local, own, and ghost rows and columns respectively in part number `i`. -The types `eltype(row_partition)` and `eltype(col_partition)` implement the -[`AbstractLocalIndices`](@ref) interface. - -The rest of fields of this struct and type parameters are private. - -# Supertype hierarchy - - PSparseMatrix{V,A,B,C,...} <: AbstractMatrix{T} +function ghost_ghost_values(values,indices_rows,indices_cols) + subindices = (ghost_to_local(indices_rows),ghost_to_local(indices_cols)) + subindices_inv = (local_to_ghost(indices_rows),local_to_ghost(indices_cols)) + SubSparseMatrix(values,subindices,subindices_inv) +end -with `T=eltype(V)`. -""" -struct PSparseMatrix{V,A,B,C,D,T} <: AbstractMatrix{T} +struct OldPSparseMatrix{V,A,B,C,D,T} <: AbstractMatrix{T} matrix_partition::A row_partition::B col_partition::C cache::D - @doc """ - PSparseMatrix(matrix_partition,row_partition,col_partition) - - Build an instance for [`PSparseMatrix`](@ref) from the underlying fields - `matrix_partition`, `row_partition`, and `col_partition`. - """ - function PSparseMatrix( + function OldPSparseMatrix( matrix_partition, row_partition, col_partition, @@ -103,103 +73,44 @@ struct PSparseMatrix{V,A,B,C,D,T} <: AbstractMatrix{T} end end -partition(a::PSparseMatrix) = a.matrix_partition -Base.axes(a::PSparseMatrix) = (PRange(a.row_partition),PRange(a.col_partition)) - -""" - local_values(a::PSparseMatrix) - -Get a vector of matrices containing the local rows and columns -in each part of `a`. +partition(a::OldPSparseMatrix) = a.matrix_partition +Base.axes(a::OldPSparseMatrix) = (PRange(a.row_partition),PRange(a.col_partition)) -The row and column indices of the returned matrices can be mapped to global -indices, own indices, ghost indices, and owner by using -[`local_to_global`](@ref), [`local_to_own`](@ref), [`local_to_ghost`](@ref), -and [`local_to_owner`](@ref), respectively. 
-""" -function local_values(a::PSparseMatrix) +function local_values(a::OldPSparseMatrix) partition(a) end -""" - own_values(a::PSparseMatrix) - -Get a vector of matrices containing the own rows and columns -in each part of `a`. - -The row and column indices of the returned matrices can be mapped to global -indices, local indices, and owner by using [`own_to_global`](@ref), -[`own_to_local`](@ref), and [`own_to_owner`](@ref), respectively. -""" -function own_values(a::PSparseMatrix) +function own_values(a::OldPSparseMatrix) map(own_values,partition(a),partition(axes(a,1)),partition(axes(a,2))) end -""" - ghost_values(a::PSparseMatrix) - -Get a vector of matrices containing the ghost rows and columns -in each part of `a`. - -The row and column indices of the returned matrices can be mapped to global -indices, local indices, and owner by using [`ghost_to_global`](@ref), -[`ghost_to_local`](@ref), and [`ghost_to_owner`](@ref), respectively. -""" -function ghost_values(a::PSparseMatrix) +function ghost_values(a::OldPSparseMatrix) map(ghost_values,partition(a),partition(axes(a,1)),partition(axes(a,2))) end -""" - own_ghost_values(a::PSparseMatrix) - -Get a vector of matrices containing the own rows and ghost columns -in each part of `a`. - -The *row* indices of the returned matrices can be mapped to global indices, -local indices, and owner by using [`own_to_global`](@ref), -[`own_to_local`](@ref), and [`own_to_owner`](@ref), respectively. - -The *column* indices of the returned matrices can be mapped to global indices, -local indices, and owner by using [`ghost_to_global`](@ref), -[`ghost_to_local`](@ref), and [`ghost_to_owner`](@ref), respectively. -""" -function own_ghost_values(a::PSparseMatrix) +function own_ghost_values(a::OldPSparseMatrix) map(own_ghost_values,partition(a),partition(axes(a,1)),partition(axes(a,2))) end -""" - ghost_own_values(a::PSparseMatrix) - -Get a vector of matrices containing the ghost rows and own columns -in each part of `a`. 
- -The *row* indices of the returned matrices can be mapped to global indices, -local indices, and owner by using [`ghost_to_global`](@ref), -[`ghost_to_local`](@ref), and [`ghost_to_owner`](@ref), respectively. - -The *column* indices of the returned matrices can be mapped to global indices, -local indices, and owner by using [`own_to_global`](@ref), -[`own_to_local`](@ref), and [`own_to_owner`](@ref), respectively. -""" -function ghost_own_values(a::PSparseMatrix) +function ghost_own_values(a::OldPSparseMatrix) map(ghost_own_values,partition(a),partition(axes(a,1)),partition(axes(a,2))) end -Base.size(a::PSparseMatrix) = map(length,axes(a)) -Base.IndexStyle(::Type{<:PSparseMatrix}) = IndexCartesian() -function Base.getindex(a::PSparseMatrix,gi::Int,gj::Int) +Base.size(a::OldPSparseMatrix) = map(length,axes(a)) +Base.IndexStyle(::Type{<:OldPSparseMatrix}) = IndexCartesian() +function Base.getindex(a::OldPSparseMatrix,gi::Int,gj::Int) scalar_indexing_action(a) end -function Base.setindex!(a::PSparseMatrix,v,gi::Int,gj::Int) +function Base.setindex!(a::OldPSparseMatrix,v,gi::Int,gj::Int) scalar_indexing_action(a) end -function Base.show(io::IO,k::MIME"text/plain",data::PSparseMatrix) +function Base.show(io::IO,k::MIME"text/plain",data::OldPSparseMatrix) T = eltype(partition(data)) m,n = size(data) np = length(partition(data)) map_main(partition(data)) do values - println(io,"$(m)×$(n) PSparseMatrix{$T} partitioned into $np parts") + println(io,"$(m)×$(n) OldPSparseMatrix{$T} partitioned into $np parts") end end @@ -281,19 +192,11 @@ function assemble_impl!(f,matrix_partition,cache,::Type{<:SparseMatrixAssemblyCa assemble!(f,data,vcache) end -function assemble!(a::PSparseMatrix) +function assemble!(a::OldPSparseMatrix) assemble!(+,a) end -""" - assemble!([op,] a::PSparseMatrix) -> Task - -Transfer the ghost rows to their owner part -and insert them according with the insertion operation `op` (`+` by default). -It returns a task that produces `a` with updated values. 
After the transfer, -the source ghost rows are set to zero. -""" -function assemble!(o,a::PSparseMatrix) +function assemble!(o,a::OldPSparseMatrix) t = assemble!(o,partition(a),a.cache) @async begin wait(t) @@ -385,65 +288,65 @@ function assemble_coo!(I,J,V,row_partition) end end -function PSparseMatrix{V}(::UndefInitializer,row_partition,col_partition) where V +function OldPSparseMatrix{V}(::UndefInitializer,row_partition,col_partition) where V matrix_partition = map(row_partition,col_partition) do row_indices, col_indices allocate_local_values(V,row_indices,col_indices) end - PSparseMatrix(matrix_partition,row_partition,col_partition) + OldPSparseMatrix(matrix_partition,row_partition,col_partition) end -function Base.similar(a::PSparseMatrix,::Type{T},inds::Tuple{<:PRange,<:PRange}) where T +function Base.similar(a::OldPSparseMatrix,::Type{T},inds::Tuple{<:PRange,<:PRange}) where T rows,cols = inds matrix_partition = map(partition(a),partition(rows),partition(cols)) do values, row_indices, col_indices allocate_local_values(values,T,row_indices,col_indices) end - PSparseMatrix(matrix_partition,partition(rows),partition(cols)) + OldPSparseMatrix(matrix_partition,partition(rows),partition(cols)) end -function Base.similar(::Type{<:PSparseMatrix{V}},inds::Tuple{<:PRange,<:PRange}) where V +function Base.similar(::Type{<:OldPSparseMatrix{V}},inds::Tuple{<:PRange,<:PRange}) where V rows,cols = inds matrix_partition = map(partition(rows),partition(cols)) do row_indices, col_indices allocate_local_values(V,row_indices,col_indices) end - PSparseMatrix(matrix_partition,partition(rows),partition(cols)) + OldPSparseMatrix(matrix_partition,partition(rows),partition(cols)) end -function Base.copy!(a::PSparseMatrix,b::PSparseMatrix) +function Base.copy!(a::OldPSparseMatrix,b::OldPSparseMatrix) @assert size(a) == size(b) copyto!(a,b) end -function Base.copyto!(a::PSparseMatrix,b::PSparseMatrix) +function Base.copyto!(a::OldPSparseMatrix,b::OldPSparseMatrix) if 
partition(axes(a,1)) === partition(axes(b,1)) && partition(axes(a,2)) === partition(axes(b,2)) map(copy!,partition(a),partition(b)) elseif matching_own_indices(axes(a,1),axes(b,1)) && matching_own_indices(axes(a,2),axes(b,2)) map(copy!,own_values(a),own_values(b)) else - error("Trying to copy a PSparseMatrix into another one with a different data layout. This case is not implemented yet. It would require communications.") + error("Trying to copy a OldPSparseMatrix into another one with a different data layout. This case is not implemented yet. It would require communications.") end a end -function LinearAlgebra.fillstored!(a::PSparseMatrix,v) +function LinearAlgebra.fillstored!(a::OldPSparseMatrix,v) map(partition(a)) do values LinearAlgebra.fillstored!(values,v) end a end -function Base.:*(a::Number,b::PSparseMatrix) +function Base.:*(a::Number,b::OldPSparseMatrix) matrix_partition = map(partition(b)) do values a*values end cache = map(copy_cache,b.cache) - PSparseMatrix(matrix_partition,partition(axes(b,1)),partition(axes(b,2)),cache) + OldPSparseMatrix(matrix_partition,partition(axes(b,1)),partition(axes(b,2)),cache) end -function Base.:*(b::PSparseMatrix,a::Number) +function Base.:*(b::OldPSparseMatrix,a::Number) a*b end -function Base.:*(a::PSparseMatrix,b::PVector) +function Base.:*(a::OldPSparseMatrix,b::PVector) Ta = eltype(a) Tb = eltype(b) T = typeof(zero(Ta)*zero(Tb)+zero(Ta)*zero(Tb)) @@ -454,17 +357,17 @@ end for op in (:+,:-) @eval begin - function Base.$op(a::PSparseMatrix) + function Base.$op(a::OldPSparseMatrix) matrix_partition = map(partition(a)) do a $op(a) end cache = map(copy_cache,a.cache) - PSparseMatrix(matrix_partition,partition(axes(a,1)),partition(axes(a,2)),cache) + OldPSparseMatrix(matrix_partition,partition(axes(a,1)),partition(axes(a,2)),cache) end end end -function LinearAlgebra.mul!(c::PVector,a::PSparseMatrix,b::PVector,α::Number,β::Number) +function LinearAlgebra.mul!(c::PVector,a::OldPSparseMatrix,b::PVector,α::Number,β::Number) 
@boundscheck @assert matching_own_indices(axes(c,1),axes(a,1)) @boundscheck @assert matching_own_indices(axes(a,2),axes(b,1)) @boundscheck @assert matching_ghost_indices(axes(a,2),axes(b,1)) @@ -486,15 +389,12 @@ function LinearAlgebra.mul!(c::PVector,a::PSparseMatrix,b::PVector,α::Number,β c end -""" - psparse!([f,]I,J,V,row_partition,col_partition;discover_rows=true,discover_cols=true) -> Task +function old_psparse(f,row_partition,col_partition;assembled=false) + matrix_partition = map(f,row_partition,col_partition) + OldPSparseMatrix(matrix_partition,row_partition,col_partition,assembled) +end -Crate an instance of [`PSparseMatrix`](@ref) by setting arbitrary entries -from each of the underlying parts. It returns a task that produces the -instance of [`PSparseMatrix`](@ref) allowing latency hiding while performing -the communications needed in its setup. -""" -function psparse!(f,I,J,V,row_partition,col_partition;discover_rows=true,discover_cols=true) +function old_psparse!(f,I,J,V,row_partition,col_partition;discover_rows=true,discover_cols=true) if discover_rows I_owner = find_owner(row_partition,I) row_partition = map(union_ghost,row_partition,I,I_owner) @@ -509,28 +409,12 @@ function psparse!(f,I,J,V,row_partition,col_partition;discover_rows=true,discove map(to_local!,I,row_partition) map(to_local!,J,col_partition) matrix_partition = map(f,I,J,V,row_partition,col_partition) - PSparseMatrix(matrix_partition,row_partition,col_partition) + OldPSparseMatrix(matrix_partition,row_partition,col_partition) end end -function psparse!(I,J,V,row_partition,col_partition;kwargs...) - psparse!(default_local_values,I,J,V,row_partition,col_partition;kwargs...) -end - -""" - psparse(f,row_partition,col_partition) - -Build an instance of [`PSparseMatrix`](@ref) from the initialization function -`f` and the partition for rows and columns `row_partition` and `col_partition`. 
- -Equivalent to - - matrix_partition = map(f,row_partition,col_partition) - PSparseMatrix(matrix_partition,row_partition,col_partition) -""" -function psparse(f,row_partition,col_partition) - matrix_partition = map(f,row_partition,col_partition) - PSparseMatrix(matrix_partition,row_partition,col_partition) +function old_psparse!(I,J,V,row_partition,col_partition;kwargs...) + old_psparse!(default_local_values,I,J,V,row_partition,col_partition;kwargs...) end function default_local_values(row_indices,col_indices) @@ -545,7 +429,7 @@ function default_local_values(I,J,V,row_indices,col_indices) sparse(I,J,V,m,n) end -function trivial_partition(row_partition) +function old_trivial_partition(row_partition) destination = 1 n_own = map(row_partition) do indices owner = part_id(indices) @@ -589,9 +473,9 @@ function from_trivial_partition!(c::PVector,c_in_main::PVector) end function to_trivial_partition( - a::PSparseMatrix{M}, - row_partition_in_main=trivial_partition(partition(axes(a,1))), - col_partition_in_main=trivial_partition(partition(axes(a,2)))) where M + a::OldPSparseMatrix{M}, + row_partition_in_main=old_trivial_partition(partition(axes(a,1))), + col_partition_in_main=old_trivial_partition(partition(axes(a,2)))) where M destination = 1 Ta = eltype(a) I,J,V = map(partition(a),partition(axes(a,1)),partition(axes(a,2))) do a,row_indices,col_indices @@ -633,11 +517,11 @@ function to_trivial_partition( n = local_length(col_indices) compresscoo(M,myI,myJ,myV,m,n) end - PSparseMatrix(values,row_partition_in_main,col_partition_in_main) + OldPSparseMatrix(values,row_partition_in_main,col_partition_in_main) end # Not efficient, just for convenience and debugging purposes -function Base.:\(a::PSparseMatrix,b::PVector) +function Base.:\(a::OldPSparseMatrix,b::PVector) Ta = eltype(a) Tb = eltype(b) T = typeof(one(Ta)\one(Tb)+one(Ta)\one(Tb)) @@ -660,12 +544,12 @@ struct PLU{A,B,C} rows::B cols::C end -function LinearAlgebra.lu(a::PSparseMatrix) +function 
LinearAlgebra.lu(a::OldPSparseMatrix) a_in_main = to_trivial_partition(a) lu_in_main = map_main(lu,partition(a_in_main)) PLU(lu_in_main,axes(a_in_main,1),axes(a_in_main,2)) end -function LinearAlgebra.lu!(b::PLU,a::PSparseMatrix) +function LinearAlgebra.lu!(b::PLU,a::OldPSparseMatrix) a_in_main = to_trivial_partition(a,partition(b.rows),partition(b.cols)) map_main(lu!,b.lu_in_main,partition(a_in_main)) b @@ -680,9 +564,1394 @@ end # Misc functions that could be removed if IterativeSolvers was implemented in terms # of axes(A,d) instead of size(A,d) -function IterativeSolvers.zerox(A::PSparseMatrix,b::PVector) +function IterativeSolvers.zerox(A::OldPSparseMatrix,b::PVector) T = IterativeSolvers.Adivtype(A, b) x = similar(b, T, axes(A, 2)) fill!(x, zero(T)) return x end + +# New stuff + +struct GenericSplitMatrixBlocks{A,B,C,D} + own_own::A + own_ghost::B + ghost_own::C + ghost_ghost::D +end +struct SplitMatrixBlocks{A} + own_own::A + own_ghost::A + ghost_own::A + ghost_ghost::A +end +function split_matrix_blocks(own_own,own_ghost,ghost_own,ghost_ghost) + GenericSplitMatrixBlocks(own_own,own_ghost,ghost_own,ghost_ghost) +end +function split_matrix_blocks(own_own::A,own_ghost::A,ghost_own::A,ghost_ghost::A) where A + SplitMatrixBlocks(own_own,own_ghost,ghost_own,ghost_ghost) +end + +abstract type AbstractSplitMatrix{T} <: AbstractMatrix{T} end + +struct GenericSplitMatrix{A,B,C,T} <: AbstractSplitMatrix{T} + blocks::A + row_permutation::B + col_permutation::C + function GenericSplitMatrix(blocks,row_permutation,col_permutation) + T = eltype(blocks.own_own) + A = typeof(blocks) + B = typeof(row_permutation) + C = typeof(col_permutation) + new{A,B,C,T}(blocks,row_permutation,col_permutation) + end +end + +struct SplitMatrix{A,T} <: AbstractSplitMatrix{T} + blocks::SplitMatrixBlocks{A} + row_permutation::UnitRange{Int32} + col_permutation::UnitRange{Int32} + function SplitMatrix( + blocks::SplitMatrixBlocks{A},row_permutation,col_permutation) where A + T = 
eltype(blocks.own_own) + row_perm = convert(UnitRange{Int32},row_permutation) + col_perm = convert(UnitRange{Int32},col_permutation) + new{A,T}(blocks,row_perm,col_perm) + end +end + +function split_matrix(blocks,row_permutation,col_permutation) + GenericSplitMatrix(blocks,row_permutation,col_permutation) +end + +function split_matrix( + blocks::SplitMatrixBlocks, + row_permutation::UnitRange, + col_permutation::UnitRange) + SplitMatrix(blocks,row_permutation,col_permutation) +end + + +Base.size(a::AbstractSplitMatrix) = (length(a.row_permutation),length(a.col_permutation)) +Base.IndexStyle(::Type{<:AbstractSplitMatrix}) = IndexCartesian() +function Base.getindex(a::AbstractSplitMatrix,i::Int,j::Int) + n_own_rows, n_own_cols = size(a.blocks.own_own) + ip = a.row_permutation[i] + jp = a.col_permutation[j] + T = eltype(a) + if ip <= n_own_rows && jp <= n_own_cols + v = a.blocks.own_own[ip,jp] + elseif ip <= n_own_rows + v = a.blocks.own_ghost[ip,jp-n_own_cols] + elseif jp <= n_own_cols + v = a.blocks.ghost_own[ip-n_own_rows,jp] + else + v = a.blocks.ghost_ghost[ip-n_own_rows,jp-n_own_cols] + end + convert(T,v) +end + +function own_own_values(values::AbstractSplitMatrix,indices_rows,indices_cols) + values.blocks.own_own +end +function own_ghost_values(values::AbstractSplitMatrix,indices_rows,indices_cols) + values.blocks.own_ghost +end +function ghost_own_values(values::AbstractSplitMatrix,indices_rows,indices_cols) + values.blocks.ghost_own +end +function ghost_ghost_values(values::AbstractSplitMatrix,indices_rows,indices_cols) + values.blocks.ghost_ghost +end + +Base.similar(a::AbstractSplitMatrix) = similar(a,eltype(a)) +function Base.similar(a::AbstractSplitMatrix,::Type{T}) where T + own_own = similar(a.blocks.own_own,T) + own_ghost = similar(a.blocks.own_ghost,T) + ghost_own = similar(a.blocks.ghost_own,T) + ghost_ghost = similar(a.blocks.ghost_ghost,T) + blocks = split_matrix_blocks(own_own,own_ghost,ghost_own,ghost_ghost) + 
split_matrix(blocks,a.row_permutation,a.col_permutation)
+end
+
+function Base.copy!(a::AbstractSplitMatrix,b::AbstractSplitMatrix)
+    copy!(a.blocks.own_own,b.blocks.own_own)
+    copy!(a.blocks.own_ghost,b.blocks.own_ghost)
+    copy!(a.blocks.ghost_own,b.blocks.ghost_own)
+    copy!(a.blocks.ghost_ghost,b.blocks.ghost_ghost)
+    a
+end
+function Base.copyto!(a::AbstractSplitMatrix,b::AbstractSplitMatrix)
+    copyto!(a.blocks.own_own,b.blocks.own_own)
+    copyto!(a.blocks.own_ghost,b.blocks.own_ghost)
+    copyto!(a.blocks.ghost_own,b.blocks.ghost_own)
+    copyto!(a.blocks.ghost_ghost,b.blocks.ghost_ghost)
+    a
+end
+
+function LinearAlgebra.fillstored!(a::AbstractSplitMatrix,v)
+    LinearAlgebra.fillstored!(a.blocks.own_own,v)
+    LinearAlgebra.fillstored!(a.blocks.own_ghost,v)
+    LinearAlgebra.fillstored!(a.blocks.ghost_own,v)
+    LinearAlgebra.fillstored!(a.blocks.ghost_ghost,v)
+    a
+end
+
+function Base.:*(a::Number,b::AbstractSplitMatrix)
+    own_own = a*b.blocks.own_own
+    own_ghost = a*b.blocks.own_ghost
+    ghost_own = a*b.blocks.ghost_own
+    ghost_ghost = a*b.blocks.ghost_ghost
+    blocks = split_matrix_blocks(own_own,own_ghost,ghost_own,ghost_ghost)
+    split_matrix(blocks,b.row_permutation,b.col_permutation)
+end
+
+function Base.:*(b::AbstractSplitMatrix,a::Number)
+    a*b
+end
+
+for op in (:+,:-)
+    @eval begin
+        function Base.$op(a::AbstractSplitMatrix)
+            own_own = $op(a.blocks.own_own)
+            own_ghost = $op(a.blocks.own_ghost)
+            ghost_own = $op(a.blocks.ghost_own)
+            ghost_ghost = $op(a.blocks.ghost_ghost)
+            blocks = split_matrix_blocks(own_own,own_ghost,ghost_own,ghost_ghost)
+            split_matrix(blocks,a.row_permutation,a.col_permutation)
+        end
+        function Base.$op(a::AbstractSplitMatrix,b::AbstractSplitMatrix)
+            @boundscheck @assert a.row_permutation == b.row_permutation
+            @boundscheck @assert a.col_permutation == b.col_permutation
+            own_own = $op(a.blocks.own_own,b.blocks.own_own)
+            own_ghost = $op(a.blocks.own_ghost,b.blocks.own_ghost)
+            ghost_own = $op(a.blocks.ghost_own,b.blocks.ghost_own)
ghost_ghost = $op(a.blocks.ghost_ghost,b.blocks.ghost_ghost)
+            blocks = split_matrix_blocks(own_own,own_ghost,ghost_own,ghost_ghost)
+            split_matrix(blocks,b.row_permutation,b.col_permutation)
+        end
+    end
+end
+
+function split_format_locally(A,rows,cols)
+    n_own_rows = own_length(rows)
+    n_own_cols = own_length(cols)
+    n_ghost_rows = ghost_length(rows)
+    n_ghost_cols = ghost_length(cols)
+    rows_perm = local_permutation(rows)
+    cols_perm = local_permutation(cols)
+    n_own_own = 0
+    n_own_ghost = 0
+    n_ghost_own = 0
+    n_ghost_ghost = 0
+    for (i,j,v) in nziterator(A)
+        ip = rows_perm[i]
+        jp = cols_perm[j]
+        if ip <= n_own_rows && jp <= n_own_cols
+            n_own_own += 1
+        elseif ip <= n_own_rows
+            n_own_ghost += 1
+        elseif jp <= n_own_cols
+            n_ghost_own += 1
+        else
+            n_ghost_ghost += 1
+        end
+    end
+    Ti = indextype(A)
+    Tv = eltype(A)
+    own_own = (I=zeros(Ti,n_own_own),J=zeros(Ti,n_own_own),V=zeros(Tv,n_own_own))
+    own_ghost = (I=zeros(Ti,n_own_ghost),J=zeros(Ti,n_own_ghost),V=zeros(Tv,n_own_ghost))
+    ghost_own = (I=zeros(Ti,n_ghost_own),J=zeros(Ti,n_ghost_own),V=zeros(Tv,n_ghost_own))
+    ghost_ghost = (I=zeros(Ti,n_ghost_ghost),J=zeros(Ti,n_ghost_ghost),V=zeros(Tv,n_ghost_ghost))
+    n_own_own = 0
+    n_own_ghost = 0
+    n_ghost_own = 0
+    n_ghost_ghost = 0
+    for (i,j,v) in nziterator(A)
+        ip = rows_perm[i]
+        jp = cols_perm[j]
+        if ip <= n_own_rows && jp <= n_own_cols
+            n_own_own += 1
+            own_own.I[n_own_own] = ip
+            own_own.J[n_own_own] = jp
+            own_own.V[n_own_own] = v
+        elseif ip <= n_own_rows
+            n_own_ghost += 1
+            own_ghost.I[n_own_ghost] = ip
+            own_ghost.J[n_own_ghost] = jp-n_own_cols
+            own_ghost.V[n_own_ghost] = v
+        elseif jp <= n_own_cols
+            n_ghost_own += 1
+            ghost_own.I[n_ghost_own] = ip-n_own_rows
+            ghost_own.J[n_ghost_own] = jp
+            ghost_own.V[n_ghost_own] = v
+        else
+            n_ghost_ghost += 1
+            ghost_ghost.I[n_ghost_ghost] = ip-n_own_rows
+            ghost_ghost.J[n_ghost_ghost] = jp-n_own_cols
+            ghost_ghost.V[n_ghost_ghost] = v
+        end
+    end
+    TA = typeof(A)
+    A1 = 
compresscoo(TA,own_own...,n_own_rows ,n_own_cols) + A2 = compresscoo(TA,own_ghost...,n_own_rows ,n_ghost_cols) + A3 = compresscoo(TA,ghost_own...,n_ghost_rows,n_own_cols) + A4 = compresscoo(TA,ghost_ghost...,n_ghost_rows,n_ghost_cols) + blocks = split_matrix_blocks(A1,A2,A3,A4) + B = split_matrix(blocks,rows_perm,cols_perm) + c1 = precompute_nzindex(A1,own_own.I,own_own.J) + c2 = precompute_nzindex(A2,own_ghost.I,own_ghost.J) + c3 = precompute_nzindex(A3,ghost_own.I,ghost_own.J) + c4 = precompute_nzindex(A4,ghost_ghost.I,ghost_ghost.J) + own_own_V = own_own.V + own_ghost_V = own_ghost.V + ghost_own_V = ghost_own.V + ghost_ghost_V = ghost_ghost.V + cache = (c1,c2,c3,c4,own_own_V,own_ghost_V,ghost_own_V,ghost_ghost_V) + B, cache +end + +function split_format_locally!(B::AbstractSplitMatrix,A,rows,cols,cache) + (c1,c2,c3,c4,own_own_V,own_ghost_V,ghost_own_V,ghost_ghost_V) = cache + n_own_rows = own_length(rows) + n_own_cols = own_length(cols) + n_ghost_rows = ghost_length(rows) + n_ghost_cols = ghost_length(cols) + rows_perm = local_permutation(rows) + cols_perm = local_permutation(cols) + n_own_own = 0 + n_own_ghost = 0 + n_ghost_own = 0 + n_ghost_ghost = 0 + for (i,j,v) in nziterator(A) + ip = rows_perm[i] + jp = cols_perm[j] + if ip <= n_own_rows && jp <= n_own_cols + n_own_own += 1 + own_own_V[n_own_own] = v + elseif ip <= n_own_rows + n_own_ghost += 1 + own_ghost_V[n_own_ghost] = v + elseif jp <= n_own_cols + n_ghost_own += 1 + ghost_own_V[n_ghost_own] = v + else + n_ghost_ghost += 1 + ghost_ghost_V[n_ghost_ghost] = v + end + end + setcoofast!(B.blocks.own_own,own_own_V,c1) + setcoofast!(B.blocks.own_ghost,own_ghost_V,c2) + setcoofast!(B.blocks.ghost_own,ghost_own_V,c3) + setcoofast!(B.blocks.ghost_ghost,ghost_ghost_V,c4) + B +end + +""" + struct PSparseMatrix{V,B,C,D,T} + +`PSparseMatrix` (partitioned sparse matrix) +is a type representing a matrix whose rows are +distributed (a.k.a. 
partitioned) over different parts for distributed-memory +parallel computations. Each part stores a subset of the rows of the matrix and their +corresponding non zero columns. + +This type overloads numerous array-like operations with corresponding +parallel implementations. + +# Properties + +- `matrix_partition::A` +- `row_partition::B` +- `col_partition::C` +- `assembled::Bool` + +`matrix_partition[i]` contains a (sparse) matrix with the local rows and the +corresponding nonzero columns (the local columns) in the part number `i`. +`eltype(matrix_partition) == V`. +`row_partition[i]` and `col_partition[i]` contain information +about the local, own, and ghost rows and columns respectively in part number `i`. +The types `eltype(row_partition)` and `eltype(col_partition)` implement the +[`AbstractLocalIndices`](@ref) interface. For `assembled==true`, it is assumed that the matrix data +is fully contained in the own rows. + +# Supertype hierarchy + + PSparseMatrix{V,A,B,C,T} <: AbstractMatrix{T} + +with `T=eltype(V)`. +""" +struct PSparseMatrix{V,B,C,D,T} <: AbstractMatrix{T} + matrix_partition::B + row_partition::C + col_partition::D + assembled::Bool + @doc """ + PSparseMatrix(matrix_partition,row_partition,col_partition,assembled) + + Build an instance for [`PSparseMatrix`](@ref) from the underlying fields + `matrix_partition`, `row_partition`, `col_partition`, assembled. 
+ """ + function PSparseMatrix( + matrix_partition,row_partition,col_partition,assembled) + V = eltype(matrix_partition) + T = eltype(V) + B = typeof(matrix_partition) + C = typeof(row_partition) + D = typeof(col_partition) + new{V,B,C,D,T}(matrix_partition,row_partition,col_partition,assembled) + end +end +partition(a::PSparseMatrix) = a.matrix_partition +Base.axes(a::PSparseMatrix) = (PRange(a.row_partition),PRange(a.col_partition)) +Base.size(a::PSparseMatrix) = map(length,axes(a)) +Base.IndexStyle(::Type{<:PSparseMatrix}) = IndexCartesian() +function Base.getindex(a::PSparseMatrix,gi::Int,gj::Int) + scalar_indexing_action(a) +end +function Base.setindex!(a::PSparseMatrix,v,gi::Int,gj::Int) + scalar_indexing_action(a) +end + +function Base.show(io::IO,k::MIME"text/plain",data::PSparseMatrix) + T = eltype(partition(data)) + m,n = size(data) + np = length(partition(data)) + map_main(partition(data)) do values + println(io,"$(m)×$(n) PSparseMatrix partitioned into $np parts of type $T") + end +end +function Base.show(io::IO,data::PSparseMatrix) + print(io,"PSparseMatrix(…)") +end + +""" + local_values(a::PSparseMatrix) + +Get a vector of matrices containing the local rows and columns +in each part of `a`. + +The row and column indices of the returned matrices can be mapped to global +indices, own indices, ghost indices, and owner by using +[`local_to_global`](@ref), [`local_to_own`](@ref), [`local_to_ghost`](@ref), +and [`local_to_owner`](@ref), respectively. +""" +function local_values(a::PSparseMatrix) + partition(a) +end + +""" + own_own_values(a::PSparseMatrix) + +Get a vector of matrices containing the own rows and columns +in each part of `a`. + +The row and column indices of the returned matrices can be mapped to global +indices, local indices, and owner by using [`own_to_global`](@ref), +[`own_to_local`](@ref), and [`own_to_owner`](@ref), respectively. 
+""" +function own_own_values(a::PSparseMatrix) + map(own_own_values,partition(a),partition(axes(a,1)),partition(axes(a,2))) +end + +""" + own_ghost_values(a::PSparseMatrix) + +Get a vector of matrices containing the own rows and ghost columns +in each part of `a`. + +The *row* indices of the returned matrices can be mapped to global indices, +local indices, and owner by using [`own_to_global`](@ref), +[`own_to_local`](@ref), and [`own_to_owner`](@ref), respectively. + +The *column* indices of the returned matrices can be mapped to global indices, +local indices, and owner by using [`ghost_to_global`](@ref), +[`ghost_to_local`](@ref), and [`ghost_to_owner`](@ref), respectively. +""" +function own_ghost_values(a::PSparseMatrix) + map(own_ghost_values,partition(a),partition(axes(a,1)),partition(axes(a,2))) +end + +""" + ghost_own_values(a::PSparseMatrix) + +Get a vector of matrices containing the ghost rows and own columns +in each part of `a`. + +The *row* indices of the returned matrices can be mapped to global indices, +local indices, and owner by using [`ghost_to_global`](@ref), +[`ghost_to_local`](@ref), and [`ghost_to_owner`](@ref), respectively. + +The *column* indices of the returned matrices can be mapped to global indices, +local indices, and owner by using [`own_to_global`](@ref), +[`own_to_local`](@ref), and [`own_to_owner`](@ref), respectively. +""" +function ghost_own_values(a::PSparseMatrix) + map(ghost_own_values,partition(a),partition(axes(a,1)),partition(axes(a,2))) +end + +""" + ghost_ghost_values(a::PSparseMatrix) + +Get a vector of matrices containing the ghost rows and columns +in each part of `a`. + +The row and column indices of the returned matrices can be mapped to global +indices, local indices, and owner by using [`ghost_to_global`](@ref), +[`ghost_to_local`](@ref), and [`ghost_to_owner`](@ref), respectively. 
+""" +function ghost_ghost_values(a::PSparseMatrix) + map(ghost_ghost_values,partition(a),partition(axes(a,1)),partition(axes(a,2))) +end + +val_parameter(a) = a +val_parameter(::Val{a}) where a = a + +function split_format(A::PSparseMatrix;reuse=Val(false)) + rows = partition(axes(A,1)) + cols = partition(axes(A,2)) + values, cache = map(split_format_locally,partition(A),rows,cols) |> tuple_of_arrays + B = PSparseMatrix(values,rows,cols,A.assembled) + if val_parameter(reuse) == false + B + else + B, cache + end +end + +function split_format!(B,A::PSparseMatrix,cache) + rows = partition(axes(A,1)) + cols = partition(axes(A,2)) + map(split_format_locally!,partition(B),partition(A),rows,cols,cache) + B +end + +""" + psparse(f,row_partition,col_partition;assembled) + +Build an instance of [`PSparseMatrix`](@ref) from the initialization function +`f` and the partition for rows and columns `row_partition` and `col_partition`. + +Equivalent to + + matrix_partition = map(f,row_partition,col_partition) + PSparseMatrix(matrix_partition,row_partition,col_partition,assembled) +""" +function psparse(f,row_partition,col_partition;assembled) + matrix_partition = map(f,row_partition,col_partition) + PSparseMatrix(matrix_partition,row_partition,col_partition,assembled) +end + +function psparse(I,J,V,rows,cols;kwargs...) + psparse(sparse,I,J,V,rows,cols;kwargs...) +end + +""" + psparse([f,]I,J,V,row_partition,col_partition;kwargs...) -> Task + +Crate an instance of [`PSparseMatrix`](@ref) by setting arbitrary entries +from each of the underlying parts. It returns a task that produces the +instance of [`PSparseMatrix`](@ref) allowing latency hiding while performing +the communications needed in its setup. 
+"""
+function psparse(f,I,J,V,rows,cols;
+        split_format=true,
+        assembled=false,
+        assemble=true,
+        discover_rows=true,
+        discover_cols=true,
+        restore_ids = true,
+        assembly_neighbors_options_rows = (;),
+        assembly_neighbors_options_cols = (;),
+        reuse=Val(false)
+    )
+
+    # TODO for some particular cases
+    # this function allocates more
+    # intermediate results than needed
+    # One can e.g. merge the split_format and assemble steps
+    # Even the matrix compression step could be
+    # merged with the assembly step
+
+    map(I,J) do I,J
+        @assert I !== J
+    end
+
+    if assembled || assemble
+        @boundscheck @assert all(i->ghost_length(i)==0,rows)
+    end
+
+    if !assembled && discover_rows
+        I_owner = find_owner(rows,I)
+        rows_sa = map(union_ghost,rows,I,I_owner)
+        assembly_neighbors(rows_sa;assembly_neighbors_options_rows...)
+    else
+        rows_sa = rows
+    end
+    if discover_cols
+        J_owner = find_owner(cols,J)
+        cols_sa = map(union_ghost,cols,J,J_owner)
+        if ! assemble
+            assembly_neighbors(cols_sa;assembly_neighbors_options_cols...)
+ end + else + cols_sa = cols + end + map(map_global_to_local!,I,rows_sa) + map(map_global_to_local!,J,cols_sa) + values_sa = map(f,I,J,V,map(local_length,rows_sa),map(local_length,cols_sa)) + if val_parameter(reuse) + K = map(precompute_nzindex,values_sa,I,J) + end + if restore_ids + map(map_local_to_global!,I,rows_sa) + map(map_local_to_global!,J,cols_sa) + end + A = PSparseMatrix(values_sa,rows_sa,cols_sa,assembled) + if split_format + B,cacheB = PartitionedArrays.split_format(A;reuse=true) + else + B,cacheB = A,nothing + end + if assemble + t = PartitionedArrays.assemble(B,rows;reuse=true,assembly_neighbors_options_cols) + else + t = @async B,cacheB + end + if val_parameter(reuse) == false + return @async begin + C, cacheC = fetch(t) + C + end + else + return @async begin + C, cacheC = fetch(t) + cache = (A,B,K,cacheB,cacheC,split_format,assembled) + (C, cache) + end + end +end + +""" + psparse!(C::PSparseMatrix,V,cache) +""" +function psparse!(C,V,cache) + (A,B,K,cacheB,cacheC,split_format,assembled) = cache + rows_sa = partition(axes(A,1)) + cols_sa = partition(axes(A,2)) + values_sa = partition(A) + map(setcoofast!,values_sa,V,K) + if split_format + split_format!(B,A,cacheB) + end + if !assembled && C.assembled + t = PartitionedArrays.assemble!(C,B,cacheC) + else + t = @async C + end +end + +function assemble(A::PSparseMatrix;kwargs...) + rows = map(remove_ghost,partition(axes(A,1))) + assemble(A,rows;kwargs...) +end + +""" + assemble(A::PSparseMatrix[,rows];kwargs...) +""" +function assemble(A::PSparseMatrix,rows;kwargs...) + @boundscheck @assert matching_own_indices(axes(A,1),PRange(rows)) + T = eltype(partition(A)) + psparse_assemble_impl(A,T,rows;kwargs...) 
+end + +""" + assemble!(B::PSparseMatrix,A::PSparseMatrix,cache) +""" +function assemble!(B::PSparseMatrix,A::PSparseMatrix,cache) + T = eltype(partition(A)) + psparse_assemble_impl!(B,A,T,cache) +end + +function psparse_assemble_impl(A,::Type,rows) + error("Case not implemented yet") +end + +function psparse_assemble_impl( + A, + ::Type{<:AbstractSplitMatrix}, + rows; + reuse=Val(false), + assembly_neighbors_options_cols=(;)) + + function setup_cache_snd(A,parts_snd,rows_sa,cols_sa) + A_ghost_own = A.blocks.ghost_own + A_ghost_ghost = A.blocks.ghost_ghost + gen = ( owner=>i for (i,owner) in enumerate(parts_snd) ) + owner_to_p = Dict(gen) + ptrs = zeros(Int32,length(parts_snd)+1) + ghost_to_owner_row = ghost_to_owner(rows_sa) + ghost_to_global_row = ghost_to_global(rows_sa) + own_to_global_col = own_to_global(cols_sa) + ghost_to_global_col = ghost_to_global(cols_sa) + for (i,_,_) in nziterator(A_ghost_own) + owner = ghost_to_owner_row[i] + ptrs[owner_to_p[owner]+1] += 1 + end + for (i,_,_) in nziterator(A_ghost_ghost) + owner = ghost_to_owner_row[i] + ptrs[owner_to_p[owner]+1] += 1 + end + length_to_ptrs!(ptrs) + Tv = eltype(A_ghost_own) + ndata = ptrs[end]-1 + I_snd_data = zeros(Int,ndata) + J_snd_data = zeros(Int,ndata) + V_snd_data = zeros(Tv,ndata) + k_snd_data = zeros(Int32,ndata) + nnz_ghost_own = 0 + for (k,(i,j,v)) in enumerate(nziterator(A_ghost_own)) + owner = ghost_to_owner_row[i] + p = ptrs[owner_to_p[owner]] + I_snd_data[p] = ghost_to_global_row[i] + J_snd_data[p] = own_to_global_col[j] + V_snd_data[p] = v + k_snd_data[p] = k + ptrs[owner_to_p[owner]] += 1 + nnz_ghost_own += 1 + end + for (k,(i,j,v)) in enumerate(nziterator(A_ghost_ghost)) + owner = ghost_to_owner_row[i] + p = ptrs[owner_to_p[owner]] + I_snd_data[p] = ghost_to_global_row[i] + J_snd_data[p] = ghost_to_global_col[j] + V_snd_data[p] = v + k_snd_data[p] = k+nnz_ghost_own + ptrs[owner_to_p[owner]] += 1 + end + rewind_ptrs!(ptrs) + I_snd = JaggedArray(I_snd_data,ptrs) + J_snd = 
JaggedArray(J_snd_data,ptrs)
+        V_snd = JaggedArray(V_snd_data,ptrs)
+        k_snd = JaggedArray(k_snd_data,ptrs)
+        (;I_snd,J_snd,V_snd,k_snd,parts_snd)
+    end
+    function setup_cache_rcv(I_rcv,J_rcv,V_rcv,parts_rcv)
+        k_rcv_data = zeros(Int32,length(I_rcv.data))
+        k_rcv = JaggedArray(k_rcv_data,I_rcv.ptrs)
+        (;I_rcv,J_rcv,V_rcv,k_rcv,parts_rcv)
+    end
+    function setup_touched_col_ids(A,cache_rcv,cols_sa)
+        J_rcv_data = cache_rcv.J_rcv.data
+        l1 = nnz(A.own_ghost)
+        l2 = length(J_rcv_data)
+        J_aux = zeros(Int,l1+l2)
+        ghost_to_global_col = ghost_to_global(cols_sa)
+        for (p,(_,j,_)) in enumerate(nziterator(A.own_ghost))
+            J_aux[p] = ghost_to_global_col[j]
+        end
+        J_aux[l1.+(1:l2)] = J_rcv_data
+        J_aux
+    end
+    function setup_own_triplets(A,cache_rcv,rows_sa,cols_sa)
+        nz_own_own = findnz(A.blocks.own_own)
+        nz_own_ghost = findnz(A.blocks.own_ghost)
+        I_rcv_data = cache_rcv.I_rcv.data
+        J_rcv_data = cache_rcv.J_rcv.data
+        V_rcv_data = cache_rcv.V_rcv.data
+        k_rcv_data = cache_rcv.k_rcv.data
+        global_to_own_col = global_to_own(cols_sa)
+        is_ghost = findall(j->global_to_own_col[j]==0,J_rcv_data)
+        is_own = findall(j->global_to_own_col[j]!=0,J_rcv_data)
+        I_rcv_own = view(I_rcv_data,is_own)
+        J_rcv_own = view(J_rcv_data,is_own)
+        V_rcv_own = view(V_rcv_data,is_own)
+        k_rcv_own = view(k_rcv_data,is_own)
+        I_rcv_ghost = view(I_rcv_data,is_ghost)
+        J_rcv_ghost = view(J_rcv_data,is_ghost)
+        V_rcv_ghost = view(V_rcv_data,is_ghost)
+        k_rcv_ghost = view(k_rcv_data,is_ghost)
+        # After this col ids in own_ghost triplet remain global
+        map_global_to_own!(I_rcv_own,rows_sa)
+        map_global_to_own!(J_rcv_own,cols_sa)
+        map_global_to_own!(I_rcv_ghost,rows_sa)
+        map_ghost_to_global!(nz_own_ghost[2],cols_sa)
+        own_own_I = vcat(nz_own_own[1],I_rcv_own)
+        own_own_J = vcat(nz_own_own[2],J_rcv_own)
+        own_own_V = vcat(nz_own_own[3],V_rcv_own)
+        own_own_triplet = (own_own_I,own_own_J,own_own_V)
+        own_ghost_I = vcat(nz_own_ghost[1],I_rcv_ghost)
+        own_ghost_J = vcat(nz_own_ghost[2],J_rcv_ghost)
+        own_ghost_V = vcat(nz_own_ghost[3],V_rcv_ghost)
+        map_global_to_ghost!(nz_own_ghost[2],cols_sa)
+        own_ghost_triplet = (own_ghost_I,own_ghost_J,own_ghost_V)
+        triplets = (own_own_triplet,own_ghost_triplet)
+        aux = (I_rcv_own,J_rcv_own,k_rcv_own,I_rcv_ghost,J_rcv_ghost,k_rcv_ghost,nz_own_own,nz_own_ghost)
+        triplets, own_ghost_J, aux
+    end
+    function finalize_values(A,rows_fa,cols_fa,cache_snd,cache_rcv,triplets,aux)
+        (own_own_triplet,own_ghost_triplet) = triplets
+        (I_rcv_own,J_rcv_own,k_rcv_own,I_rcv_ghost,J_rcv_ghost,k_rcv_ghost,nz_own_own,nz_own_ghost) = aux
+        map_global_to_ghost!(own_ghost_triplet[2],cols_fa)
+        map_global_to_ghost!(J_rcv_ghost,cols_fa)
+        TA = typeof(A.blocks.own_own)
+        n_own_rows = own_length(rows_fa)
+        n_own_cols = own_length(cols_fa)
+        n_ghost_rows = ghost_length(rows_fa)
+        n_ghost_cols = ghost_length(cols_fa)
+        Ti = indextype(A.blocks.own_own)
+        Tv = eltype(A.blocks.own_own)
+        own_own = compresscoo(TA,own_own_triplet...,n_own_rows,n_own_cols)
+        own_ghost = compresscoo(TA,own_ghost_triplet...,n_own_rows,n_ghost_cols)
+        ghost_own = compresscoo(TA,Ti[],Ti[],Tv[],n_ghost_rows,n_own_cols)
+        ghost_ghost = compresscoo(TA,Ti[],Ti[],Tv[],n_ghost_rows,n_ghost_cols)
+        blocks = split_matrix_blocks(own_own,own_ghost,ghost_own,ghost_ghost)
+        values = split_matrix(blocks,local_permutation(rows_fa),local_permutation(cols_fa))
+        nnz_own_own = nnz(own_own)
+        k_own_sa = precompute_nzindex(own_own,own_own_triplet[1:2]...)
+        k_ghost_sa = precompute_nzindex(own_ghost,own_ghost_triplet[1:2]...)
+        for p in 1:length(I_rcv_own)
+            i = I_rcv_own[p]
+            j = J_rcv_own[p]
+            k_rcv_own[p] = nzindex(own_own,i,j)
+        end
+        for p in 1:length(I_rcv_ghost)
+            i = I_rcv_ghost[p]
+            j = J_rcv_ghost[p]
+            k_rcv_ghost[p] = nzindex(own_ghost,i,j) + nnz_own_own
+        end
+        cache = (;k_own_sa,k_ghost_sa,cache_snd...,cache_rcv...)
+ values, cache + end + rows_sa = partition(axes(A,1)) + cols_sa = partition(axes(A,2)) + #rows = map(remove_ghost,rows_sa) + cols = map(remove_ghost,cols_sa) + parts_snd, parts_rcv = assembly_neighbors(rows_sa) + cache_snd = map(setup_cache_snd,partition(A),parts_snd,rows_sa,cols_sa) + I_snd = map(i->i.I_snd,cache_snd) + J_snd = map(i->i.J_snd,cache_snd) + V_snd = map(i->i.V_snd,cache_snd) + graph = ExchangeGraph(parts_snd,parts_rcv) + t_I = exchange(I_snd,graph) + t_J = exchange(J_snd,graph) + t_V = exchange(V_snd,graph) + @async begin + I_rcv = fetch(t_I) + J_rcv = fetch(t_J) + V_rcv = fetch(t_V) + cache_rcv = map(setup_cache_rcv,I_rcv,J_rcv,V_rcv,parts_rcv) + triplets,J,aux = map(setup_own_triplets,partition(A),cache_rcv,rows_sa,cols_sa) |> tuple_of_arrays + J_owner = find_owner(cols_sa,J) + rows_fa = rows + cols_fa = map(union_ghost,cols,J,J_owner) + assembly_neighbors(cols_fa;assembly_neighbors_options_cols...) + vals_fa, cache = map(finalize_values,partition(A),rows_fa,cols_fa,cache_snd,cache_rcv,triplets,aux) |> tuple_of_arrays + assembled = true + B = PSparseMatrix(vals_fa,rows_fa,cols_fa,assembled) + if val_parameter(reuse) == false + B + else + B, cache + end + end +end + +function psparse_assemble_impl!(B,A,::Type,cache) + error("case not implemented") +end + +function psparse_assemble_impl!(B,A,::Type{<:AbstractSplitMatrix},cache) + function setup_snd(A,cache) + A_ghost_own = A.blocks.ghost_own + A_ghost_ghost = A.blocks.ghost_ghost + nnz_ghost_own = nnz(A_ghost_own) + V_snd_data = cache.V_snd.data + k_snd_data = cache.k_snd.data + nz_ghost_own = nonzeros(A_ghost_own) + nz_ghost_ghost = nonzeros(A_ghost_ghost) + for p in 1:length(k_snd_data) + k = k_snd_data[p] + if k <= nnz_ghost_own + v = nz_ghost_own[k] + else + v = nz_ghost_ghost[k-nnz_ghost_own] + end + V_snd_data[p] = v + end + end + function setup_sa(B,A,cache) + setcoofast!(B.blocks.own_own,nonzeros(A.blocks.own_own),cache.k_own_sa) + 
setcoofast!(B.blocks.own_ghost,nonzeros(A.blocks.own_ghost),cache.k_ghost_sa) + end + function setup_rcv(B,cache) + B_own_own = B.blocks.own_own + B_own_ghost = B.blocks.own_ghost + nnz_own_own = nnz(B_own_own) + V_rcv_data = cache.V_rcv.data + k_rcv_data = cache.k_rcv.data + nz_own_own = nonzeros(B_own_own) + nz_own_ghost = nonzeros(B_own_ghost) + for p in 1:length(k_rcv_data) + k = k_rcv_data[p] + v = V_rcv_data[p] + if k <= nnz_own_own + nz_own_own[k] += v + else + nz_own_ghost[k-nnz_own_own] += v + end + end + end + map(setup_snd,partition(A),cache) + parts_snd = map(i->i.parts_snd,cache) + parts_rcv = map(i->i.parts_rcv,cache) + V_snd = map(i->i.V_snd,cache) + V_rcv = map(i->i.V_rcv,cache) + graph = ExchangeGraph(parts_snd,parts_rcv) + t = exchange!(V_rcv,V_snd,graph) + map(setup_sa,partition(B),partition(A),cache) + @async begin + wait(t) + map(setup_rcv,partition(B),cache) + B + end +end + +""" + consistent(A::PSparseMatrix,rows;kwargs...) +""" +function consistent(A::PSparseMatrix,rows_co;kwargs...) + @assert A.assembled + T = eltype(partition(A)) + psparse_consitent_impl(A,T,rows_co;kwargs...) 
+end + +""" + consistent!(B::PSparseMatrix,A::PSparseMatrix,cache) +""" +function consistent!(B::PSparseMatrix,A::PSparseMatrix,cache) + @assert A.assembled + T = eltype(partition(A)) + psparse_consitent_impl!(B,A,T,cache) +end + +function psparse_consitent_impl( + A, + ::Type{<:AbstractSplitMatrix}, + rows_co; + reuse=Val(false)) + + function setup_snd(A,parts_snd,lids_snd,rows_co,cols_fa) + own_to_local_row = own_to_local(rows_co) + own_to_global_row = own_to_global(rows_co) + own_to_global_col = own_to_global(cols_fa) + ghost_to_global_col = ghost_to_global(cols_fa) + li_to_p = zeros(Int32,size(A,1)) + for p in 1:length(lids_snd) + li_to_p[lids_snd[p]] .= p + end + ptrs = zeros(Int32,length(parts_snd)+1) + for (i,j,v) in nziterator(A.blocks.own_own) + li = own_to_local_row[i] + p = li_to_p[li] + if p == 0 + continue + end + ptrs[p+1] += 1 + end + for (i,j,v) in nziterator(A.blocks.own_ghost) + li = own_to_local_row[i] + p = li_to_p[li] + if p == 0 + continue + end + ptrs[p+1] += 1 + end + length_to_ptrs!(ptrs) + ndata = ptrs[end]-1 + T = eltype(A) + I_snd = JaggedArray(zeros(Int,ndata),ptrs) + J_snd = JaggedArray(zeros(Int,ndata),ptrs) + V_snd = JaggedArray(zeros(T,ndata),ptrs) + k_snd = JaggedArray(zeros(Int32,ndata),ptrs) + for (k,(i,j,v)) in enumerate(nziterator(A.blocks.own_own)) + li = own_to_local_row[i] + p = li_to_p[li] + if p == 0 + continue + end + q = ptrs[p] + I_snd.data[q] = own_to_global_row[i] + J_snd.data[q] = own_to_global_col[j] + V_snd.data[q] = v + k_snd.data[q] = k + ptrs[p] += 1 + end + nnz_own_own = nnz(A.blocks.own_own) + for (k,(i,j,v)) in enumerate(nziterator(A.blocks.own_ghost)) + li = own_to_local_row[i] + p = li_to_p[li] + if p == 0 + continue + end + q = ptrs[p] + I_snd.data[q] = own_to_global_row[i] + J_snd.data[q] = ghost_to_global_col[j] + V_snd.data[q] = v + k_snd.data[q] = k+nnz_own_own + ptrs[p] += 1 + end + rewind_ptrs!(ptrs) + cache_snd = (;parts_snd,lids_snd,I_snd,J_snd,V_snd,k_snd) + cache_snd + end + function 
setup_rcv(parts_rcv,lids_rcv,I_rcv,J_rcv,V_rcv) + cache_rcv = (;parts_rcv,lids_rcv,I_rcv,J_rcv,V_rcv) + cache_rcv + end + function finalize(A,cache_snd,cache_rcv,rows_co,cols_fa) + I_rcv_data = cache_rcv.I_rcv.data + J_rcv_data = cache_rcv.J_rcv.data + V_rcv_data = cache_rcv.V_rcv.data + global_to_own_col = global_to_own(cols_fa) + global_to_ghost_col = global_to_ghost(cols_fa) + is_own = findall(j->global_to_own_col[j]!=0,J_rcv_data) + is_ghost = findall(j->global_to_ghost_col[j]!=0,J_rcv_data) + I_rcv_own = I_rcv_data[is_own] + J_rcv_own = J_rcv_data[is_own] + V_rcv_own = V_rcv_data[is_own] + I_rcv_ghost = I_rcv_data[is_ghost] + J_rcv_ghost = J_rcv_data[is_ghost] + V_rcv_ghost = V_rcv_data[is_ghost] + map_global_to_ghost!(I_rcv_own,rows_co) + map_global_to_ghost!(I_rcv_ghost,rows_co) + map_global_to_own!(J_rcv_own,cols_fa) + map_global_to_ghost!(J_rcv_ghost,cols_fa) + own_own = A.blocks.own_own + own_ghost = A.blocks.own_ghost + n_ghost_rows = ghost_length(rows_co) + n_own_cols = own_length(cols_fa) + n_ghost_cols = ghost_length(cols_fa) + TA = typeof(A.blocks.ghost_own) + ghost_own = compresscoo(TA,I_rcv_own,J_rcv_own,V_rcv_own,n_ghost_rows,n_own_cols) + ghost_ghost = compresscoo(TA,I_rcv_ghost,J_rcv_ghost,V_rcv_ghost,n_ghost_rows,n_ghost_cols) + K_own = precompute_nzindex(ghost_own,I_rcv_own,J_rcv_own) + K_ghost = precompute_nzindex(ghost_ghost,I_rcv_ghost,J_rcv_ghost) + blocks = split_matrix_blocks(own_own,own_ghost,ghost_own,ghost_ghost) + values = split_matrix(blocks,local_permutation(rows_co),local_permutation(cols_fa)) + k_snd = cache_snd.k_snd + V_snd = cache_snd.V_snd + V_rcv = cache_rcv.V_rcv + parts_snd = cache_snd.parts_snd + parts_rcv = cache_rcv.parts_rcv + cache = (;parts_snd,parts_rcv,k_snd,V_snd,V_rcv,is_ghost,is_own,V_rcv_own,V_rcv_ghost,K_own,K_ghost) + values,cache + end + @assert matching_own_indices(axes(A,1),PRange(rows_co)) + rows_fa = partition(axes(A,1)) + cols_fa = partition(axes(A,2)) + # snd and rcv are swapped on purpose + 
parts_rcv,parts_snd = assembly_neighbors(rows_co) + lids_rcv,lids_snd = assembly_local_indices(rows_co) + cache_snd = map(setup_snd,partition(A),parts_snd,lids_snd,rows_co,cols_fa) + I_snd = map(i->i.I_snd,cache_snd) + J_snd = map(i->i.J_snd,cache_snd) + V_snd = map(i->i.V_snd,cache_snd) + graph = ExchangeGraph(parts_snd,parts_rcv) + t_I = exchange(I_snd,graph) + t_J = exchange(J_snd,graph) + t_V = exchange(V_snd,graph) + @async begin + I_rcv = fetch(t_I) + J_rcv = fetch(t_J) + V_rcv = fetch(t_V) + cache_rcv = map(setup_rcv,parts_rcv,lids_rcv,I_rcv,J_rcv,V_rcv) + values,cache = map(finalize,partition(A),cache_snd,cache_rcv,rows_co,cols_fa) |> tuple_of_arrays + B = PSparseMatrix(values,rows_co,cols_fa,A.assembled) + if val_parameter(reuse) == false + B + else + B,cache + end + end +end + +function psparse_consitent_impl!(B,A,::Type{<:AbstractSplitMatrix},cache) + function setup_snd(A,cache) + k_snd_data = cache.k_snd.data + V_snd_data = cache.V_snd.data + nnz_own_own = nnz(A.blocks.own_own) + A_own_own = nonzeros(A.blocks.own_own) + A_own_ghost = nonzeros(A.blocks.own_ghost) + for (p,k) in enumerate(k_snd_data) + if k <= nnz_own_own + v = A_own_own[k] + else + v = A_own_ghost[k-nnz_own_own] + end + V_snd_data[p] = v + end + end + function setup_rcv(B,cache) + is_ghost = cache.is_ghost + is_own = cache.is_own + V_rcv_data = cache.V_rcv.data + K_own = cache.K_own + K_ghost = cache.K_ghost + V_rcv_own = V_rcv_data[is_own] + V_rcv_ghost = V_rcv_data[is_ghost] + setcoofast!(B.blocks.ghost_own,V_rcv_own,K_own) + setcoofast!(B.blocks.ghost_ghost,V_rcv_ghost,K_ghost) + B + end + map(setup_snd,partition(A),cache) + parts_snd = map(i->i.parts_snd,cache) + parts_rcv = map(i->i.parts_rcv,cache) + V_snd = map(i->i.V_snd,cache) + V_rcv = map(i->i.V_rcv,cache) + graph = ExchangeGraph(parts_snd,parts_rcv) + t = exchange!(V_rcv,V_snd,graph) + @async begin + wait(t) + map(setup_rcv,partition(B),cache) + B + end +end + +function Base.:*(a::PSparseMatrix,b::PVector) + Ta = eltype(a) + 
Tb = eltype(b) + T = typeof(zero(Ta)*zero(Tb)+zero(Ta)*zero(Tb)) + c = PVector{Vector{T}}(undef,partition(axes(a,1))) + mul!(c,a,b) + c +end + +function Base.:*(a::Number,b::PSparseMatrix) + matrix_partition = map(partition(b)) do values + a*values + end + rows = partition(axes(b,1)) + cols = partition(axes(b,2)) + PSparseMatrix(matrix_partition,rows,cols,b.assembled) +end + +function Base.:*(b::PSparseMatrix,a::Number) + a*b +end + +for op in (:+,:-) + @eval begin + function Base.$op(a::PSparseMatrix) + matrix_partition = map(partition(a)) do a + $op(a) + end + rows = partition(axes(a,1)) + cols = partition(axes(a,2)) + PSparseMatrix(matrix_partition,rows,cols,a.assembled) + end + function Base.$op(a::PSparseMatrix,b::PSparseMatrix) + @boundscheck @assert matching_local_indices(axes(a,1),axes(b,1)) + @boundscheck @assert matching_local_indices(axes(a,2),axes(b,2)) + matrix_partition = map(partition(a),partition(b)) do a,b + $op(a,b) + end + rows = partition(axes(b,1)) + cols = partition(axes(b,2)) + assembled = a.assembled && b.assembled + PSparseMatrix(matrix_partition,rows,cols,assembled) + end + end +end + +function LinearAlgebra.mul!(c::PVector,a::PSparseMatrix,b::PVector,α::Number,β::Number) + @boundscheck @assert matching_own_indices(axes(c,1),axes(a,1)) + @boundscheck @assert matching_own_indices(axes(a,2),axes(b,1)) + @boundscheck @assert matching_ghost_indices(axes(a,2),axes(b,1)) + if ! a.assembled + @boundscheck @assert matching_ghost_indices(axes(a,1),axes(c,1)) + end + # Start the exchange + t = consistent!(b) + # Meanwhile, process the owned blocks + map(own_values(c),own_own_values(a),own_values(b)) do co,aoo,bo + if β != 1 + β != 0 ? rmul!(co, β) : fill!(co,zero(eltype(co))) + end + mul!(co,aoo,bo,α,1) + end + if ! a.assembled + map(ghost_values(c),ghost_own_values(a),own_values(b)) do ch,aho,bo + if β != 1 + β != 0 ? 
rmul!(ch, β) : fill!(ch,zero(eltype(ch))) + end + mul!(ch,aho,bo,α,1) + end + end + # Wait for the exchange to finish + wait(t) + # process the ghost block + map(own_values(c),own_ghost_values(a),ghost_values(b)) do co,aoh,bh + mul!(co,aoh,bh,α,1) + end + if ! a.assembled + map(ghost_values(c),ghost_ghost_values(a),ghost_values(b)) do ch,ahh,bh + mul!(ch,ahh,bh,α,1) + end + assemble!(c) |> wait + end + c +end + +Base.similar(a::PSparseMatrix) = similar(a,eltype(a)) +function Base.similar(a::PSparseMatrix,::Type{T}) where T + matrix_partition = map(partition(a)) do values + similar(values,T) + end + rows, cols = axes(a) + PSparseMatrix(matrix_partition,partition(rows),partition(cols),a.assembled) +end + +function Base.copy!(a::PSparseMatrix,b::PSparseMatrix) + @assert size(a) == size(b) + copyto!(a,b) +end + +function Base.copyto!(a::PSparseMatrix,b::PSparseMatrix) + ## Think about the role + @assert a.assembled == b.assembled + if partition(axes(a,1)) === partition(axes(b,1)) && partition(axes(a,2)) === partition(axes(b,2)) + map(copy!,partition(a),partition(b)) + else + error("Trying to copy a OldPSparseMatrix into another one with a different data layout. This case is not implemented yet. 
It would require communications.") + end + a +end + +function LinearAlgebra.fillstored!(a::PSparseMatrix,v) + map(partition(a)) do values + LinearAlgebra.fillstored!(values,v) + end + a +end + +# This function could be removed if IterativeSolvers was implemented in terms +# of axes(A,d) instead of size(A,d) +function IterativeSolvers.zerox(A::PSparseMatrix,b::PVector) + T = IterativeSolvers.Adivtype(A, b) + x = similar(b, T, axes(A, 2)) + fill!(x, zero(T)) + return x +end + +""" + repartition(A::PSparseMatrix,new_rows,new_cols;reuse=false) +""" +function repartition(A::PSparseMatrix,new_rows,new_cols;reuse=Val(false)) + function prepare_triplets(A_own_own,A_own_ghost,A_rows,A_cols) + I1,J1,V1 = findnz(A_own_own) + I2,J2,V2 = findnz(A_own_ghost) + map_own_to_global!(I1,A_rows) + map_own_to_global!(I2,A_rows) + map_own_to_global!(J1,A_cols) + map_ghost_to_global!(J2,A_cols) + I = vcat(I1,I2) + J = vcat(J1,J2) + V = vcat(V1,V2) + (I,J,V) + end + A_own_own = own_own_values(A) + A_own_ghost = own_ghost_values(A) + A_rows = partition(axes(A,1)) + A_cols = partition(axes(A,2)) + I,J,V = map(prepare_triplets,A_own_own,A_own_ghost,A_rows,A_cols) |> tuple_of_arrays + # TODO this one does not preserve the local storage layout of A + t = psparse(I,J,V,new_rows,new_cols;reuse=true) + @async begin + B,cacheB = fetch(t) + if val_parameter(reuse) == false + B + else + cache = (V,cacheB) + B, cache + end + end +end + +""" + repartition!(B::PSparseMatrix,A::PSparseMatrix,cache) +""" +function repartition!(B::PSparseMatrix,A::PSparseMatrix,cache) + (V,cacheB) = cache + function fill_values!(V,A_own_own,A_own_ghost) + nz_own_own = nonzeros(A_own_own) + nz_own_ghost = nonzeros(A_own_ghost) + l1 = length(nz_own_own) + l2 = length(nz_own_ghost) + V[1:l1] = nz_own_own + V[(1:l2).+l1] = nz_own_ghost + end + A_own_own = own_own_values(A) + A_own_ghost = own_ghost_values(A) + map(fill_values!,V,A_own_own,A_own_ghost) + psparse!(B,V,cacheB) +end + +""" + 
repartition(A::PSparseMatrix,b::PVector,new_rows,new_cols;reuse=false) +""" +function repartition(A::PSparseMatrix,b::PVector,new_rows,new_cols;reuse=Val(false)) + # TODO this is just a reference implementation + # for the moment. It can be optimized. + t1 = repartition(A,new_rows,new_cols;reuse=true) + t2 = repartition(b,new_rows;reuse=true) + @async begin + B,cacheB = fetch(t1) + c,cachec = fetch(t2) + if val_parameter(reuse) + cache = (cacheB,cachec) + B,c,cache + else + B,c + end + end +end + +""" + repartition!(B,c,A,b,cache) + +- `B::PSparseMatrix` +- `c::PVector` +- `A::PSparseMatrix` +- `b::PVector` +- `cache` + +""" +function repartition!(B::PSparseMatrix,c::PVector,A::PSparseMatrix,b::PVector,cache) + (cacheB,cachec) = cache + t1 = repartition!(B,A,cacheB) + t2 = repartition!(c,b,cachec) + @async begin + wait(t1) + wait(t2) + B,c + end +end + +""" + psystem(I,J,V,I2,V2,rows,cols;kwargs...) +""" +function psystem(I,J,V,I2,V2,rows,cols; + assembled=false, + assemble=true, + discover_rows=true, + discover_cols=true, + restore_ids = true, + reuse=Val(false), + assembly_neighbors_options_rows = (;), + assembly_neighbors_options_cols = (;) + ) + + # TODO this is just a reference implementation + # for the moment. 
+ # It can be optimized to exploit the fact + # that we want to generate a matrix and a vector + + t1 = psparse(I,J,V,rows,cols; + assembled, + assemble, + discover_rows, + discover_cols, + restore_ids, + assembly_neighbors_options_rows, + assembly_neighbors_options_cols, + reuse=true) + + t2 = pvector(I2,V2,rows; + assembled, + assemble, + discover_rows, + restore_ids, + assembly_neighbors_options_rows, + reuse=true) + + @async begin + A,cacheA = fetch(t1) + b,cacheb = fetch(t2) + if val_parameter(reuse) + cache = (cacheA,cacheb) + A,b,cache + else + A,b + end + end +end + +""" + psystem!(A,b,V,V2,cache) +""" +function psystem!(A,b,V,V2,cache) + (cacheA,cacheb) = cache + t1 = psparse!(A,V,cacheA) + t2 = pvector!(b,V2,cacheb) + @async begin + wait(t1) + wait(t2) + (A,b) + end +end + +# Not efficient, just for convenience and debugging purposes +function Base.:\(a::PSparseMatrix,b::PVector) + m,n = size(a) + ranks = linear_indices(partition(a)) + rows_trivial = trivial_partition(ranks,m) + cols_trivial = trivial_partition(ranks,n) + a_in_main = repartition(a,rows_trivial,cols_trivial) |> fetch + b_in_main = repartition(b,partition(axes(a_in_main,1))) |> fetch + @static if VERSION >= v"1.9" + values = map(\,own_own_values(a_in_main),own_values(b_in_main)) + else + values = map(\,own_own_values(a_in_main),map(collect,own_values(b_in_main))) + end + c_in_main = PVector(values,cols_trivial) + cols = partition(axes(a,2)) + c = repartition(c_in_main,cols) |> fetch + c +end + +# Not efficient, just for convenience and debugging purposes +struct PLUNew{A,B,C} + lu_in_main::A + rows::B + cols::C +end +function LinearAlgebra.lu(a::PSparseMatrix) + m,n = size(a) + ranks = linear_indices(partition(a)) + rows_trivial = trivial_partition(ranks,m) + cols_trivial = trivial_partition(ranks,n) + a_in_main = repartition(a,rows_trivial,cols_trivial) |> fetch + lu_in_main = map_main(lu,own_own_values(a_in_main)) + PLUNew(lu_in_main,axes(a_in_main,1),axes(a_in_main,2)) +end +function 
LinearAlgebra.lu!(b::PLUNew,a::PSparseMatrix) + rows_trivial = partition(b.rows) + cols_trivial = partition(b.cols) + a_in_main = repartition(a,rows_trivial,cols_trivial) |> fetch + map_main(lu!,b.lu_in_main,own_own_values(a_in_main)) + b +end +function LinearAlgebra.ldiv!(c::PVector,a::PLUNew,b::PVector) + rows_trivial = partition(a.rows) + cols_trivial = partition(a.cols) + b_in_main = repartition(b,rows_trivial) |> fetch + values = map(partition(c),partition(b_in_main)) do c,b + similar(c,length(b)) + end + map_main(ldiv!,values,a.lu_in_main,partition(b_in_main)) + c_in_main = PVector(values,cols_trivial) + repartition!(c,c_in_main) |> wait + c +end + diff --git a/src/p_vector.jl b/src/p_vector.jl index 8cebf37a..06d79e94 100644 --- a/src/p_vector.jl +++ b/src/p_vector.jl @@ -227,9 +227,12 @@ function Base.show(io::IO,k::MIME"text/plain",data::PVector) n = length(data) np = length(partition(data)) map_main(partition(data)) do values - println(io,"$n-element PVector{$T} partitioned into $np parts") + println(io,"$n-element PVector partitioned into $np parts of type $T") end end +function Base.show(io::IO,data::PVector) + print(io,"PVector(…)") +end function p_vector_cache(vector_partition,index_partition) p_vector_cache_impl(eltype(vector_partition),vector_partition,index_partition) @@ -387,7 +390,7 @@ julia> map(local_to_global,row_partition) [3, 4, 5, 6] julia> a = pones(row_partition) -6-element PVector{Vector{Float64}} partitioned into 2 parts +6-element PVector partitioned into 2 parts of type Vector{Float64} julia> local_values(a) 2-element Vector{Vector{Float64}}: @@ -439,7 +442,7 @@ julia> map(local_to_global,row_partition) [3, 4, 5, 6] julia> a = pvector(inds->fill(part_id(inds),length(inds)),row_partition) -6-element PVector{Vector{Int32}} partitioned into 2 parts +6-element PVector partitioned into 2 parts of type Vector{Int32} julia> local_values(a) 2-element Vector{Vector{Int32}}: @@ -455,7 +458,6 @@ julia> local_values(a) ``` """ function 
consistent!(a::PVector) - insert(a,b) = b cache = map(reverse,a.cache) t = assemble!(insert,partition(a),cache) @async begin @@ -463,6 +465,7 @@ function consistent!(a::PVector) a end end +insert(a,b) = b function Base.similar(a::PVector,::Type{T},inds::Tuple{<:PRange}) where T @@ -535,16 +538,9 @@ Equivalent to vector_partition = map(f,index_partition) PVector(vector_partition,index_partition) end +pvector(f,r::PRange) = pvector(f,partition(r)) -""" - pvector!([f,]I,V,index_partition;discover_rows=true) -> Task - -Crate an instance of [`PVector`](@ref) by setting arbitrary entries -from each of the underlying parts. It returns a task that produces the -instance of [`PVector`](@ref) allowing latency hiding while performing -the communications needed in its setup. -""" -function pvector!(f,I,V,index_partition;discover_rows=true) +function old_pvector!(f,I,V,index_partition;discover_rows=true) if discover_rows I_owner = find_owner(index_partition,I) index_partition = map(union_ghost,index_partition,I,I_owner) @@ -555,8 +551,99 @@ function pvector!(f,I,V,index_partition;discover_rows=true) assemble!(v) end -function pvector!(I,V,index_partition;kwargs...) - pvector!(default_local_values,I,V,index_partition;kwargs...) +function dense_vector(I,V,n) + T = eltype(V) + a = zeros(T,n) + for (i,v) in zip(I,V) + a[i] += v + end + a +end + +function pvector(I,V,rows;kwargs...) + pvector(dense_vector,I,V,rows;kwargs...) +end + +""" + pvector([f,]I,V,index_partition;kwargs...) -> Task + +Crate an instance of [`PVector`](@ref) by setting arbitrary entries +from each of the underlying parts. It returns a task that produces the +instance of [`PVector`](@ref) allowing latency hiding while performing +the communications needed in its setup. 
+""" +function pvector(f,I,V,rows; + assembled=false, + assemble=true, + discover_rows=true, + restore_ids = true, + reuse=Val(false), + assembly_neighbors_options_rows = (;) + ) + + if assembled || assemble + @boundscheck @assert all(i->ghost_length(i)==0,rows) + end + + if !assembled && discover_rows + I_owner = find_owner(rows,I) + rows_sa = map(union_ghost,rows,I,I_owner) + assembly_neighbors(rows_sa;assembly_neighbors_options_rows...) + else + rows_sa = rows + end + map(map_global_to_local!,I,rows_sa) + values_sa = map(f,I,V,map(local_length,rows_sa)) + if val_parameter(reuse) + K = map(copy,I) + end + if restore_ids + map(map_local_to_global!,I,rows_sa) + end + A = PVector(values_sa,rows_sa) + if !assembled && assemble + t = PartitionedArrays.assemble(A,rows;reuse=true) + else + t = @async A,nothing + end + if val_parameter(reuse) == false + return @async begin + B, cacheB = fetch(t) + B + end + else + return @async begin + B, cacheB = fetch(t) + cache = (A,cacheB,assemble,assembled,K) + (B, cache) + end + end +end + +""" + pvector!(B::PVector,V,cache) +""" +function pvector!(B,V,cache) + function update!(A,K,V) + fill!(A,0) + for (k,v) in zip(K,V) + A[k] += v + end + end + (A,cacheB,assemble,assembled,K) = cache + rows_sa = partition(axes(A,1)) + values_sa = partition(A) + map(update!,values_sa,K,V) + if !assembled && assemble + t = PartitionedArrays.assemble!(B,A,cacheB) + else + t = @async B + end +end + + +function old_pvector!(I,V,index_partition;kwargs...) + old_pvector!(default_local_values,I,V,index_partition;kwargs...) end function default_local_values(indices) @@ -842,3 +929,151 @@ for M in Distances.metrics end end +# New stuff + +function assemble(v::PVector;kwargs...) + rows = map(remove_ghost,partition(axes(v,1))) + assemble(v,rows;kwargs...) 
+end + +""" + assemble(v::PVector[,rows];reuse=false) +""" +function assemble(v::PVector,rows;reuse=Val(false)) + @boundscheck @assert matching_own_indices(axes(v,1),PRange(rows)) + # TODO this is just a reference implementation + # for the moment. + # The construction of v2 can (should) be avoided + w = similar(v,PRange(rows)) + v2 = copy(v) + t = assemble!(v2) + @async begin + wait(t) + w .= v2 + if val_parameter(reuse) + cache = v2 + w,cache + else + w + end + end +end + +""" + assemble!(w::PVector,v::PVector,cache) +""" +function assemble!(w::PVector,v::PVector,cache) + # TODO this is just a reference implementation + # for the moment. + # The construction of v2 can (should) be avoided + v2 = cache + copy!(v2,v) + t = assemble!(v2) + @async begin + wait(t) + w .= v2 + w + end +end + +""" + consistent(v::PVector,rows;reuse=false) +""" +function consistent(v::PVector,rows;reuse=Val(false)) + # TODO this is just a reference implementation + # for the moment. It can be optimized + @boundscheck @assert matching_own_indices(axes(v,1),PRange(rows)) + w = similar(v,PRange(rows)) + w .= v + t = consistent!(w) + @async begin + wait(t) + if val_parameter(reuse) + w,nothing + else + w + end + end +end + +""" + consistent!(w::PVector,v::PVector,cache) +""" +function consistent!(w::PVector,v::PVector,cache) + w .= v + t = consistent!(w) + @async begin + wait(t) + w + end +end + + +function repartition_cache(v::PVector,new_partition) + rows_da = map(remove_ghost,new_partition) + row_partition = partition(axes(v,1)) + I = map(collect∘own_to_global,row_partition) + V = own_values(v) + I_owner = find_owner(rows_da,I) + rows_sa = map(union_ghost,rows_da,I,I_owner) + map(map_global_to_local!,I,rows_sa) + v_sa = similar(v,PRange(rows_sa)) + (;I,v_sa) +end + +""" + repartition(v::PVector,new_partition;reuse=false) +""" +function repartition(v::PVector,new_partition;reuse=Val(false)) + w = similar(v,PRange(new_partition)) + cache = repartition_cache(v,new_partition) + t = 
repartition!(w,v,cache) + @async begin + wait(t) + if val_parameter(reuse) == true + w, cache + else + w + end + end +end + +function repartition!(w::PVector,v::PVector;kwargs...) + cache=repartition_cache(v,partition(axes(w,1))) + repartition!(w,v,cache;kwargs...) +end + +""" + repartition!(w::PVector,v::PVector[,cache];reversed=false) +""" +function repartition!(w::PVector,v::PVector,cache;reversed=false) + new_partition = partition(axes(w,1)) + old_partition = partition(axes(v,1)) + I = cache.I + v_sa = cache.v_sa + if ! reversed + V = own_values(v) + fill!(v_sa,0) + map(setindex!,partition(v_sa),V,I) + t = assemble!(v_sa) + return @async begin + wait(t) + w .= v_sa + w + end + else + v_sa .= v + t = consistent!(v_sa) + return @async begin + wait(t) + map(partition(v_sa),partition(w),I) do v_sa,w,I + for k in 1:length(I) + w[k] = v_sa[I[k]] + end + end + w + end + end +end + + diff --git a/src/primitives.jl b/src/primitives.jl index c0868867..fae1842c 100644 --- a/src/primitives.jl +++ b/src/primitives.jl @@ -338,8 +338,13 @@ function scatter_impl!(rcv,snd,source,::Type{T}) where T rcv end +# Deprecated +emit(args...;kwargs...) = multicast(args...;kwargs...) +emit!(args...;kwargs...) = multicast!(args...;kwargs...) +allocate_emit(args...;kwargs...) = allocate_multicast(args...;kwargs...) + """ - emit(snd;source=MAIN) + multicast(snd;source=MAIN) Copy `snd[source]` into a new array of the same size and type as `snd`. 
@@ -355,7 +360,7 @@ julia> a = [0,0,2,0] 2 0 -julia> emit(a,source=3) +julia> multicast(a,source=3) 4-element Vector{Int64}: 2 2 @@ -363,37 +368,37 @@ julia> emit(a,source=3) 2 ``` """ -function emit(snd;source=MAIN) +function multicast(snd;source=MAIN) T = eltype(snd) - emit_impl(snd,source,T) + multicast_impl(snd,source,T) end -function emit_impl(snd,source,::Type{T}) where T +function multicast_impl(snd,source,::Type{T}) where T @assert source !== :all "All to all not implemented" - rcv = allocate_emit(snd;source) - emit!(rcv,snd;source) + rcv = allocate_multicast(snd;source) + multicast!(rcv,snd;source) rcv end """ - allocate_emit(snd;source=1) + allocate_multicast(snd;source=1) -Allocate an array to be used in the first argument of [`emit!`](@ref). +Allocate an array to be used in the first argument of [`multicast!`](@ref). """ -function allocate_emit(snd;source=1) +function allocate_multicast(snd;source=1) T = eltype(snd) - allocate_emit_impl(snd,source,T) + allocate_multicast_impl(snd,source,T) end -function allocate_emit_impl(snd,source,::Type{T}) where T +function allocate_multicast_impl(snd,source,::Type{T}) where T @assert source !== :all "Scatter all not implemented" similar(snd) end -function allocate_emit_impl(snd,source,::Type{T}) where T<:AbstractVector +function allocate_multicast_impl(snd,source,::Type{T}) where T<:AbstractVector @assert source !== :all "Scatter all not implemented" n = map(length,snd) - n_all = emit(n;source) + n_all = multicast(n;source) S = eltype(T) map(n_all) do n Vector{S}(undef,n) @@ -401,18 +406,18 @@ function allocate_emit_impl(snd,source,::Type{T}) where T<:AbstractVector end """ - emit!(rcv,snd;source=1) + multicast!(rcv,snd;source=1) -In-place version of [`emit`](@ref). The destination array `rcv` -can be generated with the helper function [`allocate_emit`](@ref). +In-place version of [`multicast`](@ref). The destination array `rcv` +can be generated with the helper function [`allocate_multicast`](@ref). 
It returns `rcv`. """ -function emit!(rcv,snd;source=1) +function multicast!(rcv,snd;source=1) T = eltype(snd) - emit_impl!(rcv,snd,source,T) + multicast_impl!(rcv,snd,source,T) end -function emit_impl!(rcv,snd,source,::Type{T}) where T +function multicast_impl!(rcv,snd,source,::Type{T}) where T @assert source !== :all "Emit all not implemented" for i in eachindex(rcv) rcv[i] = snd[source] @@ -569,12 +574,16 @@ function default_find_rcv_ids(::AbstractArray) end """ - ExchangeGraph(snd; symmetric=false [,neighbors,find_rcv_ids]) + ExchangeGraph(snd; symmetric=false [rcv, neighbors,find_rcv_ids]) Create an `ExchangeGraph` object only from the lists of outgoing -neighbors in `snd`. If `symmetric==true`, then the incoming neighbors +neighbors in `snd`. In case the list of incoming neighbors is +known, it can be passed as key-word argument by setting `rcv` and the rest of +key-word arguments are ignored. +If `symmetric==true`, then the incoming neighbors are set to `snd`. Otherwise, either the optional `neighbors` or -`find_rcv_ids` are considered, in that order. `neighbors` is also an `ExchangeGraph` +`find_rcv_ids` are considered, in that order. +`neighbors` is also an `ExchangeGraph` that contains a super set of the outgoing and incoming neighbors associated with `snd`. It is used to find the incoming neighbors `rcv` efficiently. If `neighbors` are not provided, then `find_rcv_ids` @@ -583,10 +592,13 @@ is used (either the user-provided or a default one). rcv side of the exchange graph out of the snd side information. 
""" function ExchangeGraph(snd; + rcv=nothing, neighbors=nothing, symmetric=false, find_rcv_ids=default_find_rcv_ids(snd)) - if symmetric + if (rcv != nothing) + ExchangeGraph(snd,rcv) + elseif symmetric ExchangeGraph(snd,snd) elseif (neighbors != nothing) ExchangeGraph_impl_with_neighbors(snd,neighbors) diff --git a/src/sparse_utils.jl b/src/sparse_utils.jl index 1d909115..d1942928 100644 --- a/src/sparse_utils.jl +++ b/src/sparse_utils.jl @@ -302,6 +302,7 @@ function compresscoo( m,n,combine) end + function compresscoo( ::Type{SparseMatrixCSR{Bi,Tv,Ti}}, I::AbstractVector, @@ -332,3 +333,21 @@ Base.@propagate_inbounds Base.getindex(v::EltypeVector{T},i::Integer) where T = Base.@propagate_inbounds Base.setindex!(v::EltypeVector,w,i::Integer) = (v.parent[i] = w) Base.IndexStyle(::Type{<:EltypeVector{T,V}}) where {T,V} = IndexStyle(V) +function precompute_nzindex(A,I,J) + K = zeros(Int32,length(I)) + for (p,(i,j)) in enumerate(zip(I,J)) + K[p] = nzindex(A,i,j) + end + K +end + +function setcoofast!(A,V,K) + LinearAlgebra.fillstored!(A,0) + A_nz = nonzeros(A) + for (k,v) in zip(K,V) + A_nz[k] += v + end + A +end + + diff --git a/test/debug_array/p_sparse_matrix_tests.jl b/test/debug_array/p_sparse_matrix_tests.jl index c5e3a2ec..a8c345c3 100644 --- a/test/debug_array/p_sparse_matrix_tests.jl +++ b/test/debug_array/p_sparse_matrix_tests.jl @@ -1,4 +1,4 @@ -module DebugArrayPSparseMatrixTests +module DebugArrayOldPSparseMatrixTests using PartitionedArrays diff --git a/test/fdm_example.jl b/test/fdm_example.jl index 30998e52..abb80895 100644 --- a/test/fdm_example.jl +++ b/test/fdm_example.jl @@ -82,11 +82,11 @@ function fdm_example(distribute) toc!(t,"IJV") I,J,V = tuple_of_arrays(IJV) - # Build the PSparseMatrix from the coo-vectors + # Build the OldPSparseMatrix from the coo-vectors # and the data distribution described by rows and cols. 
tic!(t) tentative_col_partition = row_partition - A = psparse!(I,J,V,row_partition,tentative_col_partition,discover_rows=false) |> fetch + A = psparse(I,J,V,row_partition,tentative_col_partition,discover_rows=false) |> fetch toc!(t,"A") cols = axes(A,2) diff --git a/test/fem_example.jl b/test/fem_example.jl index 755544d7..bb183d57 100644 --- a/test/fem_example.jl +++ b/test/fem_example.jl @@ -262,37 +262,6 @@ function setup_exact_solution(values,space,grid,col_indices) end end -function setup_dofs(space,grid,tentative_dof_indices) - ghost_dof_to_local_dof = findall(i->i==0,space.permutation) - n_ghost_dofs = length(ghost_dof_to_local_dof) - n_local_dofs = length(space.permutation) - n_own_dofs = n_local_dofs - n_ghost_dofs - n_global_dofs = global_length(tentative_dof_indices) - space.permutation[ghost_dof_to_local_dof] = .- (1:n_ghost_dofs) - ghost_dof_to_owner = space.local_dof_to_owner[ghost_dof_to_local_dof] - ghost_to_global_dof = zeros(Int,n_ghost_dofs) - for cartesian_local_cell in grid.linear_to_cartesian_local_cell - local_cell = grid.cartesian_to_linear_local_cell[cartesian_local_cell] - for (element_node,cartesian_element_node) in enumerate(grid.linear_to_cartesian_element_node) - cartesian_local_node = cartesian_local_cell + (cartesian_element_node - grid.cartesian_offset) - local_node = grid.cartesian_to_linear_local_node[cartesian_local_node] - local_dof = space.local_node_to_dof[local_node] - if local_dof > 0 - ghost_dof = space.permutation[local_dof] - if ghost_dof < 0 - global_dof = space.local_cell_to_global_dofs[local_cell][element_node] - ghost_to_global_dof[-ghost_dof] = global_dof - end - end - end - end - for local_dof in ghost_dof_to_local_dof - space.permutation[local_dof] = n_own_dofs - space.permutation[local_dof] - end - ghost_dofs = GhostIndices(n_global_dofs,ghost_to_global_dof,ghost_dof_to_owner) - permute_indices(replace_ghost(tentative_dof_indices,ghost_dofs),space.permutation) -end - function fem_example(distribute) params = 
setup_params() n_ranks = prod(params.parts_per_dir) @@ -303,35 +272,70 @@ function fem_example(distribute) grid = map(setup_grid,cell_partition) n_own_dofs, space = map(setup_space,grid) |> tuple_of_arrays n_global_dofs = sum(n_own_dofs) - tentative_dof_partition = variable_partition(n_own_dofs,n_global_dofs) - local_cell_to_global_dofs = map(setup_cell_dofs,grid,space,tentative_dof_partition) + dof_partition = variable_partition(n_own_dofs,n_global_dofs) + local_cell_to_global_dofs = map(setup_cell_dofs,grid,space,dof_partition) cell_to_global_dofs = PVector(local_cell_to_global_dofs,cell_partition) consistent!(cell_to_global_dofs) |> wait - map(finish_cell_dofs,grid,space,tentative_dof_partition) + map(finish_cell_dofs,grid,space,dof_partition) I,J,V = map(setup_IJV,space,grid) |> tuple_of_arrays - t = psparse!(I,J,V,tentative_dof_partition,tentative_dof_partition) - I,V = map(setup_b,space,grid) |> tuple_of_arrays + t = psparse(I,J,V,dof_partition,dof_partition) + II,VV = map(setup_b,space,grid) |> tuple_of_arrays A = fetch(t) - display(A) - row_partition = partition(axes(A,1)) - b = pvector!(I,V,row_partition,discover_rows=false) |> fetch + b = pvector(II,VV,dof_partition) |> fetch x = IterativeSolvers.cg(A,b,verbose=i_am_main(rank)) x̂ = similar(x) col_partition = partition(axes(A,2)) map(setup_exact_solution,local_values(x̂),space,grid,col_partition) @test norm(x-x̂) < 1.0e-5 - dof_partition = map(setup_dofs,space,grid,tentative_dof_partition) - # Some optimizations when building A - try - cell_partition = uniform_partition(rank,params.parts_per_dir,params.cells_per_dir,ghost_per_dir) - I_owner = find_owner(tentative_dof_partition,I) - row_partition = map(union_ghost,tentative_dof_partition,I,I_owner) - neighbors = assembly_graph(cell_partition) - assembly_graph(row_partition;neighbors) - t = psparse!(I,J,V,row_partition,tentative_dof_partition,discover_rows=false) - A = fetch(t) - catch e - rethrow(e) - end + + ## re-assembly + A,cacheA = 
psparse(I,J,V,dof_partition,dof_partition;reuse=true) |> fetch + b,cacheb = pvector(II,VV,dof_partition;reuse=true) |> fetch + psparse!(A,V,cacheA) |> wait + pvector!(b,VV,cacheb) |> wait + display((A,cacheA)) + + x = IterativeSolvers.cg(A,b,verbose=i_am_main(rank)) + @test norm(x-x̂) < 1.0e-5 + + # Simultaneous generation of matrix and vector + A,b = psystem(I,J,V,II,VV,dof_partition,dof_partition) |> fetch + x = IterativeSolvers.cg(A,b,verbose=i_am_main(rank)) + @test norm(x-x̂) < 1.0e-5 + + # Simultaneous generation of matrix and vector + # giving extra info and allowing for reassembly + # This one is potentially the most efficient + # with the only caveat that the computation + # of II and VV is not overlapped with the assembly + # of the matrix (not a big problem since I,J,V,II,VV are usually + # computed in the same cell-wise loop) + neighbors = assembly_graph(cell_partition) + A,b,cache = psystem( + I,J,V,II,VV,dof_partition,dof_partition; + assembly_neighbors_options_rows = (;neighbors), + assembly_neighbors_options_cols = (;neighbors), + reuse = true) |> fetch + x = IterativeSolvers.cg(A,b,verbose=i_am_main(rank)) + @test norm(x-x̂) < 1.0e-5 + + ## Re assembly with different values + V2 = map(i->2*i,V) + VV2 = map(i->2*i,VV) + #display(partition(b)) + psystem!(A,b,V2,VV2,cache) |> wait + #display(partition(b)) + x = IterativeSolvers.cg(A,b,verbose=i_am_main(rank)) + @test norm(x-x̂) < 1.0e-5 + + # Sub-assembled variant + A,b = psystem(I,J,V,II,VV,dof_partition,dof_partition;assemble=false) |> fetch + assemble!(b) |> wait + x = IterativeSolvers.cg(A,b,verbose=i_am_main(rank)) + col_partition = partition(axes(A,2)) + x̂ = similar(x) + map(setup_exact_solution,local_values(x̂),space,grid,col_partition) + @test norm(x-x̂) < 1.0e-5 + end diff --git a/test/mpi_array/drivers/exception_tests.jl b/test/mpi_array/drivers/exception_tests.jl index 107e17ac..453839ff 100644 --- a/test/mpi_array/drivers/exception_tests.jl +++ b/test/mpi_array/drivers/exception_tests.jl 
@@ -10,7 +10,7 @@ function exception_tests(distribute) 0 end end - p = emit(p_main) + p = multicast(p_main) map(parts,p) do part,part_fail @assert part_fail != part end diff --git a/test/mpi_array/drivers/p_sparse_matrix_tests.jl b/test/mpi_array/drivers/p_sparse_matrix_tests.jl index babd72d1..3da70ef4 100644 --- a/test/mpi_array/drivers/p_sparse_matrix_tests.jl +++ b/test/mpi_array/drivers/p_sparse_matrix_tests.jl @@ -1,4 +1,4 @@ -module MPIArrayPSparseMatrixTests +module MPIArrayOldPSparseMatrixTests using PartitionedArrays diff --git a/test/p_range_tests.jl b/test/p_range_tests.jl index b3228a61..05dd1d32 100644 --- a/test/p_range_tests.jl +++ b/test/p_range_tests.jl @@ -143,8 +143,10 @@ function p_range_tests(distribute) LocalIndices(n,part,[1,3,7,9,10],Int32[1,1,3,4,4]) end end + @test renumber_partition(ids) !== ids ids = uniform_partition(parts,n) + @test renumber_partition(ids) === ids @test length(ids) == length(parts) gids = map(parts) do part @@ -176,6 +178,7 @@ function p_range_tests(distribute) end end ids5 = variable_partition(a,sum(a)) + @test renumber_partition(ids5) === ids5 map(parts,ids5) do part, local_to_global if part == 1 @test local_to_global == [1, 2, 3, 4] diff --git a/test/p_sparse_matrix_tests.jl b/test/p_sparse_matrix_tests.jl index c4483e6b..22f9f539 100644 --- a/test/p_sparse_matrix_tests.jl +++ b/test/p_sparse_matrix_tests.jl @@ -31,7 +31,7 @@ function p_sparse_matrix_tests(distribute) row_partition = uniform_partition(rank,n) col_partition = row_partition - A = psparse!(I,J,V,row_partition,col_partition) |> fetch + A = old_psparse!(I,J,V,row_partition,col_partition) |> fetch n = 10 @@ -47,7 +47,7 @@ function p_sparse_matrix_tests(distribute) a end - A = PSparseMatrix(values,row_partition,col_partition) + A = OldPSparseMatrix(values,row_partition,col_partition) x = pfill(3.0,col_partition) b = similar(x,axes(A,1)) mul!(b,A,x) @@ -85,7 +85,7 @@ function p_sparse_matrix_tests(distribute) end end |> tuple_of_arrays - A = 
psparse!(I,J,V,row_partition,col_partition) |> fetch + A = old_psparse!(I,J,V,row_partition,col_partition) |> fetch assemble!(A) |> wait x = pfill(1.5,partition(axes(A,2))) @@ -122,5 +122,197 @@ function p_sparse_matrix_tests(distribute) @test norm(r) < 1.0e-9 display(A) + # New stuff + + n = 10 + parts = rank + row_partition = uniform_partition(parts,n) + col_partition = row_partition + + I,J,V = map(parts) do part + if part == 1 + [1,2,1,2,2], [2,6,1,2,1], [1.0,2.0,30.0,10.0,1.0] + elseif part == 2 + [3,3,4,6], [3,9,4,2], [10.0,2.0,30.0,2.0] + elseif part == 3 + [5,5,6,7], [5,6,6,7], [10.0,2.0,30.0,1.0] + else + [9,9,8,10,6], [9,3,8,10,5], [10.0,2.0,30.0,50.0,2.0] + end + end |> tuple_of_arrays + + A = psparse(I,J,V,row_partition,col_partition,split_format=false,assemble=false) |> fetch + B = split_format(A) + B, cache = split_format(A,reuse=true) + split_format!(B,A,cache) + C = assemble(B) |> fetch + C,cache = assemble(B,reuse=true) |> fetch + assemble!(C,B,cache) |> wait + display(C) + + A = psparse(I,J,V,row_partition,col_partition,split_format=true,assemble=false) |> fetch + A = psparse(I,J,V,row_partition,col_partition,split_format=true,assemble=true) |> fetch + A = psparse(I,J,V,row_partition,col_partition) |> fetch + display(A) + # TODO Assembly in non-split_format format not yet implemented + #A = psparse(I,J,V,row_partition,col_partition,split_format=false,assemble=true) |> fetch + + A,cache = psparse(I,J,V,row_partition,col_partition,reuse=true) |> fetch + psparse!(A,V,cache) |> wait + + A_fa = psparse(I,J,V,row_partition,col_partition) |> fetch + rows_co = partition(axes(A_fa,2)) + A_co = consistent(A_fa,rows_co) |> fetch + A_co,cache = consistent(A_fa,rows_co;reuse=true) |> fetch + consistent!(A_co,A_fa,cache) |> wait + + n = 10 + parts = rank + row_partition = uniform_partition(parts,n) + col_partition = row_partition + + I,J,V = map(row_partition,col_partition) do rows, cols + i = collect(own_to_global(rows)) + j = copy(i) + v = fill(2.0,length(i)) 
+ i,j,v + end |> tuple_of_arrays + + A = psparse(I,J,V,row_partition,col_partition) |> fetch + x = pfill(3.0,axes(A,2)) + b = similar(x,axes(A,1)) + mul!(b,A,x) + map(own_values(b)) do values + @test all( values .== 6 ) + end + consistent!(b) |> wait + map(partition(b)) do values + @test all( values .== 6 ) + end + + _A = similar(A) + _A = similar(A,eltype(A)) + copy!(_A,A) + + LinearAlgebra.fillstored!(A,1.0) + fill!(x,3.0) + mul!(b,A,x) + consistent!(b) |> wait + map(partition(b)) do values + @test all( values .== 3 ) + end + + I,J,V = map(parts) do part + if part == 1 + [1,2,1,2,2], [2,6,1,2,1], [1.0,2.0,30.0,10.0,1.0] + elseif part == 2 + [3,3,4,6], [3,9,4,2], [10.0,2.0,30.0,2.0] + elseif part == 3 + [5,5,6,7], [5,6,6,7], [10.0,2.0,30.0,1.0] + else + [9,9,8,10,6], [9,3,8,10,5], [10.0,2.0,30.0,50.0,2.0] + end + end |> tuple_of_arrays + + A = psparse(I,J,V,row_partition,col_partition) |> fetch + x = pones(partition(axes(A,2))) + y = A*x + @test isa(y,PVector) + dy = y - y + + x = IterativeSolvers.cg(A,y) + r = A*x-y + @test norm(r) < 1.0e-9 + + x = pfill(0.0,partition(axes(A,2))) + IterativeSolvers.cg!(x,A,y) + r = A*x-y + @test norm(r) < 1.0e-9 + fill!(x,0.0) + + x = A\y + @test isa(x,PVector) + r = A*x-y + @test norm(r) < 1.0e-9 + + factors = lu(A) + x .= 0 + ldiv!(x,factors,y) + r = A*x-y + @test norm(r) < 1.0e-9 + + lu!(factors,A) + x .= 0 + ldiv!(x,factors,y) + r = A*x-y + map(i->fill!(i,100),ghost_values(r)) + @test norm(r) < 1.0e-9 + display(A) + + rows_trivial = trivial_partition(parts,n) + cols_trivial = rows_trivial + values = map(collect∘local_to_global,rows_trivial) + w0 = PVector(values,rows_trivial) + values = map(collect∘local_to_global,row_partition) + v = PVector(values,row_partition) + v0 = copy(v) + w = repartition(v,rows_trivial) |> fetch + @test w == w0 + repartition!(w,v) |> wait + @test w == w0 + w, cache = repartition(v,rows_trivial;reuse=true) |> fetch + repartition!(w,v,cache) |> wait + @test w == w0 + 
repartition!(v,w,cache;reversed=true) |> wait + @test v == v0 + + B = repartition(A,rows_trivial,cols_trivial) |> fetch + B,cache = repartition(A,rows_trivial,cols_trivial;reuse=true) |> fetch + repartition!(B,A,cache) + + B,w = repartition(A,v,rows_trivial,cols_trivial) |> fetch + B,w,cache = repartition(A,v,rows_trivial,cols_trivial,reuse=true) |> fetch + repartition!(B,w,A,v,cache) |> wait + + I2 = map(copy,I) + V2 = map(copy,I) + rows = row_partition + cols = col_partition + v = pvector(I2,V2,rows) |> fetch + v,cache = pvector(I2,V2,rows;reuse=true) |> fetch + pvector!(v,V,cache) |> wait + + v = pvector(I2,V2,rows;assemble=false) |> fetch + w = assemble(v) |> fetch + w = assemble(v,rows) |> fetch + w,cache = assemble(v,reuse=true) |> fetch + assemble!(w,v,cache) |> wait + + A_cols = partition(axes(A,2)) + u = consistent(w,A_cols) |> fetch + u,cache = consistent(w,A_cols;reuse=true) |> fetch + consistent!(u,w,cache) |> wait + + A,b = psystem(I,J,V,I2,V2,rows,cols) |> fetch + A,b,cache = psystem(I,J,V,I2,V2,rows,cols,reuse=true) |> fetch + psystem!(A,b,V,V2,cache) |> wait + + display((A,A)) + display((b,b)) + + LinearAlgebra.fillstored!(A,3) + B = 2*A + @test eltype(partition(B)) == eltype(partition(A)) + B = A*2 + @test eltype(partition(B)) == eltype(partition(A)) + B = +A + @test eltype(partition(B)) == eltype(partition(A)) + B = -A + @test eltype(partition(B)) == eltype(partition(A)) + C = B+A + @test eltype(partition(C)) == eltype(partition(A)) + C = B-A + @test eltype(partition(C)) == eltype(partition(A)) + end diff --git a/test/p_vector_tests.jl b/test/p_vector_tests.jl index d17088fa..e7d4cd15 100644 --- a/test/p_vector_tests.jl +++ b/test/p_vector_tests.jl @@ -47,7 +47,7 @@ function p_vector_tests(distribute) end |> tuple_of_arrays row_partition = uniform_partition(rank,n) - a = pvector!(I,V,row_partition) |> fetch + a = old_pvector!(I,V,row_partition) |> fetch @test any(i->i>n,a) == false @test all(i->i fetch + v = old_pvector!(gids,values,row_partition) 
|> fetch u = 2*v map(partition(u),partition(v)) do u,v @test u == 2*v diff --git a/test/primitives_tests.jl b/test/primitives_tests.jl index 41aa02f3..db8da705 100644 --- a/test/primitives_tests.jl +++ b/test/primitives_tests.jl @@ -61,12 +61,12 @@ function primitives_tests(distribute) @test rcv == [[1],[1,2],[1,2,3],[1,2,3,4]] end - rcv = emit(rank,source=2) + rcv = multicast(rank,source=2) map(rcv) do rcv @test rcv == 2 end - rcv = emit(snd,source=2) + rcv = multicast(snd,source=2) map(rcv) do rcv @test rcv == [1,2] end