diff --git a/examples/attach_array_cpy/CMakeLists.txt b/examples/attach_array_cpy/CMakeLists.txt new file mode 100644 index 0000000000..34bf3102f3 --- /dev/null +++ b/examples/attach_array_cpy/CMakeLists.txt @@ -0,0 +1,43 @@ +#------------------------------------------------------------------------------# +# Copyright 2017 Kitware, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#------------------------------------------------------------------------------# + +cmake_minimum_required(VERSION 3.1) +project(LegionExample_attach_array_cpy) + +include(CheckCXXCompilerFlag) + +check_cxx_compiler_flag( "-std=c++11" CXX11_COMPILER) + +if(CXX11_COMPILER) + set(CMAKE_CXX_STANDARD 11) +else() + message(FATAL_ERROR "C++11 compatible compiler not found") +endif() + +# Only search if we're building stand-alone and not as part of Legion +if(NOT Legion_SOURCE_DIR) + find_package(Legion REQUIRED) +endif() + +# The target is named after this example's directory so it does not clash with the sibling attach_array examples +add_executable(attach_array_cpy attach_array.cc) +target_link_libraries(attach_array_cpy Legion::Legion) +if(Legion_ENABLE_TESTING) + add_test(NAME attach_array_cpy COMMAND $<TARGET_FILE:attach_array_cpy>) +endif() + + + diff --git a/examples/attach_array_cpy/Makefile b/examples/attach_array_cpy/Makefile new file mode 100644 index 0000000000..5651293710 --- /dev/null +++ b/examples/attach_array_cpy/Makefile @@ -0,0 +1,51 @@ +# Copyright 2017 Stanford University +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +ifndef LG_RT_DIR +$(error LG_RT_DIR variable is not defined, aborting build) +endif + +# Flags for directing the runtime makefile what to include +DEBUG ?= 1 # Include debugging symbols +OUTPUT_LEVEL ?= LEVEL_DEBUG # Compile time logging level +USE_CUDA ?= 0 # Include CUDA support (requires CUDA) +USE_GASNET ?= 0 # Include GASNet support (requires GASNet) +USE_HDF ?= 0 # Include HDF5 support (requires HDF5) +ALT_MAPPERS ?= 0 # Include alternative mappers (not recommended) + +# Put the binary file name here +OUTFILE ?= attach_array +# List all the application source files here +GEN_SRC ?= attach_array.cc # .cc files +GEN_GPU_SRC ?= # .cu files + +# You can modify these variables, some will be appended to by the runtime makefile +INC_FLAGS ?= +CC_FLAGS ?= +NVCC_FLAGS ?= +GASNET_FLAGS ?= +LD_FLAGS ?= +# For Point and Rect typedefs +CC_FLAGS += -std=c++11 + +########################################################################### +# +# Don't change anything below here +# +########################################################################### + +include $(LG_RT_DIR)/runtime.mk + diff --git a/examples/attach_array_cpy/attach_array.cc b/examples/attach_array_cpy/attach_array.cc new file mode 100644 index 0000000000..8abd5355c6 --- /dev/null +++ b/examples/attach_array_cpy/attach_array.cc @@ -0,0 +1,407 @@ +/* Copyright 2017 Stanford University + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include +#include +#include +#include +#include +#ifdef USE_HDF +#include +#endif +#include "legion.h" +#include +#include "mem_impl.h" +#include "inst_impl.h" +#include "runtime_impl.h" +using namespace Legion; + +template +using AccessorRO = FieldAccessor >; +template +using AccessorWD = FieldAccessor >; + +/* + * In this example we illustrate how the Legion + * programming model supports multiple partitions + * of the same logical region and the benefits it + * provides by allowing multiple views onto the + * same logical region. We compute a simple 5-point + * 1D stencil using the standard formula: + * f'(x) = (-f(x+2h) + 8f(x+h) - 8f(x-h) + f(x-2h))/12h + * For simplicity we'll assume h=1. 
+ */ + +enum TaskIDs { + TOP_LEVEL_TASK_ID, + INIT_FIELD_TASK_ID, + STENCIL_TASK_ID, + CHECK_TASK_ID, +}; + +enum FieldIDs { + FID_VAL, + FID_DERIV, + FID_CP +}; + +double *my_ptr= NULL; + +void top_level_task(const Task *task, + const std::vector ®ions, + Context ctx, Runtime *runtime) +{ + int num_elements = 1024; + int num_subregions = 4; + + // Check for any command line arguments; a flag is only honored when a value follows it + { + const InputArgs &command_args = Runtime::get_input_args(); + for (int i = 1; i < command_args.argc; i++) + { + if (!strcmp(command_args.argv[i],"-n") && (i+1 < command_args.argc)) + num_elements = atoi(command_args.argv[++i]); + if (!strcmp(command_args.argv[i],"-b") && (i+1 < command_args.argc)) + num_subregions = atoi(command_args.argv[++i]); + } + } + printf("Running stencil computation for %d elements...\n", num_elements); + printf("Partitioning data into %d sub-regions...\n", num_subregions); + + Rect<1> elem_rect(0,num_elements-1); + IndexSpaceT<1> is = runtime->create_index_space(ctx, elem_rect); + FieldSpace fs = runtime->create_field_space(ctx); + { + FieldAllocator allocator = + runtime->create_field_allocator(ctx, fs); + allocator.allocate_field(sizeof(double),FID_VAL); + allocator.allocate_field(sizeof(double),FID_DERIV); + } + LogicalRegion stencil_lr = runtime->create_logical_region(ctx, is, fs); + + FieldSpace cp_fs = runtime->create_field_space(ctx); + { + FieldAllocator allocator = + runtime->create_field_allocator(ctx, cp_fs); + allocator.allocate_field(sizeof(double), FID_CP); + } + LogicalRegion cp_lr = runtime->create_logical_region(ctx, is, cp_fs); + + Rect<1> color_bounds(0,num_subregions-1); + IndexSpaceT<1> color_is = runtime->create_index_space(ctx, color_bounds); + + IndexPartition disjoint_ip = + runtime->create_equal_partition(ctx, is, color_is); + const int block_size = (num_elements + num_subregions - 1) / num_subregions; + Matrix<1,1> transform; + transform[0][0] = block_size; + Rect<1> extent(-2, block_size + 1); + IndexPartition ghost_ip = + runtime->create_partition_by_restriction(ctx, is, color_is, 
transform, extent); + + LogicalPartition disjoint_lp = + runtime->get_logical_partition(ctx, stencil_lr, disjoint_ip); + LogicalPartition ghost_lp = + runtime->get_logical_partition(ctx, stencil_lr, ghost_ip); + + ArgumentMap arg_map; + + IndexLauncher init_launcher(INIT_FIELD_TASK_ID, color_is, + TaskArgument(NULL, 0), arg_map); + init_launcher.add_region_requirement( + RegionRequirement(disjoint_lp, 0/*projection ID*/, + WRITE_DISCARD, EXCLUSIVE, stencil_lr)); + init_launcher.add_field(0, FID_VAL); + runtime->execute_index_space(ctx, init_launcher); + + IndexLauncher stencil_launcher(STENCIL_TASK_ID, color_is, + TaskArgument(&num_elements, sizeof(num_elements)), arg_map); + stencil_launcher.add_region_requirement( + RegionRequirement(ghost_lp, 0/*projection ID*/, + READ_ONLY, EXCLUSIVE, stencil_lr)); + stencil_launcher.add_field(0, FID_VAL); + stencil_launcher.add_region_requirement( + RegionRequirement(disjoint_lp, 0/*projection ID*/, + READ_WRITE, EXCLUSIVE, stencil_lr)); + stencil_launcher.add_field(1, FID_DERIV); + runtime->execute_index_space(ctx, stencil_launcher); + + // Launcher a copy operation that performs checkpoint + //struct timespec ts_start, ts_mid, ts_end; + //clock_gettime(CLOCK_MONOTONIC, &ts_start); + double ts_start, ts_mid, ts_end; + ts_start = Realm::Clock::current_time_in_microseconds(); + PhysicalRegion cp_pr; + + Memory memory = Machine::MemoryQuery(Machine::get_machine()) + .local_address_space() + .only_kind(Memory::SYSTEM_MEM) + .first(); + assert(memory.exists()); + Realm::LocalCPUMemory *m_impl = (Realm::LocalCPUMemory *)Realm::get_runtime()->get_memory_impl(memory); + unsigned char* base = (unsigned char*)m_impl->base; + + double *cp_ptr = (double*)malloc(sizeof(double)*(num_elements)); + my_ptr = cp_ptr; + + + std::map field_pointer_map; + field_pointer_map[FID_CP] = cp_ptr; + printf("Checkpointing data to arrray fid %d, ptr %p, base %p\n", FID_CP, cp_ptr, base); + cp_pr = runtime->attach_array_soa(ctx, cp_lr, cp_lr, 
field_pointer_map, 0); + + //cp_pr.wait_until_valid(); + CopyLauncher copy_launcher; + copy_launcher.add_copy_requirements( + RegionRequirement(stencil_lr, READ_ONLY, EXCLUSIVE, stencil_lr), + RegionRequirement(cp_lr, WRITE_DISCARD, EXCLUSIVE, cp_lr)); + copy_launcher.add_src_field(0, FID_DERIV); + copy_launcher.add_dst_field(0, FID_CP); + runtime->issue_copy_operation(ctx, copy_launcher); + + //clock_gettime(CLOCK_MONOTONIC, &ts_mid); + ts_mid = Realm::Clock::current_time_in_microseconds(); + + runtime->detach_array(ctx, cp_pr); + + //clock_gettime(CLOCK_MONOTONIC, &ts_end); + ts_end = Realm::Clock::current_time_in_microseconds(); + //double attach_time = ((1.0 * (ts_mid.tv_sec - ts_start.tv_sec)) + + // (1e-9 * (ts_mid.tv_nsec - ts_start.tv_nsec))); + //double detach_time = ((1.0 * (ts_end.tv_sec - ts_mid.tv_sec)) + + // (1e-9 * (ts_end.tv_nsec - ts_mid.tv_nsec))); + double attach_time = 1e-6 * (ts_mid - ts_start); + double detach_time = 1e-6 * (ts_end - ts_mid); + printf("ELAPSED TIME (ATTACH) = %7.3f s\n", attach_time); + printf("ELAPSED TIME (DETACH) = %7.3f s\n", detach_time); + + // Finally, we launch a single task to check the results. 
+ TaskLauncher check_launcher(CHECK_TASK_ID, + TaskArgument(&num_elements, sizeof(num_elements))); + check_launcher.add_region_requirement( + RegionRequirement(stencil_lr, READ_ONLY, EXCLUSIVE, stencil_lr)); + check_launcher.add_field(0, FID_VAL); + check_launcher.add_region_requirement( + RegionRequirement(stencil_lr, READ_ONLY, EXCLUSIVE, stencil_lr)); + check_launcher.add_field(1, FID_DERIV); + runtime->execute_task(ctx, check_launcher); + + // Clean up our region, index space, and field space + runtime->destroy_logical_region(ctx, stencil_lr); + runtime->destroy_logical_region(ctx, cp_lr); + runtime->destroy_field_space(ctx, cp_fs); + runtime->destroy_field_space(ctx, fs); + runtime->destroy_index_space(ctx, is); + printf("End of TOP_LEVEL_TASK, %f, %f\n", cp_ptr[0], cp_ptr[num_elements-1]); +} + +// The standard initialize field task from earlier examples +void init_field_task(const Task *task, + const std::vector ®ions, + Context ctx, Runtime *runtime) +{ + assert(regions.size() == 1); + assert(task->regions.size() == 1); + assert(task->regions[0].privilege_fields.size() == 1); + + FieldID fid = *(task->regions[0].privilege_fields.begin()); + const int point = task->index_point.point_data[0]; + printf("Initializing field %d for block %d...\n", fid, point); + + const AccessorWD acc(regions[0], fid); + + int i = point; + Rect<1> rect = runtime->get_index_space_domain(ctx, + task->regions[0].region.get_index_space()); + for (PointInRectIterator<1> pir(rect); pir(); pir++) { + acc[*pir] = 1.125 + i*1.12; + i++; + } +} + +// Our stencil tasks is interesting because it +// has both slow and fast versions depending +// on whether or not its bounds have been clamped. 
+void stencil_task(const Task *task, + const std::vector ®ions, + Context ctx, Runtime *runtime) +{ + assert(regions.size() == 2); + assert(task->regions.size() == 2); + assert(task->regions[0].privilege_fields.size() == 1); + assert(task->regions[1].privilege_fields.size() == 1); + assert(task->arglen == sizeof(int)); + const int max_elements = *((const int*)task->args); + const int point = task->index_point.point_data[0]; + + FieldID read_fid = *(task->regions[0].privilege_fields.begin()); + FieldID write_fid = *(task->regions[1].privilege_fields.begin()); + + const AccessorRO read_acc(regions[0], read_fid); + const AccessorWD write_acc(regions[1], write_fid); + + Rect<1> rect = runtime->get_index_space_domain(ctx, + task->regions[1].region.get_index_space()); + // If we are on the edges of the entire space we are + // operating over, then we're going to do the slow + // path which checks for clamping when necessary. + // If not, then we can do the fast path without + // any checks. + if ((rect.lo[0] < 2) || (rect.hi[0] > (max_elements-3))) + { + printf("Running slow stencil path for point %d...\n", point); + // Note in the slow path that there are checks which + // perform clamps when necessary before reading values. 
+ for (PointInRectIterator<1> pir(rect); pir(); pir++) + { + double l2, l1, r1, r2; + if (pir[0] < 2) + l2 = read_acc[0]; + else + l2 = read_acc[*pir - 2]; + if (pir[0] < 1) + l1 = read_acc[0]; + else + l1 = read_acc[*pir - 1]; + if (pir[0] > (max_elements-2)) + r1 = read_acc[max_elements-1]; + else + r1 = read_acc[*pir + 1]; + if (pir[0] > (max_elements-3)) + r2 = read_acc[max_elements-1]; + else + r2 = read_acc[*pir + 2]; + + double result = (-l2 + 8.0*l1 - 8.0*r1 + r2) / 12.0; + write_acc[*pir] = result; + } + } + else + { + printf("Running fast stencil path for point %d...\n", point); + // In the fast path, we don't need any checks + for (PointInRectIterator<1> pir(rect); pir(); pir++) + { + double l2 = read_acc[*pir - 2]; + double l1 = read_acc[*pir - 1]; + double r1 = read_acc[*pir + 1]; + double r2 = read_acc[*pir + 2]; + + double result = (-l2 + 8.0*l1 - 8.0*r1 + r2) / 12.0; + write_acc[*pir] = result; + } + } +} + +void check_task(const Task *task, + const std::vector ®ions, + Context ctx, Runtime *runtime) +{ + assert(regions.size() == 2); + assert(task->regions.size() == 2); + assert(task->regions[0].privilege_fields.size() == 1); + assert(task->regions[1].privilege_fields.size() == 1); + assert(task->arglen == sizeof(int)); + const int max_elements = *((const int*)task->args); + + FieldID src_fid = *(task->regions[0].privilege_fields.begin()); + FieldID dst_fid = *(task->regions[1].privilege_fields.begin()); + + const AccessorRO src_acc(regions[0], src_fid); + const AccessorRO dst_acc(regions[1], dst_fid); + + Rect<1> rect = runtime->get_index_space_domain(ctx, + task->regions[1].region.get_index_space()); + + // This is the checking task so we can just do the slow path + bool all_passed = true; + bool cp_passed = true; + int i = 0; + for (PointInRectIterator<1> pir(rect); pir(); pir++) + { + double l2, l1, r1, r2; + if (pir[0] < 2) + l2 = src_acc[0]; + else + l2 = src_acc[*pir - 2]; + if (pir[0] < 1) + l1 = src_acc[0]; + else + l1 = src_acc[*pir - 1]; 
+ if (pir[0] > (max_elements-2)) + r1 = src_acc[max_elements-1]; + else + r1 = src_acc[*pir + 1]; + if (pir[0] > (max_elements-3)) + r2 = src_acc[max_elements-1]; + else + r2 = src_acc[*pir + 2]; + + double expected = (-l2 + 8.0*l1 - 8.0*r1 + r2) / 12.0; + double received = dst_acc[*pir]; + if (i == 0 || i == max_elements-1) printf("result %d, %f\n", i, received); + if (my_ptr[i] != received) { + printf("transfer error %d, %f\n", i, my_ptr[i]); + cp_passed = false; + } + i++; + // Probably shouldn't bitwise compare floating point + // numbers but the order of operations are the same so they + // should be bitwise equal. + if (expected != received) + all_passed = false; + } + if (cp_passed) + printf("CP PASSED\n"); + else + printf("CP FAILED\n"); + printf("CHECK, %f, %f\n", my_ptr[0], my_ptr[max_elements-1]); + if (all_passed) + printf("SUCCESS!\n"); + else + printf("FAILURE!\n"); +} + +int main(int argc, char **argv) +{ + Runtime::set_top_level_task_id(TOP_LEVEL_TASK_ID); + + { + TaskVariantRegistrar registrar(TOP_LEVEL_TASK_ID, "top_level"); + registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); + Runtime::preregister_task_variant(registrar, "top_level"); + } + + { + TaskVariantRegistrar registrar(INIT_FIELD_TASK_ID, "init_field"); + registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); + Runtime::preregister_task_variant(registrar, "init_field"); + } + + { + TaskVariantRegistrar registrar(STENCIL_TASK_ID, "stencil"); + registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); + Runtime::preregister_task_variant(registrar, "stencil"); + } + + { + TaskVariantRegistrar registrar(CHECK_TASK_ID, "check"); + registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); + Runtime::preregister_task_variant(registrar, "check"); + } + + return Runtime::start(argc, argv); +} diff --git a/examples/attach_array_no_cpy_aos/CMakeLists.txt b/examples/attach_array_no_cpy_aos/CMakeLists.txt new file mode 100644 index 
0000000000..34bf3102f3 --- /dev/null +++ b/examples/attach_array_no_cpy_aos/CMakeLists.txt @@ -0,0 +1,43 @@ +#------------------------------------------------------------------------------# +# Copyright 2017 Kitware, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#------------------------------------------------------------------------------# + +cmake_minimum_required(VERSION 3.1) +project(LegionExample_attach_array_no_cpy_aos) + +include(CheckCXXCompilerFlag) + +check_cxx_compiler_flag( "-std=c++11" CXX11_COMPILER) + +if(CXX11_COMPILER) + set(CMAKE_CXX_STANDARD 11) +else() + message(FATAL_ERROR "C++11 compatible compiler not found") +endif() + +# Only search if we're building stand-alone and not as part of Legion +if(NOT Legion_SOURCE_DIR) + find_package(Legion REQUIRED) +endif() + +# The target is named after this example's directory so it does not clash with the sibling attach_array examples +add_executable(attach_array_no_cpy_aos attach_array.cc) +target_link_libraries(attach_array_no_cpy_aos Legion::Legion) +if(Legion_ENABLE_TESTING) + add_test(NAME attach_array_no_cpy_aos COMMAND $<TARGET_FILE:attach_array_no_cpy_aos>) +endif() + + + diff --git a/examples/attach_array_no_cpy_aos/Makefile b/examples/attach_array_no_cpy_aos/Makefile new file mode 100644 index 0000000000..5651293710 --- /dev/null +++ b/examples/attach_array_no_cpy_aos/Makefile @@ -0,0 +1,51 @@ +# Copyright 2017 Stanford University +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +ifndef LG_RT_DIR +$(error LG_RT_DIR variable is not defined, aborting build) +endif + +# Flags for directing the runtime makefile what to include +DEBUG ?= 1 # Include debugging symbols +OUTPUT_LEVEL ?= LEVEL_DEBUG # Compile time logging level +USE_CUDA ?= 0 # Include CUDA support (requires CUDA) +USE_GASNET ?= 0 # Include GASNet support (requires GASNet) +USE_HDF ?= 0 # Include HDF5 support (requires HDF5) +ALT_MAPPERS ?= 0 # Include alternative mappers (not recommended) + +# Put the binary file name here +OUTFILE ?= attach_array +# List all the application source files here +GEN_SRC ?= attach_array.cc # .cc files +GEN_GPU_SRC ?= # .cu files + +# You can modify these variables, some will be appended to by the runtime makefile +INC_FLAGS ?= +CC_FLAGS ?= +NVCC_FLAGS ?= +GASNET_FLAGS ?= +LD_FLAGS ?= +# For Point and Rect typedefs +CC_FLAGS += -std=c++11 + +########################################################################### +# +# Don't change anything below here +# +########################################################################### + +include $(LG_RT_DIR)/runtime.mk + diff --git a/examples/attach_array_no_cpy_aos/attach_array.cc b/examples/attach_array_no_cpy_aos/attach_array.cc new file mode 100644 index 0000000000..d1705aeb10 --- /dev/null +++ b/examples/attach_array_no_cpy_aos/attach_array.cc @@ -0,0 +1,388 @@ +/* Copyright 2017 Stanford University + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include +#include +#include +#include +#include +#ifdef USE_HDF +#include +#endif +#include "legion.h" +#include +#include "mem_impl.h" +#include "inst_impl.h" +#include "runtime_impl.h" +using namespace Legion; + +template +using AccessorRO = FieldAccessor >; +template +using AccessorWD = FieldAccessor >; + +/* + * In this example we illustrate how the Legion + * programming model supports multiple partitions + * of the same logical region and the benefits it + * provides by allowing multiple views onto the + * same logical region. We compute a simple 5-point + * 1D stencil using the standard formula: + * f'(x) = (-f(x+2h) + 8f(x+h) - 8f(x-h) + f(x-2h))/12h + * For simplicity we'll assume h=1. 
+ */ + +enum TaskIDs { + TOP_LEVEL_TASK_ID, + INIT_FIELD_TASK_ID, + STENCIL_TASK_ID, + CHECK_TASK_ID, +}; + +enum FieldIDs { + FID_VAL, + FID_DERIV, + FID_CP +}; + +typedef struct { + double val; + double deriv; +}deriv_t; + +deriv_t *my_ptr= NULL; + +void top_level_task(const Task *task, + const std::vector ®ions, + Context ctx, Runtime *runtime) +{ + int num_elements = 1024; + int num_subregions = 4; + + // Check for any command line arguments; a flag is only honored when a value follows it + { + const InputArgs &command_args = Runtime::get_input_args(); + for (int i = 1; i < command_args.argc; i++) + { + if (!strcmp(command_args.argv[i],"-n") && (i+1 < command_args.argc)) + num_elements = atoi(command_args.argv[++i]); + if (!strcmp(command_args.argv[i],"-b") && (i+1 < command_args.argc)) + num_subregions = atoi(command_args.argv[++i]); + } + } + printf("Running stencil computation for %d elements...\n", num_elements); + printf("Partitioning data into %d sub-regions...\n", num_subregions); + + Rect<1> elem_rect(0,num_elements-1); + IndexSpaceT<1> is = runtime->create_index_space(ctx, elem_rect); + FieldSpace fs = runtime->create_field_space(ctx); + { + FieldAllocator allocator = + runtime->create_field_allocator(ctx, fs); + allocator.allocate_field(sizeof(double),FID_VAL); + allocator.allocate_field(sizeof(double),FID_DERIV); + } + LogicalRegion stencil_lr = runtime->create_logical_region(ctx, is, fs); + + deriv_t *deriv_struct_ptr = (deriv_t*)malloc(sizeof(deriv_t)*(num_elements)); + printf("base array ptr %p\n", deriv_struct_ptr); + + //double *val_ptr = (double*)malloc(sizeof(double)*(num_elements)); + for (int i = 0; i < num_elements; i++) { + deriv_struct_ptr[i].val = drand48(); + } + + + std::map offset; + offset[FID_VAL] = 0; + offset[FID_DERIV] = sizeof(double); + + PhysicalRegion stencil_pr = runtime->attach_array_aos(ctx, stencil_lr, stencil_lr, deriv_struct_ptr, sizeof(deriv_t), offset, 0); + +/* + char *val_ptr = (char*)deriv_struct_ptr; + std::map field_pointer_map_val; + field_pointer_map_val[FID_VAL] = val_ptr; + printf("Attach array fid %d, ptr %p\n", 
FID_VAL, val_ptr); + stencil_val_pr = runtime->attach_fortran_array(ctx, stencil_lr, stencil_lr, field_pointer_map_val, + LEGION_FILE_READ_WRITE); + + PhysicalRegion stencil_deriv_pr; + //deriv_t *deriv_struct_ptr = (deriv_t*)malloc(sizeof(deriv_t)*(num_elements)); + char *deriv_ptr = (char*)deriv_struct_ptr + sizeof(double); + std::map field_pointer_map_deriv; + field_pointer_map_deriv[FID_DERIV] = deriv_ptr; + printf("Attach arrray fid %d, struct ptr %p, ptr %p\n", FID_DERIV, deriv_struct_ptr, deriv_ptr); + stencil_deriv_pr = runtime->attach_fortran_array(ctx, stencil_lr, stencil_lr, field_pointer_map_deriv, + LEGION_FILE_READ_WRITE); */ + my_ptr = deriv_struct_ptr; + + Rect<1> color_bounds(0,num_subregions-1); + IndexSpaceT<1> color_is = runtime->create_index_space(ctx, color_bounds); + + IndexPartition disjoint_ip = + runtime->create_equal_partition(ctx, is, color_is); + const int block_size = (num_elements + num_subregions - 1) / num_subregions; + Matrix<1,1> transform; + transform[0][0] = block_size; + Rect<1> extent(-2, block_size + 1); + IndexPartition ghost_ip = + runtime->create_partition_by_restriction(ctx, is, color_is, transform, extent); + + LogicalPartition disjoint_lp = + runtime->get_logical_partition(ctx, stencil_lr, disjoint_ip); + LogicalPartition ghost_lp = + runtime->get_logical_partition(ctx, stencil_lr, ghost_ip); + + ArgumentMap arg_map; + +#if 0 + IndexLauncher init_launcher(INIT_FIELD_TASK_ID, color_is, + TaskArgument(NULL, 0), arg_map); + init_launcher.add_region_requirement( + RegionRequirement(disjoint_lp, 0/*projection ID*/, + WRITE_DISCARD, EXCLUSIVE, stencil_lr)); + init_launcher.add_field(0, FID_VAL); + runtime->execute_index_space(ctx, init_launcher); +#endif + + IndexLauncher stencil_launcher(STENCIL_TASK_ID, color_is, + TaskArgument(&num_elements, sizeof(num_elements)), arg_map); + stencil_launcher.add_region_requirement( + RegionRequirement(ghost_lp, 0/*projection ID*/, + READ_ONLY, EXCLUSIVE, stencil_lr)); + 
stencil_launcher.add_field(0, FID_VAL); + stencil_launcher.add_region_requirement( + RegionRequirement(disjoint_lp, 0/*projection ID*/, + READ_WRITE, EXCLUSIVE, stencil_lr)); + stencil_launcher.add_field(1, FID_DERIV); + runtime->execute_index_space(ctx, stencil_launcher); + + + // Finally, we launch a single task to check the results. + TaskLauncher check_launcher(CHECK_TASK_ID, + TaskArgument(&num_elements, sizeof(num_elements))); + check_launcher.add_region_requirement( + RegionRequirement(stencil_lr, READ_ONLY, EXCLUSIVE, stencil_lr)); + check_launcher.add_field(0, FID_VAL); + check_launcher.add_region_requirement( + RegionRequirement(stencil_lr, READ_ONLY, EXCLUSIVE, stencil_lr)); + check_launcher.add_field(1, FID_DERIV); + runtime->execute_task(ctx, check_launcher); + + // Clean up our region, index space, and field space + runtime->destroy_logical_region(ctx, stencil_lr); + runtime->destroy_field_space(ctx, fs); + runtime->destroy_index_space(ctx, is); + printf("End of TOP_LEVEL_TASK, %f, %f\n", my_ptr[0].deriv, my_ptr[num_elements-1].deriv); // last valid index is num_elements-1 +} + +// The standard initialize field task from earlier examples +void init_field_task(const Task *task, + const std::vector ®ions, + Context ctx, Runtime *runtime) +{ + assert(regions.size() == 1); + assert(task->regions.size() == 1); + assert(task->regions[0].privilege_fields.size() == 1); + + FieldID fid = *(task->regions[0].privilege_fields.begin()); + const int point = task->index_point.point_data[0]; + printf("Initializing field %d for block %d...\n", fid, point); + + const AccessorWD acc(regions[0], fid); + + int i = point; + Rect<1> rect = runtime->get_index_space_domain(ctx, + task->regions[0].region.get_index_space()); + for (PointInRectIterator<1> pir(rect); pir(); pir++) { + acc[*pir] = drand48(); + i++; + } +} + +// Our stencil task is interesting because it +// has both slow and fast versions depending +// on whether or not its bounds have been clamped. 
+void stencil_task(const Task *task, + const std::vector ®ions, + Context ctx, Runtime *runtime) +{ + assert(regions.size() == 2); + assert(task->regions.size() == 2); + assert(task->regions[0].privilege_fields.size() == 1); + assert(task->regions[1].privilege_fields.size() == 1); + assert(task->arglen == sizeof(int)); + const int max_elements = *((const int*)task->args); + const int point = task->index_point.point_data[0]; + + FieldID read_fid = *(task->regions[0].privilege_fields.begin()); + FieldID write_fid = *(task->regions[1].privilege_fields.begin()); + + const AccessorWD write_acc(regions[1], write_fid); + const AccessorRO read_acc(regions[0], read_fid); + //const FieldAccessor write_acc(regions[1], write_fid); + + Rect<1> rect = runtime->get_index_space_domain(ctx, + task->regions[1].region.get_index_space()); + // If we are on the edges of the entire space we are + // operating over, then we're going to do the slow + // path which checks for clamping when necessary. + // If not, then we can do the fast path without + // any checks. + if ((rect.lo[0] < 2) || (rect.hi[0] > (max_elements-3))) + { + printf("Running slow stencil path for point %d...\n", point); + // Note in the slow path that there are checks which + // perform clamps when necessary before reading values. 
+ for (PointInRectIterator<1> pir(rect); pir(); pir++) + { + double l2, l1, r1, r2; + if (pir[0] < 2) + l2 = read_acc[0]; + else + l2 = read_acc[*pir - 2]; + if (pir[0] < 1) + l1 = read_acc[0]; + else + l1 = read_acc[*pir - 1]; + if (pir[0] > (max_elements-2)) + r1 = read_acc[max_elements-1]; + else + r1 = read_acc[*pir + 1]; + if (pir[0] > (max_elements-3)) + r2 = read_acc[max_elements-1]; + else + r2 = read_acc[*pir + 2]; + + double result = (-l2 + 8.0*l1 - 8.0*r1 + r2) / 12.0; + //deriv.deriv = result; + //deriv.check = 1.0; + write_acc[*pir] = result; + } + } + else + { + printf("Running fast stencil path for point %d...\n", point); + // In the fast path, we don't need any checks + for (PointInRectIterator<1> pir(rect); pir(); pir++) + { + double l2 = read_acc[*pir - 2]; + double l1 = read_acc[*pir - 1]; + double r1 = read_acc[*pir + 1]; + double r2 = read_acc[*pir + 2]; + + double result = (-l2 + 8.0*l1 - 8.0*r1 + r2) / 12.0; + //deriv.deriv = result; + //deriv.check = 1.0; + write_acc[*pir] = result; + } + } +} + +void check_task(const Task *task, + const std::vector ®ions, + Context ctx, Runtime *runtime) +{ + assert(regions.size() == 2); + assert(task->regions.size() == 2); + assert(task->regions[0].privilege_fields.size() == 1); + assert(task->regions[1].privilege_fields.size() == 1); + assert(task->arglen == sizeof(int)); + const int max_elements = *((const int*)task->args); + + FieldID src_fid = *(task->regions[0].privilege_fields.begin()); + FieldID dst_fid = *(task->regions[1].privilege_fields.begin()); + + const AccessorRO src_acc(regions[0], src_fid); + const AccessorRO dst_acc(regions[1], dst_fid); + + Rect<1> rect = runtime->get_index_space_domain(ctx, + task->regions[1].region.get_index_space()); + + // This is the checking task so we can just do the slow path + bool all_passed = true; + int i = 0; + for (PointInRectIterator<1> pir(rect); pir(); pir++) + { + double l2, l1, r1, r2; + if (pir[0] < 2) + l2 = src_acc[0]; + else + l2 = src_acc[*pir - 
2]; + if (pir[0] < 1) + l1 = src_acc[0]; + else + l1 = src_acc[*pir - 1]; + if (pir[0] > (max_elements-2)) + r1 = src_acc[max_elements-1]; + else + r1 = src_acc[*pir + 1]; + if (pir[0] > (max_elements-3)) + r2 = src_acc[max_elements-1]; + else + r2 = src_acc[*pir + 2]; + + double expected = (-l2 + 8.0*l1 - 8.0*r1 + r2) / 12.0; + double received = dst_acc[*pir]; + if (i == 0 || i == max_elements-1) printf("result %d, %f,ptr %p, src %p\n", i, received, dst_acc.ptr(rect.lo), src_acc.ptr(rect.lo)); + if (my_ptr[i].deriv != received) { + printf("transfer error %d, %f\n", i, my_ptr[i].deriv); + } + i++; + // Probably shouldn't bitwise compare floating point + // numbers but the order of operations are the same so they + // should be bitwise equal. + if (expected != received) + all_passed = false; + } + printf("CHECK, %f, %f\n", my_ptr[0].deriv, my_ptr[max_elements-1].deriv); + if (all_passed) + printf("SUCCESS!\n"); + else + printf("FAILURE!\n"); +} + +int main(int argc, char **argv) +{ + Runtime::set_top_level_task_id(TOP_LEVEL_TASK_ID); + + { + TaskVariantRegistrar registrar(TOP_LEVEL_TASK_ID, "top_level"); + registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); + Runtime::preregister_task_variant(registrar, "top_level"); + } + + { + TaskVariantRegistrar registrar(INIT_FIELD_TASK_ID, "init_field"); + registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); + Runtime::preregister_task_variant(registrar, "init_field"); + } + + { + TaskVariantRegistrar registrar(STENCIL_TASK_ID, "stencil"); + registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); + Runtime::preregister_task_variant(registrar, "stencil"); + } + + { + TaskVariantRegistrar registrar(CHECK_TASK_ID, "check"); + registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); + Runtime::preregister_task_variant(registrar, "check"); + } + + return Runtime::start(argc, argv); +} diff --git a/examples/attach_array_no_cpy_soa/CMakeLists.txt 
#------------------------------------------------------------------------------#
# Copyright 2017 Kitware, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#------------------------------------------------------------------------------#

cmake_minimum_required(VERSION 3.1)
project(LegionExample_attach_array_no_cpy_soa)

include(CheckCXXCompilerFlag)

check_cxx_compiler_flag( "-std=c++11" CXX11_COMPILER)

if(CXX11_COMPILER)
  set(CMAKE_CXX_STANDARD 11)
else()
  message(FATAL_ERROR "C++11 compatible compiler not found")
endif()

# Only search if we're building stand-alone and not as part of Legion
if(NOT Legion_SOURCE_DIR)
  find_package(Legion REQUIRED)
endif()

# The only source file in this directory is attach_array.cc. The target and
# test are named after the directory so they cannot collide with the
# attach_file example when several examples are configured in one build tree.
add_executable(attach_array_no_cpy_soa attach_array.cc)
target_link_libraries(attach_array_no_cpy_soa Legion::Legion)
if(Legion_ENABLE_TESTING)
  add_test(NAME attach_array_no_cpy_soa
           COMMAND $<TARGET_FILE:attach_array_no_cpy_soa>)
endif()
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +ifndef LG_RT_DIR +$(error LG_RT_DIR variable is not defined, aborting build) +endif + +# Flags for directing the runtime makefile what to include +DEBUG ?= 1 # Include debugging symbols +OUTPUT_LEVEL ?= LEVEL_DEBUG # Compile time logging level +USE_CUDA ?= 0 # Include CUDA support (requires CUDA) +USE_GASNET ?= 0 # Include GASNet support (requires GASNet) +USE_HDF ?= 0 # Include HDF5 support (requires HDF5) +ALT_MAPPERS ?= 0 # Include alternative mappers (not recommended) + +# Put the binary file name here +OUTFILE ?= attach_array +# List all the application source files here +GEN_SRC ?= attach_array.cc # .cc files +GEN_GPU_SRC ?= # .cu files + +# You can modify these variables, some will be appended to by the runtime makefile +INC_FLAGS ?= +CC_FLAGS ?= +NVCC_FLAGS ?= +GASNET_FLAGS ?= +LD_FLAGS ?= +# For Point and Rect typedefs +CC_FLAGS += -std=c++11 + +########################################################################### +# +# Don't change anything below here +# +########################################################################### + +include $(LG_RT_DIR)/runtime.mk + diff --git a/examples/attach_array_no_cpy_soa/attach_array.cc b/examples/attach_array_no_cpy_soa/attach_array.cc new file mode 100644 index 0000000000..7f3f9b93cc --- /dev/null +++ b/examples/attach_array_no_cpy_soa/attach_array.cc @@ -0,0 +1,375 @@ +/* Copyright 2017 Stanford University + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include +#include +#include +#include +#include +#ifdef USE_HDF +#include +#endif +#include "legion.h" +#include +#include "mem_impl.h" +#include "inst_impl.h" +#include "runtime_impl.h" +using namespace Legion; + +template +using AccessorRO = FieldAccessor >; +template +using AccessorWD = FieldAccessor >; + +/* + * In this example we illustrate how the Legion + * programming model supports multiple partitions + * of the same logical region and the benefits it + * provides by allowing multiple views onto the + * same logical region. We compute a simple 5-point + * 1D stencil using the standard formula: + * f'(x) = (-f(x+2h) + 8f(x+h) - 8f(x-h) + f(x-2h))/12h + * For simplicity we'll assume h=1. 
+ */ + +enum TaskIDs { + TOP_LEVEL_TASK_ID, + INIT_FIELD_TASK_ID, + STENCIL_TASK_ID, + CHECK_TASK_ID, +}; + +enum FieldIDs { + FID_VAL, + FID_DERIV, + FID_CP +}; + +typedef struct { + double check; + double deriv; +}deriv_t; + +deriv_t *my_ptr= NULL; + +void top_level_task(const Task *task, + const std::vector ®ions, + Context ctx, Runtime *runtime) +{ + int num_elements = 1024; + int num_subregions = 4; + + // Check for any command line arguments + { + const InputArgs &command_args = Runtime::get_input_args(); + for (int i = 1; i < command_args.argc; i++) + { + if (!strcmp(command_args.argv[i],"-n")) + num_elements = atoi(command_args.argv[++i]); + if (!strcmp(command_args.argv[i],"-b")) + num_subregions = atoi(command_args.argv[++i]); + } + } + printf("Running stencil computation for %d elements...\n", num_elements); + printf("Partitioning data into %d sub-regions...\n", num_subregions); + + Rect<1> elem_rect(0,num_elements-1); + IndexSpaceT<1> is = runtime->create_index_space(ctx, elem_rect); + FieldSpace fs = runtime->create_field_space(ctx); + { + FieldAllocator allocator = + runtime->create_field_allocator(ctx, fs); + allocator.allocate_field(sizeof(double),FID_VAL); + allocator.allocate_field(sizeof(deriv_t),FID_DERIV); + } + LogicalRegion stencil_lr = runtime->create_logical_region(ctx, is, fs); + + PhysicalRegion stencil_val_pr; + double *val_ptr = (double*)malloc(sizeof(double)*(num_elements)); + for (int i = 0; i < num_elements; i++) { + val_ptr[i] = drand48(); + } + std::map field_pointer_map_val; + field_pointer_map_val[FID_VAL] = val_ptr; + printf("Attach array fid %d, ptr %p\n", FID_VAL, val_ptr); + // stencil_val_pr = runtime->attach_fortran_array(ctx, stencil_lr, stencil_lr, field_pointer_map_val, + // LEGION_FILE_READ_WRITE); + + PhysicalRegion stencil_deriv_pr; + deriv_t *deriv_ptr = (deriv_t*)malloc(sizeof(deriv_t)*(num_elements)); + std::map field_pointer_map_deriv; + field_pointer_map_deriv[FID_DERIV] = deriv_ptr; + 
field_pointer_map_deriv[FID_VAL] = val_ptr; + printf("Attach arrray fid %d, ptr %p\n", FID_DERIV, deriv_ptr); + stencil_deriv_pr = runtime->attach_array_soa(ctx, stencil_lr, stencil_lr, field_pointer_map_deriv, 0); + my_ptr = deriv_ptr; + + Rect<1> color_bounds(0,num_subregions-1); + IndexSpaceT<1> color_is = runtime->create_index_space(ctx, color_bounds); + + IndexPartition disjoint_ip = + runtime->create_equal_partition(ctx, is, color_is); + const int block_size = (num_elements + num_subregions - 1) / num_subregions; + Matrix<1,1> transform; + transform[0][0] = block_size; + Rect<1> extent(-2, block_size + 1); + IndexPartition ghost_ip = + runtime->create_partition_by_restriction(ctx, is, color_is, transform, extent); + + LogicalPartition disjoint_lp = + runtime->get_logical_partition(ctx, stencil_lr, disjoint_ip); + LogicalPartition ghost_lp = + runtime->get_logical_partition(ctx, stencil_lr, ghost_ip); + + ArgumentMap arg_map; + +#if 0 + IndexLauncher init_launcher(INIT_FIELD_TASK_ID, color_is, + TaskArgument(NULL, 0), arg_map); + init_launcher.add_region_requirement( + RegionRequirement(disjoint_lp, 0/*projection ID*/, + WRITE_DISCARD, EXCLUSIVE, stencil_lr)); + init_launcher.add_field(0, FID_VAL); + runtime->execute_index_space(ctx, init_launcher); +#endif + + IndexLauncher stencil_launcher(STENCIL_TASK_ID, color_is, + TaskArgument(&num_elements, sizeof(num_elements)), arg_map); + stencil_launcher.add_region_requirement( + RegionRequirement(ghost_lp, 0/*projection ID*/, + READ_ONLY, EXCLUSIVE, stencil_lr)); + stencil_launcher.add_field(0, FID_VAL); + stencil_launcher.add_region_requirement( + RegionRequirement(disjoint_lp, 0/*projection ID*/, + READ_WRITE, EXCLUSIVE, stencil_lr)); + stencil_launcher.add_field(1, FID_DERIV); + runtime->execute_index_space(ctx, stencil_launcher); + + + // Finally, we launch a single task to check the results. 
+ TaskLauncher check_launcher(CHECK_TASK_ID, + TaskArgument(&num_elements, sizeof(num_elements))); + check_launcher.add_region_requirement( + RegionRequirement(stencil_lr, READ_ONLY, EXCLUSIVE, stencil_lr)); + check_launcher.add_field(0, FID_VAL); + check_launcher.add_region_requirement( + RegionRequirement(stencil_lr, READ_ONLY, EXCLUSIVE, stencil_lr)); + check_launcher.add_field(1, FID_DERIV); + runtime->execute_task(ctx, check_launcher); + + // Clean up our region, index space, and field space + runtime->destroy_logical_region(ctx, stencil_lr); + runtime->destroy_field_space(ctx, fs); + runtime->destroy_index_space(ctx, is); + printf("End of TOP_LEVEL_TASK, %f, %f\n", my_ptr[0].deriv, my_ptr[num_elements].deriv); +} + +// The standard initialize field task from earlier examples +void init_field_task(const Task *task, + const std::vector ®ions, + Context ctx, Runtime *runtime) +{ + assert(regions.size() == 1); + assert(task->regions.size() == 1); + assert(task->regions[0].privilege_fields.size() == 1); + + FieldID fid = *(task->regions[0].privilege_fields.begin()); + const int point = task->index_point.point_data[0]; + printf("Initializing field %d for block %d...\n", fid, point); + + const AccessorWD acc(regions[0], fid); + + int i = point; + Rect<1> rect = runtime->get_index_space_domain(ctx, + task->regions[0].region.get_index_space()); + for (PointInRectIterator<1> pir(rect); pir(); pir++) { + acc[*pir] = drand48(); + i++; + } +} + +// Our stencil tasks is interesting because it +// has both slow and fast versions depending +// on whether or not its bounds have been clamped. 
+void stencil_task(const Task *task, + const std::vector ®ions, + Context ctx, Runtime *runtime) +{ + assert(regions.size() == 2); + assert(task->regions.size() == 2); + assert(task->regions[0].privilege_fields.size() == 1); + assert(task->regions[1].privilege_fields.size() == 1); + assert(task->arglen == sizeof(int)); + const int max_elements = *((const int*)task->args); + const int point = task->index_point.point_data[0]; + + FieldID read_fid = *(task->regions[0].privilege_fields.begin()); + FieldID write_fid = *(task->regions[1].privilege_fields.begin()); + + const AccessorRO read_acc(regions[0], read_fid); + const AccessorWD write_acc(regions[1], write_fid); + + Rect<1> rect = runtime->get_index_space_domain(ctx, + task->regions[1].region.get_index_space()); + // If we are on the edges of the entire space we are + // operating over, then we're going to do the slow + // path which checks for clamping when necessary. + // If not, then we can do the fast path without + // any checks. + deriv_t deriv; + if ((rect.lo[0] < 2) || (rect.hi[0] > (max_elements-3))) + { + printf("Running slow stencil path for point %d...\n", point); + // Note in the slow path that there are checks which + // perform clamps when necessary before reading values. 
+ for (PointInRectIterator<1> pir(rect); pir(); pir++) + { + double l2, l1, r1, r2; + if (pir[0] < 2) + l2 = read_acc[0]; + else + l2 = read_acc[*pir - 2]; + if (pir[0] < 1) + l1 = read_acc[0]; + else + l1 = read_acc[*pir - 1]; + if (pir[0] > (max_elements-2)) + r1 = read_acc[max_elements-1]; + else + r1 = read_acc[*pir + 1]; + if (pir[0] > (max_elements-3)) + r2 = read_acc[max_elements-1]; + else + r2 = read_acc[*pir + 2]; + + double result = (-l2 + 8.0*l1 - 8.0*r1 + r2) / 12.0; + deriv.deriv = result; + deriv.check = 1.0; + write_acc[*pir] = deriv; + } + } + else + { + printf("Running fast stencil path for point %d...\n", point); + // In the fast path, we don't need any checks + for (PointInRectIterator<1> pir(rect); pir(); pir++) + { + double l2 = read_acc[*pir - 2]; + double l1 = read_acc[*pir - 1]; + double r1 = read_acc[*pir + 1]; + double r2 = read_acc[*pir + 2]; + + double result = (-l2 + 8.0*l1 - 8.0*r1 + r2) / 12.0; + deriv.deriv = result; + deriv.check = 1.0; + write_acc[*pir] = deriv; + } + } +} + +void check_task(const Task *task, + const std::vector ®ions, + Context ctx, Runtime *runtime) +{ + assert(regions.size() == 2); + assert(task->regions.size() == 2); + assert(task->regions[0].privilege_fields.size() == 1); + assert(task->regions[1].privilege_fields.size() == 1); + assert(task->arglen == sizeof(int)); + const int max_elements = *((const int*)task->args); + + FieldID src_fid = *(task->regions[0].privilege_fields.begin()); + FieldID dst_fid = *(task->regions[1].privilege_fields.begin()); + + const AccessorRO src_acc(regions[0], src_fid); + const AccessorRO dst_acc(regions[1], dst_fid); + + Rect<1> rect = runtime->get_index_space_domain(ctx, + task->regions[1].region.get_index_space()); + + // This is the checking task so we can just do the slow path + bool all_passed = true; + int i = 0; + for (PointInRectIterator<1> pir(rect); pir(); pir++) + { + double l2, l1, r1, r2; + if (pir[0] < 2) + l2 = src_acc[0]; + else + l2 = src_acc[*pir - 2]; + if 
(pir[0] < 1) + l1 = src_acc[0]; + else + l1 = src_acc[*pir - 1]; + if (pir[0] > (max_elements-2)) + r1 = src_acc[max_elements-1]; + else + r1 = src_acc[*pir + 1]; + if (pir[0] > (max_elements-3)) + r2 = src_acc[max_elements-1]; + else + r2 = src_acc[*pir + 2]; + + double expected = (-l2 + 8.0*l1 - 8.0*r1 + r2) / 12.0; + deriv_t received = dst_acc[*pir]; + if (i == 0 || i == max_elements-1) printf("result %d, %f, %f, ptr %p, src %p\n", i, received.deriv, received.check, dst_acc.ptr(rect.lo), src_acc.ptr(rect.lo)); + if (my_ptr[i].deriv != received.deriv) { + printf("transfer error %d, %f\n", i, my_ptr[i].deriv); + } + i++; + // Probably shouldn't bitwise compare floating point + // numbers but the order of operations are the same so they + // should be bitwise equal. + if (expected != received.deriv) + all_passed = false; + } + printf("CHECK, %f, %f\n", my_ptr[0].deriv, my_ptr[max_elements-1].deriv); + if (all_passed) + printf("SUCCESS!\n"); + else + printf("FAILURE!\n"); +} + +int main(int argc, char **argv) +{ + Runtime::set_top_level_task_id(TOP_LEVEL_TASK_ID); + + { + TaskVariantRegistrar registrar(TOP_LEVEL_TASK_ID, "top_level"); + registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); + Runtime::preregister_task_variant(registrar, "top_level"); + } + + { + TaskVariantRegistrar registrar(INIT_FIELD_TASK_ID, "init_field"); + registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); + Runtime::preregister_task_variant(registrar, "init_field"); + } + + { + TaskVariantRegistrar registrar(STENCIL_TASK_ID, "stencil"); + registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); + Runtime::preregister_task_variant(registrar, "stencil"); + } + + { + TaskVariantRegistrar registrar(CHECK_TASK_ID, "check"); + registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); + Runtime::preregister_task_variant(registrar, "check"); + } + + return Runtime::start(argc, argv); +} diff --git a/examples/attach_file/attach_file.cc 
b/examples/attach_file/attach_file.cc index dcbe2e4857..96dd013902 100644 --- a/examples/attach_file/attach_file.cc +++ b/examples/attach_file/attach_file.cc @@ -466,4 +466,4 @@ int main(int argc, char **argv) } return Runtime::start(argc, argv); -} +} \ No newline at end of file diff --git a/examples/circuit/circuit.cc b/examples/circuit/circuit.cc index e2f8ec7f96..5c154ed5b1 100644 --- a/examples/circuit/circuit.cc +++ b/examples/circuit/circuit.cc @@ -364,4 +364,3 @@ void allocate_locator_fields(Context ctx, Runtime *runtime, FieldSpace locator_s allocator.allocate_field(sizeof(PointerLocation), FID_LOCATOR); runtime->attach_name(locator_space, FID_LOCATOR, "locator"); } - diff --git a/runtime/legion/legion.cc b/runtime/legion/legion.cc index 6b4dbc4748..b2f13f16eb 100644 --- a/runtime/legion/legion.cc +++ b/runtime/legion/legion.cc @@ -5744,6 +5744,68 @@ namespace Legion { { runtime->detach_external_resource(ctx, region); } + + //-------------------------------------------------------------------------- + PhysicalRegion Runtime::attach_array_soa(Context ctx, + LogicalRegion handle, + LogicalRegion parent, + const std::map &field_pointer_map, + int c_f_layout_flag) + //-------------------------------------------------------------------------- + { + ExternalResource resource; + if (c_f_layout_flag == 0) { + resource = EXTERNAL_FORTRAN_ARRAY; + } else { + resource = EXTERNAL_C_ARRAY; + } + AttachLauncher launcher(resource, handle, parent); + launcher.attach_array(field_pointer_map, 0); + launcher.aos_base_ptr = NULL; + launcher.aos_stride = 0; + return runtime->attach_external_resource(ctx, launcher); + } + + //-------------------------------------------------------------------------- + void Runtime::detach_array(Context ctx, PhysicalRegion region) + //-------------------------------------------------------------------------- + { + runtime->detach_external_resource(ctx, region); + } + + //-------------------------------------------------------------------------- 
+ PhysicalRegion Runtime::attach_array_aos(Context ctx, + LogicalRegion handle, + LogicalRegion parent, + const void* array_ptr, + size_t stride, + const std::map &field_offset, + int c_f_layout_flag) + //-------------------------------------------------------------------------- + { + unsigned char* base_ptr = (unsigned char*)array_ptr; + std::map::const_iterator it_offset = field_offset.begin(); + std::map field_pointer_map; + while(it_offset != field_offset.end()) + { + size_t offset = it_offset->second; + FieldID fid = it_offset->first; + unsigned char *ptr = base_ptr + offset; + field_pointer_map[fid] = ptr; + it_offset ++; + } + ExternalResource resource; + if (c_f_layout_flag == 0) { + resource = EXTERNAL_FORTRAN_ARRAY; + } else { + resource = EXTERNAL_C_ARRAY; + } + AttachLauncher launcher(resource, handle, parent); + launcher.attach_array(field_pointer_map, 1); + launcher.aos_base_ptr = base_ptr; + launcher.aos_stride = stride; + return runtime->attach_external_resource(ctx, launcher); + } //-------------------------------------------------------------------------- void Runtime::issue_copy_operation(Context ctx,const CopyLauncher &launcher) diff --git a/runtime/legion/legion.h b/runtime/legion/legion.h index 200ebb072d..c5a8e1f319 100644 --- a/runtime/legion/legion.h +++ b/runtime/legion/legion.h @@ -1750,6 +1750,8 @@ namespace Legion { inline void attach_hdf5(const char *file_name, const std::map &field_map, LegionFileMode mode); + inline void attach_array(const std::map &field_pointer_map, + int layoutflag); public: inline void add_field_pointer(FieldID fid, void *ptr); inline void set_pitch(unsigned dim, size_t pitch); @@ -1767,6 +1769,9 @@ namespace Legion { // Data for arrays std::map field_pointers; std::vector pitches; + int layout_flag; // SOA 0, AOS 1 + unsigned char* aos_base_ptr; + size_t aos_stride; public: // Inform the runtime about any static dependences // These will be ignored outside of static traces @@ -5181,6 +5186,21 @@ namespace Legion 
{ LEGION_DEPRECATED("Detaching generic file type is deprecated " "in favor of generic detach interface.") void detach_file(Context ctx, PhysicalRegion region); + + PhysicalRegion attach_array_soa(Context ctx, + LogicalRegion handle, LogicalRegion parent, + const std::map &field_pointer_map, + int c_f_layout_flag); + + void detach_array(Context ctx, PhysicalRegion region); + + PhysicalRegion attach_array_aos(Context ctx, + LogicalRegion handle, + LogicalRegion parent, + const void* array_ptr, + size_t stride, + const std::map &field_offset, + int c_f_layout_flag); public: //------------------------------------------------------------------------ // Copy Operations diff --git a/runtime/legion/legion.inl b/runtime/legion/legion.inl index 70693a5888..2803d86c0c 100644 --- a/runtime/legion/legion.inl +++ b/runtime/legion/legion.inl @@ -3287,6 +3287,16 @@ namespace Legion { mode = m; field_files = field_map; } + + //-------------------------------------------------------------------------- + inline void AttachLauncher::attach_array(const std::map &field_pointer_map, + int layoutflag) + //-------------------------------------------------------------------------- + { + file_name = "ARRAY"; + layout_flag = layoutflag; + field_pointers = field_pointer_map; + } //-------------------------------------------------------------------------- inline void AttachLauncher::add_field_pointer(FieldID fid, void *ptr) diff --git a/runtime/legion/legion_ops.cc b/runtime/legion/legion_ops.cc index 9a270a2620..63f32ff9b4 100644 --- a/runtime/legion/legion_ops.cc +++ b/runtime/legion/legion_ops.cc @@ -13693,12 +13693,52 @@ namespace Legion { } case EXTERNAL_C_ARRAY: { - assert(false); // TODO: Implement this + if (launcher.field_pointers.empty()) + { + REPORT_LEGION_WARNING(LEGION_WARNING_HDF5_ATTACH_OPERATION, + "ARRAY ATTACH OPERATION ISSUED WITH NO " + "FIELD MAPPINGS IN TASK %s (ID %lld)! 
DID YOU " + "FORGET THEM?!?", parent_ctx->get_task_name(), + parent_ctx->get_unique_id()); + } + layout_flag = launcher.layout_flag; + aos_base_ptr = launcher.aos_base_ptr; + aos_stride = launcher.aos_stride; + // Construct the region requirement for this task + requirement = RegionRequirement(launcher.handle, WRITE_DISCARD, + EXCLUSIVE, launcher.parent); + for (std::map::const_iterator it = + launcher.field_pointers.begin(); it != + launcher.field_pointers.end(); it++) + { + requirement.add_field(it->first); + field_pointers_map[it->first] = it->second; + } break; } case EXTERNAL_FORTRAN_ARRAY: { - assert(false); // TODO implement this + if (launcher.field_pointers.empty()) + { + REPORT_LEGION_WARNING(LEGION_WARNING_HDF5_ATTACH_OPERATION, + "ARRAY ATTACH OPERATION ISSUED WITH NO " + "FIELD MAPPINGS IN TASK %s (ID %lld)! DID YOU " + "FORGET THEM?!?", parent_ctx->get_task_name(), + parent_ctx->get_unique_id()); + } + layout_flag = launcher.layout_flag; + aos_base_ptr = launcher.aos_base_ptr; + aos_stride = launcher.aos_stride; + // Construct the region requirement for this task + requirement = RegionRequirement(launcher.handle, WRITE_DISCARD, + EXCLUSIVE, launcher.parent); + for (std::map::const_iterator it = + launcher.field_pointers.begin(); it != + launcher.field_pointers.end(); it++) + { + requirement.add_field(it->first); + field_pointers_map[it->first] = it->second; + } break; } default: @@ -13936,12 +13976,40 @@ namespace Legion { } case EXTERNAL_C_ARRAY: { - assert(false); + // First build the set of field paths + std::vector field_ids(field_pointers_map.size()); + std::vector field_pointers(field_pointers_map.size()); + unsigned idx = 0; + for (std::map::const_iterator it = + field_pointers_map.begin(); it != field_pointers_map.end(); it++, idx++) + { + field_ids[idx] = it->first; + field_pointers[idx] = it->second; + } + // Now ask the low-level runtime to create the instance + result = node->create_array_instance(resource, field_ids, sizes, 
field_pointers, + layout_flag, aos_base_ptr, aos_stride); + constraints.specialized_constraint = + SpecializedConstraint(NORMAL_SPECIALIZE); break; } case EXTERNAL_FORTRAN_ARRAY: { - assert(false); + // First build the set of field paths + std::vector field_ids(field_pointers_map.size()); + std::vector field_pointers(field_pointers_map.size()); + unsigned idx = 0; + for (std::map::const_iterator it = + field_pointers_map.begin(); it != field_pointers_map.end(); it++, idx++) + { + field_ids[idx] = it->first; + field_pointers[idx] = it->second; + } + // Now ask the low-level runtime to create the instance + result = node->create_array_instance(resource, field_ids, sizes, field_pointers, + layout_flag, aos_base_ptr, aos_stride); + constraints.specialized_constraint = + SpecializedConstraint(NORMAL_SPECIALIZE); break; } default: @@ -14292,10 +14360,10 @@ namespace Legion { assert(!manager->is_reduction_manager()); #endif InstanceManager *inst_manager = manager->as_instance_manager(); - if (!inst_manager->is_attached_file()) + /*if (!inst_manager->is_attached_file()) REPORT_LEGION_ERROR(ERROR_ILLEGAL_DETACH_OPERATION, "Illegal detach operation on a physical region which " - "was not attached!") + "was not attached!") */ std::set applied_conditions; ApEvent detach_event = runtime->forest->detach_file(requirement, this, 0/*idx*/, diff --git a/runtime/legion/legion_ops.h b/runtime/legion/legion_ops.h index a6361c69b8..29764b11f4 100644 --- a/runtime/legion/legion_ops.h +++ b/runtime/legion/legion_ops.h @@ -2660,11 +2660,15 @@ namespace Legion { RestrictInfo restrict_info; const char *file_name; std::map field_map; + std::map field_pointers_map; LegionFileMode file_mode; PhysicalRegion region; unsigned parent_req_index; std::set map_applied_conditions; InstanceManager *file_instance; + int layout_flag; // SOA 0, AOS 1 + unsigned char* aos_base_ptr; + size_t aos_stride; }; /** diff --git a/runtime/legion/region_tree.cc b/runtime/legion/region_tree.cc index 
05442b99b6..7caef4078b 100644 --- a/runtime/legion/region_tree.cc +++ b/runtime/legion/region_tree.cc @@ -7943,7 +7943,7 @@ namespace Legion { } #ifdef DEBUG_LEGION assert(layout != NULL); - assert(layout->constraints->specialized_constraint.is_file()); + // assert(layout->constraints->specialized_constraint.is_file()); #endif DistributedID did = context->runtime->get_available_distributed_id(false); MemoryManager *memory = diff --git a/runtime/legion/region_tree.h b/runtime/legion/region_tree.h index 51b8db29c0..8979cd8821 100644 --- a/runtime/legion/region_tree.h +++ b/runtime/legion/region_tree.h @@ -929,6 +929,11 @@ namespace Legion { const std::vector &field_sizes, const std::vector &field_files, bool read_only) = 0; + virtual PhysicalInstance create_array_instance(ExternalResource resource, + const std::vector &field_ids, + const std::vector &field_sizes, + const std::vector &field_pointers, + int layout_flag, unsigned char* aos_base_ptr, size_t aos_stride) = 0; public: virtual void get_launch_space_domain(Domain &launch_domain) = 0; virtual void validate_slicing(const std::vector &slice_spaces, @@ -1149,6 +1154,11 @@ namespace Legion { const std::vector &field_sizes, const std::vector &field_files, bool read_only); + virtual PhysicalInstance create_array_instance(ExternalResource resource, + const std::vector &field_ids, + const std::vector &field_sizes, + const std::vector &field_pointers, + int layout_flag, unsigned char* aos_base_ptr, size_t aos_stride); public: virtual void get_launch_space_domain(Domain &launch_domain); virtual void validate_slicing(const std::vector &slice_spaces, diff --git a/runtime/legion/region_tree.inl b/runtime/legion/region_tree.inl index b2d4939af7..f1464b9db2 100644 --- a/runtime/legion/region_tree.inl +++ b/runtime/legion/region_tree.inl @@ -2768,6 +2768,52 @@ namespace Legion { #endif return result; } + + //-------------------------------------------------------------------------- + template + PhysicalInstance 
IndexSpaceNodeT::create_array_instance( + ExternalResource resource, + const std::vector &field_ids, + const std::vector &field_sizes, + const std::vector &field_pointers, + int layout_flag, unsigned char* aos_base_ptr, size_t aos_stride) + //-------------------------------------------------------------------------- + { + DETAILED_PROFILER(context->runtime, REALM_CREATE_INSTANCE_CALL); + // Have to wait for the index space to be ready if necessary + Realm::ZIndexSpace local_space; + get_realm_index_space(local_space, true/*tight*/); + // No profiling for these kinds of instances currently + Realm::ProfilingRequestSet requests; + int c_f_resource = 0; + if (resource == EXTERNAL_C_ARRAY) { + c_f_resource = 1; + } + PhysicalInstance result; + if (layout_flag == 0) { // SOA + LgEvent ready(PhysicalInstance::create_array_instance_SOA(result, + local_space, + field_ids, + field_sizes, + field_pointers, + c_f_resource, + requests)); + ready.lg_wait(); + } else { // AOS + LgEvent ready(PhysicalInstance::create_array_instance_AOS(result, + local_space, + field_ids, + field_sizes, + field_pointers, + aos_base_ptr, aos_stride, + c_f_resource, + requests)); + ready.lg_wait(); + } + // assert(0 && "no HDF5 support"); + // result = PhysicalInstance::NO_INST; + return result; + } //-------------------------------------------------------------------------- template diff --git a/runtime/realm/attach_array.cc b/runtime/realm/attach_array.cc new file mode 100644 index 0000000000..99629135c7 --- /dev/null +++ b/runtime/realm/attach_array.cc @@ -0,0 +1,174 @@ +/* Copyright 2017 Stanford University, NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// HDF5-specific instance layouts and accessors +#include "inst_impl.h" +#include "runtime_impl.h" +#include "mem_impl.h" + +#include +#include + +namespace Realm { + + template + /*static*/ Event RegionInstance::create_array_instance_SOA(RegionInstance& inst, + const ZIndexSpace& space, + const std::vector &field_ids, + const std::vector &field_sizes, + const std::vector &field_pointers, + int resource, + const ProfilingRequestSet& reqs, + Event wait_on /*= Event::NO_EVENT*/) + { + Memory memory = Machine::MemoryQuery(Machine::get_machine()) + .local_address_space() + .only_kind(Memory::SYSTEM_MEM) + .first(); + assert(memory.exists()); + + InstanceLayout *layout = new InstanceLayout; + layout->bytes_used = 0; + layout->alignment_reqd = 0; // no allocation being made + layout->space = space; + layout->piece_lists.resize(field_sizes.size()); + + LocalCPUMemory *m_impl = (LocalCPUMemory *)get_runtime()->get_memory_impl(memory); + unsigned char* base = (unsigned char*)m_impl->base; + unsigned char* ptr = NULL; + for(size_t i = 0; i < field_sizes.size(); i++) { + FieldID id = field_ids[i]; + InstanceLayoutGeneric::FieldLayout& fl = layout->fields[id]; + fl.list_idx = i; + fl.rel_offset = 0; + fl.size_in_bytes = field_sizes[i]; + + // create a single piece (for non-empty index spaces) + if(!space.empty()) { + AffineLayoutPiece *alp = new AffineLayoutPiece; + alp->bounds = space.bounds; + ptr = (unsigned char*)field_pointers[i]; + alp->offset = (size_t)(ptr - base); + size_t stride = field_sizes[i]; + /* fortran layout */ + if (resource == 0) { 
+ for(int j = 0; j < N; j++) { + alp->strides[j] = stride; + stride *= (space.bounds.hi[j] - space.bounds.lo[j] + 1); + } + } else { /* C layout */ + for(int j = N - 1; j >= 0; j--) { + alp->strides[j] = stride; + stride *= (space.bounds.hi[j] - space.bounds.lo[j] + 1); + } + } + layout->piece_lists[i].pieces.push_back(alp); + } + } + + Event e = create_instance(inst, memory, layout, reqs, wait_on); + RegionInstanceImpl *inst_impl = get_runtime()->get_instance_impl(inst); + printf("inst offset %lu\n", inst_impl->metadata.inst_offset); + return e; + } + +#define DOIT_ARRAY_SOA(N,T) \ + template Event RegionInstance::create_array_instance_SOA(RegionInstance&, \ + const ZIndexSpace&, \ + const std::vector&, \ + const std::vector&, \ + const std::vector&, \ + int, \ + const ProfilingRequestSet&, \ + Event); + FOREACH_NT(DOIT_ARRAY_SOA) + + template + /*static*/ Event RegionInstance::create_array_instance_AOS(RegionInstance& inst, + const ZIndexSpace& space, + const std::vector &field_ids, + const std::vector &field_sizes, + const std::vector &field_pointers, + unsigned char* aos_base_ptr, size_t aos_stride, + int resource, + const ProfilingRequestSet& reqs, + Event wait_on /*= Event::NO_EVENT*/) + { + Memory memory = Machine::MemoryQuery(Machine::get_machine()) + .local_address_space() + .only_kind(Memory::SYSTEM_MEM) + .first(); + assert(memory.exists()); + + InstanceLayout *layout = new InstanceLayout; + layout->bytes_used = 0; + layout->alignment_reqd = 0; // no allocation being made + layout->space = space; + layout->piece_lists.resize(field_sizes.size()); + + LocalCPUMemory *m_impl = (LocalCPUMemory *)get_runtime()->get_memory_impl(memory); + unsigned char* base = (unsigned char*)m_impl->base; + for(size_t i = 0; i < field_sizes.size(); i++) { + FieldID id = field_ids[i]; + InstanceLayoutGeneric::FieldLayout& fl = layout->fields[id]; + fl.list_idx = i; + if (i > 0) { + fl.rel_offset = (size_t)(((unsigned char*)field_pointers[i]) - ((unsigned 
char*)field_pointers[i-1])); + } else { + fl.rel_offset = (size_t)(((unsigned char*)field_pointers[i]) - aos_base_ptr); + } + fl.size_in_bytes = field_sizes[i]; + + // create a single piece (for non-empty index spaces) + if(!space.empty()) { + AffineLayoutPiece *alp = new AffineLayoutPiece; + alp->bounds = space.bounds; + alp->offset = (size_t)(aos_base_ptr - base); + size_t stride = aos_stride; + /* fortran layout */ + if (resource == 0) { + for(int j = 0; j < N; j++) { + alp->strides[j] = stride; + stride *= (space.bounds.hi[j] - space.bounds.lo[j] + 1); + } + } else { /* C layout */ + for(int j = N-1; j >= 0; j--) { + alp->strides[j] = stride; + stride *= (space.bounds.hi[j] - space.bounds.lo[j] + 1); + } + } + layout->piece_lists[i].pieces.push_back(alp); + } + } + + Event e = create_instance(inst, memory, layout, reqs, wait_on); + RegionInstanceImpl *inst_impl = get_runtime()->get_instance_impl(inst); + printf("inst offset %lu\n", inst_impl->metadata.inst_offset); + return e; + } + +#define DOIT_ARRAY_AOS(N,T) \ + template Event RegionInstance::create_array_instance_AOS(RegionInstance&, \ + const ZIndexSpace&, \ + const std::vector&, \ + const std::vector&, \ + const std::vector&, \ + unsigned char*, size_t, \ + int, \ + const ProfilingRequestSet&, \ + Event); + FOREACH_NT(DOIT_ARRAY_AOS) + +}; // namespace Realm diff --git a/runtime/realm/hdf5/hdf5_access.cc b/runtime/realm/hdf5/hdf5_access.cc index edcc999a43..99ce678ce0 100644 --- a/runtime/realm/hdf5/hdf5_access.cc +++ b/runtime/realm/hdf5/hdf5_access.cc @@ -81,4 +81,4 @@ namespace Realm { Event); FOREACH_NT(DOIT) -}; // namespace Realm +}; // namespace Realm \ No newline at end of file diff --git a/runtime/realm/inst_layout.h b/runtime/realm/inst_layout.h index 7b084095e5..c494013f69 100644 --- a/runtime/realm/inst_layout.h +++ b/runtime/realm/inst_layout.h @@ -144,7 +144,7 @@ namespace Realm { bool serialize(S& serializer) const; ZPoint strides; - size_t offset; + ptrdiff_t offset; }; template diff --git 
a/runtime/realm/instance.h b/runtime/realm/instance.h index d22a3f4b05..a5cd326dbd 100644 --- a/runtime/realm/instance.h +++ b/runtime/realm/instance.h @@ -146,6 +146,27 @@ namespace Realm { const ProfilingRequestSet& prs, Event wait_on = Event::NO_EVENT); #endif + + template + static Event create_array_instance_SOA(RegionInstance& inst, + const ZIndexSpace& space, + const std::vector &field_ids, + const std::vector &field_sizes, + const std::vector &field_pointers, + int resource, + const ProfilingRequestSet& prs, + Event wait_on = Event::NO_EVENT); + + template + static Event create_array_instance_AOS(RegionInstance& inst, + const ZIndexSpace& space, + const std::vector &field_ids, + const std::vector &field_sizes, + const std::vector &field_pointers, + unsigned char* aos_base_ptr, size_t aos_stride, + int resource, + const ProfilingRequestSet& prs, + Event wait_on = Event::NO_EVENT); void destroy(Event wait_on = Event::NO_EVENT) const; diff --git a/runtime/realm/mem_impl.cc b/runtime/realm/mem_impl.cc index ed6a02b24c..0850f5eab7 100644 --- a/runtime/realm/mem_impl.cc +++ b/runtime/realm/mem_impl.cc @@ -676,7 +676,7 @@ namespace Realm { void *LocalCPUMemory::get_direct_ptr(off_t offset, size_t size) { - assert((offset >= 0) && ((size_t)(offset + size) <= this->size)); +// assert((offset >= 0) && ((size_t)(offset + size) <= this->size)); return (base + offset); } diff --git a/runtime/runtime.mk b/runtime/runtime.mk index 8c9afb0742..7524eb137d 100644 --- a/runtime/runtime.mk +++ b/runtime/runtime.mk @@ -412,7 +412,8 @@ LOW_RUNTIME_SRC += $(LG_RT_DIR)/realm/runtime_impl.cc \ $(LG_RT_DIR)/realm/machine_impl.cc \ $(LG_RT_DIR)/realm/sampling_impl.cc \ $(LG_RT_DIR)/lowlevel.cc \ - $(LG_RT_DIR)/realm/transfer/lowlevel_disk.cc + $(LG_RT_DIR)/realm/transfer/lowlevel_disk.cc \ + $(LG_RT_DIR)/realm/attach_array.cc LOW_RUNTIME_SRC += $(LG_RT_DIR)/realm/numa/numa_module.cc \ $(LG_RT_DIR)/realm/numa/numasysif.cc ifeq ($(strip $(USE_OPENMP)),1) diff --git 
a/tutorial/06_privileges/privileges.cc b/tutorial/06_privileges/privileges.cc index 147beba0cf..61ee566a3c 100644 --- a/tutorial/06_privileges/privileges.cc +++ b/tutorial/06_privileges/privileges.cc @@ -297,4 +297,4 @@ int main(int argc, char **argv) } return Runtime::start(argc, argv); -} +} \ No newline at end of file diff --git a/tutorial/07_partitioning/partitioning.cc b/tutorial/07_partitioning/partitioning.cc index eb61103c47..63b8ce4397 100644 --- a/tutorial/07_partitioning/partitioning.cc +++ b/tutorial/07_partitioning/partitioning.cc @@ -335,4 +335,4 @@ int main(int argc, char **argv) } return Runtime::start(argc, argv); -} +} \ No newline at end of file diff --git a/tutorial/07_partitioning_attach_array_aos/CMakeLists.txt b/tutorial/07_partitioning_attach_array_aos/CMakeLists.txt new file mode 100644 index 0000000000..ebed303c58 --- /dev/null +++ b/tutorial/07_partitioning_attach_array_aos/CMakeLists.txt @@ -0,0 +1,39 @@ +#------------------------------------------------------------------------------# +# Copyright 2017 Kitware, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#------------------------------------------------------------------------------# + +cmake_minimum_required(VERSION 3.1) +project(LegionExample_07_partitioning) + +include(CheckCXXCompilerFlag) + +check_cxx_compiler_flag( "-std=c++11" CXX11_COMPILER) + +if(CXX11_COMPILER) + set(CMAKE_CXX_STANDARD 11) +else() + message(FATAL_ERROR "C++11 compatible compiler not found") +endif() + +# Only search if were building stand-alone and not as part of Legion +if(NOT Legion_SOURCE_DIR) + find_package(Legion REQUIRED) +endif() + +add_executable(partitioning partitioning.cc) +target_link_libraries(partitioning Legion::Legion) +if(Legion_ENABLE_TESTING) + add_test(NAME partitioning COMMAND $) +endif() diff --git a/tutorial/07_partitioning_attach_array_aos/Makefile b/tutorial/07_partitioning_attach_array_aos/Makefile new file mode 100644 index 0000000000..3651b6b749 --- /dev/null +++ b/tutorial/07_partitioning_attach_array_aos/Makefile @@ -0,0 +1,51 @@ +# Copyright 2017 Stanford University +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +ifndef LG_RT_DIR +$(error LG_RT_DIR variable is not defined, aborting build) +endif + +# Flags for directing the runtime makefile what to include +DEBUG := 1 # Include debugging symbols +OUTPUT_LEVEL ?= LEVEL_DEBUG # Compile time logging level +USE_CUDA ?= 0 # Include CUDA support (requires CUDA) +USE_GASNET ?= 0 # Include GASNet support (requires GASNet) +USE_HDF ?= 0 # Include HDF5 support (requires HDF5) +ALT_MAPPERS ?= 0 # Include alternative mappers (not recommended) + +# Put the binary file name here +OUTFILE ?= partitioning +# List all the application source files here +GEN_SRC ?= partitioning.cc # .cc files +GEN_GPU_SRC ?= # .cu files + +# You can modify these variables, some will be appended to by the runtime makefile +INC_FLAGS ?= +CC_FLAGS ?= +NVCC_FLAGS ?= +GASNET_FLAGS ?= +LD_FLAGS ?= +# For Point and Rect typedefs +CC_FLAGS += -std=c++11 + +########################################################################### +# +# Don't change anything below here +# +########################################################################### + +include $(LG_RT_DIR)/runtime.mk + diff --git a/tutorial/07_partitioning_attach_array_aos/partitioning.cc b/tutorial/07_partitioning_attach_array_aos/partitioning.cc new file mode 100644 index 0000000000..4c64ab9240 --- /dev/null +++ b/tutorial/07_partitioning_attach_array_aos/partitioning.cc @@ -0,0 +1,391 @@ +/* Copyright 2017 Stanford University + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#include +#include +#include +#include +#include "legion.h" +using namespace Legion; + +template +using AccessorRO = FieldAccessor >; +template +using AccessorWD = FieldAccessor >; + +enum TaskIDs { + TOP_LEVEL_TASK_ID, + INIT_FIELD_TASK_ID, + DAXPY_TASK_ID, + CHECK_TASK_ID, +}; + +enum FieldIDs { + FID_X, + FID_Y, + FID_Z, +}; + +typedef struct{ + double x; + double y; + double z; +}daxpy_t; + +double get_cur_time() { + struct timeval tv; + struct timezone tz; + double cur_time; + + gettimeofday(&tv, &tz); + cur_time = tv.tv_sec + tv.tv_usec / 1000000.0; + + return cur_time; +} + + +void top_level_task(const Task *task, + const std::vector ®ions, + Context ctx, Runtime *runtime) +{ + int num_elements = 1024; + int num_subregions = 4; + // See if we have any command line arguments to parse + // Note we now have a new command line parameter which specifies + // how many subregions we should make. + { + const InputArgs &command_args = Runtime::get_input_args(); + for (int i = 1; i < command_args.argc; i++) + { + if (!strcmp(command_args.argv[i],"-n")) + num_elements = atoi(command_args.argv[++i]); + if (!strcmp(command_args.argv[i],"-b")) + num_subregions = atoi(command_args.argv[++i]); + } + } + printf("Running daxpy for %d elements...\n", num_elements); + printf("Partitioning data into %d sub-regions...\n", num_subregions); + + // Create our logical regions using the same schemas as earlier examples + Rect<1> elem_rect(0,num_elements-1); + IndexSpace is = runtime->create_index_space(ctx, elem_rect); + runtime->attach_name(is, "is"); + FieldSpace input_fs = runtime->create_field_space(ctx); + runtime->attach_name(input_fs, "input_fs"); + { + FieldAllocator allocator = + runtime->create_field_allocator(ctx, input_fs); + allocator.allocate_field(sizeof(double),FID_X); + runtime->attach_name(input_fs, FID_X, "X"); + allocator.allocate_field(sizeof(double),FID_Y); + runtime->attach_name(input_fs, FID_Y, "Y"); + } + FieldSpace output_fs = 
runtime->create_field_space(ctx); + runtime->attach_name(output_fs, "output_fs"); + { + FieldAllocator allocator = + runtime->create_field_allocator(ctx, output_fs); + allocator.allocate_field(sizeof(double),FID_Z); + runtime->attach_name(output_fs, FID_Z, "Z"); + } + LogicalRegion input_lr = runtime->create_logical_region(ctx, is, input_fs); + runtime->attach_name(input_lr, "input_lr"); + LogicalRegion output_lr = runtime->create_logical_region(ctx, is, output_fs); + runtime->attach_name(output_lr, "output_lr"); + + daxpy_t *array_ptr = (daxpy_t*)malloc(sizeof(daxpy_t)*(num_elements)); + + + std::map offset_input; + offset_input[FID_X] = 0; + offset_input[FID_Y] = sizeof(double); + + PhysicalRegion pr_input = runtime->attach_array_aos(ctx, input_lr, input_lr, array_ptr, sizeof(daxpy_t), offset_input, 0); + + std::map offset_output; + offset_output[FID_Z] = 2*sizeof(double); + + PhysicalRegion pr_output = runtime->attach_array_aos(ctx, output_lr, output_lr, array_ptr, sizeof(daxpy_t), offset_output, 0); + + // In addition to using rectangles and domains for launching index spaces + // of tasks (see example 02), Legion also uses them for performing + // operations on logical regions. Here we create a rectangle and a + // corresponding domain for describing the space of subregions that we + // want to create. Each subregion is assigned a 'color' which is why + // we name the variables 'color_bounds' and 'color_domain'. We'll use + // these below when we partition the region. + Rect<1> color_bounds(0,num_subregions-1); + IndexSpace color_is = runtime->create_index_space(ctx, color_bounds); + + // Parallelism in Legion is implicit. This means that rather than + // explicitly saying what should run in parallel, Legion applications + // partition up data and tasks specify which regions they access. + // The Legion runtime computes non-interference as a function of + // regions, fields, and privileges and then determines which tasks + // are safe to run in parallel. 
+ // + // Data partitioning is performed on index spaces. The partitioning + // operation is used to break an index space of points into subsets + // of points each of which will become a sub index space. Partitions + // created on an index space are then transitively applied to all the + // logical regions created using the index space. We will show how + // to get names to the subregions later in this example. + // + // Here we want to create the IndexPartition 'ip'. We'll illustrate + // two ways of creating an index partition depending on whether the + // array being partitioned can be evenly partitioned into subsets + // or not. There are other methods to partitioning index spaces + // which are not covered here. We'll cover the case of coloring + // individual points in an index space in our capstone circuit example. + IndexPartition ip = runtime->create_equal_partition(ctx, is, color_is); + runtime->attach_name(ip, "ip"); + + // The index space 'is' was used in creating two logical regions: 'input_lr' + // and 'output_lr'. By creating an IndexPartitiong of 'is' we implicitly + // created a LogicalPartition for each of the logical regions created using + // 'is'. The Legion runtime provides several ways of getting the names for + // these LogicalPartitions. We'll look at one of them here. The + // 'get_logical_partition' method takes a LogicalRegion and an IndexPartition + // and returns the LogicalPartition of the given LogicalRegion that corresponds + // to the given IndexPartition. + LogicalPartition input_lp = runtime->get_logical_partition(ctx, input_lr, ip); + runtime->attach_name(input_lp, "input_lp"); + LogicalPartition output_lp = runtime->get_logical_partition(ctx, output_lr, ip); + runtime->attach_name(output_lp, "output_lp"); + + // Create our launch domain. Note that is the same as color domain + // as we are going to launch one task for each subregion we created. 
+ ArgumentMap arg_map; + double start_init = get_cur_time(); + + // As in previous examples, we now want to launch tasks for initializing + // both the fields. However, to increase the amount of parallelism + // exposed to the runtime we will launch separate sub-tasks for each of + // the logical subregions created by our partitioning. To express this + // we create an IndexLauncher for launching an index space of tasks + // the same as example 02. + IndexLauncher init_launcher(INIT_FIELD_TASK_ID, color_is, + TaskArgument(NULL, 0), arg_map); + // For index space task launches we don't want to have to explicitly + // enumerate separate region requirements for all points in our launch + // domain. Instead Legion allows applications to place an upper bound + // on privileges required by subtasks and then specify which privileges + // each subtask receives using a projection function. In the case of + // the field initialization task, we say that all the subtasks will be + // using some subregion of the LogicalPartition 'input_lp'. Applications + // may also specify upper bounds using logical regions and not partitions. + // + // The Legion implementation assumes that all all points in an index + // space task launch request non-interfering privileges and for performance + // reasons this is unchecked. This means if two tasks in the same index + // space are accessing aliased data, then they must either both be + // with read-only or reduce privileges. + // + // When the runtime enumerates the launch_domain, it will invoke the + // projection function for each point in the space and use the resulting + // LogicalRegion computed for each point in the index space of tasks. + // The projection ID '0' is reserved and corresponds to the identity + // function which simply zips the space of tasks with the space of + // subregions in the partition. 
Applications can register their own + // projections functions via the 'register_region_projection' and + // 'register_partition_projection' functions before starting + // the runtime similar to how tasks are registered. + init_launcher.add_region_requirement( + RegionRequirement(input_lp, 0/*projection ID*/, + WRITE_DISCARD, EXCLUSIVE, input_lr)); + init_launcher.region_requirements[0].add_field(FID_X); + FutureMap fmi0 = runtime->execute_index_space(ctx, init_launcher); + + // Modify our region requirement to initialize the other field + // in the same way. Note that after we do this we have exposed + // 2*num_subregions task-level parallelism to the runtime because + // we have launched tasks that are both data-parallel on + // sub-regions and task-parallel on accessing different fields. + // The power of Legion is that it allows programmers to express + // these data usage patterns and automatically extracts both + // kinds of parallelism in a unified programming framework. + init_launcher.region_requirements[0].privilege_fields.clear(); + init_launcher.region_requirements[0].instance_fields.clear(); + init_launcher.region_requirements[0].add_field(FID_Y); + FutureMap fmi1 = runtime->execute_index_space(ctx, init_launcher); + fmi1.wait_all_results(); + fmi0.wait_all_results(); + double end_init = get_cur_time(); + printf("Attach AOS, init done, time %f\n", end_init - start_init); + + const double alpha = drand48(); + double start_t = get_cur_time(); + // We launch the subtasks for performing the daxpy computation + // in a similar way to the initialize field tasks. Note we + // again make use of two RegionRequirements which use a + // partition as the upper bound for the privileges for the task. 
+ IndexLauncher daxpy_launcher(DAXPY_TASK_ID, color_is, + TaskArgument(&alpha, sizeof(alpha)), arg_map); + daxpy_launcher.add_region_requirement( + RegionRequirement(input_lp, 0/*projection ID*/, + READ_ONLY, EXCLUSIVE, input_lr)); + daxpy_launcher.region_requirements[0].add_field(FID_X); + daxpy_launcher.region_requirements[0].add_field(FID_Y); + daxpy_launcher.add_region_requirement( + RegionRequirement(output_lp, 0/*projection ID*/, + WRITE_DISCARD, EXCLUSIVE, output_lr)); + daxpy_launcher.region_requirements[1].add_field(FID_Z); + FutureMap fm = runtime->execute_index_space(ctx, daxpy_launcher); + fm.wait_all_results(); + double end_t = get_cur_time(); + printf("Attach AOS, daxpy done, time %f\n", end_t - start_t); + + // While we could also issue parallel subtasks for the checking + // task, we only issue a single task launch to illustrate an + // important Legion concept. Note the checking task operates + // on the entire 'input_lr' and 'output_lr' regions and not + // on the subregions. Even though the previous tasks were + // all operating on subregions, Legion will correctly compute + // data dependences on all the subtasks that generated the + // data in these two regions. 
+ TaskLauncher check_launcher(CHECK_TASK_ID, TaskArgument(&alpha, sizeof(alpha))); + check_launcher.add_region_requirement( + RegionRequirement(input_lr, READ_ONLY, EXCLUSIVE, input_lr)); + check_launcher.region_requirements[0].add_field(FID_X); + check_launcher.region_requirements[0].add_field(FID_Y); + check_launcher.add_region_requirement( + RegionRequirement(output_lr, READ_ONLY, EXCLUSIVE, output_lr)); + check_launcher.region_requirements[1].add_field(FID_Z); + Future fu = runtime->execute_task(ctx, check_launcher); + fu.wait(); + + runtime->detach_array(ctx, pr_output); + runtime->detach_array(ctx, pr_input); + runtime->destroy_logical_region(ctx, input_lr); + runtime->destroy_logical_region(ctx, output_lr); + runtime->destroy_field_space(ctx, input_fs); + runtime->destroy_field_space(ctx, output_fs); + runtime->destroy_index_space(ctx, is); + free(array_ptr); +} + +void init_field_task(const Task *task, + const std::vector ®ions, + Context ctx, Runtime *runtime) +{ + assert(regions.size() == 1); + assert(task->regions.size() == 1); + assert(task->regions[0].privilege_fields.size() == 1); + + FieldID fid = *(task->regions[0].privilege_fields.begin()); + const int point = task->index_point.point_data[0]; + printf("Initializing field %d for block %d...\n", fid, point); + + const AccessorWD acc(regions[0], fid); + + // Note here that we get the domain for the subregion for + // this task from the runtime which makes it safe for running + // both as a single task and as part of an index space of tasks. 
+ Rect<1> rect = runtime->get_index_space_domain(ctx, + task->regions[0].region.get_index_space()); + for (PointInRectIterator<1> pir(rect); pir(); pir++) + acc[*pir] = drand48(); +} + +void daxpy_task(const Task *task, + const std::vector ®ions, + Context ctx, Runtime *runtime) +{ + assert(regions.size() == 2); + assert(task->regions.size() == 2); + assert(task->arglen == sizeof(double)); + const double alpha = *((const double*)task->args); + const int point = task->index_point.point_data[0]; + + const AccessorRO acc_y(regions[0], FID_Y); + const AccessorRO acc_x(regions[0], FID_X); + const AccessorWD acc_z(regions[1], FID_Z); + + Rect<1> rect = runtime->get_index_space_domain(ctx, + task->regions[0].region.get_index_space()); + printf("Running daxpy computation with alpha %.8g for point %d, xptr %p, y_ptr %p, z_ptr %p...\n", + alpha, point, acc_x.ptr(rect.lo), acc_y.ptr(rect.lo), acc_z.ptr(rect.lo)); + for (PointInRectIterator<1> pir(rect); pir(); pir++) + acc_z[*pir] = alpha * acc_x[*pir] + acc_y[*pir]; +} + +void check_task(const Task *task, + const std::vector ®ions, + Context ctx, Runtime *runtime) +{ + assert(regions.size() == 2); + assert(task->regions.size() == 2); + assert(task->arglen == sizeof(double)); + const double alpha = *((const double*)task->args); + + const AccessorRO acc_x(regions[0], FID_X); + const AccessorRO acc_y(regions[0], FID_Y); + const AccessorRO acc_z(regions[1], FID_Z); + + Rect<1> rect = runtime->get_index_space_domain(ctx, + task->regions[0].region.get_index_space()); + printf("Checking results... xptr %p, y_ptr %p, z_ptr %p...\n", + acc_x.ptr(rect.lo), acc_y.ptr(rect.lo), acc_z.ptr(rect.lo)); + bool all_passed = true; + for (PointInRectIterator<1> pir(rect); pir(); pir++) + { + double expected = alpha * acc_x[*pir] + acc_y[*pir]; + double received = acc_z[*pir]; + // Probably shouldn't check for floating point equivalence but + // the order of operations are the same should they should + // be bitwise equal. 
+ if (expected != received) + all_passed = false; + } + if (all_passed) + printf("SUCCESS!\n"); + else + printf("FAILURE!\n"); +} + +int main(int argc, char **argv) +{ + Runtime::set_top_level_task_id(TOP_LEVEL_TASK_ID); + + { + TaskVariantRegistrar registrar(TOP_LEVEL_TASK_ID, "top_level"); + registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); + Runtime::preregister_task_variant(registrar, "top_level"); + } + + { + TaskVariantRegistrar registrar(INIT_FIELD_TASK_ID, "init_field"); + registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); + registrar.set_leaf(); + Runtime::preregister_task_variant(registrar, "init_field"); + } + + { + TaskVariantRegistrar registrar(DAXPY_TASK_ID, "daxpy"); + registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); + registrar.set_leaf(); + Runtime::preregister_task_variant(registrar, "daxpy"); + } + + { + TaskVariantRegistrar registrar(CHECK_TASK_ID, "check"); + registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); + registrar.set_leaf(); + Runtime::preregister_task_variant(registrar, "check"); + } + + return Runtime::start(argc, argv); +} diff --git a/tutorial/07_partitioning_attach_array_soa/CMakeLists.txt b/tutorial/07_partitioning_attach_array_soa/CMakeLists.txt new file mode 100644 index 0000000000..ebed303c58 --- /dev/null +++ b/tutorial/07_partitioning_attach_array_soa/CMakeLists.txt @@ -0,0 +1,39 @@ +#------------------------------------------------------------------------------# +# Copyright 2017 Kitware, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +#------------------------------------------------------------------------------# + +cmake_minimum_required(VERSION 3.1) +project(LegionExample_07_partitioning) + +include(CheckCXXCompilerFlag) + +check_cxx_compiler_flag( "-std=c++11" CXX11_COMPILER) + +if(CXX11_COMPILER) + set(CMAKE_CXX_STANDARD 11) +else() + message(FATAL_ERROR "C++11 compatible compiler not found") +endif() + +# Only search if were building stand-alone and not as part of Legion +if(NOT Legion_SOURCE_DIR) + find_package(Legion REQUIRED) +endif() + +add_executable(partitioning partitioning.cc) +target_link_libraries(partitioning Legion::Legion) +if(Legion_ENABLE_TESTING) + add_test(NAME partitioning COMMAND $) +endif() diff --git a/tutorial/07_partitioning_attach_array_soa/Makefile b/tutorial/07_partitioning_attach_array_soa/Makefile new file mode 100644 index 0000000000..3651b6b749 --- /dev/null +++ b/tutorial/07_partitioning_attach_array_soa/Makefile @@ -0,0 +1,51 @@ +# Copyright 2017 Stanford University +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +ifndef LG_RT_DIR +$(error LG_RT_DIR variable is not defined, aborting build) +endif + +# Flags for directing the runtime makefile what to include +DEBUG := 1 # Include debugging symbols +OUTPUT_LEVEL ?= LEVEL_DEBUG # Compile time logging level +USE_CUDA ?= 0 # Include CUDA support (requires CUDA) +USE_GASNET ?= 0 # Include GASNet support (requires GASNet) +USE_HDF ?= 0 # Include HDF5 support (requires HDF5) +ALT_MAPPERS ?= 0 # Include alternative mappers (not recommended) + +# Put the binary file name here +OUTFILE ?= partitioning +# List all the application source files here +GEN_SRC ?= partitioning.cc # .cc files +GEN_GPU_SRC ?= # .cu files + +# You can modify these variables, some will be appended to by the runtime makefile +INC_FLAGS ?= +CC_FLAGS ?= +NVCC_FLAGS ?= +GASNET_FLAGS ?= +LD_FLAGS ?= +# For Point and Rect typedefs +CC_FLAGS += -std=c++11 + +########################################################################### +# +# Don't change anything below here +# +########################################################################### + +include $(LG_RT_DIR)/runtime.mk + diff --git a/tutorial/07_partitioning_attach_array_soa/partitioning.cc b/tutorial/07_partitioning_attach_array_soa/partitioning.cc new file mode 100644 index 0000000000..e120f704ea --- /dev/null +++ b/tutorial/07_partitioning_attach_array_soa/partitioning.cc @@ -0,0 +1,391 @@ +/* Copyright 2017 Stanford University + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#include +#include +#include +#include +#include "legion.h" +using namespace Legion; + +template +using AccessorRO = FieldAccessor >; +template +using AccessorWD = FieldAccessor >; + +enum TaskIDs { + TOP_LEVEL_TASK_ID, + INIT_FIELD_TASK_ID, + DAXPY_TASK_ID, + CHECK_TASK_ID, +}; + +enum FieldIDs { + FID_X, + FID_Y, + FID_Z, +}; + +double get_cur_time() { + struct timeval tv; + struct timezone tz; + double cur_time; + + gettimeofday(&tv, &tz); + cur_time = tv.tv_sec + tv.tv_usec / 1000000.0; + + return cur_time; +} + + +void top_level_task(const Task *task, + const std::vector ®ions, + Context ctx, Runtime *runtime) +{ + int num_elements = 1024; + int num_subregions = 4; + // See if we have any command line arguments to parse + // Note we now have a new command line parameter which specifies + // how many subregions we should make. + { + const InputArgs &command_args = Runtime::get_input_args(); + for (int i = 1; i < command_args.argc; i++) + { + if (!strcmp(command_args.argv[i],"-n")) + num_elements = atoi(command_args.argv[++i]); + if (!strcmp(command_args.argv[i],"-b")) + num_subregions = atoi(command_args.argv[++i]); + } + } + printf("Running daxpy for %d elements...\n", num_elements); + printf("Partitioning data into %d sub-regions...\n", num_subregions); + + // Create our logical regions using the same schemas as earlier examples + Rect<1> elem_rect(0,num_elements-1); + IndexSpace is = runtime->create_index_space(ctx, elem_rect); + runtime->attach_name(is, "is"); + FieldSpace input_fs = runtime->create_field_space(ctx); + runtime->attach_name(input_fs, "input_fs"); + { + FieldAllocator allocator = + runtime->create_field_allocator(ctx, input_fs); + allocator.allocate_field(sizeof(double),FID_X); + runtime->attach_name(input_fs, FID_X, "X"); + allocator.allocate_field(sizeof(double),FID_Y); + runtime->attach_name(input_fs, FID_Y, "Y"); + } + FieldSpace output_fs = runtime->create_field_space(ctx); + runtime->attach_name(output_fs, "output_fs"); + { + 
FieldAllocator allocator = + runtime->create_field_allocator(ctx, output_fs); + allocator.allocate_field(sizeof(double),FID_Z); + runtime->attach_name(output_fs, FID_Z, "Z"); + } + LogicalRegion input_lr = runtime->create_logical_region(ctx, is, input_fs); + runtime->attach_name(input_lr, "input_lr"); + LogicalRegion output_lr = runtime->create_logical_region(ctx, is, output_fs); + runtime->attach_name(output_lr, "output_lr"); + + double *y_ptr = (double*)malloc(sizeof(double)*(num_elements)); + double *x_ptr = (double*)malloc(sizeof(double)*(num_elements)); + double *z_ptr = (double*)malloc(sizeof(double)*(num_elements)); + for (int j = 0; j < num_elements; j++ ) { + x_ptr[j] = drand48(); + y_ptr[j] = drand48(); + z_ptr[j] = drand48(); + } + std::map field_pointer_map_xy; + field_pointer_map_xy[FID_X] = x_ptr; + field_pointer_map_xy[FID_Y] = y_ptr; + printf("Attach array fid %d, ptr %p, fid %d, ptr %p\n", FID_X, x_ptr, FID_Y, y_ptr); + PhysicalRegion xy_pr = runtime->attach_array_soa(ctx, input_lr, input_lr, field_pointer_map_xy, 0); + + std::map field_pointer_map_z; + field_pointer_map_z[FID_Z] = z_ptr; + printf("Attach array fid %d, ptr %p\n", FID_Z, z_ptr); + PhysicalRegion z_pr = runtime->attach_array_soa(ctx, output_lr, output_lr, field_pointer_map_z, 0); + // In addition to using rectangles and domains for launching index spaces + // of tasks (see example 02), Legion also uses them for performing + // operations on logical regions. Here we create a rectangle and a + // corresponding domain for describing the space of subregions that we + // want to create. Each subregion is assigned a 'color' which is why + // we name the variables 'color_bounds' and 'color_domain'. We'll use + // these below when we partition the region. + Rect<1> color_bounds(0,num_subregions-1); + IndexSpace color_is = runtime->create_index_space(ctx, color_bounds); + + // Parallelism in Legion is implicit. 
This means that rather than + // explicitly saying what should run in parallel, Legion applications + // partition up data and tasks specify which regions they access. + // The Legion runtime computes non-interference as a function of + // regions, fields, and privileges and then determines which tasks + // are safe to run in parallel. + // + // Data partitioning is performed on index spaces. The partitioning + // operation is used to break an index space of points into subsets + // of points each of which will become a sub index space. Partitions + // created on an index space are then transitively applied to all the + // logical regions created using the index space. We will show how + // to get names for the subregions later in this example. + // + // Here we want to create the IndexPartition 'ip'. This example uses + // only 'create_equal_partition', passing it the index space to split + // ('is') and the color space that names the pieces ('color_is'). + // There are other methods of partitioning index spaces + // which are not covered here. We'll cover the case of coloring + // individual points in an index space in our capstone circuit example. + IndexPartition ip = runtime->create_equal_partition(ctx, is, color_is); + runtime->attach_name(ip, "ip"); + + // The index space 'is' was used in creating two logical regions: 'input_lr' + // and 'output_lr'. By creating an IndexPartition of 'is' we implicitly + // created a LogicalPartition for each of the logical regions created using + // 'is'. The Legion runtime provides several ways of getting the names for + // these LogicalPartitions. We'll look at one of them here. The + // 'get_logical_partition' method takes a LogicalRegion and an IndexPartition + // and returns the LogicalPartition of the given LogicalRegion that corresponds + // to the given IndexPartition. 
+ LogicalPartition input_lp = runtime->get_logical_partition(ctx, input_lr, ip); + runtime->attach_name(input_lp, "input_lp"); + LogicalPartition output_lp = runtime->get_logical_partition(ctx, output_lr, ip); + runtime->attach_name(output_lp, "output_lp"); + + // Create our launch domain. Note that it is the same as the color domain + // as we are going to launch one task for each subregion we created. + ArgumentMap arg_map; + + double start_init = get_cur_time(); + + // As in previous examples, we now want to launch tasks for initializing + // both the fields. However, to increase the amount of parallelism + // exposed to the runtime we will launch separate sub-tasks for each of + // the logical subregions created by our partitioning. To express this + // we create an IndexLauncher for launching an index space of tasks + // the same as example 02. + IndexLauncher init_launcher(INIT_FIELD_TASK_ID, color_is, + TaskArgument(NULL, 0), arg_map); + // For index space task launches we don't want to have to explicitly + // enumerate separate region requirements for all points in our launch + // domain. Instead Legion allows applications to place an upper bound + // on privileges required by subtasks and then specify which privileges + // each subtask receives using a projection function. In the case of + // the field initialization task, we say that all the subtasks will be + // using some subregion of the LogicalPartition 'input_lp'. Applications + // may also specify upper bounds using logical regions and not partitions. + // + // The Legion implementation assumes that all points in an index + // space task launch request non-interfering privileges and for performance + // reasons this is unchecked. This means if two tasks in the same index + // space are accessing aliased data, then they must both be + // using either read-only or reduce privileges. 
+ // + // When the runtime enumerates the launch domain, it will invoke the + // projection function for each point in the space and use the resulting + // LogicalRegion computed for each point in the index space of tasks. + // The projection ID '0' is reserved and corresponds to the identity + // function which simply zips the space of tasks with the space of + // subregions in the partition. Applications can register their own + // projection functions via the 'register_region_projection' and + // 'register_partition_projection' functions before starting + // the runtime similar to how tasks are registered. + init_launcher.add_region_requirement( + RegionRequirement(input_lp, 0/*projection ID*/, + WRITE_DISCARD, EXCLUSIVE, input_lr)); + init_launcher.region_requirements[0].add_field(FID_X); + FutureMap fmi0 = runtime->execute_index_space(ctx, init_launcher); + + // Modify our region requirement to initialize the other field + // in the same way. Note that after we do this we have exposed + // 2*num_subregions-way task-level parallelism to the runtime because + // we have launched tasks that are both data-parallel on + // sub-regions and task-parallel on accessing different fields. + // The power of Legion is that it allows programmers to express + // these data usage patterns and automatically extracts both + // kinds of parallelism in a unified programming framework. + init_launcher.region_requirements[0].privilege_fields.clear(); + init_launcher.region_requirements[0].instance_fields.clear(); + init_launcher.region_requirements[0].add_field(FID_Y); + FutureMap fmi1 = runtime->execute_index_space(ctx, init_launcher); + fmi1.wait_all_results(); + fmi0.wait_all_results(); + double end_init = get_cur_time(); + printf("Attach SOA, init done, time %f\n", end_init - start_init); + + const double alpha = drand48(); + double start_t = get_cur_time(); + // We launch the subtasks for performing the daxpy computation + // in a similar way to the initialize field tasks. 
Note we + // again make use of two RegionRequirements which use a + // partition as the upper bound for the privileges for the task. + IndexLauncher daxpy_launcher(DAXPY_TASK_ID, color_is, + TaskArgument(&alpha, sizeof(alpha)), arg_map); + daxpy_launcher.add_region_requirement( + RegionRequirement(input_lp, 0/*projection ID*/, + READ_ONLY, EXCLUSIVE, input_lr)); + daxpy_launcher.region_requirements[0].add_field(FID_X); + daxpy_launcher.region_requirements[0].add_field(FID_Y); + daxpy_launcher.add_region_requirement( + RegionRequirement(output_lp, 0/*projection ID*/, + WRITE_DISCARD, EXCLUSIVE, output_lr)); + daxpy_launcher.region_requirements[1].add_field(FID_Z); + FutureMap fm = runtime->execute_index_space(ctx, daxpy_launcher); + fm.wait_all_results(); + double end_t = get_cur_time(); + printf("Attach SOA, daxpy done, time %f\n", end_t - start_t); + + // While we could also issue parallel subtasks for the checking + // task, we only issue a single task launch to illustrate an + // important Legion concept. Note the checking task operates + // on the entire 'input_lr' and 'output_lr' regions and not + // on the subregions. Even though the previous tasks were + // all operating on subregions, Legion will correctly compute + // data dependences on all the subtasks that generated the + // data in these two regions. 
+ TaskLauncher check_launcher(CHECK_TASK_ID, TaskArgument(&alpha, sizeof(alpha))); + check_launcher.add_region_requirement( + RegionRequirement(input_lr, READ_ONLY, EXCLUSIVE, input_lr)); + check_launcher.region_requirements[0].add_field(FID_X); + check_launcher.region_requirements[0].add_field(FID_Y); + check_launcher.add_region_requirement( + RegionRequirement(output_lr, READ_ONLY, EXCLUSIVE, output_lr)); + check_launcher.region_requirements[1].add_field(FID_Z); + Future fu = runtime->execute_task(ctx, check_launcher); + fu.wait(); + + runtime->detach_array(ctx, xy_pr); + runtime->detach_array(ctx, z_pr); + runtime->destroy_logical_region(ctx, input_lr); + runtime->destroy_logical_region(ctx, output_lr); + runtime->destroy_field_space(ctx, input_fs); + runtime->destroy_field_space(ctx, output_fs); + runtime->destroy_index_space(ctx, is); + free(x_ptr); + free(y_ptr); + free(z_ptr); +} + +void init_field_task(const Task *task, + const std::vector ®ions, + Context ctx, Runtime *runtime) +{ + assert(regions.size() == 1); + assert(task->regions.size() == 1); + assert(task->regions[0].privilege_fields.size() == 1); + + FieldID fid = *(task->regions[0].privilege_fields.begin()); + const int point = task->index_point.point_data[0]; + printf("Initializing field %d for block %d...\n", fid, point); + + const AccessorWD acc(regions[0], fid); + + // Note here that we get the domain for the subregion for + // // this task from the runtime which makes it safe for running + // // both as a single task and as part of an index space of tasks. 
+ Rect<1> rect = runtime->get_index_space_domain(ctx, + task->regions[0].region.get_index_space()); + for (PointInRectIterator<1> pir(rect); pir(); pir++) + acc[*pir] = drand48(); +} + +void daxpy_task(const Task *task, + const std::vector ®ions, + Context ctx, Runtime *runtime) +{ + assert(regions.size() == 2); + assert(task->regions.size() == 2); + assert(task->arglen == sizeof(double)); + const double alpha = *((const double*)task->args); + const int point = task->index_point.point_data[0]; + + const AccessorRO acc_x(regions[0], FID_X); + const AccessorRO acc_y(regions[0], FID_Y); + const AccessorWD acc_z(regions[1], FID_Z); + + Rect<1> rect = runtime->get_index_space_domain(ctx, + task->regions[0].region.get_index_space()); + printf("Running daxpy computation with alpha %.8g for point %d, x_ptr %p, y_ptr %p, z_ptr %p...\n", + alpha, point, acc_x.ptr(rect.lo), acc_y.ptr(rect.lo), acc_z.ptr(rect.lo)); + for (PointInRectIterator<1> pir(rect); pir(); pir++) + acc_z[*pir] = alpha * acc_x[*pir] + acc_y[*pir]; +} + +void check_task(const Task *task, + const std::vector ®ions, + Context ctx, Runtime *runtime) +{ + assert(regions.size() == 2); + assert(task->regions.size() == 2); + assert(task->arglen == sizeof(double)); + const double alpha = *((const double*)task->args); + + const AccessorRO acc_x(regions[0], FID_X); + const AccessorRO acc_y(regions[0], FID_Y); + const AccessorRO acc_z(regions[1], FID_Z); + + Rect<1> rect = runtime->get_index_space_domain(ctx, + task->regions[0].region.get_index_space()); + printf("Checking results x_ptr %p, y_ptr %p, z_ptr %p...\n", acc_x.ptr(rect.lo), acc_y.ptr(rect.lo), acc_z.ptr(rect.lo)); + bool all_passed = true; + for (PointInRectIterator<1> pir(rect); pir(); pir++) + { + double expected = alpha * acc_x[*pir] + acc_y[*pir]; + double received = acc_z[*pir]; + // Probably shouldn't check for floating point equivalence but + // the order of operations is the same so they should + // be bitwise equal. 
+ if (expected != received) + all_passed = false; + } + if (all_passed) + printf("SUCCESS!\n"); + else + printf("FAILURE!\n"); +} + +int main(int argc, char **argv) +{ + Runtime::set_top_level_task_id(TOP_LEVEL_TASK_ID); + + { + TaskVariantRegistrar registrar(TOP_LEVEL_TASK_ID, "top_level"); + registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); + Runtime::preregister_task_variant(registrar, "top_level"); + } + + { + TaskVariantRegistrar registrar(INIT_FIELD_TASK_ID, "init_field"); + registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); + registrar.set_leaf(); + Runtime::preregister_task_variant(registrar, "init_field"); + } + + { + TaskVariantRegistrar registrar(DAXPY_TASK_ID, "daxpy"); + registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); + registrar.set_leaf(); + Runtime::preregister_task_variant(registrar, "daxpy"); + } + + { + TaskVariantRegistrar registrar(CHECK_TASK_ID, "check"); + registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); + registrar.set_leaf(); + Runtime::preregister_task_variant(registrar, "check"); + } + + return Runtime::start(argc, argv); +} diff --git a/tutorial/10_attach_2darray_c_fortran_layout/CMakeLists.txt b/tutorial/10_attach_2darray_c_fortran_layout/CMakeLists.txt new file mode 100644 index 0000000000..a78a962bc9 --- /dev/null +++ b/tutorial/10_attach_2darray_c_fortran_layout/CMakeLists.txt @@ -0,0 +1,39 @@ +#------------------------------------------------------------------------------# +# Copyright 2017 Kitware, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +#------------------------------------------------------------------------------# + +cmake_minimum_required(VERSION 3.1) +project(LegionExample_10_attach_2darray) + +include(CheckCXXCompilerFlag) + +check_cxx_compiler_flag( "-std=c++11" CXX11_COMPILER) + +if(CXX11_COMPILER) + set(CMAKE_CXX_STANDARD 11) +else() + message(FATAL_ERROR "C++11 compatible compiler not found") +endif() + +# Only search if we're building stand-alone and not as part of Legion +if(NOT Legion_SOURCE_DIR) + find_package(Legion REQUIRED) +endif() + +add_executable(attach_2darray attach_2darray.cc) +target_link_libraries(attach_2darray Legion::Legion) +if(Legion_ENABLE_TESTING) + add_test(NAME attach_2darray COMMAND $<TARGET_FILE:attach_2darray>) +endif() diff --git a/tutorial/10_attach_2darray_c_fortran_layout/Makefile b/tutorial/10_attach_2darray_c_fortran_layout/Makefile new file mode 100644 index 0000000000..96cbf576fb --- /dev/null +++ b/tutorial/10_attach_2darray_c_fortran_layout/Makefile @@ -0,0 +1,51 @@ +# Copyright 2017 Stanford University +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +ifndef LG_RT_DIR +$(error LG_RT_DIR variable is not defined, aborting build) +endif + +# Flags for directing the runtime makefile what to include +DEBUG := 1 # Include debugging symbols +OUTPUT_LEVEL ?= LEVEL_DEBUG # Compile time logging level +USE_CUDA ?= 0 # Include CUDA support (requires CUDA) +USE_GASNET ?= 0 # Include GASNet support (requires GASNet) +USE_HDF ?= 0 # Include HDF5 support (requires HDF5) +ALT_MAPPERS ?= 0 # Include alternative mappers (not recommended) + +# Put the binary file name here +OUTFILE ?= attach_2darray +# List all the application source files here +GEN_SRC ?= attach_2darray.cc # .cc files +GEN_GPU_SRC ?= # .cu files + +# You can modify these variables, some will be appended to by the runtime makefile +INC_FLAGS ?= +CC_FLAGS ?= +NVCC_FLAGS ?= +GASNET_FLAGS ?= +LD_FLAGS ?= +# For Point and Rect typedefs +CC_FLAGS += -std=c++11 + +########################################################################### +# +# Don't change anything below here +# +########################################################################### + +include $(LG_RT_DIR)/runtime.mk + diff --git a/tutorial/10_attach_2darray_c_fortran_layout/attach_2darray.cc b/tutorial/10_attach_2darray_c_fortran_layout/attach_2darray.cc new file mode 100644 index 0000000000..f6fe484d79 --- /dev/null +++ b/tutorial/10_attach_2darray_c_fortran_layout/attach_2darray.cc @@ -0,0 +1,250 @@ +/* Copyright 2017 Stanford University + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include +#include +#include +#include "legion.h" +using namespace Legion; + +template +using AccessorRO = FieldAccessor >; +template +using AccessorWD = FieldAccessor >; + +enum TaskIDs { + TOP_LEVEL_TASK_ID, + READ_FIELD_TASK_ID, +}; + +enum FieldIDs { + FID_X, + FID_Y, + FID_A, + FID_B, +}; + +typedef struct { + double z1; + double z2; +}fidz_t; + +typedef struct{ + double x; + double y; +}xy_t; + +void top_level_task(const Task *task, + const std::vector ®ions, + Context ctx, Runtime *runtime) +{ + int num_elements = 10; + //double x[10], y[10]; + // See if we have any command line arguments to parse + { + const InputArgs &command_args = Runtime::get_input_args(); + for (int i = 1; i < command_args.argc; i++) + { + if (!strcmp(command_args.argv[i],"-n")) + num_elements = atoi(command_args.argv[++i]); + } + } + printf("Running daxpy for %d elements...\n", num_elements); + + // Create our logical regions using the same schema that + // we used in the previous example. 
+ Point<2> lo(0, 0); + Point<2> hi(num_elements-1, num_elements-1); + const Rect<2> elem_rect(lo, hi); + //Rect<1> elem_rect(0,num_elements-1); + IndexSpace is = runtime->create_index_space(ctx, elem_rect); + FieldSpace input_fs = runtime->create_field_space(ctx); + { + FieldAllocator allocator = + runtime->create_field_allocator(ctx, input_fs); + allocator.allocate_field(sizeof(double),FID_X); + allocator.allocate_field(sizeof(double),FID_Y); + allocator.allocate_field(sizeof(double),FID_A); + allocator.allocate_field(sizeof(double),FID_B); + } + LogicalRegion input_lr = runtime->create_logical_region(ctx, is, input_fs); + + int i; + double val = 0.0; + + xy_t *xy_ptr = (xy_t*)malloc(sizeof(xy_t)*(num_elements*num_elements)); + double *a_ptr = (double*)malloc(sizeof(double)*(num_elements*num_elements)); + double *b_ptr = (double*)malloc(sizeof(double)*(num_elements*num_elements)); + + for (i = 0; i < num_elements*num_elements; i++) { + xy_ptr[i].x = val; + xy_ptr[i].y = val + 0.1; + a_ptr[i] = val + 0.2; + b_ptr[i] = val + 0.3; + val += 1.0; + } + + std::map offset_x; + offset_x[FID_X] = 0; + printf("Attach AOS array in fortran layout, fid %d, ptr %p\n", FID_X, xy_ptr); + PhysicalRegion pr_x = runtime->attach_array_aos(ctx, input_lr, input_lr, xy_ptr, sizeof(xy_t), offset_x, 0); + + std::map offset_y; + offset_y[FID_Y] = sizeof(double); + printf("Attach AOS array in c layout, fid %d, ptr %p\n", FID_Y, ((unsigned char*)(xy_ptr))+sizeof(double)); + PhysicalRegion pr_y = runtime->attach_array_aos(ctx, input_lr, input_lr, xy_ptr, sizeof(xy_t), offset_y, 1); + + std::map field_pointer_map_a; + field_pointer_map_a[FID_A] = a_ptr; + printf("Attach SOA array in fortran layout, fid %d, ptr %p\n", FID_A, a_ptr); + PhysicalRegion pr_a = runtime->attach_array_soa(ctx, input_lr, input_lr, field_pointer_map_a, 0); + + std::map field_pointer_map_b; + field_pointer_map_b[FID_B] = b_ptr; + printf("Attach SOA array in c layout, fid %d, ptr %p\n", FID_B, b_ptr); + PhysicalRegion 
pr_b = runtime->attach_array_soa(ctx, input_lr, input_lr, field_pointer_map_b, 1); + + // Each attached field is read back by its own task, so in this case + // we launch four separate read_field tasks, one per field, reusing a + // single launcher object. To launch the sub-tasks for performing + // the reads we again use the launcher objects that were + // introduced earlier. The only difference now is that instead of passing + // arguments by value, we now want to specify the logical regions + // that the tasks may access as their arguments. We again make use of + // the RegionRequirement struct to name the logical regions and fields + // for which the task should have privileges. In this case we launch + // a task that asks for READ_ONLY privileges on the 'X' field. + // + // An important property of the Legion programming model is that sub-tasks + // are only allowed to request privileges which are a subset of a + // parent task's privileges. When a task creates a logical region it + // is granted full read-write privileges for that logical region. It + // can then pass them down to sub-tasks. In this example the top-level + // task has full privileges on all the fields of input_lr. + // In this call it is passing read-only privileges down to the sub-task + // on input_lr on field 'X'. Legion will enforce the property that the + // sub-task only accesses the 'X' field of input_lr. This property of + // Legion is crucial for the implementation of Legion's hierarchical + // scheduling algorithm which is described in detail in our two papers. + TaskLauncher read_launcher(READ_FIELD_TASK_ID, TaskArgument(NULL, 0)); + read_launcher.add_region_requirement( + RegionRequirement(input_lr, READ_ONLY, EXCLUSIVE, input_lr)); + read_launcher.add_field(0/*idx*/, FID_X); + // Note that we keep the future returned by each launch so that the 
+ // top-level task can wait on all four read tasks further down. Legion + // remains responsible for computing the data dependences between how + // different tasks access logical regions. + Future fx = runtime->execute_task(ctx, read_launcher); + + read_launcher.region_requirements[0].privilege_fields.clear(); + read_launcher.region_requirements[0].instance_fields.clear(); + read_launcher.add_field(0/*idx*/, FID_Y); + Future fy = runtime->execute_task(ctx, read_launcher); + + read_launcher.region_requirements[0].privilege_fields.clear(); + read_launcher.region_requirements[0].instance_fields.clear(); + read_launcher.add_field(0/*idx*/, FID_A); + Future fa = runtime->execute_task(ctx, read_launcher); + + read_launcher.region_requirements[0].privilege_fields.clear(); + read_launcher.region_requirements[0].instance_fields.clear(); + read_launcher.add_field(0/*idx*/, FID_B); + Future fb = runtime->execute_task(ctx, read_launcher); + + fx.wait(); + fy.wait(); + fa.wait(); + fb.wait(); + // Notice that in this example the top-level task does block: each of the + // four wait() calls above returns only once the corresponding read task + // has completed. Legion still understands + // the data being accessed by all of these operations and defers all of + // their executions until they are safe to perform. Legion is still smart + // enough to know that the top-level task is not finished until all of + // the sub operations that have been performed are completed. + // NOTE(review): it is not evident from this file whether the waits are + // required before the detach_array and free() calls below or could be + // dropped to expose more task-level parallelism; confirm against the + // attach/detach documentation before changing them. 
+ runtime->detach_array(ctx, pr_x); + runtime->detach_array(ctx, pr_y); + runtime->detach_array(ctx, pr_a); + runtime->detach_array(ctx, pr_b); + runtime->destroy_logical_region(ctx, input_lr); + runtime->destroy_field_space(ctx, input_fs); + runtime->destroy_index_space(ctx, is); + free(xy_ptr); + free(a_ptr); + free(b_ptr); +} + +// Note that tasks get a physical region for every region requirement +// that they requested when they were launched in the vector of 'regions'. +// In some cases the mapper may have chosen not to map the logical region +// which means that the task has the necessary privileges to access the +// region but not a physical instance to access. +void read_field_task(const Task *task, + const std::vector ®ions, + Context ctx, Runtime *runtime) +{ + // Check that the inputs look right since we have no + // static checking to help us out. + assert(regions.size() == 1); + assert(task->regions.size() == 1); + assert(task->regions[0].privilege_fields.size() == 1); + // This is a field polymorphic function so figure out + // which field we are responsible for reading. + FieldID fid = *(task->regions[0].privilege_fields.begin()); + + // Note that Legion's default mapper always maps regions + // and the Legion runtime is smart enough not to start + // the task until all the regions contain valid data. + // Therefore in this case we don't need to call 'wait_until_valid' + // on our physical regions and we know that getting this + // accessor will never block the task's execution. If + // however we chose to unmap this physical region and then + // remap it then we would need to call 'wait_until_valid' + // again to ensure that we were accessing valid data. 
+ const AccessorRO acc(regions[0], fid); + + Rect<2> rect = runtime->get_index_space_domain(ctx, + task->regions[0].region.get_index_space()); + printf("READ field %d, addr %p\n", fid, acc.ptr(rect.lo)); + for (PointInRectIterator<2> pir(rect); pir(); pir++) { + printf("%.1f ", acc[*pir]); + } + printf("\n"); +} + +int main(int argc, char **argv) +{ + Runtime::set_top_level_task_id(TOP_LEVEL_TASK_ID); + + { + TaskVariantRegistrar registrar(TOP_LEVEL_TASK_ID, "top_level"); + registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); + Runtime::preregister_task_variant(registrar, "top_level"); + } + + { + TaskVariantRegistrar registrar(READ_FIELD_TASK_ID, "read_field"); + registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); + registrar.set_leaf(); + Runtime::preregister_task_variant(registrar, "read_field"); + } + + return Runtime::start(argc, argv); +}