diff --git a/examples/attach_array_cpy/CMakeLists.txt b/examples/attach_array_cpy/CMakeLists.txt
new file mode 100644
index 0000000000..34bf3102f3
--- /dev/null
+++ b/examples/attach_array_cpy/CMakeLists.txt
@@ -0,0 +1,42 @@
+#------------------------------------------------------------------------------#
+# Copyright 2017 Kitware, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#------------------------------------------------------------------------------#
+
+cmake_minimum_required(VERSION 3.1)
+project(LegionExample_attach_array_cpy)
+
+# The example source uses C++11 alias templates, so require a C++11 compiler
+include(CheckCXXCompilerFlag)
+check_cxx_compiler_flag("-std=c++11" CXX11_COMPILER)
+if(CXX11_COMPILER)
+  set(CMAKE_CXX_STANDARD 11)
+else()
+  message(FATAL_ERROR "C++11 compatible compiler not found")
+endif()
+
+# Only search if we're building stand-alone and not as part of Legion
+if(NOT Legion_SOURCE_DIR)
+  find_package(Legion REQUIRED)
+endif()
+# The source in this directory is attach_array.cc (there is no attach_file.cc);
+# the target is named after the directory so it is unique within the Legion tree
+add_executable(attach_array_cpy attach_array.cc)
+target_link_libraries(attach_array_cpy Legion::Legion)
+if(Legion_ENABLE_TESTING)
+  add_test(NAME attach_array_cpy COMMAND $<TARGET_FILE:attach_array_cpy>)
+endif()
+
+
+
diff --git a/examples/attach_array_cpy/Makefile b/examples/attach_array_cpy/Makefile
new file mode 100644
index 0000000000..5651293710
--- /dev/null
+++ b/examples/attach_array_cpy/Makefile
@@ -0,0 +1,51 @@
+# Copyright 2017 Stanford University
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+ifndef LG_RT_DIR
+$(error LG_RT_DIR variable is not defined, aborting build)
+endif
+
+# Flags for directing the runtime makefile what to include
+DEBUG           ?= 1		# Include debugging symbols
+OUTPUT_LEVEL    ?= LEVEL_DEBUG	# Compile time logging level
+USE_CUDA        ?= 0		# Include CUDA support (requires CUDA)
+USE_GASNET      ?= 0		# Include GASNet support (requires GASNet)
+USE_HDF         ?= 0		# Include HDF5 support (requires HDF5)
+ALT_MAPPERS     ?= 0		# Include alternative mappers (not recommended)
+
+# Put the binary file name here
+OUTFILE		?= attach_array 
+# List all the application source files here
+GEN_SRC		?= attach_array.cc	# .cc files
+GEN_GPU_SRC	?=				# .cu files
+
+# You can modify these variables, some will be appended to by the runtime makefile
+INC_FLAGS	?=
+CC_FLAGS	?=
+NVCC_FLAGS	?=
+GASNET_FLAGS	?=
+LD_FLAGS	?=
+# For Point and Rect typedefs
+CC_FLAGS	+= -std=c++11
+
+###########################################################################
+#
+#   Don't change anything below here
+#   
+###########################################################################
+
+include $(LG_RT_DIR)/runtime.mk
+
diff --git a/examples/attach_array_cpy/attach_array.cc b/examples/attach_array_cpy/attach_array.cc
new file mode 100644
index 0000000000..8abd5355c6
--- /dev/null
+++ b/examples/attach_array_cpy/attach_array.cc
@@ -0,0 +1,407 @@
+/* Copyright 2017 Stanford University
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include <cstdio>
+#include <cassert>
+#include <cstdlib>
+#include <unistd.h>
+#include <sys/types.h>
+#ifdef USE_HDF
+#include <hdf5.h>
+#endif
+#include "legion.h"
+#include <realm/machine.h>
+#include "mem_impl.h"
+#include "inst_impl.h"
+#include "runtime_impl.h"
+using namespace Legion;
+
+template<typename FT, int N, typename T = coord_t>
+using AccessorRO = FieldAccessor<READ_ONLY,FT,N,T,Realm::AffineAccessor<FT,N,T> >;
+template<typename FT, int N, typename T = coord_t>
+using AccessorWD = FieldAccessor<WRITE_DISCARD,FT,N,T,Realm::AffineAccessor<FT,N,T> >;
+
+/*
+ * This example runs a 1D stencil over two partitions of
+ * one logical region and then checkpoints the result:
+ * FID_DERIV is copied into a second region whose field
+ * is attached to a malloc'ed array with attach_array_soa,
+ * and that region is detached again.  The stencil is the
+ * standard 5-point formula:
+ * f'(x) = (-f(x+2h) + 8f(x+h) - 8f(x-h) + f(x-2h))/12h
+ * For simplicity we'll assume h=1.
+ */
+
+enum TaskIDs {  // Legion task identifiers used by this example
+  TOP_LEVEL_TASK_ID,   // registered in main() for top_level_task
+  INIT_FIELD_TASK_ID,  // registered in main() for init_field_task
+  STENCIL_TASK_ID,     // registered in main() for stencil_task
+  CHECK_TASK_ID,       // registered in main() for check_task
+};
+
+enum FieldIDs {  // field identifiers; FID_VAL/FID_DERIV live in fs, FID_CP in cp_fs
+  FID_VAL,    // written by the init launch, read by the stencil launch
+  FID_DERIV,  // written by the stencil launch, source of the checkpoint copy
+  FID_CP      // destination of the checkpoint copy, mapped to the attached array
+};
+
+double *my_ptr= NULL;
+
+void top_level_task(const Task *task,
+                    const std::vector<PhysicalRegion> &regions,
+                    Context ctx, Runtime *runtime)
+{
+  // Top-level task: builds the stencil region, runs the init and stencil
+  // index launches, copies FID_DERIV into a region attached to a malloc'ed
+  // array (the checkpoint), detaches it and finally launches the check task.
+  int num_elements = 1024;
+  int num_subregions = 4;
+
+  // Check for any command line arguments
+  {
+    const InputArgs &command_args = Runtime::get_input_args();
+    for (int i = 1; i < command_args.argc; i++)
+    {
+      // Only consume a value when one actually follows the flag; otherwise
+      // argv[++i] would step past the last argument.
+      if (!strcmp(command_args.argv[i],"-n") && (i+1 < command_args.argc))
+        num_elements = atoi(command_args.argv[++i]);
+      if (!strcmp(command_args.argv[i],"-b") && (i+1 < command_args.argc))
+        num_subregions = atoi(command_args.argv[++i]);
+    }
+  }
+  // Non-positive sizes would divide by zero in block_size below and make the
+  // final cp_ptr[0] / cp_ptr[num_elements-1] reads fall outside the array.
+  assert((num_elements > 0) && (num_subregions > 0));
+  printf("Running stencil computation for %d elements...\n", num_elements);
+  printf("Partitioning data into %d sub-regions...\n", num_subregions);
+
+  Rect<1> elem_rect(0,num_elements-1);
+  IndexSpaceT<1> is = runtime->create_index_space(ctx, elem_rect);
+  FieldSpace fs = runtime->create_field_space(ctx);
+  {
+    FieldAllocator allocator =
+      runtime->create_field_allocator(ctx, fs);
+    allocator.allocate_field(sizeof(double),FID_VAL);
+    allocator.allocate_field(sizeof(double),FID_DERIV);
+  }
+  LogicalRegion stencil_lr = runtime->create_logical_region(ctx, is, fs);
+
+  FieldSpace cp_fs = runtime->create_field_space(ctx);
+  {
+    FieldAllocator allocator =
+      runtime->create_field_allocator(ctx, cp_fs);
+    allocator.allocate_field(sizeof(double), FID_CP);
+  }
+  LogicalRegion cp_lr = runtime->create_logical_region(ctx, is, cp_fs);
+
+  Rect<1> color_bounds(0,num_subregions-1);
+  IndexSpaceT<1> color_is = runtime->create_index_space(ctx, color_bounds);
+
+  IndexPartition disjoint_ip =
+    runtime->create_equal_partition(ctx, is, color_is);
+  const int block_size = (num_elements + num_subregions - 1) / num_subregions;
+  Matrix<1,1> transform;
+  transform[0][0] = block_size;
+  Rect<1> extent(-2, block_size + 1);
+  IndexPartition ghost_ip =
+    runtime->create_partition_by_restriction(ctx, is, color_is, transform, extent);
+
+  LogicalPartition disjoint_lp =
+    runtime->get_logical_partition(ctx, stencil_lr, disjoint_ip);
+  LogicalPartition ghost_lp =
+    runtime->get_logical_partition(ctx, stencil_lr, ghost_ip);
+
+  ArgumentMap arg_map;
+
+  IndexLauncher init_launcher(INIT_FIELD_TASK_ID, color_is,
+                              TaskArgument(NULL, 0), arg_map);
+  init_launcher.add_region_requirement(
+      RegionRequirement(disjoint_lp, 0/*projection ID*/,
+                        WRITE_DISCARD, EXCLUSIVE, stencil_lr));
+  init_launcher.add_field(0, FID_VAL);
+  runtime->execute_index_space(ctx, init_launcher);
+
+  IndexLauncher stencil_launcher(STENCIL_TASK_ID, color_is,
+       TaskArgument(&num_elements, sizeof(num_elements)), arg_map);
+  stencil_launcher.add_region_requirement(
+      RegionRequirement(ghost_lp, 0/*projection ID*/,
+                        READ_ONLY, EXCLUSIVE, stencil_lr));
+  stencil_launcher.add_field(0, FID_VAL);
+  stencil_launcher.add_region_requirement(
+      RegionRequirement(disjoint_lp, 0/*projection ID*/,
+                        READ_WRITE, EXCLUSIVE, stencil_lr));
+  stencil_launcher.add_field(1, FID_DERIV);
+  runtime->execute_index_space(ctx, stencil_launcher);
+
+  // Launch a copy operation that performs the checkpoint
+  double ts_start, ts_mid, ts_end;
+  ts_start = Realm::Clock::current_time_in_microseconds();
+  PhysicalRegion cp_pr;
+  Memory memory = Machine::MemoryQuery(Machine::get_machine())
+    .local_address_space()
+    .only_kind(Memory::SYSTEM_MEM)
+    .first();
+  assert(memory.exists());
+  Realm::LocalCPUMemory *m_impl = (Realm::LocalCPUMemory *)Realm::get_runtime()->get_memory_impl(memory);
+  unsigned char* base = (unsigned char*)m_impl->base;
+
+  // NOTE: the array is never freed in this task; my_ptr keeps pointing at
+  // it and check_task reads it through that global.
+  double *cp_ptr = (double*)malloc(sizeof(double)*(num_elements));
+  assert(cp_ptr != NULL);
+  my_ptr = cp_ptr;
+
+  std::map<FieldID,void*> field_pointer_map;
+  field_pointer_map[FID_CP] = cp_ptr;
+  printf("Checkpointing data to arrray fid %d, ptr %p, base %p\n", FID_CP, cp_ptr, base);
+  cp_pr = runtime->attach_array_soa(ctx, cp_lr, cp_lr, field_pointer_map, 0);
+
+  CopyLauncher copy_launcher;
+  copy_launcher.add_copy_requirements(
+      RegionRequirement(stencil_lr, READ_ONLY, EXCLUSIVE, stencil_lr),
+      RegionRequirement(cp_lr, WRITE_DISCARD, EXCLUSIVE, cp_lr));
+  copy_launcher.add_src_field(0, FID_DERIV);
+  copy_launcher.add_dst_field(0, FID_CP);
+  runtime->issue_copy_operation(ctx, copy_launcher);
+
+  ts_mid = Realm::Clock::current_time_in_microseconds();
+
+  runtime->detach_array(ctx, cp_pr);
+
+  ts_end = Realm::Clock::current_time_in_microseconds();
+  double attach_time = 1e-6 * (ts_mid - ts_start);
+  double detach_time = 1e-6 * (ts_end - ts_mid);
+  printf("ELAPSED TIME (ATTACH) = %7.3f s\n", attach_time);
+  printf("ELAPSED TIME (DETACH) = %7.3f s\n", detach_time);
+
+  // Finally, we launch a single task to check the results.
+  TaskLauncher check_launcher(CHECK_TASK_ID,
+      TaskArgument(&num_elements, sizeof(num_elements)));
+  check_launcher.add_region_requirement(
+      RegionRequirement(stencil_lr, READ_ONLY, EXCLUSIVE, stencil_lr));
+  check_launcher.add_field(0, FID_VAL);
+  check_launcher.add_region_requirement(
+      RegionRequirement(stencil_lr, READ_ONLY, EXCLUSIVE, stencil_lr));
+  check_launcher.add_field(1, FID_DERIV);
+  runtime->execute_task(ctx, check_launcher);
+
+  // Clean up our region, index space, and field space
+  runtime->destroy_logical_region(ctx, stencil_lr);
+  runtime->destroy_logical_region(ctx, cp_lr);
+  runtime->destroy_field_space(ctx, cp_fs);
+  runtime->destroy_field_space(ctx, fs);
+  runtime->destroy_index_space(ctx, is);
+  printf("End of TOP_LEVEL_TASK, %f, %f\n", cp_ptr[0], cp_ptr[num_elements-1]);
+}
+
+// Index-task body: fills the single requested field of this block with the
+// ramp 1.125 + k*1.12, where k starts at the block's own color.
+void init_field_task(const Task *task,
+                     const std::vector<PhysicalRegion> &regions,
+                     Context ctx, Runtime *runtime)
+{
+  assert(regions.size() == 1);
+  assert(task->regions.size() == 1);
+  const RegionRequirement &req = task->regions[0];
+  assert(req.privilege_fields.size() == 1);
+
+  const FieldID target_fid = *(req.privilege_fields.begin());
+  const int block = task->index_point.point_data[0];
+  printf("Initializing field %d for block %d...\n", target_fid, block);
+
+  const AccessorWD<double,1> writer(regions[0], target_fid);
+  Rect<1> bounds = runtime->get_index_space_domain(ctx,
+                                      req.region.get_index_space());
+
+  int step = block;
+  for (PointInRectIterator<1> it(bounds); it(); it++, step++)
+    writer[*it] = 1.125 + step*1.12;
+}
+
+// Index-task body for the 5-point stencil.
+//   regions[0]  one read-only field, read at offsets -2..+2 around each point
+//   regions[1]  one field, written for every point of this block
+//   task->args  a single int: the total number of elements in the space
+// Blocks that touch either end of the space take a slow path that clamps
+// out-of-range neighbour indices; all other blocks take an unchecked fast
+// path.  Both paths evaluate (-l2 + 8*l1 - 8*r1 + r2) / 12, where l and r
+// are the left and right neighbours at distance 1 and 2.
+// Output is the field written in regions[1] plus one progress line on stdout.
+void stencil_task(const Task *task,
+                  const std::vector<PhysicalRegion> &regions,
+                  Context ctx, Runtime *runtime)
+{
+  assert(regions.size() == 2);
+  assert(task->regions.size() == 2);
+  const RegionRequirement &in_req = task->regions[0];
+  const RegionRequirement &out_req = task->regions[1];
+  assert(in_req.privilege_fields.size() == 1);
+  assert(out_req.privilege_fields.size() == 1);
+  assert(task->arglen == sizeof(int));
+  // The launcher passes the element count by value as the sole argument.
+  const int max_elements = *((const int*)task->args);
+  // Color of this block within the index launch; only used for logging.
+  const int point = task->index_point.point_data[0];
+
+  const FieldID in_fid = *(in_req.privilege_fields.begin());
+  const FieldID out_fid = *(out_req.privilege_fields.begin());
+
+  const AccessorRO<double,1> src(regions[0], in_fid);
+  const AccessorWD<double,1> dst(regions[1], out_fid);
+
+  // Iterate over the points we write (region 1), not the wider read region.
+  Rect<1> block = runtime->get_index_space_domain(ctx,
+                               out_req.region.get_index_space());
+
+  // A block is "interior" when every one of its points has two in-range
+  // neighbours on each side, so no clamping can ever be needed.
+  const bool interior =
+    (block.lo[0] >= 2) && (block.hi[0] <= (max_elements-3));
+
+  if (interior)
+  {
+    printf("Running fast stencil path for point %d...\n", point);
+    // In the fast path, we don't need any checks
+    for (PointInRectIterator<1> it(block); it(); it++)
+    {
+      const double l2 = src[*it - 2];
+      const double l1 = src[*it - 1];
+      const double r1 = src[*it + 1];
+      const double r2 = src[*it + 2];
+
+      dst[*it] = (-l2 + 8.0*l1 - 8.0*r1 + r2) / 12.0;
+    }
+    return;
+  }
+
+  printf("Running slow stencil path for point %d...\n", point);
+  // In the slow path every neighbour index is tested before it is used.
+  for (PointInRectIterator<1> it(block); it(); it++)
+  {
+    // Left neighbours: an index before element 0 reads element 0 instead.
+    const double l2 = (it[0] < 2) ? src[0] : src[*it - 2];
+    const double l1 = (it[0] < 1) ? src[0] : src[*it - 1];
+
+    // Right neighbours: an index past the last element reads the last one.
+    const double r1 = (it[0] > (max_elements-2)) ? src[max_elements-1]
+                                                 : src[*it + 1];
+    const double r2 = (it[0] > (max_elements-3)) ? src[max_elements-1]
+                                                 : src[*it + 2];
+
+    dst[*it] = (-l2 + 8.0*l1 - 8.0*r1 + r2) / 12.0;
+  }
+}
+
+void check_task(const Task *task,
+                const std::vector<PhysicalRegion> &regions,
+                Context ctx, Runtime *runtime)
+{
+  // Verification task.  For every point it recomputes the clamped stencil
+  // from the field of regions[0], compares it with the field of regions[1],
+  // and also compares that field with the array behind the my_ptr global.
+  // task->args carries a single int: the total number of elements.
+  assert(regions.size() == 2);
+  assert(task->regions.size() == 2);
+  const RegionRequirement &val_req = task->regions[0];
+  const RegionRequirement &deriv_req = task->regions[1];
+  assert(val_req.privilege_fields.size() == 1);
+  assert(deriv_req.privilege_fields.size() == 1);
+  assert(task->arglen == sizeof(int));
+  const int max_elements = *((const int*)task->args);
+
+  const FieldID val_fid = *(val_req.privilege_fields.begin());
+  const FieldID deriv_fid = *(deriv_req.privilege_fields.begin());
+
+  const AccessorRO<double,1> values(regions[0], val_fid);
+  const AccessorRO<double,1> derivs(regions[1], deriv_fid);
+
+  Rect<1> domain = runtime->get_index_space_domain(ctx,
+                                deriv_req.region.get_index_space());
+
+  bool stencil_ok = true;     // regions[1] equals the recomputed stencil
+  bool checkpoint_ok = true;  // regions[1] equals the my_ptr array
+  // idx is the position in my_ptr; it advances once per visited point.
+  int idx = 0;
+  for (PointInRectIterator<1> it(domain); it(); it++, idx++)
+  {
+    // This is the checking task so we can just do the slow (clamped) path.
+    const double l2 = (it[0] < 2) ? values[0] : values[*it - 2];
+    const double l1 = (it[0] < 1) ? values[0] : values[*it - 1];
+    const double r1 = (it[0] > (max_elements-2)) ? values[max_elements-1]
+                                                 : values[*it + 1];
+    const double r2 = (it[0] > (max_elements-3)) ? values[max_elements-1]
+                                                 : values[*it + 2];
+
+    const double expected = (-l2 + 8.0*l1 - 8.0*r1 + r2) / 12.0;
+    const double received = derivs[*it];
+
+    // Echo the first and the last element so they can be eyeballed.
+    const bool is_endpoint = (idx == 0) || (idx == max_elements-1);
+    if (is_endpoint)
+      printf("result %d, %f\n", idx, received);
+
+    // The array behind my_ptr is compared bitwise with the region value.
+    if (my_ptr[idx] != received)
+    {
+      printf("transfer error %d, %f\n", idx, my_ptr[idx]);
+      checkpoint_ok = false;
+    }
+
+    // Probably shouldn't bitwise compare floating point
+    // numbers but the order of operations is the same so they
+    // should be bitwise equal.
+    if (expected != received)
+      stencil_ok = false;
+  }
+
+  // Report the checkpoint verdict, the two ends of the my_ptr array and
+  // then the stencil verdict, in that order.
+  printf("%s", checkpoint_ok ? "CP PASSED\n" : "CP FAILED\n");
+  printf("CHECK, %f, %f\n", my_ptr[0], my_ptr[max_elements-1]);
+  printf("%s", stencil_ok ? "SUCCESS!\n" : "FAILURE!\n");
+}
+
+// Preregisters TASK_PTR as a variant of task_id that is constrained to
+// LOC_PROC processors.
+//   task_id    identifier the variant is registered under
+//   task_name  passed both to the registrar and to preregister_task_variant
+template<void (*TASK_PTR)(const Task*, const std::vector<PhysicalRegion>&,
+                          Context, Runtime*)>
+static void preregister_cpu_task(TaskID task_id, const char *task_name)
+{
+  TaskVariantRegistrar registrar(task_id, task_name);
+  registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC));
+  Runtime::preregister_task_variant<TASK_PTR>(registrar, task_name);
+}
+
+// Entry point: names the top-level task, preregisters the four tasks of
+// this example in the order top_level, init_field, stencil, check, and
+// then hands argc/argv to the Legion runtime.
+// Returns whatever Runtime::start() returns.
+int main(int argc, char **argv)
+{
+  Runtime::set_top_level_task_id(TOP_LEVEL_TASK_ID);
+
+  // All registrations are issued before Runtime::start().
+  preregister_cpu_task<top_level_task>(TOP_LEVEL_TASK_ID, "top_level");
+  preregister_cpu_task<init_field_task>(INIT_FIELD_TASK_ID, "init_field");
+  preregister_cpu_task<stencil_task>(STENCIL_TASK_ID, "stencil");
+  preregister_cpu_task<check_task>(CHECK_TASK_ID, "check");
+
+  return Runtime::start(argc, argv);
+}
+
diff --git a/examples/attach_array_no_cpy_aos/CMakeLists.txt b/examples/attach_array_no_cpy_aos/CMakeLists.txt
new file mode 100644
index 0000000000..34bf3102f3
--- /dev/null
+++ b/examples/attach_array_no_cpy_aos/CMakeLists.txt
@@ -0,0 +1,42 @@
+#------------------------------------------------------------------------------#
+# Copyright 2017 Kitware, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#------------------------------------------------------------------------------#
+
+cmake_minimum_required(VERSION 3.1)
+project(LegionExample_attach_array_no_cpy_aos)
+
+# The example source uses C++11 alias templates, so require a C++11 compiler
+include(CheckCXXCompilerFlag)
+check_cxx_compiler_flag("-std=c++11" CXX11_COMPILER)
+if(CXX11_COMPILER)
+  set(CMAKE_CXX_STANDARD 11)
+else()
+  message(FATAL_ERROR "C++11 compatible compiler not found")
+endif()
+
+# Only search if we're building stand-alone and not as part of Legion
+if(NOT Legion_SOURCE_DIR)
+  find_package(Legion REQUIRED)
+endif()
+# The source in this directory is attach_array.cc (there is no attach_file.cc);
+# the target is named after the directory so it is unique within the Legion tree
+add_executable(attach_array_no_cpy_aos attach_array.cc)
+target_link_libraries(attach_array_no_cpy_aos Legion::Legion)
+if(Legion_ENABLE_TESTING)
+  add_test(NAME attach_array_no_cpy_aos COMMAND $<TARGET_FILE:attach_array_no_cpy_aos>)
+endif()
+
+
+
diff --git a/examples/attach_array_no_cpy_aos/Makefile b/examples/attach_array_no_cpy_aos/Makefile
new file mode 100644
index 0000000000..5651293710
--- /dev/null
+++ b/examples/attach_array_no_cpy_aos/Makefile
@@ -0,0 +1,51 @@
+# Copyright 2017 Stanford University
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+ifndef LG_RT_DIR
+$(error LG_RT_DIR variable is not defined, aborting build)
+endif
+
+# Flags for directing the runtime makefile what to include
+DEBUG           ?= 1		# Include debugging symbols
+OUTPUT_LEVEL    ?= LEVEL_DEBUG	# Compile time logging level
+USE_CUDA        ?= 0		# Include CUDA support (requires CUDA)
+USE_GASNET      ?= 0		# Include GASNet support (requires GASNet)
+USE_HDF         ?= 0		# Include HDF5 support (requires HDF5)
+ALT_MAPPERS     ?= 0		# Include alternative mappers (not recommended)
+
+# Put the binary file name here
+OUTFILE		?= attach_array 
+# List all the application source files here
+GEN_SRC		?= attach_array.cc	# .cc files
+GEN_GPU_SRC	?=				# .cu files
+
+# You can modify these variables, some will be appended to by the runtime makefile
+INC_FLAGS	?=
+CC_FLAGS	?=
+NVCC_FLAGS	?=
+GASNET_FLAGS	?=
+LD_FLAGS	?=
+# For Point and Rect typedefs
+CC_FLAGS	+= -std=c++11
+
+###########################################################################
+#
+#   Don't change anything below here
+#   
+###########################################################################
+
+include $(LG_RT_DIR)/runtime.mk
+
diff --git a/examples/attach_array_no_cpy_aos/attach_array.cc b/examples/attach_array_no_cpy_aos/attach_array.cc
new file mode 100644
index 0000000000..d1705aeb10
--- /dev/null
+++ b/examples/attach_array_no_cpy_aos/attach_array.cc
@@ -0,0 +1,388 @@
+/* Copyright 2017 Stanford University
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include <cstdio>
+#include <cassert>
+#include <cstdlib>
+#include <unistd.h>
+#include <sys/types.h>
+#ifdef USE_HDF
+#include <hdf5.h>
+#endif
+#include "legion.h"
+#include <realm/machine.h>
+#include "mem_impl.h"
+#include "inst_impl.h"
+#include "runtime_impl.h"
+using namespace Legion;
+
+template<typename FT, int N, typename T = coord_t>
+using AccessorRO = FieldAccessor<READ_ONLY,FT,N,T,Realm::AffineAccessor<FT,N,T> >;
+template<typename FT, int N, typename T = coord_t>
+using AccessorWD = FieldAccessor<WRITE_DISCARD,FT,N,T,Realm::AffineAccessor<FT,N,T> >;
+
+/*
+ * This example runs a 1D stencil over two partitions of
+ * one logical region whose two fields are backed by a
+ * single malloc'ed array of deriv_t structs, attached
+ * with attach_array_aos (array-of-structs layout), so the
+ * tasks operate on the attached array.  The stencil is
+ * the standard 5-point formula:
+ * f'(x) = (-f(x+2h) + 8f(x+h) - 8f(x-h) + f(x-2h))/12h
+ * For simplicity we'll assume h=1.
+ */
+
+enum TaskIDs {  // Legion task identifiers used by this example
+  TOP_LEVEL_TASK_ID,   // presumably top_level_task; registration is outside this view
+  INIT_FIELD_TASK_ID,  // its index launch in top_level_task is compiled out (#if 0)
+  STENCIL_TASK_ID,     // index-launched over color_is by top_level_task
+  CHECK_TASK_ID,       // launched once by top_level_task after the stencil launch
+};
+
+enum FieldIDs {  // field identifiers
+  FID_VAL,    // stencil input; attached at offset 0 of each deriv_t
+  FID_DERIV,  // stencil output; attached at offset sizeof(double) of each deriv_t
+  FID_CP      // not referenced in the visible part of this file
+};
+
+typedef struct {  // one element of the array handed to attach_array_aos
+  double val;    // bound to FID_VAL (offset 0 in the attach call)
+  double deriv;  // bound to FID_DERIV (offset sizeof(double) in the attach call)
+}deriv_t;
+
+deriv_t *my_ptr= NULL;
+
+void top_level_task(const Task *task,
+                    const std::vector<PhysicalRegion> &regions,
+                    Context ctx, Runtime *runtime)
+{
+  // Top-level task: attaches one malloc'ed array of deriv_t structs to both
+  // fields of the stencil region (array-of-structs layout), runs the stencil
+  // index launch on it and then launches the check task.
+  int num_elements = 1024;
+  int num_subregions = 4;
+
+  // Check for any command line arguments
+  {
+    const InputArgs &command_args = Runtime::get_input_args();
+    for (int i = 1; i < command_args.argc; i++)
+    {
+      // Only consume a value when one actually follows the flag; otherwise
+      // argv[++i] would step past the last argument.
+      if (!strcmp(command_args.argv[i],"-n") && (i+1 < command_args.argc))
+        num_elements = atoi(command_args.argv[++i]);
+      if (!strcmp(command_args.argv[i],"-b") && (i+1 < command_args.argc))
+        num_subregions = atoi(command_args.argv[++i]);
+    }
+  }
+  // Non-positive sizes would divide by zero in block_size below and make the
+  // final my_ptr[...] reads fall outside the array.
+  assert((num_elements > 0) && (num_subregions > 0));
+  printf("Running stencil computation for %d elements...\n", num_elements);
+  printf("Partitioning data into %d sub-regions...\n", num_subregions);
+
+  Rect<1> elem_rect(0,num_elements-1);
+  IndexSpaceT<1> is = runtime->create_index_space(ctx, elem_rect);
+  FieldSpace fs = runtime->create_field_space(ctx);
+  {
+    FieldAllocator allocator =
+      runtime->create_field_allocator(ctx, fs);
+    allocator.allocate_field(sizeof(double),FID_VAL);
+    allocator.allocate_field(sizeof(double),FID_DERIV);
+  }
+  LogicalRegion stencil_lr = runtime->create_logical_region(ctx, is, fs);
+
+  // One deriv_t per element backs both fields; only val is initialised here.
+  deriv_t *deriv_struct_ptr = (deriv_t*)malloc(sizeof(deriv_t)*(num_elements));
+  assert(deriv_struct_ptr != NULL);
+  printf("base array ptr %p\n", deriv_struct_ptr);
+
+  for (int i = 0; i < num_elements; i++) {
+    deriv_struct_ptr[i].val = drand48();
+  }
+
+  // Byte offset of each field inside one deriv_t; sizeof(deriv_t) is passed
+  // to attach_array_aos alongside the base pointer and this offset map.
+  std::map<FieldID, size_t> offset;
+  offset[FID_VAL] = 0;
+  offset[FID_DERIV] = sizeof(double);
+
+  PhysicalRegion stencil_pr = runtime->attach_array_aos(ctx, stencil_lr,
+      stencil_lr, deriv_struct_ptr, sizeof(deriv_t), offset, 0);
+  // NOTE(review): stencil_pr is never detached (there is no
+  // runtime->detach_array call) before stencil_lr is destroyed below, and
+  // the array is never freed - confirm that this is intended.
+  my_ptr = deriv_struct_ptr;
+
+  Rect<1> color_bounds(0,num_subregions-1);
+  IndexSpaceT<1> color_is = runtime->create_index_space(ctx, color_bounds);
+
+  IndexPartition disjoint_ip =
+    runtime->create_equal_partition(ctx, is, color_is);
+  const int block_size = (num_elements + num_subregions - 1) / num_subregions;
+  Matrix<1,1> transform;
+  transform[0][0] = block_size;
+  Rect<1> extent(-2, block_size + 1);
+  IndexPartition ghost_ip =
+    runtime->create_partition_by_restriction(ctx, is, color_is, transform, extent);
+
+  LogicalPartition disjoint_lp =
+    runtime->get_logical_partition(ctx, stencil_lr, disjoint_ip);
+  LogicalPartition ghost_lp =
+    runtime->get_logical_partition(ctx, stencil_lr, ghost_ip);
+
+  ArgumentMap arg_map;
+
+  // The init launch is disabled: the val members were filled in above.
+#if 0
+  IndexLauncher init_launcher(INIT_FIELD_TASK_ID, color_is,
+                              TaskArgument(NULL, 0), arg_map);
+  init_launcher.add_region_requirement(
+      RegionRequirement(disjoint_lp, 0/*projection ID*/,
+                        WRITE_DISCARD, EXCLUSIVE, stencil_lr));
+  init_launcher.add_field(0, FID_VAL);
+  runtime->execute_index_space(ctx, init_launcher);
+#endif
+
+  IndexLauncher stencil_launcher(STENCIL_TASK_ID, color_is,
+       TaskArgument(&num_elements, sizeof(num_elements)), arg_map);
+  stencil_launcher.add_region_requirement(
+      RegionRequirement(ghost_lp, 0/*projection ID*/,
+                        READ_ONLY, EXCLUSIVE, stencil_lr));
+  stencil_launcher.add_field(0, FID_VAL);
+  stencil_launcher.add_region_requirement(
+      RegionRequirement(disjoint_lp, 0/*projection ID*/,
+                        READ_WRITE, EXCLUSIVE, stencil_lr));
+  stencil_launcher.add_field(1, FID_DERIV);
+  runtime->execute_index_space(ctx, stencil_launcher);
+
+  // Finally, we launch a single task to check the results.
+  TaskLauncher check_launcher(CHECK_TASK_ID,
+      TaskArgument(&num_elements, sizeof(num_elements)));
+  check_launcher.add_region_requirement(
+      RegionRequirement(stencil_lr, READ_ONLY, EXCLUSIVE, stencil_lr));
+  check_launcher.add_field(0, FID_VAL);
+  check_launcher.add_region_requirement(
+      RegionRequirement(stencil_lr, READ_ONLY, EXCLUSIVE, stencil_lr));
+  check_launcher.add_field(1, FID_DERIV);
+  runtime->execute_task(ctx, check_launcher);
+
+  // Clean up our region, index space, and field space
+  runtime->destroy_logical_region(ctx, stencil_lr);
+  runtime->destroy_field_space(ctx, fs);
+  runtime->destroy_index_space(ctx, is);
+  // The array holds num_elements entries, so the last valid index is
+  // num_elements-1; indexing with num_elements reads past the allocation.
+  printf("End of TOP_LEVEL_TASK, %f, %f\n",
+         my_ptr[0].deriv, my_ptr[num_elements-1].deriv);
+}
+
+// Index-task body: fills the single requested field of this block with
+// values drawn from drand48().
+void init_field_task(const Task *task,
+                     const std::vector<PhysicalRegion> &regions,
+                     Context ctx, Runtime *runtime)
+{
+  assert(regions.size() == 1);
+  assert(task->regions.size() == 1);
+  assert(task->regions[0].privilege_fields.size() == 1);
+
+  FieldID fid = *(task->regions[0].privilege_fields.begin());
+  const int point = task->index_point.point_data[0];
+  printf("Initializing field %d for block %d...\n", fid, point);
+
+  const AccessorWD<double,1> acc(regions[0], fid);
+
+  // Every point of this block's sub-region gets its own drand48() sample.
+  Rect<1> rect = runtime->get_index_space_domain(ctx,
+                  task->regions[0].region.get_index_space());
+  for (PointInRectIterator<1> pir(rect); pir(); pir++) {
+    acc[*pir] = drand48();
+  }
+}
+
+// Index-task body for the 5-point stencil.
+//   regions[0]  one read-only field, read at offsets -2..+2 around each point
+//   regions[1]  one field, written for every point of this block
+//   task->args  a single int: the total number of elements in the space
+// Blocks that touch either end of the space take a slow path that clamps
+// out-of-range neighbour indices; all other blocks take an unchecked fast
+// path.  Both paths evaluate (-l2 + 8*l1 - 8*r1 + r2) / 12, where l and r
+// are the left and right neighbours at distance 1 and 2.
+// Output is the field written in regions[1] plus one progress line on stdout.
+// In this example both fields are backed by the deriv_t array that
+// top_level_task attaches with attach_array_aos.
+void stencil_task(const Task *task,
+                  const std::vector<PhysicalRegion> &regions,
+                  Context ctx, Runtime *runtime)
+{
+  assert(regions.size() == 2);
+  assert(task->regions.size() == 2);
+  const RegionRequirement &in_req = task->regions[0];
+  const RegionRequirement &out_req = task->regions[1];
+  assert(in_req.privilege_fields.size() == 1);
+  assert(out_req.privilege_fields.size() == 1);
+  assert(task->arglen == sizeof(int));
+  // The launcher passes the element count by value as the sole argument.
+  const int max_elements = *((const int*)task->args);
+  // Color of this block within the index launch; only used for logging.
+  const int point = task->index_point.point_data[0];
+
+  const FieldID in_fid = *(in_req.privilege_fields.begin());
+  const FieldID out_fid = *(out_req.privilege_fields.begin());
+
+  // The output accessor is constructed before the input accessor.
+  const AccessorWD<double,1> dst(regions[1], out_fid);
+  const AccessorRO<double,1> src(regions[0], in_fid);
+
+  // Iterate over the points we write (region 1), not the wider read region.
+  Rect<1> block = runtime->get_index_space_domain(ctx,
+                               out_req.region.get_index_space());
+
+  // A block is "interior" when every one of its points has two in-range
+  // neighbours on each side, so no clamping can ever be needed.
+  const bool interior =
+    (block.lo[0] >= 2) && (block.hi[0] <= (max_elements-3));
+
+  if (interior)
+  {
+    printf("Running fast stencil path for point %d...\n", point);
+    // In the fast path, we don't need any checks
+    for (PointInRectIterator<1> it(block); it(); it++)
+    {
+      // l2/l1 are the neighbours at -2/-1, r1/r2 the neighbours at +1/+2.
+      const double l2 = src[*it - 2];
+      const double l1 = src[*it - 1];
+      const double r1 = src[*it + 1];
+      const double r2 = src[*it + 2];
+
+      dst[*it] = (-l2 + 8.0*l1 - 8.0*r1 + r2) / 12.0;
+    }
+    return;
+  }
+
+  printf("Running slow stencil path for point %d...\n", point);
+  // In the slow path every neighbour index is tested before it is used,
+  // so no read ever leaves the range [0, max_elements-1].
+  for (PointInRectIterator<1> it(block); it(); it++)
+  {
+    // Left neighbours: an index before element 0 reads element 0 instead.
+    const double l2 = (it[0] < 2) ? src[0] : src[*it - 2];
+    const double l1 = (it[0] < 1) ? src[0] : src[*it - 1];
+
+    // Right neighbours: an index past the last element reads the last one.
+    const double r1 = (it[0] > (max_elements-2)) ? src[max_elements-1]
+                                                 : src[*it + 1];
+    const double r2 = (it[0] > (max_elements-3)) ? src[max_elements-1]
+                                                 : src[*it + 2];
+
+    dst[*it] = (-l2 + 8.0*l1 - 8.0*r1 + r2) / 12.0;
+  }
+}
+
+void check_task(const Task *task, // recomputes the stencil from region 0, compares with region 1
+                const std::vector<PhysicalRegion> &regions,
+                Context ctx, Runtime *runtime)
+{
+  assert(regions.size() == 2);
+  assert(task->regions.size() == 2);
+  assert(task->regions[0].privilege_fields.size() == 1);
+  assert(task->regions[1].privilege_fields.size() == 1);
+  assert(task->arglen == sizeof(int)); // sole argument is max_elements
+  const int max_elements = *((const int*)task->args);
+
+  FieldID src_fid = *(task->regions[0].privilege_fields.begin());
+  FieldID dst_fid = *(task->regions[1].privilege_fields.begin());
+
+  const AccessorRO<double,1> src_acc(regions[0], src_fid);
+  const AccessorRO<double,1> dst_acc(regions[1], dst_fid);
+
+  Rect<1> rect = runtime->get_index_space_domain(ctx,
+                  task->regions[1].region.get_index_space());
+
+  // This is the checking task, so every point uses the clamped (slow) path
+  bool all_passed = true;
+  int i = 0; // iteration counter used to index my_ptr; assumes rect starts at 0 - TODO confirm
+  for (PointInRectIterator<1> pir(rect); pir(); pir++)
+  {
+    double l2, l1, r1, r2;
+    if (pir[0] < 2)
+      l2 = src_acc[0]; // clamp to the first element
+    else
+      l2 = src_acc[*pir - 2];
+    if (pir[0] < 1)
+      l1 = src_acc[0]; // clamp to the first element
+    else
+      l1 = src_acc[*pir - 1];
+    if (pir[0] > (max_elements-2))
+      r1 = src_acc[max_elements-1]; // clamp to the last element
+    else
+      r1 = src_acc[*pir + 1];
+    if (pir[0] > (max_elements-3))
+      r2 = src_acc[max_elements-1]; // clamp to the last element
+    else
+      r2 = src_acc[*pir + 2];
+    
+    double expected = (-l2 + 8.0*l1 - 8.0*r1 + r2) / 12.0;
+    double received = dst_acc[*pir];
+    if (i == 0 || i == max_elements-1) printf("result %d, %f,ptr %p, src %p\n", i, received, dst_acc.ptr(rect.lo), src_acc.ptr(rect.lo));
+    if (my_ptr[i].deriv != received) { // compare against my_ptr (declared outside this block); report only
+        printf("transfer error %d, %f\n", i, my_ptr[i].deriv);
+    }
+    i++;
+    // Exact floating-point comparison is normally unwise, but the
+    // operations are performed in the same order here, so the values
+    // should be bitwise equal.
+    if (expected != received)
+      all_passed = false;
+  }
+  printf("CHECK, %f, %f\n", my_ptr[0].deriv, my_ptr[max_elements-1].deriv);
+  if (all_passed)
+    printf("SUCCESS!\n");
+  else
+    printf("FAILURE!\n");
+}
+
+int main(int argc, char **argv) // registers the four task variants, then starts the runtime
+{
+  Runtime::set_top_level_task_id(TOP_LEVEL_TASK_ID);
+
+  {
+    TaskVariantRegistrar registrar(TOP_LEVEL_TASK_ID, "top_level");
+    registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); // LOC_PROC variant
+    Runtime::preregister_task_variant<top_level_task>(registrar, "top_level");
+  }
+
+  {
+    TaskVariantRegistrar registrar(INIT_FIELD_TASK_ID, "init_field");
+    registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); // LOC_PROC variant
+    Runtime::preregister_task_variant<init_field_task>(registrar, "init_field");
+  }
+
+  {
+    TaskVariantRegistrar registrar(STENCIL_TASK_ID, "stencil");
+    registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); // LOC_PROC variant
+    Runtime::preregister_task_variant<stencil_task>(registrar, "stencil");
+  }
+
+  {
+    TaskVariantRegistrar registrar(CHECK_TASK_ID, "check");
+    registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); // LOC_PROC variant
+    Runtime::preregister_task_variant<check_task>(registrar, "check");
+  }
+
+  return Runtime::start(argc, argv); // the value returned by start() becomes the exit status
+}
diff --git a/examples/attach_array_no_cpy_soa/CMakeLists.txt b/examples/attach_array_no_cpy_soa/CMakeLists.txt
new file mode 100644
index 0000000000..34bf3102f3
--- /dev/null
+++ b/examples/attach_array_no_cpy_soa/CMakeLists.txt
@@ -0,0 +1,42 @@
+#------------------------------------------------------------------------------#
+# Copyright 2017 Kitware, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#------------------------------------------------------------------------------#
+
+cmake_minimum_required(VERSION 3.1)
+project(LegionExample_attach_array_no_cpy_soa)
+
+include(CheckCXXCompilerFlag)
+
+check_cxx_compiler_flag( "-std=c++11" CXX11_COMPILER)
+
+if(CXX11_COMPILER)
+  set(CMAKE_CXX_STANDARD 11)
+else()
+  message(FATAL_ERROR "C++11 compatible compiler not found")
+endif()
+
+# Only search if we're building stand-alone and not as part of Legion
+if(NOT Legion_SOURCE_DIR)
+  find_package(Legion REQUIRED)
+endif()
+
+add_executable(attach_array_no_cpy_soa attach_array.cc)
+target_link_libraries(attach_array_no_cpy_soa Legion::Legion)
+if(Legion_ENABLE_TESTING)
+  add_test(NAME attach_array_no_cpy_soa COMMAND $<TARGET_FILE:attach_array_no_cpy_soa>)
+endif()
+
+
+
diff --git a/examples/attach_array_no_cpy_soa/Makefile b/examples/attach_array_no_cpy_soa/Makefile
new file mode 100644
index 0000000000..5651293710
--- /dev/null
+++ b/examples/attach_array_no_cpy_soa/Makefile
@@ -0,0 +1,51 @@
+# Copyright 2017 Stanford University
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+ifndef LG_RT_DIR
+$(error LG_RT_DIR variable is not defined, aborting build)
+endif
+
+# Flags for directing the runtime makefile what to include
+DEBUG           ?= 1		# Include debugging symbols
+OUTPUT_LEVEL    ?= LEVEL_DEBUG	# Compile time logging level
+USE_CUDA        ?= 0		# Include CUDA support (requires CUDA)
+USE_GASNET      ?= 0		# Include GASNet support (requires GASNet)
+USE_HDF         ?= 0		# Include HDF5 support (requires HDF5)
+ALT_MAPPERS     ?= 0		# Include alternative mappers (not recommended)
+
+# Put the binary file name here
+OUTFILE		?= attach_array 
+# List all the application source files here
+GEN_SRC		?= attach_array.cc	# .cc files
+GEN_GPU_SRC	?=				# .cu files
+
+# You can modify these variables, some will be appended to by the runtime makefile
+INC_FLAGS	?=
+CC_FLAGS	?=
+NVCC_FLAGS	?=
+GASNET_FLAGS	?=
+LD_FLAGS	?=
+# For Point and Rect typedefs
+CC_FLAGS	+= -std=c++11
+
+###########################################################################
+#
+#   Don't change anything below here
+#   
+###########################################################################
+
+include $(LG_RT_DIR)/runtime.mk
+
diff --git a/examples/attach_array_no_cpy_soa/attach_array.cc b/examples/attach_array_no_cpy_soa/attach_array.cc
new file mode 100644
index 0000000000..7f3f9b93cc
--- /dev/null
+++ b/examples/attach_array_no_cpy_soa/attach_array.cc
@@ -0,0 +1,375 @@
+/* Copyright 2017 Stanford University
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include <cstdio>
+#include <cassert>
+#include <cstdlib>
+#include <unistd.h>
+#include <sys/types.h>
+#ifdef USE_HDF
+#include <hdf5.h>
+#endif
+#include "legion.h"
+#include <realm/machine.h>
+#include "mem_impl.h"
+#include "inst_impl.h"
+#include "runtime_impl.h"
+using namespace Legion;
+
+template<typename FT, int N, typename T = coord_t> // READ_ONLY field accessor backed by Realm::AffineAccessor
+using AccessorRO = FieldAccessor<READ_ONLY,FT,N,T,Realm::AffineAccessor<FT,N,T> >;
+template<typename FT, int N, typename T = coord_t> // WRITE_DISCARD field accessor backed by Realm::AffineAccessor
+using AccessorWD = FieldAccessor<WRITE_DISCARD,FT,N,T,Realm::AffineAccessor<FT,N,T> >;
+
+/*
+ * In this example we illustrate how the Legion
+ * programming model supports multiple partitions
+ * of the same logical region and the benefits it
+ * provides by allowing multiple views onto the
+ * same logical region.  We compute a simple 5-point
+ * 1D stencil using the standard formula:
+ * f'(x) = (-f(x+2h) + 8f(x+h) - 8f(x-h) + f(x-2h))/12h
+ * For simplicity we'll assume h=1.
+ */
+
+enum TaskIDs { // IDs under which main() registers the task variants
+  TOP_LEVEL_TASK_ID,   // top_level_task
+  INIT_FIELD_TASK_ID,  // init_field_task
+  STENCIL_TASK_ID,     // stencil_task
+  CHECK_TASK_ID,       // check_task
+};
+
+enum FieldIDs { // fields of the stencil region
+  FID_VAL,    // allocated with sizeof(double); stencil input
+  FID_DERIV,  // allocated with sizeof(deriv_t); stencil output
+  FID_CP      // not referenced elsewhere in this file
+};
+
+typedef struct { // element type of FID_DERIV
+  double check; // stencil_task stores 1.0 here for every point it writes
+  double deriv; // stencil result for the point
+}deriv_t;
+
+deriv_t *my_ptr= NULL; // set by top_level_task to the buffer attached for FID_DERIV; read by check_task
+
+void top_level_task(const Task *task,
+                    const std::vector<PhysicalRegion> &regions,
+                    Context ctx, Runtime *runtime)
+{ // Attaches two malloc'ed arrays to a region, launches the stencil, then the check.
+  int num_elements = 1024;
+  int num_subregions = 4;
+
+  // Parse "-n <elements>" and "-b <blocks>"; a flag with no value after it is ignored
+  {
+    const InputArgs &command_args = Runtime::get_input_args();
+    for (int i = 1; i < command_args.argc; i++)
+    {
+      if (!strcmp(command_args.argv[i],"-n") && (i+1 < command_args.argc))
+        num_elements = atoi(command_args.argv[++i]);
+      if (!strcmp(command_args.argv[i],"-b") && (i+1 < command_args.argc))
+        num_subregions = atoi(command_args.argv[++i]);
+    }
+  }
+  printf("Running stencil computation for %d elements...\n", num_elements);
+  printf("Partitioning data into %d sub-regions...\n", num_subregions);
+
+  Rect<1> elem_rect(0,num_elements-1);
+  IndexSpaceT<1> is = runtime->create_index_space(ctx, elem_rect);
+  FieldSpace fs = runtime->create_field_space(ctx);
+  {
+    FieldAllocator allocator = 
+      runtime->create_field_allocator(ctx, fs);
+    allocator.allocate_field(sizeof(double),FID_VAL);
+    allocator.allocate_field(sizeof(deriv_t),FID_DERIV);
+  }
+  LogicalRegion stencil_lr = runtime->create_logical_region(ctx, is, fs);
+
+  PhysicalRegion stencil_val_pr;
+  double *val_ptr = (double*)malloc(sizeof(double)*(num_elements));
+  for (int i = 0; i < num_elements; i++) {
+    val_ptr[i] = drand48(); // input values are generated here, not by init_field_task
+  }
+  std::map<FieldID,void*> field_pointer_map_val;
+  field_pointer_map_val[FID_VAL] = val_ptr;
+  printf("Attach array fid %d, ptr %p\n", FID_VAL, val_ptr);
+  // FID_VAL is not attached on its own (field_pointer_map_val and
+  // stencil_val_pr are unused); it is attached together with FID_DERIV
+  // in the single attach_array_soa call below.
+  PhysicalRegion stencil_deriv_pr;
+  deriv_t *deriv_ptr = (deriv_t*)malloc(sizeof(deriv_t)*(num_elements));
+  std::map<FieldID,void*> field_pointer_map_deriv;
+  field_pointer_map_deriv[FID_DERIV] = deriv_ptr;
+  field_pointer_map_deriv[FID_VAL] = val_ptr;
+  printf("Attach arrray fid %d, ptr %p\n", FID_DERIV, deriv_ptr);  
+  stencil_deriv_pr = runtime->attach_array_soa(ctx, stencil_lr, stencil_lr, field_pointer_map_deriv, 0); 
+  my_ptr = deriv_ptr; // published so check_task can compare against the raw buffer
+
+  Rect<1> color_bounds(0,num_subregions-1);
+  IndexSpaceT<1> color_is = runtime->create_index_space(ctx, color_bounds);
+
+  IndexPartition disjoint_ip = 
+    runtime->create_equal_partition(ctx, is, color_is);
+  const int block_size = (num_elements + num_subregions - 1) / num_subregions;
+  Matrix<1,1> transform;
+  transform[0][0] = block_size;
+  Rect<1> extent(-2, block_size + 1); // one block plus two ghost points on each side
+  IndexPartition ghost_ip = 
+    runtime->create_partition_by_restriction(ctx, is, color_is, transform, extent);
+
+  LogicalPartition disjoint_lp = 
+    runtime->get_logical_partition(ctx, stencil_lr, disjoint_ip);
+  LogicalPartition ghost_lp = 
+    runtime->get_logical_partition(ctx, stencil_lr, ghost_ip);
+
+  ArgumentMap arg_map;
+  
+#if 0
+  IndexLauncher init_launcher(INIT_FIELD_TASK_ID, color_is,
+                              TaskArgument(NULL, 0), arg_map);
+  init_launcher.add_region_requirement(
+      RegionRequirement(disjoint_lp, 0/*projection ID*/,
+                        WRITE_DISCARD, EXCLUSIVE, stencil_lr));
+  init_launcher.add_field(0, FID_VAL);
+  runtime->execute_index_space(ctx, init_launcher);
+#endif
+
+  IndexLauncher stencil_launcher(STENCIL_TASK_ID, color_is,
+       TaskArgument(&num_elements, sizeof(num_elements)), arg_map);
+  stencil_launcher.add_region_requirement(
+      RegionRequirement(ghost_lp, 0/*projection ID*/,
+                        READ_ONLY, EXCLUSIVE, stencil_lr));
+  stencil_launcher.add_field(0, FID_VAL);
+  stencil_launcher.add_region_requirement(
+      RegionRequirement(disjoint_lp, 0/*projection ID*/,
+                        READ_WRITE, EXCLUSIVE, stencil_lr));
+  stencil_launcher.add_field(1, FID_DERIV);
+  runtime->execute_index_space(ctx, stencil_launcher);
+
+
+  // Finally, we launch a single task to check the results.
+  TaskLauncher check_launcher(CHECK_TASK_ID, 
+      TaskArgument(&num_elements, sizeof(num_elements)));
+  check_launcher.add_region_requirement(
+      RegionRequirement(stencil_lr, READ_ONLY, EXCLUSIVE, stencil_lr));
+  check_launcher.add_field(0, FID_VAL);
+  check_launcher.add_region_requirement(
+      RegionRequirement(stencil_lr, READ_ONLY, EXCLUSIVE, stencil_lr));
+  check_launcher.add_field(1, FID_DERIV);
+  runtime->execute_task(ctx, check_launcher);
+
+  // Clean up our region, index space, and field space
+  runtime->destroy_logical_region(ctx, stencil_lr);
+  runtime->destroy_field_space(ctx, fs);
+  runtime->destroy_index_space(ctx, is);
+  printf("End of TOP_LEVEL_TASK, %f, %f\n", my_ptr[0].deriv, my_ptr[num_elements-1].deriv); // last valid index
+}
+
+// Fills the single field of region 0 with drand48() values, one per point
+void init_field_task(const Task *task,
+                     const std::vector<PhysicalRegion> &regions,
+                     Context ctx, Runtime *runtime)
+{
+  assert(regions.size() == 1); 
+  assert(task->regions.size() == 1);
+  assert(task->regions[0].privilege_fields.size() == 1);
+
+  FieldID fid = *(task->regions[0].privilege_fields.begin());
+  const int point = task->index_point.point_data[0];
+  printf("Initializing field %d for block %d...\n", fid, point);
+
+  const AccessorWD<double,1> acc(regions[0], fid);
+
+  int i = point; // incremented in the loop below but never read
+  Rect<1> rect = runtime->get_index_space_domain(ctx,
+                  task->regions[0].region.get_index_space());
+  for (PointInRectIterator<1> pir(rect); pir(); pir++) {
+    acc[*pir] = drand48();
+    i++;
+  }
+}
+
+// Stencil task: reads doubles from the field of region 0 and writes a
+// deriv_t per point into the field of region 1. Blocks touching either end
+// of the global range take a clamped (slow) path; others an unchecked one.
+void stencil_task(const Task *task,
+                  const std::vector<PhysicalRegion> &regions,
+                  Context ctx, Runtime *runtime)
+{
+  assert(regions.size() == 2);
+  assert(task->regions.size() == 2);
+  assert(task->regions[0].privilege_fields.size() == 1);
+  assert(task->regions[1].privilege_fields.size() == 1);
+  assert(task->arglen == sizeof(int)); // sole argument is max_elements
+  const int max_elements = *((const int*)task->args);
+  const int point = task->index_point.point_data[0]; // only used in the log lines
+  
+  FieldID read_fid = *(task->regions[0].privilege_fields.begin());
+  FieldID write_fid = *(task->regions[1].privilege_fields.begin());
+
+  const AccessorRO<double,1> read_acc(regions[0], read_fid);
+  const AccessorWD<deriv_t,1> write_acc(regions[1], write_fid);
+
+  Rect<1> rect = runtime->get_index_space_domain(ctx,
+                  task->regions[1].region.get_index_space()); // points written here
+  // If this block is within two points of either end of
+  // [0, max_elements-1], take the slow path, which clamps
+  // neighbor indices before reading.
+  // Otherwise every neighbor index is in range and the
+  // fast path reads them without any checks.
+  deriv_t deriv; // scratch value; both members are set before each store
+  if ((rect.lo[0] < 2) || (rect.hi[0] > (max_elements-3)))
+  {
+    printf("Running slow stencil path for point %d...\n", point);
+    // Note in the slow path that there are checks which
+    // perform clamps when necessary before reading values.
+    for (PointInRectIterator<1> pir(rect); pir(); pir++)
+    {
+      double l2, l1, r1, r2;
+      if (pir[0] < 2)
+        l2 = read_acc[0]; // clamp to the first element
+      else
+        l2 = read_acc[*pir - 2];
+      if (pir[0] < 1)
+        l1 = read_acc[0]; // clamp to the first element
+      else
+        l1 = read_acc[*pir - 1];
+      if (pir[0] > (max_elements-2))
+        r1 = read_acc[max_elements-1]; // clamp to the last element
+      else
+        r1 = read_acc[*pir + 1];
+      if (pir[0] > (max_elements-3))
+        r2 = read_acc[max_elements-1]; // clamp to the last element
+      else
+        r2 = read_acc[*pir + 2];
+      
+      double result = (-l2 + 8.0*l1 - 8.0*r1 + r2) / 12.0;
+      deriv.deriv = result;
+      deriv.check = 1.0; // constant marker stored alongside every result
+      write_acc[*pir] = deriv;
+    }
+  }
+  else
+  {
+    printf("Running fast stencil path for point %d...\n", point);
+    // In the fast path, we don't need any checks
+    for (PointInRectIterator<1> pir(rect); pir(); pir++)
+    {
+      double l2 = read_acc[*pir - 2];
+      double l1 = read_acc[*pir - 1];
+      double r1 = read_acc[*pir + 1];
+      double r2 = read_acc[*pir + 2];
+
+      double result = (-l2 + 8.0*l1 - 8.0*r1 + r2) / 12.0; // same formula as the slow path
+      deriv.deriv = result;
+      deriv.check = 1.0;
+      write_acc[*pir] = deriv;
+    }
+  }
+}
+
+void check_task(const Task *task, // recomputes the stencil from region 0, compares with region 1
+                const std::vector<PhysicalRegion> &regions,
+                Context ctx, Runtime *runtime)
+{
+  assert(regions.size() == 2);
+  assert(task->regions.size() == 2);
+  assert(task->regions[0].privilege_fields.size() == 1);
+  assert(task->regions[1].privilege_fields.size() == 1);
+  assert(task->arglen == sizeof(int)); // sole argument is max_elements
+  const int max_elements = *((const int*)task->args);
+
+  FieldID src_fid = *(task->regions[0].privilege_fields.begin());
+  FieldID dst_fid = *(task->regions[1].privilege_fields.begin());
+
+  const AccessorRO<double,1> src_acc(regions[0], src_fid);
+  const AccessorRO<deriv_t,1> dst_acc(regions[1], dst_fid);
+
+  Rect<1> rect = runtime->get_index_space_domain(ctx,
+                  task->regions[1].region.get_index_space());
+
+  // This is the checking task, so every point uses the clamped (slow) path
+  bool all_passed = true;
+  int i = 0; // iteration counter used to index my_ptr; assumes rect starts at 0 - TODO confirm
+  for (PointInRectIterator<1> pir(rect); pir(); pir++)
+  {
+    double l2, l1, r1, r2;
+    if (pir[0] < 2)
+      l2 = src_acc[0]; // clamp to the first element
+    else
+      l2 = src_acc[*pir - 2];
+    if (pir[0] < 1)
+      l1 = src_acc[0]; // clamp to the first element
+    else
+      l1 = src_acc[*pir - 1];
+    if (pir[0] > (max_elements-2))
+      r1 = src_acc[max_elements-1]; // clamp to the last element
+    else
+      r1 = src_acc[*pir + 1];
+    if (pir[0] > (max_elements-3))
+      r2 = src_acc[max_elements-1]; // clamp to the last element
+    else
+      r2 = src_acc[*pir + 2];
+    
+    double expected = (-l2 + 8.0*l1 - 8.0*r1 + r2) / 12.0;
+    deriv_t received = dst_acc[*pir];
+    if (i == 0 || i == max_elements-1) printf("result %d, %f, %f, ptr %p, src %p\n", i, received.deriv, received.check, dst_acc.ptr(rect.lo), src_acc.ptr(rect.lo));
+    if (my_ptr[i].deriv != received.deriv) { // raw attached buffer vs. accessor view; report only
+        printf("transfer error %d, %f\n", i, my_ptr[i].deriv);
+    }
+    i++;
+    // Exact floating-point comparison is normally unwise, but the
+    // operations are performed in the same order here, so the values
+    // should be bitwise equal.
+    if (expected != received.deriv)
+      all_passed = false;
+  }
+  printf("CHECK, %f, %f\n", my_ptr[0].deriv, my_ptr[max_elements-1].deriv);
+  if (all_passed)
+    printf("SUCCESS!\n");
+  else
+    printf("FAILURE!\n");
+}
+
+int main(int argc, char **argv) // registers the four task variants, then starts the runtime
+{
+  Runtime::set_top_level_task_id(TOP_LEVEL_TASK_ID);
+
+  {
+    TaskVariantRegistrar registrar(TOP_LEVEL_TASK_ID, "top_level");
+    registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); // LOC_PROC variant
+    Runtime::preregister_task_variant<top_level_task>(registrar, "top_level");
+  }
+
+  {
+    TaskVariantRegistrar registrar(INIT_FIELD_TASK_ID, "init_field"); // its launch is under #if 0 in top_level_task
+    registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); // LOC_PROC variant
+    Runtime::preregister_task_variant<init_field_task>(registrar, "init_field");
+  }
+
+  {
+    TaskVariantRegistrar registrar(STENCIL_TASK_ID, "stencil");
+    registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); // LOC_PROC variant
+    Runtime::preregister_task_variant<stencil_task>(registrar, "stencil");
+  }
+
+  {
+    TaskVariantRegistrar registrar(CHECK_TASK_ID, "check");
+    registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); // LOC_PROC variant
+    Runtime::preregister_task_variant<check_task>(registrar, "check");
+  }
+
+  return Runtime::start(argc, argv); // the value returned by start() becomes the exit status
+}
diff --git a/examples/attach_file/attach_file.cc b/examples/attach_file/attach_file.cc
index dcbe2e4857..96dd013902 100644
--- a/examples/attach_file/attach_file.cc
+++ b/examples/attach_file/attach_file.cc
@@ -466,4 +466,4 @@ int main(int argc, char **argv)
   }
 
   return Runtime::start(argc, argv);
-}
+}
\ No newline at end of file
diff --git a/examples/circuit/circuit.cc b/examples/circuit/circuit.cc
index e2f8ec7f96..5c154ed5b1 100644
--- a/examples/circuit/circuit.cc
+++ b/examples/circuit/circuit.cc
@@ -364,4 +364,3 @@ void allocate_locator_fields(Context ctx, Runtime *runtime, FieldSpace locator_s
   allocator.allocate_field(sizeof(PointerLocation), FID_LOCATOR);
   runtime->attach_name(locator_space, FID_LOCATOR, "locator");
 }
-
diff --git a/runtime/legion/legion.cc b/runtime/legion/legion.cc
index 6b4dbc4748..b2f13f16eb 100644
--- a/runtime/legion/legion.cc
+++ b/runtime/legion/legion.cc
@@ -5744,6 +5744,68 @@ namespace Legion {
     {
       runtime->detach_external_resource(ctx, region);
     }
+    
+    //--------------------------------------------------------------------------
+    PhysicalRegion Runtime::attach_array_soa(Context ctx, // attach one external pointer per field
+                                             LogicalRegion handle,
+                                             LogicalRegion parent,
+                                             const std::map<FieldID,void*> &field_pointer_map,
+                                             int c_f_layout_flag) // 0: Fortran array, otherwise C array
+    //--------------------------------------------------------------------------
+    {
+      ExternalResource resource;
+      if (c_f_layout_flag == 0) {
+        resource = EXTERNAL_FORTRAN_ARRAY;
+      } else {
+        resource = EXTERNAL_C_ARRAY;
+      }
+      AttachLauncher launcher(resource, handle, parent);
+      launcher.attach_array(field_pointer_map, 0); // layout flag 0 = SOA
+      launcher.aos_base_ptr = NULL; // the two AOS members are cleared on this path
+      launcher.aos_stride = 0;
+      return runtime->attach_external_resource(ctx, launcher);
+    }
+
+    //--------------------------------------------------------------------------
+    void Runtime::detach_array(Context ctx, PhysicalRegion region)
+    //--------------------------------------------------------------------------
+    {
+      runtime->detach_external_resource(ctx, region); // forwards to the generic external-resource detach
+    }
+    
+    //--------------------------------------------------------------------------
+    PhysicalRegion Runtime::attach_array_aos(Context ctx, // attach one external array holding all fields
+                                             LogicalRegion handle,
+                                             LogicalRegion parent,
+                                             const void* array_ptr, // base address; constness is cast away below
+                                             size_t stride, // forwarded unchanged as launcher.aos_stride
+                                             const std::map<FieldID, size_t> &field_offset, // byte offset per field
+                                             int c_f_layout_flag) // 0: Fortran array, otherwise C array
+    //--------------------------------------------------------------------------
+    {
+      unsigned char* base_ptr = (unsigned char*)array_ptr; // byte pointer so offsets can be added
+      std::map<FieldID, size_t>::const_iterator it_offset = field_offset.begin();
+      std::map<FieldID,void*> field_pointer_map;
+      while(it_offset != field_offset.end()) // turn each (field, offset) into (field, pointer)
+      {
+          size_t offset = it_offset->second;
+          FieldID fid = it_offset->first;
+          unsigned char *ptr = base_ptr + offset; // base plus this field's byte offset
+          field_pointer_map[fid] = ptr;
+          it_offset ++;
+      }
+      ExternalResource resource;
+      if (c_f_layout_flag == 0) {
+        resource = EXTERNAL_FORTRAN_ARRAY;
+      } else {
+        resource = EXTERNAL_C_ARRAY;
+      }
+      AttachLauncher launcher(resource, handle, parent);
+      launcher.attach_array(field_pointer_map, 1); // layout flag 1 = AOS
+      launcher.aos_base_ptr = base_ptr;
+      launcher.aos_stride = stride;
+      return runtime->attach_external_resource(ctx, launcher);
+    }
 
     //--------------------------------------------------------------------------
     void Runtime::issue_copy_operation(Context ctx,const CopyLauncher &launcher)
diff --git a/runtime/legion/legion.h b/runtime/legion/legion.h
index 200ebb072d..c5a8e1f319 100644
--- a/runtime/legion/legion.h
+++ b/runtime/legion/legion.h
@@ -1750,6 +1750,8 @@ namespace Legion {
       inline void attach_hdf5(const char *file_name,
                               const std::map<FieldID,const char*> &field_map,
                               LegionFileMode mode);
+      inline void attach_array(const std::map<FieldID,void*> &field_pointer_map,
+                              int layoutflag);
     public:
       inline void add_field_pointer(FieldID fid, void *ptr);
       inline void set_pitch(unsigned dim, size_t pitch);
@@ -1767,6 +1769,9 @@ namespace Legion {
       // Data for arrays
       std::map<FieldID,/*pointers*/void*>           field_pointers;
       std::vector<size_t/*bytes*/>                  pitches;
+      int                                           layout_flag; // SOA 0, AOS 1
+      unsigned char*                                aos_base_ptr;
+      size_t                                        aos_stride;
     public:
       // Inform the runtime about any static dependences
       // These will be ignored outside of static traces
@@ -5181,6 +5186,21 @@ namespace Legion {
       LEGION_DEPRECATED("Detaching generic file type is deprecated "
                         "in favor of generic detach interface.")
       void detach_file(Context ctx, PhysicalRegion region);
+      
+      PhysicalRegion attach_array_soa(Context ctx,
+                                      LogicalRegion handle, LogicalRegion parent,
+                                      const std::map<FieldID,void*> &field_pointer_map,
+                                      int c_f_layout_flag);
+      
+      void detach_array(Context ctx, PhysicalRegion region);                                       
+      
+      PhysicalRegion attach_array_aos(Context ctx,
+                                      LogicalRegion handle,
+                                      LogicalRegion parent,
+                                      const void* array_ptr,
+                                      size_t stride, 
+                                      const std::map<FieldID, size_t> &field_offset, 
+                                      int c_f_layout_flag);
     public:
       //------------------------------------------------------------------------
       // Copy Operations
diff --git a/runtime/legion/legion.inl b/runtime/legion/legion.inl
index 70693a5888..2803d86c0c 100644
--- a/runtime/legion/legion.inl
+++ b/runtime/legion/legion.inl
@@ -3287,6 +3287,16 @@ namespace Legion {
       mode = m;
       field_files = field_map;
     }
+    
+    //--------------------------------------------------------------------------
+    inline void AttachLauncher::attach_array(const std::map<FieldID,void*> &field_pointer_map,
+                                int layoutflag) // stored in layout_flag: SOA 0, AOS 1
+    //--------------------------------------------------------------------------
+    {
+      file_name = "ARRAY"; // fixed placeholder string; NOTE(review): confirm how file_name is used for arrays
+      layout_flag = layoutflag;
+      field_pointers = field_pointer_map; // copies the whole map, replacing any earlier entries
+    }
 
     //--------------------------------------------------------------------------
     inline void AttachLauncher::add_field_pointer(FieldID fid, void *ptr)
diff --git a/runtime/legion/legion_ops.cc b/runtime/legion/legion_ops.cc
index 9a270a2620..63f32ff9b4 100644
--- a/runtime/legion/legion_ops.cc
+++ b/runtime/legion/legion_ops.cc
@@ -13693,12 +13693,52 @@ namespace Legion {
           }
         case EXTERNAL_C_ARRAY:
           {
-            assert(false); // TODO: Implement this
+            if (launcher.field_pointers.empty()) 
+            {
+              REPORT_LEGION_WARNING(LEGION_WARNING_HDF5_ATTACH_OPERATION,
+                            "ARRAY ATTACH OPERATION ISSUED WITH NO "
+                            "FIELD MAPPINGS IN TASK %s (ID %lld)! DID YOU "
+                            "FORGET THEM?!?", parent_ctx->get_task_name(),
+                            parent_ctx->get_unique_id());
+            }
+            layout_flag = launcher.layout_flag;
+            aos_base_ptr = launcher.aos_base_ptr;
+            aos_stride = launcher.aos_stride;
+            // Construct the region requirement for this task
+            requirement = RegionRequirement(launcher.handle, WRITE_DISCARD, 
+                                            EXCLUSIVE, launcher.parent);
+            for (std::map<FieldID,void*>::const_iterator it = 
+                  launcher.field_pointers.begin(); it != 
+                  launcher.field_pointers.end(); it++)
+            {
+              requirement.add_field(it->first);
+              field_pointers_map[it->first] = it->second;
+            }
             break;
           }
         case EXTERNAL_FORTRAN_ARRAY:
           {
-            assert(false); // TODO implement this
+            if (launcher.field_pointers.empty()) 
+            {
+              REPORT_LEGION_WARNING(LEGION_WARNING_HDF5_ATTACH_OPERATION,
+                            "ARRAY ATTACH OPERATION ISSUED WITH NO "
+                            "FIELD MAPPINGS IN TASK %s (ID %lld)! DID YOU "
+                            "FORGET THEM?!?", parent_ctx->get_task_name(),
+                            parent_ctx->get_unique_id());
+            }
+            layout_flag = launcher.layout_flag;
+            aos_base_ptr = launcher.aos_base_ptr;
+            aos_stride = launcher.aos_stride;
+            // Construct the region requirement for this task
+            requirement = RegionRequirement(launcher.handle, WRITE_DISCARD, 
+                                            EXCLUSIVE, launcher.parent);
+            for (std::map<FieldID,void*>::const_iterator it = 
+                  launcher.field_pointers.begin(); it != 
+                  launcher.field_pointers.end(); it++)
+            {
+              requirement.add_field(it->first);
+              field_pointers_map[it->first] = it->second;
+            }
             break;
           }
         default:
@@ -13936,12 +13976,40 @@ namespace Legion {
           }
         case EXTERNAL_C_ARRAY:
           {
-            assert(false);
+            // First build the set of field pointers
+            std::vector<Realm::FieldID> field_ids(field_pointers_map.size());
+            std::vector<void*> field_pointers(field_pointers_map.size());
+            unsigned idx = 0;
+            for (std::map<FieldID,void*>::const_iterator it = 
+                  field_pointers_map.begin(); it != field_pointers_map.end(); it++, idx++)
+            {
+              field_ids[idx] = it->first;
+              field_pointers[idx] = it->second;
+            }
+            // Now ask the low-level runtime to create the instance
+            result = node->create_array_instance(resource, field_ids, sizes, field_pointers,
+                                        layout_flag, aos_base_ptr, aos_stride);
+            constraints.specialized_constraint = 
+              SpecializedConstraint(NORMAL_SPECIALIZE);
             break;
           }
         case EXTERNAL_FORTRAN_ARRAY:
           {
-            assert(false);
+            // First build the set of field pointers
+            std::vector<Realm::FieldID> field_ids(field_pointers_map.size());
+            std::vector<void*> field_pointers(field_pointers_map.size());
+            unsigned idx = 0;
+            for (std::map<FieldID,void*>::const_iterator it = 
+                  field_pointers_map.begin(); it != field_pointers_map.end(); it++, idx++)
+            {
+              field_ids[idx] = it->first;
+              field_pointers[idx] = it->second;
+            }
+            // Now ask the low-level runtime to create the instance
+            result = node->create_array_instance(resource, field_ids, sizes, field_pointers,
+                                        layout_flag, aos_base_ptr, aos_stride);
+            constraints.specialized_constraint = 
+              SpecializedConstraint(NORMAL_SPECIALIZE);
             break;
           }
         default:
@@ -14292,10 +14360,10 @@ namespace Legion {
       assert(!manager->is_reduction_manager()); 
 #endif
       InstanceManager *inst_manager = manager->as_instance_manager(); 
-      if (!inst_manager->is_attached_file())
+      /*if (!inst_manager->is_attached_file())
         REPORT_LEGION_ERROR(ERROR_ILLEGAL_DETACH_OPERATION,
                       "Illegal detach operation on a physical region which "
-                      "was not attached!")
+                      "was not attached!") */
       std::set<RtEvent> applied_conditions;
       ApEvent detach_event = 
         runtime->forest->detach_file(requirement, this, 0/*idx*/, 
diff --git a/runtime/legion/legion_ops.h b/runtime/legion/legion_ops.h
index a6361c69b8..29764b11f4 100644
--- a/runtime/legion/legion_ops.h
+++ b/runtime/legion/legion_ops.h
@@ -2660,11 +2660,15 @@ namespace Legion {
       RestrictInfo restrict_info;
       const char *file_name;
       std::map<FieldID,const char*> field_map;
+      std::map<FieldID,void*> field_pointers_map;
       LegionFileMode file_mode;
       PhysicalRegion region;
       unsigned parent_req_index;
       std::set<RtEvent> map_applied_conditions;
       InstanceManager *file_instance;
+      int layout_flag;  // SOA 0, AOS 1
+      unsigned char* aos_base_ptr;
+      size_t aos_stride;
     };
 
     /**
diff --git a/runtime/legion/region_tree.cc b/runtime/legion/region_tree.cc
index 05442b99b6..7caef4078b 100644
--- a/runtime/legion/region_tree.cc
+++ b/runtime/legion/region_tree.cc
@@ -7943,7 +7943,7 @@ namespace Legion {
       }
 #ifdef DEBUG_LEGION
       assert(layout != NULL);
-      assert(layout->constraints->specialized_constraint.is_file());
+   //   assert(layout->constraints->specialized_constraint.is_file());
 #endif
       DistributedID did = context->runtime->get_available_distributed_id(false);
       MemoryManager *memory = 
diff --git a/runtime/legion/region_tree.h b/runtime/legion/region_tree.h
index 51b8db29c0..8979cd8821 100644
--- a/runtime/legion/region_tree.h
+++ b/runtime/legion/region_tree.h
@@ -929,6 +929,11 @@ namespace Legion {
                                    const std::vector<size_t> &field_sizes,
                                    const std::vector<const char*> &field_files,
                                    bool read_only) = 0;
+      virtual PhysicalInstance create_array_instance(ExternalResource resource,
+                                   const std::vector<Realm::FieldID> &field_ids,
+                                   const std::vector<size_t> &field_sizes,
+                                   const std::vector<void*> &field_pointers,
+                                   int layout_flag, unsigned char* aos_base_ptr, size_t aos_stride) = 0;
     public:
       virtual void get_launch_space_domain(Domain &launch_domain) = 0;
       virtual void validate_slicing(const std::vector<IndexSpace> &slice_spaces,
@@ -1149,6 +1154,11 @@ namespace Legion {
                                    const std::vector<size_t> &field_sizes,
                                    const std::vector<const char*> &field_files,
                                    bool read_only);
+      virtual PhysicalInstance create_array_instance(ExternalResource resource,
+                                   const std::vector<Realm::FieldID> &field_ids,
+                                   const std::vector<size_t> &field_sizes,
+                                   const std::vector<void*> &field_pointers,
+                                   int layout_flag, unsigned char* aos_base_ptr, size_t aos_stride);
     public:
       virtual void get_launch_space_domain(Domain &launch_domain);
       virtual void validate_slicing(const std::vector<IndexSpace> &slice_spaces,
diff --git a/runtime/legion/region_tree.inl b/runtime/legion/region_tree.inl
index b2d4939af7..f1464b9db2 100644
--- a/runtime/legion/region_tree.inl
+++ b/runtime/legion/region_tree.inl
@@ -2768,6 +2768,52 @@ namespace Legion {
 #endif
       return result;
     }
+    
+    //--------------------------------------------------------------------------
+    template<int DIM, typename T>
+    PhysicalInstance IndexSpaceNodeT<DIM,T>::create_array_instance(
+                                    ExternalResource resource,  // EXTERNAL_C_ARRAY -> 1, anything else -> 0 (see c_f_resource)
+                                    const std::vector<Realm::FieldID> &field_ids,
+                                    const std::vector<size_t> &field_sizes,
+                                    const std::vector<void*> &field_pointers,
+                                    int layout_flag, unsigned char* aos_base_ptr, size_t aos_stride)  // layout_flag 0 = SOA, otherwise AOS
+    //--------------------------------------------------------------------------
+    {
+      DETAILED_PROFILER(context->runtime, REALM_CREATE_INSTANCE_CALL);
+      // Have to wait for the index space to be ready if necessary
+      Realm::ZIndexSpace<DIM,T> local_space;
+      get_realm_index_space(local_space, true/*tight*/);
+      // No profiling for these kinds of instances currently
+      Realm::ProfilingRequestSet requests;
+      int c_f_resource = 0;  // ordering selector handed to Realm: 1 for C arrays, 0 otherwise
+      if (resource == EXTERNAL_C_ARRAY) {
+        c_f_resource = 1;
+      }
+      PhysicalInstance result;  // filled in by whichever Realm call runs below
+      if (layout_flag == 0) {  // SOA
+        LgEvent ready(PhysicalInstance::create_array_instance_SOA(result, 
+				           local_space,
+                   field_ids,
+							     field_sizes,
+							     field_pointers,
+							     c_f_resource,
+							     requests));
+        ready.lg_wait();  // wait on the event returned by instance creation
+      } else {  // AOS
+        LgEvent ready(PhysicalInstance::create_array_instance_AOS(result, 
+				           local_space,
+                   field_ids,
+							     field_sizes,
+							     field_pointers,
+							     aos_base_ptr, aos_stride,
+                   c_f_resource,
+							     requests));
+        ready.lg_wait();  // wait on the event returned by instance creation
+      }
+      // aos_base_ptr and aos_stride are only consulted on the AOS path; the
+      // SOA path ignores them.
+      return result;
+    }
 
     //--------------------------------------------------------------------------
     template<int DIM, typename T>
diff --git a/runtime/realm/attach_array.cc b/runtime/realm/attach_array.cc
new file mode 100644
index 0000000000..99629135c7
--- /dev/null
+++ b/runtime/realm/attach_array.cc
@@ -0,0 +1,174 @@
+/* Copyright 2017 Stanford University, NVIDIA Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Instance layouts for attaching externally allocated arrays (SOA and AOS)
+#include "inst_impl.h"
+#include "runtime_impl.h"
+#include "mem_impl.h"
+
+#include <realm/deppart/inst_helper.h>
+#include <realm/machine.h>
+
+namespace Realm {
+  
+  template <int N, typename T>  // SOA attach: each field lives in its own caller-owned array
+  /*static*/ Event RegionInstance::create_array_instance_SOA(RegionInstance& inst,
+							  const ZIndexSpace<N,T>& space,
+                const std::vector<FieldID> &field_ids,
+							  const std::vector<size_t> &field_sizes,  // bytes per element of field i
+                const std::vector<void*> &field_pointers,  // start address of field i's array
+							  int resource,  // 0 = Fortran ordering, non-zero = C ordering
+							  const ProfilingRequestSet& reqs,
+							  Event wait_on /*= Event::NO_EVENT*/)
+  {
+    Memory memory = Machine::MemoryQuery(Machine::get_machine())
+      .local_address_space()
+      .only_kind(Memory::SYSTEM_MEM)
+      .first();  // first local SYSTEM_MEM memory returned by the query
+    assert(memory.exists());
+
+    InstanceLayout<N,T> *layout = new InstanceLayout<N,T>;
+    layout->bytes_used = 0;
+    layout->alignment_reqd = 0;  // no allocation being made
+    layout->space = space;
+    layout->piece_lists.resize(field_sizes.size());  // one piece list per field
+    
+    LocalCPUMemory *m_impl = (LocalCPUMemory *)get_runtime()->get_memory_impl(memory);  // NOTE(review): unchecked downcast — assumes the SYSTEM_MEM impl is a LocalCPUMemory
+    unsigned char* base = (unsigned char*)m_impl->base;
+    unsigned char* ptr = NULL;
+    for(size_t i = 0; i < field_sizes.size(); i++) {  // field_ids/field_pointers are indexed in step with field_sizes (sizes not cross-checked)
+      FieldID id = field_ids[i];
+      InstanceLayoutGeneric::FieldLayout& fl = layout->fields[id];
+      fl.list_idx = i;
+      fl.rel_offset = 0;  // the whole displacement is carried by the piece offset below
+      fl.size_in_bytes = field_sizes[i];
+
+      // create a single piece (for non-empty index spaces)
+      if(!space.empty()) {
+	      AffineLayoutPiece<N,T> *alp = new AffineLayoutPiece<N,T>;
+	      alp->bounds = space.bounds;
+        ptr = (unsigned char*)field_pointers[i];
+	      alp->offset = (size_t)(ptr - base);  // distance of the user array from the memory's base pointer
+	      size_t stride = field_sizes[i];  // smallest stride is one element of this field
+        /* Fortran layout: dimension 0 gets the smallest stride */
+        if (resource == 0) {
+	        for(int j = 0; j < N; j++) {
+	          alp->strides[j] = stride;
+	          stride *= (space.bounds.hi[j] - space.bounds.lo[j] + 1);  // extent of dimension j
+	        }
+        } else { /* C layout: dimension N-1 gets the smallest stride */
+	        for(int j = N - 1; j >= 0; j--) {
+	          alp->strides[j] = stride;
+	          stride *= (space.bounds.hi[j] - space.bounds.lo[j] + 1);  // extent of dimension j
+	        }
+        }
+	      layout->piece_lists[i].pieces.push_back(alp);
+      }
+    }
+        
+    Event e = create_instance(inst, memory, layout, reqs, wait_on);
+    RegionInstanceImpl *inst_impl = get_runtime()->get_instance_impl(inst);
+    printf("inst offset %lu\n", inst_impl->metadata.inst_offset);  // NOTE(review): looks like leftover debug output — confirm it is wanted
+    return e;
+  }
+
+#define DOIT_ARRAY_SOA(N,T) \
+  template Event RegionInstance::create_array_instance_SOA<N,T>(RegionInstance&, \
+							      const ZIndexSpace<N,T>&, \
+                    const std::vector<FieldID>&, \
+							      const std::vector<size_t>&, \
+							      const std::vector<void *>&, \
+							      int, \
+							      const ProfilingRequestSet&, \
+							      Event);
+  FOREACH_NT(DOIT_ARRAY_SOA)  // explicit instantiation per (N,T); FOREACH_NT is defined elsewhere — presumably it enumerates the supported pairs
+    
+  template <int N, typename T>  // AOS attach: all fields share one caller-owned interleaved element array
+  /*static*/ Event RegionInstance::create_array_instance_AOS(RegionInstance& inst,
+							  const ZIndexSpace<N,T>& space,
+                const std::vector<FieldID> &field_ids,
+							  const std::vector<size_t> &field_sizes,  // bytes per element of field i
+                const std::vector<void*> &field_pointers,  // address of field i inside the first element
+							  unsigned char* aos_base_ptr, size_t aos_stride,  // start of the element array; byte stride of the fastest-varying dimension
+                int resource,  // 0 = Fortran ordering, non-zero = C ordering
+							  const ProfilingRequestSet& reqs,
+							  Event wait_on /*= Event::NO_EVENT*/)
+  {
+    Memory memory = Machine::MemoryQuery(Machine::get_machine())
+      .local_address_space()
+      .only_kind(Memory::SYSTEM_MEM)
+      .first();  // first local SYSTEM_MEM memory returned by the query
+    assert(memory.exists());
+    
+    InstanceLayout<N,T> *layout = new InstanceLayout<N,T>;
+    layout->bytes_used = 0;
+    layout->alignment_reqd = 0;  // no allocation being made
+    layout->space = space;
+    layout->piece_lists.resize(field_sizes.size());  // one piece list per field
+    
+    LocalCPUMemory *m_impl = (LocalCPUMemory *)get_runtime()->get_memory_impl(memory);  // NOTE(review): unchecked downcast — assumes the SYSTEM_MEM impl is a LocalCPUMemory
+    unsigned char* base = (unsigned char*)m_impl->base;
+    for(size_t i = 0; i < field_sizes.size(); i++) {
+      FieldID id = field_ids[i];
+      InstanceLayoutGeneric::FieldLayout& fl = layout->fields[id];
+      fl.list_idx = i;
+      // Every field's piece below starts at aos_base_ptr, so the field's
+      // displacement must be measured from that base.  Measuring it from the
+      // previous field (as before) was only right for the first two fields.
+      fl.rel_offset = (size_t)(((unsigned char*)field_pointers[i]) -
+                               aos_base_ptr);
+      fl.size_in_bytes = field_sizes[i];
+
+      // create a single piece (for non-empty index spaces)
+      if(!space.empty()) {
+	      AffineLayoutPiece<N,T> *alp = new AffineLayoutPiece<N,T>;
+	      alp->bounds = space.bounds;
+	      alp->offset = (size_t)(aos_base_ptr - base);  // same anchor for every field; rel_offset selects the member
+        size_t stride = aos_stride;  // smallest stride is one whole element
+        /* Fortran layout: dimension 0 gets the smallest stride */
+        if (resource == 0) {
+	        for(int j = 0; j < N; j++) {
+	          alp->strides[j] = stride;
+            stride *= (space.bounds.hi[j] - space.bounds.lo[j] + 1);  // extent of dimension j
+	        }
+        } else { /* C layout: dimension N-1 gets the smallest stride */
+	        for(int j = N-1; j >= 0; j--) {
+	          alp->strides[j] = stride;
+            stride *= (space.bounds.hi[j] - space.bounds.lo[j] + 1);  // extent of dimension j
+	        }
+        }
+	      layout->piece_lists[i].pieces.push_back(alp);
+      }
+    }
+        
+    Event e = create_instance(inst, memory, layout, reqs, wait_on);
+    RegionInstanceImpl *inst_impl = get_runtime()->get_instance_impl(inst);
+    printf("inst offset %lu\n", inst_impl->metadata.inst_offset);  // NOTE(review): looks like leftover debug output — confirm it is wanted
+    return e;
+  }
+
+#define DOIT_ARRAY_AOS(N,T) \
+  template Event RegionInstance::create_array_instance_AOS<N,T>(RegionInstance&, \
+							      const ZIndexSpace<N,T>&, \
+                    const std::vector<FieldID>&, \
+							      const std::vector<size_t>&, \
+							      const std::vector<void *>&, \
+							      unsigned char*, size_t, \
+                    int, \
+							      const ProfilingRequestSet&, \
+							      Event);
+  FOREACH_NT(DOIT_ARRAY_AOS)  // explicit instantiation per (N,T); FOREACH_NT is defined elsewhere — presumably it enumerates the supported pairs
+
+}; // namespace Realm
diff --git a/runtime/realm/hdf5/hdf5_access.cc b/runtime/realm/hdf5/hdf5_access.cc
index edcc999a43..99ce678ce0 100644
--- a/runtime/realm/hdf5/hdf5_access.cc
+++ b/runtime/realm/hdf5/hdf5_access.cc
@@ -81,4 +81,4 @@ namespace Realm {
 							      Event);
   FOREACH_NT(DOIT)
 
-}; // namespace Realm
+}; // namespace Realm
\ No newline at end of file
diff --git a/runtime/realm/inst_layout.h b/runtime/realm/inst_layout.h
index 7b084095e5..c494013f69 100644
--- a/runtime/realm/inst_layout.h
+++ b/runtime/realm/inst_layout.h
@@ -144,7 +144,7 @@ namespace Realm {
     bool serialize(S& serializer) const;
 
     ZPoint<N, size_t> strides;
-    size_t offset;
+    ptrdiff_t offset;
   };
 
   template <int N, typename T>
diff --git a/runtime/realm/instance.h b/runtime/realm/instance.h
index d22a3f4b05..a5cd326dbd 100644
--- a/runtime/realm/instance.h
+++ b/runtime/realm/instance.h
@@ -146,6 +146,27 @@ namespace Realm {
 				      const ProfilingRequestSet& prs,
 				      Event wait_on = Event::NO_EVENT);
 #endif
+              
+    template <int N, typename T>  // attach caller-owned per-field arrays (struct-of-arrays) as an instance
+    static Event create_array_instance_SOA(RegionInstance& inst,
+				      const ZIndexSpace<N,T>& space,
+              const std::vector<FieldID> &field_ids,
+				      const std::vector<size_t> &field_sizes,  // bytes per element, one entry per field
+				      const std::vector<void*> &field_pointers,  // start address of each field's array
+				      int resource,  // 0 = Fortran ordering, non-zero = C ordering
+				      const ProfilingRequestSet& prs,
+				      Event wait_on = Event::NO_EVENT);
+              
+    template <int N, typename T>  // attach one caller-owned interleaved element array (array-of-structs) as an instance
+    static Event create_array_instance_AOS(RegionInstance& inst,
+				      const ZIndexSpace<N,T>& space,
+              const std::vector<FieldID> &field_ids,
+				      const std::vector<size_t> &field_sizes,  // bytes per element, one entry per field
+				      const std::vector<void*> &field_pointers,  // address of each field inside the first element
+				      unsigned char* aos_base_ptr, size_t aos_stride,  // start of the element array; byte stride of the fastest-varying dimension
+              int resource,  // 0 = Fortran ordering, non-zero = C ordering
+				      const ProfilingRequestSet& prs,
+				      Event wait_on = Event::NO_EVENT);
 
     void destroy(Event wait_on = Event::NO_EVENT) const;
 
diff --git a/runtime/realm/mem_impl.cc b/runtime/realm/mem_impl.cc
index ed6a02b24c..0850f5eab7 100644
--- a/runtime/realm/mem_impl.cc
+++ b/runtime/realm/mem_impl.cc
@@ -676,7 +676,7 @@ namespace Realm {
 
   void *LocalCPUMemory::get_direct_ptr(off_t offset, size_t size)
   {
-    assert((offset >= 0) && ((size_t)(offset + size) <= this->size));
+//    assert((offset >= 0) && ((size_t)(offset + size) <= this->size)); // NOTE(review): range check disabled — presumably so attached external arrays outside [base, base+size) resolve; confirm before merging
     return (base + offset);
   }
 
diff --git a/runtime/runtime.mk b/runtime/runtime.mk
index 8c9afb0742..7524eb137d 100644
--- a/runtime/runtime.mk
+++ b/runtime/runtime.mk
@@ -412,7 +412,8 @@ LOW_RUNTIME_SRC += $(LG_RT_DIR)/realm/runtime_impl.cc \
 		   $(LG_RT_DIR)/realm/machine_impl.cc \
 		   $(LG_RT_DIR)/realm/sampling_impl.cc \
                    $(LG_RT_DIR)/lowlevel.cc \
-                   $(LG_RT_DIR)/realm/transfer/lowlevel_disk.cc
+                   $(LG_RT_DIR)/realm/transfer/lowlevel_disk.cc \
+									 $(LG_RT_DIR)/realm/attach_array.cc
 LOW_RUNTIME_SRC += $(LG_RT_DIR)/realm/numa/numa_module.cc \
 		   $(LG_RT_DIR)/realm/numa/numasysif.cc
 ifeq ($(strip $(USE_OPENMP)),1)
diff --git a/tutorial/06_privileges/privileges.cc b/tutorial/06_privileges/privileges.cc
index 147beba0cf..61ee566a3c 100644
--- a/tutorial/06_privileges/privileges.cc
+++ b/tutorial/06_privileges/privileges.cc
@@ -297,4 +297,4 @@ int main(int argc, char **argv)
   }
 
   return Runtime::start(argc, argv);
-}
+}
\ No newline at end of file
diff --git a/tutorial/07_partitioning/partitioning.cc b/tutorial/07_partitioning/partitioning.cc
index eb61103c47..63b8ce4397 100644
--- a/tutorial/07_partitioning/partitioning.cc
+++ b/tutorial/07_partitioning/partitioning.cc
@@ -335,4 +335,4 @@ int main(int argc, char **argv)
   }
 
   return Runtime::start(argc, argv);
-}
+}
\ No newline at end of file
diff --git a/tutorial/07_partitioning_attach_array_aos/CMakeLists.txt b/tutorial/07_partitioning_attach_array_aos/CMakeLists.txt
new file mode 100644
index 0000000000..ebed303c58
--- /dev/null
+++ b/tutorial/07_partitioning_attach_array_aos/CMakeLists.txt
@@ -0,0 +1,39 @@
+#------------------------------------------------------------------------------#
+# Copyright 2017 Kitware, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#------------------------------------------------------------------------------#
+
+cmake_minimum_required(VERSION 3.1)
+project(LegionExample_07_partitioning)
+
+include(CheckCXXCompilerFlag)
+
+check_cxx_compiler_flag( "-std=c++11" CXX11_COMPILER)
+
+if(CXX11_COMPILER)
+  set(CMAKE_CXX_STANDARD 11)
+else()
+  message(FATAL_ERROR "C++11 compatible compiler not found")
+endif()
+
+# Only search if we're building stand-alone and not as part of Legion
+if(NOT Legion_SOURCE_DIR)
+  find_package(Legion REQUIRED)
+endif()
+
+add_executable(partitioning partitioning.cc)
+target_link_libraries(partitioning Legion::Legion)
+if(Legion_ENABLE_TESTING)
+  add_test(NAME partitioning COMMAND $<TARGET_FILE:partitioning>) 
+endif()
diff --git a/tutorial/07_partitioning_attach_array_aos/Makefile b/tutorial/07_partitioning_attach_array_aos/Makefile
new file mode 100644
index 0000000000..3651b6b749
--- /dev/null
+++ b/tutorial/07_partitioning_attach_array_aos/Makefile
@@ -0,0 +1,51 @@
+# Copyright 2017 Stanford University
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+ifndef LG_RT_DIR
+$(error LG_RT_DIR variable is not defined, aborting build)
+endif
+
+# Flags for directing the runtime makefile what to include
+DEBUG           := 1		# Include debugging symbols
+OUTPUT_LEVEL    ?= LEVEL_DEBUG	# Compile time logging level
+USE_CUDA        ?= 0		# Include CUDA support (requires CUDA)
+USE_GASNET      ?= 0		# Include GASNet support (requires GASNet)
+USE_HDF         ?= 0		# Include HDF5 support (requires HDF5)
+ALT_MAPPERS     ?= 0		# Include alternative mappers (not recommended)
+
+# Put the binary file name here
+OUTFILE		?= partitioning 
+# List all the application source files here
+GEN_SRC		?= partitioning.cc		# .cc files
+GEN_GPU_SRC	?=				# .cu files
+
+# You can modify these variables, some will be appended to by the runtime makefile
+INC_FLAGS	?=
+CC_FLAGS	?=
+NVCC_FLAGS	?=
+GASNET_FLAGS	?=
+LD_FLAGS	?=
+# For Point and Rect typedefs
+CC_FLAGS	+= -std=c++11
+
+###########################################################################
+#
+#   Don't change anything below here
+#   
+###########################################################################
+
+include $(LG_RT_DIR)/runtime.mk
+
diff --git a/tutorial/07_partitioning_attach_array_aos/partitioning.cc b/tutorial/07_partitioning_attach_array_aos/partitioning.cc
new file mode 100644
index 0000000000..4c64ab9240
--- /dev/null
+++ b/tutorial/07_partitioning_attach_array_aos/partitioning.cc
@@ -0,0 +1,391 @@
+/* Copyright 2017 Stanford University
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include <cstdio>
+#include <cassert>
+#include <cstdlib>
+#include <sys/time.h>
+#include "legion.h"
+using namespace Legion;
+
+template<typename FT, int N, typename T = coord_t>  // READ_ONLY field accessor using Realm::AffineAccessor
+using AccessorRO = FieldAccessor<READ_ONLY,FT,N,T,Realm::AffineAccessor<FT,N,T> >;
+template<typename FT, int N, typename T = coord_t>  // WRITE_DISCARD field accessor using Realm::AffineAccessor
+using AccessorWD = FieldAccessor<WRITE_DISCARD,FT,N,T,Realm::AffineAccessor<FT,N,T> >;
+
+enum TaskIDs {  // task identifiers handed to the launchers below
+  TOP_LEVEL_TASK_ID,  // presumably registered for top_level_task — registration is outside this view
+  INIT_FIELD_TASK_ID,  // launched once per field (X, then Y) over the colour space
+  DAXPY_TASK_ID,  // launched over the colour space with alpha as its argument
+  CHECK_TASK_ID,  // NOTE(review): use not visible in this chunk; presumably the result check
+};
+
+enum FieldIDs {  // field identifiers allocated in the two field spaces
+  FID_X,  // input field, allocated in input_fs as a double
+  FID_Y,  // input field, allocated in input_fs as a double
+  FID_Z,  // output field, allocated in output_fs as a double
+};
+
+struct daxpy_t {  // one array-of-structs element: three consecutive doubles
+    double x;  // attached as FID_X at byte offset 0
+    double y;  // attached as FID_Y at byte offset sizeof(double)
+    double z;  // attached as FID_Z at byte offset 2*sizeof(double)
+};
+
+// Wall-clock time in seconds (microsecond resolution) for coarse timing.
+double get_cur_time() {
+  struct timeval tv;
+
+  // POSIX leaves a non-null timezone argument unspecified, so pass NULL.
+  // NOTE(review): the return status of gettimeofday is not checked.
+  gettimeofday(&tv, NULL);
+  // Fold whole seconds and microseconds into one fractional-second value.
+  return tv.tv_sec + tv.tv_usec / 1000000.0;
+}
+
+
+void top_level_task(const Task *task,
+                    const std::vector<PhysicalRegion> &regions,
+                    Context ctx, Runtime *runtime)
+{
+  int num_elements = 1024; 
+  int num_subregions = 4;
+  // See if we have any command line arguments to parse
+  // Note we now have a new command line parameter which specifies
+  // how many subregions we should make.
+  {
+    const InputArgs &command_args = Runtime::get_input_args();
+    for (int i = 1; i < command_args.argc; i++)
+    {
+      if (!strcmp(command_args.argv[i],"-n"))
+        num_elements = atoi(command_args.argv[++i]);
+      if (!strcmp(command_args.argv[i],"-b"))
+        num_subregions = atoi(command_args.argv[++i]);
+    }
+  }
+  printf("Running daxpy for %d elements...\n", num_elements);
+  printf("Partitioning data into %d sub-regions...\n", num_subregions);
+
+  // Create our logical regions using the same schemas as earlier examples
+  Rect<1> elem_rect(0,num_elements-1);
+  IndexSpace is = runtime->create_index_space(ctx, elem_rect); 
+  runtime->attach_name(is, "is");
+  FieldSpace input_fs = runtime->create_field_space(ctx);
+  runtime->attach_name(input_fs, "input_fs");
+  {
+    FieldAllocator allocator = 
+      runtime->create_field_allocator(ctx, input_fs);
+    allocator.allocate_field(sizeof(double),FID_X);
+    runtime->attach_name(input_fs, FID_X, "X");
+    allocator.allocate_field(sizeof(double),FID_Y);
+    runtime->attach_name(input_fs, FID_Y, "Y");
+  }
+  FieldSpace output_fs = runtime->create_field_space(ctx);
+  runtime->attach_name(output_fs, "output_fs");
+  {
+    FieldAllocator allocator = 
+      runtime->create_field_allocator(ctx, output_fs);
+    allocator.allocate_field(sizeof(double),FID_Z);
+    runtime->attach_name(output_fs, FID_Z, "Z");
+  }
+  LogicalRegion input_lr = runtime->create_logical_region(ctx, is, input_fs);
+  runtime->attach_name(input_lr, "input_lr");
+  LogicalRegion output_lr = runtime->create_logical_region(ctx, is, output_fs);
+  runtime->attach_name(output_lr, "output_lr");
+
+  daxpy_t *array_ptr = (daxpy_t*)malloc(sizeof(daxpy_t)*(num_elements));
+
+  
+  std::map<FieldID, size_t> offset_input;
+  offset_input[FID_X] = 0;
+  offset_input[FID_Y] = sizeof(double);
+
+  PhysicalRegion pr_input = runtime->attach_array_aos(ctx, input_lr, input_lr, array_ptr, sizeof(daxpy_t), offset_input, 0);
+  
+  std::map<FieldID, size_t> offset_output;
+  offset_output[FID_Z] = 2*sizeof(double);
+  
+  PhysicalRegion pr_output = runtime->attach_array_aos(ctx, output_lr, output_lr, array_ptr, sizeof(daxpy_t), offset_output, 0);
+  
+  // In addition to using rectangles and domains for launching index spaces
+  // of tasks (see example 02), Legion also uses them for performing 
+  // operations on logical regions.  Here we create a rectangle and a
+  // corresponding domain for describing the space of subregions that we
+  // want to create.  Each subregion is assigned a 'color' which is why
+  // we name the variables 'color_bounds' and 'color_domain'.  We'll use
+  // these below when we partition the region.
+  Rect<1> color_bounds(0,num_subregions-1);
+  IndexSpace color_is = runtime->create_index_space(ctx, color_bounds);
+
+  // Parallelism in Legion is implicit.  This means that rather than
+  // explicitly saying what should run in parallel, Legion applications
+  // partition up data and tasks specify which regions they access.
+  // The Legion runtime computes non-interference as a function of 
+  // regions, fields, and privileges and then determines which tasks 
+  // are safe to run in parallel.
+  //
+  // Data partitioning is performed on index spaces.  The partitioning 
+  // operation is used to break an index space of points into subsets 
+  // of points each of which will become a sub index space.  Partitions 
+  // created on an index space are then transitively applied to all the 
+  // logical regions created using the index space.  We will show how
+  // to get names to the subregions later in this example.
+  //
+  // Here we want to create the IndexPartition 'ip'.  We'll illustrate
+  // two ways of creating an index partition depending on whether the
+  // array being partitioned can be evenly partitioned into subsets
+  // or not.  There are other methods to partitioning index spaces
+  // which are not covered here.  We'll cover the case of coloring
+  // individual points in an index space in our capstone circuit example.
+  IndexPartition ip = runtime->create_equal_partition(ctx, is, color_is);
+  runtime->attach_name(ip, "ip");
+
+  // The index space 'is' was used in creating two logical regions: 'input_lr'
+  // and 'output_lr'.  By creating an IndexPartition of 'is' we implicitly
+  // created a LogicalPartition for each of the logical regions created using
+  // 'is'.  The Legion runtime provides several ways of getting the names for
+  // these LogicalPartitions.  We'll look at one of them here.  The
+  // 'get_logical_partition' method takes a LogicalRegion and an IndexPartition
+  // and returns the LogicalPartition of the given LogicalRegion that corresponds
+  // to the given IndexPartition.  
+  LogicalPartition input_lp = runtime->get_logical_partition(ctx, input_lr, ip);
+  runtime->attach_name(input_lp, "input_lp");
+  LogicalPartition output_lp = runtime->get_logical_partition(ctx, output_lr, ip);
+  runtime->attach_name(output_lp, "output_lp");
+
+  // Create our launch domain.  Note that it is the same as the color domain
+  // as we are going to launch one task for each subregion we created.
+  ArgumentMap arg_map;
+  double start_init = get_cur_time();
+
+  // As in previous examples, we now want to launch tasks for initializing 
+  // both the fields.  However, to increase the amount of parallelism
+  // exposed to the runtime we will launch separate sub-tasks for each of
+  // the logical subregions created by our partitioning.  To express this
+  // we create an IndexLauncher for launching an index space of tasks
+  // the same as example 02.
+  IndexLauncher init_launcher(INIT_FIELD_TASK_ID, color_is, 
+                              TaskArgument(NULL, 0), arg_map);
+  // For index space task launches we don't want to have to explicitly
+  // enumerate separate region requirements for all points in our launch
+  // domain.  Instead Legion allows applications to place an upper bound
+  // on privileges required by subtasks and then specify which privileges
+  // each subtask receives using a projection function.  In the case of
+  // the field initialization task, we say that all the subtasks will be
+  // using some subregion of the LogicalPartition 'input_lp'.  Applications
+  // may also specify upper bounds using logical regions and not partitions.
+  //
+  // The Legion implementation assumes that all points in an index
+  // space task launch request non-interfering privileges and for performance
+  // reasons this is unchecked.  This means if two tasks in the same index
+  // space are accessing aliased data, then they must either both be
+  // with read-only or reduce privileges.
+  //
+  // When the runtime enumerates the launch_domain, it will invoke the
+  // projection function for each point in the space and use the resulting
+  // LogicalRegion computed for each point in the index space of tasks.
+  // The projection ID '0' is reserved and corresponds to the identity 
+  // function which simply zips the space of tasks with the space of
+  // subregions in the partition.  Applications can register their own
+  // projections functions via the 'register_region_projection' and
+  // 'register_partition_projection' functions before starting 
+  // the runtime similar to how tasks are registered.
+  init_launcher.add_region_requirement(
+      RegionRequirement(input_lp, 0/*projection ID*/, 
+                        WRITE_DISCARD, EXCLUSIVE, input_lr));
+  init_launcher.region_requirements[0].add_field(FID_X);
+  FutureMap fmi0 = runtime->execute_index_space(ctx, init_launcher);
+
+  // Modify our region requirement to initialize the other field
+  // in the same way.  Note that after we do this we have exposed
+  // 2*num_subregions task-level parallelism to the runtime because
+  // we have launched tasks that are both data-parallel on
+  // sub-regions and task-parallel on accessing different fields.
+  // The power of Legion is that it allows programmers to express
+  // these data usage patterns and automatically extracts both
+  // kinds of parallelism in a unified programming framework.
+  init_launcher.region_requirements[0].privilege_fields.clear();
+  init_launcher.region_requirements[0].instance_fields.clear();
+  init_launcher.region_requirements[0].add_field(FID_Y);
+  FutureMap fmi1 = runtime->execute_index_space(ctx, init_launcher);
+  fmi1.wait_all_results();
+  fmi0.wait_all_results();
+  double end_init = get_cur_time();
+  printf("Attach AOS, init done, time %f\n", end_init - start_init);
+
+  const double alpha = drand48();
+  double start_t = get_cur_time();
+  // We launch the subtasks for performing the daxpy computation
+  // in a similar way to the initialize field tasks.  Note we
+  // again make use of two RegionRequirements which use a
+  // partition as the upper bound for the privileges for the task.
+  IndexLauncher daxpy_launcher(DAXPY_TASK_ID, color_is,
+                TaskArgument(&alpha, sizeof(alpha)), arg_map);
+  daxpy_launcher.add_region_requirement(
+      RegionRequirement(input_lp, 0/*projection ID*/,
+                        READ_ONLY, EXCLUSIVE, input_lr));
+  daxpy_launcher.region_requirements[0].add_field(FID_X);
+  daxpy_launcher.region_requirements[0].add_field(FID_Y);
+  daxpy_launcher.add_region_requirement(
+      RegionRequirement(output_lp, 0/*projection ID*/,
+                        WRITE_DISCARD, EXCLUSIVE, output_lr));
+  daxpy_launcher.region_requirements[1].add_field(FID_Z);
+  FutureMap fm = runtime->execute_index_space(ctx, daxpy_launcher);
+  fm.wait_all_results();
+  double end_t = get_cur_time();
+  printf("Attach AOS, daxpy done, time %f\n", end_t - start_t);
+                    
+  // While we could also issue parallel subtasks for the checking
+  // task, we only issue a single task launch to illustrate an
+  // important Legion concept.  Note the checking task operates
+  // on the entire 'input_lr' and 'output_lr' regions and not
+  // on the subregions.  Even though the previous tasks were
+  // all operating on subregions, Legion will correctly compute
+  // data dependences on all the subtasks that generated the
+  // data in these two regions.  
+  TaskLauncher check_launcher(CHECK_TASK_ID, TaskArgument(&alpha, sizeof(alpha)));
+  check_launcher.add_region_requirement(
+      RegionRequirement(input_lr, READ_ONLY, EXCLUSIVE, input_lr));
+  check_launcher.region_requirements[0].add_field(FID_X);
+  check_launcher.region_requirements[0].add_field(FID_Y);
+  check_launcher.add_region_requirement(
+      RegionRequirement(output_lr, READ_ONLY, EXCLUSIVE, output_lr));
+  check_launcher.region_requirements[1].add_field(FID_Z);
+  Future fu = runtime->execute_task(ctx, check_launcher);
+  fu.wait();
+
+  runtime->detach_array(ctx, pr_output);
+  runtime->detach_array(ctx, pr_input);
+  runtime->destroy_logical_region(ctx, input_lr);
+  runtime->destroy_logical_region(ctx, output_lr);
+  runtime->destroy_field_space(ctx, input_fs);
+  runtime->destroy_field_space(ctx, output_fs);
+  runtime->destroy_index_space(ctx, is);
+  free(array_ptr);
+}
+
+// Leaf task: fills the one privileged field of regions[0] with drand48()
+// values, one for every point of the subregion this task was given.
+void init_field_task(const Task *task,
+                     const std::vector<PhysicalRegion> &regions,
+                     Context ctx, Runtime *runtime)
+{
+  assert(regions.size() == 1);
+  assert(task->regions.size() == 1);
+  const auto &req = task->regions[0];
+  assert(req.privilege_fields.size() == 1);
+  const FieldID target_fid = *(req.privilege_fields.begin());
+  const int block = task->index_point.point_data[0];
+  printf("Initializing field %d for block %d...\n", target_fid, block);
+
+  // Ask the runtime for this task's bounds instead of assuming them, so the
+  // body works both for a single launch and for an index-space launch.
+  const Rect<1> bounds =
+    runtime->get_index_space_domain(ctx, req.region.get_index_space());
+  const AccessorWD<double,1> field_acc(regions[0], target_fid);
+  for (PointInRectIterator<1> it(bounds); it(); it++)
+    field_acc[*it] = drand48();
+}
+
+// Leaf task: z = alpha * x + y over this task's subregion.  alpha comes in
+// by value through the task arguments; X/Y are read from regions[0] and Z
+// is written to regions[1].
+void daxpy_task(const Task *task, const std::vector<PhysicalRegion> &regions,
+                Context ctx, Runtime *runtime)
+{
+  assert(regions.size() == 2);
+  assert(task->regions.size() == 2);
+  assert(task->arglen == sizeof(double));
+  const double scale = *((const double*)task->args);
+  const int block = task->index_point.point_data[0];
+  const AccessorRO<double,1> y_acc(regions[0], FID_Y);
+  const AccessorRO<double,1> x_acc(regions[0], FID_X);
+  const AccessorWD<double,1> z_acc(regions[1], FID_Z);
+  const Rect<1> bounds = runtime->get_index_space_domain(ctx,
+                          task->regions[0].region.get_index_space());
+  printf("Running daxpy computation with alpha %.8g for point %d, xptr %p, y_ptr %p, z_ptr %p...\n", 
+          scale, block, x_acc.ptr(bounds.lo), y_acc.ptr(bounds.lo), z_acc.ptr(bounds.lo));
+  for (PointInRectIterator<1> it(bounds); it(); it++)
+    z_acc[*it] = scale * x_acc[*it] + y_acc[*it];
+}
+
+// Leaf task: recomputes alpha * x + y for every point of regions[0],
+// compares it against the Z value stored in regions[1], and prints
+// SUCCESS! or FAILURE! depending on whether every point matched.
+void check_task(const Task *task, const std::vector<PhysicalRegion> &regions,
+                Context ctx, Runtime *runtime)
+{
+  assert(regions.size() == 2);
+  assert(task->regions.size() == 2);
+  assert(task->arglen == sizeof(double));
+  const double scale = *((const double*)task->args);
+
+  const AccessorRO<double,1> x_acc(regions[0], FID_X);
+  const AccessorRO<double,1> y_acc(regions[0], FID_Y);
+  const AccessorRO<double,1> z_acc(regions[1], FID_Z);
+
+  const Rect<1> bounds = runtime->get_index_space_domain(ctx,
+                          task->regions[0].region.get_index_space());
+  printf("Checking results... xptr %p, y_ptr %p, z_ptr %p...\n", 
+          x_acc.ptr(bounds.lo), y_acc.ptr(bounds.lo), z_acc.ptr(bounds.lo));
+
+  // Exact floating-point comparison is deliberate here: the expression is
+  // evaluated in the same order as in daxpy_task, so the stored and the
+  // recomputed values are expected to be bitwise equal.
+  bool all_ok = true;
+  for (PointInRectIterator<1> it(bounds); it(); it++)
+  {
+    const double expected = scale * x_acc[*it] + y_acc[*it];
+    const double received = z_acc[*it];
+    all_ok = all_ok && (expected == received);
+  }
+
+  fputs(all_ok ? "SUCCESS!\n" : "FAILURE!\n", stdout);
+}
+
+// Program entry point.  Names the top-level task, preregisters one CPU
+// (LOC_PROC) variant for each of the four tasks, all but the top-level one
+// marked as leaf tasks, and then hands control to the Legion runtime.
+int main(int argc, char **argv)
+{
+  Runtime::set_top_level_task_id(TOP_LEVEL_TASK_ID);
+
+  { // top-level task: launches the other three, so it is not a leaf
+    TaskVariantRegistrar top_reg(TOP_LEVEL_TASK_ID, "top_level");
+    top_reg.add_constraint(ProcessorConstraint(Processor::LOC_PROC));
+    Runtime::preregister_task_variant<top_level_task>(top_reg, "top_level");
+  }
+  { // field initialization
+    TaskVariantRegistrar init_reg(INIT_FIELD_TASK_ID, "init_field");
+    init_reg.set_leaf();
+    init_reg.add_constraint(ProcessorConstraint(Processor::LOC_PROC));
+    Runtime::preregister_task_variant<init_field_task>(init_reg, "init_field");
+  }
+  { // daxpy computation
+    TaskVariantRegistrar daxpy_reg(DAXPY_TASK_ID, "daxpy");
+    daxpy_reg.set_leaf();
+    daxpy_reg.add_constraint(ProcessorConstraint(Processor::LOC_PROC));
+    Runtime::preregister_task_variant<daxpy_task>(daxpy_reg, "daxpy");
+  }
+  { // result verification
+    TaskVariantRegistrar check_reg(CHECK_TASK_ID, "check");
+    check_reg.set_leaf();
+    check_reg.add_constraint(ProcessorConstraint(Processor::LOC_PROC));
+    Runtime::preregister_task_variant<check_task>(check_reg, "check");
+  }
+
+  return Runtime::start(argc, argv);
+}
diff --git a/tutorial/07_partitioning_attach_array_soa/CMakeLists.txt b/tutorial/07_partitioning_attach_array_soa/CMakeLists.txt
new file mode 100644
index 0000000000..ebed303c58
--- /dev/null
+++ b/tutorial/07_partitioning_attach_array_soa/CMakeLists.txt
@@ -0,0 +1,39 @@
+#------------------------------------------------------------------------------#
+# Copyright 2017 Kitware, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#------------------------------------------------------------------------------#
+
+cmake_minimum_required(VERSION 3.1)
+project(LegionExample_07_partitioning)
+
+include(CheckCXXCompilerFlag)
+
+check_cxx_compiler_flag( "-std=c++11" CXX11_COMPILER)
+# Require C++11 for this example; stop at configure time if it is unavailable.
+if(CXX11_COMPILER)
+  set(CMAKE_CXX_STANDARD 11)
+else()
+  message(FATAL_ERROR "C++11 compatible compiler not found")
+endif()
+
+# Only search if we're building stand-alone and not as part of Legion
+if(NOT Legion_SOURCE_DIR)
+  find_package(Legion REQUIRED)
+endif()
+# NOTE(review): target/test name 'partitioning' may clash with tutorial/07_partitioning in an in-tree build - confirm.
+add_executable(partitioning partitioning.cc)
+target_link_libraries(partitioning Legion::Legion)
+if(Legion_ENABLE_TESTING)
+  add_test(NAME partitioning COMMAND $<TARGET_FILE:partitioning>) 
+endif()
diff --git a/tutorial/07_partitioning_attach_array_soa/Makefile b/tutorial/07_partitioning_attach_array_soa/Makefile
new file mode 100644
index 0000000000..3651b6b749
--- /dev/null
+++ b/tutorial/07_partitioning_attach_array_soa/Makefile
@@ -0,0 +1,51 @@
+# Copyright 2017 Stanford University
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+ifndef LG_RT_DIR
+$(error LG_RT_DIR variable is not defined, aborting build)
+endif
+
+# Flags for directing the runtime makefile what to include (each is a default that the environment may override)
+DEBUG           ?= 1		# Include debugging symbols
+OUTPUT_LEVEL    ?= LEVEL_DEBUG	# Compile time logging level
+USE_CUDA        ?= 0		# Include CUDA support (requires CUDA)
+USE_GASNET      ?= 0		# Include GASNet support (requires GASNet)
+USE_HDF         ?= 0		# Include HDF5 support (requires HDF5)
+ALT_MAPPERS     ?= 0		# Include alternative mappers (not recommended)
+
+# Put the binary file name here
+OUTFILE		?= partitioning
+# List all the application source files here
+GEN_SRC		?= partitioning.cc		# .cc files
+GEN_GPU_SRC	?=				# .cu files
+
+# You can modify these variables, some will be appended to by the runtime makefile
+INC_FLAGS	?=
+CC_FLAGS	?=
+NVCC_FLAGS	?=
+GASNET_FLAGS	?=
+LD_FLAGS	?=
+# C++11 is required: partitioning.cc uses alias templates, and Point/Rect typedefs need it
+CC_FLAGS	+= -std=c++11
+
+###########################################################################
+#
+#   Don't change anything below here
+#
+###########################################################################
+
+include $(LG_RT_DIR)/runtime.mk
+
diff --git a/tutorial/07_partitioning_attach_array_soa/partitioning.cc b/tutorial/07_partitioning_attach_array_soa/partitioning.cc
new file mode 100644
index 0000000000..e120f704ea
--- /dev/null
+++ b/tutorial/07_partitioning_attach_array_soa/partitioning.cc
@@ -0,0 +1,391 @@
+/* Copyright 2017 Stanford University
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstdio>
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+#include <sys/time.h>
+#include "legion.h"
+using namespace Legion;
+
+template<typename FT, int N, typename T = coord_t>
+using AccessorRO = FieldAccessor<READ_ONLY,FT,N,T,Realm::AffineAccessor<FT,N,T> >;
+template<typename FT, int N, typename T = coord_t>
+using AccessorWD = FieldAccessor<WRITE_DISCARD,FT,N,T,Realm::AffineAccessor<FT,N,T> >;
+
+enum TaskIDs {
+  TOP_LEVEL_TASK_ID,
+  INIT_FIELD_TASK_ID,
+  DAXPY_TASK_ID,
+  CHECK_TASK_ID,
+};
+
+enum FieldIDs {
+  FID_X,
+  FID_Y,
+  FID_Z,
+};
+
+// Returns the current wall-clock time in seconds, with the microsecond
+// resolution that gettimeofday() provides.  Used in this file for coarse
+// interval timing by subtracting two readings.
+double get_cur_time() {
+  struct timeval tv;
+  // The timezone argument is obsolete and POSIX leaves the effect of a
+  // non-NULL pointer unspecified, so pass NULL.
+  gettimeofday(&tv, NULL);
+  return tv.tv_sec + tv.tv_usec / 1000000.0;
+}
+
+
+// Top-level task: builds the X/Y input region and the Z output region over
+// one index space, attaches malloc'ed host arrays to them (SOA layout), and
+// then runs the init, daxpy and check tasks on the attached data.
+void top_level_task(const Task *task,
+                    const std::vector<PhysicalRegion> &regions,
+                    Context ctx, Runtime *runtime)
+{
+  int num_elements = 1024;
+  int num_subregions = 4;
+  // Parse the command line: '-n <count>' sets the number of elements and
+  // '-b <count>' the number of subregions.  The loop stops one short of
+  // argc so a flag given without a value never reads past the arguments.
+  {
+    const InputArgs &command_args = Runtime::get_input_args();
+    for (int i = 1; i + 1 < command_args.argc; i++)
+    {
+      if (!strcmp(command_args.argv[i],"-n"))
+        num_elements = atoi(command_args.argv[++i]);
+      else if (!strcmp(command_args.argv[i],"-b"))
+        num_subregions = atoi(command_args.argv[++i]);
+    }
+  }
+  if (num_elements <= 0 || num_subregions <= 0)
+  {
+    // Non-positive sizes would give empty rectangles and a bogus malloc size.
+    printf("Invalid arguments: -n and -b must both be positive.\n");
+    return;
+  }
+  printf("Running daxpy for %d elements...\n", num_elements);
+  printf("Partitioning data into %d sub-regions...\n", num_subregions);
+
+  // Create our logical regions using the same schemas as earlier examples
+  Rect<1> elem_rect(0,num_elements-1);
+  IndexSpace is = runtime->create_index_space(ctx, elem_rect);
+  runtime->attach_name(is, "is");
+  FieldSpace input_fs = runtime->create_field_space(ctx);
+  runtime->attach_name(input_fs, "input_fs");
+  {
+    FieldAllocator allocator = runtime->create_field_allocator(ctx, input_fs);
+    allocator.allocate_field(sizeof(double),FID_X);
+    runtime->attach_name(input_fs, FID_X, "X");
+    allocator.allocate_field(sizeof(double),FID_Y);
+    runtime->attach_name(input_fs, FID_Y, "Y");
+  }
+  FieldSpace output_fs = runtime->create_field_space(ctx);
+  runtime->attach_name(output_fs, "output_fs");
+  {
+    FieldAllocator allocator = runtime->create_field_allocator(ctx, output_fs);
+    allocator.allocate_field(sizeof(double),FID_Z);
+    runtime->attach_name(output_fs, FID_Z, "Z");
+  }
+  LogicalRegion input_lr = runtime->create_logical_region(ctx, is, input_fs);
+  runtime->attach_name(input_lr, "input_lr");
+  LogicalRegion output_lr = runtime->create_logical_region(ctx, is, output_fs);
+  runtime->attach_name(output_lr, "output_lr");
+
+  // Host-side backing storage: one separately allocated array per field.
+  double *y_ptr = (double*)malloc(sizeof(double)*(num_elements));
+  double *x_ptr = (double*)malloc(sizeof(double)*(num_elements));
+  double *z_ptr = (double*)malloc(sizeof(double)*(num_elements));
+  assert(x_ptr != NULL && y_ptr != NULL && z_ptr != NULL);
+  for (int j = 0; j < num_elements; j++ ) {
+      x_ptr[j] = drand48();
+      y_ptr[j] = drand48();
+      z_ptr[j] = drand48();
+  }
+  std::map<FieldID,void*> field_pointer_map_xy;
+  field_pointer_map_xy[FID_X] = x_ptr;
+  field_pointer_map_xy[FID_Y] = y_ptr;
+  printf("Attach array fid %d, ptr %p, fid %d, ptr %p\n", FID_X, x_ptr, FID_Y, y_ptr);
+  PhysicalRegion xy_pr = runtime->attach_array_soa(ctx, input_lr, input_lr, field_pointer_map_xy, 0);
+
+  std::map<FieldID,void*> field_pointer_map_z;
+  field_pointer_map_z[FID_Z] = z_ptr;
+  printf("Attach array fid %d, ptr %p\n", FID_Z, z_ptr);
+  PhysicalRegion z_pr = runtime->attach_array_soa(ctx, output_lr, output_lr, field_pointer_map_z, 0);
+  // In addition to using rectangles for launching index spaces of tasks
+  // (see example 02), Legion also uses them for performing operations on
+  // logical regions.  Here we create a rectangle describing the space of
+  // subregions that we want.  Each subregion is assigned a 'color', hence
+  // the names 'color_bounds' and 'color_is' used when partitioning below.
+  Rect<1> color_bounds(0,num_subregions-1);
+  IndexSpace color_is = runtime->create_index_space(ctx, color_bounds);
+
+  // Parallelism in Legion is implicit.  This means that rather than
+  // explicitly saying what should run in parallel, Legion applications
+  // partition up data and tasks specify which regions they access.
+  // The Legion runtime computes non-interference as a function of
+  // regions, fields, and privileges and then determines which tasks
+  // are safe to run in parallel.
+  //
+  // Data partitioning is performed on index spaces.  The partitioning
+  // operation is used to break an index space of points into subsets
+  // of points each of which will become a sub index space.  Partitions
+  // created on an index space are then transitively applied to all the
+  // logical regions created using the index space.  We will show how
+  // to get names for the resulting logical partitions just below.
+  //
+  // Here we create the IndexPartition 'ip' with 'create_equal_partition',
+  // using 'color_is' as the color space of the partition of 'is'.  Other
+  // ways of partitioning index spaces exist but are not covered here.
+  IndexPartition ip = runtime->create_equal_partition(ctx, is, color_is);
+  runtime->attach_name(ip, "ip");
+
+  // The index space 'is' was used in creating two logical regions: 'input_lr'
+  // and 'output_lr'.  By creating an IndexPartition of 'is' we implicitly
+  // created a LogicalPartition for each of the logical regions created using
+  // 'is'.  The Legion runtime provides several ways of getting the names for
+  // these LogicalPartitions.  We'll look at one of them here.  The
+  // 'get_logical_partition' method takes a LogicalRegion and an IndexPartition
+  // and returns the LogicalPartition of the given LogicalRegion that corresponds
+  // to the given IndexPartition.
+  LogicalPartition input_lp = runtime->get_logical_partition(ctx, input_lr, ip);
+  runtime->attach_name(input_lp, "input_lp");
+  LogicalPartition output_lp = runtime->get_logical_partition(ctx, output_lr, ip);
+  runtime->attach_name(output_lp, "output_lp");
+
+  // Our launch domain is the color space 'color_is': one task is launched
+  // for each subregion we created.  No per-point arguments are passed.
+  ArgumentMap arg_map;
+  double start_init = get_cur_time();
+
+  // As in previous examples, we now want to launch tasks for initializing
+  // both the fields.  However, to increase the amount of parallelism
+  // exposed to the runtime we will launch separate sub-tasks for each of
+  // the logical subregions created by our partitioning.  To express this
+  // we create an IndexLauncher for launching an index space of tasks
+  // the same as example 02.
+  IndexLauncher init_launcher(INIT_FIELD_TASK_ID, color_is,
+                              TaskArgument(NULL, 0), arg_map);
+  // For index space task launches we don't want to have to explicitly
+  // enumerate separate region requirements for all points in our launch
+  // domain.  Instead Legion allows applications to place an upper bound
+  // on privileges required by subtasks and then specify which privileges
+  // each subtask receives using a projection function.  In the case of
+  // the field initialization task, we say that all the subtasks will be
+  // using some subregion of the LogicalPartition 'input_lp'.  Applications
+  // may also specify upper bounds using logical regions and not partitions.
+  //
+  // The Legion implementation assumes that all points in an index space
+  // task launch request non-interfering privileges and for performance
+  // reasons this is unchecked.  This means if two tasks in the same index
+  // space access aliased data, both must use read-only or reduce privileges.
+  //
+  // When the runtime enumerates the launch domain, it will invoke the
+  // projection function for each point in the space and use the resulting
+  // LogicalRegion computed for each point in the index space of tasks.
+  // The projection ID '0' is reserved and corresponds to the identity
+  // function which simply zips the space of tasks with the space of
+  // subregions in the partition.  Applications can register their own
+  // projection functions via the 'register_region_projection' and
+  // 'register_partition_projection' functions before starting the runtime.
+  init_launcher.add_region_requirement(
+      RegionRequirement(input_lp, 0/*projection ID*/,
+                    WRITE_DISCARD, EXCLUSIVE, input_lr));
+  init_launcher.region_requirements[0].add_field(FID_X);
+  FutureMap fmi0 = runtime->execute_index_space(ctx, init_launcher);
+
+  // Modify our region requirement to initialize the other field
+  // in the same way.  Note that after we do this we have exposed
+  // 2*num_subregions task-level parallelism to the runtime because
+  // we have launched tasks that are both data-parallel on sub-regions
+  // and task-parallel on accessing different fields.  The power of
+  // Legion is that it allows programmers to express these data usage
+  // patterns and extracts both kinds of parallelism automatically.
+  init_launcher.region_requirements[0].privilege_fields.clear();
+  init_launcher.region_requirements[0].instance_fields.clear();
+  init_launcher.region_requirements[0].add_field(FID_Y);
+  FutureMap fmi1 = runtime->execute_index_space(ctx, init_launcher);
+  fmi1.wait_all_results();
+  fmi0.wait_all_results();
+  double end_init = get_cur_time();
+  printf("Attach SOA, init done, time %f\n", end_init - start_init);
+
+  const double alpha = drand48();
+  double start_t = get_cur_time();
+  // We launch the subtasks for performing the daxpy computation
+  // in a similar way to the initialize field tasks.  Note we
+  // again make use of two RegionRequirements which use a
+  // partition as the upper bound for the privileges for the task.
+  IndexLauncher daxpy_launcher(DAXPY_TASK_ID, color_is,
+                TaskArgument(&alpha, sizeof(alpha)), arg_map);
+  daxpy_launcher.add_region_requirement(
+      RegionRequirement(input_lp, 0/*projection ID*/,
+                        READ_ONLY, EXCLUSIVE, input_lr));
+  daxpy_launcher.region_requirements[0].add_field(FID_X);
+  daxpy_launcher.region_requirements[0].add_field(FID_Y);
+  daxpy_launcher.add_region_requirement(
+      RegionRequirement(output_lp, 0/*projection ID*/,
+                        WRITE_DISCARD, EXCLUSIVE, output_lr));
+  daxpy_launcher.region_requirements[1].add_field(FID_Z);
+  FutureMap fm = runtime->execute_index_space(ctx, daxpy_launcher);
+  fm.wait_all_results();
+  double end_t = get_cur_time();
+  printf("Attach SOA, daxpy done, time %f\n", end_t - start_t);
+
+  // While we could also issue parallel subtasks for the checking
+  // task, we only issue a single task launch to illustrate an
+  // important Legion concept.  Note the checking task operates on the
+  // entire 'input_lr' and 'output_lr' regions and not on the subregions.
+  // Even though the previous tasks were all operating on subregions,
+  // Legion will correctly compute data dependences on all the subtasks
+  // that generated the data in these two regions.
+  TaskLauncher check_launcher(CHECK_TASK_ID, TaskArgument(&alpha, sizeof(alpha)));
+  check_launcher.add_region_requirement(
+      RegionRequirement(input_lr, READ_ONLY, EXCLUSIVE, input_lr));
+  check_launcher.region_requirements[0].add_field(FID_X);
+  check_launcher.region_requirements[0].add_field(FID_Y);
+  check_launcher.add_region_requirement(
+      RegionRequirement(output_lr, READ_ONLY, EXCLUSIVE, output_lr));
+  check_launcher.region_requirements[1].add_field(FID_Z);
+  Future fu = runtime->execute_task(ctx, check_launcher);
+  fu.wait();
+
+  // Detach the host arrays before destroying the regions and freeing them.
+  runtime->detach_array(ctx, xy_pr);
+  runtime->detach_array(ctx, z_pr);
+  runtime->destroy_logical_region(ctx, input_lr);
+  runtime->destroy_logical_region(ctx, output_lr);
+  runtime->destroy_field_space(ctx, input_fs);
+  runtime->destroy_field_space(ctx, output_fs);
+  runtime->destroy_index_space(ctx, is);
+  free(x_ptr);
+  free(y_ptr);
+  free(z_ptr);
+}
+
+// Leaf task: writes a drand48() value into every point of the subregion
+// behind regions[0], for the single field named in its privilege set.
+void init_field_task(const Task *task,
+                     const std::vector<PhysicalRegion> &regions,
+                     Context ctx, Runtime *runtime)
+{
+  assert(regions.size() == 1);
+  assert(task->regions.size() == 1);
+  const auto &requirement = task->regions[0];
+  assert(requirement.privilege_fields.size() == 1);
+  const FieldID which_field = *(requirement.privilege_fields.begin());
+  const int block_id = task->index_point.point_data[0];
+  printf("Initializing field %d for block %d...\n", which_field, block_id);
+
+  // The bounds come from the runtime rather than from the launch point,
+  // which keeps this body valid for single and index-space launches alike.
+  const Rect<1> extent =
+    runtime->get_index_space_domain(ctx, requirement.region.get_index_space());
+  const AccessorWD<double,1> out(regions[0], which_field);
+  for (PointInRectIterator<1> cursor(extent); cursor(); cursor++)
+    out[*cursor] = drand48();
+}
+
+// Leaf task: z = alpha * x + y over this task's subregion.  alpha comes in
+// by value through the task arguments; X/Y are read from regions[0] and Z
+// is written to regions[1].
+void daxpy_task(const Task *task, const std::vector<PhysicalRegion> &regions,
+                Context ctx, Runtime *runtime)
+{
+  assert(regions.size() == 2);
+  assert(task->regions.size() == 2);
+  assert(task->arglen == sizeof(double));
+  const double scale = *((const double*)task->args);
+  const int block_id = task->index_point.point_data[0];
+  const AccessorRO<double,1> in_x(regions[0], FID_X);
+  const AccessorRO<double,1> in_y(regions[0], FID_Y);
+  const AccessorWD<double,1> out_z(regions[1], FID_Z);
+  const Rect<1> extent = runtime->get_index_space_domain(ctx,
+                          task->regions[0].region.get_index_space());
+  printf("Running daxpy computation with alpha %.8g for point %d, x_ptr %p, y_ptr %p, z_ptr %p...\n", 
+          scale, block_id, in_x.ptr(extent.lo), in_y.ptr(extent.lo), out_z.ptr(extent.lo));
+  for (PointInRectIterator<1> cursor(extent); cursor(); cursor++)
+    out_z[*cursor] = scale * in_x[*cursor] + in_y[*cursor];
+}
+
+// Leaf task: recomputes alpha * x + y for every point of regions[0],
+// compares it against the Z value stored in regions[1], and prints
+// SUCCESS! or FAILURE! depending on whether every point matched.
+void check_task(const Task *task, const std::vector<PhysicalRegion> &regions,
+                Context ctx, Runtime *runtime)
+{
+  assert(regions.size() == 2);
+  assert(task->regions.size() == 2);
+  assert(task->arglen == sizeof(double));
+  const double scale = *((const double*)task->args);
+
+  const AccessorRO<double,1> in_x(regions[0], FID_X);
+  const AccessorRO<double,1> in_y(regions[0], FID_Y);
+  const AccessorRO<double,1> in_z(regions[1], FID_Z);
+
+  const Rect<1> extent = runtime->get_index_space_domain(ctx,
+                          task->regions[0].region.get_index_space());
+  printf("Checking results x_ptr %p, y_ptr %p, z_ptr %p...\n",
+          in_x.ptr(extent.lo), in_y.ptr(extent.lo), in_z.ptr(extent.lo));
+
+  // Exact floating-point comparison is deliberate here: the expression is
+  // evaluated in the same order as in daxpy_task, so the stored and the
+  // recomputed values are expected to be bitwise equal.
+  bool all_ok = true;
+  for (PointInRectIterator<1> cursor(extent); cursor(); cursor++)
+  {
+    const double expected = scale * in_x[*cursor] + in_y[*cursor];
+    const double received = in_z[*cursor];
+    all_ok = all_ok && (expected == received);
+  }
+  fputs(all_ok ? "SUCCESS!\n" : "FAILURE!\n", stdout);
+}
+
+// Program entry point.  Names the top-level task, preregisters one CPU
+// (LOC_PROC) variant for each of the four tasks, all but the top-level one
+// marked as leaf tasks, and then hands control to the Legion runtime.
+int main(int argc, char **argv)
+{
+  Runtime::set_top_level_task_id(TOP_LEVEL_TASK_ID);
+
+  { // top-level task: launches the other three, so it is not a leaf
+    TaskVariantRegistrar top_reg(TOP_LEVEL_TASK_ID, "top_level");
+    top_reg.add_constraint(ProcessorConstraint(Processor::LOC_PROC));
+    Runtime::preregister_task_variant<top_level_task>(top_reg, "top_level");
+  }
+  { // field initialization
+    TaskVariantRegistrar init_reg(INIT_FIELD_TASK_ID, "init_field");
+    init_reg.set_leaf();
+    init_reg.add_constraint(ProcessorConstraint(Processor::LOC_PROC));
+    Runtime::preregister_task_variant<init_field_task>(init_reg, "init_field");
+  }
+  { // daxpy computation
+    TaskVariantRegistrar daxpy_reg(DAXPY_TASK_ID, "daxpy");
+    daxpy_reg.set_leaf();
+    daxpy_reg.add_constraint(ProcessorConstraint(Processor::LOC_PROC));
+    Runtime::preregister_task_variant<daxpy_task>(daxpy_reg, "daxpy");
+  }
+  { // result verification
+    TaskVariantRegistrar check_reg(CHECK_TASK_ID, "check");
+    check_reg.set_leaf();
+    check_reg.add_constraint(ProcessorConstraint(Processor::LOC_PROC));
+    Runtime::preregister_task_variant<check_task>(check_reg, "check");
+  }
+
+  return Runtime::start(argc, argv);
+}
diff --git a/tutorial/10_attach_2darray_c_fortran_layout/CMakeLists.txt b/tutorial/10_attach_2darray_c_fortran_layout/CMakeLists.txt
new file mode 100644
index 0000000000..a78a962bc9
--- /dev/null
+++ b/tutorial/10_attach_2darray_c_fortran_layout/CMakeLists.txt
@@ -0,0 +1,39 @@
+#------------------------------------------------------------------------------#
+# Copyright 2017 Kitware, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#------------------------------------------------------------------------------#
+
+cmake_minimum_required(VERSION 3.1)
+project(LegionExample_10_attach_2darray)
+
+include(CheckCXXCompilerFlag)
+
+check_cxx_compiler_flag( "-std=c++11" CXX11_COMPILER)
+
+if(CXX11_COMPILER)
+  set(CMAKE_CXX_STANDARD 11)
+else()
+  message(FATAL_ERROR "C++11 compatible compiler not found")
+endif()
+
+# Only search if we're building stand-alone and not as part of Legion
+if(NOT Legion_SOURCE_DIR)
+  find_package(Legion REQUIRED)
+endif()
+# Build attach_2darray.cc, the source this directory provides (same name as in the Makefile).
+add_executable(attach_2darray attach_2darray.cc)
+target_link_libraries(attach_2darray Legion::Legion)
+if(Legion_ENABLE_TESTING)
+  add_test(NAME attach_2darray COMMAND $<TARGET_FILE:attach_2darray>)
+endif()
diff --git a/tutorial/10_attach_2darray_c_fortran_layout/Makefile b/tutorial/10_attach_2darray_c_fortran_layout/Makefile
new file mode 100644
index 0000000000..96cbf576fb
--- /dev/null
+++ b/tutorial/10_attach_2darray_c_fortran_layout/Makefile
@@ -0,0 +1,51 @@
+# Copyright 2017 Stanford University
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+ifndef LG_RT_DIR
+$(error LG_RT_DIR variable is not defined, aborting build)
+endif
+
+# Flags for the runtime makefile; all use ?= so the environment can override them
+DEBUG           ?= 1		# Include debugging symbols
+OUTPUT_LEVEL    ?= LEVEL_DEBUG	# Compile time logging level
+USE_CUDA        ?= 0		# Include CUDA support (requires CUDA)
+USE_GASNET      ?= 0		# Include GASNet support (requires GASNet)
+USE_HDF         ?= 0		# Include HDF5 support (requires HDF5)
+ALT_MAPPERS     ?= 0		# Include alternative mappers (not recommended)
+
+# Put the binary file name here
+OUTFILE		?= attach_2darray
+# List all the application source files here
+GEN_SRC		?= attach_2darray.cc		# .cc files
+GEN_GPU_SRC	?=				# .cu files
+
+# You can modify these variables, some will be appended to by the runtime makefile
+INC_FLAGS	?=
+CC_FLAGS	?=
+NVCC_FLAGS	?=
+GASNET_FLAGS	?=
+LD_FLAGS	?=
+# attach_2darray.cc uses C++11 alias templates, so force the standard here
+CC_FLAGS	+= -std=c++11
+
+###########################################################################
+#
+#   Don't change anything below here
+#
+###########################################################################
+
+include $(LG_RT_DIR)/runtime.mk
+
diff --git a/tutorial/10_attach_2darray_c_fortran_layout/attach_2darray.cc b/tutorial/10_attach_2darray_c_fortran_layout/attach_2darray.cc
new file mode 100644
index 0000000000..f6fe484d79
--- /dev/null
+++ b/tutorial/10_attach_2darray_c_fortran_layout/attach_2darray.cc
@@ -0,0 +1,250 @@
+/* Copyright 2017 Stanford University
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cassert>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include "legion.h"
+using namespace Legion;
+
+template<class FT, int N, class T = coord_t>  // read-only affine accessor for one field
+using AccessorRO = FieldAccessor<READ_ONLY, FT, N, T, Realm::AffineAccessor<FT, N, T> >;
+template<class FT, int N, class T = coord_t>  // NOTE(review): not used anywhere in this file
+using AccessorWD = FieldAccessor<WRITE_DISCARD, FT, N, T, Realm::AffineAccessor<FT, N, T> >;
+
+enum TaskIDs {
+  TOP_LEVEL_TASK_ID  = 0,  // registered for top_level_task
+  READ_FIELD_TASK_ID = 1,  // registered for read_field_task
+};
+
+enum FieldIDs {
+  FID_X = 0,  // attached from the x member of the xy_t buffer
+  FID_Y = 1,  // attached from the y member of the xy_t buffer
+  FID_A = 2,  // attached from its own double buffer
+  FID_B = 3,  // attached from its own double buffer
+};
+
+struct fidz_t {  // NOTE(review): not referenced anywhere in this file
+  double z1;
+  double z2;
+};
+
+struct xy_t {  // one array-of-structs record; x backs FID_X, y backs FID_Y
+  double x;
+  double y;
+};
+
+// Top-level task: builds a num_elements x num_elements 2D region with four
+// double fields, attaches caller-allocated arrays to it (FID_X and FID_Y
+// share one array-of-structs buffer, FID_A and FID_B each get their own
+// buffer), launches one read_field_task per field to print the attached
+// data, then detaches the arrays and releases every resource.
+// Command line: "-n <count>" overrides the default num_elements of 10.
+void top_level_task(const Task *task,
+                    const std::vector<PhysicalRegion> &regions,
+                    Context ctx, Runtime *runtime)
+{
+  int num_elements = 10;
+  // See if we have any command line arguments to parse
+  {
+    const InputArgs &command_args = Runtime::get_input_args();
+    for (int i = 1; i < command_args.argc; i++)
+    {
+      // Only consume a value for "-n" when one actually follows it;
+      // otherwise argv[++i] would index past the last argument.
+      if (!strcmp(command_args.argv[i],"-n") && (i + 1) < command_args.argc)
+        num_elements = atoi(command_args.argv[++i]);
+    }
+  }
+  // Reject sizes that cannot describe a non-empty grid before any Legion
+  // resources are created, so there is nothing to clean up on this path.
+  if (num_elements <= 0) {
+    fprintf(stderr, "-n expects a positive element count\n");
+    return;
+  }
+  printf("Running attach_2darray for %d x %d elements...\n",
+         num_elements, num_elements);
+
+  // Create a 2D index space spanning (0,0)..(n-1,n-1) and a field space
+  // holding four double-sized fields.
+  Point<2> lo(0, 0);
+  Point<2> hi(num_elements-1, num_elements-1);
+  const Rect<2> elem_rect(lo, hi);
+  IndexSpace is = runtime->create_index_space(ctx, elem_rect);
+  FieldSpace input_fs = runtime->create_field_space(ctx);
+  {
+    FieldAllocator allocator =
+      runtime->create_field_allocator(ctx, input_fs);
+    allocator.allocate_field(sizeof(double),FID_X);
+    allocator.allocate_field(sizeof(double),FID_Y);
+    allocator.allocate_field(sizeof(double),FID_A);
+    allocator.allocate_field(sizeof(double),FID_B);
+  }
+  LogicalRegion input_lr = runtime->create_logical_region(ctx, is, input_fs);
+
+  // Do the n*n arithmetic in size_t so a large -n cannot overflow int.
+  const size_t total_elements = (size_t)num_elements * (size_t)num_elements;
+
+  xy_t *xy_ptr = (xy_t*)malloc(sizeof(xy_t)*total_elements);
+  double *a_ptr = (double*)malloc(sizeof(double)*total_elements);
+  double *b_ptr = (double*)malloc(sizeof(double)*total_elements);
+  assert(xy_ptr != NULL && a_ptr != NULL && b_ptr != NULL);
+
+  // Element i holds i, i+0.1, i+0.2 and i+0.3 in x, y, a and b, which makes
+  // each field easy to tell apart in the printed output.
+  double val = 0.0;
+  for (size_t i = 0; i < total_elements; i++) {
+      xy_ptr[i].x = val;
+      xy_ptr[i].y = val + 0.1;
+      a_ptr[i] = val + 0.2;
+      b_ptr[i] = val + 0.3;
+      val += 1.0;
+  }
+
+  // NOTE(review): judging by the messages printed below, the last argument
+  // of attach_array_aos/attach_array_soa selects the layout (0 = Fortran,
+  // 1 = C) -- confirm against the runtime API.
+  // FID_X is the first member of each xy_t record, hence offset 0.
+  std::map<FieldID, size_t> offset_x;
+  offset_x[FID_X] = 0;
+  printf("Attach AOS array in fortran layout, fid %d, ptr %p\n", FID_X, (void*)xy_ptr);
+  PhysicalRegion pr_x = runtime->attach_array_aos(ctx, input_lr, input_lr, xy_ptr, sizeof(xy_t), offset_x, 0);
+
+  // FID_Y is attached from the same buffer, one double past each record start.
+  std::map<FieldID, size_t> offset_y;
+  offset_y[FID_Y] = sizeof(double);
+  printf("Attach AOS array in c layout, fid %d, ptr %p\n", FID_Y, (void*)(((unsigned char*)(xy_ptr))+sizeof(double)));
+  PhysicalRegion pr_y = runtime->attach_array_aos(ctx, input_lr, input_lr, xy_ptr, sizeof(xy_t), offset_y, 1);
+
+  std::map<FieldID,void*> field_pointer_map_a;
+  field_pointer_map_a[FID_A] = a_ptr;
+  printf("Attach SOA array in fortran layout, fid %d, ptr %p\n", FID_A, (void*)a_ptr);
+  PhysicalRegion pr_a = runtime->attach_array_soa(ctx, input_lr, input_lr, field_pointer_map_a, 0);
+
+  std::map<FieldID,void*> field_pointer_map_b;
+  field_pointer_map_b[FID_B] = b_ptr;
+  printf("Attach SOA array in c layout, fid %d, ptr %p\n", FID_B, (void*)b_ptr);
+  PhysicalRegion pr_b = runtime->attach_array_soa(ctx, input_lr, input_lr, field_pointer_map_b, 1);
+
+  // Launch one read_field_task per attached field.  A single launcher is
+  // reused for all four launches; only the field named in its one region
+  // requirement changes between them.  Every launch asks for READ_ONLY
+  // privileges on input_lr.  Sub-tasks may only request a subset of their
+  // parent's privileges; this task created input_lr and therefore holds
+  // full read-write privileges on all of its fields.
+  TaskLauncher read_launcher(READ_FIELD_TASK_ID, TaskArgument(NULL, 0));
+  read_launcher.add_region_requirement(
+      RegionRequirement(input_lr, READ_ONLY, EXCLUSIVE, input_lr));
+  read_launcher.add_field(0/*idx*/, FID_X);
+  // Keep the future of every launch so that all four tasks can be waited
+  // on further down.
+  Future fx = runtime->execute_task(ctx, read_launcher);
+
+  read_launcher.region_requirements[0].privilege_fields.clear();
+  read_launcher.region_requirements[0].instance_fields.clear();
+  read_launcher.add_field(0/*idx*/, FID_Y);
+  Future fy = runtime->execute_task(ctx, read_launcher);
+
+  read_launcher.region_requirements[0].privilege_fields.clear();
+  read_launcher.region_requirements[0].instance_fields.clear();
+  read_launcher.add_field(0/*idx*/, FID_A);
+  Future fa = runtime->execute_task(ctx, read_launcher);
+
+  read_launcher.region_requirements[0].privilege_fields.clear();
+  read_launcher.region_requirements[0].instance_fields.clear();
+  read_launcher.add_field(0/*idx*/, FID_B);
+  Future fb = runtime->execute_task(ctx, read_launcher);
+
+  // Wait for all four readers to finish before the buffers they read are
+  // detached and freed below.  Unlike the tutorials this file was derived
+  // from, this task therefore does block on its sub-tasks.
+  fx.wait();
+  fy.wait();
+  fa.wait();
+  fb.wait();
+  // Tear down in reverse order of construction: first detach the external
+  // arrays, then destroy the logical region, field space and index space,
+  // and only after that free the buffers that backed the attached
+  // instances.
+  runtime->detach_array(ctx, pr_x);
+  runtime->detach_array(ctx, pr_y);
+  runtime->detach_array(ctx, pr_a);
+  runtime->detach_array(ctx, pr_b);
+  runtime->destroy_logical_region(ctx, input_lr);
+  runtime->destroy_field_space(ctx, input_fs);
+  runtime->destroy_index_space(ctx, is);
+  free(xy_ptr);
+  free(a_ptr);
+  free(b_ptr);
+}
+
+// Task that prints every element of one field of a 2D region.
+// It expects exactly one region requirement naming exactly one privilege
+// field (asserted below); that field is read through a READ_ONLY affine
+// accessor.  The address of the first element is printed too, presumably
+// so it can be compared with the pointer printed when the array was attached.
+void read_field_task(const Task *task,
+                     const std::vector<PhysicalRegion> &regions,
+                     Context ctx, Runtime *runtime)
+{
+  // Check that the inputs look right since we have no
+  // static checking to help us out.
+  assert(regions.size() == 1);
+  assert(task->regions.size() == 1);
+  assert(task->regions[0].privilege_fields.size() == 1);
+  // This is a field polymorphic function so figure out
+  // which field we are responsible for reading.
+  FieldID fid = *(task->regions[0].privilege_fields.begin());
+
+  // Note that Legion's default mapper always maps regions
+  // and the Legion runtime is smart enough not to start
+  // the task until all the regions contain valid data.
+  // Therefore in this case we don't need to call 'wait_until_valid'
+  // on our physical regions and we know that getting this
+  // accessor will never block the task's execution.  If
+  // however we chose to unmap this physical region and then
+  // remap it then we would need to call 'wait_until_valid'
+  // again to ensure that we were accessing valid data.
+  const AccessorRO<double,2> acc(regions[0], fid);
+
+  Rect<2> rect = runtime->get_index_space_domain(ctx,
+                  task->regions[0].region.get_index_space());
+  // %p requires a void pointer, so convert the element pointer explicitly.
+  printf("READ field %d, addr %p\n", fid, (const void*)acc.ptr(rect.lo));
+  for (PointInRectIterator<2> pir(rect); pir(); pir++)
+    printf("%.1f ", acc[*pir]);
+  printf("\n");
+}
+
+int main(int argc, char **argv)
+{
+  // Tell the runtime which task ID is the top-level task.
+  Runtime::set_top_level_task_id(TOP_LEVEL_TASK_ID);
+  {
+    // Variant for the top-level task, constrained to LOC_PROC processors.
+    TaskVariantRegistrar top_registrar(TOP_LEVEL_TASK_ID, "top_level");
+    top_registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC));
+    Runtime::preregister_task_variant<top_level_task>(top_registrar, "top_level");
+  }
+  {
+    // Variant for the per-field reader; additionally marked via set_leaf().
+    TaskVariantRegistrar reader_registrar(READ_FIELD_TASK_ID, "read_field");
+    reader_registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC));
+    reader_registrar.set_leaf();
+    Runtime::preregister_task_variant<read_field_task>(reader_registrar, "read_field");
+  }
+  return Runtime::start(argc, argv);
+}