From b7f5a8a3509dd01eef7011315c0ac721d5b27d64 Mon Sep 17 00:00:00 2001 From: YLJALDC Date: Tue, 6 Oct 2020 18:35:24 +0000 Subject: [PATCH 1/8] indicator updated --- autodist/graph_item.py | 8 ++++++++ .../kernel/synchronization/all_reduce_synchronizer.py | 3 +++ autodist/kernel/synchronization/ps_synchronizer.py | 3 +++ 3 files changed, 14 insertions(+) diff --git a/autodist/graph_item.py b/autodist/graph_item.py index e581447..179ddfb 100644 --- a/autodist/graph_item.py +++ b/autodist/graph_item.py @@ -241,6 +241,8 @@ def __init__(self, graph: ops.Graph = None, graph_def: GraphDef = None): # Info self.info = Info() self.optimizer, self.optimizer_args, self.optimizer_kwargs = None, None, None + self.updated = True + self.var_op_name_to_grad_dict = None def get_trainable_variables(self): """Get variables that need to be synchronized if doing data parallelism.""" @@ -319,6 +321,9 @@ def all_update_ops(self): @property def var_op_name_to_grad_info(self): """A mapping from VarHandleOp name (e.g. "W" not "W:0") to its (grad, var, update_op) tuple.""" + # if the graph has not been rewritten, return old dict instead of generating a new one + if not self.updated: + return self.var_op_name_to_grad_dict expected_var_ops = {var.op: (grad, var) for grad, var in self.grad_target_pairs.items()} res = {} for op in self.all_update_ops: @@ -336,6 +341,9 @@ def var_op_name_to_grad_info(self): if var_op.name in res: raise ValueError('A variable cannot correspond to more than one update op for now.') res[var_op.name] = expected_var_ops[var_op] + (op,) + # recalculated the dict, set the indicator + self.var_op_name_to_grad_dict = res + self.updated = False return res def _is_auxiliary(self, update_op: ops.Operation): diff --git a/autodist/kernel/synchronization/all_reduce_synchronizer.py b/autodist/kernel/synchronization/all_reduce_synchronizer.py index b186f51..1a37a94 100644 --- a/autodist/kernel/synchronization/all_reduce_synchronizer.py +++ b/autodist/kernel/synchronization/all_reduce_synchronizer.py @@ -88,6 +88,7 @@ def in_graph_apply(self, graph_item, var_name): # Throw an error if the variable is sparse master_op_name = ops.prepend_name_scope(var_op_name, replica_prefix(0)) + graph_item.updated = True grad, _, _ = graph_item.var_op_name_to_grad_info[master_op_name] with item.graph.as_default(): self._share_initializer(item, var_op_name, master_replica=0) @@ -115,6 +116,7 @@ def _collect_dense_gradients(self, graph_item, var_op_name): for i in range(0, self.num_replicas): op_name = ops.prepend_name_scope(var_op_name, replica_prefix(i)) + graph_item.updated = True grad, _, _ = graph_item.var_op_name_to_grad_info[op_name] # TODO (Tairui): (3) Merge of reduction for performance grad_consumers = get_consumers(grad.op) # this line must happen before the reduction @@ -140,6 +142,7 @@ def _collect_sparse_gradients(self, graph_item, var_op_name): raise ValueError('CollectiveOps requires collective group size > 1') for i in range(0, self.num_replicas): op_name = ops.prepend_name_scope(var_op_name, replica_prefix(i)) + graph_item.updated = True grad, _, _ = graph_item.var_op_name_to_grad_info[op_name] # TODO (Tairui): (3) Merge of reduction for performance indices_c_ops = grad.indices.consumers() diff --git a/autodist/kernel/synchronization/ps_synchronizer.py b/autodist/kernel/synchronization/ps_synchronizer.py index 560e45f..6d1c96c 100644 --- a/autodist/kernel/synchronization/ps_synchronizer.py +++ b/autodist/kernel/synchronization/ps_synchronizer.py @@ -84,6 +84,7 @@ def in_graph_apply(self, graph_item, 
var_name): self._share_variable(item, var_op_name, master_replica=master_replica_index) master_var_name = ops.prepend_name_scope(var_name, replica_prefix(master_replica_index)) master_var_op_name = get_op_name(master_var_name) + item.updated = True # force graph item to recalculate the dict grad, target, update_op = item.var_op_name_to_grad_info[master_var_op_name] agg_grad = self._aggregate_gradients(item, old_update_op=update_op, old_grad=grad, old_target=target) @@ -223,6 +224,7 @@ def _prune_control_dependencies(self, graph_item, var_op_name, master_replica=0) if i == master_replica: continue this_var_op_name = ops.prepend_name_scope(var_op_name, replica_prefix(i)) + graph_item.updated = True _, _, update_op = graph_item.var_op_name_to_grad_info[this_var_op_name] source_op = self._get_optimizer_source_op(update_op) remove_from_control_consumers(get_control_consumers(source_op), source_op) @@ -261,6 +263,7 @@ def between_graph_apply(self, graph_item, var_name): item = graph_item # here the variable on replica:0 has been shared, so the original var_name won't work var_op_name = ops.prepend_name_scope(get_op_name(var_name), replica_prefix(0)) + item.updated = True gradient, target, update_op = item.var_op_name_to_grad_info[var_op_name] with item.graph.as_default(): proxy = self._create_proxy(item, gradient, target) if self._local_replication else None From ac8738d402c48a55fd4825ee83e9db3832cd2f85 Mon Sep 17 00:00:00 2001 From: YLJALDC Date: Tue, 6 Oct 2020 20:31:33 +0000 Subject: [PATCH 2/8] updated --- autodist/graph_item.py | 4 ++-- autodist/kernel/synchronization/ps_synchronizer.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/autodist/graph_item.py b/autodist/graph_item.py index 179ddfb..dbcd9de 100644 --- a/autodist/graph_item.py +++ b/autodist/graph_item.py @@ -241,7 +241,7 @@ def __init__(self, graph: ops.Graph = None, graph_def: GraphDef = None): # Info self.info = Info() self.optimizer, self.optimizer_args, self.optimizer_kwargs = None, None, None - self.updated = True + self.updated = True self.var_op_name_to_grad_dict = None def get_trainable_variables(self): @@ -323,7 +323,7 @@ def var_op_name_to_grad_info(self): """A mapping from VarHandleOp name (e.g. 
"W" not "W:0") to its (grad, var, update_op) tuple.""" # if the graph has not been rewritten, return old dict instead of generating a new one if not self.updated: - return self.var_op_name_to_grad_dict + return self.var_op_name_to_grad_dict expected_var_ops = {var.op: (grad, var) for grad, var in self.grad_target_pairs.items()} res = {} for op in self.all_update_ops: diff --git a/autodist/kernel/synchronization/ps_synchronizer.py b/autodist/kernel/synchronization/ps_synchronizer.py index 6d1c96c..941aa00 100644 --- a/autodist/kernel/synchronization/ps_synchronizer.py +++ b/autodist/kernel/synchronization/ps_synchronizer.py @@ -84,7 +84,7 @@ def in_graph_apply(self, graph_item, var_name): self._share_variable(item, var_op_name, master_replica=master_replica_index) master_var_name = ops.prepend_name_scope(var_name, replica_prefix(master_replica_index)) master_var_op_name = get_op_name(master_var_name) - item.updated = True # force graph item to recalculate the dict + item.updated = True # force graph item to recalculate the dict grad, target, update_op = item.var_op_name_to_grad_info[master_var_op_name] agg_grad = self._aggregate_gradients(item, old_update_op=update_op, old_grad=grad, old_target=target) From 53bf07e9f0d2dba631d505e26538255bd9490696 Mon Sep 17 00:00:00 2001 From: YLJALDC Date: Wed, 7 Oct 2020 04:10:12 +0000 Subject: [PATCH 3/8] incremental create dict --- autodist/graph_item.py | 24 +++++++++++++++---- autodist/kernel/graph_transformer.py | 3 ++- .../kernel/synchronization/ps_synchronizer.py | 4 +++- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/autodist/graph_item.py b/autodist/graph_item.py index dbcd9de..40108cb 100644 --- a/autodist/graph_item.py +++ b/autodist/graph_item.py @@ -17,6 +17,7 @@ import contextlib import copy import functools +from collections import defaultdict from typing import Union, Callable from google.protobuf.any_pb2 import Any @@ -242,7 +243,9 @@ def __init__(self, graph: ops.Graph = None, graph_def: GraphDef = None): self.info = Info() self.optimizer, self.optimizer_args, self.optimizer_kwargs = None, None, None self.updated = True - self.var_op_name_to_grad_dict = None + self.var_op_name_to_grad_dict = dict() #None + self.update_op_depend_var = defaultdict(list) + self.first_time_loop = True def get_trainable_variables(self): """Get variables that need to be synchronized if doing data parallelism.""" @@ -325,6 +328,7 @@ def var_op_name_to_grad_info(self): if not self.updated: return self.var_op_name_to_grad_dict expected_var_ops = {var.op: (grad, var) for grad, var in self.grad_target_pairs.items()} + #on_trainable_variable = {var.op: True for grad, var in self.grad_target_pairs.items()} res = {} for op in self.all_update_ops: var_op = op.inputs[op_info.UPDATE_OP_VAR_POS].op @@ -335,16 +339,28 @@ def var_op_name_to_grad_info(self): # TODO: we should not hardcode this scope. 
# It is actually coming from the name given to the saver is_saving = update_op_scope.endswith('save') - # TODO(future): support one variable -> multiple update ops (see AdamWeightDecay optimizer) if on_trainable_variable and not is_initialization and not is_saving and not self._is_auxiliary(op): if var_op.name in res: raise ValueError('A variable cannot correspond to more than one update op for now.') res[var_op.name] = expected_var_ops[var_op] + (op,) + self.var_op_name_to_grad_dict[var_op.name] = expected_var_ops[var_op] + (op,) + # analyze what var_ops the op depends on, if all removed, then can remove this op from the loop + if self.first_time_loop: + self.update_op_depend_var[op].append(var_op.name) + # this var has been done, remove this var from values of the dict + if len(self.update_op_depend_var[op]) != 0: + self.update_op_depend_var[op].remove(var_op.name) + if len(self.update_op_depend_var[op]) == 0: + self.all_update_ops.remove(op) + + #print(len(self.all_update_ops)) # recalculated the dict, set the indicator - self.var_op_name_to_grad_dict = res + #self.var_op_name_to_grad_dict = res self.updated = False - return res + self.first_time_loop = False + return self.var_op_name_to_grad_dict #res + def _is_auxiliary(self, update_op: ops.Operation): """Check whether a specific update_op is an auxiliary op that should not be considered.""" diff --git a/autodist/kernel/graph_transformer.py b/autodist/kernel/graph_transformer.py index 4f6c2ee..67c89f3 100644 --- a/autodist/kernel/graph_transformer.py +++ b/autodist/kernel/graph_transformer.py @@ -64,7 +64,6 @@ def transform(self): graph_item, self._strategy.node_config = VariablePartitioner.apply(self._strategy.node_config, graph_item) visualization_util.log_graph(graph=graph_item.graph, name='1-after-partition') - # Create Synchronizers for each node in the strategy self._initialize_synchronizers() @@ -146,6 +145,7 @@ def _in_graph_apply(self, graph_item: GraphItem): GraphItem """ new_graph_item = graph_item + new_graph_item.first_time_loop = True for var_name, syncer in self._synchronizers.items(): new_graph_item = syncer.in_graph_apply(new_graph_item, var_name) return new_graph_item @@ -161,6 +161,7 @@ def _between_graph_apply(self, multi_gpu_graph_item: GraphItem): GraphItem """ new_graph_item = multi_gpu_graph_item + new_graph_item.first_time_loop = True for var_name, syncer in self._synchronizers.items(): new_graph_item = syncer.between_graph_apply(new_graph_item, var_name) self._prune_colocation_groups(new_graph_item) diff --git a/autodist/kernel/synchronization/ps_synchronizer.py b/autodist/kernel/synchronization/ps_synchronizer.py index 941aa00..8ba5316 100644 --- a/autodist/kernel/synchronization/ps_synchronizer.py +++ b/autodist/kernel/synchronization/ps_synchronizer.py @@ -84,8 +84,10 @@ def in_graph_apply(self, graph_item, var_name): self._share_variable(item, var_op_name, master_replica=master_replica_index) master_var_name = ops.prepend_name_scope(var_name, replica_prefix(master_replica_index)) master_var_op_name = get_op_name(master_var_name) - item.updated = True # force graph item to recalculate the dict + #item.updated = True # force graph item to recalculate the dict grad, target, update_op = item.var_op_name_to_grad_info[master_var_op_name] + #print(grad, target, update_op,master_var_op_name,master_var_name) + #assert False agg_grad = self._aggregate_gradients(item, old_update_op=update_op, old_grad=grad, old_target=target) # update grad_target_pair and variable info From 833d9ba96afc1a22742756815ae4204c684c959e 
Mon Sep 17 00:00:00 2001 From: YLJALDC Date: Fri, 9 Oct 2020 02:02:07 +0000 Subject: [PATCH 4/8] incremental implemented; change trainable_var_op_to_var to cache --- autodist/graph_item.py | 17 +++++++++-------- autodist/kernel/graph_transformer.py | 1 + .../kernel/synchronization/ps_synchronizer.py | 3 +++ 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/autodist/graph_item.py b/autodist/graph_item.py index 40108cb..7b42ba7 100644 --- a/autodist/graph_item.py +++ b/autodist/graph_item.py @@ -246,6 +246,7 @@ def __init__(self, graph: ops.Graph = None, graph_def: GraphDef = None): self.var_op_name_to_grad_dict = dict() #None self.update_op_depend_var = defaultdict(list) self.first_time_loop = True + self.var_quried = [] def get_trainable_variables(self): """Get variables that need to be synchronized if doing data parallelism.""" @@ -328,7 +329,6 @@ def var_op_name_to_grad_info(self): if not self.updated: return self.var_op_name_to_grad_dict expected_var_ops = {var.op: (grad, var) for grad, var in self.grad_target_pairs.items()} - #on_trainable_variable = {var.op: True for grad, var in self.grad_target_pairs.items()} res = {} for op in self.all_update_ops: var_op = op.inputs[op_info.UPDATE_OP_VAR_POS].op @@ -340,6 +340,7 @@ def var_op_name_to_grad_info(self): # It is actually coming from the name given to the saver is_saving = update_op_scope.endswith('save') # TODO(future): support one variable -> multiple update ops (see AdamWeightDecay optimizer) + #print(on_trainable_variable, is_initialization, is_saving, self._is_auxiliary(op)) if on_trainable_variable and not is_initialization and not is_saving and not self._is_auxiliary(op): if var_op.name in res: raise ValueError('A variable cannot correspond to more than one update op for now.') @@ -348,15 +349,15 @@ def var_op_name_to_grad_info(self): # analyze what var_ops the op depends on, if all removed, then can remove this op from the loop if self.first_time_loop: self.update_op_depend_var[op].append(var_op.name) - # this var has been done, remove this var from values of the dict - if len(self.update_op_depend_var[op]) != 0: - self.update_op_depend_var[op].remove(var_op.name) + + assert len(self.var_quried) <= 1 + if len(self.var_quried) > 0: + if var_op.name == self.var_quried[0]: + self.update_op_depend_var[op].remove(var_op.name) + self.var_quried.remove(var_op.name) if len(self.update_op_depend_var[op]) == 0: self.all_update_ops.remove(op) - - #print(len(self.all_update_ops)) # recalculated the dict, set the indicator - #self.var_op_name_to_grad_dict = res self.updated = False self.first_time_loop = False return self.var_op_name_to_grad_dict #res @@ -397,7 +398,7 @@ def grad_target_pairs(self): ) if isinstance(g, tuple) else self.graph.get_tensor_by_name(g): self.graph.get_tensor_by_name(t) for g, t in self._grad_target_pairs.items()} - @property + @cached_property def trainable_var_op_to_var(self): """ Mapping from trainable variable ops (e.g. VarHandleOps) to the Variables. 
diff --git a/autodist/kernel/graph_transformer.py b/autodist/kernel/graph_transformer.py index 67c89f3..fc5ff63 100644 --- a/autodist/kernel/graph_transformer.py +++ b/autodist/kernel/graph_transformer.py @@ -162,6 +162,7 @@ def _between_graph_apply(self, multi_gpu_graph_item: GraphItem): """ new_graph_item = multi_gpu_graph_item new_graph_item.first_time_loop = True + GraphItem.all_update_ops.fget.cache_clear() for var_name, syncer in self._synchronizers.items(): new_graph_item = syncer.between_graph_apply(new_graph_item, var_name) self._prune_colocation_groups(new_graph_item) diff --git a/autodist/kernel/synchronization/ps_synchronizer.py b/autodist/kernel/synchronization/ps_synchronizer.py index 8ba5316..e44ffdd 100644 --- a/autodist/kernel/synchronization/ps_synchronizer.py +++ b/autodist/kernel/synchronization/ps_synchronizer.py @@ -86,6 +86,7 @@ def in_graph_apply(self, graph_item, var_name): master_var_op_name = get_op_name(master_var_name) #item.updated = True # force graph item to recalculate the dict grad, target, update_op = item.var_op_name_to_grad_info[master_var_op_name] + item.var_quried.append(master_var_op_name) #print(grad, target, update_op,master_var_op_name,master_var_name) #assert False agg_grad = self._aggregate_gradients(item, old_update_op=update_op, old_grad=grad, old_target=target) @@ -267,6 +268,8 @@ def between_graph_apply(self, graph_item, var_name): var_op_name = ops.prepend_name_scope(get_op_name(var_name), replica_prefix(0)) item.updated = True gradient, target, update_op = item.var_op_name_to_grad_info[var_op_name] + item.var_quried.append(var_op_name) + #print(item.var_quried) with item.graph.as_default(): proxy = self._create_proxy(item, gradient, target) if self._local_replication else None if proxy: From 952b9cd87589d36e305d122537cb3d6b378739bf Mon Sep 17 00:00:00 2001 From: YLJALDC Date: Sat, 10 Oct 2020 01:05:03 +0000 Subject: [PATCH 5/8] rewrite to reduce intra-class code dependency --- autodist/graph_item.py | 71 +++++++++++++++---- autodist/kernel/graph_transformer.py | 13 ++-- .../kernel/synchronization/ps_synchronizer.py | 36 ++++++---- 3 files changed, 87 insertions(+), 33 deletions(-) diff --git a/autodist/graph_item.py b/autodist/graph_item.py index 7b42ba7..4e2a037 100644 --- a/autodist/graph_item.py +++ b/autodist/graph_item.py @@ -242,12 +242,26 @@ def __init__(self, graph: ops.Graph = None, graph_def: GraphDef = None): # Info self.info = Info() self.optimizer, self.optimizer_args, self.optimizer_kwargs = None, None, None + + + # Optimizing the var_op_name_to_grad query. 
+ # used to inform the var_op_name_to_grad_dict that the graph has been modified + # only used when the synchronizer is calling the lookup with optimize=True self.updated = True - self.var_op_name_to_grad_dict = dict() #None + # used to cached the result of var_op_name_to_grad function from last time + self.var_op_name_to_grad_dict = dict() + # map the updated op to its inputs variables, used to optimize var_op_name_to_grad self.update_op_depend_var = defaultdict(list) + + # on if this graph is in loop optimize mode for the first time self.first_time_loop = True self.var_quried = [] + + def end_loop_optimize(self): + """end a loop of synchronizer apply, so that first_time_loop is reset""" + self.first_time_loop = True + def get_trainable_variables(self): """Get variables that need to be synchronized if doing data parallelism.""" return [op.outputs[0] for op in self.trainable_var_op_to_var] @@ -325,7 +339,6 @@ def all_update_ops(self): @property def var_op_name_to_grad_info(self): """A mapping from VarHandleOp name (e.g. "W" not "W:0") to its (grad, var, update_op) tuple.""" - # if the graph has not been rewritten, return old dict instead of generating a new one if not self.updated: return self.var_op_name_to_grad_dict expected_var_ops = {var.op: (grad, var) for grad, var in self.grad_target_pairs.items()} @@ -339,28 +352,56 @@ def var_op_name_to_grad_info(self): # TODO: we should not hardcode this scope. # It is actually coming from the name given to the saver is_saving = update_op_scope.endswith('save') + # TODO(future): support one variable -> multiple update ops (see AdamWeightDecay optimizer) - #print(on_trainable_variable, is_initialization, is_saving, self._is_auxiliary(op)) if on_trainable_variable and not is_initialization and not is_saving and not self._is_auxiliary(op): if var_op.name in res: raise ValueError('A variable cannot correspond to more than one update op for now.') res[var_op.name] = expected_var_ops[var_op] + (op,) + self.updated = False + self.var_op_name_to_grad_dict = res + return res + + @property + def var_op_name_to_grad_info_optimize(self): + """A mapping from VarHandleOp name (e.g. "W" not "W:0") to its (grad, var, update_op) tuple. + An optimized version that is aware of this method is iteratively used""" + # if the graph has not been rewritten, return old dict instead of generating a new one + if not self.updated: + return self.var_op_name_to_grad_dict + expected_var_ops = {var.op: (grad, var) for grad, var in self.grad_target_pairs.items()} + res = {} + for op in self.all_update_ops: + var_op = op.inputs[op_info.UPDATE_OP_VAR_POS].op + on_trainable_variable = var_op in expected_var_ops + var_scope = var_op.name + update_op_scope = parse_name_scope(op.name) + is_initialization = update_op_scope == var_scope + # TODO: we should not hardcode this scope. 
+ # It is actually coming from the name given to the saver + is_saving = update_op_scope.endswith('save') + # TODO(future): support one variable -> multiple update ops (see AdamWeightDecay optimizer) + if on_trainable_variable and not is_initialization and not is_saving and not self._is_auxiliary(op): + if var_op.name in res: + raise ValueError('A variable cannot correspond to more than one update op for now.') + #res[var_op.name] = expected_var_ops[var_op] + (op,) self.var_op_name_to_grad_dict[var_op.name] = expected_var_ops[var_op] + (op,) # analyze what var_ops the op depends on, if all removed, then can remove this op from the loop - if self.first_time_loop: - self.update_op_depend_var[op].append(var_op.name) - - assert len(self.var_quried) <= 1 - if len(self.var_quried) > 0: - if var_op.name == self.var_quried[0]: - self.update_op_depend_var[op].remove(var_op.name) - self.var_quried.remove(var_op.name) - if len(self.update_op_depend_var[op]) == 0: - self.all_update_ops.remove(op) + # if self.first_time_loop: + # self.update_op_depend_var[op].append(var_op.name) + # + # assert len(self.var_quried) <= 1 + # if len(self.var_quried) > 0: + # if var_op.name == self.var_quried[0]: + # self.update_op_depend_var[op].remove(var_op.name) + # self.var_quried.remove(var_op.name) + # if len(self.update_op_depend_var[op]) == 0: + # self.all_update_ops.remove(op) + # recalculated the dict, set the indicator self.updated = False self.first_time_loop = False - return self.var_op_name_to_grad_dict #res + return self.var_op_name_to_grad_dict def _is_auxiliary(self, update_op: ops.Operation): @@ -398,7 +439,7 @@ def grad_target_pairs(self): ) if isinstance(g, tuple) else self.graph.get_tensor_by_name(g): self.graph.get_tensor_by_name(t) for g, t in self._grad_target_pairs.items()} - @cached_property + @property def trainable_var_op_to_var(self): """ Mapping from trainable variable ops (e.g. VarHandleOps) to the Variables. 
diff --git a/autodist/kernel/graph_transformer.py b/autodist/kernel/graph_transformer.py index fc5ff63..0c6526c 100644 --- a/autodist/kernel/graph_transformer.py +++ b/autodist/kernel/graph_transformer.py @@ -145,9 +145,11 @@ def _in_graph_apply(self, graph_item: GraphItem): GraphItem """ new_graph_item = graph_item - new_graph_item.first_time_loop = True for var_name, syncer in self._synchronizers.items(): - new_graph_item = syncer.in_graph_apply(new_graph_item, var_name) + new_graph_item = syncer.in_graph_apply(new_graph_item, var_name, optimize = True) + new_graph_item.end_loop_optimize() + # MUST turn off the optimize after use + new_graph_item.loop_optimize = False return new_graph_item def _between_graph_apply(self, multi_gpu_graph_item: GraphItem): @@ -161,11 +163,12 @@ def _between_graph_apply(self, multi_gpu_graph_item: GraphItem): GraphItem """ new_graph_item = multi_gpu_graph_item - new_graph_item.first_time_loop = True - GraphItem.all_update_ops.fget.cache_clear() + #GraphItem.all_update_ops.fget.cache_clear() for var_name, syncer in self._synchronizers.items(): - new_graph_item = syncer.between_graph_apply(new_graph_item, var_name) + new_graph_item = syncer.between_graph_apply(new_graph_item, var_name, optimize = True) + new_graph_item.end_loop_optimize() self._prune_colocation_groups(new_graph_item) + new_graph_item.loop_optimize = False # TODO: make this work # update_shard_values_for_worker(num_workers, worker_id) return new_graph_item diff --git a/autodist/kernel/synchronization/ps_synchronizer.py b/autodist/kernel/synchronization/ps_synchronizer.py index e44ffdd..f9c256a 100644 --- a/autodist/kernel/synchronization/ps_synchronizer.py +++ b/autodist/kernel/synchronization/ps_synchronizer.py @@ -63,13 +63,14 @@ def __init__(self, config: synchronizers_pb2.PSSynchronizer): self._var_op_to_accum_apply_op = {} super().__init__() - def in_graph_apply(self, graph_item, var_name): + def in_graph_apply(self, graph_item, var_name, optimize = False): """ Apply in-graph ps synchronization. 
Args: graph_item: the old graph item var_name: the variable name w/o replica prefix + optimize: True if this is iteratively called Returns: graph_item.GraphItem @@ -80,13 +81,15 @@ def in_graph_apply(self, graph_item, var_name): master_replica_index = 0 with item.graph.as_default(): - self._prune_control_dependencies(item, var_op_name, master_replica=master_replica_index) + self._prune_control_dependencies(item, var_op_name, master_replica=master_replica_index, optimize=optimize) self._share_variable(item, var_op_name, master_replica=master_replica_index) master_var_name = ops.prepend_name_scope(var_name, replica_prefix(master_replica_index)) master_var_op_name = get_op_name(master_var_name) - #item.updated = True # force graph item to recalculate the dict - grad, target, update_op = item.var_op_name_to_grad_info[master_var_op_name] - item.var_quried.append(master_var_op_name) + if optimize: + grad, target, update_op = item.var_op_name_to_grad_info_optimize[master_var_op_name] + item.var_quried.append(master_var_op_name) + else: + grad, target, update_op = item.var_op_name_to_grad_info[master_var_op_name] #print(grad, target, update_op,master_var_op_name,master_var_name) #assert False agg_grad = self._aggregate_gradients(item, old_update_op=update_op, old_grad=grad, old_target=target) @@ -212,7 +215,7 @@ def ctrl_consumers(op): raise RuntimeError("Incorrect old_grad.") return agg_grad - def _prune_control_dependencies(self, graph_item, var_op_name, master_replica=0): + def _prune_control_dependencies(self, graph_item, var_op_name, master_replica=0, optimize = False): """ Prune the control dependencies between the train_op on non-master replica and update op. @@ -227,8 +230,11 @@ def _prune_control_dependencies(self, graph_item, var_op_name, master_replica=0) if i == master_replica: continue this_var_op_name = ops.prepend_name_scope(var_op_name, replica_prefix(i)) - graph_item.updated = True - _, _, update_op = graph_item.var_op_name_to_grad_info[this_var_op_name] + if optimize: + graph_item.updated = True + _, _, update_op = graph_item.var_op_name_to_grad_info_optimize[this_var_op_name] + else: + _, _, update_op = graph_item.var_op_name_to_grad_info[this_var_op_name] source_op = self._get_optimizer_source_op(update_op) remove_from_control_consumers(get_control_consumers(source_op), source_op) @@ -250,13 +256,14 @@ def _get_optimizer_source_op(update_op): _BETWEEN_GRAPH_APPLY_SCOPE = 'autodist-between'.lower() - def between_graph_apply(self, graph_item, var_name): + def between_graph_apply(self, graph_item, var_name, optimize = False): """ Apply between-graph synchronization to the target ops in the graph. Args: graph_item: The current graph. var_name: the variable to be synchronized. + optimize: True if iteratively called Returns: graph_item.GraphItem: updated graph item. 
@@ -266,10 +273,12 @@ def between_graph_apply(self, graph_item, var_name): item = graph_item # here the variable on replica:0 has been shared, so the original var_name won't work var_op_name = ops.prepend_name_scope(get_op_name(var_name), replica_prefix(0)) - item.updated = True - gradient, target, update_op = item.var_op_name_to_grad_info[var_op_name] - item.var_quried.append(var_op_name) - #print(item.var_quried) + if optimize: + item.updated = True + gradient, target, update_op = item.var_op_name_to_grad_info_optimize[var_op_name] + item.var_quried.append(var_op_name) + else: + gradient, target, update_op = item.var_op_name_to_grad_info[var_op_name] with item.graph.as_default(): proxy = self._create_proxy(item, gradient, target) if self._local_replication else None if proxy: @@ -304,6 +313,7 @@ def add_sync_op(self, graph_item, var_update_op, variable_replicator=None): this_worker_cpu = this_worker_cpu.replace(device_type='CPU', device_index=0) var_op = var_update_op.inputs[UPDATE_OP_VAR_POS].op + #print(graph_item.trainable_var_op_to_var) is_trainable = var_op in graph_item.trainable_var_op_to_var source_op = self._get_optimizer_source_op(var_update_op) cc = get_control_consumers(source_op) From c015cf1d864eb526c3ef6719eadaa441c56f0547 Mon Sep 17 00:00:00 2001 From: YLJALDC Date: Sat, 10 Oct 2020 02:03:13 +0000 Subject: [PATCH 6/8] add all_reduce rewrite --- .../all_reduce_synchronizer.py | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/autodist/kernel/synchronization/all_reduce_synchronizer.py b/autodist/kernel/synchronization/all_reduce_synchronizer.py index 1a37a94..20c597f 100644 --- a/autodist/kernel/synchronization/all_reduce_synchronizer.py +++ b/autodist/kernel/synchronization/all_reduce_synchronizer.py @@ -66,7 +66,7 @@ def __init__(self, config: synchronizers_pb2.AllReduceSynchronizer): self._group = config.group super().__init__() - def in_graph_apply(self, graph_item, var_name): + def in_graph_apply(self, graph_item, var_name, optimize = False): """ Perform in-graph synchronization based on AllReduce and TensorFlow Collective Ops. 
@@ -75,6 +75,7 @@ def in_graph_apply(self, graph_item, var_name): Args: graph_item (graph_item.GraphItem): the graph_item to be distributed var_name (str): the corresponded variable name + optimize: True if this is iteratively called Returns: graph_item.GraphItem: The new graph @@ -88,8 +89,11 @@ def in_graph_apply(self, graph_item, var_name): # Throw an error if the variable is sparse master_op_name = ops.prepend_name_scope(var_op_name, replica_prefix(0)) - graph_item.updated = True - grad, _, _ = graph_item.var_op_name_to_grad_info[master_op_name] + if optimize: + graph_item.updated = True + grad, _, _ = graph_item.var_op_name_to_grad_info_optimize[master_op_name] + else: + grad, _, _ = graph_item.var_op_name_to_grad_info[master_op_name] with item.graph.as_default(): self._share_initializer(item, var_op_name, master_replica=0) if isinstance(grad, ops.IndexedSlices): @@ -98,7 +102,7 @@ def in_graph_apply(self, graph_item, var_name): self._collect_dense_gradients(item, var_op_name) return item - def _collect_dense_gradients(self, graph_item, var_op_name): + def _collect_dense_gradients(self, graph_item, var_op_name, optimize = False): """Append collective ops after the gradient is calculated.""" if self.num_replicas * self.num_workers <= 1: raise ValueError('CollectiveOps requires collective group size > 1') @@ -116,8 +120,11 @@ def _collect_dense_gradients(self, graph_item, var_op_name): for i in range(0, self.num_replicas): op_name = ops.prepend_name_scope(var_op_name, replica_prefix(i)) - graph_item.updated = True - grad, _, _ = graph_item.var_op_name_to_grad_info[op_name] + if optimize: + graph_item.updated = True + grad, _, _ = graph_item.var_op_name_to_grad_info_optimize[op_name] + else: + grad, _, _ = graph_item.var_op_name_to_grad_info[op_name] # TODO (Tairui): (3) Merge of reduction for performance grad_consumers = get_consumers(grad.op) # this line must happen before the reduction @@ -128,7 +135,7 @@ def _collect_dense_gradients(self, graph_item, var_op_name): update_consumers(grad_consumers, grad, reduced_grad) # TODO(Hao): update grad, target pair here or not? - def _collect_sparse_gradients(self, graph_item, var_op_name): + def _collect_sparse_gradients(self, graph_item, var_op_name, optimize = False): """Append collective ops after the gradient is calculated.""" if self.num_workers > 1 and not ENV.AUTODIST_INTERNAL_TF.value: raise NotImplementedError('Currently the collective NCCL AllGather is not supported in TensorFlow release.' 
@@ -142,8 +149,11 @@ def _collect_sparse_gradients(self, graph_item, var_op_name): raise ValueError('CollectiveOps requires collective group size > 1') for i in range(0, self.num_replicas): op_name = ops.prepend_name_scope(var_op_name, replica_prefix(i)) - graph_item.updated = True - grad, _, _ = graph_item.var_op_name_to_grad_info[op_name] + if optimize: + graph_item.updated = True + grad, _, _ = graph_item.var_op_name_to_grad_info_optimize[op_name] + else: + grad, _, _ = graph_item.var_op_name_to_grad_info[op_name] # TODO (Tairui): (3) Merge of reduction for performance indices_c_ops = grad.indices.consumers() indices_cc_ops = get_control_consumers(grad.indices.op) @@ -195,6 +205,6 @@ def _share_initializer(self, graph_item, var_op_name, master_replica=0): init_assign_op._update_input(1, master_init_tensor) # pylint: disable=no-self-use - def between_graph_apply(self, graph_item, var_name): + def between_graph_apply(self, graph_item, var_name, optimize=False): """Allreduce synchronizer will do nothing in between-graph synchronization.""" return graph_item From 9189dee9f0f2fb64856c19d55ebe61aaad40f7d6 Mon Sep 17 00:00:00 2001 From: YLJALDC Date: Sat, 10 Oct 2020 06:52:07 +0000 Subject: [PATCH 7/8] clean version of single machine optim; start multi-machine next --- autodist/graph_item.py | 51 ++++++++++++++++++---------- autodist/kernel/graph_transformer.py | 10 +++--- 2 files changed, 38 insertions(+), 23 deletions(-) diff --git a/autodist/graph_item.py b/autodist/graph_item.py index 4e2a037..90e406c 100644 --- a/autodist/graph_item.py +++ b/autodist/graph_item.py @@ -255,12 +255,24 @@ def __init__(self, graph: ops.Graph = None, graph_def: GraphDef = None): # on if this graph is in loop optimize mode for the first time self.first_time_loop = True + self.loop_phase = False self.var_quried = [] + self.useful_update_op = [] + # how many local replica is this graph comprised of + self.num_replica = 0 + self.var_op_appear_time = defaultdict(int) + + + def start_loop_optimize(self): + """start a loop of synchronizer apply""" + self.first_time_loop = True + self.loop_phase = True def end_loop_optimize(self): - """end a loop of synchronizer apply, so that first_time_loop is reset""" + """end a loop of synchronizer apply""" self.first_time_loop = True + self.loop_phase = False def get_trainable_variables(self): """Get variables that need to be synchronized if doing data parallelism.""" @@ -339,8 +351,10 @@ def all_update_ops(self): @property def var_op_name_to_grad_info(self): """A mapping from VarHandleOp name (e.g. "W" not "W:0") to its (grad, var, update_op) tuple.""" - if not self.updated: - return self.var_op_name_to_grad_dict + # this method only called when the caller does not know there is an optimization for this. + # so if it is in loop phase, we compute the dict again. 
+ if (not self.updated and not self.loop_phase): + return self.var_op_name_to_grad_dict expected_var_ops = {var.op: (grad, var) for grad, var in self.grad_target_pairs.items()} res = {} for op in self.all_update_ops: @@ -370,8 +384,11 @@ def var_op_name_to_grad_info_optimize(self): if not self.updated: return self.var_op_name_to_grad_dict expected_var_ops = {var.op: (grad, var) for grad, var in self.grad_target_pairs.items()} - res = {} - for op in self.all_update_ops: + res = [] + # keep a list of useful update_op + if self.first_time_loop: + self.useful_update_op = self.all_update_ops.copy() + for op in self.useful_update_op: var_op = op.inputs[op_info.UPDATE_OP_VAR_POS].op on_trainable_variable = var_op in expected_var_ops var_scope = var_op.name @@ -384,23 +401,23 @@ def var_op_name_to_grad_info_optimize(self): if on_trainable_variable and not is_initialization and not is_saving and not self._is_auxiliary(op): if var_op.name in res: raise ValueError('A variable cannot correspond to more than one update op for now.') - #res[var_op.name] = expected_var_ops[var_op] + (op,) + res.append(var_op.name) self.var_op_name_to_grad_dict[var_op.name] = expected_var_ops[var_op] + (op,) - # analyze what var_ops the op depends on, if all removed, then can remove this op from the loop - # if self.first_time_loop: - # self.update_op_depend_var[op].append(var_op.name) - # - # assert len(self.var_quried) <= 1 - # if len(self.var_quried) > 0: - # if var_op.name == self.var_quried[0]: - # self.update_op_depend_var[op].remove(var_op.name) - # self.var_quried.remove(var_op.name) - # if len(self.update_op_depend_var[op]) == 0: - # self.all_update_ops.remove(op) + #analyze what var_ops the op depends on, if all removed, then can remove this op from the loop + if self.first_time_loop: + self.update_op_depend_var[op].append(var_op.name) + + assert len(self.var_quried) <= 1 + if len(self.var_quried) > 0: + if var_op.name == self.var_quried[0]: + self.var_op_appear_time[var_op] += 1 + self.var_quried.remove(var_op.name) + self.useful_update_op.remove(op) # recalculated the dict, set the indicator self.updated = False self.first_time_loop = False + #print(self.var_op_name_to_grad_dict["AutoDist-Replica-0/word_embeddings/embeddings"]) return self.var_op_name_to_grad_dict diff --git a/autodist/kernel/graph_transformer.py b/autodist/kernel/graph_transformer.py index 0c6526c..291bd8c 100644 --- a/autodist/kernel/graph_transformer.py +++ b/autodist/kernel/graph_transformer.py @@ -145,11 +145,10 @@ def _in_graph_apply(self, graph_item: GraphItem): GraphItem """ new_graph_item = graph_item + new_graph_item.start_loop_optimize() for var_name, syncer in self._synchronizers.items(): new_graph_item = syncer.in_graph_apply(new_graph_item, var_name, optimize = True) - new_graph_item.end_loop_optimize() - # MUST turn off the optimize after use - new_graph_item.loop_optimize = False + new_graph_item.end_loop_optimize() return new_graph_item def _between_graph_apply(self, multi_gpu_graph_item: GraphItem): @@ -163,12 +162,11 @@ def _between_graph_apply(self, multi_gpu_graph_item: GraphItem): GraphItem """ new_graph_item = multi_gpu_graph_item - #GraphItem.all_update_ops.fget.cache_clear() + new_graph_item.start_loop_optimize() for var_name, syncer in self._synchronizers.items(): new_graph_item = syncer.between_graph_apply(new_graph_item, var_name, optimize = True) - new_graph_item.end_loop_optimize() + new_graph_item.end_loop_optimize() self._prune_colocation_groups(new_graph_item) - new_graph_item.loop_optimize = False # TODO: make 
this work # update_shard_values_for_worker(num_workers, worker_id) return new_graph_item

From 98fcbc1381609e23ea75a0fb7412d71df0b18c21 Mon Sep 17 00:00:00 2001
From: YLJALDC
Date: Wed, 14 Oct 2020 01:51:38 +0000
Subject: [PATCH 8/8] bert files

---
 examples/benchmark/bert_config.json     | 13 +++++++++++++
 examples/benchmark/tf_examples.tfrecord | Bin 0 -> 49090 bytes
 2 files changed, 13 insertions(+)
 create mode 100644 examples/benchmark/bert_config.json
 create mode 100644 examples/benchmark/tf_examples.tfrecord

diff --git a/examples/benchmark/bert_config.json b/examples/benchmark/bert_config.json
new file mode 100644
index 0000000..a7efa97
--- /dev/null
+++ b/examples/benchmark/bert_config.json
@@ -0,0 +1,13 @@
+{
+  "attention_probs_dropout_prob": 0.1,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "max_position_embeddings": 512,
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "type_vocab_size": 2,
+  "vocab_size": 30522
+}

diff --git a/examples/benchmark/tf_examples.tfrecord b/examples/benchmark/tf_examples.tfrecord
new file mode 100644
index 0000000000000000000000000000000000000000..49bad5ad29814410b34d075ea394f9f714d16691
GIT binary patch
literal 49090
[49090 bytes of base85-encoded binary data omitted]

literal 0
HcmV?d00001
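The series above replaces the repeated full scan behind GraphItem.var_op_name_to_grad_info with a cached dictionary guarded by an `updated` dirty flag, plus an optimized variant (var_op_name_to_grad_info_optimize) that knows it is being called once per variable inside the synchronizer loop and therefore drops an update op from the scan once its variable has been handled (start_loop_optimize/end_loop_optimize, useful_update_op, var_quried). The sketch below is a minimal, self-contained illustration of that caching pattern, not the actual AutoDist API: GraphItemSketch, UpdateOp, and var_op_name_to_update_op are hypothetical stand-ins, while updated, optimize, and start_loop_optimize mirror the names introduced in the patches.

from collections import namedtuple

# Hypothetical stand-in for an update op that applies a gradient to one variable.
UpdateOp = namedtuple("UpdateOp", ["name", "var_op_name"])


class GraphItemSketch:
    """Caches the var-op-name -> update-op mapping and rebuilds it lazily."""

    def __init__(self, update_ops):
        self._all_update_ops = list(update_ops)
        self._useful_update_ops = list(update_ops)  # shrinks while in loop mode
        self._cache = {}                            # var_op_name -> UpdateOp
        self.updated = True                         # dirty flag: graph was rewritten

    def start_loop_optimize(self):
        # Reset the set of ops still worth scanning before a synchronizer loop.
        self._useful_update_ops = list(self._all_update_ops)

    def var_op_name_to_update_op(self, var_op_name, optimize=False):
        if self.updated:
            # Rebuild the cache, scanning only the remaining ops in loop mode.
            ops_to_scan = self._useful_update_ops if optimize else self._all_update_ops
            for op in ops_to_scan:
                self._cache[op.var_op_name] = op
            self.updated = False
        result = self._cache[var_op_name]
        if optimize:
            # This variable has been synchronized; later rebuilds skip its op.
            self._useful_update_ops = [op for op in self._useful_update_ops if op is not result]
        return result


# Usage mirroring GraphTransformer._in_graph_apply: mark the item dirty after each
# rewrite, then query in optimized (loop) mode so the scan keeps shrinking.
item = GraphItemSketch([UpdateOp("apply_W", "W"), UpdateOp("apply_b", "b")])
item.start_loop_optimize()
for var_op_name in ["W", "b"]:
    item.updated = True  # a synchronizer just rewrote the graph
    print(item.var_op_name_to_update_op(var_op_name, optimize=True).name)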