diff --git a/loopy/schedule/tools.py b/loopy/schedule/tools.py index a7860e343..3858462b1 100644 --- a/loopy/schedule/tools.py +++ b/loopy/schedule/tools.py @@ -886,7 +886,7 @@ def _raise_loopy_err(x): _update_nesting_constraints(relaxed_priorities, warn) # ordered_loop_nests: A mapping from the unordered loop nests to their - # ordered couterparts. For example. If we had only one loop nest + # ordered counterparts. For example. If we had only one loop nest # `frozenset({"i", "j", "k"})`, and the prioirities said added the # constraint that "i" must be nested within "k", then `ordered_loop_nests` # would be: `{frozenset({"i", "j", "k"}): ["j", "k", "i"]}` i.e. the loop diff --git a/loopy/statistics.py b/loopy/statistics.py index 2d0537fdb..99b163f80 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -422,16 +422,16 @@ def to_bytes(self): bytes_map = get_mem_access_map(knl).to_bytes() params = {"n": 512, "m": 256, "l": 128} - s1_g_ld_byt = bytes_map.filter_by( + s1_g_ld_bytes = bytes_map.filter_by( mtype=["global"], lid_strides={0: 1}, direction=["load"]).eval_and_sum(params) - s2_g_ld_byt = bytes_map.filter_by( + s2_g_ld_bytes = bytes_map.filter_by( mtype=["global"], lid_strides={0: 2}, direction=["load"]).eval_and_sum(params) - s1_g_st_byt = bytes_map.filter_by( + s1_g_st_bytes = bytes_map.filter_by( mtype=["global"], lid_strides={0: 1}, direction=["store"]).eval_and_sum(params) - s2_g_st_byt = bytes_map.filter_by( + s2_g_st_bytes = bytes_map.filter_by( mtype=["global"], lid_strides={0: 2}, direction=["store"]).eval_and_sum(params) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index d04fa5b2d..2c91643ac 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -47,6 +47,7 @@ RuleAwareIdentityMapper, RuleAwareSubstitutionMapper, SubstitutionRuleMappingContext, + flatten, get_dependencies, ) from loopy.transform.array_buffer_map import ( @@ -296,7 +297,7 @@ def map_substitution(self, name, tag, arguments, expn_state): new_outer_expr = var(self.temporary_name) if stor_subscript: - new_outer_expr = new_outer_expr.index(tuple(stor_subscript)) + new_outer_expr = new_outer_expr[tuple(stor_subscript)] # Can't possibly be nested, and no need to traverse # further as compute expression has already been seen @@ -928,7 +929,7 @@ def add_assumptions(d): storage_axis_subst_dict[ prior_storage_axis_name_dict.get(arg_name, arg_name)] = \ - arg+base_index + flatten(arg+base_index) rule_mapping_context = SubstitutionRuleMappingContext( kernel.substitutions, kernel.get_var_name_generator()) diff --git a/loopy/transform/privatize.py b/loopy/transform/privatize.py index ef878c90f..ca31368d2 100644 --- a/loopy/transform/privatize.py +++ b/loopy/transform/privatize.py @@ -43,7 +43,7 @@ # {{{ privatize temporaries with iname -from loopy.symbolic import IdentityMapper +from loopy.symbolic import IdentityMapper, flatten class ExtraInameIndexInserter(IdentityMapper): @@ -66,7 +66,7 @@ def map_subscript(self, expr): self.seen_priv_axis_inames.update(v.name for v in extra_idx) - new_idx = index + tuple(v - self.iname_to_lbound[v.name] + new_idx = index + tuple(flatten(v - self.iname_to_lbound[v.name]) for v in extra_idx) if len(new_idx) == 1: @@ -81,7 +81,7 @@ def map_variable(self, expr): else: self.seen_priv_axis_inames.update(v.name for v in new_idx) - new_idx = tuple(v - self.iname_to_lbound[v.name] + new_idx = tuple(flatten(v - self.iname_to_lbound[v.name]) for v in new_idx) if len(new_idx) == 1: diff --git a/loopy/transform/realize_reduction.py b/loopy/transform/realize_reduction.py index 8aea6541a..7d1f3c870 100644 --- a/loopy/transform/realize_reduction.py +++ b/loopy/transform/realize_reduction.py @@ -711,7 +711,7 @@ def _add_to_depends_on(insn_id, new_depends_on_params): needs_replacement = True - # {{{ generate a new assignent instruction + # {{{ generate a new assignment instruction new_assignee_name = var_name_gen( "{insn_id}_retval_{assignee_nr}"