fix bug in tl.store mask for kernel _to_fp8_row_major_t_and_non_t
ghstack-source-id: f95151dc71feb9c6af2788a3e2ccc65d08f4bff3
ghstack-comment-id: 2575984684
Pull Request resolved: #1516
danielvegamyhre committed Jan 8, 2025
1 parent 953e1a6 commit 832bb97
Showing 2 changed files with 5 additions and 3 deletions.
@@ -375,8 +375,8 @@ def _to_fp8_row_major_t_and_non_t(
         block_col_offs[:, None] * row_major_t_out_stride_row
         + block_row_offs[None, :] * row_major_t_out_stride_col
     )
-    mask = (block_row_offs[:, None] < row_major_t_num_rows) & (
-        block_col_offs[None, :] < row_major_t_num_cols
+    mask = (block_col_offs[:, None] < row_major_t_num_rows) & (
+        block_row_offs[None, :] < row_major_t_num_cols
     )
     tl.store(row_major_t_out_ptr + row_major_t_offs, fp8_vals.trans(1, 0), mask=mask)

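Why the swapped mask is the fix: the store offsets index the transposed output using block_col_offs as the row index and block_row_offs as the column index, so the bounds check must be built the same way. The old mask compared each offset against the wrong dimension, which only matters when a tile overhangs the edge of a non-square output. Below is a minimal NumPy sketch of such a masked transposed store, illustrating the fixed mask; BLOCK, num_rows, and num_cols are assumed names for this illustration, not taken from the Triton kernel:

```python
# Minimal NumPy sketch of a masked transposed tile store -- an illustration
# of the mask fix, not the actual Triton kernel.
import numpy as np

num_rows, num_cols = 5, 3      # input shape; transposed output is (3, 5)
BLOCK = 4                      # tile size; tiles overhang the tensor edges

x = np.arange(num_rows * num_cols, dtype=np.float32).reshape(num_rows, num_cols)
out_t = np.zeros((num_cols, num_rows), dtype=np.float32)

for r0 in range(0, num_rows, BLOCK):
    for c0 in range(0, num_cols, BLOCK):
        block_row_offs = r0 + np.arange(BLOCK)   # row offsets into the input
        block_col_offs = c0 + np.arange(BLOCK)   # col offsets into the input

        # The store offsets use block_col_offs for the output row and
        # block_row_offs for the output column, so the mask must do the
        # same -- this is exactly the swap the commit makes:
        mask = (block_col_offs[:, None] < num_cols) & (
            block_row_offs[None, :] < num_rows
        )
        for i in range(BLOCK):        # output row  <- block_col_offs[i]
            for j in range(BLOCK):    # output col  <- block_row_offs[j]
                if mask[i, j]:
                    out_t[block_col_offs[i], block_row_offs[j]] = x[
                        block_row_offs[j], block_col_offs[i]
                    ]

# With the fixed mask the transposed copy is exact. With the old ordering
# (block_row_offs[:, None] < num_cols, block_col_offs[None, :] < num_rows),
# in-bounds elements are dropped and out-of-bounds offsets are stored
# whenever the overhanging tile sits on a non-square tensor.
assert np.array_equal(out_t, x.T)
```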
torchao/prototype/float8nocompile/test/train_test.py (3 additions, 1 deletion)
@@ -36,7 +36,9 @@ def model2():
     return TestModel()


-@pytest.mark.parametrize("input_shape", [(16, 32), (1, 16, 32), (2, 16, 32)])
+@pytest.mark.parametrize(
+    "input_shape", [(16, 32), (1, 16, 32), (2, 16, 32), (128, 8192, 32)]
+)
 def test_model_weights_and_gradients(model1, model2, input_shape: tuple[int, int]):
     assert torch.cuda.is_available()
     device = torch.device("cuda")
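The new (128, 8192, 32) case adds a large, highly non-square input, presumably the regime where the old and fixed masks diverge; the three original shapes never caught the bug. A hedged sketch for running the updated test locally, assuming the torchao repository root as the working directory and an available CUDA device (the test itself asserts torch.cuda.is_available()):

```python
# Run the updated test file through pytest's Python entry point.
import sys

import pytest

sys.exit(
    pytest.main(
        ["torchao/prototype/float8nocompile/test/train_test.py", "-v"]
    )
)
```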
