Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CPU Kernel Tests #1439

Open
wants to merge 25 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
05144e5
test_utils refactor, local_cpu_allocator
oOTigger Jul 11, 2024
3bb8ff6
test utils modification, cast, reverse, and replicate cpu kernels
oOTigger Jul 12, 2024
968cd6d
combine kernel
oOTigger Jul 14, 2024
723515b
combine kernels .h file
oOTigger Jul 14, 2024
ba586ae
Implementations for methods for machine_views and associated modules …
Marsella8 Jul 19, 2024
e6e2161
test utils logic cleanup, reverse cpu_kernel pedagogical implmentatio…
oOTigger Jul 31, 2024
29a2cf3
Merge branch 'repo-refactor' into cpu-kernels-tests
oOTigger Sep 20, 2024
366bd94
Merge branch 'repo-refactor' into cpu-kernels-tests
oOTigger Sep 24, 2024
c9c33fd
cpu_kernel's refactor, generic tensor accessor indexing
oOTigger Oct 8, 2024
2a5b38a
Merge branch 'repo-refactor' into cpu-kernels-tests
oOTigger Oct 8, 2024
d50914c
accessor.h formatting
oOTigger Oct 8, 2024
f1f2698
mk_runtime_error formatting
oOTigger Oct 8, 2024
a7422f7
reverse_kernels include
oOTigger Oct 8, 2024
ee19931
Merge branch 'repo-refactor' into cpu-kernels-tests
oOTigger Oct 15, 2024
5863880
test_utils refactor and clarity
oOTigger Oct 15, 2024
e869ace
formatting
oOTigger Oct 15, 2024
de230cb
comment removal reverse_kernels
oOTigger Oct 15, 2024
3fc8718
Issue #1435, tests for managed stream and handle
oOTigger Oct 16, 2024
d1c9e90
#1435 formatting
oOTigger Oct 16, 2024
7106dec
#1409 issue, change datatype for linear kernels away from void *
oOTigger Oct 16, 2024
51c3eb7
R & W accessor changes, minimize code bloat
oOTigger Nov 5, 2024
878cff1
code formatting and refactor
oOTigger Nov 16, 2024
42f1fce
issue #1502 & issue #1540
oOTigger Nov 22, 2024
8f05203
format check
oOTigger Nov 22, 2024
8db629d
Merge branch 'master' into cpu-kernels-tests
lockshaw Dec 24, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions lib/kernels/include/kernels/cast_kernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@

#include "device.h"
#include "kernels/accessor.h"
#include "kernels/ff_handle.h"
#include "op-attrs/activation.dtg.h"

namespace FlexFlow {
namespace Kernels {
Expand Down
27 changes: 27 additions & 0 deletions lib/kernels/include/kernels/cast_kernels_cpu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#ifndef _FLEXFLOW_OPS_KERNELS_CAST_KERNELS_CPU_H
#define _FLEXFLOW_OPS_KERNELS_CAST_KERNELS_CPU_H

#include "device.h"
#include "kernels/accessor.h"

namespace FlexFlow {
namespace Kernels {
namespace Cast {
namespace CPU {

/**
 * @brief CPU forward pass of the Cast operator.
 *
 * The definition in lib/kernels/src/cpu/cast_kernels.cc dispatches on the
 * (input_type, output_type) pair and stores static_cast<output element
 * type>(input[i]) into output[i] for input.shape.get_volume() elements.
 *
 * @param input       Read-only accessor for the source tensor.
 * @param output      Writable accessor for the destination tensor; the
 *                    visible implementation performs no size check on it.
 * @param input_type  Element type used to read `input`.
 * @param output_type Element type used to write `output`.
 */
void forward_kernel(GenericTensorAccessorR const &input,
GenericTensorAccessorW const &output,
DataType input_type,
DataType output_type);

/**
 * @brief CPU backward pass of the Cast operator.
 *
 * The definition in lib/kernels/src/cpu/cast_kernels.cc dispatches on the
 * (input_type, output_type) pair and, for input.shape.get_volume() elements,
 * computes output[i] = static_cast<output element type>(input[i])
 *                      + beta * output[i] with beta = cast_to<ODT>(1.0f),
 * i.e. the converted values are accumulated into `output` rather than
 * overwriting it.
 *
 * NOTE(review): despite the names, `input` is presumably the incoming
 * gradient and `output` the gradient being accumulated -- confirm with
 * callers.
 *
 * @param input       Read-only accessor that is converted and added.
 * @param output      Writable accessor that is read and updated in place.
 * @param input_type  Element type used to read `input`.
 * @param output_type Element type used to read/write `output`.
 */
void backward_kernel(GenericTensorAccessorR const &input,
GenericTensorAccessorW const &output,
DataType input_type,
DataType output_type);

} // namespace CPU
} // namespace Cast
} // namespace Kernels
} // namespace FlexFlow

#endif
23 changes: 23 additions & 0 deletions lib/kernels/include/kernels/combine_kernels_cpu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#ifndef _FLEXFLOW_OPS_KERNELS_COMBINE_KERNELS_CPU_H
#define _FLEXFLOW_OPS_KERNELS_COMBINE_KERNELS_CPU_H

#include "device.h"
#include "kernels/accessor.h"

namespace FlexFlow {
namespace Kernels {
namespace Combine {
namespace CPU {

/**
 * @brief CPU forward pass of the Combine operator.
 *
 * The definition in lib/kernels/src/cpu/combine_kernels.cc dispatches on
 * input.data_type and memcpy's
 * input.shape.get_volume() * size_of_datatype(DT) bytes from `input` to
 * `output`.
 *
 * @param input  Read-only accessor for the source tensor.
 * @param output Writable accessor for the destination tensor.
 */
void forward_kernel(GenericTensorAccessorR const &input,
GenericTensorAccessorW const &output);

/**
 * @brief CPU backward pass of the Combine operator.
 *
 * The definition in lib/kernels/src/cpu/combine_kernels.cc dispatches on
 * input_grad.data_type and performs input_grad[i] += output_grad[i] for
 * output_grad.shape.get_volume() elements (accumulation, not overwrite).
 *
 * @param output_grad Read-only accessor for the gradient being added.
 * @param input_grad  Writable accessor that is updated in place.
 */
void backward_kernel(GenericTensorAccessorR const &output_grad,
GenericTensorAccessorW const &input_grad);

} // namespace CPU
} // namespace Combine
} // namespace Kernels
} // namespace FlexFlow

#endif // _FLEXFLOW_OPS_KERNELS_COMBINE_KERNELS_CPU_H
22 changes: 22 additions & 0 deletions lib/kernels/include/kernels/local_cpu_allocator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#include "kernels/allocation.h"
#include <unordered_set>

namespace FlexFlow {

// IAllocator implementation declared for CPU-side allocations. Only the
// interface is visible in this header; allocate/deallocate and the destructor
// are defined elsewhere.
// NOTE(review): the 22-line header shown in this diff has no include guard or
// #pragma once -- confirm whether one is needed.
struct LocalCPUAllocator : public IAllocator {
LocalCPUAllocator() = default;

Check warning on line 7 in lib/kernels/include/kernels/local_cpu_allocator.h

View check run for this annotation

Codecov / codecov/patch

lib/kernels/include/kernels/local_cpu_allocator.h#L7

Added line #L7 was not covered by tests
// Copy and move construction are both disabled.
LocalCPUAllocator(LocalCPUAllocator const &) = delete;
LocalCPUAllocator(LocalCPUAllocator &&) = delete;
~LocalCPUAllocator() override;

// Overrides of the IAllocator interface; the size_t argument is unnamed here.
void *allocate(size_t) override;
void deallocate(void *) override;

private:
// Set of raw pointers. NOTE(review): presumably the blocks handed out by
// allocate() and not yet deallocated -- the implementation is not shown in
// this header, confirm.
std::unordered_set<void *> ptrs;
};
// Project macro applied to the type; its definition is not visible here.
CHECK_RC_COPY_VIRTUAL_COMPLIANT(LocalCPUAllocator);

// Factory returning an Allocator by value. The definition is not part of this
// header; NOTE(review): presumably it wraps a LocalCPUAllocator -- confirm.
Allocator create_local_cpu_memory_allocator();

} // namespace FlexFlow
24 changes: 24 additions & 0 deletions lib/kernels/include/kernels/replicate_kernels_cpu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#ifndef _FLEXFLOW_OPS_KERNELS_REPLICATE_KERNELS_CPU_H
#define _FLEXFLOW_OPS_KERNELS_REPLICATE_KERNELS_CPU_H

#include "device.h"
#include "kernels/accessor.h"

namespace FlexFlow {
namespace Kernels {
namespace Replicate {
namespace CPU {

/**
 * @brief CPU forward pass of the Replicate operator.
 *
 * The definition in lib/kernels/src/cpu/replicate_kernels.cc dispatches on
 * input.data_type and memcpy's
 * input.shape.num_elements() * size_of_datatype(T) bytes from `input` to
 * `output`; it performs a single copy and no per-replica duplication.
 *
 * @param input  Read-only accessor for the source tensor.
 * @param output Writable accessor for the destination tensor.
 */
void forward_kernel(GenericTensorAccessorR const &input,
GenericTensorAccessorW const &output);

/**
 * @brief CPU backward pass of the Replicate operator.
 *
 * The definition in lib/kernels/src/cpu/replicate_kernels.cc dispatches on
 * input.data_type. For each destination element it sums `num_replicas`
 * values of `output` spaced a fixed stride apart and stores the sum in
 * `input`, overwriting the previous value. See the definition for the
 * element-count convention it uses.
 *
 * @param input        Writable accessor receiving the per-element sums.
 * @param output       Read-only accessor holding the replicated values.
 * @param num_replicas Number of strided values summed per destination element.
 */
void backward_kernel(GenericTensorAccessorW const &input,
GenericTensorAccessorR const &output,
size_t num_replicas);

} // namespace CPU
} // namespace Replicate
} // namespace Kernels
} // namespace FlexFlow

#endif // _FLEXFLOW_OPS_KERNELS_REPLICATE_KERNELS_CPU_H
29 changes: 29 additions & 0 deletions lib/kernels/include/kernels/reverse_kernels_cpu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#ifndef _FLEXFLOW_OPS_KERNELS_REVERSE_KERNELS_CPU_H
#define _FLEXFLOW_OPS_KERNELS_REVERSE_KERNELS_CPU_H

#include "device.h"

namespace FlexFlow {
namespace Kernels {
namespace Reverse {
namespace CPU {

/**
 * @brief CPU forward pass of the Reverse operator (float only).
 *
 * The definition in lib/kernels/src/cpu/reverse_kernels.cc treats the data as
 * num_out_blks * reverse_dim_size * in_blk_size contiguous floats and copies
 * each run of in_blk_size elements to the mirrored position along the
 * reverse_dim_size axis within its outer block.
 *
 * @param in_ptr           Source buffer.
 * @param out_ptr          Destination buffer.
 * @param num_out_blks     Number of outer blocks.
 * @param reverse_dim_size Extent of the axis being reversed.
 * @param in_blk_size      Number of contiguous elements per index of the
 *                         reversed axis.
 * @param output_size      Not used by the CPU implementation.
 */
void forward_kernel(float const *in_ptr,
float *out_ptr,
coord_t num_out_blks,
coord_t reverse_dim_size,
coord_t in_blk_size,
coord_t output_size);

/**
 * @brief CPU backward pass of the Reverse operator (float only).
 *
 * The definition in lib/kernels/src/cpu/reverse_kernels.cc applies the same
 * index-mirroring copy as the forward pass, reading from out_grad_ptr and
 * writing (overwriting) in_grad_ptr.
 *
 * @param out_grad_ptr     Source buffer.
 * @param in_grad_ptr      Destination buffer.
 * @param num_out_blks     Number of outer blocks.
 * @param reverse_dim_size Extent of the axis being reversed.
 * @param in_blk_size      Number of contiguous elements per index of the
 *                         reversed axis.
 * @param input_size       Not used by the CPU implementation.
 */
void backward_kernel(float const *out_grad_ptr,
float *in_grad_ptr,
coord_t num_out_blks,
coord_t reverse_dim_size,
coord_t in_blk_size,
coord_t input_size);
} // namespace CPU
} // namespace Reverse
} // namespace Kernels
} // namespace FlexFlow

#endif // _FLEXFLOW_OPS_KERNELS_REVERSE_KERNELS_CPU_H
59 changes: 59 additions & 0 deletions lib/kernels/src/cpu/cast_kernels.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#include "kernels/cast_kernels_cpu.h"
#include "kernels/datatype_dispatch.h"

namespace FlexFlow {
namespace Kernels {
namespace Cast {
namespace CPU {

/**
 * Converts `volume` consecutive elements from IDT to ODT with static_cast,
 * reading from `input` and writing to `output`.
 *
 * Codecov (patch) reported this function as not covered by tests.
 */
template <typename IDT, typename ODT>
void cast_forward(IDT const *input, ODT *output, size_t volume) {
  size_t idx = 0;
  while (idx < volume) {
    output[idx] = static_cast<ODT>(input[idx]);
    ++idx;
  }
}

/**
 * Accumulating cast: for each of `volume` elements,
 *   output[k] = static_cast<ODT>(input[k]) + beta * output[k].
 * `output` is therefore both read and written.
 *
 * Codecov (patch) reported this function as not covered by tests.
 */
template <typename IDT, typename ODT>
void cast_backward(IDT const *input, ODT *output, size_t volume, ODT beta) {
  size_t idx = 0;
  while (idx < volume) {
    // Kept as a single expression so promotion/rounding match exactly.
    output[idx] = static_cast<ODT>(input[idx]) + beta * output[idx];
    ++idx;
  }
}

template <DataType IDT, DataType ODT>
struct ForwardKernel {
void operator()(GenericTensorAccessorR const &input,

Check warning on line 25 in lib/kernels/src/cpu/cast_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/cast_kernels.cc#L25

Added line #L25 was not covered by tests
GenericTensorAccessorW const &output) {
size_t volume = input.shape.get_volume();
cast_forward(input.get<IDT>(), output.get<ODT>(), volume);

Check warning on line 28 in lib/kernels/src/cpu/cast_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/cast_kernels.cc#L27-L28

Added lines #L27 - L28 were not covered by tests
}
};

template <DataType IDT, DataType ODT>
struct BackwardKernel {
void operator()(GenericTensorAccessorR const &input,

Check warning on line 34 in lib/kernels/src/cpu/cast_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/cast_kernels.cc#L34

Added line #L34 was not covered by tests
GenericTensorAccessorW const &output) {
size_t volume = input.shape.get_volume();
cast_backward(
input.get<IDT>(), output.get<ODT>(), volume, cast_to<ODT>(1.0f));

Check warning on line 38 in lib/kernels/src/cpu/cast_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/cast_kernels.cc#L36-L38

Added lines #L36 - L38 were not covered by tests
}
};

void forward_kernel(GenericTensorAccessorR const &input,

Check warning on line 42 in lib/kernels/src/cpu/cast_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/cast_kernels.cc#L42

Added line #L42 was not covered by tests
GenericTensorAccessorW const &output,
DataType input_type,
DataType output_type) {
DataTypeDispatch2<ForwardKernel>{}(input_type, output_type, input, output);

Check warning on line 46 in lib/kernels/src/cpu/cast_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/cast_kernels.cc#L46

Added line #L46 was not covered by tests
}

void backward_kernel(GenericTensorAccessorR const &input,

Check warning on line 49 in lib/kernels/src/cpu/cast_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/cast_kernels.cc#L49

Added line #L49 was not covered by tests
GenericTensorAccessorW const &output,
DataType input_type,
DataType output_type) {
DataTypeDispatch2<BackwardKernel>{}(input_type, output_type, input, output);

Check warning on line 53 in lib/kernels/src/cpu/cast_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/cast_kernels.cc#L53

Added line #L53 was not covered by tests
}

} // namespace CPU
} // namespace Cast
} // namespace Kernels
} // namespace FlexFlow
44 changes: 44 additions & 0 deletions lib/kernels/src/cpu/combine_kernels.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#include "kernels/combine_kernels_cpu.h"
#include "kernels/datatype_dispatch.h"

namespace FlexFlow {
namespace Kernels {
namespace Combine {
namespace CPU {

template <DataType DT>
struct ForwardKernel {
void operator()(GenericTensorAccessorR const &input,

Check warning on line 11 in lib/kernels/src/cpu/combine_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/combine_kernels.cc#L11

Added line #L11 was not covered by tests
GenericTensorAccessorW const &output) {
memcpy(output.get<DT>(),
input.get<DT>(),
input.shape.get_volume() * size_of_datatype(DT));

Check warning on line 15 in lib/kernels/src/cpu/combine_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/combine_kernels.cc#L13-L15

Added lines #L13 - L15 were not covered by tests
}
};

template <DataType DT>
struct BackwardKernel {
void operator()(GenericTensorAccessorR const &output_grad,

Check warning on line 21 in lib/kernels/src/cpu/combine_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/combine_kernels.cc#L21

Added line #L21 was not covered by tests
GenericTensorAccessorW const &input_grad) {
size_t num_elements = output_grad.shape.get_volume();

Check warning on line 23 in lib/kernels/src/cpu/combine_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/combine_kernels.cc#L23

Added line #L23 was not covered by tests
for (int i = 0; i < num_elements; ++i) {
input_grad.get<DT>()[i] += output_grad.get<DT>()[i];

Check warning on line 25 in lib/kernels/src/cpu/combine_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/combine_kernels.cc#L25

Added line #L25 was not covered by tests
}
}
};

void forward_kernel(GenericTensorAccessorR const &input,

Check warning on line 30 in lib/kernels/src/cpu/combine_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/combine_kernels.cc#L30

Added line #L30 was not covered by tests
GenericTensorAccessorW const &output) {
DataTypeDispatch1<ForwardKernel>{}(input.data_type, input, output);

Check warning on line 32 in lib/kernels/src/cpu/combine_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/combine_kernels.cc#L32

Added line #L32 was not covered by tests
}

void backward_kernel(GenericTensorAccessorR const &output_grad,

Check warning on line 35 in lib/kernels/src/cpu/combine_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/combine_kernels.cc#L35

Added line #L35 was not covered by tests
GenericTensorAccessorW const &input_grad) {
DataTypeDispatch1<BackwardKernel>{}(
input_grad.data_type, output_grad, input_grad);

Check warning on line 38 in lib/kernels/src/cpu/combine_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/combine_kernels.cc#L37-L38

Added lines #L37 - L38 were not covered by tests
}

} // namespace CPU
} // namespace Combine
} // namespace Kernels
} // namespace FlexFlow
61 changes: 61 additions & 0 deletions lib/kernels/src/cpu/replicate_kernels.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#include "kernels/datatype_dispatch.h"
#include "kernels/replicate_kernels_cpu.h"

namespace FlexFlow {
namespace Kernels {
namespace Replicate {
namespace CPU {

/**
 * For each of `num_elements` destination slots, adds up `num_replicas`
 * values of `output` taken `num_elements` apart (starting at the slot's own
 * index) and stores the total in `input`, overwriting what was there.
 *
 * Codecov (patch) reported this function as not covered by tests.
 */
template <typename T>
void replicate_backward_kernel(T *input,
                               T const *output,
                               size_t num_elements,
                               size_t num_replicas) {
  for (size_t elem = 0; elem < num_elements; ++elem) {
    T total = 0;
    size_t src_idx = elem;
    for (size_t replica = 0; replica < num_replicas; ++replica) {
      total += output[src_idx];
      src_idx += num_elements;
    }
    input[elem] = total;
  }
}

// Why does replicate forward seem to only transfer memory? Shouldn't it also
// handle the replication?
template <DataType T>
struct ForwardKernel {
void operator()(GenericTensorAccessorR const &input,

Check warning on line 27 in lib/kernels/src/cpu/replicate_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/replicate_kernels.cc#L27

Added line #L27 was not covered by tests
GenericTensorAccessorW const &output) {
memcpy(output.get<T>(),
input.get<T>(),
input.shape.num_elements() * size_of_datatype(T));

Check warning on line 31 in lib/kernels/src/cpu/replicate_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/replicate_kernels.cc#L29-L31

Added lines #L29 - L31 were not covered by tests
}
};

template <DataType T>
struct BackwardKernel {
void operator()(GenericTensorAccessorW const &input,

Check warning on line 37 in lib/kernels/src/cpu/replicate_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/replicate_kernels.cc#L37

Added line #L37 was not covered by tests
GenericTensorAccessorR const &output,
size_t num_replicas) {
size_t total_elements = input.shape.num_elements() * num_replicas;
replicate_backward_kernel(

Check warning on line 41 in lib/kernels/src/cpu/replicate_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/replicate_kernels.cc#L40-L41

Added lines #L40 - L41 were not covered by tests
input.get<T>(), output.get<T>(), total_elements, num_replicas);
}
};

void forward_kernel(GenericTensorAccessorR const &input,

Check warning on line 46 in lib/kernels/src/cpu/replicate_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/replicate_kernels.cc#L46

Added line #L46 was not covered by tests
GenericTensorAccessorW const &output) {
DataTypeDispatch1<ForwardKernel>{}(input.data_type, input, output);

Check warning on line 48 in lib/kernels/src/cpu/replicate_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/replicate_kernels.cc#L48

Added line #L48 was not covered by tests
}

void backward_kernel(GenericTensorAccessorW const &input,

Check warning on line 51 in lib/kernels/src/cpu/replicate_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/replicate_kernels.cc#L51

Added line #L51 was not covered by tests
GenericTensorAccessorR const &output,
size_t num_replicas) {
DataTypeDispatch1<BackwardKernel>{}(
input.data_type, input, output, num_replicas);

Check warning on line 55 in lib/kernels/src/cpu/replicate_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/replicate_kernels.cc#L54-L55

Added lines #L54 - L55 were not covered by tests
}

} // namespace CPU
} // namespace Replicate
} // namespace Kernels
} // namespace FlexFlow
48 changes: 48 additions & 0 deletions lib/kernels/src/cpu/reverse_kernels.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#include "kernels/reverse_kernels_cpu.h"

namespace FlexFlow {
namespace Kernels {
namespace Reverse {
namespace CPU {

void reverse_forward_kernel(float const *in_ptr,

Check warning on line 8 in lib/kernels/src/cpu/reverse_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/reverse_kernels.cc#L8

Added line #L8 was not covered by tests
float *out_ptr,
coord_t num_out_blks,
coord_t reverse_dim_size,
coord_t in_blk_size) {
coord_t total_elements = num_out_blks * reverse_dim_size * in_blk_size;

Check warning on line 13 in lib/kernels/src/cpu/reverse_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/reverse_kernels.cc#L13

Added line #L13 was not covered by tests
for (coord_t i = 0; i < total_elements; ++i) {
coord_t blk_idx = i / (reverse_dim_size * in_blk_size);
coord_t offset = i - blk_idx * (reverse_dim_size * in_blk_size);
coord_t reverse_dim_idx = offset / in_blk_size;
coord_t in_idx = blk_idx * (reverse_dim_size * in_blk_size) +
(reverse_dim_size - 1 - reverse_dim_idx) * in_blk_size +
(offset % in_blk_size);
out_ptr[i] = in_ptr[in_idx];

Check warning on line 21 in lib/kernels/src/cpu/reverse_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/reverse_kernels.cc#L15-L21

Added lines #L15 - L21 were not covered by tests
}
}

void forward_kernel(float const *in_ptr,

Check warning on line 25 in lib/kernels/src/cpu/reverse_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/reverse_kernels.cc#L25

Added line #L25 was not covered by tests
float *out_ptr,
coord_t num_out_blks,
coord_t reverse_dim_size,
coord_t in_blk_size,
coord_t output_size) {
reverse_forward_kernel(

Check warning on line 31 in lib/kernels/src/cpu/reverse_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/reverse_kernels.cc#L31

Added line #L31 was not covered by tests
in_ptr, out_ptr, num_out_blks, reverse_dim_size, in_blk_size);
}

void backward_kernel(float const *out_grad_ptr,

Check warning on line 35 in lib/kernels/src/cpu/reverse_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/reverse_kernels.cc#L35

Added line #L35 was not covered by tests
float *in_grad_ptr,
coord_t num_out_blks,
coord_t reverse_dim_size,
coord_t in_blk_size,
coord_t input_size) {
reverse_forward_kernel(

Check warning on line 41 in lib/kernels/src/cpu/reverse_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/reverse_kernels.cc#L41

Added line #L41 was not covered by tests
out_grad_ptr, in_grad_ptr, num_out_blks, reverse_dim_size, in_blk_size);
}

} // namespace CPU
} // namespace Reverse
} // namespace Kernels
} // namespace FlexFlow
Loading
Loading