diff --git a/hls4ml/backends/__init__.py b/hls4ml/backends/__init__.py index 1f60bdb44..68562d75e 100644 --- a/hls4ml/backends/__init__.py +++ b/hls4ml/backends/__init__.py @@ -2,16 +2,20 @@ from hls4ml.backends.fpga.fpga_backend import FPGABackend # noqa: F401 from hls4ml.backends.quartus.quartus_backend import QuartusBackend from hls4ml.backends.symbolic.symbolic_backend import SymbolicExpressionBackend - +from hls4ml.backends.vitis_accelerator_ip_flow.vitis_accelerator_ip_flow_config import ( # noqa: F401 + VitisAcceleratorIPFlowConfig, +) from hls4ml.backends.vivado.vivado_backend import VivadoBackend from hls4ml.backends.vivado_accelerator.vivado_accelerator_backend import VivadoAcceleratorBackend from hls4ml.backends.vivado_accelerator.vivado_accelerator_config import VivadoAcceleratorConfig # noqa: F401 +from hls4ml.backends.vitis_accelerator_ip_flow.vitis_accelerator_ip_flow_backend import ( # isort: skip + VitisAcceleratorIPFlowBackend, +) + from hls4ml.backends.catapult.catapult_backend import CatapultBackend # isort: skip from hls4ml.backends.vitis.vitis_backend import VitisBackend # isort: skip -from hls4ml.backends.vitis_accelerator_ip_flow.vitis_accelerator_ip_flow_backend import VitisAcceleratorIPFlowBackend # isort: skip -from hls4ml.backends.vitis_accelerator_ip_flow.vitis_accelerator_ip_flow_config import VitisAcceleratorIPFlowConfig # noqa: F401 register_backend('Vivado', VivadoBackend) register_backend('VivadoAccelerator', VivadoAcceleratorBackend) diff --git a/hls4ml/backends/vitis_accelerator_ip_flow/vitis_accelerator_ip_flow_backend.py b/hls4ml/backends/vitis_accelerator_ip_flow/vitis_accelerator_ip_flow_backend.py index 6ade53b39..cd57df5a4 100644 --- a/hls4ml/backends/vitis_accelerator_ip_flow/vitis_accelerator_ip_flow_backend.py +++ b/hls4ml/backends/vitis_accelerator_ip_flow/vitis_accelerator_ip_flow_backend.py @@ -37,22 +37,19 @@ def build( # fifo_opt=fifo_opt, ) # Get Config to view Board and Platform - from hls4ml.backends import VitisAcceleratorIPFlowConfig + # from hls4ml.backends import VitisAcceleratorIPFlowConfig - vitis_accelerator_ip_flow_config = VitisAcceleratorIPFlowConfig( - model.config, model.get_input_variables(), model.get_output_variables() - ) + # vitis_accelerator_ip_flow_config = VitisAcceleratorIPFlowConfig( + # model.config, model.get_input_variables(), model.get_output_variables() + # ) # now make a bitfile if bitfile: - # if vitis_accelerator_config.get_board().startswith('alveo'): - # self.make_xclbin(model, vitis_accelerator_config.get_platform()) - # else: curr_dir = os.getcwd() os.chdir(model.config.get_output_dir()) try: - os.system('vivado -mode batch -source design.tcl') # check if this is accepted as a command + os.system('vivado -mode batch -source design.tcl') # check if this is accepted as a command except Exception: - print("Something went wrong, check the Vivado logs") + print("Something went wrong, check the Vivado logs") os.chdir(curr_dir) return parse_vivado_report(model.config.get_output_dir()) diff --git a/hls4ml/backends/vitis_accelerator_ip_flow/vitis_accelerator_ip_flow_config.py b/hls4ml/backends/vitis_accelerator_ip_flow/vitis_accelerator_ip_flow_config.py index d00e54a28..07961a9b6 100644 --- a/hls4ml/backends/vitis_accelerator_ip_flow/vitis_accelerator_ip_flow_config.py +++ b/hls4ml/backends/vitis_accelerator_ip_flow/vitis_accelerator_ip_flow_config.py @@ -133,7 +133,14 @@ def get_driver_path(self): if self.board.startswith('alveo'): return '../templates/vitis_accelerator_ip_flow/' + 'alveo/' + self.driver + '_drivers/' + self.get_driver_file() else: - return '../templates/vitis_accelerator_ip_flow/' + self.board + '/' + self.driver + '_drivers/' + self.get_driver_file() + return ( + '../templates/vitis_accelerator_ip_flow/' + + self.board + + '/' + + self.driver + + '_drivers/' + + self.get_driver_file() + ) def get_driver_file(self): driver_ext = '.py' if self.driver == 'python' else '.h' diff --git a/hls4ml/templates/vitis_accelerator_ip_flow/myproject_axi.cpp b/hls4ml/templates/vitis_accelerator_ip_flow/myproject_axi.cpp index 01238643e..cf6c0b9c2 100644 --- a/hls4ml/templates/vitis_accelerator_ip_flow/myproject_axi.cpp +++ b/hls4ml/templates/vitis_accelerator_ip_flow/myproject_axi.cpp @@ -1,6 +1,6 @@ // hls-fpga-machine-learning insert include -void myproject_axi(hls::stream< my_pkt > &in, hls::stream< my_pkt > &out) { +void myproject_axi(hls::stream &in, hls::stream &out) { // hls-fpga-machine-learning insert interface diff --git a/hls4ml/templates/vitis_accelerator_ip_flow/myproject_axi.h b/hls4ml/templates/vitis_accelerator_ip_flow/myproject_axi.h index d49f98ba1..d0d88bfec 100644 --- a/hls4ml/templates/vitis_accelerator_ip_flow/myproject_axi.h +++ b/hls4ml/templates/vitis_accelerator_ip_flow/myproject_axi.h @@ -6,5 +6,5 @@ // hls-fpga-machine-learning insert definitions -void myproject_axi(hls::stream< my_pkt > &in, hls::stream< my_pkt > &out); +void myproject_axi(hls::stream &in, hls::stream &out); #endif diff --git a/hls4ml/templates/vitis_accelerator_ip_flow/zcu102/python_drivers/axi_stream_driver.py b/hls4ml/templates/vitis_accelerator_ip_flow/zcu102/python_drivers/axi_stream_driver.py index fda308e9c..1d70e5540 100644 --- a/hls4ml/templates/vitis_accelerator_ip_flow/zcu102/python_drivers/axi_stream_driver.py +++ b/hls4ml/templates/vitis_accelerator_ip_flow/zcu102/python_drivers/axi_stream_driver.py @@ -1,25 +1,23 @@ -from pynq import DefaultHierarchy, DefaultIP, allocate -from pynq import Overlay from datetime import datetime -import pynq.lib.dma + import numpy as np -from pynq import PL +from pynq import PL, Overlay, allocate class NeuralNetworkOverlay(Overlay): def __init__(self, bitfile_name, dtbo=None, download=True, ignore_version=False, device=None): super().__init__(bitfile_name, dtbo=None, download=True, ignore_version=False, device=None) - + def _print_dt(self, timea, timeb, N): - dt = (timeb - timea) + dt = timeb - timea dts = dt.seconds + dt.microseconds * 10**-6 rate = N / dts - print("Classified {} samples in {} seconds ({} inferences / s)".format(N, dts, rate)) + print(f"Classified {N} samples in {dts} seconds ({rate} inferences / s)") return dts, rate - + def reset_PL(): PL.reset() - + def predict(self, X, y_shape, dtype=np.float32, debug=None, profile=False, encode=None, decode=None): """ Obtain the predictions of the NN implemented in the FPGA. @@ -27,14 +25,14 @@ def predict(self, X, y_shape, dtype=np.float32, debug=None, profile=False, encod - X : the input vector. Should be numpy ndarray. - y_shape : the shape of the output vector. Needed to the accelerator to set the TLAST bit properly and for sizing the output vector shape. - - dtype : the data type of the elements of the input/output vectors. - Note: it should be set depending on the interface of the accelerator; if it uses 'float' - types for the 'data' AXI-Stream field, 'np.float32' dtype is the correct one to use. + - dtype : the data type of the elements of the input/output vectors. + Note: it should be set depending on the interface of the accelerator; if it uses 'float' + types for the 'data' AXI-Stream field, 'np.float32' dtype is the correct one to use. Instead if it uses 'ap_fixed', 'np.intA' is the correct one to use (note that A cannot - any integer value, but it can assume {..., 8, 16, 32, ...} values. Check `numpy` + any integer value, but it can assume {..., 8, 16, 32, ...} values. Check `numpy` doc for more info). - In this case the encoding/decoding has to be computed by the PS. For example for - 'ap_fixed<16,6>' type the following 2 functions are the correct one to use for encode/decode + In this case the encoding/decoding has to be computed by the PS. For example for + 'ap_fixed<16,6>' type the following 2 functions are the correct one to use for encode/decode 'float' -> 'ap_fixed<16,6>': ``` def encode(xi): @@ -52,13 +50,12 @@ def decode(yi): if encode is not None: X = encode(X) - with allocate(shape=X.shape, dtype=dtype) as input_buffer, \ - allocate(shape=y_shape, dtype=dtype) as output_buffer: + with allocate(shape=X.shape, dtype=dtype) as input_buffer, allocate(shape=y_shape, dtype=dtype) as output_buffer: input_buffer[:] = X - + if profile: timea = datetime.now() - + self.axi_dma_0.sendchannel.transfer(input_buffer) self.axi_dma_0.recvchannel.transfer(output_buffer) if debug: @@ -67,20 +64,20 @@ def decode(yi): if debug: print("Send OK") self.axi_dma_0.recvchannel.wait() - + if profile: timeb = datetime.now() - + if debug: print("Receive OK") - + result = output_buffer.copy() - + if decode is not None: result = decode(result) - + if profile: dts, rate = self._print_dt(timea, timeb, len(X)) return result, dts, rate - return result \ No newline at end of file + return result diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_helpers.h b/hls4ml/templates/vivado/nnet_utils/nnet_helpers.h index 389d68708..2a695d4e5 100644 --- a/hls4ml/templates/vivado/nnet_utils/nnet_helpers.h +++ b/hls4ml/templates/vivado/nnet_utils/nnet_helpers.h @@ -167,8 +167,9 @@ template void convert_data(hls::stre } #ifdef VITIS_ACCELERATOR_IP_FLOW -//todo avoid hardcoding hls::axis and use template -template void convert_data(srcType *src, hls::stream> &dst) { +// todo avoid hardcoding hls::axis and use template +template +void convert_data(srcType *src, hls::stream> &dst) { for (size_t i = 0; i < SIZE; i++) { hls::axis ctype; ctype.data = dstType(src[i]); @@ -176,7 +177,8 @@ template void convert_data(srcTyp } } -template void convert_data(hls::stream> &src, dstType *dst) { +template +void convert_data(hls::stream> &src, dstType *dst) { for (size_t i = 0; i < SIZE; i++) { hls::axis ctype = src.read(); dst[i] = dstType(ctype.data); @@ -323,8 +325,7 @@ template void print_result(res_T result[SIZE], std::o out << std::endl; } -template ::value, int>::type = 0> +template ::value, int>::type = 0> void print_result(hls::stream &result, std::ostream &out, bool keep = false) { for (int i = 0; i < SIZE / res_T::size; i++) { res_T res_pack = result.read(); @@ -333,29 +334,29 @@ void print_result(hls::stream &result, std::ostream &out, bool keep = fal } if (keep) { result.write(res_pack); - } + } } out << std::endl; } // compatible with Vitis Accelerator for res_T = hls::axis<...> and io_parallel -template ::value, int>::type = 0> +template ::value, int>::type = 0> void print_result(hls::stream &result, std::ostream &out, bool keep = false) { for (int i = 0; i < SIZE; i++) { res_T res_pack = result.read(); - + out << res_pack.data << " "; - + if (keep) { result.write(res_pack); - } + } } out << std::endl; } // compatible with Vitis Accelerator for res_T = hls::axis and io_stream -template void print_result(hls::stream &result, std::ostream &out, bool keep = false) { +template +void print_result(hls::stream &result, std::ostream &out, bool keep = false) { for (int i = 0; i < SIZE / underlying_res_T::size; i++) { res_T res_pack; for (int j = 0; j < underlying_res_T::size; j++) { @@ -363,16 +364,15 @@ template void print_result(hl out << res_pack.data << " "; if (keep) { result.write(res_pack); - } - } + } + } } out << std::endl; } template void fill_zero(data_T data[SIZE]) { std::fill_n(data, SIZE, 0.); } -template ::value, int>::type = 0> +template ::value, int>::type = 0> void fill_zero(hls::stream &data) { for (int i = 0; i < SIZE / data_T::size; i++) { data_T data_pack; @@ -383,16 +383,14 @@ void fill_zero(hls::stream &data) { } } -template ::value, int>::type = 0> +template ::value, int>::type = 0> void fill_zero(hls::stream &data) { for (int i = 0; i < SIZE; i++) { data_T data_pack; data_pack.data = 0.; - if (i==SIZE-1) { + if (i == SIZE - 1) { data_pack.last = 1; - } - else { + } else { data_pack.last = 0; } data.write(data_pack); @@ -405,15 +403,13 @@ template void fill_zero(hls data_T data_pack; for (int j = 0; j < underlying_data_T::size; j++) { data_pack.data = 0.; - if ((i==(SIZE / underlying_data_T::size-1)) && (j==(underlying_data_T::size-1))) { + if ((i == (SIZE / underlying_data_T::size - 1)) && (j == (underlying_data_T::size - 1))) { data_pack.last = 1; - } - else { + } else { data_pack.last = 0; } data.write(data_pack); } - } } diff --git a/hls4ml/writer/__init__.py b/hls4ml/writer/__init__.py index 70a2eabd7..31238b18c 100644 --- a/hls4ml/writer/__init__.py +++ b/hls4ml/writer/__init__.py @@ -1,8 +1,8 @@ from hls4ml.writer.catapult_writer import CatapultWriter from hls4ml.writer.quartus_writer import QuartusWriter from hls4ml.writer.symbolic_writer import SymbolicExpressionWriter -from hls4ml.writer.vitis_writer import VitisWriter from hls4ml.writer.vitis_accelerator_ip_flow_writer import VitisAcceleratorIPFlowWriter +from hls4ml.writer.vitis_writer import VitisWriter from hls4ml.writer.vivado_accelerator_writer import VivadoAcceleratorWriter from hls4ml.writer.vivado_writer import VivadoWriter from hls4ml.writer.writers import Writer, get_writer, register_writer # noqa: F401 diff --git a/hls4ml/writer/vitis_accelerator_ip_flow_writer.py b/hls4ml/writer/vitis_accelerator_ip_flow_writer.py index 535a43b4b..78e1fa982 100644 --- a/hls4ml/writer/vitis_accelerator_ip_flow_writer.py +++ b/hls4ml/writer/vitis_accelerator_ip_flow_writer.py @@ -40,8 +40,10 @@ def write_axi_wrapper(self, model): newline += f'static const unsigned N_IN = {inp.size()};\n' newline += f'static const unsigned N_OUT = {out.size()};\n' if self.vitis_accelerator_ip_flow_config.get_interface() == 'axi_stream': - newline += f'typedef hls::axis my_pkt;\n' - else: # TODO: handle this case + newline += 'typedef hls::axis my_pkt;\n' + # might need to make "float" a variable according to the + # configuration set by the user and the DMA available data widths + else: # TODO: handle this case newline += f'typedef {inp_axi_t} input_axi_t;\n' newline += f'typedef {out_axi_t} output_axi_t;\n' else: @@ -68,9 +70,9 @@ def write_axi_wrapper(self, model): newline = '' if self.vitis_accelerator_ip_flow_config.get_interface() == 'axi_stream': newline += indent + 'bool is_last = false;\n' - if io_type == 'io_parallel': # TODO: handle io_parallel + if io_type == 'io_parallel': # TODO: handle io_parallel newline += indent + inp.type.name + ' in_local[N_IN];\n' - newline += indent + out.type.name + ' out_local[N_OUT];\n' + newline += indent + out.type.name + ' out_local[N_OUT];\n' newline += indent + 'my_pkt tmp;\n' elif io_type == 'io_stream': newline += indent + 'hls::stream<' + inp.type.name + '> in_local("input_1");\n' @@ -84,12 +86,12 @@ def write_axi_wrapper(self, model): elif '// hls-fpga-machine-learning insert call' in line: newline = indent + f'{model.config.get_project_name()}(in_local, out_local);\n' elif '// hls-fpga-machine-learning insert interface' in line: - if self.vitis_accelerator_ip_flow_config.get_interface() == 'axi_lite': # TODO: handle axi_lite + if self.vitis_accelerator_ip_flow_config.get_interface() == 'axi_lite': # TODO: handle axi_lite newline = '' newline += indent + '#pragma HLS INTERFACE ap_ctrl_none port=return\n' newline += indent + '#pragma HLS INTERFACE s_axilite port=in\n' newline += indent + '#pragma HLS INTERFACE s_axilite port=out\n' - elif self.vitis_accelerator_ip_flow_config.get_interface() == 'axi_master': # TODO: handle axi_master + elif self.vitis_accelerator_ip_flow_config.get_interface() == 'axi_master': # TODO: handle axi_master newline = '' newline += indent + '#pragma HLS INTERFACE s_axilite port=return bundle=CTRL_BUS\n' newline += indent + '#pragma HLS INTERFACE m_axi depth={} port=in offset=slave bundle=IN_BUS\n'.format( @@ -107,7 +109,7 @@ def write_axi_wrapper(self, model): newline += indent + '#pragma HLS DATAFLOW\n' elif '// hls-fpga-machine-learning insert enqueue' in line: io_type = model.config.get_config_value("IOType") - if io_type == 'io_parallel': # TODO: handle io_parallel + if io_type == 'io_parallel': # TODO: handle io_parallel newline = '' newline += indent + 'for(unsigned i = 0; i < N_IN; i++){\n' if self.vitis_accelerator_ip_flow_config.get_interface() == 'axi_stream': @@ -132,22 +134,10 @@ def write_axi_wrapper(self, model): newline += indent + indent + 'for(unsigned j = 0; j < {input_t}::size; j++) {{\n' # newline += indent + indent + indent + '#pragma HLS UNROLL\n' # TODO: check if needed if self.vitis_accelerator_ip_flow_config.get_interface() == 'axi_stream': - newline += ( - indent - + indent - + indent - + 'in.read(tmp);\n' - ) - newline += ( - indent - + indent - + indent - + 'ctype[j] = tmp.data;\n' - ) - newline += ( - indent + indent + indent + 'is_last = tmp.last;\n' - ) - else: # TODO: handle this case + newline += indent + indent + indent + 'in.read(tmp);\n' + newline += indent + indent + indent + 'ctype[j] = tmp.data;\n' + newline += indent + indent + indent + 'is_last = tmp.last;\n' + else: # TODO: handle this case newline += ( indent + indent @@ -181,17 +171,11 @@ def write_axi_wrapper(self, model): newline += indent + indent + 'for(unsigned j = 0; j < {result_t}::size; j++) {{\n' # newline += indent + indent + indent + '#pragma HLS UNROLL\n' if self.vitis_accelerator_ip_flow_config.get_interface() == 'axi_stream': - newline += ( - indent + indent + indent + f'tmp.data = ({inp_axi_t}) (ctype[j]);\n' - ) + newline += indent + indent + indent + f'tmp.data = ({inp_axi_t}) (ctype[j]);\n' - newline += ( - indent + indent + indent + 'if(is_last) {{tmp.last = (((i+1)*(j+1))==N_OUT);}}\n' - ) + newline += indent + indent + indent + 'if(is_last) {{tmp.last = (((i+1)*(j+1))==N_OUT);}}\n' - newline += ( - indent + indent + indent + 'out.write(tmp);\n' - ) + newline += indent + indent + indent + 'out.write(tmp);\n' else: newline += indent + indent + indent + 'out[i * {result_t}::size + j] = output_axi_t(ctype[j]);\n' newline += indent + indent + '}}\n' @@ -253,7 +237,7 @@ def write_wrapper_test(self, model): ################### oldfile = f'{model.config.get_output_dir()}/{model.config.get_project_name()}_test.cpp' newfile = f'{model.config.get_output_dir()}/{model.config.get_project_name()}_test_wrapper.cpp' - + inp_axi_t, out_axi_t, inp, out = self.vitis_accelerator_ip_flow_config.get_corrected_types() f = open(oldfile) @@ -278,19 +262,15 @@ def write_wrapper_test(self, model): indent_amount = line.split(model.config.get_project_name())[0] newline = indent_amount + f'{model.config.get_project_name()}_axi(inputs,outputs);\n' elif inp.size_cpp() in line or inp.name in line or inp.type.name in line: - newline = ( - line.replace(inp.size_cpp(), 'N_IN').replace(inp.name, 'inputs').replace(inp.type.name, 'my_pkt') - ) + newline = line.replace(inp.size_cpp(), 'N_IN').replace(inp.name, 'inputs').replace(inp.type.name, 'my_pkt') elif out.size_cpp() in line or out.name in line or out.type.name in line: - newline = ( - line.replace(out.size_cpp(), 'N_OUT').replace(out.name, 'outputs').replace(out.type.name, 'my_pkt') - ) + newline = line.replace(out.size_cpp(), 'N_OUT').replace(out.name, 'outputs').replace(out.type.name, 'my_pkt') else: newline = line if self.vitis_accelerator_ip_flow_config.get_interface() == 'axi_stream': if 'copy_data' in line: newline = newline.replace('copy_data', 'copy_data_axi').replace("0,", "") - + if io_type == 'io_stream': if 'nnet::fill_zero' in line: newline = newline.replace("nnet::fill_zero<", f"nnet::fill_zero<{inp.type.name}, ") @@ -331,7 +311,7 @@ def write_wrapper_test(self, model): elif inp.size_cpp() in line or inp.name in line or inp.type.name in line: newline = line.replace(inp.size_cpp(), 'N_IN').replace(inp.type.name, inp_axi_t) elif out.size_cpp() in line or out.name in line or out.type.name in line: - newline = line.replace(out.size_cpp(), 'N_OUT').replace(out.type.name, out_axi_t) + newline = line.replace(out.size_cpp(), 'N_OUT').replace(out.type.name, out_axi_t) else: newline = line fout.write(newline)