From 5f7074352a78d7df08d8d3fe2e51e56680a2c9d2 Mon Sep 17 00:00:00 2001 From: DrasLorus Date: Sun, 1 Aug 2021 21:15:58 +0200 Subject: [PATCH] Add new schedules HEADED, TAILED and LOOPED. - Also add some comments and correct README --- Makefile | 25 ++++- README.md | 33 ++++--- sources/modules/max.hpp | 134 ++++++++++++++++++++++++--- sources/testbenches/catch.cpp | 2 + sources/testbenches/max_catch_tb.cpp | 55 +++++++++++ sources/testbenches/max_tb.cpp | 64 +++++++------ 6 files changed, 252 insertions(+), 61 deletions(-) create mode 100644 sources/testbenches/catch.cpp create mode 100644 sources/testbenches/max_catch_tb.cpp diff --git a/Makefile b/Makefile index f819643..8abb10a 100644 --- a/Makefile +++ b/Makefile @@ -32,20 +32,31 @@ VITIS_HOME=${XILINX_HOME}/Vitis_HLS/${XILINX_VER} CXXFLAGS:=${CXXFLAGS} -isystem ${VITIS_HOME}/include SRCDIR=sources - +OBJDIR=obj BINDIR=bin -SOURCES:=$(SRCDIR)/modules/max.cpp $(SRCDIR)/testbenches/max_tb.cpp + +CATCHOBJ=$(OBJDIR)/catch.cpp.o + +SOURCES:=$(SRCDIR)/modules/max.cpp +SOURCES+=$(SRCDIR)/testbenches/max_catch_tb.cpp + OBJ=$(SOURCES:.cpp=.cpp.o) TBBIN=$(BINDIR)/tb.out -all: $(BINDIR) $(TBBIN) +all: $(BINDIR) $(OBJDIR) $(TBBIN) $(BINDIR): mkdir -p $(BINDIR) -$(TBBIN): $(OBJ) - $(CXX) -o $(TBBIN) $(OBJ) $(LDFLAGS) +$(OBJDIR): + mkdir -p $(OBJDIR) + +$(TBBIN): $(OBJ) $(CATCHOBJ) + $(CXX) -o $(TBBIN) $(OBJ) $(CATCHOBJ) $(LDFLAGS) + +$(CATCHOBJ) : + $(CXX) -o $@ -c $(SRCDIR)/testbenches/catch.cpp $(CXXFLAGS) %.cpp.o : %.cpp $(CXX) -o $@ -c $< $(CXXFLAGS) @@ -58,6 +69,10 @@ clean: clear: clean rm -vf $(TBBIN) $(BINDIR)/results.dat $(COMPDB_ENTRIES) compile_commands.json +.PHONY: clean clear +fullclear: clear + rm -vf $(CATCHOBJ) + testbench: $(TBBIN) cd $(BINDIR); ../$(TBBIN) diff --git a/README.md b/README.md index 1186148..b6ebe8c 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,25 @@ # hls_max_template -An efficient C++14 description of a max process which finds the maximum of a real array. 
+An efficient C++14 description of a max process which finds the maximum of a real array. Used in High Level Synthesis, produce the most efficient maximum possible by doing reduction at compile time. -Currently, it produces log2(*N*) (+1 if *N* is odd) stages of *integer_part(N/2)* comparisons, can be easily unrolled or pipelined. +Currently, it produces log2(*N*) (+1 if *N* is odd) stages of *integer_part(N/2)* comparisons, which can be easily unrolled and can make use of pipelines. ## Content + This repository provides a [header](sources/modules/max.hpp) which defines two recursive template classes with a `process` public method that find the maximum of an array of any size and any type. `max_template` can be used for all possible sizes while `max_pow2` works only with sizes that are power of 2. `max_struct` is used to implement `max_template` and may not be used directly. ## Usage -After including the header ( with `#include "max.hpp"` ), just call the process method like that: +After including the header (with `#include "max.hpp"`), just call the process method like this: - const T max_array = max_template::process(array); - -where `T` is a correct type (e.g. `int`, `float`, `ap_uint`, ...), `SIZE` is an unsigned integer known at compile time (e.g. a literal, a constexpr or a preprocessor constant) and `array` is a C-style array containing `SIZE` elements `T`. +``` C++ +const T max_array = max_template<SIZE>::process(array); +``` + +Where `T` is a comparable type — one for which operators `<` and `>` are implemented — e.g. `int`, `float` or `ap_uint`; `SIZE` is an unsigned integer known at compile time (e.g. a literal, a `constexpr` or a preprocessor constant) and `array` is a C-style array containing `SIZE` elements `T`. `max_pow2` is used the same. However, note that it cannot work (even if it compiles) if `SIZE` is not a power of 2. It must be compiled with `--std=c++14` (or `gnu++14` or another equivalent). 
@@ -27,21 +30,23 @@ Including the `max.hpp` header is all that is needed to use it in another HLS pr ## Testbench -The prototype of a testbench is provided and the files can be tested with `make testbench`. +The prototype of a testbench is provided, and the files can be tested with `make testbench`. ## Xilinx® Vitis HLS A TCL script is provided in [hls_files](hls_files) and can be used with Xilinx® Vitis HLS. Just do in a shell: - - cd hls_files - vitis_hls -f script.tcl - + +``` sh +cd hls_files +vitis_hls -f script.tcl +``` + By default, no IP are created. You may need to adjust the part and the clock targeted. -It as been successfully tested on Vitis HLS version 2020.2 and may or may not work with other versions. +It has been successfully tested on Vitis HLS version 2020.2 and may or may not work with other versions. -## Perpectives +## Perspectives -A template argument to produce either the current description or a more resources friendly (Higher latency and/or lower througput) may be added in the future. +A template argument to produce either the current description or a more resources friendly (Higher latency and/or lower throughput) may be added in the future. Another template argument specifying the most comparisons per stages is also considered. diff --git a/sources/modules/max.hpp b/sources/modules/max.hpp index f75bf72..8bea25f 100644 --- a/sources/modules/max.hpp +++ b/sources/modules/max.hpp @@ -60,17 +60,39 @@ struct max_pow2<2> { } }; -// TRUE MAX TEMPLATE BELOW +/* TRUE MAX TEMPLATE BELOW */ +/** + * @brief Defines supported schedules + * + */ +typedef enum { + REDUCED, + LOOPED, + TAILED, + HEADED +} max_type_t; + +/* REDUCED *****************/ + +/** + * @brief Recursive template to implement a completely reduced scheduling + * + * @tparam N Input size + * @tparam bEven True if N is even, false otherwise. 
+ * Can use the Macro IS_EVEN() + */ template -class max_struct { +class max_reduced { public: template static T process(const T values[N]); }; +/* REDUCED when N is even */ + template -class max_struct { +class max_reduced { private: static constexpr unsigned half = N >> 1; @@ -79,8 +101,11 @@ public: static T process(const T values[N]); }; + +/* REDUCED when N is odd */ + template -class max_struct { +class max_reduced { private: static constexpr unsigned Nm1 = N - 1; static constexpr unsigned half = (Nm1 >> 1); @@ -92,7 +117,7 @@ public: }; template <> -class max_struct<1, false> { +class max_reduced<1, false> { public: template @@ -102,7 +127,7 @@ public: }; template <> -class max_struct<2, true> { +class max_reduced<2, true> { public: template @@ -112,7 +137,7 @@ public: }; template <> -class max_struct<3, false> { +class max_reduced<3, false> { public: template @@ -126,7 +151,7 @@ public: template template -T max_struct::process(const T values[N]) { +T max_reduced::process(const T values[N]) { static_assert(N > 2, "N cannot be less than 3!"); T half_values[half]; #pragma HLS array_partition variable = half_values complete @@ -136,12 +161,12 @@ T max_struct::process(const T values[N]) { const uint8_t jp1 = j + 1; half_values[i] = (values[j] < values[jp1] ? values[jp1] : values[j]); } - return max_struct::process(half_values); + return max_reduced::process(half_values); } template template -T max_struct::process(const T values[N]) { +T max_reduced::process(const T values[N]) { static_assert(N > 3, "N cannot be less than 3!"); T half_values[halfp1]; #pragma HLS array_partition variable = half_values complete @@ -152,11 +177,96 @@ T max_struct::process(const T values[N]) { half_values[i] = (values[j] < values[jp1] ? 
values[jp1] : values[j]); } half_values[half] = values[Nm1]; - return max_struct::process(half_values); + return max_reduced::process(half_values); } + +/** + * @brief User friendly max template + * + * @tparam N Input Size + * @tparam Ver Schedule wanted. Default: REDUCED. + */ +template +class max_template : public max_reduced { +}; + +template <> +class max_template<1, TAILED> { +public: + template + static T process(const T values[1]) { + return values[0]; + } +}; + +template <> +class max_template<2, TAILED> { +public: + template + static T process(const T values[2]) { + return (values[0] < values[1] ? values[1] : values[0]); + } +}; + template -class max_template : public max_struct { +class max_template { +private: + static constexpr unsigned Nm1 = N - 1; + +public: + template + static T process(const T values[N]) { + const T max_Nm1 = max_template::process(values); + return (max_Nm1 < values[Nm1] ? values[Nm1] : max_Nm1); + } +}; + +template <> +class max_template<1, HEADED> { +public: + template + static T process(const T values[1]) { + return values[0]; + } +}; + +template <> +class max_template<2, HEADED> { +public: + template + static T process(const T values[2]) { + return (values[0] < values[1] ? values[1] : values[0]); + } +}; + +template +class max_template { +private: + static constexpr unsigned Nm1 = N - 1; + +public: + template + static T process(const T values[N]) { + const T max_Nm1 = max_template::process(values + 1); + return (max_Nm1 < values[0] ? values[0] : max_Nm1); + } +}; + +template +class max_template { + +public: + template + static T process(const T values[N]) { + T max_value = values[0]; + loop_max: + for (unsigned u = 1; u < N; u++) { + const T value = values[u]; + max_value = (max_value < value ? 
value : max_value); + } + return max_value; + } }; #undef IS_EVEN diff --git a/sources/testbenches/catch.cpp b/sources/testbenches/catch.cpp new file mode 100644 index 0000000..4ed06df --- /dev/null +++ b/sources/testbenches/catch.cpp @@ -0,0 +1,2 @@ +#define CATCH_CONFIG_MAIN +#include diff --git a/sources/testbenches/max_catch_tb.cpp b/sources/testbenches/max_catch_tb.cpp new file mode 100644 index 0000000..7db7f29 --- /dev/null +++ b/sources/testbenches/max_catch_tb.cpp @@ -0,0 +1,55 @@ +/* + * Copyright © 2021 "DrasLorus" + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this + * file and associated documentation files (the “Software”), + * to deal in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "../modules/max.hpp" + +#include +#include +#include +#include +#include + +#include + +using std::max_element; +using std::vector; + +TEST_CASE("Factorials are computed", "[factorial]") { + srand(time(nullptr)); + + vector to_be_maxed64(64 * 8, 0); + for (auto && it : to_be_maxed64) { + it = uint8_t(floor(float(rand()) / float(RAND_MAX) * 256.f)); + } + + for (unsigned u = 0; u < to_be_maxed64.size() - 64; u++) { + const uint8_t * local_beg = to_be_maxed64.data() + u; + const uint8_t * local_end = local_beg + 64; + + const uint8_t max64_value = do_max_64(local_beg); + + const uint8_t max_64_test = *max_element(local_beg, local_end); + + REQUIRE(max_64_test == max64_value); + } + + // cout << (retval == 0 ? "Test passed. " : "Test failed. ") << endl; +} diff --git a/sources/testbenches/max_tb.cpp b/sources/testbenches/max_tb.cpp index ca67e6a..9394cf5 100644 --- a/sources/testbenches/max_tb.cpp +++ b/sources/testbenches/max_tb.cpp @@ -32,47 +32,51 @@ using namespace std; int main(int, char **) { srand(time(nullptr)); - vector to_be_maxed64(64, 0); + vector to_be_maxed64(64 * 8, 0); for (auto && it : to_be_maxed64) { it = uint8_t(floor(float(rand()) / float(RAND_MAX) * 256.f)); } - vector to_be_maxed63(63, 0); - for (auto && it : to_be_maxed63) { - it = uint8_t(floor(float(rand()) / float(RAND_MAX) * 256.f)); - } + // vector to_be_maxed63(63, 0); + // for (auto && it : to_be_maxed63) { + // it = uint8_t(floor(float(rand()) / float(RAND_MAX) * 256.f)); + // } - vector to_be_maxed64f(64, 0); - for (auto && it : to_be_maxed64f) { - it = float(rand()) / float(RAND_MAX) * 1000.f; - } + // vector to_be_maxed64f(64, 0); + // for (auto && it : to_be_maxed64f) { + // it = float(rand()) / float(RAND_MAX) * 1000.f; + // } - vector to_be_maxed63f(63, 0); - for (auto && it : to_be_maxed63f) { - it = float(rand()) / float(RAND_MAX) * 1000.f; - } - - const uint8_t max64_value = do_max_64(to_be_maxed64.data()); - // const uint8_t max63_value = 
do_max_63(to_be_maxed63.data()); - // const float max64f_value = do_max_64f(to_be_maxed64f.data()); - // const float max63f_value = do_max_63f(to_be_maxed63f.data()); - - // cout << unsigned(max64_value) << " " << unsigned(max63_value) << endl; - // cout << float(max64f_value) << " " << float(max63f_value) << endl; + // vector to_be_maxed63f(63, 0); + // for (auto && it : to_be_maxed63f) { + // it = float(rand()) / float(RAND_MAX) * 1000.f; + // } int retval = 0; + for (unsigned u = 0; u < to_be_maxed64.size() - 64; u++) { + const uint8_t * local_beg = to_be_maxed64.data() + u; + const uint8_t * local_end = local_beg + 64; - const uint8_t max_64_test = *max_element(to_be_maxed64.begin(), to_be_maxed64.end()); - // const uint8_t max_63_test = *max_element(to_be_maxed63.begin(), to_be_maxed63.end()); - // const float max_64f_test = *max_element(to_be_maxed64f.begin(), to_be_maxed64f.end()); - // const float max_63f_test = *max_element(to_be_maxed63f.begin(), to_be_maxed63f.end()); + const uint8_t max64_value = do_max_64(local_beg); + // const uint8_t max63_value = do_max_63(to_be_maxed63.data()); + // const float max64f_value = do_max_64f(to_be_maxed64f.data()); + // const float max63f_value = do_max_63f(to_be_maxed63f.data()); - retval += (max_64_test == max64_value ? 0 : 1); - // retval += (max_63_test == max63_value ? 0 : 1); - // retval += (max_64f_test == max64f_value ? 0 : 1); - // retval += (max_63f_test == max63f_value ? 0 : 1); + // cout << unsigned(max64_value) << " " << unsigned(max63_value) << endl; + // cout << float(max64f_value) << " " << float(max63f_value) << endl; - cout << (retval == 0 ? "Test passed. " : "Test failed. 
") << unsigned(max_64_test) << " vs " << unsigned(max64_value) << endl; + const uint8_t max_64_test = *max_element(local_beg, local_end); + // const uint8_t max_63_test = *max_element(to_be_maxed63.begin(), to_be_maxed63.end()); + // const float max_64f_test = *max_element(to_be_maxed64f.begin(), to_be_maxed64f.end()); + // const float max_63f_test = *max_element(to_be_maxed63f.begin(), to_be_maxed63f.end()); + + retval += (max_64_test == max64_value ? 0 : 1); + // retval += (max_63_test == max63_value ? 0 : 1); + // retval += (max_64f_test == max64f_value ? 0 : 1); + // retval += (max_63f_test == max63f_value ? 0 : 1); + } + + cout << (retval == 0 ? "Test passed. " : "Test failed. ") << endl; return retval; }