Add new schedules HEADED, TAILED and LOOPED.

- Also add some comments and correct README
2024-11-21 04:03:19 +01:00 · 2021-08-01 21:15:58 +02:00 · 2021-08-01 21:15:58 +02:00 · 5f7074352a
commit 5f7074352a
parent 800edf3917
6 changed files with 252 additions and 61 deletions
--- a/25
+++ b/25
@ -32,20 +32,31 @@ VITIS_HOME=${XILINX_HOME}/Vitis_HLS/${XILINX_VER}
 CXXFLAGS:=${CXXFLAGS} -isystem ${VITIS_HOME}/include

 SRCDIR=sources
-
+OBJDIR=obj
 BINDIR=bin
-SOURCES:=$(SRCDIR)/modules/max.cpp $(SRCDIR)/testbenches/max_tb.cpp
+
+CATCHOBJ=$(OBJDIR)/catch.cpp.o
+
+SOURCES:=$(SRCDIR)/modules/max.cpp
+SOURCES+=$(SRCDIR)/testbenches/max_catch_tb.cpp
+
 OBJ=$(SOURCES:.cpp=.cpp.o)

 TBBIN=$(BINDIR)/tb.out

-all: $(BINDIR) $(TBBIN)
+all: $(BINDIR) $(OBJDIR) $(TBBIN)

 $(BINDIR):
 	mkdir -p $(BINDIR)

-$(TBBIN): $(OBJ)
-	$(CXX) -o $(TBBIN) $(OBJ) $(LDFLAGS)
+$(OBJDIR):
+	mkdir -p $(OBJDIR)
+
+# Link the testbench executable from the module/testbench objects and the Catch2 main object.
+# $(BINDIR) is an order-only prerequisite: targets that reach this rule without going
+# through `all` (e.g. `testbench`) still get the output directory created first.
+$(TBBIN): $(OBJ) $(CATCHOBJ) | $(BINDIR)
+	$(CXX) -o $(TBBIN) $(OBJ) $(CATCHOBJ) $(LDFLAGS)
+
+# Object holding the Catch2-generated main().
+# It depends on its source so that it is rebuilt when catch.cpp changes, and on $(OBJDIR)
+# as an order-only prerequisite so that the directory exists before the compiler writes to it.
+$(CATCHOBJ) : $(SRCDIR)/testbenches/catch.cpp | $(OBJDIR)
+	$(CXX) -o $@ -c $< $(CXXFLAGS)

 %.cpp.o : %.cpp
 	$(CXX) -o $@ -c $< $(CXXFLAGS)
@ -58,6 +69,10 @@ clean:
 clear: clean
 	rm -vf $(TBBIN) $(BINDIR)/results.dat $(COMPDB_ENTRIES) compile_commands.json

+# None of the cleaning targets produce a file: declare them phony so that a stray file
+# named like one of them cannot prevent the recipe from running.
+.PHONY: clean clear fullclear
+# fullclear: everything `clear` removes, plus the Catch2 main object.
+fullclear: clear
+	rm -vf $(CATCHOBJ)
+
 testbench: $(TBBIN)
 	cd $(BINDIR); ../$(TBBIN)

--- a/README.md
+++ b/README.md
@ -1,22 +1,25 @@
 # hls_max_template

-An efficient C++14 description of a max process which finds the maximum of a real array. 
+An efficient C++14 description of a max process which finds the maximum of a real array.

 Used in High Level Synthesis, produce the most efficient maximum possible by doing reduction at compile time.

-Currently, it produces log2(*N*) (+1 if *N* is odd) stages of *integer_part(N/2)* comparisons, can be easily unrolled or pipelined.
+Currently, it produces log2(*N*) (+1 if *N* is odd) stages of *integer_part(N/2)* comparisons, which can easily be unrolled and pipelined.

 ## Content
+
 This repository provides a [header](sources/modules/max.hpp) which defines two recursive template classes with a `process` public method that find the maximum of an array of any size and any type.
 `max_template` can be used for all possible sizes while `max_pow2` works only with sizes that are power of 2. `max_struct` is used to implement `max_template` and may not be used directly.

 ## Usage

-After including the header ( with `#include "max.hpp"` ), just call the process method like that:
+After including the header (with `#include "max.hpp"`), call the `process` method like this:

-    const T max_array = max_template<SIZE>::process(array);
-  
-where `T` is a correct type (e.g. `int`, `float`, `ap_uint`, ...), `SIZE` is an unsigned integer known at compile time (e.g. a literal, a constexpr or a preprocessor constant) and `array` is a C-style array containing `SIZE` elements `T`.
+``` C++
+const T max_array = max_template<SIZE>::process(array);
+```
+
+Here, `T` is a comparable type, i.e. one for which the operators `<` and `>` are implemented (e.g. `int`, `float` or `ap_uint`), `SIZE` is an unsigned integer known at compile time (e.g. a literal, a `constexpr` or a preprocessor constant) and `array` is a C-style array containing `SIZE` elements of type `T`.
 `max_pow2` is used the same. However, note that it cannot work (even if it compiles) if `SIZE` is not a power of 2. 

 It must be compiled with `--std=c++14` (or `gnu++14` or another equivalent).
@ -27,21 +30,23 @@ Including the `max.hpp` header is all that is needed to use it in another HLS pr

 ## Testbench

-The prototype of a testbench is provided and the files can be tested with `make testbench`.
+The prototype of a testbench is provided, and the files can be tested with `make testbench`.

 ## Xilinx® Vitis HLS

 A TCL script is provided in [hls_files](hls_files) and can be used with Xilinx® Vitis HLS.
 Just do in a shell:
-    
-    cd hls_files
-    vitis_hls -f script.tcl
-    
+
+``` sh
+cd hls_files
+vitis_hls -f script.tcl
+```
+
 By default, no IP are created. You may need to adjust the part and the clock targeted. 

-It as been successfully tested on Vitis HLS version 2020.2 and may or may not work with other versions.
+It has been successfully tested on Vitis HLS version 2020.2 and may or may not work with other versions.

-## Perpectives
+## Perspectives

-A template argument to produce either the current description or a more resources friendly (Higher latency and/or lower througput) may be added in the future.
+A template argument to produce either the current description or a more resource-friendly one (higher latency and/or lower throughput) may be added in the future.
 Another template argument specifying the most comparisons per stages is also considered.
--- a/sources/modules/max.hpp
+++ b/sources/modules/max.hpp
@ -60,17 +60,39 @@ struct max_pow2<2> {
    }
 };

-// TRUE MAX TEMPLATE BELOW
+/* TRUE MAX TEMPLATE BELOW */

+/**
+ * @brief Defines supported schedules
+ *
+ * Used as the second template argument of max_template to select how the
+ * comparisons are organised:
+ *  - REDUCED: pairwise reduction, the input is halved at every stage (see max_reduced).
+ *  - LOOPED:  one loop comparing a running maximum with each element in turn.
+ *  - TAILED:  maximum of the first N-1 elements, then a comparison with the last one.
+ *  - HEADED:  maximum of the last N-1 elements, then a comparison with the first one.
+ */
+typedef enum {
+    REDUCED,
+    LOOPED,
+    TAILED,
+    HEADED
+} max_type_t;
+
+/* REDUCED *****************/
+
+/**
+ * @brief Recursive template to implement a completely reduced scheduling
+ * 
+ * @tparam N     Input size
+ * @tparam bEven True if N is even, false otherwise.
+ *               Can use the Macro IS_EVEN()
+ */
 template <unsigned N, bool bEven>
-class max_struct {
+class max_reduced {
 public:
    template <typename T>
    static T process(const T values[N]);
 };

+/* REDUCED when N is even */
+
 template <unsigned N>
-class max_struct<N, true> {
+class max_reduced<N, true> {
 private:
    static constexpr unsigned half = N >> 1;

@ -79,8 +101,11 @@ public:
    static T process(const T values[N]);
 };

+
+/* REDUCED when N is odd */
+
 template <unsigned N>
-class max_struct<N, false> {
+class max_reduced<N, false> {
 private:
    static constexpr unsigned Nm1    = N - 1;
    static constexpr unsigned half   = (Nm1 >> 1);
@ -92,7 +117,7 @@ public:
 };

 template <>
-class max_struct<1, false> {
+class max_reduced<1, false> {

 public:
    template <typename T>
@ -102,7 +127,7 @@ public:
 };

 template <>
-class max_struct<2, true> {
+class max_reduced<2, true> {

 public:
    template <typename T>
@ -112,7 +137,7 @@ public:
 };

 template <>
-class max_struct<3, false> {
+class max_reduced<3, false> {

 public:
    template <typename T>
@ -126,7 +151,7 @@ public:

 template <unsigned N>
 template <typename T>
-T max_struct<N, true>::process(const T values[N]) {
+T max_reduced<N, true>::process(const T values[N]) {
    static_assert(N > 2, "N cannot be less than 3!");
    T half_values[half];
 #pragma HLS array_partition variable = half_values complete
@ -136,12 +161,12 @@ T max_struct<N, true>::process(const T values[N]) {
        const uint8_t jp1 = j + 1;
        half_values[i]    = (values[j] < values[jp1] ? values[jp1] : values[j]);
    }
-    return max_struct<half, IS_EVEN(half)>::process(half_values);
+    return max_reduced<half, IS_EVEN(half)>::process(half_values);
 }

 template <unsigned N>
 template <typename T>
-T max_struct<N, false>::process(const T values[N]) {
+T max_reduced<N, false>::process(const T values[N]) {
    static_assert(N > 3, "N cannot be less than 3!");
    T half_values[halfp1];
 #pragma HLS array_partition variable = half_values complete
@ -152,11 +177,96 @@ T max_struct<N, false>::process(const T values[N]) {
        half_values[i]    = (values[j] < values[jp1] ? values[jp1] : values[j]);
    }
    half_values[half] = values[Nm1];
-    return max_struct<halfp1, IS_EVEN(halfp1)>::process(half_values);
+    return max_reduced<halfp1, IS_EVEN(halfp1)>::process(half_values);
 }

+
+/**
+ * @brief User friendly max template
+ *
+ * The primary template derives from max_reduced<N, IS_EVEN(N)> and so exposes
+ * its static process() method. The LOOPED, TAILED and HEADED schedules are
+ * provided by the dedicated specializations of this template.
+ *
+ * @tparam N    Input Size
+ * @tparam Ver  Schedule wanted. Default: REDUCED.
+ */
+template <unsigned N, max_type_t Ver = REDUCED>
+class max_template : public max_reduced<N, IS_EVEN(N)> {
+};
+
+/**
+ * @brief TAILED schedule for a single element: no comparison is needed,
+ *        the element itself is returned.
+ */
+template <>
+class max_template<1, TAILED> {
+public:
+    template <typename T>
+    static T process(const T values[1]) {
+        return values[0];
+    }
+};
+
+/**
+ * @brief TAILED schedule for two elements: a single comparison.
+ */
+template <>
+class max_template<2, TAILED> {
+public:
+    /**
+     * @tparam T      Element type, compared with operator<.
+     * @param  values Array of two elements.
+     * @return values[1] when values[0] < values[1], values[0] otherwise.
+     */
+    template <typename T>
+    static T process(const T values[2]) {
+        if (values[0] < values[1]) {
+            return values[1];
+        }
+        return values[0];
+    }
+};
+
 template <unsigned N>
-class max_template : public max_struct<N, IS_EVEN(N)> {
+class max_template<N, TAILED> {
+private:
+    // Index of the last element, and size of the prefix handled by the recursive call.
+    static constexpr unsigned Nm1 = N - 1;
+
+public:
+    /**
+     * @brief TAILED schedule: maximum of the first N-1 elements, followed by
+     *        one comparison against the last element.
+     *
+     * @tparam T      Element type, compared with operator<.
+     * @param  values Array of N elements.
+     * @return The last element when it is greater than the prefix maximum,
+     *         the prefix maximum otherwise.
+     */
+    template <typename T>
+    static T process(const T values[N]) {
+        const T prefix_max = max_template<Nm1, TAILED>::process(values);
+        if (prefix_max < values[Nm1]) {
+            return values[Nm1];
+        }
+        return prefix_max;
+    }
+};
+
+/**
+ * @brief HEADED schedule for a single element: no comparison is needed,
+ *        the element itself is returned.
+ */
+template <>
+class max_template<1, HEADED> {
+public:
+    template <typename T>
+    static T process(const T values[1]) {
+        return values[0];
+    }
+};
+
+/**
+ * @brief HEADED schedule for two elements: a single comparison.
+ */
+template <>
+class max_template<2, HEADED> {
+public:
+    /**
+     * @tparam T      Element type, compared with operator<.
+     * @param  values Array of two elements.
+     * @return values[1] when values[0] < values[1], values[0] otherwise.
+     */
+    template <typename T>
+    static T process(const T values[2]) {
+        if (values[0] < values[1]) {
+            return values[1];
+        }
+        return values[0];
+    }
+};
+
+/**
+ * @brief HEADED schedule: maximum of the last N-1 elements, followed by one
+ *        comparison against the first element.
+ *
+ * @tparam N Input size
+ */
+template <unsigned N>
+class max_template<N, HEADED> {
+private:
+    // Size of the suffix handled by the recursive call.
+    static constexpr unsigned Nm1 = N - 1;
+
+public:
+    /**
+     * @tparam T      Element type, compared with operator<.
+     * @param  values Array of N elements.
+     * @return The first element when it is greater than the suffix maximum,
+     *         the suffix maximum otherwise.
+     */
+    template <typename T>
+    static T process(const T values[N]) {
+        // The recursion works on the elements that follow the head.
+        const T suffix_max = max_template<Nm1, HEADED>::process(values + 1);
+        if (suffix_max < values[0]) {
+            return values[0];
+        }
+        return suffix_max;
+    }
+};
+
+/**
+ * @brief LOOPED schedule: sequential scan that keeps a running maximum.
+ *
+ * @tparam N Input size, must be at least 1.
+ */
+template <unsigned N>
+class max_template<N, LOOPED> {
+    // process() unconditionally reads values[0]: reject an empty input at compile time.
+    static_assert(N > 0, "N cannot be less than 1!");
+
+public:
+    /**
+     * @brief Returns the largest of the N elements of values.
+     *
+     * @tparam T      Element type, compared with operator<.
+     * @param  values Array of N elements.
+     * @return A copy of the maximum; when several elements compare equal,
+     *         the earliest one is kept.
+     */
+    template <typename T>
+    static T process(const T values[N]) {
+        T max_value = values[0];
+    loop_max:
+        for (unsigned u = 1; u < N; u++) {
+            const T value = values[u];
+            max_value     = (max_value < value ? value : max_value);
+        }
+        return max_value;
+    }
 };

 #undef IS_EVEN
--- a/sources/testbenches/catch.cpp
+++ b/sources/testbenches/catch.cpp
@ -0,0 +1,2 @@
+#define CATCH_CONFIG_MAIN
+#include <catch2/catch.hpp>
--- a/sources/testbenches/max_catch_tb.cpp
+++ b/sources/testbenches/max_catch_tb.cpp
@ -0,0 +1,55 @@
+/* 
+ * Copyright © 2021 "DrasLorus"
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of this
+ * file and associated documentation files (the “Software”),
+ * to deal in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all 
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "../modules/max.hpp"
+
+#include <algorithm>
+#include <ctime>
+#include <iostream>
+#include <random>
+#include <vector>
+
+#include <catch2/catch.hpp>
+
+using std::max_element;
+using std::vector;
+
+/*
+ * Compares do_max_64 with std::max_element on every 64-element window of a
+ * randomly filled buffer.
+ */
+TEST_CASE("Maximum of 64-element windows matches std::max_element", "[max]") {
+    constexpr unsigned window_size  = 64;
+    constexpr unsigned window_count = 8;
+
+    // The seed is reported on failure so that a failing run can be reproduced.
+    const unsigned seed = static_cast<unsigned>(time(nullptr));
+    INFO("seed: " << seed);
+
+    // <random> replaces the rand() / RAND_MAX * 256.f scaling, which could yield
+    // 256.f and make the conversion to uint8_t undefined.
+    std::mt19937                            generator(seed);
+    std::uniform_int_distribution<unsigned> byte_distribution(0, 255);
+
+    vector<uint8_t> to_be_maxed64(window_size * window_count, 0);
+    for (auto && it : to_be_maxed64) {
+        it = uint8_t(byte_distribution(generator));
+    }
+
+    // `<=` so that the window ending on the very last element is checked too.
+    for (unsigned u = 0; u <= to_be_maxed64.size() - window_size; u++) {
+        const uint8_t * local_beg = to_be_maxed64.data() + u;
+        const uint8_t * local_end = local_beg + window_size;
+
+        const uint8_t max64_value = do_max_64(local_beg);
+
+        const uint8_t max_64_test = *max_element(local_beg, local_end);
+
+        REQUIRE(max_64_test == max64_value);
+    }
+}
--- a/sources/testbenches/max_tb.cpp
+++ b/sources/testbenches/max_tb.cpp
@ -32,47 +32,51 @@ using namespace std;
 int main(int, char **) {
    srand(time(nullptr));

-    vector<uint8_t> to_be_maxed64(64, 0);
+    vector<uint8_t> to_be_maxed64(64 * 8, 0);
    for (auto && it : to_be_maxed64) {
        it = uint8_t(floor(float(rand()) / float(RAND_MAX) * 256.f));
    }

-    vector<uint8_t> to_be_maxed63(63, 0);
-    for (auto && it : to_be_maxed63) {
-        it = uint8_t(floor(float(rand()) / float(RAND_MAX) * 256.f));
-    }
+    // vector<uint8_t> to_be_maxed63(63, 0);
+    // for (auto && it : to_be_maxed63) {
+    //     it = uint8_t(floor(float(rand()) / float(RAND_MAX) * 256.f));
+    // }

-    vector<float> to_be_maxed64f(64, 0);
-    for (auto && it : to_be_maxed64f) {
-        it = float(rand()) / float(RAND_MAX) * 1000.f;
-    }
+    // vector<float> to_be_maxed64f(64, 0);
+    // for (auto && it : to_be_maxed64f) {
+    //     it = float(rand()) / float(RAND_MAX) * 1000.f;
+    // }

-    vector<float> to_be_maxed63f(63, 0);
-    for (auto && it : to_be_maxed63f) {
-        it = float(rand()) / float(RAND_MAX) * 1000.f;
-    }
-
-    const uint8_t max64_value  = do_max_64(to_be_maxed64.data());
-    // const uint8_t max63_value  = do_max_63(to_be_maxed63.data());
-    // const float   max64f_value = do_max_64f(to_be_maxed64f.data());
-    // const float   max63f_value = do_max_63f(to_be_maxed63f.data());
-
-    // cout << unsigned(max64_value) << " " << unsigned(max63_value) << endl;
-    // cout << float(max64f_value) << " " << float(max63f_value) << endl;
+    // vector<float> to_be_maxed63f(63, 0);
+    // for (auto && it : to_be_maxed63f) {
+    //     it = float(rand()) / float(RAND_MAX) * 1000.f;
+    // }

    int retval = 0;
+    for (unsigned u = 0; u < to_be_maxed64.size() - 64; u++) {
+        const uint8_t * local_beg = to_be_maxed64.data() + u;
+        const uint8_t * local_end = local_beg + 64;

-    const uint8_t max_64_test  = *max_element(to_be_maxed64.begin(), to_be_maxed64.end());
-    // const uint8_t max_63_test  = *max_element(to_be_maxed63.begin(), to_be_maxed63.end());
-    // const float   max_64f_test = *max_element(to_be_maxed64f.begin(), to_be_maxed64f.end());
-    // const float   max_63f_test = *max_element(to_be_maxed63f.begin(), to_be_maxed63f.end());
+        const uint8_t max64_value = do_max_64(local_beg);
+        // const uint8_t max63_value  = do_max_63(to_be_maxed63.data());
+        // const float   max64f_value = do_max_64f(to_be_maxed64f.data());
+        // const float   max63f_value = do_max_63f(to_be_maxed63f.data());

-    retval += (max_64_test == max64_value ? 0 : 1);
-    // retval += (max_63_test == max63_value ? 0 : 1);
-    // retval += (max_64f_test == max64f_value ? 0 : 1);
-    // retval += (max_63f_test == max63f_value ? 0 : 1);
+        // cout << unsigned(max64_value) << " " << unsigned(max63_value) << endl;
+        // cout << float(max64f_value) << " " << float(max63f_value) << endl;

-    cout << (retval == 0 ? "Test passed. " : "Test failed. ") << unsigned(max_64_test) << " vs " << unsigned(max64_value) << endl;
+        const uint8_t max_64_test = *max_element(local_beg, local_end);
+        // const uint8_t max_63_test  = *max_element(to_be_maxed63.begin(), to_be_maxed63.end());
+        // const float   max_64f_test = *max_element(to_be_maxed64f.begin(), to_be_maxed64f.end());
+        // const float   max_63f_test = *max_element(to_be_maxed63f.begin(), to_be_maxed63f.end());
+
+        retval += (max_64_test == max64_value ? 0 : 1);
+        // retval += (max_63_test == max63_value ? 0 : 1);
+        // retval += (max_64f_test == max64f_value ? 0 : 1);
+        // retval += (max_63f_test == max63f_value ? 0 : 1);
+    }
+
+    cout << (retval == 0 ? "Test passed. " : "Test failed. ") << endl;

    return retval;
 }