Add new schedules HEADED, TAILED and LOOPED.

- Also add some comments and correct README
This commit is contained in:
DrasLorus 2021-08-01 21:15:58 +02:00
parent 800edf3917
commit 5f7074352a
Signed by: moniere
GPG key ID: 188DD5B072181C0F
6 changed files with 252 additions and 61 deletions

View file

@ -32,20 +32,31 @@ VITIS_HOME=${XILINX_HOME}/Vitis_HLS/${XILINX_VER}
CXXFLAGS:=${CXXFLAGS} -isystem ${VITIS_HOME}/include
SRCDIR=sources
OBJDIR=obj
BINDIR=bin
SOURCES:=$(SRCDIR)/modules/max.cpp $(SRCDIR)/testbenches/max_tb.cpp
CATCHOBJ=$(OBJDIR)/catch.cpp.o
SOURCES:=$(SRCDIR)/modules/max.cpp
SOURCES+=$(SRCDIR)/testbenches/max_catch_tb.cpp
OBJ=$(SOURCES:.cpp=.cpp.o)
TBBIN=$(BINDIR)/tb.out
all: $(BINDIR) $(TBBIN)
all: $(BINDIR) $(OBJDIR) $(TBBIN)
$(BINDIR):
mkdir -p $(BINDIR)
$(TBBIN): $(OBJ)
$(CXX) -o $(TBBIN) $(OBJ) $(LDFLAGS)
$(OBJDIR):
mkdir -p $(OBJDIR)
# Link the testbench executable from the module/testbench objects and the
# Catch2 main object. $(BINDIR) is an order-only prerequisite: it must exist
# before linking (even when only `make testbench` is invoked), but a change
# of the directory timestamp must not force a relink.
$(TBBIN): $(OBJ) $(CATCHOBJ) | $(BINDIR)
	$(CXX) -o $(TBBIN) $(OBJ) $(CATCHOBJ) $(LDFLAGS)
# Compile the Catch2 main translation unit into its own object.
# Listing catch.cpp as a prerequisite makes the object rebuild when the source
# changes; $(OBJDIR) is order-only so the output directory exists before the
# compiler writes into it.
$(CATCHOBJ): $(SRCDIR)/testbenches/catch.cpp | $(OBJDIR)
	$(CXX) -o $@ -c $< $(CXXFLAGS)
%.cpp.o : %.cpp
$(CXX) -o $@ -c $< $(CXXFLAGS)
@ -58,6 +69,10 @@ clean:
clear: clean
rm -vf $(TBBIN) $(BINDIR)/results.dat $(COMPDB_ENTRIES) compile_commands.json
# None of these targets produce a file named after themselves.
.PHONY: all clean clear fullclear testbench

# Same as clear, and additionally removes the Catch2 main object.
fullclear: clear
	rm -vf $(CATCHOBJ)

# Run the testbench from inside $(BINDIR); `&&` prevents launching the binary
# from the wrong directory when the cd fails.
testbench: $(TBBIN)
	cd $(BINDIR) && ../$(TBBIN)

View file

@ -1,22 +1,25 @@
# hls_max_template
An efficient C++14 description of a max process which finds the maximum of a real array.
An efficient C++14 description of a max process which finds the maximum of a real array.
Used in High Level Synthesis, it produces the most efficient maximum possible by doing the reduction at compile time.
Currently, it produces log2(*N*) (+1 if *N* is odd) stages of *integer_part(N/2)* comparisons, can be easily unrolled or pipelined.
Currently, it produces log2(*N*) (+1 if *N* is odd) stages of *integer_part(N/2)* comparisons, which can easily be unrolled and can make use of pipelining.
## Content
This repository provides a [header](sources/modules/max.hpp) which defines two recursive template classes with a `process` public method that finds the maximum of an array of any size and any type.
`max_template` can be used for all possible sizes while `max_pow2` works only with sizes that are powers of 2. `max_reduced` is used to implement `max_template` and may not be used directly.
## Usage
After including the header ( with `#include "max.hpp"` ), just call the process method like that:
After including the header (with `#include "max.hpp"`), just call the `process` method like this:
const T max_array = max_template<SIZE>::process(array);
where `T` is a correct type (e.g. `int`, `float`, `ap_uint`, ...), `SIZE` is an unsigned integer known at compile time (e.g. a literal, a constexpr or a preprocessor constant) and `array` is a C-style array containing `SIZE` elements `T`.
``` C++
const T max_array = max_template<SIZE>::process(array);
```
Where `T` is a comparable type — one for which operators `<` and `>` are implemented — e.g. `int`, `float` or `ap_uint`, `SIZE` is an unsigned integer known at compile time (e.g. a literal, a `constexpr` or a preprocessor constant) and `array` is a C-style array containing `SIZE` elements of type `T`.
`max_pow2` is used the same way. However, note that it cannot work (even if it compiles) if `SIZE` is not a power of 2.
It must be compiled with `--std=c++14` (or `gnu++14` or another equivalent).
@ -27,21 +30,23 @@ Including the `max.hpp` header is all that is needed to use it in another HLS pr
## Testbench
The prototype of a testbench is provided and the files can be tested with `make testbench`.
The prototype of a testbench is provided, and the files can be tested with `make testbench`.
## Xilinx® Vitis HLS
A TCL script is provided in [hls_files](hls_files) and can be used with Xilinx® Vitis HLS.
Just do in a shell:
cd hls_files
vitis_hls -f script.tcl
``` sh
cd hls_files
vitis_hls -f script.tcl
```
By default, no IP is created. You may need to adjust the targeted part and clock.
It as been successfully tested on Vitis HLS version 2020.2 and may or may not work with other versions.
It has been successfully tested on Vitis HLS version 2020.2 and may or may not work with other versions.
## Perpectives
## Perspectives
A template argument to produce either the current description or a more resources friendly (Higher latency and/or lower througput) may be added in the future.
A template argument to produce either the current description or a more resource-friendly one (higher latency and/or lower throughput) may be added in the future.
Another template argument specifying the maximum number of comparisons per stage is also being considered.

View file

@ -60,17 +60,39 @@ struct max_pow2<2> {
}
};
// TRUE MAX TEMPLATE BELOW
/* TRUE MAX TEMPLATE BELOW */
/**
 * @brief Schedules supported by max_template.
 *
 * The enumerator is passed as the second template argument of
 * max_template to choose how the comparisons are arranged.
 */
typedef enum {
    REDUCED = 0, /**< Recursive pairwise reduction (default schedule). */
    LOOPED  = 1, /**< Single loop comparing each element with a running maximum. */
    TAILED  = 2, /**< Recursion on the first N-1 elements, then compare with the last one. */
    HEADED  = 3  /**< Recursion on the last N-1 elements, then compare with the first one. */
} max_type_t;
/* REDUCED *****************/
/**
* @brief Recursive template to implement a completely reduced scheduling
*
* @tparam N Input size
* @tparam bEven True if N is even, false otherwise.
* Can use the Macro IS_EVEN()
*/
template <unsigned N, bool bEven>
class max_struct {
class max_reduced {
public:
template <typename T>
static T process(const T values[N]);
};
/* REDUCED when N is even */
template <unsigned N>
class max_struct<N, true> {
class max_reduced<N, true> {
private:
static constexpr unsigned half = N >> 1;
@ -79,8 +101,11 @@ public:
static T process(const T values[N]);
};
/* REDUCED when N is odd */
template <unsigned N>
class max_struct<N, false> {
class max_reduced<N, false> {
private:
static constexpr unsigned Nm1 = N - 1;
static constexpr unsigned half = (Nm1 >> 1);
@ -92,7 +117,7 @@ public:
};
template <>
class max_struct<1, false> {
class max_reduced<1, false> {
public:
template <typename T>
@ -102,7 +127,7 @@ public:
};
template <>
class max_struct<2, true> {
class max_reduced<2, true> {
public:
template <typename T>
@ -112,7 +137,7 @@ public:
};
template <>
class max_struct<3, false> {
class max_reduced<3, false> {
public:
template <typename T>
@ -126,7 +151,7 @@ public:
template <unsigned N>
template <typename T>
T max_struct<N, true>::process(const T values[N]) {
T max_reduced<N, true>::process(const T values[N]) {
static_assert(N > 2, "N cannot be less than 3!");
T half_values[half];
#pragma HLS array_partition variable = half_values complete
@ -136,12 +161,12 @@ T max_struct<N, true>::process(const T values[N]) {
const uint8_t jp1 = j + 1;
half_values[i] = (values[j] < values[jp1] ? values[jp1] : values[j]);
}
return max_struct<half, IS_EVEN(half)>::process(half_values);
return max_reduced<half, IS_EVEN(half)>::process(half_values);
}
template <unsigned N>
template <typename T>
T max_struct<N, false>::process(const T values[N]) {
T max_reduced<N, false>::process(const T values[N]) {
static_assert(N > 3, "N cannot be less than 3!");
T half_values[halfp1];
#pragma HLS array_partition variable = half_values complete
@ -152,11 +177,96 @@ T max_struct<N, false>::process(const T values[N]) {
half_values[i] = (values[j] < values[jp1] ? values[jp1] : values[j]);
}
half_values[half] = values[Nm1];
return max_struct<halfp1, IS_EVEN(halfp1)>::process(half_values);
return max_reduced<halfp1, IS_EVEN(halfp1)>::process(half_values);
}
/**
 * @brief User friendly max template
 *
 * Primary template: it declares no member of its own and publicly inherits
 * from max_reduced<N, IS_EVEN(N)>, whose static process() method is therefore
 * what callers reach. It is selected whenever no specialization matches the
 * requested schedule, which is the case for the default REDUCED.
 *
 * @tparam N Input Size
 * @tparam Ver Schedule wanted. Default: REDUCED.
 */
template <unsigned N, max_type_t Ver = REDUCED>
class max_template : public max_reduced<N, IS_EVEN(N)> {
};
/**
 * @brief TAILED schedule, one-element case.
 *
 * The maximum of a single value is the value itself, so no comparison
 * is generated.
 */
template <>
class max_template<1, TAILED> {
public:
    /**
     * @tparam T Element type.
     * @param values Array holding one element.
     * @return A copy of values[0].
     */
    template <typename T>
    static T process(const T values[1]) {
        const T & only_value = values[0];
        return only_value;
    }
};
/**
 * @brief TAILED schedule, two-element case.
 *
 * A single `<` comparison selects the larger of the two inputs.
 */
template <>
class max_template<2, TAILED> {
public:
    /**
     * @tparam T Element type; must support operator<.
     * @param values Array holding two elements.
     * @return values[1] if values[0] < values[1], values[0] otherwise.
     */
    template <typename T>
    static T process(const T values[2]) {
        const T & first  = values[0];
        const T & second = values[1];
        if (first < second) {
            return second;
        }
        return first;
    }
};
// TAILED schedule, generic case (max_template<N, TAILED>): the maximum of the
// first N-1 elements is computed by the (N-1)-sized TAILED instance, then
// compared with the last element.
template <unsigned N>
class max_template : public max_struct<N, IS_EVEN(N)> {
class max_template<N, TAILED> {
private:
// Index of the last element, and size of the sub-array handled recursively.
static constexpr unsigned Nm1 = N - 1;
public:
// Returns the maximum of the N elements of `values`; T must support operator<.
template <typename T>
static T process(const T values[N]) {
// Maximum of values[0] .. values[N-2].
const T max_Nm1 = max_template<Nm1, TAILED>::process(values);
// Fold in values[N-1]; on equality the recursive result is kept.
return (max_Nm1 < values[Nm1] ? values[Nm1] : max_Nm1);
}
};
/**
 * @brief HEADED schedule, one-element case.
 *
 * Ends the HEADED recursion: the only element is returned unchanged.
 */
template <>
class max_template<1, HEADED> {
public:
    /**
     * @tparam T Element type.
     * @param values Array holding one element.
     * @return A copy of values[0].
     */
    template <typename T>
    static T process(const T values[1]) {
        const T & single = values[0];
        return single;
    }
};
/**
 * @brief HEADED schedule, two-element case.
 *
 * Picks the larger of the two inputs with one `<` comparison.
 */
template <>
class max_template<2, HEADED> {
public:
    /**
     * @tparam T Element type; must support operator<.
     * @param values Array holding two elements.
     * @return values[1] if values[0] < values[1], values[0] otherwise.
     */
    template <typename T>
    static T process(const T values[2]) {
        const T & head = values[0];
        const T & next = values[1];
        if (head < next) {
            return next;
        }
        return head;
    }
};
/**
 * @brief HEADED schedule, generic case.
 *
 * The maximum of the last N-1 elements is computed by the (N-1)-sized
 * HEADED instance, then compared with the first element.
 *
 * @tparam N Input size.
 */
template <unsigned N>
class max_template<N, HEADED> {
private:
    /// Number of elements handled by the recursive call.
    static constexpr unsigned tail_size = N - 1;

public:
    /**
     * @tparam T Element type; must support operator<.
     * @param values Array holding N elements.
     * @return The maximum of the N elements.
     */
    template <typename T>
    static T process(const T values[N]) {
        // Maximum of values[1] .. values[N-1].
        const T tail_max = max_template<tail_size, HEADED>::process(values + 1);
        // Fold in values[0]; on equality the recursive result is kept.
        if (tail_max < values[0]) {
            return values[0];
        }
        return tail_max;
    }
};
/**
 * @brief LOOPED schedule: a single loop keeps a running maximum.
 *
 * @tparam N Input size. Must be at least 1 because values[0] seeds the
 *           running maximum.
 */
template <unsigned N>
class max_template<N, LOOPED> {
    // values[0] is read unconditionally below; reject empty inputs at
    // compile time instead of reading out of bounds.
    static_assert(N > 0, "N cannot be less than 1!");

public:
    /**
     * @tparam T Element type; must support operator<.
     * @param values Array holding N elements.
     * @return The maximum of the N elements.
     */
    template <typename T>
    static T process(const T values[N]) {
        T max_value = values[0];
    // Label kept unchanged so that tool directives referring to the loop by
    // name keep working.
    loop_max:
        for (unsigned u = 1; u < N; u++) {
            const T value = values[u];
            max_value     = (max_value < value ? value : max_value);
        }
        return max_value;
    }
};
#undef IS_EVEN

View file

@ -0,0 +1,2 @@
#define CATCH_CONFIG_MAIN
#include <catch2/catch.hpp>

View file

@ -0,0 +1,55 @@
/*
* Copyright © 2021 "DrasLorus"
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of this
* file and associated documentation files (the Software),
* to deal in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
* the Software, and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
* PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
* CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
* OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "../modules/max.hpp"
#include <algorithm>
#include <ctime>
#include <iostream>
#include <random>
#include <vector>
#include <catch2/catch.hpp>
using std::max_element;
using std::vector;
/**
 * Checks do_max_64 against std::max_element on every 64-element window of a
 * randomly filled byte buffer.
 */
TEST_CASE("do_max_64 matches std::max_element on sliding windows", "[max]") {
    constexpr unsigned window = 64;

    // The seed is attached to every failing assertion below.
    const unsigned seed = std::random_device {}();
    INFO("random seed: " << seed);
    std::mt19937 generator(seed);

    // std::uniform_int_distribution is not specified for 8-bit types, so draw
    // unsigned values in [0, 255] and narrow afterwards. This also avoids the
    // out-of-range float-to-uint8_t conversion that occurs when a scaled
    // rand() / RAND_MAX ratio reaches 256.
    std::uniform_int_distribution<unsigned> byte_value(0U, 255U);

    vector<uint8_t> to_be_maxed64(window * 8, 0);
    for (auto & sample : to_be_maxed64) {
        sample = static_cast<uint8_t>(byte_value(generator));
    }

    // `u + window <= size()` includes the last full window, which starts at
    // index size() - window.
    for (unsigned u = 0; u + window <= to_be_maxed64.size(); u++) {
        const uint8_t * local_beg = to_be_maxed64.data() + u;
        const uint8_t * local_end = local_beg + window;

        const uint8_t max64_value = do_max_64(local_beg);
        const uint8_t max_64_test = *max_element(local_beg, local_end);

        REQUIRE(max_64_test == max64_value);
    }
}

View file

@ -32,47 +32,51 @@ using namespace std;
// Standalone testbench entry point.
// Fills buffers with pseudo-random values, compares do_max_64 with
// max_element, prints a verdict and returns the number of mismatches
// (0 when every comparison agreed).
int main(int, char **) {
// Seed the C pseudo-random generator from the current time.
srand(time(nullptr));
vector<uint8_t> to_be_maxed64(64, 0);
vector<uint8_t> to_be_maxed64(64 * 8, 0);
for (auto && it : to_be_maxed64) {
it = uint8_t(floor(float(rand()) / float(RAND_MAX) * 256.f));
}
vector<uint8_t> to_be_maxed63(63, 0);
for (auto && it : to_be_maxed63) {
it = uint8_t(floor(float(rand()) / float(RAND_MAX) * 256.f));
}
// vector<uint8_t> to_be_maxed63(63, 0);
// for (auto && it : to_be_maxed63) {
// it = uint8_t(floor(float(rand()) / float(RAND_MAX) * 256.f));
// }
vector<float> to_be_maxed64f(64, 0);
for (auto && it : to_be_maxed64f) {
it = float(rand()) / float(RAND_MAX) * 1000.f;
}
// vector<float> to_be_maxed64f(64, 0);
// for (auto && it : to_be_maxed64f) {
// it = float(rand()) / float(RAND_MAX) * 1000.f;
// }
vector<float> to_be_maxed63f(63, 0);
for (auto && it : to_be_maxed63f) {
it = float(rand()) / float(RAND_MAX) * 1000.f;
}
const uint8_t max64_value = do_max_64(to_be_maxed64.data());
// const uint8_t max63_value = do_max_63(to_be_maxed63.data());
// const float max64f_value = do_max_64f(to_be_maxed64f.data());
// const float max63f_value = do_max_63f(to_be_maxed63f.data());
// cout << unsigned(max64_value) << " " << unsigned(max63_value) << endl;
// cout << float(max64f_value) << " " << float(max63f_value) << endl;
// vector<float> to_be_maxed63f(63, 0);
// for (auto && it : to_be_maxed63f) {
// it = float(rand()) / float(RAND_MAX) * 1000.f;
// }
// Mismatch counter; it is also the process exit status.
int retval = 0;
// Slide a 64-element window over the buffer, one start index per iteration.
for (unsigned u = 0; u < to_be_maxed64.size() - 64; u++) {
const uint8_t * local_beg = to_be_maxed64.data() + u;
const uint8_t * local_end = local_beg + 64;
const uint8_t max_64_test = *max_element(to_be_maxed64.begin(), to_be_maxed64.end());
// const uint8_t max_63_test = *max_element(to_be_maxed63.begin(), to_be_maxed63.end());
// const float max_64f_test = *max_element(to_be_maxed64f.begin(), to_be_maxed64f.end());
// const float max_63f_test = *max_element(to_be_maxed63f.begin(), to_be_maxed63f.end());
// Value under test for the window starting at local_beg.
const uint8_t max64_value = do_max_64(local_beg);
// const uint8_t max63_value = do_max_63(to_be_maxed63.data());
// const float max64f_value = do_max_64f(to_be_maxed64f.data());
// const float max63f_value = do_max_63f(to_be_maxed63f.data());
retval += (max_64_test == max64_value ? 0 : 1);
// retval += (max_63_test == max63_value ? 0 : 1);
// retval += (max_64f_test == max64f_value ? 0 : 1);
// retval += (max_63f_test == max63f_value ? 0 : 1);
// cout << unsigned(max64_value) << " " << unsigned(max63_value) << endl;
// cout << float(max64f_value) << " " << float(max63f_value) << endl;
cout << (retval == 0 ? "Test passed. " : "Test failed. ") << unsigned(max_64_test) << " vs " << unsigned(max64_value) << endl;
// Reference value: maximum of the same window [local_beg, local_end).
const uint8_t max_64_test = *max_element(local_beg, local_end);
// const uint8_t max_63_test = *max_element(to_be_maxed63.begin(), to_be_maxed63.end());
// const float max_64f_test = *max_element(to_be_maxed64f.begin(), to_be_maxed64f.end());
// const float max_63f_test = *max_element(to_be_maxed63f.begin(), to_be_maxed63f.end());
// Count one failure for each window where the two maxima differ.
retval += (max_64_test == max64_value ? 0 : 1);
// retval += (max_63_test == max63_value ? 0 : 1);
// retval += (max_64f_test == max64f_value ? 0 : 1);
// retval += (max_63f_test == max63f_value ? 0 : 1);
}
cout << (retval == 0 ? "Test passed. " : "Test failed. ") << endl;
return retval;
}