commit 5ae3fee9fa4a526b7fff5fea04ea9f6ebdf9ec21
Author: Yuanjie Huang
Date:   Wed Nov 28 20:24:04 2018 -0800

    init with headers from 2019.1 (CL 2399090)

diff --git a/include/ap_common.h b/include/ap_common.h
new file mode 100644
index 0000000..f21cdba
--- /dev/null
+++ b/include/ap_common.h
@@ -0,0 +1,764 @@
+/*
+#- (c) Copyright 2011-2019 Xilinx, Inc. All rights reserved.
+#-
+#- This file contains confidential and proprietary information
+#- of Xilinx, Inc. and is protected under U.S. and
+#- international copyright and other intellectual property
+#- laws.
+#-
+#- DISCLAIMER
+#- This disclaimer is not a license and does not grant any
+#- rights to the materials distributed herewith. Except as
+#- otherwise provided in a valid license issued to you by
+#- Xilinx, and to the maximum extent permitted by applicable
+#- law: (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND
+#- WITH ALL FAULTS, AND XILINX HEREBY DISCLAIMS ALL WARRANTIES
+#- AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING
+#- BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-
+#- INFRINGEMENT, OR FITNESS FOR ANY PARTICULAR PURPOSE; and
+#- (2) Xilinx shall not be liable (whether in contract or tort,
+#- including negligence, or under any other theory of
+#- liability) for any loss or damage of any kind or nature
+#- related to, arising under or in connection with these
+#- materials, including for any direct, or any indirect,
+#- special, incidental, or consequential loss or damage
+#- (including loss of data, profits, goodwill, or any type of
+#- loss or damage suffered as a result of any action brought
+#- by a third party) even if such damage or loss was
+#- reasonably foreseeable or Xilinx had been advised of the
+#- possibility of the same.
+#-
+#- CRITICAL APPLICATIONS
+#- Xilinx products are not designed or intended to be fail-
+#- safe, or for use in any application requiring fail-safe
+#- performance, such as life-support or safety devices or
+#- systems, Class III medical devices, nuclear facilities,
+#- applications related to the deployment of airbags, or any
+#- other applications that could lead to death, personal
+#- injury, or severe property or environmental damage
+#- (individually and collectively, "Critical
+#- Applications"). Customer assumes the sole risk and
+#- liability of any use of Xilinx products in Critical
+#- Applications, subject only to applicable laws and
+#- regulations governing limitations on product liability.
+#-
+#- THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS
+#- PART OF THIS FILE AT ALL TIMES.
+#- ************************************************************************
+
+ */
+
+#ifndef __AP_COMMON_H__
+#define __AP_COMMON_H__
+
+// ----------------------------------------------------------------------
+
+#include <ap_decl.h>
+
+// Macro functions
+#define AP_MAX(a, b) ((a) > (b) ? (a) : (b))
+#define AP_MIN(a, b) ((a) < (b) ? (a) : (b))
+#define AP_ABS(a) ((a) >= 0 ? (a) : -(a))
+
+#ifndef AP_ASSERT
+#ifndef __SYNTHESIS__
+#include <assert.h>
+#define AP_ASSERT(cond, msg) assert((cond) && (msg))
+#else
+#define AP_ASSERT(cond, msg)
+#endif // ifndef __SYNTHESIS__
+#endif // ifndef AP_ASSERT
+
+#ifndef __SYNTHESIS__
+// for fprintf messages.
+#include <stdio.h>
+// for exit on error.
+#include <stdlib.h>
+#endif
+
+// same disable condition as assert.
+#if !defined(__SYNTHESIS__) && !defined(NDEBUG)
+
+#define _AP_DEBUG(cond, ...)                    \
+  do {                                          \
+    if ((cond)) {                               \
+      fprintf(stderr, "DEBUG: " __VA_ARGS__);   \
+      fprintf(stderr, "\n");                    \
+    }                                           \
+  } while (0)
+#define _AP_WARNING(cond, ...)                  \
+  do {                                          \
+    if ((cond)) {                               \
+      fprintf(stderr, "WARNING: " __VA_ARGS__); \
+      fprintf(stderr, "\n");                    \
+    }                                           \
+  } while (0)
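+// Editorial illustration (not part of the original header): these helpers are
+// printf-style and compile away entirely under __SYNTHESIS__ or NDEBUG (see
+// the #else branch below), so guards can stay in shipped code. A typical call
+// site might look like:
+//
+//   _AP_WARNING(radix != 2 && radix != 8 && radix != 10 && radix != 16,
+//               "unknown radix %d, assuming decimal", radix);
+//   _AP_DEBUG(w > 64, "wide path taken, w = %d", w);
+//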
+#define _AP_ERROR(cond, ...)                  \
+  do {                                        \
+    if ((cond)) {                             \
+      fprintf(stderr, "ERROR: " __VA_ARGS__); \
+      fprintf(stderr, "\n");                  \
+      abort();                                \
+    }                                         \
+  } while (0)
+
+#else // if !defined(__SYNTHESIS__) && !defined(NDEBUG)
+
+#define __AP_VOID_CAST static_cast<void>
+#define _AP_DEBUG(cond, ...) (__AP_VOID_CAST(0))
+#define _AP_WARNING(cond, ...) (__AP_VOID_CAST(0))
+#define _AP_ERROR(cond, ...) (__AP_VOID_CAST(0))
+
+#endif // if !defined(__SYNTHESIS__) && !defined(NDEBUG) else
+
+// ----------------------------------------------------------------------
+
+// Attribute only for synthesis
+#ifdef __SYNTHESIS__
+#define INLINE inline __attribute__((always_inline))
+//#define INLINE inline __attribute__((noinline))
+#else
+#define INLINE inline
+#endif
+
+#define AP_WEAK
+// __attribute__((weak))
+
+#ifndef AP_INT_MAX_W
+#define AP_INT_MAX_W 1024
+#endif
+
+#define BIT_WIDTH_UPPER_LIMIT (1 << 15)
+#if AP_INT_MAX_W > BIT_WIDTH_UPPER_LIMIT
+#error "Bitwidth exceeds 32768 (1 << 15), the maximum allowed value"
+#endif
+
+#define MAX_MODE(BITS) ((BITS + 1023) / 1024)
+
+// ----------------------------------------------------------------------
+
+// XXX apcc cannot handle global std::ios_base::Init() brought in by <iostream>
+#ifndef AP_AUTOCC
+#ifndef __SYNTHESIS__
+// for overload operator<<
+#include <iostream>
+#endif
+#endif // ifndef AP_AUTOCC
+
+#ifndef __SYNTHESIS__
+// for string format.
+#include <sstream>
+// for string.
+#include <string>
+#endif
+
+// for detecting if char is signed.
+#include <limits.h>
+enum { CHAR_IS_SIGNED = CHAR_MIN < 0 };
+
+// TODO we have similar traits in x_hls_utils.h; consider unifying them.
+namespace _ap_type {
+template <typename _Tp>
+struct is_signed {
+  static const bool value = _Tp(-1) < _Tp(1);
+};
+
+template <typename _Tp>
+struct is_integral {
+  static const bool value = false;
+};
+#define DEF_IS_INTEGRAL(CTYPE)      \
+  template <>                       \
+  struct is_integral<CTYPE> {       \
+    static const bool value = true; \
+  };
+DEF_IS_INTEGRAL(bool)
+DEF_IS_INTEGRAL(char)
+DEF_IS_INTEGRAL(signed char)
+DEF_IS_INTEGRAL(unsigned char)
+DEF_IS_INTEGRAL(short)
+DEF_IS_INTEGRAL(unsigned short)
+DEF_IS_INTEGRAL(int)
+DEF_IS_INTEGRAL(unsigned int)
+DEF_IS_INTEGRAL(long)
+DEF_IS_INTEGRAL(unsigned long)
+DEF_IS_INTEGRAL(ap_slong)
+DEF_IS_INTEGRAL(ap_ulong)
+#undef DEF_IS_INTEGRAL
+
+template <bool, typename _Tp = void>
+struct enable_if {};
+// partial specialization for true
+template <typename _Tp>
+struct enable_if<true, _Tp> {
+  typedef _Tp type;
+};
+
+template <typename _Tp>
+struct remove_const {
+  typedef _Tp type;
+};
+
+template <typename _Tp>
+struct remove_const<_Tp const> {
+  typedef _Tp type;
+};
+} // namespace _ap_type
+
+// ----------------------------------------------------------------------
+
+// Define ssdm_int and _ssdm_op.
+#ifdef __SYNTHESIS__
+
+#if ((__clang_major__ == 3) && (__clang_minor__ == 1))
+
+/* HECTOR is a tool for formal system-level to RTL equivalence checking.
+ * https://www.research.ibm.com/haifa/conferences/hvc2008/present/CarlPixleyHVC08.pdf
+ * we used to use Hector.h here instead of the following ssdm_int definition,
+ * but now it is deleted.
+ */ +template +struct ssdm_int; + +#define AP_INT_BASE(_AP_N, mode) \ + template <> \ + struct ssdm_int<_AP_N + 1024 * mode, true> { \ + int V __attribute__((bitwidth(_AP_N + 1024 * mode))); \ + INLINE ssdm_int<_AP_N + 1024 * mode, true>(){}; \ + }; \ + template <> \ + struct ssdm_int<_AP_N + 1024 * mode, false> { \ + unsigned int V __attribute__((bitwidth(_AP_N + 1024 * mode))); \ + INLINE ssdm_int<_AP_N + 1024 * mode, false>(){}; \ + }; + +#if MAX_MODE(AP_INT_MAX_W) >= 1 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 0) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 2 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 1) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 3 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 2) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 4 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 3) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 5 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 4) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 6 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 5) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 7 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 6) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 8 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 7) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 9 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 8) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 10 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 9) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 11 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 10) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 12 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 11) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 13 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 12) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 14 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 13) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 15 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 14) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if 
MAX_MODE(AP_INT_MAX_W) >= 16 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 15) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 17 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 16) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 18 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 17) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 19 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 18) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 20 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 19) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 21 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 20) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 22 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 21) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 23 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 22) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 24 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 23) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 25 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 24) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 26 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 25) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 27 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 26) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 28 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 27) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 29 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 28) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 30 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 29) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 31 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 30) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#if MAX_MODE(AP_INT_MAX_W) >= 32 +#define HANDLE_BAT(TYPE, _AP_N) AP_INT_BASE(_AP_N, 31) +#define APINT_DEFINE_INT64 +#include "etc/autopilot_dt.def" +#undef APINT_DEFINE_INT64 +#undef HANDLE_BAT +#endif + +#undef MAX_MODE +#undef AP_INT_BASE + 
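+// Editorial note on the block selection above: MAX_MODE(BITS) is
+// ((BITS + 1023) / 1024), so the default AP_INT_MAX_W of 1024 gives
+// MAX_MODE = 1 and only the mode-0 block is expanded (widths up to 1024
+// bits). A user who needs wider types raises the cap before including any
+// AP header, e.g. (hypothetical width):
+//
+//   #define AP_INT_MAX_W 4096   // MAX_MODE = 4, expands four blocks
+//   #include <ap_fixed.h>
+//   ap_fixed<3000, 64> wide;    // now representable
+//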
+#else // HLS clang of higher version than 3.1
+
+template <int _AP_N, bool _AP_S>
+struct ssdm_int;
+
+template <int _AP_N>
+struct ssdm_int<_AP_N, true> {
+  int V __attribute__((bitwidth(_AP_N)));
+  INLINE ssdm_int<_AP_N, true>(){};
+};
+
+template <int _AP_N>
+struct ssdm_int<_AP_N, false> {
+  unsigned V __attribute__((bitwidth(_AP_N)));
+  INLINE ssdm_int<_AP_N, false>(){};
+};
+
+#endif // clang 3.1 test
+
+// FIXME typeof is a compiler extension.
+// FIXME using ({}) to return a value is a GCC extension.
+
+#define _ssdm_op_concat(Ret, X, Y)                                \
+  ({                                                              \
+    typeof(Ret) __Result__ = 0;                                   \
+    typeof(X) __X2__ = X;                                         \
+    typeof(Y) __Y2__ = Y;                                         \
+    __builtin_bit_concat((void*)(&__Result__), (void*)(&__X2__),  \
+                         (void*)(&__Y2__));                       \
+    __Result__;                                                   \
+  })
+
+#define _ssdm_op_get_bit(Val, Bit)                                      \
+  ({                                                                    \
+    typeof(Val) __Val2__ = Val;                                         \
+    bool __Result__ = __builtin_bit_select((void*)(&__Val2__), Bit);    \
+    __Result__;                                                         \
+  })
+
+#define _ssdm_op_set_bit(Val, Bit, Repl)                                \
+  ({                                                                    \
+    typename _ap_type::remove_const<typeof(Val)>::type __Result__ = 0;  \
+    typeof(Val) __Val2__ = Val;                                         \
+    typeof(Repl) __Repl2__ = !!Repl;                                    \
+    __builtin_bit_part_set((void*)(&__Result__), (void*)(&__Val2__),    \
+                           (void*)(&__Repl2__), Bit, Bit);              \
+    __Result__;                                                         \
+  })
+
+#define _ssdm_op_get_range(Val, Lo, Hi)                                   \
+  ({                                                                      \
+    typename _ap_type::remove_const<typeof(Val)>::type __Result__ = 0;    \
+    typeof(Val) __Val2__ = Val;                                           \
+    __builtin_bit_part_select((void*)(&__Result__), (void*)(&__Val2__), Lo, \
+                              Hi);                                        \
+    __Result__;                                                           \
+  })
+
+#define _ssdm_op_set_range(Val, Lo, Hi, Repl)                           \
+  ({                                                                    \
+    typename _ap_type::remove_const<typeof(Val)>::type __Result__ = 0;  \
+    typeof(Val) __Val2__ = Val;                                         \
+    typeof(Repl) __Repl2__ = Repl;                                      \
+    __builtin_bit_part_set((void*)(&__Result__), (void*)(&__Val2__),    \
+                           (void*)(&__Repl2__), Lo, Hi);                \
+    __Result__;                                                         \
+  })
+
+#include "etc/autopilot_ssdm_bits.h"
+
+extern "C" void _ssdm_string2bits(...);
+
+#endif // ifdef __SYNTHESIS__
+
+#ifndef NON_C99STRING
+#define _AP_C99 true
+#else
+#define _AP_C99 false
+#endif
+
+static inline unsigned char guess_radix(const char* s) {
+  unsigned char rd = 10; ///< default radix
+  const char* p = s;
+  // skip neg sign if it exists
+  if (p[0] == '-' || p[0] == '+') ++p;
+  // guess based on the following two characters.
+  if (p[0] == '0') {
+    if (p[1] == 'b' || p[1] == 'B') {
+      rd = 2;
+    } else if (p[1] == 'o' || p[1] == 'O') {
+      rd = 8;
+    } else if (p[1] == 'x' || p[1] == 'X') {
+      rd = 16;
+    } else if (p[1] == 'd' || p[1] == 'D') {
+      rd = 10;
+    }
+  }
+  return rd;
+}
+
+// ----------------------------------------------------------------------
+
+// Forward declaration of all AP types.
+// Before ap_private definition.
+#ifdef __SYNTHESIS__
+#define _HLS_HALF_DEFINED_
+typedef __fp16 half;
+#else
+class half;
+#endif
+
+// FIXME previously, ap_int_syn.h includes hls_half.h, which includes cmath.h
+// even during synthesis. Some test cases are spoiled...
+#ifdef __cplusplus
+#ifndef __SYNTHESIS__
+#include <cmath>
+#endif
+#endif
+
+// ----------------------------------------------------------------------
+
+// Basic integral struct upon which ap_int and ap_fixed are defined.
+#ifdef __SYNTHESIS__
+// Use ssdm_int, a compiler-dependent, attribute-constrained integral type as
+// basic data type.
+#define _AP_ROOT_TYPE ssdm_int
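+// Editorial sketch (illustrative, not part of the original header): with this
+// root type, an AP integer of width 8 ultimately holds an ssdm_int<8, true>,
+// whose V member is the attribute-constrained raw bit container, e.g.
+//
+//   _AP_ROOT_TYPE<8, true> w; // i.e. ssdm_int<8, true> under __SYNTHESIS__
+//   w.V = -1;                 // all eight bits set
+//
+// The #else branch below substitutes the C++-only ssdm_int_sim model so the
+// same code compiles for simulation.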
+// Basic ops.
+#define _AP_ROOT_op_concat(Ret, X, Y) _ssdm_op_concat(Ret, X, Y)
+#define _AP_ROOT_op_get_bit(Val, Bit) _ssdm_op_get_bit(Val, Bit)
+#define _AP_ROOT_op_set_bit(Val, Bit, Repl) _ssdm_op_set_bit(Val, Bit, Repl)
+#define _AP_ROOT_op_get_range(Val, Lo, Hi) _ssdm_op_get_range(Val, Lo, Hi)
+#define _AP_ROOT_op_set_range(Val, Lo, Hi, Repl) \
+  _ssdm_op_set_range(Val, Lo, Hi, Repl)
+#define _AP_ROOT_op_reduce(Op, Val) _ssdm_op_reduce(Op, Val)
+#else // ifdef __SYNTHESIS__
+// Use ap_private for compiler-independent basic data type
+template <int _AP_W, bool _AP_S, bool _AP_C = _AP_W <= 64>
+class ap_private;
+/// model ssdm_int in standard C++ for simulation.
+template <int _AP_W, bool _AP_S>
+struct ssdm_int_sim {
+  /// integral type with template-specified width and signedness.
+  ap_private<_AP_W, _AP_S> V;
+  ssdm_int_sim() {}
+};
+#define _AP_ROOT_TYPE ssdm_int_sim
+// private's ref uses _AP_ROOT_TYPE.
+#include <etc/ap_private.h>
+// XXX The C-sim model cannot use GCC-extension
+// Basic ops. Ret and Val are ap_private.
+template <typename _Tp1, typename _Tp2, typename _Tp3>
+inline _Tp1 _AP_ROOT_op_concat(const _Tp1& Ret, const _Tp2& X, const _Tp3& Y) {
+  _Tp1 r = (X).operator,(Y);
+  return r;
+}
+#define _AP_ROOT_op_get_bit(Val, Bit) (Val).get_bit((Bit))
+template <typename _Tp1, typename _Tp2, typename _Tp3>
+inline _Tp1& _AP_ROOT_op_set_bit(_Tp1& Val, const _Tp2& Bit, const _Tp3& Repl) {
+  (Val).set_bit((Bit), (Repl));
+  return Val;
+}
+// notice the order of high and low index is different in ssdm call and
+// ap_private.range()...
+#define _AP_ROOT_op_get_range(Val, Lo, Hi) (Val).range((Hi), (Lo))
+template <typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4>
+inline _Tp1& _AP_ROOT_op_set_range(_Tp1& Val, const _Tp2& Lo, const _Tp3& Hi,
+                                   const _Tp4& Repl) {
+  (Val).range((Hi), (Lo)) = Repl;
+  return (Val);
+}
+#define _AP_ROOT_op_and_reduce(Val) (Val).and_reduce()
+#define _AP_ROOT_op_nand_reduce(Val) (Val).nand_reduce()
+#define _AP_ROOT_op_or_reduce(Val) (Val).or_reduce()
+#define _AP_ROOT_op_xor_reduce(Val) (Val).xor_reduce()
+// ## is token concatenation in the preprocessor:
+#define _AP_ROOT_op_reduce(Op, Val) _AP_ROOT_op_##Op##_reduce(Val)
+#endif // ifdef __SYNTHESIS__ else
+
+// ----------------------------------------------------------------------
+
+// Constants for half, single, double precision floating points
+#define HALF_MAN 10
+#define FLOAT_MAN 23
+#define DOUBLE_MAN 52
+
+#define HALF_EXP 5
+#define FLOAT_EXP 8
+#define DOUBLE_EXP 11
+
+#define BIAS(e) ((1L << (e - 1L)) - 1L)
+#define HALF_BIAS BIAS(HALF_EXP)
+#define FLOAT_BIAS BIAS(FLOAT_EXP)
+#define DOUBLE_BIAS BIAS(DOUBLE_EXP)
+
+#define APFX_IEEE_DOUBLE_E_MAX DOUBLE_BIAS
+#define APFX_IEEE_DOUBLE_E_MIN (-DOUBLE_BIAS + 1)
+
+INLINE ap_ulong doubleToRawBits(double pf) {
+  union {
+    ap_ulong __L;
+    double __D;
+  } LD;
+  LD.__D = pf;
+  return LD.__L;
+}
+
+INLINE unsigned int floatToRawBits(float pf) {
+  union {
+    unsigned int __L;
+    float __D;
+  } LD;
+  LD.__D = pf;
+  return LD.__L;
+}
+
+INLINE unsigned short halfToRawBits(half pf) {
+#ifdef __SYNTHESIS__
+  union {
+    unsigned short __L;
+    half __D;
+  } LD;
+  LD.__D = pf;
+  return LD.__L;
+#else
+  return pf.get_bits();
+#endif
+}
+
+// unsigned long long is at least 64-bit
+INLINE double rawBitsToDouble(ap_ulong pi) {
+  union {
+    ap_ulong __L;
+    double __D;
+  } LD;
+  LD.__L = pi;
+  return LD.__D;
+}
+
+// long is at least 32-bit
+INLINE float rawBitsToFloat(unsigned long pi) {
+  union {
+    unsigned int __L;
+    float __D;
+  } LD;
+  LD.__L = pi;
+  return LD.__D;
+}
+
+// short is at least 16-bit
+INLINE half rawBitsToHalf(unsigned short pi) {
+#ifdef __SYNTHESIS__
+  union {
+    unsigned short __L;
+    half __D;
+  } LD;
+  LD.__L = pi;
+  return LD.__D;
+#else
+  // sim model of half has a non-trivial constructor
half __D; + __D.set_bits(pi); + return __D; +#endif +} + +#endif // ifndef __AP_COMMON_H__ else + +// -*- cpp -*- +// vim: fdm=marker:foldmarker=#if,#endif:nofoldenable + +// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/include/ap_decl.h b/include/ap_decl.h new file mode 100644 index 0000000..526ffa9 --- /dev/null +++ b/include/ap_decl.h @@ -0,0 +1,246 @@ +/* +#- (c) Copyright 2011-2019 Xilinx, Inc. All rights reserved. +#- +#- This file contains confidential and proprietary information +#- of Xilinx, Inc. and is protected under U.S. and +#- international copyright and other intellectual property +#- laws. +#- +#- DISCLAIMER +#- This disclaimer is not a license and does not grant any +#- rights to the materials distributed herewith. Except as +#- otherwise provided in a valid license issued to you by +#- Xilinx, and to the maximum extent permitted by applicable +#- law: (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND +#- WITH ALL FAULTS, AND XILINX HEREBY DISCLAIMS ALL WARRANTIES +#- AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING +#- BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON- +#- INFRINGEMENT, OR FITNESS FOR ANY PARTICULAR PURPOSE; and +#- (2) Xilinx shall not be liable (whether in contract or tort, +#- including negligence, or under any other theory of +#- liability) for any loss or damage of any kind or nature +#- related to, arising under or in connection with these +#- materials, including for any direct, or any indirect, +#- special, incidental, or consequential loss or damage +#- (including loss of data, profits, goodwill, or any type of +#- loss or damage suffered as a result of any action brought +#- by a third party) even if such damage or loss was +#- reasonably foreseeable or Xilinx had been advised of the +#- possibility of the same. +#- +#- CRITICAL APPLICATIONS +#- Xilinx products are not designed or intended to be fail- +#- safe, or for use in any application requiring fail-safe +#- performance, such as life-support or safety devices or +#- systems, Class III medical devices, nuclear facilities, +#- applications related to the deployment of airbags, or any +#- other applications that could lead to death, personal +#- injury, or severe property or environmental damage +#- (individually and collectively, "Critical +#- Applications"). Customer assumes the sole risk and +#- liability of any use of Xilinx products in Critical +#- Applications, subject only to applicable laws and +#- regulations governing limitations on product liability. +#- +#- THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS +#- PART OF THIS FILE AT ALL TIMES. +#- ************************************************************************ + + */ + +#ifndef __AP_DECL_H__ +#define __AP_DECL_H__ + +// ---------------------------------------------------------------------- + +#if !defined(__AP_FIXED_H__) && !defined(__AP_INT_H__) && !defined(__AUTOPILOT_CBE_H__) && !defined(__HLS_HALF_H__) +#error "Only ap_fixed.h and ap_int.h can be included directly in user code." +#endif + +// Test __SYNTHESIS__ only for mode +#if !defined(__SYNTHESIS__) && (defined(AESL_SYN) || defined(__HLS_SYN__)) +//#pragma message "AESL_SYN and __HLS_SYN__ should be replaced by __SYNTHESIS__" +#define __SYNTHESIS__ +#endif + +/* for safety*/ +#if (defined(_AP_N) || defined(_AP_C)) +#error One or more of the following is defined: _AP_N, _AP_C. Definition conflicts with their usage as template parameters. 
+#endif
+
+/* for safety*/
+#if (defined(_AP_W) || defined(_AP_I) || defined(_AP_S) || defined(_AP_Q) || \
+     defined(_AP_O) || defined(_AP_W2) || defined(_AP_I2) ||                 \
+     defined(_AP_S2) || defined(_AP_Q2) || defined(_AP_O2) ||                \
+     defined(_AP_N) || defined(_AP_N2))
+#error \
+    "One or more of the following is defined: _AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N, _AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2. Definition conflicts with their usage as template parameters."
+#endif
+
+/*for safety*/
+#if (defined(_AP_W3) || defined(_AP_S3) || defined(_AP_W4) || defined(_AP_S4))
+#error \
+    "One or more of the following is defined: _AP_W3, _AP_S3, _AP_W4, _AP_S4. Definition conflicts with their usage as template parameters."
+#endif
+
+#if (defined(_AP_W1) || defined(_AP_S1) || defined(_AP_T) || \
+    defined(_AP_T1) || defined(_AP_T2) || defined(_AP_T3) || defined(_AP_T4))
+#error \
+    "One or more of the following is defined: _AP_W1, _AP_S1, _AP_T, _AP_T1, _AP_T2, _AP_T3, _AP_T4. Definition conflicts with their usage as template parameters."
+#endif
+
+#ifndef __cplusplus
+#error "AP data type can only be used in C++"
+#endif
+
+// ----------------------------------------------------------------------
+
+#ifndef __SC_COMPATIBLE__
+/// ap_fixed quantization mode
+enum ap_q_mode {
+  AP_RND,         //< rounding to plus infinity
+  AP_RND_ZERO,    //< rounding to zero
+  AP_RND_MIN_INF, //< rounding to minus infinity
+  AP_RND_INF,     //< rounding to infinity
+  AP_RND_CONV,    //< convergent rounding
+  AP_TRN,         //< truncation
+  AP_TRN_ZERO,    //< truncation to zero
+};
+
+// FIXME for legacy code
+#ifndef SYSTEMC_INCLUDED
+#define SC_RND AP_RND
+#define SC_RND_ZERO AP_RND_ZERO
+#define SC_RND_MIN_INF AP_RND_MIN_INF
+#define SC_RND_INF AP_RND_INF
+#define SC_RND_CONV AP_RND_CONV
+#define SC_TRN AP_TRN
+#define SC_TRN_ZERO AP_TRN_ZERO
+#endif // !defined(SYSTEMC_INCLUDED)
+
+/// ap_fixed saturation mode
+enum ap_o_mode {
+  AP_SAT,      //< saturation
+  AP_SAT_ZERO, //< saturation to zero
+  AP_SAT_SYM,  //< symmetrical saturation
+  AP_WRAP,     //< wrap-around (*)
+  AP_WRAP_SM,  //< sign magnitude wrap-around (*)
+};
+
+// FIXME for legacy code
+#ifndef SYSTEMC_INCLUDED
+#define SC_SAT AP_SAT
+#define SC_SAT_ZERO AP_SAT_ZERO
+#define SC_SAT_SYM AP_SAT_SYM
+#define SC_WRAP AP_WRAP
+#define SC_WRAP_SM AP_WRAP_SM
+#endif // !defined(SYSTEMC_INCLUDED)
+
+#else // defined(__SC_COMPATIBLE__)
+
+// There will not be an sc_fxdefs.h, so the enums should be defined by
+// ap_fixed itself.
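+// Editorial note: in this __SC_COMPATIBLE__ branch the SC_* names are the
+// real enumerators and the AP_* names below are #defined onto them (the
+// non-SC branch above does the reverse), so for example
+//
+//   ap_fixed<8, 4, AP_RND, AP_SAT> x;  // identical type to
+//   ap_fixed<8, 4, SC_RND, SC_SAT> y;  // this one, either way
+//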
+
+/// ap_fixed quantization mode
+enum ap_q_mode {
+  SC_RND,         //< rounding to plus infinity
+  SC_RND_ZERO,    //< rounding to zero
+  SC_RND_MIN_INF, //< rounding to minus infinity
+  SC_RND_INF,     //< rounding to infinity
+  SC_RND_CONV,    //< convergent rounding
+  SC_TRN,         //< truncation
+  SC_TRN_ZERO,    //< truncation to zero
+};
+
+#define AP_RND SC_RND
+#define AP_RND_ZERO SC_RND_ZERO
+#define AP_RND_MIN_INF SC_RND_MIN_INF
+#define AP_RND_INF SC_RND_INF
+#define AP_RND_CONV SC_RND_CONV
+#define AP_TRN SC_TRN
+#define AP_TRN_ZERO SC_TRN_ZERO
+
+/// ap_fixed saturation mode
+enum ap_o_mode {
+  SC_SAT,      //< saturation
+  SC_SAT_ZERO, //< saturation to zero
+  SC_SAT_SYM,  //< symmetrical saturation
+  SC_WRAP,     //< wrap-around (*)
+  SC_WRAP_SM,  //< sign magnitude wrap-around (*)
+};
+
+#define AP_SAT SC_SAT
+#define AP_SAT_ZERO SC_SAT_ZERO
+#define AP_SAT_SYM SC_SAT_SYM
+#define AP_WRAP SC_WRAP
+#define AP_WRAP_SM SC_WRAP_SM
+
+#endif // defined(__SC_COMPATIBLE__)
+
+template <int _AP_W, bool _AP_S = true>
+struct ap_int_base;
+
+template <int _AP_W>
+struct ap_int;
+
+template <int _AP_W>
+struct ap_uint;
+
+template <int _AP_W, bool _AP_S>
+struct ap_range_ref;
+
+template <int _AP_W, bool _AP_S>
+struct ap_bit_ref;
+
+template <int _AP_W, typename _AP_T, int _AP_W2, typename _AP_T2>
+struct ap_concat_ref;
+
+template <int _AP_W, int _AP_I, bool _AP_S = true, ap_q_mode _AP_Q = AP_TRN,
+          ap_o_mode _AP_O = AP_WRAP, int _AP_N = 0>
+struct ap_fixed_base;
+
+template <int _AP_W, int _AP_I, ap_q_mode _AP_Q = AP_TRN,
+          ap_o_mode _AP_O = AP_WRAP, int _AP_N = 0>
+struct ap_fixed;
+
+template <int _AP_W, int _AP_I, ap_q_mode _AP_Q = AP_TRN,
+          ap_o_mode _AP_O = AP_WRAP, int _AP_N = 0>
+struct ap_ufixed;
+
+template <int _AP_W, int _AP_I, bool _AP_S, ap_q_mode _AP_Q, ap_o_mode _AP_O,
+          int _AP_N>
+struct af_range_ref;
+
+template <int _AP_W, int _AP_I, bool _AP_S, ap_q_mode _AP_Q, ap_o_mode _AP_O,
+          int _AP_N>
+struct af_bit_ref;
+
+/// string base mode
+enum BaseMode { AP_BIN = 2, AP_OCT = 8, AP_DEC = 10, AP_HEX = 16 };
+
+#ifndef SYSTEMC_INCLUDED
+#define SC_BIN 2
+#define SC_OCT 8
+#define SC_DEC 10
+#define SC_HEX 16
+#endif // !defined(SYSTEMC_INCLUDED)
+
+// Alias C data types
+#ifdef _MSC_VER
+typedef signed __int64 ap_slong;
+typedef unsigned __int64 ap_ulong;
+#else // !defined(_MSC_VER)
+typedef signed long long ap_slong;
+typedef unsigned long long ap_ulong;
+#endif // !defined(_MSC_VER)
+
+enum {
+  _AP_SIZE_char = 8,
+  _AP_SIZE_short = sizeof(short) * 8,
+  _AP_SIZE_int = sizeof(int) * 8,
+  _AP_SIZE_long = sizeof(long) * 8,
+  _AP_SIZE_ap_slong = sizeof(ap_slong) * 8
+};
+
+#endif // !defined(__AP_DECL_H__)
+
+// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689
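Editorial example for the mode enums declared above (illustrative, not part of
the commit; uses the ap_fixed type defined later in ap_fixed.h, with a 4-bit
word of which 2 bits are integer, so the range is [-2, 1.75] in 0.25 steps):

    ap_fixed<4, 2, AP_RND, AP_SAT> a = 1.24;   // rounds up to 1.25
    ap_fixed<4, 2, AP_TRN, AP_SAT> b = 1.24;   // truncates down to 1.0
    ap_fixed<4, 2, AP_TRN, AP_SAT> c = 5.0;    // saturates at 1.75, the max
    ap_fixed<4, 2, AP_TRN, AP_WRAP> d = 5.0;   // wraps; low bits give 1.0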
diff --git a/include/ap_fixed.h b/include/ap_fixed.h
new file mode 100644
index 0000000..0980444
--- /dev/null
+++ b/include/ap_fixed.h
@@ -0,0 +1,384 @@
+/*
+#- (c) Copyright 2011-2019 Xilinx, Inc. All rights reserved.
+#-
+#- This file contains confidential and proprietary information
+#- of Xilinx, Inc. and is protected under U.S. and
+#- international copyright and other intellectual property
+#- laws.
+#-
+#- DISCLAIMER
+#- This disclaimer is not a license and does not grant any
+#- rights to the materials distributed herewith. Except as
+#- otherwise provided in a valid license issued to you by
+#- Xilinx, and to the maximum extent permitted by applicable
+#- law: (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND
+#- WITH ALL FAULTS, AND XILINX HEREBY DISCLAIMS ALL WARRANTIES
+#- AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING
+#- BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-
+#- INFRINGEMENT, OR FITNESS FOR ANY PARTICULAR PURPOSE; and
+#- (2) Xilinx shall not be liable (whether in contract or tort,
+#- including negligence, or under any other theory of
+#- liability) for any loss or damage of any kind or nature
+#- related to, arising under or in connection with these
+#- materials, including for any direct, or any indirect,
+#- special, incidental, or consequential loss or damage
+#- (including loss of data, profits, goodwill, or any type of
+#- loss or damage suffered as a result of any action brought
+#- by a third party) even if such damage or loss was
+#- reasonably foreseeable or Xilinx had been advised of the
+#- possibility of the same.
+#-
+#- CRITICAL APPLICATIONS
+#- Xilinx products are not designed or intended to be fail-
+#- safe, or for use in any application requiring fail-safe
+#- performance, such as life-support or safety devices or
+#- systems, Class III medical devices, nuclear facilities,
+#- applications related to the deployment of airbags, or any
+#- other applications that could lead to death, personal
+#- injury, or severe property or environmental damage
+#- (individually and collectively, "Critical
+#- Applications"). Customer assumes the sole risk and
+#- liability of any use of Xilinx products in Critical
+#- Applications, subject only to applicable laws and
+#- regulations governing limitations on product liability.
+#-
+#- THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS
+#- PART OF THIS FILE AT ALL TIMES.
+#- ************************************************************************
+
+*/
+
+#ifndef __AP_FIXED_H__
+#define __AP_FIXED_H__
+
+#include <ap_common.h>
+#include <ap_fixed_base.h>
+#include <ap_fixed_ref.h>
+
+//---------------------------------------------------------------
+
+/// Signed Arbitrary Precision Fixed-Point Type.
+// default for _AP_Q, _AP_O and _AP_N set in ap_decl.h
+template <int _AP_W, int _AP_I, ap_q_mode _AP_Q, ap_o_mode _AP_O, int _AP_N>
+struct ap_fixed : ap_fixed_base<_AP_W, _AP_I, true, _AP_Q, _AP_O, _AP_N> {
+  typedef ap_fixed_base<_AP_W, _AP_I, true, _AP_Q, _AP_O, _AP_N> Base;
+  // Constructor
+  /// default ctor
+  INLINE ap_fixed() : Base() {}
+
+  /// copy ctor from ap_fixed_base.
+  template <int _AP_W2, int _AP_I2, bool _AP_S2, ap_q_mode _AP_Q2,
+            ap_o_mode _AP_O2, int _AP_N2>
+  INLINE ap_fixed(const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2,
+                                      _AP_O2, _AP_N2>& op)
+      : Base(op) {}
+
+  template <int _AP_W2, int _AP_I2, bool _AP_S2, ap_q_mode _AP_Q2,
+            ap_o_mode _AP_O2, int _AP_N2>
+  INLINE ap_fixed(const volatile ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2,
+                                               _AP_O2, _AP_N2>& op)
+      : Base(op) {}
+
+  //// from ap_fixed
+  //template <int _AP_W2, int _AP_I2, ap_q_mode _AP_Q2, ap_o_mode _AP_O2,
+  //          int _AP_N2>
+  //INLINE ap_fixed(
+  //    const ap_fixed<_AP_W2, _AP_I2, _AP_Q2, _AP_O2, _AP_N2>& op)
+  //    : Base(ap_fixed_base<_AP_W2, _AP_I2, true, _AP_Q2, _AP_O2, _AP_N2>(op)) {}
+
+  //template <int _AP_W2, int _AP_I2, ap_q_mode _AP_Q2, ap_o_mode _AP_O2,
+  //          int _AP_N2>
+  //INLINE ap_fixed(
+  //    const volatile ap_fixed<_AP_W2, _AP_I2, _AP_Q2, _AP_O2, _AP_N2>& op)
+  //    : Base(ap_fixed_base<_AP_W2, _AP_I2, true, _AP_Q2, _AP_O2, _AP_N2>(op)) {}
+
+  //// from ap_ufixed.
+ //template + //INLINE ap_fixed( + // const ap_ufixed<_AP_W2, _AP_I2, _AP_Q2, _AP_O2, _AP_N2>& op) + // : Base(ap_fixed_base<_AP_W2, _AP_I2, false, _AP_Q2, _AP_O2, _AP_N2>(op)) { + //} + + //template + //INLINE ap_fixed( + // const volatile ap_ufixed<_AP_W2, _AP_I2, _AP_Q2, _AP_O2, _AP_N2>& op) + // : Base(ap_fixed_base<_AP_W2, _AP_I2, false, _AP_Q2, _AP_O2, _AP_N2>(op)) { + //} + + /// copy ctor from ap_int_base. + template + INLINE ap_fixed(const ap_int_base<_AP_W2, _AP_S2>& op) : Base(op) {} + + template + INLINE ap_fixed(const volatile ap_int_base<_AP_W2, _AP_S2>& op) : Base(op) {} + + //// from ap_int. + //template + //INLINE ap_fixed(const ap_int<_AP_W2>& op) + // : Base(ap_int_base<_AP_W2, true>(op)) {} + + //template + //INLINE ap_fixed(const volatile ap_int<_AP_W2>& op) + // : Base(ap_int_base<_AP_W2, true>(op)) {} + + //// from ap_uint. + //template + //INLINE ap_fixed(const ap_uint<_AP_W2>& op) + // : Base(ap_int_base<_AP_W2, false>(op)) {} + + //template + //INLINE ap_fixed(const volatile ap_uint<_AP_W2>& op) + // : Base(ap_int_base<_AP_W2, false>(op)) {} + + // from ap_bit_ref. + template + INLINE ap_fixed(const ap_bit_ref<_AP_W2, _AP_S2>& op) : Base(op) {} + + // from ap_range_ref. + template + INLINE ap_fixed(const ap_range_ref<_AP_W2, _AP_S2>& op) : Base(op) {} + + // from ap_concat_ref. + template + INLINE ap_fixed(const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3>& op) + : Base(op) {} + + // from af_bit_ref. + template + INLINE ap_fixed( + const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op) + : Base(op) {} + + // from af_range_ref. + template + INLINE ap_fixed( + const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op) + : Base(op) {} + +// from c types. +#define CTOR(TYPE) \ + INLINE ap_fixed(TYPE v) : Base(v) {} + + CTOR(bool) + CTOR(char) + CTOR(signed char) + CTOR(unsigned char) + CTOR(short) + CTOR(unsigned short) + CTOR(int) + CTOR(unsigned int) + CTOR(long) + CTOR(unsigned long) + CTOR(ap_slong) + CTOR(ap_ulong) + CTOR(half) + CTOR(float) + CTOR(double) +#undef CTOR + + INLINE ap_fixed(const char* s) : Base(s) {} + + INLINE ap_fixed(const char* s, signed char rd) : Base(s, rd) {} + + // Assignment + // The assignment operator is technically inherited; however, it is always + // hidden by an explicitly or implicitly defined assignment operator for the + // derived class. + /* XXX ctor will be used when right is not of proper type. */ + INLINE ap_fixed& operator=( + const ap_fixed<_AP_W, _AP_I, _AP_Q, _AP_O, _AP_N>& op) { + Base::V = op.V; + return *this; + } + + INLINE void operator=( + const ap_fixed<_AP_W, _AP_I, _AP_Q, _AP_O, _AP_N>& op) volatile { + Base::V = op.V; + } + + INLINE ap_fixed& operator=( + const volatile ap_fixed<_AP_W, _AP_I, _AP_Q, _AP_O, _AP_N>& op) { + Base::V = op.V; + return *this; + } + + INLINE void operator=( + const volatile ap_fixed<_AP_W, _AP_I, _AP_Q, _AP_O, _AP_N>& op) volatile { + Base::V = op.V; + } +}; // struct ap_fixed. + +//------------------------------------------------------------------- + +// Unsigned Arbitrary Precision Fixed-Point Type. 
+// default for _AP_Q, _AP_O and _AP_N set in ap_decl.h +template +struct ap_ufixed : ap_fixed_base<_AP_W, _AP_I, false, _AP_Q, _AP_O, _AP_N> { + typedef ap_fixed_base<_AP_W, _AP_I, false, _AP_Q, _AP_O, _AP_N> Base; + // Constructor + /// default ctor + INLINE ap_ufixed() : Base() {} + + /// copy ctor from ap_fixed_base + template + INLINE ap_ufixed(const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, + _AP_O2, _AP_N2>& op) + : Base(op) {} + + /// copy ctor from ap_fixed_base + template + INLINE ap_ufixed(const volatile ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, + _AP_O2, _AP_N2>& op) + : Base(op) {} + + //template + //INLINE ap_ufixed( + // const ap_fixed<_AP_W2, _AP_I2, _AP_Q2, _AP_O2, _AP_N2>& op) + // : Base(ap_fixed_base<_AP_W2, _AP_I2, true, _AP_Q2, _AP_O2, _AP_N2>(op)) {} + + //template + //INLINE ap_ufixed( + // const volatile ap_fixed<_AP_W2, _AP_I2, _AP_Q2, _AP_O2, _AP_N2>& op) + // : Base(ap_fixed_base<_AP_W2, _AP_I2, true, _AP_Q2, _AP_O2, _AP_N2>(op)) {} + + //template + //INLINE ap_ufixed( + // const ap_ufixed<_AP_W2, _AP_I2, _AP_Q2, _AP_O2, _AP_N2>& op) + // : Base(ap_fixed_base<_AP_W2, _AP_I2, false, _AP_Q2, _AP_O2, _AP_N2>(op)) { + //} + + //template + //INLINE ap_ufixed( + // const volatile ap_ufixed<_AP_W2, _AP_I2, _AP_Q2, _AP_O2, _AP_N2>& op) + // : Base(ap_fixed_base<_AP_W2, _AP_I2, false, _AP_Q2, _AP_O2, _AP_N2>(op)) { + //} + + /// copy ctor from ap_int_base. + template + INLINE ap_ufixed(const ap_int_base<_AP_W2, _AP_S2>& op) : Base(op) {} + + template + INLINE ap_ufixed(const volatile ap_int_base<_AP_W2, _AP_S2>& op) : Base(op) {} + + //template + //INLINE ap_ufixed(const ap_int<_AP_W2>& op) + // : Base(ap_int_base<_AP_W2, true>(op)) {} + + //template + //INLINE ap_ufixed(const volatile ap_int<_AP_W2>& op) + // : Base(ap_int_base<_AP_W2, true>(op)) {} + + //template + //INLINE ap_ufixed(const ap_uint<_AP_W2>& op) + // : Base(ap_int_base<_AP_W2, false>(op)) {} + + //template + //INLINE ap_ufixed(const volatile ap_uint<_AP_W2>& op) + // : Base(ap_int_base<_AP_W2, false>(op)) {} + + template + INLINE ap_ufixed(const ap_bit_ref<_AP_W2, _AP_S2>& op) : Base(op) {} + + template + INLINE ap_ufixed(const ap_range_ref<_AP_W2, _AP_S2>& op) : Base(op) {} + + template + INLINE ap_ufixed(const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3>& op) + : Base(op) {} + + template + INLINE ap_ufixed( + const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op) + : Base(op) {} + + template + INLINE ap_ufixed( + const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op) + : Base(op) {} + +#define CTOR(TYPE) \ + INLINE ap_ufixed(TYPE v) : Base(v) {} + + CTOR(bool) + CTOR(char) + CTOR(signed char) + CTOR(unsigned char) + CTOR(short) + CTOR(unsigned short) + CTOR(int) + CTOR(unsigned int) + CTOR(long) + CTOR(unsigned long) + CTOR(ap_slong) + CTOR(ap_ulong) + CTOR(half) + CTOR(float) + CTOR(double) +#undef CTOR + + INLINE ap_ufixed(const char* s) : Base(s) {} + + INLINE ap_ufixed(const char* s, signed char rd) : Base(s, rd) {} + + // Assignment + INLINE ap_ufixed& operator=( + const ap_ufixed<_AP_W, _AP_I, _AP_Q, _AP_O, _AP_N>& op) { + Base::V = op.V; + return *this; + } + + INLINE void operator=( + const ap_ufixed<_AP_W, _AP_I, _AP_Q, _AP_O, _AP_N>& op) volatile { + Base::V = op.V; + } + + INLINE ap_ufixed& operator=( + const volatile ap_ufixed<_AP_W, _AP_I, _AP_Q, _AP_O, _AP_N>& op) { + Base::V = op.V; + return *this; + } + + INLINE void operator=(const volatile ap_ufixed<_AP_W, _AP_I, _AP_Q, _AP_O, + _AP_N>& op) volatile { + Base::V = op.V; + } +}; // struct 
ap_ufixed
+
+
+#if !defined(__SYNTHESIS__) && (defined(SYSTEMC_H) || defined(SYSTEMC_INCLUDED))
+// XXX sc_trace overload for ap_fixed is already included in
+// "ap_sysc/ap_sc_extras.h", so do not define in synthesis.
+template <int _AP_W, int _AP_I, ap_q_mode _AP_Q, ap_o_mode _AP_O, int _AP_N>
+INLINE void sc_trace(sc_core::sc_trace_file* tf,
+                     const ap_fixed<_AP_W, _AP_I, _AP_Q, _AP_O, _AP_N>& op,
+                     const std::string& name) {
+  tf->trace(sc_dt::sc_lv<_AP_W>(op.to_string(2).c_str()), name);
+}
+
+template <int _AP_W, int _AP_I, ap_q_mode _AP_Q, ap_o_mode _AP_O, int _AP_N>
+INLINE void sc_trace(sc_core::sc_trace_file* tf,
+                     const ap_ufixed<_AP_W, _AP_I, _AP_Q, _AP_O, _AP_N>& op,
+                     const std::string& name) {
+  tf->trace(sc_dt::sc_lv<_AP_W>(op.to_string(2).c_str()), name);
+}
+#endif // System C sim
+
+// Specialization of std containers, so that std::complex can have its
+// imaginary part automatically zero-initialized when only the real part is
+// provided.
+#include <ap_fixed_special.h>
+
+#endif // ifndef __AP_FIXED_H__ else
+
+// -*- cpp -*-
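Editorial usage sketch for the ap_fixed/ap_ufixed types above (illustrative,
not part of the commit; a host-side testbench, so only the simulation model is
exercised):

    #include <ap_fixed.h>
    #include <cstdio>

    int main() {
      ap_fixed<16, 8> a = 3.375;                // signed, 8 integer + 8 frac bits
      ap_ufixed<10, 2, AP_RND, AP_SAT> b = 1.5; // unsigned, rounds and saturates
      ap_fixed<16, 8> c = a * b;                // product re-quantized on assignment
      std::printf("%f\n", c.to_double());       // prints 5.062500
      return 0;
    }

The intermediate product widens automatically (see RType in ap_fixed_base.h);
precision is only lost when the result is assigned back to a narrower type.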
+#- ************************************************************************ + +*/ + +#ifndef __AP_FIXED_BASE_H__ +#define __AP_FIXED_BASE_H__ + +#ifndef __AP_FIXED_H__ +// TODO make this an error +#pragma message \ + "Only ap_fixed.h and ap_int.h can be included directly in user code." +#endif + +// for ap_int_base and its reference types. +#include +#ifndef __SYNTHESIS__ +// for half type +#include +// for std io +#include +#endif + +#ifndef __cplusplus +#error "C++ is required to include this header file" +#else // __cplusplus + +// for warning on unsupported rounding mode in conversion to float/double. +#if !defined(__SYNTHESIS__) && __cplusplus >= 201103L && \ + (defined(__gnu_linux__) || defined(_WIN32)) +#define AP_FIXED_ENABLE_CPP_FENV 1 +#include +#endif + +// ---------------------------------------------------------------------- + +/* Major TODO + long double support: constructor, assign and other operators. + binary operators with ap_fixed_base and const char*. + return ap_fixed/ap_ufixed when result signedness is known. +*/ + +// Helper function in conversion to floating point types. + +#ifdef __SYNTHESIS__ +#define _AP_ctype_op_get_bit(var, index) _AP_ROOT_op_get_bit(var, index) +#define _AP_ctype_op_set_bit(var, index, x) _AP_ROOT_op_set_bit(var, index, x) +#define _AP_ctype_op_get_range(var, low, high) \ + _AP_ROOT_op_get_range(var, low, high) +#define _AP_ctype_op_set_range(var, low, high, x) \ + _AP_ROOT_op_set_range(var, low, high, x) +#else // ifdef __SYNTHESIS__ +template +inline bool _AP_ctype_op_get_bit(_Tp1& var, const _Tp2& index) { + return !!(var & (1ull << (index))); +} +template +inline _Tp1 _AP_ctype_op_set_bit(_Tp1& var, const _Tp2& index, const _Tp3& x) { + var |= (((x) ? 1ull : 0ull) << (index)); + return var; +} +template +inline _Tp1 _AP_ctype_op_get_range(_Tp1& var, const _Tp2& low, + const _Tp3& high) { + _Tp1 r = var; + ap_ulong mask = -1ll; + mask >>= (sizeof(_Tp1) * 8 - ((high) - (low) + 1)); + r >>= (low); + r &= mask; + return r; +} +template +inline _Tp1 _AP_ctype_op_set_range(_Tp1& var, const _Tp2& low, const _Tp3& high, + const _Tp4& x) { + ap_ulong mask = -1ll; + mask >>= (_AP_SIZE_ap_slong - ((high) - (low) + 1)); + var &= ~(mask << (low)); + var |= ((mask & x) << (low)); + return var; +} +#endif // ifdef __SYNTHESIS__ + + +// trait for letting base class to return derived class. +// Notice that derived class template is incomplete, and we cannot use +// the member of the derived class. +template +struct _ap_fixed_factory; +template +struct _ap_fixed_factory<_AP_W2, _AP_I2, true> { + typedef ap_fixed<_AP_W2, _AP_I2> type; +}; +template +struct _ap_fixed_factory<_AP_W2, _AP_I2, false> { + typedef ap_ufixed<_AP_W2, _AP_I2> type; +}; + +/// ap_fixed_base: AutoPilot fixed point. +/** partial specialization of signed. + @tparam _AP_W width. + @tparam _AP_I integral part width. + @tparam _AP_S signed. + @tparam _AP_Q quantization mode. Default is AP_TRN. + @tparam _AP_O saturation mode. Default is AP_WRAP. + @tparam _AP_N saturation wrap value. Default is 0. + */ +// default for _AP_Q, _AP_O and _AP_N set in ap_decl.h +template +struct ap_fixed_base : _AP_ROOT_TYPE<_AP_W, _AP_S> { + public: + typedef _AP_ROOT_TYPE<_AP_W, _AP_S> Base; + static const int width = _AP_W; + static const int iwidth = _AP_I; + static const ap_q_mode qmode = _AP_Q; + static const ap_o_mode omode = _AP_O; + + /// Return type trait. 
+ template + struct RType { + enum { + _AP_F = _AP_W - _AP_I, + F2 = _AP_W2 - _AP_I2, + mult_w = _AP_W + _AP_W2, + mult_i = _AP_I + _AP_I2, + mult_s = _AP_S || _AP_S2, + plus_w = AP_MAX(_AP_I + (_AP_S2 && !_AP_S), _AP_I2 + (_AP_S && !_AP_S2)) + + 1 + AP_MAX(_AP_F, F2), + plus_i = + AP_MAX(_AP_I + (_AP_S2 && !_AP_S), _AP_I2 + (_AP_S && !_AP_S2)) + 1, + plus_s = _AP_S || _AP_S2, + minus_w = + AP_MAX(_AP_I + (_AP_S2 && !_AP_S), _AP_I2 + (_AP_S && !_AP_S2)) + 1 + + AP_MAX(_AP_F, F2), + minus_i = + AP_MAX(_AP_I + (_AP_S2 && !_AP_S), _AP_I2 + (_AP_S && !_AP_S2)) + 1, + minus_s = true, +#ifndef __SC_COMPATIBLE__ + div_w = _AP_S2 + _AP_W + AP_MAX(F2, 0), +#else + div_w = _AP_S2 + _AP_W + AP_MAX(F2, 0) + AP_MAX(_AP_I2, 0), +#endif + div_i = _AP_S2 + _AP_I + F2, + div_s = _AP_S || _AP_S2, + logic_w = + AP_MAX(_AP_I + (_AP_S2 && !_AP_S), _AP_I2 + (_AP_S && !_AP_S2)) + + AP_MAX(_AP_F, F2), + logic_i = AP_MAX(_AP_I + (_AP_S2 && !_AP_S), _AP_I2 + (_AP_S && !_AP_S2)), + logic_s = _AP_S || _AP_S2 + }; + + typedef ap_fixed_base<_AP_W, _AP_I, _AP_S> lhs; + typedef ap_fixed_base<_AP_W2, _AP_I2, _AP_S2> rhs; + + typedef ap_fixed_base mult_base; + typedef ap_fixed_base plus_base; + typedef ap_fixed_base minus_base; + typedef ap_fixed_base logic_base; + typedef ap_fixed_base div_base; + typedef ap_fixed_base<_AP_W, _AP_I, _AP_S> arg1_base; + + typedef typename _ap_fixed_factory::type mult; + typedef typename _ap_fixed_factory::type plus; + typedef typename _ap_fixed_factory::type minus; + typedef typename _ap_fixed_factory::type logic; + typedef typename _ap_fixed_factory::type div; + typedef typename _ap_fixed_factory<_AP_W, _AP_I, _AP_S>::type arg1; + }; + + private: +#ifndef __SYNTHESIS__ + // This cannot handle hex float format string. + void fromString(const std::string& val, unsigned char radix) { + _AP_ERROR(!(radix == 2 || radix == 8 || radix == 10 || radix == 16), + "ap_fixed_base::fromString(%s, %d)", val.c_str(), radix); + + Base::V = 0; + int startPos = 0; + int endPos = val.length(); + int decPos = val.find("."); + if (decPos == -1) decPos = endPos; + + // handle sign + bool isNegative = false; + if (val[0] == '-') { + isNegative = true; + ++startPos; + } else if (val[0] == '+') + ++startPos; + + // If there are no integer bits, e.g.: + // .0000XXXX, then keep at least one bit. + // If the width is greater than the number of integer bits, e.g.: + // XXXX.XXXX, then we keep the integer bits + // if the number of integer bits is greater than the width, e.g.: + // XXX000 then we keep the integer bits. + // Always keep one bit. + ap_fixed_base + integer_bits = 0; + + // Figure out if we can shift instead of multiply + unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0); + + //std::cout << "\n\n" << val << "\n"; + //std::cout << startPos << " " << decPos << " " << endPos << "\n"; + + bool sticky_int = false; + + // Traverse the integer digits from the MSD, multiplying by radix as we go. 
+ for (int i = startPos; i < decPos; i++) { + // Get a digit + char cdigit = val[i]; + if (cdigit == '\0') continue; + unsigned digit = ap_private_ops::decode_digit(cdigit, radix); + + sticky_int |= integer_bits[AP_MAX(_AP_I, 4) + 4 - 1] | + integer_bits[AP_MAX(_AP_I, 4) + 4 - 2] | + integer_bits[AP_MAX(_AP_I, 4) + 4 - 3] | + integer_bits[AP_MAX(_AP_I, 4) + 4 - 4]; + // Shift or multiply the value by the radix + if (shift) + integer_bits <<= shift; + else + integer_bits *= radix; + + // Add in the digit we just interpreted + integer_bits += digit; + //std::cout << "idigit = " << digit << " " << integer_bits.to_string() + // << " " << sticky_int << "\n"; + } + integer_bits[AP_MAX(_AP_I, 4) + 4 - 3] = + integer_bits[AP_MAX(_AP_I, 4) + 4 - 3] | sticky_int; + + ap_fixed_base fractional_bits = 0; + bool sticky = false; + + // Traverse the fractional digits from the LSD, dividing by radix as we go. + for (int i = endPos - 1; i >= decPos + 1; i--) { + // Get a digit + char cdigit = val[i]; + if (cdigit == '\0') continue; + unsigned digit = ap_private_ops::decode_digit(cdigit, radix); + // Add in the digit we just interpreted + fractional_bits += digit; + + sticky |= fractional_bits[0] | fractional_bits[1] | fractional_bits[2] | + fractional_bits[3]; + // Shift or divide the value by the radix + if (shift) + fractional_bits >>= shift; + else + fractional_bits /= radix; + + //std::cout << "fdigit = " << digit << " " << fractional_bits.to_string() + // << " " << sticky << "\n"; + } + + //std::cout << "Int =" << integer_bits.to_string() << " " << + // fractional_bits.to_string() << "\n"; + + fractional_bits[0] = fractional_bits[0] | sticky; + + if (isNegative) + *this = -(integer_bits + fractional_bits); + else + *this = integer_bits + fractional_bits; + + //std::cout << "end = " << this->to_string(16) << "\n"; + } + + /// report invalid constrction of ap_fixed_base + INLINE void report() { + if (!_AP_S && _AP_O == AP_WRAP_SM) { + fprintf(stderr, "ap_ufxied<...> cannot support AP_WRAP_SM.\n"); + exit(1); + } + if (_AP_W > MAX_MODE(AP_INT_MAX_W) * 1024) { + fprintf(stderr, + "[E] ap_%sfixed<%d, ...>: Bitwidth exceeds the " + "default max value %d. Please use macro " + "AP_INT_MAX_W to set a larger max value.\n", + _AP_S ? "" : "u", _AP_W, MAX_MODE(AP_INT_MAX_W) * 1024); + exit(1); + } + } +#else + INLINE void report() {} +#endif // ifdef __SYNTHESIS__ + + /// @name helper functions. 
+ // @{ + INLINE void overflow_adjust(bool underflow, bool overflow, bool lD, + bool sign) { + if (!underflow && !overflow) return; + if (_AP_O == AP_WRAP) { + if (_AP_N == 0) return; + if (_AP_S) { + // signed AP_WRAP + // n_bits == 1 + Base::V = _AP_ROOT_op_set_bit(Base::V, _AP_W - 1, sign); + if (_AP_N > 1) { + // n_bits > 1 + ap_int_base<_AP_W, false> mask(-1); + if (sign) mask.V = 0; + Base::V = + _AP_ROOT_op_set_range(Base::V, _AP_W - _AP_N, _AP_W - 2, mask.V); + } + } else { + // unsigned AP_WRAP + ap_int_base<_AP_W, false> mask(-1); + Base::V = + _AP_ROOT_op_set_range(Base::V, _AP_W - _AP_N, _AP_W - 1, mask.V); + } + } else if (_AP_O == AP_SAT_ZERO) { + Base::V = 0; + } else if (_AP_O == AP_WRAP_SM && _AP_S) { + bool Ro = _AP_ROOT_op_get_bit(Base::V, _AP_W - 1); + if (_AP_N == 0) { + if (lD != Ro) { + Base::V = ~Base::V; + Base::V = _AP_ROOT_op_set_bit(Base::V, _AP_W - 1, lD); + } + } else { + if (_AP_N == 1 && sign != Ro) { + Base::V = ~Base::V; + } else if (_AP_N > 1) { + bool lNo = _AP_ROOT_op_get_bit(Base::V, _AP_W - _AP_N); + if (lNo == sign) Base::V = ~Base::V; + ap_int_base<_AP_W, false> mask(-1); + if (sign) mask.V = 0; + Base::V = + _AP_ROOT_op_set_range(Base::V, _AP_W - _AP_N, _AP_W - 2, mask.V); + } + Base::V = _AP_ROOT_op_set_bit(Base::V, _AP_W - 1, sign); + } + } else { + if (_AP_S) { + if (overflow) { + Base::V = 1; + Base::V <<= _AP_W - 1; + Base::V = ~Base::V; + } else if (underflow) { + Base::V = 1; + Base::V <<= _AP_W - 1; + if (_AP_O == AP_SAT_SYM) Base::V |= 1; + } + } else { + if (overflow) + Base::V = ~(ap_int_base<_AP_W, false>(0).V); + else if (underflow) + Base::V = 0; + } + } + } + + INLINE bool quantization_adjust(bool qb, bool r, bool s) { + bool carry = (bool)_AP_ROOT_op_get_bit(Base::V, _AP_W - 1); + if (_AP_Q == AP_TRN) return false; + if (_AP_Q == AP_RND_ZERO) + qb &= s || r; + else if (_AP_Q == AP_RND_MIN_INF) + qb &= r; + else if (_AP_Q == AP_RND_INF) + qb &= !s || r; + else if (_AP_Q == AP_RND_CONV) + qb &= _AP_ROOT_op_get_bit(Base::V, 0) || r; + else if (_AP_Q == AP_TRN_ZERO) + qb = s && (qb || r); + Base::V += qb; + return carry && (!(bool)_AP_ROOT_op_get_bit(Base::V, _AP_W - 1)); + } + // @} + + public: + /// @name constructors. + // @{ + /// default ctor. + INLINE ap_fixed_base() {} + + /// copy ctor. 
+ template + INLINE ap_fixed_base( + const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op) { + operator=(op); + report(); + } + + template + INLINE ap_fixed_base( + const volatile ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op) { + operator=(op); + report(); + } + + template + INLINE ap_fixed_base(const ap_int_base<_AP_W2, _AP_S2>& op) { + ap_fixed_base<_AP_W2, _AP_W2, _AP_S2> tmp; + tmp.V = op.V; + operator=(tmp); + report(); + } + + template + INLINE ap_fixed_base(const volatile ap_int_base<_AP_W2, _AP_S2>& op) { + ap_fixed_base<_AP_W2, _AP_W2, _AP_S2> tmp; + tmp.V = op.V; + operator=(tmp); + report(); + } + +#ifndef __SYNTHESIS__ +#ifndef NON_C99STRING + INLINE ap_fixed_base(const char* s, signed char rd = 0) { + unsigned char radix = rd; + std::string str = ap_private_ops::parseString(s, radix); // will guess rd, default 10 + _AP_ERROR(radix == 0, "ap_fixed_base(const char* \"%s\", %d), str=%s, radix = %d", + s, rd, str.c_str(), radix); // TODO remove this check + fromString(str, radix); + } +#else + INLINE ap_fixed_base(const char* s, signed char rd = 10) { + ap_int_base<_AP_W, _AP_S> t(s, rd); + Base::V = t.V; + } +#endif // ifndef NON_C99STRING +#else // ifndef __SYNTHESIS__ + // XXX _ssdm_string2bits only takes const string and const radix. + // It seems XFORM will do compile time processing of the string. + INLINE ap_fixed_base(const char* s) { + typeof(Base::V) t; + _ssdm_string2bits((void*)(&t), (const char*)(s), 10, _AP_I, _AP_S, _AP_Q, + _AP_O, _AP_N, _AP_C99); + Base::V = t; + } + INLINE ap_fixed_base(const char* s, signed char rd) { + typeof(Base::V) t; + _ssdm_string2bits((void*)(&t), (const char*)(s), rd, _AP_I, _AP_S, _AP_Q, + _AP_O, _AP_N, _AP_C99); + Base::V = t; + } +#endif // ifndef __SYNTHESIS__ else + + template + INLINE ap_fixed_base(const ap_bit_ref<_AP_W2, _AP_S2>& op) { + *this = ((bool)op); + report(); + } + + template + INLINE ap_fixed_base(const ap_range_ref<_AP_W2, _AP_S2>& op) { + *this = (ap_int_base<_AP_W2, false>(op)); + report(); + } + + template + INLINE ap_fixed_base( + const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3>& op) { + *this = (ap_int_base<_AP_W2 + _AP_W3, false>(op)); + report(); + } + + template + INLINE ap_fixed_base( + const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op) { + *this = (bool(op)); + report(); + } + + template + INLINE ap_fixed_base( + const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op) { + *this = (ap_int_base<_AP_W2, false>(op)); + report(); + } + + // ctors from c types. + // make a temp ap_fixed_base first, and use ap_fixed_base.operator= +#define CTOR_FROM_INT(C_TYPE, _AP_W2, _AP_S2) \ + INLINE ap_fixed_base(const C_TYPE x) { \ + ap_fixed_base<(_AP_W2), (_AP_W2), (_AP_S2)> tmp; \ + tmp.V = x; \ + *this = tmp; \ + } + + CTOR_FROM_INT(bool, 1, false) + CTOR_FROM_INT(char, 8, CHAR_IS_SIGNED) + CTOR_FROM_INT(signed char, 8, true) + CTOR_FROM_INT(unsigned char, 8, false) + CTOR_FROM_INT(short, _AP_SIZE_short, true) + CTOR_FROM_INT(unsigned short, _AP_SIZE_short, false) + CTOR_FROM_INT(int, _AP_SIZE_int, true) + CTOR_FROM_INT(unsigned int, _AP_SIZE_int, false) + CTOR_FROM_INT(long, _AP_SIZE_long, true) + CTOR_FROM_INT(unsigned long, _AP_SIZE_long, false) + CTOR_FROM_INT(ap_slong, _AP_SIZE_ap_slong, true) + CTOR_FROM_INT(ap_ulong, _AP_SIZE_ap_slong, false) +#undef CTOR_FROM_INT +/* + * TODO: + *Theere used to be several funtions which were AP_WEAK. 
+ *Now they're all INLINE expect ap_fixed_base(double d) + *Maybe we can use '#pragma HLS inline' instead of INLINE. + */ + AP_WEAK ap_fixed_base(double d) { + ap_int_base<64, false> ireg; + ireg.V = doubleToRawBits(d); + bool isneg = _AP_ROOT_op_get_bit(ireg.V, 63); + + ap_int_base exp; + ap_int_base exp_tmp; + exp_tmp.V = + _AP_ROOT_op_get_range(ireg.V, DOUBLE_MAN, DOUBLE_MAN + DOUBLE_EXP - 1); + exp = exp_tmp - DOUBLE_BIAS; + ap_int_base man; + man.V = _AP_ROOT_op_get_range(ireg.V, 0, DOUBLE_MAN - 1); + // do not support NaN + _AP_WARNING(exp == APFX_IEEE_DOUBLE_E_MAX + 1 && man.V != 0, + "assign NaN to fixed point value"); + man.V = _AP_ROOT_op_set_bit(man.V, DOUBLE_MAN, 1); + if (isneg) man = -man; + if ((ireg.V & 0x7fffffffffffffffLL) == 0) { + Base::V = 0; + } else { + int _AP_W2 = DOUBLE_MAN + 2, _AP_I2 = exp.V + 2, _AP_F = _AP_W - _AP_I, + F2 = _AP_W2 - _AP_I2; + bool _AP_S2 = true, + QUAN_INC = F2 > _AP_F && + !(_AP_Q == AP_TRN || (_AP_Q == AP_TRN_ZERO && !_AP_S2)); + bool carry = false; + // handle quantization + unsigned sh_amt = (F2 > _AP_F) ? F2 - _AP_F : _AP_F - F2; + if (F2 == _AP_F) + Base::V = man.V; + else if (F2 > _AP_F) { + if (sh_amt < DOUBLE_MAN + 2) + Base::V = man.V >> sh_amt; + else { + Base::V = isneg ? -1 : 0; + } + if ((_AP_Q != AP_TRN) && !((_AP_Q == AP_TRN_ZERO) && !_AP_S2)) { + bool qb = (F2 - _AP_F > _AP_W2) ? isneg : (bool)_AP_ROOT_op_get_bit( + man.V, F2 - _AP_F - 1); + bool r = + (F2 > _AP_F + 1) + ? _AP_ROOT_op_get_range(man.V, 0, (F2 - _AP_F - 2 < _AP_W2) + ? (F2 - _AP_F - 2) + : (_AP_W2 - 1)) != 0 + : false; + carry = quantization_adjust(qb, r, isneg); + } + } else { // no quantization + Base::V = man.V; + if (sh_amt < _AP_W) + Base::V = Base::V << sh_amt; + else + Base::V = 0; + } + // handle overflow/underflow + if ((_AP_O != AP_WRAP || _AP_N != 0) && + ((!_AP_S && _AP_S2) || + _AP_I - _AP_S < + _AP_I2 - _AP_S2 + + (QUAN_INC || + (_AP_S2 && (_AP_O == AP_SAT_SYM))))) { // saturation + bool deleted_zeros = _AP_S2 ? true : !carry, deleted_ones = true; + bool neg_src = isneg; + bool lD = false; + int pos1 = F2 - _AP_F + _AP_W; + int pos2 = F2 - _AP_F + _AP_W + 1; + bool newsignbit = _AP_ROOT_op_get_bit(Base::V, _AP_W - 1); + if (pos1 < _AP_W2 && pos1 >= 0) + // lD = _AP_ROOT_op_get_bit(man.V, pos1); + lD = (man.V >> pos1) & 1; + if (pos1 < _AP_W2) { + bool Range1_all_ones = true; + bool Range1_all_zeros = true; + bool Range2_all_ones = true; + ap_int_base Range2; + ap_int_base all_ones(-1); + + if (pos2 >= 0 && pos2 < _AP_W2) { + // Range2.V = _AP_ROOT_op_get_range(man.V, + // pos2, _AP_W2 - 1); + Range2.V = man.V; + Range2.V >>= pos2; + Range2_all_ones = Range2 == (all_ones >> pos2); + } else if (pos2 < 0) + Range2_all_ones = false; + if (pos1 >= 0 && pos2 < _AP_W2) { + Range1_all_ones = Range2_all_ones && lD; + Range1_all_zeros = !Range2.V && !lD; + } else if (pos2 == _AP_W2) { + Range1_all_ones = lD; + Range1_all_zeros = !lD; + } else if (pos1 < 0) { + Range1_all_zeros = !man.V; + Range1_all_ones = false; + } + + deleted_zeros = + deleted_zeros && (carry ? Range1_all_ones : Range1_all_zeros); + deleted_ones = + carry ? Range2_all_ones && (pos1 < 0 || !lD) : Range1_all_ones; + neg_src = isneg && !(carry && Range1_all_ones); + } else + neg_src = isneg && newsignbit; + bool neg_trg = _AP_S && newsignbit; + bool overflow = (neg_trg || !deleted_zeros) && !isneg; + bool underflow = (!neg_trg || !deleted_ones) && neg_src; + if ((_AP_O == AP_SAT_SYM) && _AP_S2 && _AP_S) + underflow |= + neg_src && + (_AP_W > 1 ? 
_AP_ROOT_op_get_range(Base::V, 0, _AP_W - 2) == 0 + : true); + overflow_adjust(underflow, overflow, lD, neg_src); + } + } + report(); + } + + // TODO more optimized implementation. + INLINE ap_fixed_base(float d) { *this = ap_fixed_base(double(d)); } + + // TODO more optimized implementation. + INLINE ap_fixed_base(half d) { *this = ap_fixed_base(double(d)); } + // @} + + /// @name assign operator + /// assign, using another ap_fixed_base of same template parameters. + /* + INLINE ap_fixed_base& operator=( + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op) { + Base::V = op.V; + return *this; + } + */ + + template + INLINE ap_fixed_base& operator=( + const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op) { + + const int _AP_F = _AP_W - _AP_I; + const int F2 = _AP_W2 - _AP_I2; + const int QUAN_INC = + F2 > _AP_F && !(_AP_Q == AP_TRN || (_AP_Q == AP_TRN_ZERO && !_AP_S2)); + + if (!op) Base::V = 0; + bool carry = false; + bool signbit = _AP_ROOT_op_get_bit(op.V, _AP_W2 - 1); + bool isneg = signbit && _AP_S2; + if (F2 == _AP_F) + Base::V = op.V; + else if (F2 > _AP_F) { + unsigned int sh_amt = F2 - _AP_F; + // moves bits right, handle quantization. + if (sh_amt < _AP_W2) { + Base::V = op.V >> sh_amt; + } else { + Base::V = isneg ? -1 : 0; + } + if (_AP_Q != AP_TRN && !(_AP_Q == AP_TRN_ZERO && !_AP_S2)) { + bool qbit = _AP_ROOT_op_get_bit(op.V, F2 - _AP_F - 1); + // bit after LSB. + bool qb = (F2 - _AP_F > _AP_W2) ? _AP_S2 && signbit : qbit; + enum { hi = ((F2 - _AP_F - 2) < _AP_W2) ? (F2 - _AP_F - 2) : (_AP_W2 - 1) }; + // bits after qb. + bool r = (F2 > _AP_F + 1) ? (_AP_ROOT_op_get_range(op.V, 0, hi) != 0) : false; + carry = quantization_adjust(qb, r, isneg); + } + } else { + unsigned sh_amt = _AP_F - F2; + // moves bits left, no quantization + if (sh_amt < _AP_W) { + if (_AP_W > _AP_W2) { + // extend and then shift, avoid losing bits. + Base::V = op.V; + Base::V <<= sh_amt; + } else { + // shift and truncate. + Base::V = op.V << sh_amt; + } + } else { + Base::V = 0; + } + } + // handle overflow/underflow + if ((_AP_O != AP_WRAP || _AP_N != 0) && + ((!_AP_S && _AP_S2) || + _AP_I - _AP_S < + _AP_I2 - _AP_S2 + + (QUAN_INC || (_AP_S2 && _AP_O == AP_SAT_SYM)))) { // saturation + bool deleted_zeros = _AP_S2 ? true : !carry; + bool deleted_ones = true; + bool neg_src = isneg; + bool newsignbit = _AP_ROOT_op_get_bit(Base::V, _AP_W - 1); + enum { pos1 = F2 - _AP_F + _AP_W, pos2 = F2 - _AP_F + _AP_W + 1 }; + bool lD = (pos1 < _AP_W2 && pos1 >= 0) ? _AP_ROOT_op_get_bit(op.V, pos1) + : false; + if (pos1 < _AP_W2) { + bool Range1_all_ones = true; + bool Range1_all_zeros = true; + bool Range2_all_ones = true; + ap_int_base<_AP_W2, false> all_ones(-1); + + if (pos2 < _AP_W2 && pos2 >= 0) { + ap_int_base<_AP_W2, false> Range2; + Range2.V = _AP_ROOT_op_get_range(op.V, pos2, _AP_W2 - 1); + Range2_all_ones = Range2 == (all_ones >> pos2); + } else if (pos2 < 0) { + Range2_all_ones = false; + } + + if (pos1 >= 0 && pos2 < _AP_W2) { + ap_int_base<_AP_W2, false> Range1; + Range1.V = _AP_ROOT_op_get_range(op.V, pos1, _AP_W2 - 1); + Range1_all_ones = Range1 == (all_ones >> pos1); + Range1_all_zeros = !Range1.V; + } else if (pos2 == _AP_W2) { + Range1_all_ones = lD; + Range1_all_zeros = !lD; + } else if (pos1 < 0) { + Range1_all_zeros = !op.V; + Range1_all_ones = false; + } + + deleted_zeros = + deleted_zeros && (carry ? Range1_all_ones : Range1_all_zeros); + deleted_ones = + carry ? 
Range2_all_ones && (pos1 < 0 || !lD) : Range1_all_ones;
+ neg_src = isneg && !(carry && Range1_all_ones);
+ } else
+ neg_src = isneg && newsignbit;
+ bool neg_trg = _AP_S && newsignbit;
+ bool overflow = (neg_trg || !deleted_zeros) && !isneg;
+ bool underflow = (!neg_trg || !deleted_ones) && neg_src;
+ if ((_AP_O == AP_SAT_SYM) && _AP_S2 && _AP_S)
+ underflow |=
+ neg_src &&
+ (_AP_W > 1 ? _AP_ROOT_op_get_range(Base::V, 0, _AP_W - 2) == 0
+ : true);
+
+ overflow_adjust(underflow, overflow, lD, neg_src);
+ }
+ return *this;
+ } // operator=
+
+ template <int _AP_W2, int _AP_I2, bool _AP_S2, ap_q_mode _AP_Q2,
+ ap_o_mode _AP_O2, int _AP_N2>
+ INLINE ap_fixed_base& operator=(
+ const volatile ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op) {
+ operator=(const_cast<
+ const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>&>(op));
+ return *this;
+ }
+
+ /// Set this ap_fixed_base with ULL.
+ INLINE ap_fixed_base& setBits(ap_ulong bv) {
+ // TODO handle the case when ull is not long enough...
+ Base::V = bv;
+ return *this;
+ }
+
+ /// Return an ap_fixed_base object whose this->V is assigned from bv.
+ static INLINE ap_fixed_base bitsToFixed(ap_ulong bv) {
+ // TODO fix when ull is not long enough...
+ ap_fixed_base t;
+#ifdef __SYNTHESIS__
+ t.V = bv;
+#else
+ t.V.set_bits(bv);
+#endif
+ return t;
+ }
+
+ // Explicit conversion functions to ap_int_base.
+ /** Captures all integer bits, in truncate mode.
+ * @param[in] Cnative follow conversion from double to int.
+ */
+ INLINE ap_int_base<AP_MAX(_AP_I, 1), _AP_S> to_ap_int_base(
+ bool Cnative = true) const {
+ ap_int_base<AP_MAX(_AP_I, 1), _AP_S> ret;
+ if (_AP_I == 0) {
+ ret.V = 0;
+ } else if (_AP_I > 0 && _AP_I <= _AP_W) {
+ ret.V = _AP_ROOT_op_get_range(Base::V, _AP_W - _AP_I, _AP_W - 1);
+ } else if (_AP_I > _AP_W) {
+ ret.V = _AP_ROOT_op_get_range(Base::V, 0, _AP_W - 1);
+ ret.V <<= (_AP_I - _AP_W);
+ }
+ /* Consider the following case
+ * float f = -7.5f;
+ * ap_fixed<8,4> t = f; // -8 0 0 0 . 0.5
+ * int i = t.to_int();
+ * the result should be -7 instead of -8.
+ * Therefore, after truncation, the value should be increased by 1.
+ * For (-1, 0), carry to MSB will happen, but result 0 is still correct.
+ */
+ if (Cnative && _AP_I < _AP_W) {
+ // Follow C native data type, conversion from double to int
+ if (_AP_S && _AP_ROOT_op_get_bit(Base::V, _AP_W - 1) && (_AP_I < _AP_W) &&
+ (_AP_ROOT_op_get_range(
+ Base::V, 0, _AP_I < 0 ? _AP_W - 1 : _AP_W - _AP_I - 1) != 0))
+ ++ret;
+ } else {
+ // Follow OSCI library, conversion from sc_fixed to sc_int
+ }
+ return ret;
+ }
+
+ public:
+ template <int _AP_W2, bool _AP_S2>
+ INLINE operator ap_int_base<_AP_W2, _AP_S2>() const {
+ return ap_int_base<_AP_W2, _AP_S2>(to_ap_int_base());
+ }
+
+ // Explicit conversion function to C built-in integral type.
+ INLINE char to_char() const { return to_ap_int_base().to_char(); }
+
+ INLINE int to_int() const { return to_ap_int_base().to_int(); }
+
+ INLINE unsigned to_uint() const { return to_ap_int_base().to_uint(); }
+
+ INLINE ap_slong to_int64() const { return to_ap_int_base().to_int64(); }
+
+ INLINE ap_ulong to_uint64() const { return to_ap_int_base().to_uint64(); }
+
+ /// convert function to double.
+ /** only round-half-to-even mode supported, does not obey FE env. */
+ INLINE double to_double() const {
+#if defined(AP_FIXED_ENABLE_CPP_FENV)
+ _AP_WARNING(std::fegetround() != FE_TONEAREST,
+ "Only FE_TONEAREST is supported");
+#endif
+ enum { BITS = DOUBLE_MAN + DOUBLE_EXP + 1 };
+ if (!Base::V) return 0.0;
+ bool s = _AP_S && _AP_ROOT_op_get_bit(Base::V, _AP_W - 1); ///< sign.
+ ap_int_base<_AP_W, false> tmp;
+ if (s)
+ tmp.V = -Base::V; // may truncate one bit extra from neg in sim.
+ else
+ tmp.V = Base::V;
+ int l = tmp.countLeadingZeros(); ///< number of leading zeros.
+ int e = _AP_I - l - 1 + DOUBLE_BIAS; ///< exponent
+ int lsb_index = _AP_W - l - 1 - DOUBLE_MAN;
+ // more than 0.5?
+ bool a = (lsb_index >= 2) ?
+ (_AP_ROOT_op_get_range(tmp.V, 0, lsb_index - 2) != 0) : 0;
+ // round to even
+ a |= (lsb_index >= 0) ? _AP_ROOT_op_get_bit(tmp.V, lsb_index) : 0;
+ // ull is at least 64-bit
+ ap_ulong m;
+ // may actually left shift, ensure buffer is wide enough.
+ if (_AP_W > BITS) {
+ m = (lsb_index >= 1) ? (ap_ulong)(tmp.V >> (lsb_index - 1))
+ : (ap_ulong)(tmp.V << (1 - lsb_index));
+ } else {
+ m = (ap_ulong)tmp.V;
+ m = (lsb_index >= 1) ? (m >> (lsb_index - 1))
+ : (m << (1 - lsb_index));
+ }
+ m += a;
+ m >>= 1;
+ // carry to MSB, increase exponent
+ if (_AP_ctype_op_get_bit(m, DOUBLE_MAN + 1)) {
+ e += 1;
+ }
+ // set sign and exponent
+ m = _AP_ctype_op_set_bit(m, BITS - 1, s);
+ m = _AP_ctype_op_set_range(m, DOUBLE_MAN, DOUBLE_MAN + DOUBLE_EXP - 1, e);
+ // cast to fp
+ return rawBitsToDouble(m);
+ }
+
+ /// convert function to float.
+ /** only round-half-to-even mode supported, does not obey FE env. */
+ INLINE float to_float() const {
+#if defined(AP_FIXED_ENABLE_CPP_FENV)
+ _AP_WARNING(std::fegetround() != FE_TONEAREST,
+ "Only FE_TONEAREST is supported");
+#endif
+ enum { BITS = FLOAT_MAN + FLOAT_EXP + 1 };
+ if (!Base::V) return 0.0f;
+ bool s = _AP_S && _AP_ROOT_op_get_bit(Base::V, _AP_W - 1); ///< sign.
+ ap_int_base<_AP_W, false> tmp;
+ if (s)
+ tmp.V = -Base::V; // may truncate one bit extra from neg in sim.
+ else
+ tmp.V = Base::V;
+ int l = tmp.countLeadingZeros(); ///< number of leading zeros.
+ int e = _AP_I - l - 1 + FLOAT_BIAS; ///< exponent
+ int lsb_index = _AP_W - l - 1 - FLOAT_MAN;
+ // more than 0.5?
+ bool a = (lsb_index >= 2) ?
+ (_AP_ROOT_op_get_range(tmp.V, 0, lsb_index - 2) != 0) : 0;
+ // round to even
+ a |= (lsb_index >= 0) ? _AP_ROOT_op_get_bit(tmp.V, lsb_index) : 0;
+ // ul is at least 32-bit
+ unsigned long m;
+ // may actually left shift, ensure buffer is wide enough.
+ if (_AP_W > BITS) {
+ m = (lsb_index >= 1) ? (unsigned long)(tmp.V >> (lsb_index - 1))
+ : (unsigned long)(tmp.V << (1 - lsb_index));
+ } else {
+ m = (unsigned long)tmp.V;
+ m = (lsb_index >= 1) ? (m >> (lsb_index - 1))
+ : (m << (1 - lsb_index));
+ }
+ m += a;
+ m >>= 1;
+ // carry to MSB, increase exponent
+ if (_AP_ctype_op_get_bit(m, FLOAT_MAN + 1)) {
+ e += 1;
+ }
+ // set sign and exponent
+ m = _AP_ctype_op_set_bit(m, BITS - 1, s);
+ m = _AP_ctype_op_set_range(m, FLOAT_MAN, FLOAT_MAN + FLOAT_EXP - 1, e);
+ // cast to fp
+ return rawBitsToFloat(m);
+ }
+
+ /// convert function to half.
+ /** only round-half-to-even mode supported, does not obey FE env. */
+ INLINE half to_half() const {
+#if defined(AP_FIXED_ENABLE_CPP_FENV)
+ _AP_WARNING(std::fegetround() != FE_TONEAREST,
+ "Only FE_TONEAREST is supported");
+#endif
+ enum { BITS = HALF_MAN + HALF_EXP + 1 };
+ if (!Base::V) return 0.0f;
+ bool s = _AP_S && _AP_ROOT_op_get_bit(Base::V, _AP_W - 1); ///< sign.
+ ap_int_base<_AP_W, false> tmp;
+ if (s)
+ tmp.V = -Base::V; // may truncate one bit extra from neg in sim.
+ else
+ tmp.V = Base::V;
+ int l = tmp.countLeadingZeros(); ///< number of leading zeros.
+ int e = _AP_I - l - 1 + HALF_BIAS; ///< exponent + int lsb_index = _AP_W - l - 1 - HALF_MAN; + // more than 0.5? + bool a = (lsb_index >=2) ? + (_AP_ROOT_op_get_range(tmp.V, 0, lsb_index - 2) != 0) : 0; + // round to even + a |= (lsb_index >=0) ? _AP_ROOT_op_get_bit(tmp.V, lsb_index) : 0; + // short is at least 16-bit + unsigned short m; + // may actually left shift, ensure buffer is wide enough. + if (_AP_W > BITS) { + m = (lsb_index >= 1) ? (unsigned short)(tmp.V >> (lsb_index - 1)) + : (unsigned short)(tmp.V << (1 - lsb_index)); + } else { + m = (unsigned short)tmp.V; + m = (lsb_index >= 1) ? (m >> (lsb_index - 1)) + : (m << (1 - lsb_index)); + } + m += a; + m >>= 1; + // carry to MSB, increase exponent + if (_AP_ctype_op_get_bit(m, HALF_MAN + 1)) { + e += 1; + } + // set sign and exponent + m = _AP_ctype_op_set_bit(m, BITS - 1, s); + m = _AP_ctype_op_set_range(m, HALF_MAN, HALF_MAN + HALF_EXP - 1, e); + // cast to fp + return rawBitsToHalf(m); + } + + // FIXME inherited from old code, this may loose precision! + INLINE operator long double() const { return (long double)to_double(); } + + INLINE operator double() const { return to_double(); } + + INLINE operator float() const { return to_float(); } + + INLINE operator half() const { return to_half(); } + + INLINE operator bool() const { return (bool)Base::V != 0; } + + INLINE operator char() const { return (char)to_int(); } + + INLINE operator signed char() const { return (signed char)to_int(); } + + INLINE operator unsigned char() const { return (unsigned char)to_uint(); } + + INLINE operator short() const { return (short)to_int(); } + + INLINE operator unsigned short() const { return (unsigned short)to_uint(); } + + INLINE operator int() const { return to_int(); } + + INLINE operator unsigned int() const { return to_uint(); } + +// FIXME don't assume data width... +#ifdef __x86_64__ + INLINE operator long() const { return (long)to_int64(); } + + INLINE operator unsigned long() const { return (unsigned long)to_uint64(); } +#else + INLINE operator long() const { return (long)to_int(); } + + INLINE operator unsigned long() const { return (unsigned long)to_uint(); } +#endif // ifdef __x86_64__ else + + INLINE operator ap_ulong() const { return to_uint64(); } + + INLINE operator ap_slong() const { return to_int64(); } + + INLINE int length() const { return _AP_W; }; + + // bits_to_int64 deleted. +#ifndef __SYNTHESIS__ + // Used in autowrap, when _AP_W < 64. + INLINE ap_ulong bits_to_uint64() const { + return (Base::V).to_uint64(); + } +#endif + + // Count the number of zeros from the most significant bit + // to the first one bit. Note this is only for ap_fixed_base whose + // _AP_W <= 64, otherwise will incur assertion. + INLINE int countLeadingZeros() { +#ifdef __SYNTHESIS__ + // TODO: used llvm.ctlz intrinsic ? + if (_AP_W <= 32) { + ap_int_base<32, false> t(-1ULL); + t.range(_AP_W - 1, 0) = this->range(0, _AP_W - 1); + return __builtin_ctz(t.V); + } else if (_AP_W <= 64) { + ap_int_base<64, false> t(-1ULL); + t.range(_AP_W - 1, 0) = this->range(0, _AP_W - 1); + return __builtin_ctzll(t.V); + } else { + enum {__N = (_AP_W + 63) / 64}; + int NZeros = 0; + int i = 0; + bool hitNonZero = false; + for (i = 0; i < __N - 1; ++i) { + ap_int_base<64, false> t; + t.range(0, 63) = this->range(_AP_W - i * 64 - 64, _AP_W - i * 64 - 1); + NZeros += hitNonZero ? 
0 : __builtin_clzll(t.V); + hitNonZero |= (t != 0); + } + if (!hitNonZero) { + ap_int_base<64, false> t(-1ULL); + t.range(63 - (_AP_W - 1) % 64, 63) = this->range(0, (_AP_W - 1) % 64); + NZeros += __builtin_clzll(t.V); + } + return NZeros; + } +#else + return Base::V.countLeadingZeros(); +#endif + } + + // Arithmetic : Binary + // ------------------------------------------------------------------------- + template + INLINE typename RType<_AP_W2, _AP_I2, _AP_S2>::mult operator*( + const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op2) + const { + typename RType<_AP_W2, _AP_I2, _AP_S2>::mult_base r, t; + r.V = Base::V; + t.V = op2.V; + r.V *= op2.V; + return r; + } + + // multiply function deleted. + + template + INLINE typename RType<_AP_W2, _AP_I2, _AP_S2>::div operator/( + const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op2) + const { + typename RType<_AP_W2, _AP_I2, _AP_S2>::div_base r; +#ifndef __SYNTHESIS__ + enum {F2 = _AP_W2-_AP_I2, + _W1=AP_MAX(_AP_W + AP_MAX(F2, 0) + ((_AP_S2 && !_AP_S) ? 1 : 0), _AP_W2 + ((_AP_S && !_AP_S2) ? 1 : 0))}; + ap_int_base<_W1,_AP_S||_AP_S2> dividend,divisior; + ap_int_base<_W1,_AP_S> tmp1; + ap_int_base<_W1,_AP_S2> tmp2; + tmp1.V = Base::V; + tmp1.V <<= AP_MAX(F2,0); + tmp2.V = op2.V; + dividend = tmp1; + divisior = tmp2; + r.V = ((_AP_S||_AP_S2) ? dividend.V.sdiv(divisior.V): dividend.V.udiv(divisior.V)); +#else + #ifndef __SC_COMPATIBLE__ + ap_fixed_base<_AP_W + AP_MAX(_AP_W2 - _AP_I2, 0),_AP_I, _AP_S> t(*this); + #else + ap_fixed_base<_AP_W + AP_MAX(_AP_W2 - _AP_I2, 0) + AP_MAX(_AP_I2, 0),_AP_I, _AP_S> t(*this); + #endif + r.V = t.V / op2.V; +#endif +/* + enum { + F2 = _AP_W2 - _AP_I2, + shl = AP_MAX(F2, 0) + AP_MAX(_AP_I2, 0), +#ifndef __SC_COMPATIBLE__ + shr = AP_MAX(_AP_I2, 0), +#else + shr = 0, +#endif + W3 = _AP_S2 + _AP_W + shl, + S3 = _AP_S || _AP_S2, + }; + ap_int_base dividend, t; + dividend.V = Base::V; + // multiply both by (1 << F2), and than do integer division. + dividend.V <<= (int) shl; +#ifdef __SYNTHESIS__ + // .V's have right signedness, and will have right extending. + t.V = dividend.V / op2.V; +#else + // XXX op2 may be wider than dividend, and sdiv and udiv takes the same with + // as left hand operand, so data might be truncated by mistake if not + // handled here. + t.V = S3 ? dividend.V.sdiv(op2.V) : dividend.V.udiv(op2.V); +#endif + r.V = t.V >> (int) shr; +*/ + return r; + } + +#define OP_BIN_AF(Sym, Rty) \ + template \ + INLINE typename RType<_AP_W2, _AP_I2, _AP_S2>::Rty operator Sym( \ + const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& \ + op2) const { \ + typename RType<_AP_W2, _AP_I2, _AP_S2>::Rty##_base ret, lhs(*this), \ + rhs(op2); \ + ret.V = lhs.V Sym rhs.V; \ + return ret; \ + } + + OP_BIN_AF(+, plus) + OP_BIN_AF(-, minus) + OP_BIN_AF(&, logic) + OP_BIN_AF(|, logic) + OP_BIN_AF(^, logic) + +// Arithmetic : assign +// ------------------------------------------------------------------------- +#define OP_ASSIGN_AF(Sym) \ + template \ + INLINE ap_fixed_base& operator Sym##=( \ + const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& \ + op2) { \ + *this = operator Sym(op2); \ + return *this; \ + } + + OP_ASSIGN_AF(*) + OP_ASSIGN_AF(/) + OP_ASSIGN_AF(+) + OP_ASSIGN_AF(-) + OP_ASSIGN_AF(&) + OP_ASSIGN_AF(|) + OP_ASSIGN_AF(^) + + // Prefix and postfix increment and decrement. 
+ // ------------------------------------------------------------------------- + + /// Prefix increment + INLINE ap_fixed_base& operator++() { + operator+=(ap_fixed_base<_AP_W - _AP_I + 1, 1, false>(1)); + return *this; + } + + /// Prefix decrement. + INLINE ap_fixed_base& operator--() { + operator-=(ap_fixed_base<_AP_W - _AP_I + 1, 1, false>(1)); + return *this; + } + + /// Postfix increment + INLINE const ap_fixed_base operator++(int) { + ap_fixed_base r(*this); + operator++(); + return r; + } + + /// Postfix decrement + INLINE const ap_fixed_base operator--(int) { + ap_fixed_base r(*this); + operator--(); + return r; + } + + // Unary arithmetic. + // ------------------------------------------------------------------------- + INLINE ap_fixed_base operator+() { return *this; } + + INLINE ap_fixed_base<_AP_W + 1, _AP_I + 1, true> operator-() const { + ap_fixed_base<_AP_W + 1, _AP_I + 1, true> r(*this); + r.V = -r.V; + return r; + } + + INLINE ap_fixed_base<_AP_W, _AP_I, true, _AP_Q, _AP_O, _AP_N> getNeg() { + ap_fixed_base<_AP_W, _AP_I, true, _AP_Q, _AP_O, _AP_N> r(*this); + r.V = -r.V; + return r; + } + + // Not (!) + // ------------------------------------------------------------------------- + INLINE bool operator!() const { return Base::V == 0; } + + // Bitwise complement + // ------------------------------------------------------------------------- + // XXX different from Mentor's ac_fixed. + INLINE ap_fixed_base<_AP_W, _AP_I, _AP_S> operator~() const { + ap_fixed_base<_AP_W, _AP_I, _AP_S> r; + r.V = ~Base::V; + return r; + } + + // Shift + // ------------------------------------------------------------------------- + // left shift is the same as moving point right, i.e. increate I. + template + INLINE ap_fixed_base<_AP_W, _AP_I + _AP_SHIFT, _AP_S> lshift() const { + ap_fixed_base<_AP_W, _AP_I + _AP_SHIFT, _AP_S> r; + r.V = Base::V; + return r; + } + + template + INLINE ap_fixed_base<_AP_W, _AP_I - _AP_SHIFT, _AP_S> rshift() const { + ap_fixed_base<_AP_W, _AP_I - _AP_SHIFT, _AP_S> r; + r.V = Base::V; + return r; + } + + // Because the return type is the type of the the first operand, shift assign + // operators do not carry out any quantization or overflow + // While systemc, shift assigns for sc_fixed/sc_ufixed will result in + // quantization or overflow (depending on the mode of the first operand) + INLINE ap_fixed_base operator<<(unsigned int sh) const { + ap_fixed_base r; + r.V = Base::V << sh; +// TODO check shift overflow? +#ifdef __SC_COMPATIBLE__ + if (sh == 0) return r; + if (_AP_O != AP_WRAP || _AP_N != 0) { + bool neg_src = _AP_S && _AP_ROOT_op_get_bit(Base::V, _AP_W - 1); + bool allones, allzeros; + ap_int_base<_AP_W, false> ones(-1); + if (sh <= _AP_W) { + ap_int_base<_AP_W, false> range1; + range1.V = _AP_ROOT_op_get_range( + const_cast(this)->Base::V, _AP_W - sh, _AP_W - 1); + allones = range1 == (ones >> (_AP_W - sh)); + allzeros = range1 == 0; + } else { + allones = false; + allzeros = Base::V == 0; + } + bool overflow = !allzeros && !neg_src; + bool underflow = !allones && neg_src; + if ((_AP_O == AP_SAT_SYM) && _AP_S) + underflow |= + neg_src && + (_AP_W > 1 ? _AP_ROOT_op_get_range(r.V, 0, _AP_W - 2) == 0 : true); + bool lD = false; + if (sh < _AP_W) lD = _AP_ROOT_op_get_bit(Base::V, _AP_W - sh - 1); + r.overflow_adjust(underflow, overflow, lD, neg_src); + } +#endif + return r; + } + + INLINE ap_fixed_base operator>>(unsigned int sh) const { + ap_fixed_base r; + r.V = Base::V >> sh; +// TODO check shift overflow? 
+#ifdef __SC_COMPATIBLE__ + if (sh == 0) return r; + if (_AP_Q != AP_TRN) { + bool qb = false; + if (sh <= _AP_W) qb = _AP_ROOT_op_get_bit(Base::V, sh - 1); + bool rb = false; + if (sh > 1 && sh <= _AP_W) + rb = _AP_ROOT_op_get_range(const_cast(this)->Base::V, 0, + sh - 2) != 0; + else if (sh > _AP_W) + rb = Base::V != 0; + r.quantization_adjust(qb, rb, + _AP_S && _AP_ROOT_op_get_bit(Base::V, _AP_W - 1)); + } +#endif + return r; + } + + // left and right shift for int + INLINE ap_fixed_base operator<<(int sh) const { + ap_fixed_base r; + bool isNeg = sh < 0; + unsigned int ush = isNeg ? -sh : sh; + if (isNeg) { + return operator>>(ush); + } else { + return operator<<(ush); + } + } + + INLINE ap_fixed_base operator>>(int sh) const { + bool isNeg = sh < 0; + unsigned int ush = isNeg ? -sh : sh; + if (isNeg) { + return operator<<(ush); + } else { + return operator>>(ush); + } + } + + // left and right shift for ap_int. + template + INLINE ap_fixed_base operator<<(const ap_int_base<_AP_W2, true>& op2) const { + // TODO the code seems not optimal. ap_fixed<8,8> << ap_int<2> needs only a + // small mux, but integer need a big one! + int sh = op2.to_int(); + return operator<<(sh); + } + + template + INLINE ap_fixed_base operator>>(const ap_int_base<_AP_W2, true>& op2) const { + int sh = op2.to_int(); + return operator>>(sh); + } + + // left and right shift for ap_uint. + template + INLINE ap_fixed_base operator<<(const ap_int_base<_AP_W2, false>& op2) const { + unsigned int sh = op2.to_uint(); + return operator<<(sh); + } + + template + INLINE ap_fixed_base operator>>(const ap_int_base<_AP_W2, false>& op2) const { + unsigned int sh = op2.to_uint(); + return operator>>(sh); + } + + // left and right shift for ap_fixed + template + INLINE ap_fixed_base operator<<( + const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& + op2) { + return operator<<(op2.to_ap_int_base()); + } + + template + INLINE ap_fixed_base operator>>( + const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& + op2) { + return operator>>(op2.to_ap_int_base()); + } + + // Shift assign. + // ------------------------------------------------------------------------- + + // left shift assign. + INLINE ap_fixed_base& operator<<=(const int sh) { + *this = operator<<(sh); + return *this; + } + + INLINE ap_fixed_base& operator<<=(const unsigned int sh) { + *this = operator<<(sh); + return *this; + } + + template + INLINE ap_fixed_base& operator<<=(const ap_int_base<_AP_W2, _AP_S2>& sh) { + *this = operator<<(sh.to_int()); + return *this; + } + + template + INLINE ap_fixed_base& operator<<=( + const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& + sh) { + *this = operator<<(sh.to_int()); + return *this; + } + + // right shift assign. + INLINE ap_fixed_base& operator>>=(const int sh) { + *this = operator>>(sh); + return *this; + } + + INLINE ap_fixed_base& operator>>=(const unsigned int sh) { + *this = operator>>(sh); + return *this; + } + + template + INLINE ap_fixed_base& operator>>=(const ap_int_base<_AP_W2, _AP_S2>& sh) { + *this = operator>>(sh.to_int()); + return *this; + } + + template + INLINE ap_fixed_base& operator>>=( + const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& + sh) { + *this = operator>>(sh.to_int()); + return *this; + } + +// Comparisons. 
+// ------------------------------------------------------------------------- +#define OP_CMP_AF(Sym) \ + template \ + INLINE bool operator Sym(const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, \ + _AP_O2, _AP_N2>& op2) const { \ + enum { _AP_F = _AP_W - _AP_I, F2 = _AP_W2 - _AP_I2 }; \ + if (_AP_F == F2) \ + return Base::V Sym op2.V; \ + else if (_AP_F > F2) \ + return Base::V Sym ap_fixed_base(op2).V; \ + else \ + return ap_fixed_base(*this).V Sym op2.V; \ + return false; \ + } + + OP_CMP_AF(>) + OP_CMP_AF(<) + OP_CMP_AF(>=) + OP_CMP_AF(<=) + OP_CMP_AF(==) + OP_CMP_AF(!=) +// FIXME: Move compare with double out of struct ap_fixed_base defination +// and combine it with compare operator(double, ap_fixed_base) +#define DOUBLE_CMP_AF(Sym) \ + INLINE bool operator Sym(double d) const { return to_double() Sym d; } + + DOUBLE_CMP_AF(>) + DOUBLE_CMP_AF(<) + DOUBLE_CMP_AF(>=) + DOUBLE_CMP_AF(<=) + DOUBLE_CMP_AF(==) + DOUBLE_CMP_AF(!=) + + // Bit and Slice Select + INLINE af_bit_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> operator[]( + unsigned index) { + _AP_WARNING(index >= _AP_W, "Attempting to read bit beyond MSB"); + return af_bit_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>(this, index); + } + + template + INLINE af_bit_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> operator[]( + const ap_int_base<_AP_W2, _AP_S2>& index) { + _AP_WARNING(index < 0, "Attempting to read bit with negative index"); + _AP_WARNING(index >= _AP_W, "Attempting to read bit beyond MSB"); + return af_bit_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>(this, + index.to_int()); + } + + INLINE bool operator[](unsigned index) const { + _AP_WARNING(index >= _AP_W, "Attempting to read bit beyond MSB"); + return _AP_ROOT_op_get_bit(const_cast(this)->V, index); + } + + INLINE af_bit_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> bit( + unsigned index) { + _AP_WARNING(index >= _AP_W, "Attempting to read bit beyond MSB"); + return af_bit_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>(this, index); + } + + template + INLINE af_bit_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> bit( + const ap_int_base<_AP_W2, _AP_S2>& index) { + _AP_WARNING(index < 0, "Attempting to read bit with negative index"); + _AP_WARNING(index >= _AP_W, "Attempting to read bit beyond MSB"); + return af_bit_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>(this, + index.to_int()); + } + + INLINE bool bit(unsigned index) const { + _AP_WARNING(index >= _AP_W, "Attempting to read bit beyond MSB"); + return _AP_ROOT_op_get_bit(const_cast(this)->V, index); + } + + template + INLINE af_bit_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> get_bit( + const ap_int_base<_AP_W2, true>& index) { + _AP_WARNING(index < _AP_I - _AP_W, + "Attempting to read bit with negative index"); + _AP_WARNING(index >= _AP_I, "Attempting to read bit beyond MSB"); + return af_bit_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>( + this, index.to_int() + _AP_W - _AP_I); + } + + INLINE bool get_bit(int index) const { + _AP_WARNING(index >= _AP_I, "Attempting to read bit beyond MSB"); + _AP_WARNING(index < _AP_I - _AP_W, "Attempting to read bit beyond MSB"); + return _AP_ROOT_op_get_bit(const_cast(this)->V, + index + _AP_W - _AP_I); + } +#if 0 + INLINE af_bit_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> get_bit( + int index) { + _AP_WARNING(index < _AP_I - _AP_W, + "Attempting to read bit with negative index"); + _AP_WARNING(index >= _AP_I, "Attempting to read bit beyond MSB"); + return af_bit_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>( + this, index + _AP_W - _AP_I); + } +#endif + + template + 
INLINE bool get_bit(const ap_int_base<_AP_W2, true>& index) const { + _AP_WARNING(index >= _AP_I, "Attempting to read bit beyond MSB"); + _AP_WARNING(index < _AP_I - _AP_W, "Attempting to read bit beyond MSB"); + return _AP_ROOT_op_get_bit(const_cast(this)->V, + index.to_int() + _AP_W - _AP_I); + } + + INLINE af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> range(int Hi, + int Lo) { + _AP_WARNING((Hi >= _AP_W) || (Lo >= _AP_W), "Out of bounds in range()"); + return af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>(this, Hi, Lo); + } + + // This is a must to strip constness to produce reference type. + INLINE af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> range( + int Hi, int Lo) const { + _AP_WARNING((Hi >= _AP_W) || (Lo >= _AP_W), "Out of bounds in range()"); + return af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>( + const_cast(this), Hi, Lo); + } + + template + INLINE af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> range( + const ap_int_base<_AP_W2, _AP_S2>& HiIdx, + const ap_int_base<_AP_W3, _AP_S3>& LoIdx) { + int Hi = HiIdx.to_int(); + int Lo = LoIdx.to_int(); + return this->range(Hi, Lo); + } + + template + INLINE af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> range( + const ap_int_base<_AP_W2, _AP_S2>& HiIdx, + const ap_int_base<_AP_W3, _AP_S3>& LoIdx) const { + int Hi = HiIdx.to_int(); + int Lo = LoIdx.to_int(); + return this->range(Hi, Lo); + } + + INLINE af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> range() { + return this->range(_AP_W - 1, 0); + } + + INLINE af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> range() const { + return this->range(_AP_W - 1, 0); + } + + INLINE af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> operator()( + int Hi, int Lo) { + return this->range(Hi, Lo); + } + + INLINE af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> operator()( + int Hi, int Lo) const { + return this->range(Hi, Lo); + } + + template + INLINE af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> operator()( + const ap_int_base<_AP_W2, _AP_S2>& HiIdx, + const ap_int_base<_AP_W3, _AP_S3>& LoIdx) { + int Hi = HiIdx.to_int(); + int Lo = LoIdx.to_int(); + return this->range(Hi, Lo); + } + + template + INLINE af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> operator()( + const ap_int_base<_AP_W2, _AP_S2>& HiIdx, + const ap_int_base<_AP_W3, _AP_S3>& LoIdx) const { + int Hi = HiIdx.to_int(); + int Lo = LoIdx.to_int(); + return this->range(Hi, Lo); + } + + INLINE bool is_zero() const { return Base::V == 0; } + + INLINE bool is_neg() const { + if (_AP_S && _AP_ROOT_op_get_bit(Base::V, _AP_W - 1)) return true; + return false; + } + + INLINE int wl() const { return _AP_W; } + + INLINE int iwl() const { return _AP_I; } + + INLINE ap_q_mode q_mode() const { return _AP_Q; } + + INLINE ap_o_mode o_mode() const { return _AP_O; } + + INLINE int n_bits() const { return _AP_N; } + + // print a string representation of this number in the given radix. + // Radix support is 2, 8, 10, or 16. + // The result will include a prefix indicating the radix, except for decimal, + // where no prefix is needed. The default is to output a signed representation + // of signed numbers, or an unsigned representation of unsigned numbers. For + // non-decimal formats, this can be changed by the 'sign' argument. 
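+ //
+ // Illustrative example (editorial, not from the original header),
+ // assuming the usual ap_fixed typedef over this class:
+ // ap_fixed<8, 4> x = 2.75;
+ // x.to_string(10); // decimal digits and point, e.g. "2.75"
+ // x.to_string(16); // hex digits, with a "p0" exponent suffix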
+#ifndef __SYNTHESIS__
+ std::string to_string(unsigned char radix = 2, bool sign = _AP_S) const {
+ // XXX in autosim/autowrap.tcl "(${name}).to_string(2).c_str()" is used to
+ // initialize sc_lv, which seems incapable of handling format "-0b".
+ if (radix == 2) sign = false;
+
+ std::string str;
+ str.clear();
+ char step = 0;
+ bool isNeg = sign && (Base::V < 0);
+
+ // Extend to take care of the -MAX case.
+ ap_fixed_base<_AP_W + 1, _AP_I + 1> tmp(*this);
+ if (isNeg) {
+ tmp = -tmp;
+ str += '-';
+ }
+ std::string prefix;
+ switch (radix) {
+ case 2:
+ prefix = "0b";
+ step = 1;
+ break;
+ case 8:
+ prefix = "0o";
+ step = 3;
+ break;
+ case 16:
+ prefix = "0x";
+ step = 4;
+ break;
+ default:
+ break;
+ }
+
+ if (_AP_I > 0) {
+ // Note we drop the quantization and rounding flags here. The
+ // integer part is always in range, and the fractional part we
+ // want to drop. Also, the number is always positive, because
+ // of the absolute value above.
+ ap_int_base<_AP_I + 1, false> int_part;
+ // [ 1 ][ I ] . [ W - I ]
+ // | | |
+ // W W-I 0
+ int_part.V = _AP_ROOT_op_get_range(
+ tmp.V, _AP_W - _AP_I, _AP_W);
+ str += int_part.to_string(radix, false);
+ } else {
+ str += prefix;
+ str += '0';
+ }
+
+ ap_fixed_base<_AP_W - _AP_I, 0, false> frac_part = tmp;
+
+ if (radix == 10) {
+ if (frac_part != 0) {
+ str += ".";
+ while (frac_part != 0) {
+ char digit = (frac_part * radix).to_char();
+ str += static_cast<char>(digit + '0');
+ frac_part *= radix;
+ }
+ }
+ } else {
+ if (frac_part != 0) {
+ str += ".";
+ for (signed i = _AP_W - _AP_I - 1; i >= 0; i -= step) {
+ char digit = frac_part.range(i, AP_MAX(0, i - step + 1)).to_char();
+ // If we have a partial bit pattern at the end, then we need
+ // to put it in the high-order bits of 'digit'.
+ int offset = AP_MIN(0, i - step + 1);
+ digit <<= -offset;
+ str += digit < 10 ? static_cast<char>(digit + '0')
+ : static_cast<char>(digit - 10 + 'a');
+ }
+ if (radix == 16)
+ str += "p0"; // C99 Hex constants are required to have an exponent.
+ }
+ }
+ return str;
+ }
+#else
+ // XXX HLS will delete this in synthesis
+ INLINE char* to_string(unsigned char radix = 2, bool sign = _AP_S) const {
+ return 0;
+ }
+#endif
+}; // struct ap_fixed_base.
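+
+// ----------------------------------------------------------------------
+// Usage sketch (editorial illustration, not part of the original
+// header), assuming the public ap_fixed/ap_ufixed wrappers over
+// ap_fixed_base that ap_fixed.h provides:
+//
+// ap_fixed<16, 8> a = 3.25; // 8 integer bits, 8 fractional bits
+// ap_fixed<16, 8> b = -1.5;
+//
+// // Binary operators return the widened RType, so the product is
+// // exact before assignment back to 16 bits quantizes it.
+// ap_fixed<16, 8> c = a * b; // -4.875
+//
+// // lshift<N>/rshift<N> only move the binary point at compile time;
+// // operator<< and operator>> shift the underlying bits.
+// ap_fixed<16, 9> d = a.lshift<1>(); // same bits, value doubled
+//
+// double x = c.to_double(); // round-half-to-even, per to_double()
+// ----------------------------------------------------------------------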
+ +template +INLINE void b_not( + ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& ret, + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op) { + ret.V = ~op.V; +} + +template +INLINE void b_and( + ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& ret, + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op1, + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op2) { + ret.V = op1.V & op2.V; +} + +template +INLINE void b_or( + ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& ret, + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op1, + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op2) { + ret.V = op1.V | op2.V; +} + +template +INLINE void b_xor( + ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& ret, + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op1, + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op2) { + ret.V = op1.V ^ op2.V; +} + +template +INLINE void neg( + ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& ret, + const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op) { + ap_fixed_base<_AP_W2 + !_AP_S2, _AP_I2 + !_AP_S2, true, _AP_Q2, _AP_O2, + _AP_N2> + t; + t.V = -op.V; + ret = t; +} + +template +INLINE void lshift( + ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& ret, + const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op, + int i) { + enum { + F2 = _AP_W2 - _AP_I2, + _AP_I3 = AP_MAX(_AP_I, _AP_I2), + _AP_W3 = _AP_I3 + F2, + }; + // wide buffer + ap_fixed_base<_AP_W3, _AP_I3, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> t; + t.V = op.V; + t.V <<= i; // FIXME overflow? + // handle quantization and overflow + ret = t; +} + +template +INLINE void rshift( + ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& ret, + const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op, + int i) { + enum { + F = _AP_W - _AP_I, + F2 = _AP_W2 - _AP_I2, + F3 = AP_MAX(F, F2), + _AP_W3 = _AP_I2 + F3, + sh = F - F2, + }; + // wide buffer + ap_fixed_base<_AP_W3, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> t; + t.V = op.V; + if (sh >= 0) + t.V <<= (int) sh; + t.V >>= i; + // handle quantization and overflow + ret = t; +} + +//// FIXME +//// These partial specialization ctors allow code like +//// char c = 'a'; +//// ap_fixed_base<8, 8, true> x(c); +//// but what bout ap_fixed_base<9, 9, true> y(c) ? 
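+//// Editorial note: through CTOR_FROM_INT above, the char is first
+//// captured as an 8-bit ap_fixed_base and then assigned with the
+//// widening template operator=, so
+//// ap_fixed_base<9, 9, true> y(c);
+//// appears to work through the same path.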
+// + +#ifndef __SYNTHESIS__ +INLINE std::string scientificFormat(std::string& input) { + if (input.length() == 0) return input; + + size_t decPosition = input.find('.'); + if (decPosition == std::string::npos) decPosition = input.length(); + + size_t firstNonZeroPos = 0; + for (; input[firstNonZeroPos] > '9' || input[firstNonZeroPos] < '1'; + firstNonZeroPos++) + ; + + int exp; + if (firstNonZeroPos > decPosition) + exp = decPosition - firstNonZeroPos; + else + exp = decPosition - firstNonZeroPos - 1; + std::string expString = ""; + if (exp == 0) + ; + else if (exp < 0) { + expString += "e-"; + exp = -exp; + } else + expString += "e+"; + + if (exp < 10 && exp > 0) { + expString += '0'; + expString += (char)('0' + exp); + } else if (exp != 0) { + std::string tmp; + + std::ostringstream oss; + oss << exp; + + tmp = oss.str(); + expString += tmp; + } + + int lastNonZeroPos = (int)(input.length() - 1); + for (; lastNonZeroPos >= 0; --lastNonZeroPos) + if (input[lastNonZeroPos] <= '9' && input[lastNonZeroPos] > '0') break; + + std::string ans = ""; + ans += input[firstNonZeroPos]; + if (firstNonZeroPos != (size_t)lastNonZeroPos) { + ans += '.'; + for (int i = firstNonZeroPos + 1; i <= lastNonZeroPos; i++) + if (input[i] != '.') ans += input[i]; + } + + ans += expString; + return ans; +} + +INLINE std::string reduceToPrecision(std::string& input, int precision) { + bool isZero = true; + size_t inputLen = input.length(); + for (size_t i = 0; i < inputLen && isZero; i++) + if (input[i] != '.' && input[i] != '0') isZero = false; + if (isZero) return "0"; + + // Find the first valid number, skip '-' + int FirstNonZeroPos = 0; + int LastNonZeroPos = (int)inputLen - 1; + int truncBitPosition = 0; + size_t decPosition = input.find('.'); + for (; input[FirstNonZeroPos] < '1' || input[FirstNonZeroPos] > '9'; + FirstNonZeroPos++) + ; + + for (; input[LastNonZeroPos] < '1' || input[LastNonZeroPos] > '9'; + LastNonZeroPos--) + ; + + if (decPosition == std::string::npos) decPosition = inputLen; + // Count the valid number, to decide whether we need to truncate + if ((int)decPosition > LastNonZeroPos) { + if (LastNonZeroPos - FirstNonZeroPos + 1 <= precision) return input; + truncBitPosition = FirstNonZeroPos + precision; + } else if ((int)decPosition < FirstNonZeroPos) { // This is pure decimal + if (LastNonZeroPos - FirstNonZeroPos + 1 <= precision) { + if (FirstNonZeroPos - decPosition - 1 < 4) { + return input; + } else { + if (input[0] == '-') { + std::string tmp = input.substr(1, inputLen - 1); + return std::string("-") + scientificFormat(tmp); + } else + return scientificFormat(input); + } + } + truncBitPosition = FirstNonZeroPos + precision; + } else { + if (LastNonZeroPos - FirstNonZeroPos <= precision) return input; + truncBitPosition = FirstNonZeroPos + precision + 1; + } + + // duplicate the input string, we want to add "0" before the valid numbers + // This is easy for quantization, since we may change 9999 to 10000 + std::string ans = ""; + std::string dupInput = "0"; + if (input[0] == '-') { + ans += '-'; + dupInput += input.substr(1, inputLen - 1); + } else { + dupInput += input.substr(0, inputLen); + ++truncBitPosition; + } + + // Add 'carry' after truncation, if necessary + bool carry = dupInput[truncBitPosition] > '4'; + for (int i = truncBitPosition - 1; i >= 0 && carry; i--) { + if (dupInput[i] == '.') continue; + if (dupInput[i] == '9') + dupInput[i] = '0'; + else { + ++dupInput[i]; + carry = false; + } + } + + // bits outside precision range should be set to 0 + if (dupInput[0] == '1') 
+ FirstNonZeroPos = 0;
+ else {
+ FirstNonZeroPos = 0;
+ while (dupInput[FirstNonZeroPos] < '1' || dupInput[FirstNonZeroPos] > '9')
+ ++FirstNonZeroPos;
+ }
+
+ unsigned it = FirstNonZeroPos;
+ int NValidNumber = 0;
+ while (it < dupInput.length()) {
+ if (dupInput[it] == '.') {
+ ++it;
+ continue;
+ }
+ ++NValidNumber;
+ if (NValidNumber > precision) dupInput[it] = '0';
+ ++it;
+ }
+
+ // Adjust the truncation position and the value
+ decPosition = dupInput.find('.');
+ if (decPosition == std::string::npos) // when the value is an integer
+ truncBitPosition = (int)dupInput.length();
+ else
+ for (truncBitPosition = (int)(dupInput.length() - 1); truncBitPosition >= 0;
+ --truncBitPosition) {
+ if (dupInput[truncBitPosition] == '.') break;
+ if (dupInput[truncBitPosition] != '0') {
+ truncBitPosition++;
+ break;
+ }
+ }
+
+ if (dupInput[0] == '1')
+ dupInput = dupInput.substr(0, truncBitPosition);
+ else
+ dupInput = dupInput.substr(1, truncBitPosition - 1);
+
+ decPosition = dupInput.find('.');
+ if (decPosition != std::string::npos) {
+ size_t it = 0;
+ for (it = decPosition + 1; dupInput[it] == '0'; it++)
+ ;
+ if (it - decPosition - 1 < 4) {
+ ans += dupInput;
+ return ans;
+ } else {
+ ans += scientificFormat(dupInput);
+ return ans;
+ }
+ } else if ((int)(dupInput.length()) <= precision) {
+ ans += dupInput;
+ return ans;
+ }
+
+ ans += scientificFormat(dupInput);
+ return ans;
+}
+
+template <int _AP_W, int _AP_I, bool _AP_S, ap_q_mode _AP_Q, ap_o_mode _AP_O,
+ int _AP_N>
+INLINE void print(
+ const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& x) {
+ if (_AP_I > 0) {
+ ap_int_base<_AP_I, _AP_S> p1;
+ p1.V = x.V >> (_AP_W - _AP_I);
+ print(p1.V); // print overload for .V should exist
+ } else {
+ printf("0");
+ }
+ printf(".");
+ if (_AP_I < _AP_W) {
+ ap_int_base<_AP_W - _AP_I, false> p2;
+ p2.V = _AP_ROOT_op_get_range(x.V, 0, _AP_W - _AP_I);
+ print(p2.V, false); // print overload for .V should exist
+ }
+}
+#endif // ifndef __SYNTHESIS__
+
+// XXX the following two functions have to exist in synthesis,
+// as some old HLS Video Library code uses the ostream overload,
+// although HLS will later delete the I/O function calls.
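+
+// Editorial illustration of the stream overloads declared below
+// (assumes an ap_fixed typedef and <iomanip> in user code):
+//
+// ap_fixed<16, 8> x = 2.5;
+// std::cout << std::setprecision(4) << x; // writes "2.5"
+// std::cin >> x; // reads a double, then quantizes into x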
+ +/// Output streaming +//----------------------------------------------------------------------------- +// XXX apcc cannot handle global std::ios_base::Init() brought in by +#ifndef AP_AUTOCC +#ifndef __SYNTHESIS__ +template +INLINE std::ostream& operator<<( + std::ostream& out, + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& x) { + // TODO support std::ios_base::fmtflags + unsigned width = out.width(); + unsigned precision = out.precision(); + char fill = out.fill(); + std::string str = x.to_string(10, _AP_S); + str = reduceToPrecision(str, precision); + if (width > str.length()) { + for (unsigned i = 0; i < width - str.length(); ++i) + out << fill; + } + out << str; + return out; +} +#endif // ifndef __SYNTHESIS__ + +/// Input streaming +// ----------------------------------------------------------------------------- +#ifndef __SYNTHESIS__ +template +INLINE std::istream& operator>>( + std::istream& in, + ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& x) { + double d; + in >> d; + x = ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>(d); + return in; +} +#endif +#endif // ifndef AP_AUTOCC + +/// Operators mixing Integers with ap_fixed_base +// ----------------------------------------------------------------------------- +#define AF_BIN_OP_WITH_INT_SF(BIN_OP, C_TYPE, _AP_W2, _AP_S2, RTYPE) \ + template \ + INLINE typename ap_fixed_base<_AP_W, _AP_I, _AP_S>::template RType< \ + _AP_W2, _AP_W2, _AP_S2>::RTYPE \ + operator BIN_OP( \ + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op, \ + C_TYPE i_op) { \ + return op.operator BIN_OP(ap_int_base<_AP_W2, _AP_S2>(i_op)); \ + } + +#define AF_BIN_OP_WITH_INT(BIN_OP, C_TYPE, _AP_W2, _AP_S2, RTYPE) \ + template \ + INLINE typename ap_fixed_base<_AP_W, _AP_I, _AP_S>::template RType< \ + _AP_W2, _AP_W2, _AP_S2>::RTYPE \ + operator BIN_OP( \ + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op, \ + C_TYPE i_op) { \ + return op.operator BIN_OP(ap_fixed_base<_AP_W2, _AP_W2, _AP_S2>(i_op)); \ + } \ + template \ + INLINE typename ap_fixed_base<_AP_W, _AP_I, _AP_S>::template RType< \ + _AP_W2, _AP_W2, _AP_S2>::RTYPE \ + operator BIN_OP( \ + C_TYPE i_op, \ + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op) { \ + return ap_fixed_base<_AP_W2, _AP_W2, _AP_S2>(i_op).operator BIN_OP(op); \ + } + +#define AF_REL_OP_WITH_INT(REL_OP, C_TYPE, _AP_W2, _AP_S2) \ + template \ + INLINE bool operator REL_OP( \ + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op, \ + C_TYPE i_op) { \ + return op.operator REL_OP(ap_fixed_base<_AP_W2, _AP_W2, _AP_S2>(i_op)); \ + } \ + template \ + INLINE bool operator REL_OP( \ + C_TYPE i_op, \ + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op) { \ + return ap_fixed_base<_AP_W2, _AP_W2, _AP_S2>(i_op).operator REL_OP(op); \ + } + +#define AF_ASSIGN_OP_WITH_INT(ASSIGN_OP, C_TYPE, _AP_W2, _AP_S2) \ + template \ + INLINE ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& \ + operator ASSIGN_OP( \ + ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op, \ + C_TYPE i_op) { \ + return op.operator ASSIGN_OP(ap_fixed_base<_AP_W2, _AP_W2, _AP_S2>(i_op)); \ + } + +#define AF_ASSIGN_OP_WITH_INT_SF(ASSIGN_OP, C_TYPE, _AP_W2, _AP_S2) \ + template \ + INLINE ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& \ + operator ASSIGN_OP( \ + ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op, \ + C_TYPE i_op) { \ + return op.operator ASSIGN_OP(ap_int_base<_AP_W2, _AP_S2>(i_op)); \ + } + +#define 
ALL_AF_OP_WITH_INT(C_TYPE, BITS, SIGN) \ + AF_BIN_OP_WITH_INT(+, C_TYPE, (BITS), (SIGN), plus) \ + AF_BIN_OP_WITH_INT(-, C_TYPE, (BITS), (SIGN), minus) \ + AF_BIN_OP_WITH_INT(*, C_TYPE, (BITS), (SIGN), mult) \ + AF_BIN_OP_WITH_INT(/, C_TYPE, (BITS), (SIGN), div) \ + AF_BIN_OP_WITH_INT(&, C_TYPE, (BITS), (SIGN), logic) \ + AF_BIN_OP_WITH_INT(|, C_TYPE, (BITS), (SIGN), logic) \ + AF_BIN_OP_WITH_INT(^, C_TYPE, (BITS), (SIGN), logic) \ + AF_BIN_OP_WITH_INT_SF(>>, C_TYPE, (BITS), (SIGN), lhs) \ + AF_BIN_OP_WITH_INT_SF(<<, C_TYPE, (BITS), (SIGN), lhs) \ + \ + AF_ASSIGN_OP_WITH_INT(+=, C_TYPE, (BITS), (SIGN)) \ + AF_ASSIGN_OP_WITH_INT(-=, C_TYPE, (BITS), (SIGN)) \ + AF_ASSIGN_OP_WITH_INT(*=, C_TYPE, (BITS), (SIGN)) \ + AF_ASSIGN_OP_WITH_INT(/=, C_TYPE, (BITS), (SIGN)) \ + AF_ASSIGN_OP_WITH_INT(&=, C_TYPE, (BITS), (SIGN)) \ + AF_ASSIGN_OP_WITH_INT(|=, C_TYPE, (BITS), (SIGN)) \ + AF_ASSIGN_OP_WITH_INT(^=, C_TYPE, (BITS), (SIGN)) \ + AF_ASSIGN_OP_WITH_INT_SF(>>=, C_TYPE, (BITS), (SIGN)) \ + AF_ASSIGN_OP_WITH_INT_SF(<<=, C_TYPE, (BITS), (SIGN)) \ + \ + AF_REL_OP_WITH_INT(>, C_TYPE, (BITS), (SIGN)) \ + AF_REL_OP_WITH_INT(<, C_TYPE, (BITS), (SIGN)) \ + AF_REL_OP_WITH_INT(>=, C_TYPE, (BITS), (SIGN)) \ + AF_REL_OP_WITH_INT(<=, C_TYPE, (BITS), (SIGN)) \ + AF_REL_OP_WITH_INT(==, C_TYPE, (BITS), (SIGN)) \ + AF_REL_OP_WITH_INT(!=, C_TYPE, (BITS), (SIGN)) + +ALL_AF_OP_WITH_INT(bool, 1, false) +ALL_AF_OP_WITH_INT(char, 8, CHAR_IS_SIGNED) +ALL_AF_OP_WITH_INT(signed char, 8, true) +ALL_AF_OP_WITH_INT(unsigned char, 8, false) +ALL_AF_OP_WITH_INT(short, _AP_SIZE_short, true) +ALL_AF_OP_WITH_INT(unsigned short, _AP_SIZE_short, false) +ALL_AF_OP_WITH_INT(int, _AP_SIZE_int, true) +ALL_AF_OP_WITH_INT(unsigned int, _AP_SIZE_int, false) +ALL_AF_OP_WITH_INT(long, _AP_SIZE_long, true) +ALL_AF_OP_WITH_INT(unsigned long, _AP_SIZE_long, false) +ALL_AF_OP_WITH_INT(ap_slong, _AP_SIZE_ap_slong, true) +ALL_AF_OP_WITH_INT(ap_ulong, _AP_SIZE_ap_slong, false) + +#undef ALL_AF_OP_WITH_INT +#undef AF_BIN_OP_WITH_INT +#undef AF_BIN_OP_WITH_INT_SF +#undef AF_ASSIGN_OP_WITH_INT +#undef AF_ASSIGN_OP_WITH_INT_SF +#undef AF_REL_OP_WITH_INT + +/* + * ********************************************************************** + * TODO + * There is no operator defined with float/double/long double, so that + * code like + * ap_fixed<8,4> a = 1.5f; + * a += 0.5f; + * will fail in compilation. + * Operator with warning about conversion might be wanted. 
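+ *
+ * Editorial note: the converting constructors from float/double do
+ * exist on ap_fixed_base, so an explicit temporary is a workaround:
+ * a += ap_fixed<8,4>(0.5f); // OK: deduces the template operator+=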
+ * ********************************************************************** + */ + +#define AF_BIN_OP_WITH_AP_INT(BIN_OP, RTYPE) \ + template \ + INLINE typename ap_fixed_base<_AP_W2, _AP_W2, _AP_S2>::template RType< \ + _AP_W, _AP_I, _AP_S>::RTYPE \ + operator BIN_OP( \ + const ap_int_base<_AP_W2, _AP_S2>& i_op, \ + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op) { \ + return ap_fixed_base<_AP_W2, _AP_W2, _AP_S2>(i_op).operator BIN_OP(op); \ + } \ + \ + template \ + INLINE typename ap_fixed_base<_AP_W, _AP_I, _AP_S>::template RType< \ + _AP_W2, _AP_W2, _AP_S2>::RTYPE \ + operator BIN_OP( \ + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op, \ + const ap_int_base<_AP_W2, _AP_S2>& i_op) { \ + return op.operator BIN_OP(ap_fixed_base<_AP_W2, _AP_W2, _AP_S2>(i_op)); \ + } + +#define AF_REL_OP_WITH_AP_INT(REL_OP) \ + template \ + INLINE bool operator REL_OP( \ + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op, \ + const ap_int_base<_AP_W2, _AP_S2>& i_op) { \ + return op.operator REL_OP(ap_fixed_base<_AP_W2, _AP_W2, _AP_S2>(i_op)); \ + } \ + \ + template \ + INLINE bool operator REL_OP( \ + const ap_int_base<_AP_W2, _AP_S2>& i_op, \ + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op) { \ + return ap_fixed_base<_AP_W2, _AP_W2, _AP_S2>(i_op).operator REL_OP(op); \ + } + +#define AF_ASSIGN_OP_WITH_AP_INT(ASSIGN_OP) \ + template \ + INLINE ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& \ + operator ASSIGN_OP( \ + ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op, \ + const ap_int_base<_AP_W2, _AP_S2>& i_op) { \ + return op.operator ASSIGN_OP(ap_fixed_base<_AP_W2, _AP_W2, _AP_S2>(i_op)); \ + } \ + \ + template \ + INLINE ap_int_base<_AP_W2, _AP_S2>& operator ASSIGN_OP( \ + ap_int_base<_AP_W2, _AP_S2>& i_op, \ + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op) { \ + return i_op.operator ASSIGN_OP(op.to_ap_int_base()); \ + } + +AF_BIN_OP_WITH_AP_INT(+, plus) +AF_BIN_OP_WITH_AP_INT(-, minus) +AF_BIN_OP_WITH_AP_INT(*, mult) +AF_BIN_OP_WITH_AP_INT(/, div) +AF_BIN_OP_WITH_AP_INT(&, logic) +AF_BIN_OP_WITH_AP_INT(|, logic) +AF_BIN_OP_WITH_AP_INT(^, logic) + +#undef AF_BIN_OP_WITH_AP_INT + +AF_ASSIGN_OP_WITH_AP_INT(+=) +AF_ASSIGN_OP_WITH_AP_INT(-=) +AF_ASSIGN_OP_WITH_AP_INT(*=) +AF_ASSIGN_OP_WITH_AP_INT(/=) +AF_ASSIGN_OP_WITH_AP_INT(&=) +AF_ASSIGN_OP_WITH_AP_INT(|=) +AF_ASSIGN_OP_WITH_AP_INT(^=) + +#undef AF_ASSIGN_OP_WITH_AP_INT + +AF_REL_OP_WITH_AP_INT(==) +AF_REL_OP_WITH_AP_INT(!=) +AF_REL_OP_WITH_AP_INT(>) +AF_REL_OP_WITH_AP_INT(>=) +AF_REL_OP_WITH_AP_INT(<) +AF_REL_OP_WITH_AP_INT(<=) + +#undef AF_REL_OP_WITH_AP_INT + +// Relational Operators with double +template +INLINE bool operator==( + double op1, + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op2) { + return op2.operator==(op1); +} + +template +INLINE bool operator!=( + double op1, + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op2) { + return op2.operator!=(op1); +} + +template +INLINE bool operator>( + double op1, + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op2) { + return op2.operator<(op1); +} + +template +INLINE bool operator>=( + double op1, + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op2) { + return op2.operator<=(op1); +} + +template +INLINE bool operator<( + double op1, + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op2) { + return op2.operator>(op1); +} + +template +INLINE bool operator<=( + double op1, + const ap_fixed_base<_AP_W, _AP_I, 
_AP_S, _AP_Q, _AP_O, _AP_N>& op2) { + return op2.operator>=(op1); +} + +#endif // ifndef __cplusplus else + +#endif // ifndef __AP_FIXED_BASE_H__ else + +// -*- cpp -*- + +// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/include/ap_fixed_ref.h b/include/ap_fixed_ref.h new file mode 100644 index 0000000..76cab3b --- /dev/null +++ b/include/ap_fixed_ref.h @@ -0,0 +1,754 @@ +/* +#- (c) Copyright 2011-2019 Xilinx, Inc. All rights reserved. +#- +#- This file contains confidential and proprietary information +#- of Xilinx, Inc. and is protected under U.S. and +#- international copyright and other intellectual property +#- laws. +#- +#- DISCLAIMER +#- This disclaimer is not a license and does not grant any +#- rights to the materials distributed herewith. Except as +#- otherwise provided in a valid license issued to you by +#- Xilinx, and to the maximum extent permitted by applicable +#- law: (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND +#- WITH ALL FAULTS, AND XILINX HEREBY DISCLAIMS ALL WARRANTIES +#- AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING +#- BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON- +#- INFRINGEMENT, OR FITNESS FOR ANY PARTICULAR PURPOSE; and +#- (2) Xilinx shall not be liable (whether in contract or tort, +#- including negligence, or under any other theory of +#- liability) for any loss or damage of any kind or nature +#- related to, arising under or in connection with these +#- materials, including for any direct, or any indirect, +#- special, incidental, or consequential loss or damage +#- (including loss of data, profits, goodwill, or any type of +#- loss or damage suffered as a result of any action brought +#- by a third party) even if such damage or loss was +#- reasonably foreseeable or Xilinx had been advised of the +#- possibility of the same. +#- +#- CRITICAL APPLICATIONS +#- Xilinx products are not designed or intended to be fail- +#- safe, or for use in any application requiring fail-safe +#- performance, such as life-support or safety devices or +#- systems, Class III medical devices, nuclear facilities, +#- applications related to the deployment of airbags, or any +#- other applications that could lead to death, personal +#- injury, or severe property or environmental damage +#- (individually and collectively, "Critical +#- Applications"). Customer assumes the sole risk and +#- liability of any use of Xilinx products in Critical +#- Applications, subject only to applicable laws and +#- regulations governing limitations on product liability. +#- +#- THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS +#- PART OF THIS FILE AT ALL TIMES. +#- ************************************************************************ + +*/ + +#ifndef __AP_FIXED_REF_H__ +#define __AP_FIXED_REF_H__ + +#ifndef __AP_FIXED_H__ +// TODO make this an error +#pragma message \ + "Only ap_fixed.h and ap_int.h can be included directly in user code." 
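+// (Editorial note: including ap_fixed.h first defines __AP_FIXED_H__,
+// which suppresses the message above; direct inclusion of this header
+// is unsupported.)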
+#endif + +#ifndef __cplusplus +#error "C++ is required to include this header file" + +#else +#ifndef __SYNTHESIS__ +#include +#endif +/// Proxy class, which allows bit selection to be used as both rvalue (for +/// reading) and lvalue (for writing) +template +struct af_bit_ref { +#ifdef _MSC_VER +#pragma warning(disable : 4521 4522) +#endif + typedef ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> ref_type; + ref_type& d_bv; + int d_index; + + public: + INLINE af_bit_ref( + const af_bit_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& ref) + : d_bv(ref.d_bv), d_index(ref.d_index) { +#ifndef __SYNTHESIS__ + _AP_WARNING(d_index < 0, "Index of bit vector (%d) cannot be negative.", + d_index); + _AP_WARNING(d_index >= _AP_W, "Index of bit vector (%d) out of range (%d).", + d_index, _AP_W); +#endif + } + + INLINE af_bit_ref(ref_type* bv, int index = 0) : d_bv(*bv), d_index(index) {} + + INLINE af_bit_ref(const ref_type* bv, int index = 0) + : d_bv(*const_cast(bv)), d_index(index) {} + + /// convert operators. + INLINE operator bool() const { return _AP_ROOT_op_get_bit(d_bv.V, d_index); } + + /// @name assign operators + // @{ + INLINE af_bit_ref& operator=(bool val) { + d_bv.V = _AP_ROOT_op_set_bit(d_bv.V, d_index, val); + return *this; + } + + // Be explicit to prevent it from being deleted, as field d_bv + // is of reference type. + INLINE af_bit_ref& operator=(const af_bit_ref& val) { + return operator=(bool(val)); + } + + template + INLINE af_bit_ref& operator=( + const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& val) { + return operator=(bool(val)); + } + + template + INLINE af_bit_ref& operator=(const ap_bit_ref<_AP_W2, _AP_S2>& val) { + return operator=(bool(val)); + } + + template + INLINE af_bit_ref& operator=(const ap_int_base<_AP_W2, _AP_S2>& val) { + return operator=(val != 0); + } + + template + INLINE af_bit_ref& operator=(const ap_range_ref<_AP_W2, _AP_S2>& val) { + return operator=(ap_int_base<_AP_W2, false>(val)); + } + + template + INLINE af_bit_ref& operator=( + const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& val) { + return operator=(ap_int_base<_AP_W2, false>(val)); + } + + template + INLINE af_bit_ref& operator=( + const ap_concat_ref<_AP_W2, _AP_T3, _AP_W3, _AP_T3>& val) { + return operator=(ap_int_base<_AP_W2 + _AP_W3, false>(val)); + } + // @} + + /// @name concatenate operators + // @{ + template + INLINE ap_concat_ref<1, af_bit_ref, _AP_W2, ap_int_base<_AP_W2, _AP_S2> > + operator,(ap_int_base<_AP_W2, _AP_S2> &op) { + return ap_concat_ref<1, af_bit_ref, _AP_W2, ap_int_base<_AP_W2, _AP_S2> >( + *this, op); + } + + template + INLINE ap_concat_ref<1, af_bit_ref, 1, ap_bit_ref<_AP_W2, _AP_S2> > operator,( + const ap_bit_ref<_AP_W2, _AP_S2> &op) { + return ap_concat_ref<1, af_bit_ref, 1, ap_bit_ref<_AP_W2, _AP_S2> >(*this, + op); + } + + template + INLINE ap_concat_ref<1, af_bit_ref, _AP_W2, ap_range_ref<_AP_W2, _AP_S2> > + operator,(const ap_range_ref<_AP_W2, _AP_S2> &op) { + return ap_concat_ref<1, af_bit_ref, _AP_W2, ap_range_ref<_AP_W2, _AP_S2> >( + *this, op); + } + + template + INLINE ap_concat_ref<1, af_bit_ref, _AP_W2 + _AP_W3, + ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> > + operator,(const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> &op) { + return ap_concat_ref<1, af_bit_ref, _AP_W2 + _AP_W3, + ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> >(*this, + op); + } + + template + INLINE ap_concat_ref< + 1, af_bit_ref, _AP_W2, + af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > + operator,( + const 
af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> &op) { + return ap_concat_ref< + 1, af_bit_ref, _AP_W2, + af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >(*this, + op); + } + + template + INLINE ap_concat_ref<1, af_bit_ref, 1, af_bit_ref<_AP_W2, _AP_I2, _AP_S2, + _AP_Q2, _AP_O2, _AP_N2> > + operator,( + const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> &op) { + return ap_concat_ref<1, af_bit_ref, 1, af_bit_ref<_AP_W2, _AP_I2, _AP_S2, + _AP_Q2, _AP_O2, _AP_N2> >( + *this, + const_cast&>( + op)); + } + // @} + + /// @name comparison + // @{ + template + INLINE bool operator==( + const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op) { + return get() == op.get(); + } + + template + INLINE bool operator!=( + const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op) { + return get() != op.get(); + } + // @} + + INLINE bool operator~() const { + bool bit = _AP_ROOT_op_get_bit(d_bv.V, d_index); + return bit ? false : true; + } + + INLINE bool get() const { return _AP_ROOT_op_get_bit(d_bv.V, d_index); } + + INLINE int length() const { return 1; } + +#ifndef __SYNTHESIS__ + std::string to_string() const { return get() ? "1" : "0"; } +#else + // XXX HLS will delete this in synthesis + INLINE char* to_string() const { return 0; } +#endif +}; // struct af_bit_ref + +// XXX apcc cannot handle global std::ios_base::Init() brought in by +#ifndef AP_AUTOCC +#ifndef __SYNTHESIS__ +template +INLINE std::ostream& operator<<( + std::ostream& os, + const af_bit_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& x) { + os << x.to_string(); + return os; +} +#endif // ifndef __SYNTHESIS__ +#endif // ifndef AP_AUTOCC + +/// Range (slice) reference. +template +struct af_range_ref { +#ifdef _MSC_VER +#pragma warning(disable : 4521 4522) +#endif + typedef ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> ref_type; + ref_type& d_bv; + int l_index; + int h_index; + + public: + /// copy ctor + INLINE af_range_ref( + const af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& ref) + : d_bv(ref.d_bv), l_index(ref.l_index), h_index(ref.h_index) {} + + /// ctor from ap_fixed_base, higher and lower bound. + /** if h is less than l, the bits selected will be returned in reverse order. 
+ */
+  INLINE af_range_ref(ref_type* bv, int h, int l)
+      : d_bv(*bv), l_index(l), h_index(h) {
+#ifndef __SYNTHESIS__
+    _AP_WARNING(h < 0 || l < 0,
+                "Higher bound(%d) and lower(%d) bound cannot be negative.", h,
+                l);
+    _AP_WARNING(h >= _AP_W || l >= _AP_W,
+                "Higher bound(%d) or lower(%d) bound out of range.", h, l);
+    _AP_WARNING(h < l, "The bits selected will be returned in reverse order.");
+#endif
+  }
+
+  INLINE af_range_ref(const ref_type* bv, int h, int l)
+      : d_bv(*const_cast<ref_type*>(bv)), l_index(l), h_index(h) {
+#ifndef __SYNTHESIS__
+    _AP_WARNING(h < 0 || l < 0,
+                "Higher bound(%d) and lower(%d) bound cannot be negative.", h,
+                l);
+    _AP_WARNING(h >= _AP_W || l >= _AP_W,
+                "Higher bound(%d) or lower(%d) bound out of range.", h, l);
+    _AP_WARNING(h < l, "The bits selected will be returned in reverse order.");
+#endif
+  }
+
+  /// @name assign operators
+  // @{
+
+#define ASSIGN_CTYPE_TO_AF_RANGE(DATA_TYPE)                           \
+  INLINE af_range_ref& operator=(const DATA_TYPE val) {               \
+    ap_int_base<_AP_W, false> loc(val);                               \
+    d_bv.V = _AP_ROOT_op_set_range(d_bv.V, l_index, h_index, loc.V);  \
+    return *this;                                                     \
+  }
+
+  ASSIGN_CTYPE_TO_AF_RANGE(bool)
+  ASSIGN_CTYPE_TO_AF_RANGE(char)
+  ASSIGN_CTYPE_TO_AF_RANGE(signed char)
+  ASSIGN_CTYPE_TO_AF_RANGE(unsigned char)
+  ASSIGN_CTYPE_TO_AF_RANGE(short)
+  ASSIGN_CTYPE_TO_AF_RANGE(unsigned short)
+  ASSIGN_CTYPE_TO_AF_RANGE(int)
+  ASSIGN_CTYPE_TO_AF_RANGE(unsigned int)
+  ASSIGN_CTYPE_TO_AF_RANGE(long)
+  ASSIGN_CTYPE_TO_AF_RANGE(unsigned long)
+  ASSIGN_CTYPE_TO_AF_RANGE(ap_slong)
+  ASSIGN_CTYPE_TO_AF_RANGE(ap_ulong)
+  ASSIGN_CTYPE_TO_AF_RANGE(half)
+  ASSIGN_CTYPE_TO_AF_RANGE(float)
+  ASSIGN_CTYPE_TO_AF_RANGE(double)
+#undef ASSIGN_CTYPE_TO_AF_RANGE
+
+  /// assign using a string. XXX crucial for cosim.
+  INLINE af_range_ref& operator=(const char* val) {
+    const ap_int_base<_AP_W, false> tmp(val); // XXX figure out radix
+    d_bv.V = _AP_ROOT_op_set_range(d_bv.V, l_index, h_index, tmp.V);
+    return *this;
+  }
+
+  /// assign from ap_int_base.
+  // NOTE Base of other assign operators.
+  template <int _AP_W3, bool _AP_S3>
+  INLINE af_range_ref& operator=(const ap_int_base<_AP_W3, _AP_S3>& val) {
+    d_bv.V = _AP_ROOT_op_set_range(d_bv.V, l_index, h_index, val.V);
+    return *this;
+  }
+
+  /// assign from range reference to ap_int_base.
+  template <int _AP_W2, bool _AP_S2>
+  INLINE af_range_ref& operator=(const ap_range_ref<_AP_W2, _AP_S2>& val) {
+    const ap_int_base<_AP_W2, false> tmp(val);
+    return operator=(tmp);
+  }
+
+  /// assign from bit reference to ap_int_base.
+  template <int _AP_W2, bool _AP_S2>
+  INLINE af_range_ref& operator=(const ap_bit_ref<_AP_W2, _AP_S2>& val) {
+    const ap_int_base<1, false> tmp((bool)val);
+    return operator=(tmp);
+  }
+
+  /// assign from ap_fixed_base.
+  template <int _AP_W2, int _AP_I2, bool _AP_S2, ap_q_mode _AP_Q2,
+            ap_o_mode _AP_O2, int _AP_N2>
+  INLINE af_range_ref& operator=(
+      const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>&
+          val) {
+    d_bv.V = _AP_ROOT_op_set_range(d_bv.V, l_index, h_index, val.V);
+    return *this;
+  }
+
+  /// copy assign.
+  // XXX This has to be explicit, otherwise it will be deleted, as d_bv is
+  // of reference type.
+  INLINE af_range_ref& operator=(const af_range_ref& val) {
+    ap_int_base<_AP_W, false> tmp(val);
+    return operator=(tmp);
+  }
+
+  /// assign from range reference to ap_fixed_base.
+  template <int _AP_W2, int _AP_I2, bool _AP_S2, ap_q_mode _AP_Q2,
+            ap_o_mode _AP_O2, int _AP_N2>
+  INLINE af_range_ref& operator=(
+      const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& val) {
+    ap_int_base<_AP_W2, false> tmp(val);
+    return operator=(tmp);
+  }
+
+  /// assign from bit reference to ap_fixed_base.
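+  // Usage sketch (assumes ap_fixed.h is included; values illustrative). Like
+  // the range assignments above, the overload below writes raw bits through
+  // _AP_ROOT_op_set_range and bypasses the quantization/overflow handling of
+  // the enclosing ap_fixed:
+  //
+  //   ap_fixed<8, 4> a = 5.0, b = 0;  // Q4.4 layout: iiii.ffff
+  //   b(3, 0) = a[6];                 // bit 6 of a is 1 -> fraction 0b0001
+  //   b(7, 4) = 5;                    // integer bits 0b0101 -> b == 5.0625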
+ template + INLINE af_range_ref& operator=( + const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& val) { + ap_int_base<1, false> tmp((bool)val); + return operator=(tmp); + } + + /// assign from compound reference. + template + INLINE af_range_ref& operator=( + const ap_concat_ref<_AP_W2, _AP_T3, _AP_W3, _AP_T3>& val) { + const ap_int_base<_AP_W2 + _AP_W3, false> tmp(val); + return operator=(tmp); + } + // @} + + /// @name comparison operators with ap_range_ref. + // @{ + template + INLINE bool operator==(const ap_range_ref<_AP_W2, _AP_S2>& op2) { + ap_int_base<_AP_W, false> lop(*this); + ap_int_base<_AP_W2, false> rop(op2); + return lop == rop; + } + + template + INLINE bool operator!=(const ap_range_ref<_AP_W2, _AP_S2>& op2) { + return !(operator==(op2)); + } + + template + INLINE bool operator<(const ap_range_ref<_AP_W2, _AP_S2>& op2) { + ap_int_base<_AP_W, false> lop(*this); + ap_int_base<_AP_W2, false> rop(op2); + return lop < rop; + } + + template + INLINE bool operator>(const ap_range_ref<_AP_W2, _AP_S2>& op2) { + ap_int_base<_AP_W, false> lop(*this); + ap_int_base<_AP_W2, false> rop(op2); + return lop > rop; + } + + template + INLINE bool operator<=(const ap_range_ref<_AP_W2, _AP_S2>& op2) { + return !(operator>(op2)); + } + + template + INLINE bool operator>=(const ap_range_ref<_AP_W2, _AP_S2>& op2) { + return !(operator<(op2)); + } + // @} + + /// @name comparison operators with af_range_ref. + // @{ + template + INLINE bool operator==( + const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op2) { + ap_int_base<_AP_W, false> lop(*this); + ap_int_base<_AP_W2, false> rop(op2); + return lop == rop; + } + + template + INLINE bool operator!=( + const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op2) { + return !(operator==(op2)); + } + + template + INLINE bool operator<( + const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op2) { + ap_int_base<_AP_W, false> lop(*this); + ap_int_base<_AP_W2, false> rop(op2); + return lop < rop; + } + + template + INLINE bool operator>( + const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op2) { + ap_int_base<_AP_W, false> lop(*this); + ap_int_base<_AP_W2, false> rop(op2); + return lop > rop; + } + + template + INLINE bool operator<=( + const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op2) { + return !(operator>(op2)); + } + + template + INLINE bool operator>=( + const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op2) { + return !(operator<(op2)); + } + // @} + + /// @name concatenate operators. + /// @{ + /// concatenate with ap_int_base. + template + INLINE + ap_concat_ref<_AP_W, af_range_ref, _AP_W2, ap_int_base<_AP_W2, _AP_S2> > + operator,(ap_int_base<_AP_W2, _AP_S2> &op) { + return ap_concat_ref<_AP_W, af_range_ref, _AP_W2, + ap_int_base<_AP_W2, _AP_S2> >(*this, op); + } + + /// concatenate with ap_bit_ref. + template + INLINE ap_concat_ref<_AP_W, af_range_ref, 1, ap_bit_ref<_AP_W2, _AP_S2> > + operator,(const ap_bit_ref<_AP_W2, _AP_S2> &op) { + return ap_concat_ref<_AP_W, af_range_ref, 1, ap_bit_ref<_AP_W2, _AP_S2> >( + *this, const_cast&>(op)); + } + + /// concatenate with ap_bit_ref. + template + INLINE ap_concat_ref<_AP_W, af_range_ref, _AP_W2, ap_range_ref<_AP_W2, _AP_S2> > + operator,(const ap_range_ref<_AP_W2, _AP_S2> &op) { + return ap_concat_ref<_AP_W, af_range_ref, _AP_W2, + ap_range_ref<_AP_W2, _AP_S2> >( + *this, const_cast&>(op)); + } + + /// concatenate with ap_concat_ref. 
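+  // Concatenation sketch (assumes ap_int.h and ap_fixed.h are included;
+  // values illustrative): comma-joined references build one wider unsigned
+  // word, MSBs taken from the left operand.
+  //
+  //   ap_fixed<8, 4> x = 2.5;         // raw bits 0b00101000 == 0x28
+  //   ap_uint<4> hi = 0xA;
+  //   ap_uint<12> y = (hi, x(7, 0));  // y == 0xA28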
+ template + INLINE ap_concat_ref<_AP_W, af_range_ref, _AP_W2 + _AP_W3, + ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> > + operator,(const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> &op) { + return ap_concat_ref<_AP_W, af_range_ref, _AP_W2 + _AP_W3, + ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> >( + *this, const_cast&>(op)); + } + + /// concatenate with another af_range_ref. + template + INLINE + ap_concat_ref<_AP_W, af_range_ref, _AP_W2, + af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > + operator,(const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> + &op) { + return ap_concat_ref< + _AP_W, af_range_ref, _AP_W2, + af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >( + *this, + const_cast&>( + op)); + } + + /// concatenate with another af_bit_ref. + template + INLINE + ap_concat_ref<_AP_W, af_range_ref, 1, + af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > + operator,( + const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> &op) { + return ap_concat_ref< + _AP_W, af_range_ref, 1, + af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >( + *this, + const_cast&>( + op)); + } + // @} + + INLINE operator ap_ulong() const { + ap_int_base<_AP_W, false> ret; + ret.V = _AP_ROOT_op_get_range(d_bv.V, l_index, h_index); + return ret.to_uint64(); + } + + INLINE operator ap_int_base<_AP_W, false>() const { + ap_int_base<_AP_W, false> ret; + ret.V = _AP_ROOT_op_get_range(d_bv.V, l_index, h_index); + return ret; + } + + INLINE ap_int_base<_AP_W, false> to_ap_int_base() const { + ap_int_base<_AP_W, false> ret; + ret.V = _AP_ROOT_op_get_range(d_bv.V, l_index, h_index); + return ret; + } + + // used in ap_fixed_base::to_string() + INLINE char to_char() const { + return (char)(_AP_ROOT_op_get_range(d_bv.V, l_index, h_index)); + } + + INLINE int to_int() const { + return (int)(_AP_ROOT_op_get_range(d_bv.V, l_index, h_index)); + } + + INLINE unsigned to_uint() const { + return (unsigned)(_AP_ROOT_op_get_range(d_bv.V, l_index, h_index)); + } + + INLINE long to_long() const { + return (long)(_AP_ROOT_op_get_range(d_bv.V, l_index, h_index)); + } + + INLINE unsigned long to_ulong() const { + return (unsigned long)(_AP_ROOT_op_get_range(d_bv.V, l_index, h_index)); + } + + INLINE ap_slong to_int64() const { + return (ap_slong)(_AP_ROOT_op_get_range(d_bv.V, l_index, h_index)); + } + + INLINE ap_ulong to_uint64() const { + return (ap_ulong)(_AP_ROOT_op_get_range(d_bv.V, l_index, h_index)); + } + + INLINE ap_int_base<_AP_W, false> get() const { + ap_int_base<_AP_W, false> ret; + ret.V = _AP_ROOT_op_get_range(d_bv.V, l_index, h_index); + return ret; + } + + template + INLINE void set(const ap_int_base<_AP_W2, false>& val) { + d_bv.V = _AP_ROOT_op_set_range(d_bv.V, l_index, h_index, val.V); + } + + INLINE int length() const { + return h_index >= l_index ? 
h_index - l_index + 1 : l_index - h_index + 1; + } + +#ifndef __SYNTHESIS__ + std::string to_string(signed char rd = 2) const { + ap_int_base<_AP_W, false> ret; + ret.V = _AP_ROOT_op_get_range(d_bv.V, l_index, h_index); + return ret.to_string(rd); + } +#else + // XXX HLS will delete this in synthesis + INLINE char* to_string(signed char rd = 2) const { + return 0; + } +#endif +}; // struct af_range_ref + +// XXX apcc cannot handle global std::ios_base::Init() brought in by +#ifndef AP_AUTOCC +#ifndef __SYNTHESIS__ +template +INLINE std::ostream& operator<<( + std::ostream& os, + const af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& x) { + os << x.to_string(); + return os; +} +#endif +#endif // ifndef AP_AUTOCC + +#define AF_REF_REL_OP_WITH_INT(REL_OP, C_TYPE, _AP_W2, _AP_S2) \ + template \ + INLINE bool operator REL_OP( \ + const af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op, \ + C_TYPE op2) { \ + return ap_int_base<_AP_W, false>(op) \ + REL_OP ap_int_base<_AP_W2, _AP_S2>(op2); \ + } \ + \ + template \ + INLINE bool operator REL_OP( \ + C_TYPE op2, \ + const af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op) { \ + return ap_int_base<_AP_W2, _AP_S2>(op2) \ + REL_OP ap_int_base<_AP_W, false>(op); \ + } \ + \ + template \ + INLINE bool operator REL_OP( \ + const af_bit_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op, \ + C_TYPE op2) { \ + return bool(op) REL_OP op2; \ + } \ + \ + template \ + INLINE bool operator REL_OP( \ + C_TYPE op2, \ + const af_bit_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op) { \ + return op2 REL_OP bool(op); \ + } + +#define AF_REF_REL_OPS_WITH_INT(C_TYPE, _AP_W2, _AP_S2) \ + AF_REF_REL_OP_WITH_INT(>, C_TYPE, (_AP_W2), (_AP_S2)) \ + AF_REF_REL_OP_WITH_INT(<, C_TYPE, (_AP_W2), (_AP_S2)) \ + AF_REF_REL_OP_WITH_INT(>=, C_TYPE, (_AP_W2), (_AP_S2)) \ + AF_REF_REL_OP_WITH_INT(<=, C_TYPE, (_AP_W2), (_AP_S2)) \ + AF_REF_REL_OP_WITH_INT(==, C_TYPE, (_AP_W2), (_AP_S2)) \ + AF_REF_REL_OP_WITH_INT(!=, C_TYPE, (_AP_W2), (_AP_S2)) + +AF_REF_REL_OPS_WITH_INT(bool, 1, false) +AF_REF_REL_OPS_WITH_INT(char, 8, CHAR_IS_SIGNED) +AF_REF_REL_OPS_WITH_INT(signed char, 8, true) +AF_REF_REL_OPS_WITH_INT(unsigned char, 8, false) +AF_REF_REL_OPS_WITH_INT(short, _AP_SIZE_short, true) +AF_REF_REL_OPS_WITH_INT(unsigned short, _AP_SIZE_short, false) +AF_REF_REL_OPS_WITH_INT(int, _AP_SIZE_int, true) +AF_REF_REL_OPS_WITH_INT(unsigned int, _AP_SIZE_int, false) +AF_REF_REL_OPS_WITH_INT(long, _AP_SIZE_long, true) +AF_REF_REL_OPS_WITH_INT(unsigned long, _AP_SIZE_long, false) +AF_REF_REL_OPS_WITH_INT(ap_slong, _AP_SIZE_ap_slong, true) +AF_REF_REL_OPS_WITH_INT(ap_ulong, _AP_SIZE_ap_slong, false) + +#undef AF_REF_REL_OP_INT +#undef AF_REF_REL_OPS_WITH_INT + +#define AF_REF_REL_OP_WITH_AP_INT(REL_OP) \ + template \ + INLINE bool operator REL_OP( \ + const af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op, \ + const ap_int_base<_AP_W2, _AP_S>& op2) { \ + return ap_int_base<_AP_W, false>(op) REL_OP op2; \ + } \ + template \ + INLINE bool operator REL_OP( \ + const ap_int_base<_AP_W2, _AP_S2>& op2, \ + const af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op) { \ + return op2 REL_OP ap_int_base<_AP_W, false>(op); \ + } \ + template \ + INLINE bool operator REL_OP( \ + const af_bit_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op, \ + const ap_int_base<_AP_W2, _AP_S2>& op2) { \ + return ap_int_base<1, false>(op) REL_OP op2; \ + } \ + template \ + INLINE bool operator REL_OP( \ + const ap_int_base<_AP_W2, _AP_S2>& op2, \ + const af_bit_ref<_AP_W, _AP_I, _AP_S, 
_AP_Q, _AP_O, _AP_N>& op) { \ + return op2 REL_OP ap_int_base<1, false>(op); \ + } + +AF_REF_REL_OP_WITH_AP_INT(>) +AF_REF_REL_OP_WITH_AP_INT(<) +AF_REF_REL_OP_WITH_AP_INT(>=) +AF_REF_REL_OP_WITH_AP_INT(<=) +AF_REF_REL_OP_WITH_AP_INT(==) +AF_REF_REL_OP_WITH_AP_INT(!=) + +#endif // ifndef __cplusplus + +#endif // ifndef __AP_FIXED_REF_H__ + +// -*- cpp -*- + +// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/include/ap_fixed_special.h b/include/ap_fixed_special.h new file mode 100644 index 0000000..5ce88a2 --- /dev/null +++ b/include/ap_fixed_special.h @@ -0,0 +1,258 @@ +/* +#- (c) Copyright 2011-2019 Xilinx, Inc. All rights reserved. +#- +#- This file contains confidential and proprietary information +#- of Xilinx, Inc. and is protected under U.S. and +#- international copyright and other intellectual property +#- laws. +#- +#- DISCLAIMER +#- This disclaimer is not a license and does not grant any +#- rights to the materials distributed herewith. Except as +#- otherwise provided in a valid license issued to you by +#- Xilinx, and to the maximum extent permitted by applicable +#- law: (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND +#- WITH ALL FAULTS, AND XILINX HEREBY DISCLAIMS ALL WARRANTIES +#- AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING +#- BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON- +#- INFRINGEMENT, OR FITNESS FOR ANY PARTICULAR PURPOSE; and +#- (2) Xilinx shall not be liable (whether in contract or tort, +#- including negligence, or under any other theory of +#- liability) for any loss or damage of any kind or nature +#- related to, arising under or in connection with these +#- materials, including for any direct, or any indirect, +#- special, incidental, or consequential loss or damage +#- (including loss of data, profits, goodwill, or any type of +#- loss or damage suffered as a result of any action brought +#- by a third party) even if such damage or loss was +#- reasonably foreseeable or Xilinx had been advised of the +#- possibility of the same. +#- +#- CRITICAL APPLICATIONS +#- Xilinx products are not designed or intended to be fail- +#- safe, or for use in any application requiring fail-safe +#- performance, such as life-support or safety devices or +#- systems, Class III medical devices, nuclear facilities, +#- applications related to the deployment of airbags, or any +#- other applications that could lead to death, personal +#- injury, or severe property or environmental damage +#- (individually and collectively, "Critical +#- Applications"). Customer assumes the sole risk and +#- liability of any use of Xilinx products in Critical +#- Applications, subject only to applicable laws and +#- regulations governing limitations on product liability. +#- +#- THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS +#- PART OF THIS FILE AT ALL TIMES. +#- ************************************************************************ + +*/ + +#ifndef AP_FIXED_SPECIAL_H +#define AP_FIXED_SPECIAL_H +#ifndef __SYNTHESIS__ +#include +#include +#endif +// FIXME AP_AUTOCC cannot handle many standard headers, so declare instead of +// include. +// #include +namespace std { +template class complex; +} + +/* + TODO: Modernize the code using C++11/C++14 + 1. constexpr http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0415r0.html + 2. move constructor +*/ + +namespace std { +/* + Specialize std::complex to zero initialization ap_fixed. 
+
+  To reduce the area cost, ap_fixed is not zero initialized, just like basic
+  types float or double. However, libstdc++ provides specialization for float,
+  double and long double, initializing the imaginary part to 0 when not
+  specified.
+
+  This has become a difficulty in switching legacy code from these C types to
+  ap_fixed. To ease the transform of legacy code, we have to implement
+  specialization of std::complex<> for our type.
+
+  As ap_fixed is a template, it is impossible to specialize only the methods
+  that cause default initialization of value type in std::complex<>. An
+  explicit full specialization of the template class has to be done, covering
+  all the member functions and operators of std::complex<> as specified
+  in standard 26.2.4 and 26.2.5.
+*/
+template <int _AP_W, int _AP_I, ap_q_mode _AP_Q, ap_o_mode _AP_O, int _AP_N>
+struct complex<ap_fixed<_AP_W, _AP_I, _AP_Q, _AP_O, _AP_N> > {
+  typedef ap_fixed<_AP_W, _AP_I, _AP_Q, _AP_O, _AP_N> _Tp;
+  typedef _Tp value_type;
+
+  // 26.2.4/1
+  // Constructor without argument
+  // Default initialize, so that in dataflow, the variable is only written once.
+  complex() : _M_real(_Tp()), _M_imag(_Tp()) {}
+  // Constructor with ap_fixed.
+  // Zero initialize the imaginary part when not specified, so that
+  // `C(1) == C(1,0)`
+  complex(const _Tp &__r, const _Tp &__i = _Tp(0))
+      : _M_real(__r), _M_imag(__i) {}
+
+  // Constructor with another complex number
+  template <typename _Up>
+  complex(const complex<_Up> &__z) : _M_real(__z.real()), _M_imag(__z.imag()) {}
+
+#if __cplusplus >= 201103L
+  const _Tp& real() const { return _M_real; }
+  const _Tp& imag() const { return _M_imag; }
+#else
+  _Tp& real() { return _M_real; }
+  const _Tp& real() const { return _M_real; }
+  _Tp& imag() { return _M_imag; }
+  const _Tp& imag() const { return _M_imag; }
+#endif
+
+  void real(_Tp __val) { _M_real = __val; }
+
+  void imag(_Tp __val) { _M_imag = __val; }
+
+  // Assign this complex number with ap_fixed.
+  // Zero initialize the imaginary part, so that `C c; c = 1; c == C(1,0);`
+  complex<_Tp> &operator=(const _Tp __t) {
+    _M_real = __t;
+    _M_imag = _Tp(0);
+    return *this;
+  }
+
+  // 26.2.5/1
+  // Add ap_fixed to this complex number.
+  complex<_Tp> &operator+=(const _Tp &__t) {
+    _M_real += __t;
+    return *this;
+  }
+
+  // 26.2.5/3
+  // Subtract ap_fixed from this complex number.
+  complex<_Tp> &operator-=(const _Tp &__t) {
+    _M_real -= __t;
+    return *this;
+  }
+
+  // 26.2.5/5
+  // Multiply this complex number by ap_fixed.
+  complex<_Tp> &operator*=(const _Tp &__t) {
+    _M_real *= __t;
+    _M_imag *= __t;
+    return *this;
+  }
+
+  // 26.2.5/7
+  // Divide this complex number by ap_fixed.
+  complex<_Tp> &operator/=(const _Tp &__t) {
+    _M_real /= __t;
+    _M_imag /= __t;
+    return *this;
+  }
+
+  // Assign complex number to this complex number.
+  template <typename _Up>
+  complex<_Tp> &operator=(const complex<_Up> &__z) {
+    _M_real = __z.real();
+    _M_imag = __z.imag();
+    return *this;
+  }
+
+  // 26.2.5/9
+  // Add complex number to this.
+  template <typename _Up>
+  complex<_Tp> &operator+=(const complex<_Up> &__z) {
+    _M_real += __z.real();
+    _M_imag += __z.imag();
+    return *this;
+  }
+
+  // 26.2.5/11
+  // Subtract complex number from this.
+  template <typename _Up>
+  complex<_Tp> &operator-=(const complex<_Up> &__z) {
+    _M_real -= __z.real();
+    _M_imag -= __z.imag();
+    return *this;
+  }
+
+  // 26.2.5/13
+  // Multiply this by complex number.
+  template <typename _Up>
+  complex<_Tp> &operator*=(const complex<_Up> &__z) {
+    const _Tp __r = _M_real * __z.real() - _M_imag * __z.imag();
+    _M_imag = _M_real * __z.imag() + _M_imag * __z.real();
+    _M_real = __r;
+    return *this;
+  }
+
+  // 26.2.5/15
+  // Divide this by complex number.
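+  // The overload below uses the textbook identity
+  //   a / b = (a * conj(b)) / (b * conj(b)),
+  // where b * conj(b) == re(b)^2 + im(b)^2 is purely real, so the division
+  // reduces to two real divisions. Equivalent scalar sketch (plain floats,
+  // names hypothetical):
+  //
+  //   float d  = br * br + bi * bi;
+  //   float rr = (ar * br + ai * bi) / d;
+  //   float ri = (ai * br - ar * bi) / d;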
+ template + complex<_Tp> &operator/=(const complex<_Up> &__z) { + complex<_Tp> cj (__z.real(), -__z.imag()); + complex<_Tp> a = (*this) * cj; + complex<_Tp> b = cj * __z; + _M_real = a.real() / b.real(); + _M_imag = a.imag() / b.real(); + return *this; + } + + private: + _Tp _M_real; + _Tp _M_imag; + +}; // struct complex > + +/* + Non-member operations + These operations are not required by standard in 26.2.6, but libstdc++ + defines them for + float, double or long double's specialization. +*/ +// Compare complex number with ap_fixed. +template +inline bool operator==( + const complex > &__x, + const ap_fixed<_AP_W, _AP_I, _AP_Q, _AP_O, _AP_N> &__y) { + return __x.real() == __y && + __x.imag() == 0; +} + +// Compare ap_fixed with complex number. +template +inline bool operator==( + const ap_fixed<_AP_W, _AP_I, _AP_Q, _AP_O, _AP_N> &__x, + const complex > &__y) { + return __x == __y.real() && + 0 == __y.imag(); +} + +// Compare complex number with ap_fixed. +template +inline bool operator!=( + const complex > &__x, + const ap_fixed<_AP_W, _AP_I, _AP_Q, _AP_O, _AP_N> &__y) { + return __x.real() != __y || + __x.imag() != 0; +} + +// Compare ap_fixed with complex number. +template +inline bool operator!=( + const ap_fixed<_AP_W, _AP_I, _AP_Q, _AP_O, _AP_N> &__x, + const complex > &__y) { + return __x != __y.real() || + 0 != __y.imag(); +} + +} // namespace std + +#endif // ifndef AP_FIXED_SPECIAL_H + +// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/include/ap_int.h b/include/ap_int.h new file mode 100644 index 0000000..707fef3 --- /dev/null +++ b/include/ap_int.h @@ -0,0 +1,352 @@ +/* +#- (c) Copyright 2011-2019 Xilinx, Inc. All rights reserved. +#- +#- This file contains confidential and proprietary information +#- of Xilinx, Inc. and is protected under U.S. and +#- international copyright and other intellectual property +#- laws. +#- +#- DISCLAIMER +#- This disclaimer is not a license and does not grant any +#- rights to the materials distributed herewith. Except as +#- otherwise provided in a valid license issued to you by +#- Xilinx, and to the maximum extent permitted by applicable +#- law: (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND +#- WITH ALL FAULTS, AND XILINX HEREBY DISCLAIMS ALL WARRANTIES +#- AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING +#- BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON- +#- INFRINGEMENT, OR FITNESS FOR ANY PARTICULAR PURPOSE; and +#- (2) Xilinx shall not be liable (whether in contract or tort, +#- including negligence, or under any other theory of +#- liability) for any loss or damage of any kind or nature +#- related to, arising under or in connection with these +#- materials, including for any direct, or any indirect, +#- special, incidental, or consequential loss or damage +#- (including loss of data, profits, goodwill, or any type of +#- loss or damage suffered as a result of any action brought +#- by a third party) even if such damage or loss was +#- reasonably foreseeable or Xilinx had been advised of the +#- possibility of the same. 
+#- +#- CRITICAL APPLICATIONS +#- Xilinx products are not designed or intended to be fail- +#- safe, or for use in any application requiring fail-safe +#- performance, such as life-support or safety devices or +#- systems, Class III medical devices, nuclear facilities, +#- applications related to the deployment of airbags, or any +#- other applications that could lead to death, personal +#- injury, or severe property or environmental damage +#- (individually and collectively, "Critical +#- Applications"). Customer assumes the sole risk and +#- liability of any use of Xilinx products in Critical +#- Applications, subject only to applicable laws and +#- regulations governing limitations on product liability. +#- +#- THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS +#- PART OF THIS FILE AT ALL TIMES. +#- ************************************************************************ + +*/ + +#ifndef __AP_INT_H__ +#define __AP_INT_H__ + +#include +#include +#include + +//--------------------------------------------------------------- + +/// Sign Arbitrary Precision Type. +template +struct ap_int : ap_int_base<_AP_W, true> { + typedef ap_int_base<_AP_W, true> Base; + // Constructor + INLINE ap_int() : Base() {} + template + INLINE ap_int(const ap_int<_AP_W2>& op) { + Base::V = op.V; + } + + template + INLINE ap_int(const volatile ap_int<_AP_W2>& op) { + Base::V = op.V; + } + + template + INLINE ap_int(const ap_uint<_AP_W2>& op) { + Base::V = op.V; + } + + template + INLINE ap_int(const volatile ap_uint<_AP_W2>& op) { + Base::V = op.V; + } + + template + INLINE ap_int(const ap_range_ref<_AP_W2, _AP_S2>& ref) : Base(ref) {} + + template + INLINE ap_int(const ap_bit_ref<_AP_W2, _AP_S2>& ref) : Base(ref) {} + + template + INLINE ap_int(const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3>& ref) + : Base(ref) {} + + template + INLINE ap_int(const ap_fixed<_AP_W2, _AP_I2, _AP_Q2, _AP_O2, _AP_N2>& op) + : Base((ap_fixed_base<_AP_W2, _AP_I2, true, _AP_Q2, _AP_O2, _AP_N2>)op) {} + + template + INLINE ap_int(const ap_ufixed<_AP_W2, _AP_I2, _AP_Q2, _AP_O2, _AP_N2>& op) + : Base((ap_fixed_base<_AP_W2, _AP_I2, false, _AP_Q2, _AP_O2, _AP_N2>)op) { + } + + template + INLINE ap_int( + const volatile ap_fixed<_AP_W2, _AP_I2, _AP_Q2, _AP_O2, _AP_N2>& op) + : Base((ap_fixed_base<_AP_W2, _AP_I2, true, _AP_Q2, _AP_O2, _AP_N2>)op) {} + + template + INLINE ap_int( + const volatile ap_ufixed<_AP_W2, _AP_I2, _AP_Q2, _AP_O2, _AP_N2>& op) + : Base((ap_fixed_base<_AP_W2, _AP_I2, false, _AP_Q2, _AP_O2, _AP_N2>)op) { + } + + template + INLINE ap_int(const ap_int_base<_AP_W2, _AP_S2>& op) { + Base::V = op.V; + } + + template + INLINE ap_int( + const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op) + : Base(op) {} + + template + INLINE ap_int( + const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op) + : Base(op) {} + + template + INLINE ap_int( + const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op) + : Base(op) {} + +#define CTOR(TYPE) \ + INLINE ap_int(TYPE val) { Base::V = val; } + CTOR(bool) + CTOR(char) + CTOR(signed char) + CTOR(unsigned char) + CTOR(short) + CTOR(unsigned short) + CTOR(int) + CTOR(unsigned int) + CTOR(long) + CTOR(unsigned long) + CTOR(ap_slong) + CTOR(ap_ulong) +#undef CTOR + ap_int(double val) : Base(val) {} + ap_int(float val) : Base(val) {} + ap_int(half val) : Base(val) {} + + // ap_int_base will guess radix if radix is not provided. 
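+  // Sketch (values illustrative): with no radix argument the usual prefixes
+  // decide ("0x"/"0X" for hexadecimal, "0b"/"0B" for binary), otherwise the
+  // explicit radix wins.
+  //
+  //   ap_int<16> a("0x1234");   // parsed as hexadecimal
+  //   ap_int<16> b("1010", 2);  // parsed as binary -> 10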
+ INLINE ap_int(const char* s) : Base(s) {} + + INLINE ap_int(const char* s, signed char rd) : Base(s, rd) {} + + // Assignment + /* ctor will be used when right is not of proper type. */ + + INLINE ap_int& operator=(const ap_int<_AP_W>& op2) { + Base::V = op2.V; + return *this; + } + + /* cannot bind volatile reference to non-volatile type. */ + INLINE ap_int& operator=(const volatile ap_int<_AP_W>& op2) { + Base::V = op2.V; + return *this; + } + + /* cannot return volatile *this. */ + INLINE void operator=(const ap_int<_AP_W>& op2) volatile { Base::V = op2.V; } + + INLINE void operator=(const volatile ap_int<_AP_W>& op2) volatile { + Base::V = op2.V; + } + +}; // struct ap_int. + +//--------------------------------------------------------------- + +/// Unsigned Arbitrary Precision Type. +template +struct ap_uint : ap_int_base<_AP_W, false> { + typedef ap_int_base<_AP_W, false> Base; + // Constructor + INLINE ap_uint() : Base() {} + template + INLINE ap_uint(const ap_uint<_AP_W2>& op) { + Base::V = op.V; + } + + template + INLINE ap_uint(const ap_int<_AP_W2>& op) { + Base::V = op.V; + } + + template + INLINE ap_uint(const volatile ap_uint<_AP_W2>& op) { + Base::V = op.V; + } + + template + INLINE ap_uint(const volatile ap_int<_AP_W2>& op) { + Base::V = op.V; + } + + template + INLINE ap_uint(const ap_range_ref<_AP_W2, _AP_S2>& ref) : Base(ref) {} + + template + INLINE ap_uint(const ap_bit_ref<_AP_W2, _AP_S2>& ref) : Base(ref) {} + + template + INLINE ap_uint(const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3>& ref) + : Base(ref) {} + + template + INLINE ap_uint(const ap_fixed<_AP_W2, _AP_I2, _AP_Q2, _AP_O2, _AP_N2>& op) + : Base((ap_fixed_base<_AP_W2, _AP_I2, true, _AP_Q2, _AP_O2, _AP_N2>)op) {} + + template + INLINE ap_uint(const ap_ufixed<_AP_W2, _AP_I2, _AP_Q2, _AP_O2, _AP_N2>& op) + : Base((ap_fixed_base<_AP_W2, _AP_I2, false, _AP_Q2, _AP_O2, _AP_N2>)op) { + } + + template + INLINE ap_uint( + const volatile ap_fixed<_AP_W2, _AP_I2, _AP_Q2, _AP_O2, _AP_N2>& op) + : Base((ap_fixed_base<_AP_W2, _AP_I2, true, _AP_Q2, _AP_O2, _AP_N2>)op) {} + + template + INLINE ap_uint( + const volatile ap_ufixed<_AP_W2, _AP_I2, _AP_Q2, _AP_O2, _AP_N2>& op) + : Base((ap_fixed_base<_AP_W2, _AP_I2, false, _AP_Q2, _AP_O2, _AP_N2>)op) { + } + + template + INLINE ap_uint(const ap_int_base<_AP_W2, _AP_S2>& op) { + Base::V = op.V; + } + + template + INLINE ap_uint( + const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op) + : Base(op) {} + + template + INLINE ap_uint( + const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op) + : Base(op) {} + + template + INLINE ap_uint( + const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op) + : Base(op) {} + +#define CTOR(TYPE) \ + INLINE ap_uint(TYPE val) { Base::V = val; } + CTOR(bool) + CTOR(char) + CTOR(signed char) + CTOR(unsigned char) + CTOR(short) + CTOR(unsigned short) + CTOR(int) + CTOR(unsigned int) + CTOR(long) + CTOR(unsigned long) + CTOR(ap_slong) + CTOR(ap_ulong) +#undef CTOR + ap_uint(double val) : Base(val) {} + ap_uint(float val) : Base(val) {} + ap_uint(half val) : Base(val) {} + + // ap_int_base will guess radix if radix is not provided. + INLINE ap_uint(const char* s) : Base(s) {} + + INLINE ap_uint(const char* s, signed char rd) : Base(s, rd) {} + + // Assignment + /* XXX ctor will be used when right is not of proper type. */ + + INLINE ap_uint& operator=(const ap_uint<_AP_W>& op2) { + Base::V = op2.V; + return *this; + } + + /* cannot bind volatile reference to non-volatile type. 
*/ + INLINE ap_uint& operator=(const volatile ap_uint<_AP_W>& op2) { + Base::V = op2.V; + return *this; + } + + /* cannot return volatile *this. */ + INLINE void operator=(const ap_uint<_AP_W>& op2) volatile { Base::V = op2.V; } + + INLINE void operator=(const volatile ap_uint<_AP_W>& op2) volatile { + Base::V = op2.V; + } + +}; // struct ap_uint. + +#define ap_bigint ap_int +#define ap_biguint ap_uint + +#if !defined(__SYNTHESIS__) && (defined(SYSTEMC_H) || defined(SYSTEMC_INCLUDED)) +// XXX sc_trace overload for ap_fixed is already included in +// "ap_sysc/ap_sc_extras.h", so do not define in synthesis. +template +INLINE void sc_trace(sc_core::sc_trace_file* tf, const ap_int<_AP_W>& op, + const std::string& name) { + if (tf) tf->trace(sc_dt::sc_lv<_AP_W>(op.to_string(2).c_str()), name); +} + +template +INLINE void sc_trace(sc_core::sc_trace_file* tf, const ap_uint<_AP_W>& op, + const std::string& name) { + if (tf) tf->trace(sc_dt::sc_lv<_AP_W>(op.to_string(2).c_str()), name); +} +#endif // System C sim + +#include + +#endif // ifndef __AP_INT_H__ else + +// FIXME user should include ap_fixed.h when using ap_fixed. +// to avoid circular inclusion, must check whether this is required by +// ap_fixed.h +#ifndef __AP_FIXED_H__ +#include +#endif + +// -*- cpp -*- diff --git a/include/ap_int_base.h b/include/ap_int_base.h new file mode 100644 index 0000000..3062adf --- /dev/null +++ b/include/ap_int_base.h @@ -0,0 +1,1918 @@ +/* +#- (c) Copyright 2011-2019 Xilinx, Inc. All rights reserved. +#- +#- This file contains confidential and proprietary information +#- of Xilinx, Inc. and is protected under U.S. and +#- international copyright and other intellectual property +#- laws. +#- +#- DISCLAIMER +#- This disclaimer is not a license and does not grant any +#- rights to the materials distributed herewith. Except as +#- otherwise provided in a valid license issued to you by +#- Xilinx, and to the maximum extent permitted by applicable +#- law: (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND +#- WITH ALL FAULTS, AND XILINX HEREBY DISCLAIMS ALL WARRANTIES +#- AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING +#- BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON- +#- INFRINGEMENT, OR FITNESS FOR ANY PARTICULAR PURPOSE; and +#- (2) Xilinx shall not be liable (whether in contract or tort, +#- including negligence, or under any other theory of +#- liability) for any loss or damage of any kind or nature +#- related to, arising under or in connection with these +#- materials, including for any direct, or any indirect, +#- special, incidental, or consequential loss or damage +#- (including loss of data, profits, goodwill, or any type of +#- loss or damage suffered as a result of any action brought +#- by a third party) even if such damage or loss was +#- reasonably foreseeable or Xilinx had been advised of the +#- possibility of the same. +#- +#- CRITICAL APPLICATIONS +#- Xilinx products are not designed or intended to be fail- +#- safe, or for use in any application requiring fail-safe +#- performance, such as life-support or safety devices or +#- systems, Class III medical devices, nuclear facilities, +#- applications related to the deployment of airbags, or any +#- other applications that could lead to death, personal +#- injury, or severe property or environmental damage +#- (individually and collectively, "Critical +#- Applications"). 
Customer assumes the sole risk and +#- liability of any use of Xilinx products in Critical +#- Applications, subject only to applicable laws and +#- regulations governing limitations on product liability. +#- +#- THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS +#- PART OF THIS FILE AT ALL TIMES. +#- ************************************************************************ + +*/ + +#ifndef __AP_INT_BASE_H__ +#define __AP_INT_BASE_H__ + +#ifndef __AP_INT_H__ +// TODO make this an error +#pragma message \ + "Only ap_fixed.h and ap_int.h can be included directly in user code." +#endif + +#ifndef __cplusplus +#error "C++ is required to include this header file" +#else + +#include +#ifndef __SYNTHESIS__ +#include +#include +#include +#endif + +// for ptrdiff_t used in increasing pointer. +#include + +/* ---------------------------------------------------------------- + * ap_int_base: AutoPilot integer/Arbitrary precision integer. + * ---------------------------------------------------------------- + */ + +/* helper trait. Selecting the smallest C type that can hold the value, + * return 64 bit C type if not possible. + */ +template +struct retval; + +// at least 64 bit +template +struct retval<_AP_N, true> { + typedef ap_slong Type; +}; + +template +struct retval<_AP_N, false> { + typedef ap_ulong Type; +}; + +// at least 8 bit +template <> +struct retval<1, true> { + typedef signed char Type; +}; + +template <> +struct retval<1, false> { + typedef unsigned char Type; +}; + +// at least 16 bit +template <> +struct retval<2, true> { + typedef short Type; +}; + +template <> +struct retval<2, false> { + typedef unsigned short Type; +}; + +// at least 32 bit +template <> +struct retval<3, true> { + typedef long Type; +}; + +template <> +struct retval<3, false> { + typedef unsigned long Type; +}; + +template <> +struct retval<4, true> { + typedef long Type; +}; + +template <> +struct retval<4, false> { + typedef unsigned long Type; +}; + +// trait for letting base class to return derived class. +// Notice that derived class template is incomplete, and we cannot use +// the member of the derived class. 
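+// For example, _ap_int_factory<8, true>::type names ap_int<8> even though
+// only a forward declaration of ap_int is visible at this point:
+//
+//   typedef _ap_int_factory<8, true>::type t8s;   // t8s is ap_int<8>
+//   typedef _ap_int_factory<8, false>::type t8u;  // t8u is ap_uint<8>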
+template +struct _ap_int_factory; +template +struct _ap_int_factory<_AP_W2,true> { typedef ap_int<_AP_W2> type; }; +template +struct _ap_int_factory<_AP_W2,false> { typedef ap_uint<_AP_W2> type; }; + +template +struct ap_int_base : public _AP_ROOT_TYPE<_AP_W, _AP_S> { + public: + typedef _AP_ROOT_TYPE<_AP_W, _AP_S> Base; + + /* ap_int_base<_AP_W, _AP_S, true> + * typedef typename retval<(_AP_W + 7) / 8, _AP_S>::Type RetType; + * + * ap_int_base<_AP_W, _AP_S, false> + * typedef typename retval<8, _AP_S>::Type RetType; + */ + typedef typename retval::Type RetType; + + static const int width = _AP_W; + + template + struct RType { + enum { + mult_w = _AP_W + _AP_W2, + mult_s = _AP_S || _AP_S2, + plus_w = + AP_MAX(_AP_W + (_AP_S2 && !_AP_S), _AP_W2 + (_AP_S && !_AP_S2)) + 1, + plus_s = _AP_S || _AP_S2, + minus_w = + AP_MAX(_AP_W + (_AP_S2 && !_AP_S), _AP_W2 + (_AP_S && !_AP_S2)) + 1, + minus_s = true, + div_w = _AP_W + _AP_S2, + div_s = _AP_S || _AP_S2, + mod_w = AP_MIN(_AP_W, _AP_W2 + (!_AP_S2 && _AP_S)), + mod_s = _AP_S, + logic_w = AP_MAX(_AP_W + (_AP_S2 && !_AP_S), _AP_W2 + (_AP_S && !_AP_S2)), + logic_s = _AP_S || _AP_S2 + }; + + + typedef ap_int_base mult_base; + typedef ap_int_base plus_base; + typedef ap_int_base minus_base; + typedef ap_int_base logic_base; + typedef ap_int_base div_base; + typedef ap_int_base mod_base; + typedef ap_int_base<_AP_W, _AP_S> arg1_base; + + typedef typename _ap_int_factory::type mult; + typedef typename _ap_int_factory::type plus; + typedef typename _ap_int_factory::type minus; + typedef typename _ap_int_factory::type logic; + typedef typename _ap_int_factory::type div; + typedef typename _ap_int_factory::type mod; + typedef typename _ap_int_factory<_AP_W, _AP_S>::type arg1; + typedef bool reduce; + }; + + /* Constructors. + * ---------------------------------------------------------------- + */ + /// default ctor + INLINE ap_int_base() { + /* + #ifdef __SC_COMPATIBLE__ + Base::V = 0; + #endif + */ + } + + /// copy ctor + template + INLINE ap_int_base(const ap_int_base<_AP_W2, _AP_S2>& op) { + Base::V = op.V; + } + + /// volatile copy ctor + template + INLINE ap_int_base(const volatile ap_int_base<_AP_W2, _AP_S2>& op) { + Base::V = op.V; + } + +// XXX C++11 feature. +// The explicit specifier specifies that a constructor or conversion function +// (since C++11) doesn't allow implicit conversions or copy-initialization. +// ap_int_base x = 1; +// ap_int_base foo() { return 1; } +// but allows +// ap_int_base x(1); +// ap_int_base y {1}; + +/// from all c types. +#define CTOR_FROM_INT(Type, Size, Signed) \ + INLINE ap_int_base(const Type op) { Base::V = op; } + + CTOR_FROM_INT(bool, 1, false) + CTOR_FROM_INT(char, 8, CHAR_IS_SIGNED) + CTOR_FROM_INT(signed char, 8, true) + CTOR_FROM_INT(unsigned char, 8, false) + CTOR_FROM_INT(short, _AP_SIZE_short, true) + CTOR_FROM_INT(unsigned short, _AP_SIZE_short, false) + CTOR_FROM_INT(int, _AP_SIZE_int, true) + CTOR_FROM_INT(unsigned int, _AP_SIZE_int, false) + CTOR_FROM_INT(long, _AP_SIZE_long, true) + CTOR_FROM_INT(unsigned long, _AP_SIZE_long, false) + CTOR_FROM_INT(ap_slong, _AP_SIZE_ap_slong, true) + CTOR_FROM_INT(ap_ulong, _AP_SIZE_ap_slong, false) +#undef CTOR_FROM_INT + + /// ctor from half. + // TODO optimize + INLINE ap_int_base(half op) { + ap_int_base<_AP_W, _AP_S> t((float)op); + Base::V = t.V; + } + + /// ctor from float. 
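+  // The float/double constructors below decode the IEEE-754 fields by hand:
+  // sign bit, biased exponent, mantissa with the hidden leading 1 restored,
+  // then a shift by (FLOAT_MAN - unbiased_exp) to align the binary point.
+  // Scalar sketch of the same idea (raw_bits() is a hypothetical helper;
+  // NaN/infinity and the width/rounding edge cases handled below are
+  // ignored):
+  //
+  //   uint32_t r = raw_bits(f);
+  //   int e = (int)((r >> 23) & 0xFF) - 127;   // unbiased exponent
+  //   uint64_t m = (r & 0x7FFFFF) | 0x800000;  // 24-bit mantissa
+  //   int64_t v = e <= 23 ? (int64_t)(m >> (23 - e)) : (int64_t)(m << (e - 23));
+  //   if (r >> 31) v = -v;                     // apply the sign last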
+ INLINE ap_int_base(float op) { + const int BITS = FLOAT_MAN + FLOAT_EXP + 1; + ap_int_base reg; + reg.V = floatToRawBits(op); + bool is_neg = _AP_ROOT_op_get_bit(reg.V, BITS - 1); + + ap_int_base exp = 0; + exp.V = _AP_ROOT_op_get_range(reg.V, FLOAT_MAN, BITS - 2); + exp = exp - FLOAT_BIAS; + + ap_int_base man; + man.V = _AP_ROOT_op_get_range(reg.V, 0, FLOAT_MAN - 1); + // check for NaN + _AP_WARNING(exp == ((unsigned char)(FLOAT_BIAS + 1)) && man.V != 0, + "assign NaN to ap integer value"); + // set leading 1. + man.V = _AP_ROOT_op_set_bit(man.V, FLOAT_MAN, 1); + //if (is_neg) man = -man; + + if ((reg.V & 0x7ffffffful) == 0) { + Base::V = 0; + } else { + int sh_amt = FLOAT_MAN - exp.V; + if (sh_amt == 0) { + Base::V = man.V; + } else if (sh_amt > 0) { + if (sh_amt < FLOAT_MAN + 2) { + Base::V = man.V >> sh_amt; + } else { + if (is_neg) + Base::V = -1; + else + Base::V = 0; + } + } else { + sh_amt = -sh_amt; + if (sh_amt < _AP_W) { + Base::V = man.V; + Base::V <<= sh_amt; + } else { + Base::V = 0; + } + } + } + if (is_neg) *this = -(*this); + } + + /// ctor from double. + INLINE ap_int_base(double op) { + const int BITS = DOUBLE_MAN + DOUBLE_EXP + 1; + ap_int_base reg; + reg.V = doubleToRawBits(op); + bool is_neg = _AP_ROOT_op_get_bit(reg.V, BITS - 1); + + ap_int_base exp = 0; + exp.V = _AP_ROOT_op_get_range(reg.V, DOUBLE_MAN, BITS - 2); + exp = exp - DOUBLE_BIAS; + + ap_int_base man; + man.V = _AP_ROOT_op_get_range(reg.V, 0, DOUBLE_MAN - 1); + // check for NaN + _AP_WARNING(exp == ((unsigned char)(DOUBLE_BIAS + 1)) && man.V != 0, + "assign NaN to ap integer value"); + // set leading 1. + man.V = _AP_ROOT_op_set_bit(man.V, DOUBLE_MAN, 1); + //if (is_neg) man = -man; + + if ((reg.V & 0x7fffffffffffffffull) == 0) { + Base::V = 0; + } else { + int sh_amt = DOUBLE_MAN - exp.V; + if (sh_amt == 0) { + Base::V = man.V; + } else if (sh_amt > 0) { + if (sh_amt < DOUBLE_MAN + 2) { + Base::V = man.V >> sh_amt; + } else { + if (is_neg) + Base::V = -1; + else + Base::V = 0; + } + } else { + sh_amt = -sh_amt; + if (sh_amt < _AP_W) { + Base::V = man.V; + Base::V <<= sh_amt; + } else { + Base::V = 0; + } + } + } + if (is_neg) *this = -(*this); + } + + /// from higer rank type. + template + INLINE ap_int_base( + const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op) { + Base::V = op.to_ap_int_base().V; + } + + template + INLINE ap_int_base(const ap_range_ref<_AP_W2, _AP_S2>& ref) { + Base::V = (ref.get()).V; + } + + template + INLINE ap_int_base(const ap_bit_ref<_AP_W2, _AP_S2>& ref) { + Base::V = ref.operator bool(); + } + + template + INLINE ap_int_base(const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3>& ref) { + const ap_int_base::_AP_WR, + false> + tmp = ref.get(); + Base::V = tmp.V; + } + + /* radix has default value in set */ + +#ifndef __SYNTHESIS__ + INLINE ap_int_base(const char* s, signed char rd = 0) { + if (rd == 0) + rd = guess_radix(s); + unsigned int length = strlen(s); + Base::V.fromString(s, length, rd); + } +#else + // XXX __builtin_bit_from_string(...) requires const C string and radix. 
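+  // Sketch: string construction is also the way to express literals wider
+  // than 64 bits, which no built-in integer type can hold:
+  //
+  //   ap_int<128> big("0x0123456789ABCDEF0123456789ABCDEF");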
+ INLINE ap_int_base(const char* s) { + typeof(Base::V) t; + _ssdm_string2bits((void*)(&t), (const char*)(s), 10, _AP_W, _AP_S, + AP_TRN, AP_WRAP, 0, _AP_C99); + Base::V = t; + } + INLINE ap_int_base(const char* s, signed char rd) { + typeof(Base::V) t; + _ssdm_string2bits((void*)(&t), (const char*)(s), rd, _AP_W, _AP_S, + AP_TRN, AP_WRAP, 0, _AP_C99); + Base::V = t; + } +#endif + + template + INLINE ap_int_base( + const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& val) { + Base::V = (val.get()).V; + } + + template + INLINE ap_int_base( + const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& val) { + Base::V = val.operator bool(); + } + + INLINE ap_int_base read() volatile { + /*AP_DEBUG(printf("call read %d\n", Base::V););*/ + ap_int_base ret; + ret.V = Base::V; + return ret; + } + + INLINE void write(const ap_int_base<_AP_W, _AP_S>& op2) volatile { + /*AP_DEBUG(printf("call write %d\n", op2.V););*/ + Base::V = op2.V; + } + + /* Another form of "write".*/ + template + INLINE void operator=( + const volatile ap_int_base<_AP_W2, _AP_S2>& op2) volatile { + Base::V = op2.V; + } + + INLINE void operator=( + const volatile ap_int_base<_AP_W, _AP_S>& op2) volatile { + Base::V = op2.V; + } + + template + INLINE void operator=(const ap_int_base<_AP_W2, _AP_S2>& op2) volatile { + Base::V = op2.V; + } + + INLINE void operator=(const ap_int_base<_AP_W, _AP_S>& op2) volatile { + Base::V = op2.V; + } + + template + INLINE ap_int_base& operator=( + const volatile ap_int_base<_AP_W2, _AP_S2>& op2) { + Base::V = op2.V; + return *this; + } + + template + INLINE ap_int_base& operator=(const ap_int_base<_AP_W2, _AP_S2>& op2) { + Base::V = op2.V; + return *this; + } + + INLINE ap_int_base& operator=(const volatile ap_int_base<_AP_W, _AP_S>& op2) { + Base::V = op2.V; + return *this; + } + + INLINE ap_int_base& operator=(const ap_int_base<_AP_W, _AP_S>& op2) { + Base::V = op2.V; + return *this; + } + + +#define ASSIGN_OP_FROM_INT(Type, Size, Signed) \ + INLINE ap_int_base& operator=(Type op) { \ + Base::V = op; \ + return *this; \ + } + + ASSIGN_OP_FROM_INT(bool, 1, false) + ASSIGN_OP_FROM_INT(char, 8, CHAR_IS_SIGNED) + ASSIGN_OP_FROM_INT(signed char, 8, true) + ASSIGN_OP_FROM_INT(unsigned char, 8, false) + ASSIGN_OP_FROM_INT(short, _AP_SIZE_short, true) + ASSIGN_OP_FROM_INT(unsigned short, _AP_SIZE_short, false) + ASSIGN_OP_FROM_INT(int, _AP_SIZE_int, true) + ASSIGN_OP_FROM_INT(unsigned int, _AP_SIZE_int, false) + ASSIGN_OP_FROM_INT(long, _AP_SIZE_long, true) + ASSIGN_OP_FROM_INT(unsigned long, _AP_SIZE_long, false) + ASSIGN_OP_FROM_INT(ap_slong, _AP_SIZE_ap_slong, true) + ASSIGN_OP_FROM_INT(ap_ulong, _AP_SIZE_ap_slong, false) + +#undef ASSIGN_OP_FROM_INT + + template + INLINE ap_int_base& operator=(const ap_bit_ref<_AP_W2, _AP_S2>& op2) { + Base::V = (bool)op2; + return *this; + } + + template + INLINE ap_int_base& operator=(const ap_range_ref<_AP_W2, _AP_S2>& op2) { + Base::V = (ap_int_base<_AP_W2, false>(op2)).V; + return *this; + } + + template + INLINE ap_int_base& operator=( + const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3>& op2) { + Base::V = op2.get().V; + return *this; + } + + template + INLINE ap_int_base& operator=( + const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op) { + Base::V = op.to_ap_int_base().V; + return *this; + } + + template + INLINE ap_int_base& operator=( + const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op) { + Base::V = (bool)op; + return *this; + } + + template + INLINE ap_int_base& operator=( + const 
af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op) {
+    Base::V = ((const ap_int_base<_AP_W2, false>)(op)).V;
+    return *this;
+  }
+
+  // FIXME: UG902 clearly requires users to call to_int() to convert to
+  // built-in types, but this implicit conversion is relied on in
+  // hls_cordic.h and hls_rsr.h. For example:
+  //   int d_exp = fps_x.exp - fps_y.exp;
+  INLINE operator RetType() const { return (RetType)(Base::V); }
+
+  /* Explicit conversions to C types.
+   * ----------------------------------------------------------------
+   */
+  INLINE bool to_bool() const { return (bool)(Base::V); }
+  INLINE char to_char() const { return (char)(Base::V); }
+  INLINE signed char to_schar() const { return (signed char)(Base::V); }
+  INLINE unsigned char to_uchar() const { return (unsigned char)(Base::V); }
+  INLINE short to_short() const { return (short)(Base::V); }
+  INLINE unsigned short to_ushort() const { return (unsigned short)(Base::V); }
+  INLINE int to_int() const { return (int)(Base::V); }
+  INLINE unsigned to_uint() const { return (unsigned)(Base::V); }
+  INLINE long to_long() const { return (long)(Base::V); }
+  INLINE unsigned long to_ulong() const { return (unsigned long)(Base::V); }
+  INLINE ap_slong to_int64() const { return (ap_slong)(Base::V); }
+  INLINE ap_ulong to_uint64() const { return (ap_ulong)(Base::V); }
+  INLINE float to_float() const { return (float)(Base::V); }
+  INLINE double to_double() const { return (double)(Base::V); }
+
+  // TODO decide if user-defined conversion should be provided.
+#if 0
+  INLINE operator char() const { return (char)(Base::V); }
+  INLINE operator signed char() const { return (signed char)(Base::V); }
+  INLINE operator unsigned char() const { return (unsigned char)(Base::V); }
+  INLINE operator short() const { return (short)(Base::V); }
+  INLINE operator unsigned short() const { return (unsigned short)(Base::V); }
+  INLINE operator int() const { return (int)(Base::V); }
+  INLINE operator unsigned int () const { return (unsigned)(Base::V); }
+  INLINE operator long () const { return (long)(Base::V); }
+  INLINE operator unsigned long () const { return (unsigned long)(Base::V); }
+  INLINE operator ap_slong () { return (ap_slong)(Base::V); }
+  INLINE operator ap_ulong () { return (ap_ulong)(Base::V); }
+#endif
+
+  /* Helper methods.
+     ----------------------------------------------------------------
+   */
+  /* we cannot call a non-volatile function on a volatile instance.
+   * but calling a volatile function is ok.
+   * XXX deleted non-volatile version.
+ */ + INLINE int length() const volatile { return _AP_W; } + + /*Return true if the value of ap_int_base instance is zero*/ + INLINE bool iszero() const { return Base::V == 0; } + + /*Return true if the value of ap_int_base instance is zero*/ + INLINE bool is_zero() const { return Base::V == 0; } + + /* x < 0 */ + INLINE bool sign() const { + if (_AP_S && + _AP_ROOT_op_get_bit(Base::V, _AP_W - 1)) + return true; + else + return false; + } + + /* x[i] = 0 */ + INLINE void clear(int i) { + AP_ASSERT(i >= 0 && i < _AP_W, "position out of range"); + Base::V = _AP_ROOT_op_set_bit(Base::V, i, 0); + } + + /* x[i] = !x[i]*/ + INLINE void invert(int i) { + AP_ASSERT(i >= 0 && i < _AP_W, "position out of range"); + bool val = _AP_ROOT_op_get_bit(Base::V, i); + if (val) + Base::V = _AP_ROOT_op_set_bit(Base::V, i, 0); + else + Base::V = _AP_ROOT_op_set_bit(Base::V, i, 1); + } + + INLINE bool test(int i) const { + AP_ASSERT(i >= 0 && i < _AP_W, "position out of range"); + return _AP_ROOT_op_get_bit(Base::V, i); + } + + // Get self. For ap_concat_ref expansion. + INLINE ap_int_base& get() { return *this; } + + // Set the ith bit into 1 + INLINE void set(int i) { + AP_ASSERT(i >= 0 && i < _AP_W, "position out of range"); + Base::V = _AP_ROOT_op_set_bit(Base::V, i, 1); + } + + // Set the ith bit into v + INLINE void set(int i, bool v) { + AP_ASSERT(i >= 0 && i < _AP_W, "position out of range"); + Base::V = _AP_ROOT_op_set_bit(Base::V, i, v); + } + + // This is used for sc_lv and sc_bv, which is implemented by sc_uint + // Rotate an ap_int_base object n places to the left + INLINE ap_int_base& lrotate(int n) { + AP_ASSERT(n >= 0 && n < _AP_W, "shift value out of range"); + // TODO unify this. +#ifdef __SYNTHESIS__ + typeof(Base::V) l_p = Base::V << n; + typeof(Base::V) r_p = Base::V >> (_AP_W - n); + Base::V = l_p | r_p; +#else + Base::V.lrotate(n); +#endif + return *this; + } + + // This is used for sc_lv and sc_bv, which is implemented by sc_uint + // Rotate an ap_int_base object n places to the right + INLINE ap_int_base& rrotate(int n) { + AP_ASSERT(n >= 0 && n < _AP_W, "shift value out of range"); + // TODO unify this. +#ifdef __SYNTHESIS__ + typeof(Base::V) l_p = Base::V << (_AP_W - n); + typeof(Base::V) r_p = Base::V >> n; + Base::V = l_p | r_p; +#else + Base::V.rrotate(n); +#endif + return *this; + } + + // Reverse the contents of ap_int_base instance. + // I.e. LSB becomes MSB and vise versa. + INLINE ap_int_base& reverse() { + Base::V = _AP_ROOT_op_get_range(Base::V, _AP_W - 1, 0); + return *this; + } + + // Set the ith bit into v + INLINE void set_bit(int i, bool v) { + Base::V = _AP_ROOT_op_set_bit(Base::V, i, v); + } + + // Get the value of ith bit + INLINE bool get_bit(int i) const { + return (bool)_AP_ROOT_op_get_bit(Base::V, i); + } + + // complements every bit + INLINE void b_not() { Base::V = ~Base::V; } + +#define OP_ASSIGN_AP(Sym) \ + template \ + INLINE ap_int_base& operator Sym(const ap_int_base<_AP_W2, _AP_S2>& op2) { \ + Base::V Sym op2.V; \ + return *this; \ + } + + /* Arithmetic assign. + * ---------------------------------------------------------------- + */ + OP_ASSIGN_AP(*=) + OP_ASSIGN_AP(+=) + OP_ASSIGN_AP(-=) + OP_ASSIGN_AP(/=) + OP_ASSIGN_AP(%=) +#undef OP_ASSIGN_AP + + /* Bitwise assign: and, or, xor. 
+ * ---------------------------------------------------------------- + */ +#define OP_ASSIGN_AP_CHK(Sym) \ + template \ + INLINE ap_int_base& operator Sym(const ap_int_base<_AP_W2, _AP_S2>& op2) { \ + _AP_WARNING((_AP_W != _AP_W2), \ + "Bitsize mismatch for ap_[u]int" #Sym "ap_[u]int."); \ + Base::V Sym op2.V; \ + return *this; \ + } + OP_ASSIGN_AP_CHK(&=) + OP_ASSIGN_AP_CHK(|=) + OP_ASSIGN_AP_CHK(^=) +#undef OP_ASSIGN_AP_CHK + + /* Prefix increment, decrement. + * ---------------------------------------------------------------- + */ + INLINE ap_int_base& operator++() { + operator+=((ap_int_base<1, false>)1); + return *this; + } + INLINE ap_int_base& operator--() { + operator-=((ap_int_base<1, false>)1); + return *this; + } + + /* Postfix increment, decrement + * ---------------------------------------------------------------- + */ + INLINE const typename RType<_AP_W,_AP_S>::arg1 operator++(int) { + ap_int_base t = *this; + operator+=((ap_int_base<1, false>)1); + return t; + } + INLINE const typename RType<_AP_W,_AP_S>::arg1 operator--(int) { + ap_int_base t = *this; + operator-=((ap_int_base<1, false>)1); + return t; + } + + /* Unary arithmetic. + * ---------------------------------------------------------------- + */ + INLINE typename RType<_AP_W,_AP_S>::arg1 operator+() const { return *this; } + + // TODO used to be W>64 only... need check. + INLINE typename RType<1, false>::minus operator-() const { + return ap_int_base<1, false>(0) - *this; + } + + /* Not (!) + * ---------------------------------------------------------------- + */ + INLINE bool operator!() const { return Base::V == 0; } + + /* Bitwise (arithmetic) unary: complement + ---------------------------------------------------------------- + */ + // XXX different from Mentor's ac_int! + INLINE typename RType<_AP_W,_AP_S>::arg1 operator~() const { + ap_int_base<_AP_W, _AP_S> r; + r.V = ~Base::V; + return r; + } + + /* Shift (result constrained by left operand). + * ---------------------------------------------------------------- + */ + template + INLINE typename RType<_AP_W,_AP_S>::arg1 operator<<(const ap_int_base<_AP_W2, true>& op2) const { + bool isNeg = _AP_ROOT_op_get_bit(op2.V, _AP_W2 - 1); + ap_int_base<_AP_W2, false> sh = op2; + if (isNeg) { + sh = -op2; + return operator>>(sh); + } else + return operator<<(sh); + } + + template + INLINE typename RType<_AP_W,_AP_S>::arg1 operator<<(const ap_int_base<_AP_W2, false>& op2) const { + ap_int_base r; + r.V = Base::V << op2.to_uint(); + return r; + } + + template + INLINE typename RType<_AP_W,_AP_S>::arg1 operator>>(const ap_int_base<_AP_W2, true>& op2) const { + bool isNeg = _AP_ROOT_op_get_bit(op2.V, _AP_W2 - 1); + ap_int_base<_AP_W2, false> sh = op2; + if (isNeg) { + sh = -op2; + return operator<<(sh); + } + return operator>>(sh); + } + + template + INLINE typename RType<_AP_W,_AP_S>::arg1 operator>>(const ap_int_base<_AP_W2, false>& op2) const { + ap_int_base r; + r.V = Base::V >> op2.to_uint(); + return r; + } + + // FIXME we standalone operator>> for ap_int_base and ap_range_ref. 
+#if 0 + template + INLINE ap_int_base operator<<(const ap_range_ref<_AP_W2, _AP_S2>& op2) const { + return *this << (op2.operator ap_int_base<_AP_W2, false>()); + } + + template + INLINE ap_int_base operator>>(const ap_range_ref<_AP_W2, _AP_S2>& op2) const { + return *this >> (op2.operator ap_int_base<_AP_W2, false>()); + } +#endif + + /* Shift assign + * ---------------------------------------------------------------- + */ + template + INLINE ap_int_base& operator<<=(const ap_int_base<_AP_W2, true>& op2) { + bool isNeg = _AP_ROOT_op_get_bit(op2.V, _AP_W2 - 1); + ap_int_base<_AP_W2, false> sh = op2; + if (isNeg) { + sh = -op2; + return operator>>=(sh); + } else + return operator<<=(sh); + } + + template + INLINE ap_int_base& operator<<=(const ap_int_base<_AP_W2, false>& op2) { + Base::V <<= op2.to_uint(); + return *this; + } + + template + INLINE ap_int_base& operator>>=(const ap_int_base<_AP_W2, true>& op2) { + bool isNeg = _AP_ROOT_op_get_bit(op2.V, _AP_W2 - 1); + ap_int_base<_AP_W2, false> sh = op2; + if (isNeg) { + sh = -op2; + return operator<<=(sh); + } + return operator>>=(sh); + } + + template + INLINE ap_int_base& operator>>=(const ap_int_base<_AP_W2, false>& op2) { + Base::V >>= op2.to_uint(); + return *this; + } + + // FIXME we standalone operator>> for ap_int_base and ap_range_ref. +#if 0 + template + INLINE ap_int_base& operator<<=(const ap_range_ref<_AP_W2, _AP_S2>& op2) { + return *this <<= (op2.operator ap_int_base<_AP_W2, false>()); + } + template + INLINE ap_int_base& operator>>=(const ap_range_ref<_AP_W2, _AP_S2>& op2) { + return *this >>= (op2.operator ap_int_base<_AP_W2, false>()); + } +#endif + + /* Equality and Relational. + * ---------------------------------------------------------------- + */ + template + INLINE bool operator==(const ap_int_base<_AP_W2, _AP_S2>& op2) const { + return Base::V == op2.V; + } + template + INLINE bool operator!=(const ap_int_base<_AP_W2, _AP_S2>& op2) const { + return !(Base::V == op2.V); + } + template + INLINE bool operator<(const ap_int_base<_AP_W2, _AP_S2>& op2) const { + return Base::V < op2.V; + } + template + INLINE bool operator>=(const ap_int_base<_AP_W2, _AP_S2>& op2) const { + return Base::V >= op2.V; + } + template + INLINE bool operator>(const ap_int_base<_AP_W2, _AP_S2>& op2) const { + return Base::V > op2.V; + } + template + INLINE bool operator<=(const ap_int_base<_AP_W2, _AP_S2>& op2) const { + return Base::V <= op2.V; + } + + /* Bit and Part Select + * ---------------------------------------------------------------- + */ + INLINE ap_range_ref<_AP_W, _AP_S> range(int Hi, int Lo) { + _AP_ERROR(Hi >= _AP_W, "Hi(%d)out of bound(%d) in range()", Hi, _AP_W); + _AP_ERROR(Lo >= _AP_W, "Lo(%d)out of bound(%d) in range()", Lo, _AP_W); + return ap_range_ref<_AP_W, _AP_S>(this, Hi, Lo); + } + + // This is a must to strip constness to produce reference type. 
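+  // Example (illustrative): this enables slicing a const object, e.g.
+  //   const ap_uint<16> v = 0xABCD;
+  //   int hi = v.range(15, 8).to_int();  // hi == 0xAB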
+  INLINE ap_range_ref<_AP_W, _AP_S> range(int Hi, int Lo) const {
+    _AP_ERROR(Hi >= _AP_W, "Hi(%d)out of bound(%d) in range()", Hi, _AP_W);
+    _AP_ERROR(Lo >= _AP_W, "Lo(%d)out of bound(%d) in range()", Lo, _AP_W);
+    return ap_range_ref<_AP_W, _AP_S>(const_cast<ap_int_base*>(this), Hi, Lo);
+  }
+
+  template <int _AP_W2, bool _AP_S2, int _AP_W3, bool _AP_S3>
+  INLINE ap_range_ref<_AP_W, _AP_S> range(
+      const ap_int_base<_AP_W2, _AP_S2>& HiIdx,
+      const ap_int_base<_AP_W3, _AP_S3>& LoIdx) {
+    int Hi = HiIdx.to_int();
+    int Lo = LoIdx.to_int();
+    return this->range(Hi, Lo);
+  }
+
+  template <int _AP_W2, bool _AP_S2, int _AP_W3, bool _AP_S3>
+  INLINE ap_range_ref<_AP_W, _AP_S> range(
+      const ap_int_base<_AP_W2, _AP_S2>& HiIdx,
+      const ap_int_base<_AP_W3, _AP_S3>& LoIdx) const {
+    int Hi = HiIdx.to_int();
+    int Lo = LoIdx.to_int();
+    return this->range(Hi, Lo);
+  }
+
+  INLINE ap_range_ref<_AP_W, _AP_S> range() {
+    return this->range(_AP_W - 1, 0);
+  }
+
+  INLINE ap_range_ref<_AP_W, _AP_S> range() const {
+    return this->range(_AP_W - 1, 0);
+  }
+
+  INLINE ap_range_ref<_AP_W, _AP_S> operator()(int Hi, int Lo) {
+    return this->range(Hi, Lo);
+  }
+
+  INLINE ap_range_ref<_AP_W, _AP_S> operator()(int Hi, int Lo) const {
+    return this->range(Hi, Lo);
+  }
+
+  template <int _AP_W2, bool _AP_S2, int _AP_W3, bool _AP_S3>
+  INLINE ap_range_ref<_AP_W, _AP_S> operator()(
+      const ap_int_base<_AP_W2, _AP_S2>& HiIdx,
+      const ap_int_base<_AP_W3, _AP_S3>& LoIdx) {
+    int Hi = HiIdx.to_int();
+    int Lo = LoIdx.to_int();
+    return this->range(Hi, Lo);
+  }
+
+  template <int _AP_W2, bool _AP_S2, int _AP_W3, bool _AP_S3>
+  INLINE ap_range_ref<_AP_W, _AP_S> operator()(
+      const ap_int_base<_AP_W2, _AP_S2>& HiIdx,
+      const ap_int_base<_AP_W3, _AP_S3>& LoIdx) const {
+    int Hi = HiIdx.to_int();
+    int Lo = LoIdx.to_int();
+    return this->range(Hi, Lo);
+  }
+
+#if 0
+  template <int Hi, int Lo>
+  INLINE ap_int_base<Hi - Lo + 1, _AP_S> slice() const {
+    AP_ASSERT(Hi >= Lo && Hi < _AP_W && Lo < _AP_W, "Out of bounds in slice()");
+    ap_int_base<Hi - Lo + 1, _AP_S> tmp ;
+    tmp.V = _AP_ROOT_op_get_range(Base::V, Lo, Hi);
+    return tmp;
+  }
+
+  INLINE ap_bit_ref<_AP_W,_AP_S> operator [] ( unsigned int uindex) {
+    AP_ASSERT(uindex < _AP_W, "Attempting to read bit beyond MSB");
+    ap_bit_ref<_AP_W,_AP_S> bvh( this, uindex );
+    return bvh;
+  }
+#endif
+
+  INLINE ap_bit_ref<_AP_W, _AP_S> operator[](int index) {
+    AP_ASSERT(index >= 0, "Attempting to read bit with negative index");
+    AP_ASSERT(index < _AP_W, "Attempting to read bit beyond MSB");
+    ap_bit_ref<_AP_W, _AP_S> bvh(this, index);
+    return bvh;
+  }
+
+  template <int _AP_W2, bool _AP_S2>
+  INLINE ap_bit_ref<_AP_W, _AP_S> operator[](
+      const ap_int_base<_AP_W2, _AP_S2>& index) {
+    AP_ASSERT(index >= 0, "Attempting to read bit with negative index");
+    AP_ASSERT(index < _AP_W, "Attempting to read bit beyond MSB");
+    ap_bit_ref<_AP_W, _AP_S> bvh(this, index.to_int());
+    return bvh;
+  }
+
+  INLINE bool operator[](int index) const {
+    AP_ASSERT(index >= 0, "Attempting to read bit with negative index");
+    AP_ASSERT(index < _AP_W, "Attempting to read bit beyond MSB");
+    ap_bit_ref<_AP_W, _AP_S> br(this, index);
+    return br.to_bool();
+  }
+  template <int _AP_W2, bool _AP_S2>
+  INLINE bool operator[](const ap_int_base<_AP_W2, _AP_S2>& index) const {
+    AP_ASSERT(index < _AP_W, "Attempting to read bit beyond MSB");
+    ap_bit_ref<_AP_W, _AP_S> br(this, index.to_int());
+    return br.to_bool();
+  }
+
+  INLINE ap_bit_ref<_AP_W, _AP_S> bit(int index) {
+    AP_ASSERT(index >= 0, "Attempting to read bit with negative index");
+    AP_ASSERT(index < _AP_W, "Attempting to read bit beyond MSB");
+    ap_bit_ref<_AP_W, _AP_S> bvh(this, index);
+    return bvh;
+  }
+  template <int _AP_W2, bool _AP_S2>
+  INLINE ap_bit_ref<_AP_W, _AP_S> bit(
+      const ap_int_base<_AP_W2, _AP_S2>& index) {
+    AP_ASSERT(index >= 0, "Attempting to read bit with negative index");
AP_ASSERT(index < _AP_W, "Attempting to read bit beyond MSB"); + ap_bit_ref<_AP_W, _AP_S> bvh(this, index.to_int()); + return bvh; + } + + INLINE bool bit(int index) const { + AP_ASSERT(index >= 0, "Attempting to read bit with negative index"); + AP_ASSERT(index < _AP_W, "Attempting to read bit beyond MSB"); + ap_bit_ref<_AP_W, _AP_S> br(this, index); + return br.to_bool(); + } + + template + INLINE bool bit(const ap_int_base<_AP_W2, _AP_S2>& index) const { + return bit(index.to_int()); + } + +#if 0 + template + INLINE bool operator[](_AP_T index) const { + AP_ASSERT(index < _AP_W, "Attempting to read bit beyond MSB"); + ap_bit_ref<_AP_W,_AP_S> br = operator[](index); + return br.to_bool(); + } +#endif + + // Count the number of zeros from the most significant bit + // to the first one bit. + INLINE int countLeadingZeros() { +#ifdef __SYNTHESIS__ + if (_AP_W <= 32) { + ap_int_base<32, false> t(-1UL), x; + x.V = _AP_ROOT_op_get_range(this->V, _AP_W - 1, 0); // reverse + t.V = _AP_ROOT_op_set_range(t.V, 0, _AP_W - 1, x.V); + return __builtin_ctz(t.V); // count trailing zeros. + } else if (_AP_W <= 64) { + ap_int_base<64, false> t(-1ULL); + ap_int_base<64, false> x; + x.V = _AP_ROOT_op_get_range(this->V, _AP_W - 1, 0); // reverse + t.V = _AP_ROOT_op_set_range(t.V, 0, _AP_W - 1, x.V); + return __builtin_ctzll(t.V); // count trailing zeros. + } else { + enum { __N = (_AP_W + 63) / 64 }; + int NZeros = 0; + int i = 0; + bool hitNonZero = false; + for (i = 0; i < __N - 1; ++i) { + ap_int_base<64, false> t; + t.V = _AP_ROOT_op_get_range(this->V, _AP_W - i * 64 - 64, _AP_W - i * 64 - 1); + NZeros += hitNonZero ? 0 : __builtin_clzll(t.V); // count leading zeros. + hitNonZero |= (t.V != 0); + } + if (!hitNonZero) { + ap_int_base<64, false> t(-1ULL); + enum { REST = (_AP_W - 1) % 64 }; + ap_int_base<64, false> x; + x.V = _AP_ROOT_op_get_range(this->V, 0, REST); + t.V = _AP_ROOT_op_set_range(t.V, 63 - REST, 63, x.V); + NZeros += __builtin_clzll(t.V); + } + return NZeros; + } +#else + return (Base::V).countLeadingZeros(); +#endif + } // countLeadingZeros + + template + INLINE ap_concat_ref<_AP_W, ap_int_base, _AP_W2, ap_int_base<_AP_W2, _AP_S2> > + concat(const ap_int_base<_AP_W2, _AP_S2>& a2) const { + return ap_concat_ref<_AP_W, ap_int_base, _AP_W2, + ap_int_base<_AP_W2, _AP_S2> >( + const_cast&>(*this), + const_cast&>(a2)); + } + + template + INLINE ap_concat_ref<_AP_W, ap_int_base, _AP_W2, ap_int_base<_AP_W2, _AP_S2> > + concat(ap_int_base<_AP_W2, _AP_S2>& a2) { + return ap_concat_ref<_AP_W, ap_int_base, _AP_W2, + ap_int_base<_AP_W2, _AP_S2> >(*this, a2); + } + + template + INLINE + ap_concat_ref<_AP_W, ap_int_base, _AP_W2, ap_range_ref<_AP_W2, _AP_S2> > + operator,(const ap_range_ref<_AP_W2, _AP_S2> &a2) const { + return ap_concat_ref<_AP_W, ap_int_base, _AP_W2, + ap_range_ref<_AP_W2, _AP_S2> >( + const_cast&>(*this), + const_cast&>(a2)); + } + + template + INLINE + ap_concat_ref<_AP_W, ap_int_base, _AP_W2, ap_range_ref<_AP_W2, _AP_S2> > + operator,(ap_range_ref<_AP_W2, _AP_S2> &a2) { + return ap_concat_ref<_AP_W, ap_int_base, _AP_W2, + ap_range_ref<_AP_W2, _AP_S2> >(*this, a2); + } + + template + INLINE ap_concat_ref<_AP_W, ap_int_base, _AP_W2, ap_int_base<_AP_W2, _AP_S2> > + operator,(const ap_int_base<_AP_W2, _AP_S2> &a2) { + return ap_concat_ref<_AP_W, ap_int_base, _AP_W2, + ap_int_base<_AP_W2, _AP_S2> >( + *this, const_cast&>(a2)); + } + + template + INLINE ap_concat_ref<_AP_W, ap_int_base, _AP_W2, ap_int_base<_AP_W2, _AP_S2> > + operator,(ap_int_base<_AP_W2, _AP_S2> &a2) const { + return 
ap_concat_ref<_AP_W, ap_int_base, _AP_W2, + ap_int_base<_AP_W2, _AP_S2> >( + const_cast&>(*this), a2); + } + + template + INLINE ap_concat_ref<_AP_W, ap_int_base, _AP_W2, ap_int_base<_AP_W2, _AP_S2> > + operator,(const ap_int_base<_AP_W2, _AP_S2> &a2) const { + return ap_concat_ref<_AP_W, ap_int_base, _AP_W2, + ap_int_base<_AP_W2, _AP_S2> >( + const_cast&>(*this), + const_cast&>(a2)); + } + + template + INLINE ap_concat_ref<_AP_W, ap_int_base, _AP_W2, ap_int_base<_AP_W2, _AP_S2> > + operator,(ap_int_base<_AP_W2, _AP_S2> &a2) { + return ap_concat_ref<_AP_W, ap_int_base, _AP_W2, + ap_int_base<_AP_W2, _AP_S2> >(*this, a2); + } + + template + INLINE ap_concat_ref<_AP_W, ap_int_base, 1, ap_bit_ref<_AP_W2, _AP_S2> > + operator,(const ap_bit_ref<_AP_W2, _AP_S2> &a2) const { + return ap_concat_ref<_AP_W, ap_int_base, 1, ap_bit_ref<_AP_W2, _AP_S2> >( + const_cast&>(*this), + const_cast&>(a2)); + } + + template + INLINE ap_concat_ref<_AP_W, ap_int_base, 1, ap_bit_ref<_AP_W2, _AP_S2> > + operator,(ap_bit_ref<_AP_W2, _AP_S2> &a2) { + return ap_concat_ref<_AP_W, ap_int_base, 1, ap_bit_ref<_AP_W2, _AP_S2> >( + *this, a2); + } + + template + INLINE ap_concat_ref<_AP_W, ap_int_base, _AP_W2 + _AP_W3, + ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> > + operator,(const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> &a2) { + return ap_concat_ref<_AP_W, ap_int_base, _AP_W2 + _AP_W3, + ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> >( + const_cast&>(*this), + const_cast&>(a2)); + } + + template + INLINE ap_concat_ref<_AP_W, ap_int_base, _AP_W2 + _AP_W3, + ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> > + operator,(ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> &a2) { + return ap_concat_ref<_AP_W, ap_int_base, _AP_W2 + _AP_W3, + ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> >(*this, + a2); + } + + template + INLINE ap_concat_ref< + _AP_W, ap_int_base, _AP_W2, + af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > + operator,(const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> + &a2) const { + return ap_concat_ref< + _AP_W, ap_int_base, _AP_W2, + af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >( + const_cast&>(*this), + const_cast< + af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>&>(a2)); + } + + template + INLINE ap_concat_ref< + _AP_W, ap_int_base, _AP_W2, + af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > + operator,(af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> &a2) { + return ap_concat_ref< + _AP_W, ap_int_base, _AP_W2, + af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >(*this, + a2); + } + + template + INLINE + ap_concat_ref<_AP_W, ap_int_base, 1, + af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > + operator,(const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> + &a2) const { + return ap_concat_ref< + _AP_W, ap_int_base, 1, + af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >( + const_cast&>(*this), + const_cast&>( + a2)); + } + + template + INLINE + ap_concat_ref<_AP_W, ap_int_base, 1, + af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > + operator,( + af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> &a2) { + return ap_concat_ref< + _AP_W, ap_int_base, 1, + af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >(*this, a2); + } + + template + INLINE ap_int_base operator&( + const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3>& a2) { + return *this & a2.get(); + } + + template + INLINE ap_int_base operator|( + const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, 
_AP_T3>& a2) { + return *this | a2.get(); + } + + template + INLINE ap_int_base operator^( + const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3>& a2) { + return *this ^ a2.get(); + } + + template + INLINE void set(const ap_int_base<_AP_W3, false>& val) { + Base::V = val.V; + } + + /* Reduce operations. + * ---------------------------------------------------------------- + */ + // XXX non-const version deleted. + INLINE bool and_reduce() const { return _AP_ROOT_op_reduce(and, Base::V); } + INLINE bool nand_reduce() const { return _AP_ROOT_op_reduce(nand, Base::V); } + INLINE bool or_reduce() const { return _AP_ROOT_op_reduce(or, Base::V); } + INLINE bool nor_reduce() const { return !(_AP_ROOT_op_reduce(or, Base::V)); } + INLINE bool xor_reduce() const { return _AP_ROOT_op_reduce (xor, Base::V); } + INLINE bool xnor_reduce() const { + return !(_AP_ROOT_op_reduce (xor, Base::V)); + } + + /* Output as a string. + * ---------------------------------------------------------------- + */ +#ifndef __SYNTHESIS__ + std::string to_string(signed char rd = 2, bool sign = _AP_S) const { + // XXX in autosim/autowrap.tcl "(${name}).to_string(2).c_str()" is used to + // initialize sc_lv, which seems incapable of handling format "-0b". + if (rd == 2) sign = false; + return (Base::V).to_string(rd, sign); + } +#else + INLINE char* to_string(signed char rd = 2, bool sign = _AP_S) const { + return 0; + } +#endif +}; // struct ap_int_base + +// XXX apcc cannot handle global std::ios_base::Init() brought in by +#ifndef AP_AUTOCC +#ifndef __SYNTHESIS__ +template +INLINE std::ostream& operator<<(std::ostream& os, + const ap_int_base<_AP_W, _AP_S>& x) { + std::ios_base::fmtflags ff = std::cout.flags(); + if (ff & std::cout.hex) { + os << x.to_string(16); // don't print sign + } else if (ff & std::cout.oct) { + os << x.to_string(8); // don't print sign + } else { + os << x.to_string(10); + } + return os; +} +#endif // ifndef __SYNTHESIS__ + +#ifndef __SYNTHESIS__ +template +INLINE std::istream& operator>>(std::istream& in, + ap_int_base<_AP_W, _AP_S>& op) { + std::string str; + in >> str; + const std::ios_base::fmtflags basefield = in.flags() & std::ios_base::basefield; + unsigned radix = (basefield == std::ios_base::dec) ? 0 : ( + (basefield == std::ios_base::oct) ? 8 : ( + (basefield == std::ios_base::hex) ? 16 : 0)); + op = ap_int_base<_AP_W, _AP_S>(str.c_str(), radix); + return in; +} +#endif // ifndef __SYNTHESIS__ +#endif // ifndef AP_AUTOCC + +/* Operators with another ap_int_base. 
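+ * Example (illustrative): the result widens per RType, e.g.
+ *   ap_uint<8>(200) + ap_uint<8>(100) yields 300 as a 9-bit unsigned value.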
+ * ---------------------------------------------------------------- + */ +#define OP_BIN_AP(Sym, Rty) \ + template \ + INLINE \ + typename ap_int_base<_AP_W, _AP_S>::template RType<_AP_W2, _AP_S2>::Rty \ + operator Sym(const ap_int_base<_AP_W, _AP_S>& op, \ + const ap_int_base<_AP_W2, _AP_S2>& op2) { \ + typename ap_int_base<_AP_W, _AP_S>::template RType< \ + _AP_W2, _AP_S2>::Rty##_base lhs(op); \ + typename ap_int_base<_AP_W, _AP_S>::template RType< \ + _AP_W2, _AP_S2>::Rty##_base rhs(op2); \ + typename ap_int_base<_AP_W, _AP_S>::template RType< \ + _AP_W2, _AP_S2>::Rty##_base ret; \ + ret.V = lhs.V Sym rhs.V; \ + return ret; \ + } + +OP_BIN_AP(*, mult) +OP_BIN_AP(+, plus) +OP_BIN_AP(-, minus) +OP_BIN_AP(&, logic) +OP_BIN_AP(|, logic) +OP_BIN_AP(^, logic) + +#define OP_BIN_AP2(Sym, Rty) \ + template \ + INLINE \ + typename ap_int_base<_AP_W, _AP_S>::template RType<_AP_W2, _AP_S2>::Rty \ + operator Sym(const ap_int_base<_AP_W, _AP_S>& op, \ + const ap_int_base<_AP_W2, _AP_S2>& op2) { \ + typename ap_int_base<_AP_W, _AP_S>::template RType< \ + _AP_W2, _AP_S2>::Rty##_base ret; \ + ret.V = op.V Sym op2.V; \ + return ret; \ + } + +OP_BIN_AP2(/, div) +OP_BIN_AP2(%, mod) + +// shift operators are defined inside class. +// compound assignment operators are defined inside class. + +/* Operators with a pointer type. + * ---------------------------------------------------------------- + * char a[100]; + * char* ptr = a; + * ap_int<2> n = 3; + * char* ptr2 = ptr + n*2; + * avoid ambiguous errors. + */ +#define OP_BIN_WITH_PTR(BIN_OP) \ + template \ + INLINE PTR_TYPE* operator BIN_OP(PTR_TYPE* i_op, \ + const ap_int_base<_AP_W, _AP_S>& op) { \ + std::ptrdiff_t op2 = op.to_long(); /* Not all implementation */ \ + return i_op BIN_OP op2; \ + } \ + template \ + INLINE PTR_TYPE* operator BIN_OP(const ap_int_base<_AP_W, _AP_S>& op, \ + PTR_TYPE* i_op) { \ + std::ptrdiff_t op2 = op.to_long(); /* Not all implementation */ \ + return op2 BIN_OP i_op; \ + } + +OP_BIN_WITH_PTR(+) +OP_BIN_WITH_PTR(-) + +/* Operators with a native floating point types. + * ---------------------------------------------------------------- + */ +// float OP ap_int +// when ap_int's width > 64, then trunc ap_int to ap_int<64> +#define OP_BIN_WITH_FLOAT(BIN_OP, C_TYPE) \ + template \ + INLINE C_TYPE operator BIN_OP(C_TYPE i_op, \ + const ap_int_base<_AP_W, _AP_S>& op) { \ + typename ap_int_base<_AP_W, _AP_S>::RetType op2 = op; \ + return i_op BIN_OP op2; \ + } \ + template \ + INLINE C_TYPE operator BIN_OP(const ap_int_base<_AP_W, _AP_S>& op, \ + C_TYPE i_op) { \ + typename ap_int_base<_AP_W, _AP_S>::RetType op2 = op; \ + return op2 BIN_OP i_op; \ + } + +#define ALL_OP_WITH_FLOAT(C_TYPE) \ + OP_BIN_WITH_FLOAT(*, C_TYPE) \ + OP_BIN_WITH_FLOAT(/, C_TYPE) \ + OP_BIN_WITH_FLOAT(+, C_TYPE) \ + OP_BIN_WITH_FLOAT(-, C_TYPE) + +ALL_OP_WITH_FLOAT(half) +ALL_OP_WITH_FLOAT(float) +ALL_OP_WITH_FLOAT(double) + +// TODO no shift? + +/* Operators with a native integral types. + * ---------------------------------------------------------------- + */ +// arithmetic and bitwise operators. 
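+// Example (illustrative): the native operand is first wrapped at its natural
+// width, so ap_uint<4>(15) + 1 is evaluated as ap_uint<4> plus a signed
+// 32-bit ap_int_base and the result widens accordingly.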
+#define OP_BIN_WITH_INT(BIN_OP, C_TYPE, _AP_W2, _AP_S2, RTYPE) \ + template \ + INLINE typename ap_int_base<_AP_W, _AP_S>::template RType<_AP_W2, \ + _AP_S2>::RTYPE \ + operator BIN_OP(C_TYPE i_op, const ap_int_base<_AP_W, _AP_S>& op) { \ + return ap_int_base<_AP_W2, _AP_S2>(i_op) BIN_OP(op); \ + } \ + template \ + INLINE typename ap_int_base<_AP_W, _AP_S>::template RType<_AP_W2, \ + _AP_S2>::RTYPE \ + operator BIN_OP(const ap_int_base<_AP_W, _AP_S>& op, C_TYPE i_op) { \ + return op BIN_OP ap_int_base<_AP_W2, _AP_S2>(i_op); \ + } + +#define ALL_OP_BIN_WITH_INT(C_TYPE, _AP_W2, _AP_S2) \ + OP_BIN_WITH_INT(*, C_TYPE, _AP_W2, _AP_S2, mult) \ + OP_BIN_WITH_INT(+, C_TYPE, _AP_W2, _AP_S2, plus) \ + OP_BIN_WITH_INT(-, C_TYPE, _AP_W2, _AP_S2, minus) \ + OP_BIN_WITH_INT(/, C_TYPE, _AP_W2, _AP_S2, div) \ + OP_BIN_WITH_INT(%, C_TYPE, _AP_W2, _AP_S2, mod) \ + OP_BIN_WITH_INT(&, C_TYPE, _AP_W2, _AP_S2, logic) \ + OP_BIN_WITH_INT(|, C_TYPE, _AP_W2, _AP_S2, logic) \ + OP_BIN_WITH_INT(^, C_TYPE, _AP_W2, _AP_S2, logic) + +ALL_OP_BIN_WITH_INT(bool, 1, false) +ALL_OP_BIN_WITH_INT(char, 8, CHAR_IS_SIGNED) +ALL_OP_BIN_WITH_INT(signed char, 8, true) +ALL_OP_BIN_WITH_INT(unsigned char, 8, false) +ALL_OP_BIN_WITH_INT(short, _AP_SIZE_short, true) +ALL_OP_BIN_WITH_INT(unsigned short, _AP_SIZE_short, false) +ALL_OP_BIN_WITH_INT(int, _AP_SIZE_int, true) +ALL_OP_BIN_WITH_INT(unsigned int, _AP_SIZE_int, false) +ALL_OP_BIN_WITH_INT(long, _AP_SIZE_long, true) +ALL_OP_BIN_WITH_INT(unsigned long, _AP_SIZE_long, false) +ALL_OP_BIN_WITH_INT(ap_slong, _AP_SIZE_ap_slong, true) +ALL_OP_BIN_WITH_INT(ap_ulong, _AP_SIZE_ap_slong, false) + +#undef OP_BIN_WITH_INT +#undef ALL_OP_BIN_WITH_INT + +// shift operators. +#define ALL_OP_SHIFT_WITH_INT(C_TYPE, _AP_W2, _AP_S2) \ + template \ + INLINE typename ap_int_base<_AP_W, _AP_S>::template RType<_AP_W,_AP_S>::arg1 operator<<( \ + const ap_int_base<_AP_W, _AP_S>& op, C_TYPE op2) { \ + ap_int_base<_AP_W, _AP_S> r; \ + if (_AP_S2) \ + r.V = op2 >= 0 ? (op.V << op2) : (op.V >> (-op2)); \ + else \ + r.V = op.V << op2; \ + return r; \ + } \ + template \ + INLINE typename ap_int_base<_AP_W, _AP_S>::template RType<_AP_W,_AP_S>::arg1 operator>>( \ + const ap_int_base<_AP_W, _AP_S>& op, C_TYPE op2) { \ + ap_int_base<_AP_W, _AP_S> r; \ + if (_AP_S2) \ + r.V = op2 >= 0 ? 
(op.V >> op2) : (op.V << (-op2)); \ + else \ + r.V = op.V >> op2; \ + return r; \ + } + +ALL_OP_SHIFT_WITH_INT(char, 8, CHAR_IS_SIGNED) +ALL_OP_SHIFT_WITH_INT(signed char, 8, true) +ALL_OP_SHIFT_WITH_INT(short, _AP_SIZE_short, true) +ALL_OP_SHIFT_WITH_INT(int, _AP_SIZE_int, true) +ALL_OP_SHIFT_WITH_INT(long, _AP_SIZE_long, true) +ALL_OP_SHIFT_WITH_INT(ap_slong, _AP_SIZE_ap_slong, true) + +#undef ALL_OP_SHIFT_WITH_INT + +#define ALL_OP_SHIFT_WITH_INT(C_TYPE, _AP_W2, _AP_S2) \ + template \ + INLINE typename ap_int_base<_AP_W, _AP_S>::template RType<_AP_W,_AP_S>::arg1 operator<<( \ + const ap_int_base<_AP_W, _AP_S>& op, C_TYPE op2) { \ + ap_int_base<_AP_W, _AP_S> r; \ + r.V = op.V << op2; \ + return r; \ + } \ + template \ + INLINE typename ap_int_base<_AP_W, _AP_S>::template RType<_AP_W,_AP_S>::arg1 operator>>( \ + const ap_int_base<_AP_W, _AP_S>& op, C_TYPE op2) { \ + ap_int_base<_AP_W, _AP_S> r; \ + r.V = op.V >> op2; \ + return r; \ + } +ALL_OP_SHIFT_WITH_INT(bool, 1, false) +ALL_OP_SHIFT_WITH_INT(unsigned char, 8, false) +ALL_OP_SHIFT_WITH_INT(unsigned short, _AP_SIZE_short, false) +ALL_OP_SHIFT_WITH_INT(unsigned int, _AP_SIZE_int, false) +ALL_OP_SHIFT_WITH_INT(unsigned long, _AP_SIZE_long, false) +ALL_OP_SHIFT_WITH_INT(ap_ulong, _AP_SIZE_ap_slong, false) + +#undef ALL_OP_SHIFT_WITH_INT + +// compound assign operators. +#define OP_ASSIGN_WITH_INT(ASSIGN_OP, C_TYPE, _AP_W2, _AP_S2) \ + template \ + INLINE ap_int_base<_AP_W, _AP_S>& operator ASSIGN_OP( \ + ap_int_base<_AP_W, _AP_S>& op, C_TYPE op2) { \ + return op ASSIGN_OP ap_int_base<_AP_W2, _AP_S2>(op2); \ + } + +// TODO int a; ap_int<16> b; a += b; + +#define ALL_OP_ASSIGN_WITH_INT(C_TYPE, _AP_W2, _AP_S2) \ + OP_ASSIGN_WITH_INT(+=, C_TYPE, _AP_W2, _AP_S2) \ + OP_ASSIGN_WITH_INT(-=, C_TYPE, _AP_W2, _AP_S2) \ + OP_ASSIGN_WITH_INT(*=, C_TYPE, _AP_W2, _AP_S2) \ + OP_ASSIGN_WITH_INT(/=, C_TYPE, _AP_W2, _AP_S2) \ + OP_ASSIGN_WITH_INT(%=, C_TYPE, _AP_W2, _AP_S2) \ + OP_ASSIGN_WITH_INT(&=, C_TYPE, _AP_W2, _AP_S2) \ + OP_ASSIGN_WITH_INT(|=, C_TYPE, _AP_W2, _AP_S2) \ + OP_ASSIGN_WITH_INT(^=, C_TYPE, _AP_W2, _AP_S2) \ + OP_ASSIGN_WITH_INT(>>=, C_TYPE, _AP_W2, _AP_S2) \ + OP_ASSIGN_WITH_INT(<<=, C_TYPE, _AP_W2, _AP_S2) + +ALL_OP_ASSIGN_WITH_INT(bool, 1, false) +ALL_OP_ASSIGN_WITH_INT(char, 8, CHAR_IS_SIGNED) +ALL_OP_ASSIGN_WITH_INT(signed char, 8, true) +ALL_OP_ASSIGN_WITH_INT(unsigned char, 8, false) +ALL_OP_ASSIGN_WITH_INT(short, _AP_SIZE_short, true) +ALL_OP_ASSIGN_WITH_INT(unsigned short, _AP_SIZE_short, false) +ALL_OP_ASSIGN_WITH_INT(int, _AP_SIZE_int, true) +ALL_OP_ASSIGN_WITH_INT(unsigned int, _AP_SIZE_int, false) +ALL_OP_ASSIGN_WITH_INT(long, _AP_SIZE_long, true) +ALL_OP_ASSIGN_WITH_INT(unsigned long, _AP_SIZE_long, false) +ALL_OP_ASSIGN_WITH_INT(ap_slong, _AP_SIZE_ap_slong, true) +ALL_OP_ASSIGN_WITH_INT(ap_ulong, _AP_SIZE_ap_slong, false) + +#undef OP_ASSIGN_WITH_INT +#undef ALL_OP_ASSIGN_WITH_INT + +// equality and relational operators. 
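+// Example (illustrative): the integer operand is promoted before comparing,
+// so ap_int<4>(-1) == -1 holds, while ap_uint<4>(15) == -1 does not.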
+#define OP_REL_WITH_INT(REL_OP, C_TYPE, _AP_W2, _AP_S2) \ + template \ + INLINE bool operator REL_OP(C_TYPE i_op, \ + const ap_int_base<_AP_W, _AP_S>& op) { \ + return ap_int_base<_AP_W2, _AP_S2>(i_op) REL_OP op; \ + } \ + template \ + INLINE bool operator REL_OP(const ap_int_base<_AP_W, _AP_S>& op, \ + C_TYPE op2) { \ + return op REL_OP ap_int_base<_AP_W2, _AP_S2>(op2); \ + } + +#define ALL_OP_REL_WITH_INT(C_TYPE, _AP_W2, _AP_S2) \ + OP_REL_WITH_INT(>, C_TYPE, _AP_W2, _AP_S2) \ + OP_REL_WITH_INT(<, C_TYPE, _AP_W2, _AP_S2) \ + OP_REL_WITH_INT(>=, C_TYPE, _AP_W2, _AP_S2) \ + OP_REL_WITH_INT(<=, C_TYPE, _AP_W2, _AP_S2) \ + OP_REL_WITH_INT(==, C_TYPE, _AP_W2, _AP_S2) \ + OP_REL_WITH_INT(!=, C_TYPE, _AP_W2, _AP_S2) + +ALL_OP_REL_WITH_INT(bool, 1, false) +ALL_OP_REL_WITH_INT(char, 8, CHAR_IS_SIGNED) +ALL_OP_REL_WITH_INT(signed char, 8, true) +ALL_OP_REL_WITH_INT(unsigned char, 8, false) +ALL_OP_REL_WITH_INT(short, _AP_SIZE_short, true) +ALL_OP_REL_WITH_INT(unsigned short, _AP_SIZE_short, false) +ALL_OP_REL_WITH_INT(int, _AP_SIZE_int, true) +ALL_OP_REL_WITH_INT(unsigned int, _AP_SIZE_int, false) +ALL_OP_REL_WITH_INT(long, _AP_SIZE_long, true) +ALL_OP_REL_WITH_INT(unsigned long, _AP_SIZE_long, false) +ALL_OP_REL_WITH_INT(ap_slong, _AP_SIZE_ap_slong, true) +ALL_OP_REL_WITH_INT(ap_ulong, _AP_SIZE_ap_slong, false) + +#undef OP_REL_WITH_INT +#undef ALL_OP_BIN_WITH_INT + +#define OP_REL_WITH_DOUBLE_OR_FLOAT(Sym) \ + template \ + INLINE bool operator Sym(const ap_int_base<_AP_W, _AP_S>& op1, \ + double op2) { \ + return op1.to_double() Sym op2 ; \ + } \ + template \ + INLINE bool operator Sym(double op1, \ + const ap_int_base<_AP_W, _AP_S>& op2) { \ + return op1 Sym op2.to_double() ; \ + } \ + template \ + INLINE bool operator Sym(const ap_int_base<_AP_W, _AP_S>& op1, \ + float op2) { \ + return op1.to_double() Sym op2 ; \ + } \ + template \ + INLINE bool operator Sym(float op1, \ + const ap_int_base<_AP_W, _AP_S>& op2) { \ + return op1 Sym op2.to_double() ; \ + } + OP_REL_WITH_DOUBLE_OR_FLOAT(>) + OP_REL_WITH_DOUBLE_OR_FLOAT(<) + OP_REL_WITH_DOUBLE_OR_FLOAT(>=) + OP_REL_WITH_DOUBLE_OR_FLOAT(<=) + OP_REL_WITH_DOUBLE_OR_FLOAT(==) + OP_REL_WITH_DOUBLE_OR_FLOAT(!=) + +#undef OP_REL_WITH_DOUBLE_OR_FLOAT + + +/* Operators with ap_bit_ref. + * ------------------------------------------------------------ + */ +// arithmetic, bitwise and shift operators. +#define OP_BIN_WITH_RANGE(BIN_OP, RTYPE) \ + template \ + INLINE typename ap_int_base<_AP_W1, _AP_S1>::template RType<_AP_W2, \ + _AP_S2>::RTYPE \ + operator BIN_OP(const ap_range_ref<_AP_W1, _AP_S1>& op1, \ + const ap_int_base<_AP_W2, _AP_S2>& op2) { \ + return ap_int_base<_AP_W1, false>(op1) BIN_OP op2; \ + } \ + template \ + INLINE typename ap_int_base<_AP_W1, _AP_S1>::template RType<_AP_W2, \ + _AP_S2>::RTYPE \ + operator BIN_OP(const ap_int_base<_AP_W1, _AP_S1>& op1, \ + const ap_range_ref<_AP_W2, _AP_S2>& op2) { \ + return op1 BIN_OP ap_int_base<_AP_W2, false>(op2); \ + } + +OP_BIN_WITH_RANGE(+, plus) +OP_BIN_WITH_RANGE(-, minus) +OP_BIN_WITH_RANGE(*, mult) +OP_BIN_WITH_RANGE(/, div) +OP_BIN_WITH_RANGE(%, mod) +OP_BIN_WITH_RANGE(&, logic) +OP_BIN_WITH_RANGE(|, logic) +OP_BIN_WITH_RANGE(^, logic) +OP_BIN_WITH_RANGE(>>, arg1) +OP_BIN_WITH_RANGE(<<, arg1) + +#undef OP_BIN_WITH_RANGE + +// compound assignment operators. 
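+// Example (illustrative): assigning through a slice writes back into the
+// referenced bits, e.g. with ap_uint<8> v = 0x0F and ap_uint<8> one = 1,
+// v.range(3, 0) += one; leaves v == 0x00 (only the low 4 bits are kept).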
+#define OP_ASSIGN_WITH_RANGE(ASSIGN_OP) \ + template \ + INLINE ap_int_base<_AP_W1, _AP_S1>& operator ASSIGN_OP( \ + ap_int_base<_AP_W1, _AP_S1>& op1, ap_range_ref<_AP_W2, _AP_S2>& op2) { \ + return op1 ASSIGN_OP ap_int_base<_AP_W2, false>(op2); \ + } \ + template \ + INLINE ap_range_ref<_AP_W1, _AP_S1>& operator ASSIGN_OP( \ + ap_range_ref<_AP_W1, _AP_S1>& op1, ap_int_base<_AP_W2, _AP_S2>& op2) { \ + ap_int_base<_AP_W1, false> tmp(op1); \ + tmp ASSIGN_OP op2; \ + op1 = tmp; \ + return op1; \ + } + +OP_ASSIGN_WITH_RANGE(+=) +OP_ASSIGN_WITH_RANGE(-=) +OP_ASSIGN_WITH_RANGE(*=) +OP_ASSIGN_WITH_RANGE(/=) +OP_ASSIGN_WITH_RANGE(%=) +OP_ASSIGN_WITH_RANGE(&=) +OP_ASSIGN_WITH_RANGE(|=) +OP_ASSIGN_WITH_RANGE(^=) +OP_ASSIGN_WITH_RANGE(>>=) +OP_ASSIGN_WITH_RANGE(<<=) + +#undef OP_ASSIGN_WITH_RANGE + +// equality and relational operators +#define OP_REL_WITH_RANGE(REL_OP) \ + template \ + INLINE bool operator REL_OP(const ap_range_ref<_AP_W1, _AP_S1>& op1, \ + const ap_int_base<_AP_W2, _AP_S2>& op2) { \ + return ap_int_base<_AP_W1, false>(op1).operator REL_OP(op2); \ + } \ + template \ + INLINE bool operator REL_OP(const ap_int_base<_AP_W1, _AP_S1>& op1, \ + const ap_range_ref<_AP_W2, _AP_S2>& op2) { \ + return op1.operator REL_OP(op2.operator ap_int_base<_AP_W2, false>()); \ + } + +OP_REL_WITH_RANGE(==) +OP_REL_WITH_RANGE(!=) +OP_REL_WITH_RANGE(>) +OP_REL_WITH_RANGE(>=) +OP_REL_WITH_RANGE(<) +OP_REL_WITH_RANGE(<=) + +#undef OP_REL_WITH_RANGE + +/* Operators with ap_bit_ref. + * ------------------------------------------------------------ + */ +// arithmetic, bitwise and shift operators. +#define OP_BIN_WITH_BIT(BIN_OP, RTYPE) \ + template \ + INLINE typename ap_int_base<_AP_W1, _AP_S1>::template RType<1, false>::RTYPE \ + operator BIN_OP(const ap_int_base<_AP_W1, _AP_S1>& op1, \ + const ap_bit_ref<_AP_W2, _AP_S2>& op2) { \ + return op1 BIN_OP ap_int_base<1, false>(op2); \ + } \ + template \ + INLINE typename ap_int_base<1, false>::template RType<_AP_W2, _AP_S2>::RTYPE \ + operator BIN_OP(const ap_bit_ref<_AP_W1, _AP_S1>& op1, \ + const ap_int_base<_AP_W2, _AP_S2>& op2) { \ + return ap_int_base<1, false>(op1) BIN_OP op2; \ + } + +OP_BIN_WITH_BIT(+, plus) +OP_BIN_WITH_BIT(-, minus) +OP_BIN_WITH_BIT(*, mult) +OP_BIN_WITH_BIT(/, div) +OP_BIN_WITH_BIT(%, mod) +OP_BIN_WITH_BIT(&, logic) +OP_BIN_WITH_BIT(|, logic) +OP_BIN_WITH_BIT(^, logic) +OP_BIN_WITH_BIT(>>, arg1) +OP_BIN_WITH_BIT(<<, arg1) + +#undef OP_BIN_WITH_BIT + +// compound assignment operators. +#define OP_ASSIGN_WITH_BIT(ASSIGN_OP) \ + template \ + INLINE ap_int_base<_AP_W1, _AP_S1>& operator ASSIGN_OP( \ + ap_int_base<_AP_W1, _AP_S1>& op1, ap_bit_ref<_AP_W2, _AP_S2>& op2) { \ + return op1 ASSIGN_OP ap_int_base<1, false>(op2); \ + } \ + template \ + INLINE ap_bit_ref<_AP_W1, _AP_S1>& operator ASSIGN_OP( \ + ap_bit_ref<_AP_W1, _AP_S1>& op1, ap_int_base<_AP_W2, _AP_S2>& op2) { \ + ap_int_base<1, false> tmp(op1); \ + tmp ASSIGN_OP op2; \ + op1 = tmp; \ + return op1; \ + } + +OP_ASSIGN_WITH_BIT(+=) +OP_ASSIGN_WITH_BIT(-=) +OP_ASSIGN_WITH_BIT(*=) +OP_ASSIGN_WITH_BIT(/=) +OP_ASSIGN_WITH_BIT(%=) +OP_ASSIGN_WITH_BIT(&=) +OP_ASSIGN_WITH_BIT(|=) +OP_ASSIGN_WITH_BIT(^=) +OP_ASSIGN_WITH_BIT(>>=) +OP_ASSIGN_WITH_BIT(<<=) + +#undef OP_ASSIGN_WITH_BIT + +// equality and relational operators. 
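+// Example (illustrative): a bit reference participates as a 1-bit unsigned
+// value, e.g. v[0] == ap_uint<1>(1) tests the LSB of v.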
+#define OP_REL_WITH_BIT(REL_OP) \ + template \ + INLINE bool operator REL_OP(const ap_int_base<_AP_W1, _AP_S1>& op1, \ + const ap_bit_ref<_AP_W2, _AP_S2>& op2) { \ + return op1 REL_OP ap_int_base<1, false>(op2); \ + } \ + template \ + INLINE bool operator REL_OP(const ap_bit_ref<_AP_W1, _AP_S1>& op1, \ + const ap_int_base<_AP_W2, _AP_S2>& op2) { \ + return ap_int_base<1, false>(op1) REL_OP op2; \ + } + +OP_REL_WITH_BIT(==) +OP_REL_WITH_BIT(!=) +OP_REL_WITH_BIT(>) +OP_REL_WITH_BIT(>=) +OP_REL_WITH_BIT(<) +OP_REL_WITH_BIT(<=) + +#undef OP_REL_WITH_BIT + + +/* Operators with ap_concat_ref. + * ------------------------------------------------------------ + */ +// arithmetic, bitwise and shift operators. +// bitwise operators are defined in struct. +// TODO specify whether to define arithmetic and bitwise operators. +#if 0 +#define OP_BIN_WITH_CONCAT(BIN_OP, RTYPE) \ + template \ + INLINE typename ap_int_base<_AP_W3, _AP_S3>::template RType<_AP_W1 + _AP_W2, \ + false>::RTYPE \ + operator BIN_OP(const ap_int_base<_AP_W3, _AP_S3>& op1, \ + const ap_concat_ref<_AP_W1, _AP_T1, _AP_W2, _AP_T2>& op2) { \ + /* convert ap_concat_ref to ap_int_base */ \ + return op1 BIN_OP op2.get(); \ + } \ + template \ + INLINE typename ap_int_base<_AP_W1 + _AP_W2, \ + false>::template RType<_AP_W3, _AP_S3>::RTYPE \ + operator BIN_OP(const ap_concat_ref<_AP_W1, _AP_T1, _AP_W2, _AP_T2>& op1, \ + const ap_int_base<_AP_W3, _AP_S3>& op2) { \ + /* convert ap_concat_ref to ap_int_base */ \ + return op1.get() BIN_OP op2; \ + } + +OP_BIN_WITH_CONCAT(+, plus) +OP_BIN_WITH_CONCAT(-, minus) +OP_BIN_WITH_CONCAT(*, mult) +OP_BIN_WITH_CONCAT(/, div) +OP_BIN_WITH_CONCAT(%, mod) +OP_BIN_WITH_CONCAT(&, logic) +OP_BIN_WITH_CONCAT(|, logic) +OP_BIN_WITH_CONCAT(^, logic) +OP_BIN_WITH_CONCAT(>>, arg1) +OP_BIN_WITH_CONCAT(<<, arg1) + +#undef OP_BIN_WITH_CONCAT + +// compound assignment operators. +#define OP_ASSIGN_WITH_CONCAT(ASSIGN_OP) \ + template \ + INLINE typename ap_int_base<_AP_W3, _AP_S3>::template RType<_AP_W1 + _AP_W2, \ + false>::RTYPE \ + operator ASSIGN_OP( \ + const ap_int_base<_AP_W3, _AP_S3>& op1, \ + const ap_concat_ref<_AP_W1, _AP_T1, _AP_W2, _AP_T2>& op2) { \ + /* convert ap_concat_ref to ap_int_base */ \ + return op1 ASSIGN_OP op2.get(); \ + } \ + template \ + INLINE typename ap_int_base<_AP_W1 + _AP_W2, \ + false>::template RType<_AP_W3, _AP_S3>::RTYPE \ + operator ASSIGN_OP(const ap_concat_ref<_AP_W1, _AP_T1, _AP_W2, _AP_T2>& op1, \ + const ap_int_base<_AP_W3, _AP_S3>& op2) { \ + /* convert ap_concat_ref to ap_int_base */ \ + ap_int_base<_AP_W1 + _AP_W2, false> tmp = op1.get(); \ + tmp ASSIGN_OP op2; \ + op1 = tmp; \ + return op1; \ + } + +OP_ASSIGN_WITH_CONCAT(+=) +OP_ASSIGN_WITH_CONCAT(-=) +OP_ASSIGN_WITH_CONCAT(*=) +OP_ASSIGN_WITH_CONCAT(/=) +OP_ASSIGN_WITH_CONCAT(%=) +OP_ASSIGN_WITH_CONCAT(&=) +OP_ASSIGN_WITH_CONCAT(|=) +OP_ASSIGN_WITH_CONCAT(^=) +OP_ASSIGN_WITH_CONCAT(>>=) +OP_ASSIGN_WITH_CONCAT(<<=) + +#undef OP_ASSIGN_WITH_CONCAT +#endif + +// equality and relational operators. 
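+// Example (illustrative): a concatenation compares by its assembled value,
+// e.g. with ap_uint<4> hi = 0xA, lo = 0x5, (hi, lo) == ap_uint<8>(0xA5)
+// holds because (hi, lo) reads as the 8-bit value 0xA5.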
+#define OP_REL_WITH_CONCAT(REL_OP) \ + template \ + INLINE bool operator REL_OP( \ + const ap_int_base<_AP_W3, _AP_S3>& op1, \ + const ap_concat_ref<_AP_W1, _AP_T1, _AP_W2, _AP_T2>& op2) { \ + /* convert ap_concat_ref to ap_int_base */ \ + return op1 REL_OP op2.get(); \ + } \ + template \ + INLINE bool operator REL_OP( \ + const ap_concat_ref<_AP_W1, _AP_T1, _AP_W2, _AP_T2>& op1, \ + const ap_int_base<_AP_W3, _AP_S3>& op2) { \ + /* convert ap_concat_ref to ap_int_base */ \ + return op1.get() REL_OP op2; \ + } + +OP_REL_WITH_CONCAT(==) +OP_REL_WITH_CONCAT(!=) +OP_REL_WITH_CONCAT(>) +OP_REL_WITH_CONCAT(>=) +OP_REL_WITH_CONCAT(<) +OP_REL_WITH_CONCAT(<=) + +#undef OP_REL_WITH_CONCAT + +#endif // ifndef __cplusplus else +#endif // ifndef __AP_INT_BASE_H__ else + +// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/include/ap_int_ref.h b/include/ap_int_ref.h new file mode 100644 index 0000000..e2e42f6 --- /dev/null +++ b/include/ap_int_ref.h @@ -0,0 +1,1378 @@ +/* +#- (c) Copyright 2011-2019 Xilinx, Inc. All rights reserved. +#- +#- This file contains confidential and proprietary information +#- of Xilinx, Inc. and is protected under U.S. and +#- international copyright and other intellectual property +#- laws. +#- +#- DISCLAIMER +#- This disclaimer is not a license and does not grant any +#- rights to the materials distributed herewith. Except as +#- otherwise provided in a valid license issued to you by +#- Xilinx, and to the maximum extent permitted by applicable +#- law: (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND +#- WITH ALL FAULTS, AND XILINX HEREBY DISCLAIMS ALL WARRANTIES +#- AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING +#- BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON- +#- INFRINGEMENT, OR FITNESS FOR ANY PARTICULAR PURPOSE; and +#- (2) Xilinx shall not be liable (whether in contract or tort, +#- including negligence, or under any other theory of +#- liability) for any loss or damage of any kind or nature +#- related to, arising under or in connection with these +#- materials, including for any direct, or any indirect, +#- special, incidental, or consequential loss or damage +#- (including loss of data, profits, goodwill, or any type of +#- loss or damage suffered as a result of any action brought +#- by a third party) even if such damage or loss was +#- reasonably foreseeable or Xilinx had been advised of the +#- possibility of the same. +#- +#- CRITICAL APPLICATIONS +#- Xilinx products are not designed or intended to be fail- +#- safe, or for use in any application requiring fail-safe +#- performance, such as life-support or safety devices or +#- systems, Class III medical devices, nuclear facilities, +#- applications related to the deployment of airbags, or any +#- other applications that could lead to death, personal +#- injury, or severe property or environmental damage +#- (individually and collectively, "Critical +#- Applications"). Customer assumes the sole risk and +#- liability of any use of Xilinx products in Critical +#- Applications, subject only to applicable laws and +#- regulations governing limitations on product liability. +#- +#- THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS +#- PART OF THIS FILE AT ALL TIMES. +#- ************************************************************************ + +*/ + +#ifndef __AP_INT_REF_H__ +#define __AP_INT_REF_H__ + +#ifndef __AP_INT_H__ +// TODO make this an error +#pragma message \ + "Only ap_fixed.h and ap_int.h can be included directly in user code." 
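+// i.e. (illustrative) user code should do '#include <ap_int.h>' or
+// '#include <ap_fixed.h>' instead of including this header directly.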
+#endif
+
+#ifndef __cplusplus
+#error "C++ is required to include this header file"
+
+#else
+
+#ifndef __SYNTHESIS__
+#include <iostream>
+#endif
+
+/* Concatenation reference.
+   ----------------------------------------------------------------
+*/
+template <int _AP_W1, typename _AP_T1, int _AP_W2, typename _AP_T2>
+struct ap_concat_ref {
+  enum {
+    _AP_WR = _AP_W1 + _AP_W2,
+  };
+
+  _AP_T1& mbv1;
+  _AP_T2& mbv2;
+
+  INLINE ap_concat_ref(const ap_concat_ref<_AP_W1, _AP_T1, _AP_W2, _AP_T2>& ref)
+      : mbv1(ref.mbv1), mbv2(ref.mbv2) {}
+
+  INLINE ap_concat_ref(_AP_T1& bv1, _AP_T2& bv2) : mbv1(bv1), mbv2(bv2) {}
+
+  template <int _AP_W3, bool _AP_S3>
+  INLINE ap_concat_ref& operator=(const ap_int_base<_AP_W3, _AP_S3>& val) {
+    ap_int_base<_AP_W1 + _AP_W2, false> vval(val);
+    int W_ref1 = mbv1.length();
+    int W_ref2 = mbv2.length();
+    ap_int_base<_AP_W1, false> Part1;
+    Part1.V = _AP_ROOT_op_get_range(vval.V, W_ref2, W_ref1 + W_ref2 - 1);
+    mbv1.set(Part1);
+    ap_int_base<_AP_W2, false> Part2;
+    Part2.V = _AP_ROOT_op_get_range(vval.V, 0, W_ref2 - 1);
+    mbv2.set(Part2);
+    return *this;
+  }
+
+  // assign op from hls supported C integral types.
+  // FIXME disabled to support legacy code that directly assigns from sc_signal.
+  //template <typename T>
+  //INLINE typename _ap_type::enable_if<_ap_type::is_integral<T>::value,
+  //                                    ap_concat_ref&>::type
+  //operator=(T val) {
+  //  ap_int_base<_AP_W1 + _AP_W2, false> tmpVal(val);
+  //  return operator=(tmpVal);
+  //}
+#define ASSIGN_WITH_CTYPE(_Tp)                       \
+  INLINE ap_concat_ref& operator=(_Tp val) {         \
+    ap_int_base<_AP_W1 + _AP_W2, false> tmpVal(val); \
+    return operator=(tmpVal);                        \
+  }
+
+  ASSIGN_WITH_CTYPE(bool)
+  ASSIGN_WITH_CTYPE(char)
+  ASSIGN_WITH_CTYPE(signed char)
+  ASSIGN_WITH_CTYPE(unsigned char)
+  ASSIGN_WITH_CTYPE(short)
+  ASSIGN_WITH_CTYPE(unsigned short)
+  ASSIGN_WITH_CTYPE(int)
+  ASSIGN_WITH_CTYPE(unsigned int)
+  ASSIGN_WITH_CTYPE(long)
+  ASSIGN_WITH_CTYPE(unsigned long)
+  ASSIGN_WITH_CTYPE(ap_slong)
+  ASSIGN_WITH_CTYPE(ap_ulong)
+  ASSIGN_WITH_CTYPE(half)
+  ASSIGN_WITH_CTYPE(float)
+  ASSIGN_WITH_CTYPE(double)
+
+#undef ASSIGN_WITH_CTYPE
+
+  // Be explicit to prevent it from being deleted, as fields mbv1 and mbv2
+  // are of reference type.
+ INLINE ap_concat_ref& operator=( + const ap_concat_ref<_AP_W1, _AP_T1, _AP_W2, _AP_T2>& val) { + ap_int_base<_AP_W1 + _AP_W2, false> tmpVal(val); + return operator=(tmpVal); + } + + template + INLINE ap_concat_ref& operator=( + const ap_concat_ref<_AP_W3, _AP_T3, _AP_W4, _AP_T4>& val) { + ap_int_base<_AP_W1 + _AP_W2, false> tmpVal(val); + return operator=(tmpVal); + } + + template + INLINE ap_concat_ref& operator=(const ap_bit_ref<_AP_W3, _AP_S3>& val) { + ap_int_base<_AP_W1 + _AP_W2, false> tmpVal(val); + return operator=(tmpVal); + } + template + INLINE ap_concat_ref& operator=(const ap_range_ref<_AP_W3, _AP_S3>& val) { + ap_int_base<_AP_W1 + _AP_W2, false> tmpVal(val); + return operator=(tmpVal); + } + + template + INLINE ap_concat_ref& operator=( + const af_range_ref<_AP_W3, _AP_I3, _AP_S3, _AP_Q3, _AP_O3, _AP_N3>& val) { + return operator=((const ap_int_base<_AP_W3, false>)(val)); + } + + template + INLINE ap_concat_ref& operator=( + const ap_fixed_base<_AP_W3, _AP_I3, _AP_S3, _AP_Q3, _AP_O3, _AP_N3>& + val) { + return operator=(val.to_ap_int_base()); + } + + template + INLINE ap_concat_ref& operator=( + const af_bit_ref<_AP_W3, _AP_I3, _AP_S3, _AP_Q3, _AP_O3, _AP_N3>& val) { + return operator=((ap_ulong)(bool)(val)); + } + + INLINE operator ap_int_base<_AP_WR, false>() const { return get(); } + + INLINE operator ap_ulong() const { return get().to_uint64(); } + + template + INLINE ap_concat_ref<_AP_WR, ap_concat_ref, _AP_W3, + ap_range_ref<_AP_W3, _AP_S3> > + operator,(const ap_range_ref<_AP_W3, _AP_S3> &a2) { + return ap_concat_ref<_AP_WR, ap_concat_ref, _AP_W3, + ap_range_ref<_AP_W3, _AP_S3> >( + *this, const_cast&>(a2)); + } + + template + INLINE + ap_concat_ref<_AP_WR, ap_concat_ref, _AP_W3, ap_int_base<_AP_W3, _AP_S3> > + operator,(ap_int_base<_AP_W3, _AP_S3> &a2) { + return ap_concat_ref<_AP_WR, ap_concat_ref, _AP_W3, + ap_int_base<_AP_W3, _AP_S3> >(*this, a2); + } + + template + INLINE + ap_concat_ref<_AP_WR, ap_concat_ref, _AP_W3, ap_int_base<_AP_W3, _AP_S3> > + operator,(volatile ap_int_base<_AP_W3, _AP_S3> &a2) { + return ap_concat_ref<_AP_WR, ap_concat_ref, _AP_W3, + ap_int_base<_AP_W3, _AP_S3> >( + *this, const_cast&>(a2)); + } + + template + INLINE + ap_concat_ref<_AP_WR, ap_concat_ref, _AP_W3, ap_int_base<_AP_W3, _AP_S3> > + operator,(const ap_int_base<_AP_W3, _AP_S3> &a2) { + return ap_concat_ref<_AP_WR, ap_concat_ref, _AP_W3, + ap_int_base<_AP_W3, _AP_S3> >( + *this, const_cast&>(a2)); + } + + template + INLINE + ap_concat_ref<_AP_WR, ap_concat_ref, _AP_W3, ap_int_base<_AP_W3, _AP_S3> > + operator,(const volatile ap_int_base<_AP_W3, _AP_S3> &a2) { + // FIXME op's life does not seem long enough + ap_int_base<_AP_W3, _AP_S3> op(a2); + return ap_concat_ref<_AP_WR, ap_concat_ref, _AP_W3, + ap_int_base<_AP_W3, _AP_S3> >( + *this, const_cast&>(op)); + } + + template + INLINE ap_concat_ref<_AP_WR, ap_concat_ref, 1, ap_bit_ref<_AP_W3, _AP_S3> > + operator,(const ap_bit_ref<_AP_W3, _AP_S3> &a2) { + return ap_concat_ref<_AP_WR, ap_concat_ref, 1, ap_bit_ref<_AP_W3, _AP_S3> >( + *this, const_cast&>(a2)); + } + + template + INLINE ap_concat_ref<_AP_WR, ap_concat_ref, _AP_W3 + _AP_W4, + ap_concat_ref<_AP_W3, _AP_T3, _AP_W4, _AP_T4> > + operator,(const ap_concat_ref<_AP_W3, _AP_T3, _AP_W4, _AP_T4> &a2) { + return ap_concat_ref<_AP_WR, ap_concat_ref, _AP_W3 + _AP_W4, + ap_concat_ref<_AP_W3, _AP_T3, _AP_W4, _AP_T4> >( + *this, const_cast&>(a2)); + } + + template + INLINE ap_concat_ref< + _AP_WR, ap_concat_ref, _AP_W3, + af_range_ref<_AP_W3, _AP_I3, _AP_S3, _AP_Q3, _AP_O3, _AP_N3> > + 
operator,( + const af_range_ref<_AP_W3, _AP_I3, _AP_S3, _AP_Q3, _AP_O3, _AP_N3> &a2) { + return ap_concat_ref< + _AP_WR, ap_concat_ref, _AP_W3, + af_range_ref<_AP_W3, _AP_I3, _AP_S3, _AP_Q3, _AP_O3, _AP_N3> >( + *this, + const_cast< + af_range_ref<_AP_W3, _AP_I3, _AP_S3, _AP_Q3, _AP_O3, _AP_N3>&>(a2)); + } + + template + INLINE + ap_concat_ref<_AP_WR, ap_concat_ref, 1, + af_bit_ref<_AP_W3, _AP_I3, _AP_S3, _AP_Q3, _AP_O3, _AP_N3> > + operator,(const af_bit_ref<_AP_W3, _AP_I3, _AP_S3, _AP_Q3, _AP_O3, _AP_N3> + &a2) { + return ap_concat_ref< + _AP_WR, ap_concat_ref, 1, + af_bit_ref<_AP_W3, _AP_I3, _AP_S3, _AP_Q3, _AP_O3, _AP_N3> >( + *this, + const_cast&>( + a2)); + } + + template + INLINE ap_int_base operator&( + const ap_int_base<_AP_W3, _AP_S3>& a2) { + return get() & a2; + } + + template + INLINE ap_int_base operator|( + const ap_int_base<_AP_W3, _AP_S3>& a2) { + return get() | a2; + } + + template + INLINE ap_int_base operator^( + const ap_int_base<_AP_W3, _AP_S3>& a2) { + return get() ^ a2; + } + +#if 0 + template + INLINE ap_int_base slice() { + ap_int_base<_AP_WR, false> bv = get(); + return bv.slice(); + } +#endif + + INLINE ap_int_base<_AP_WR, false> get() const { + ap_int_base<_AP_WR, false> tmpVal(0); + int W_ref1 = mbv1.length(); + int W_ref2 = mbv2.length(); + ap_int_base<_AP_W2, false> v2(mbv2); + ap_int_base<_AP_W1, false> v1(mbv1); + tmpVal.V = _AP_ROOT_op_set_range(tmpVal.V, 0, W_ref2 - 1, v2.V); + tmpVal.V = + _AP_ROOT_op_set_range(tmpVal.V, W_ref2, W_ref1 + W_ref2 - 1, v1.V); + return tmpVal; + } + + template + INLINE void set(const ap_int_base<_AP_W3, false>& val) { + ap_int_base<_AP_W1 + _AP_W2, false> vval(val); + int W_ref1 = mbv1.length(); + int W_ref2 = mbv2.length(); + ap_int_base<_AP_W1, false> tmpVal1; + tmpVal1.V = _AP_ROOT_op_get_range(vval.V, W_ref2, W_ref1 + W_ref2 - 1); + mbv1.set(tmpVal1); + ap_int_base<_AP_W2, false> tmpVal2; + tmpVal2.V = _AP_ROOT_op_get_range(vval.V, 0, W_ref2 - 1); + mbv2.set(tmpVal2); + } + + INLINE int length() const { return mbv1.length() + mbv2.length(); } +}; // struct ap_concat_ref + +/* Range (slice) reference. + ---------------------------------------------------------------- +*/ +template +struct ap_range_ref { + // struct ssdm_int or its sim model. + // TODO make it possible to reference to ap_fixed_base/ap_fixed/ap_ufixed + // and then we can retire af_range_ref. + typedef ap_int_base<_AP_W, _AP_S> ref_type; + ref_type& d_bv; + int l_index; + int h_index; + + public: + INLINE ap_range_ref(const ap_range_ref<_AP_W, _AP_S>& ref) + : d_bv(ref.d_bv), l_index(ref.l_index), h_index(ref.h_index) {} + + INLINE ap_range_ref(ref_type* bv, int h, int l) + : d_bv(*bv), l_index(l), h_index(h) {} + + INLINE ap_range_ref(const ref_type* bv, int h, int l) + : d_bv(*const_cast(bv)), l_index(l), h_index(h) {} + + INLINE operator ap_int_base<_AP_W, false>() const { + ap_int_base<_AP_W, false> ret; + ret.V = _AP_ROOT_op_get_range(d_bv.V, l_index, h_index); + return ret; + } + + INLINE operator ap_ulong() const { return to_uint64(); } + + /// @name assign operators + // @{ + + // FIXME disabled to work-around lagacy code assigning from sc_signal, + // which dependes on implicit type conversion. + // + // /// assign from hls supported C integral types. 
+ // template + // INLINE typename _ap_type::enable_if<_ap_type::is_integral::value, + // ap_range_ref&>::type + // operator=(T val) { + // ap_int_base<_AP_W, false> tmp(val); + // d_bv.V = _AP_ROOT_op_set_range(d_bv.V, l_index, h_index, tmp.V); + // return *this; + // } +#define ASSIGN_WITH_CTYPE(_Tp) \ + INLINE ap_range_ref& operator=(_Tp val) { \ + ap_int_base<_AP_W, false> tmp(val); \ + d_bv.V = _AP_ROOT_op_set_range(d_bv.V, l_index, h_index, tmp.V); \ + return *this; \ + } + + ASSIGN_WITH_CTYPE(bool) + ASSIGN_WITH_CTYPE(char) + ASSIGN_WITH_CTYPE(signed char) + ASSIGN_WITH_CTYPE(unsigned char) + ASSIGN_WITH_CTYPE(short) + ASSIGN_WITH_CTYPE(unsigned short) + ASSIGN_WITH_CTYPE(int) + ASSIGN_WITH_CTYPE(unsigned int) + ASSIGN_WITH_CTYPE(long) + ASSIGN_WITH_CTYPE(unsigned long) + ASSIGN_WITH_CTYPE(ap_slong) + ASSIGN_WITH_CTYPE(ap_ulong) + ASSIGN_WITH_CTYPE(half) + ASSIGN_WITH_CTYPE(float) + ASSIGN_WITH_CTYPE(double) + +#undef ASSIGN_WITH_CTYPE + + /// assign using string. XXX crucial for cosim. + INLINE ap_range_ref& operator=(const char* val) { + const ap_int_base<_AP_W, false> tmp(val); // XXX figure out radix + d_bv.V = _AP_ROOT_op_set_range(d_bv.V, l_index, h_index, tmp.V); + return *this; + } + + /// assign from ap_int_base. + template + INLINE ap_range_ref& operator=(const ap_int_base<_AP_W2, _AP_S2>& val) { + ap_int_base<_AP_W, false> tmp(val); + d_bv.V = _AP_ROOT_op_set_range(d_bv.V, l_index, h_index, tmp.V); + return *this; + } + + /// copy assign operator + // XXX Be explicit to prevent it from being deleted, as field d_bv + // is of reference type. + INLINE ap_range_ref& operator=(const ap_range_ref& val) { + return operator=((const ap_int_base<_AP_W, false>)val); + } + + /// assign from range reference to ap_int_base. + template + INLINE ap_range_ref& operator=(const ap_range_ref<_AP_W2, _AP_S2>& val) { + return operator=((const ap_int_base<_AP_W2, false>)val); + } + + /// assign from bit reference to ap_int_base. + template + INLINE ap_range_ref& operator=(const ap_bit_ref<_AP_W2, _AP_S2>& val) { + return operator=((ap_ulong)(bool)(val)); + } + + /// assign from ap_fixed_base. + template + INLINE ap_range_ref& operator=( + const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& + val) { + return operator=(val.to_ap_int_base()); + } + + /// assign from range reference to ap_fixed_base. + template + INLINE ap_range_ref& operator=( + const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& val) { + return operator=((const ap_int_base<_AP_W2, false>)val); + } + + /// assign from bit reference to ap_fixed_base. + template + INLINE ap_range_ref& operator=( + const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& val) { + return operator=((ap_ulong)(bool)(val)); + } + + /// assign from compound reference. 
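+  /// e.g. (illustrative) v.range(7, 0) = (hi, lo); writes the concatenated
+  /// value of two 4-bit variables into bits 7..0 of v.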
+ template + INLINE ap_range_ref& operator=( + const ap_concat_ref<_AP_W2, _AP_T3, _AP_W3, _AP_T3>& val) { + return operator=((const ap_int_base<_AP_W2 + _AP_W3, false>)(val)); + } + // @} + + template + INLINE + ap_concat_ref<_AP_W, ap_range_ref, _AP_W2, ap_range_ref<_AP_W2, _AP_S2> > + operator,(const ap_range_ref<_AP_W2, _AP_S2> &a2) { + return ap_concat_ref<_AP_W, ap_range_ref, _AP_W2, + ap_range_ref<_AP_W2, _AP_S2> >( + *this, const_cast&>(a2)); + } + + template + INLINE + ap_concat_ref<_AP_W, ap_range_ref, _AP_W2, ap_int_base<_AP_W2, _AP_S2> > + operator,(ap_int_base<_AP_W2, _AP_S2> &a2) { + return ap_concat_ref<_AP_W, ap_range_ref, _AP_W2, + ap_int_base<_AP_W2, _AP_S2> >(*this, a2); + } + + INLINE + ap_concat_ref<_AP_W, ap_range_ref, _AP_W, ap_int_base<_AP_W, _AP_S> > + operator,(ap_int_base<_AP_W, _AP_S>& a2) { + return ap_concat_ref<_AP_W, ap_range_ref, _AP_W, + ap_int_base<_AP_W, _AP_S> >(*this, a2); + } + + template + INLINE + ap_concat_ref<_AP_W, ap_range_ref, _AP_W2, ap_int_base<_AP_W2, _AP_S2> > + operator,(volatile ap_int_base<_AP_W2, _AP_S2> &a2) { + return ap_concat_ref<_AP_W, ap_range_ref, _AP_W2, + ap_int_base<_AP_W2, _AP_S2> >( + *this, const_cast&>(a2)); + } + + template + INLINE + ap_concat_ref<_AP_W, ap_range_ref, _AP_W2, ap_int_base<_AP_W2, _AP_S2> > + operator,(const ap_int_base<_AP_W2, _AP_S2> &a2) { + return ap_concat_ref<_AP_W, ap_range_ref, _AP_W2, + ap_int_base<_AP_W2, _AP_S2> >( + *this, const_cast&>(a2)); + } + + template + INLINE + ap_concat_ref<_AP_W, ap_range_ref, _AP_W2, ap_int_base<_AP_W2, _AP_S2> > + operator,(const volatile ap_int_base<_AP_W2, _AP_S2> &a2) { + return ap_concat_ref<_AP_W, ap_range_ref, _AP_W2, + ap_int_base<_AP_W2, _AP_S2> >( + *this, const_cast&>(a2)); + } + + template + INLINE ap_concat_ref<_AP_W, ap_range_ref, 1, ap_bit_ref<_AP_W2, _AP_S2> > + operator,(const ap_bit_ref<_AP_W2, _AP_S2> &a2) { + return ap_concat_ref<_AP_W, ap_range_ref, 1, ap_bit_ref<_AP_W2, _AP_S2> >( + *this, const_cast&>(a2)); + } + + template + INLINE ap_concat_ref<_AP_W, ap_range_ref, _AP_W2 + _AP_W3, + ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> > + operator,(const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> &a2) { + return ap_concat_ref<_AP_W, ap_range_ref, _AP_W2 + _AP_W3, + ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> >( + *this, const_cast&>(a2)); + } + + template + INLINE ap_concat_ref< + _AP_W, ap_range_ref, _AP_W2, + af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > + operator,( + const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> a2) { + return ap_concat_ref< + _AP_W, ap_range_ref, _AP_W2, + af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >( + *this, + const_cast< + af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>&>(a2)); + } + + template + INLINE + ap_concat_ref<_AP_W, ap_range_ref, 1, + af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > + operator,(const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> + &a2) { + return ap_concat_ref< + _AP_W, ap_range_ref, 1, + af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >( + *this, + const_cast&>( + a2)); + } + + template + INLINE bool operator==(const ap_range_ref<_AP_W2, _AP_S2>& op2) { + ap_int_base<_AP_W, false> lop(*this); + ap_int_base<_AP_W2, false> hop(op2); + return lop == hop; + } + + template + INLINE bool operator!=(const ap_range_ref<_AP_W2, _AP_S2>& op2) { + return !(operator==(op2)); + } + + template + INLINE bool operator<(const ap_range_ref<_AP_W2, _AP_S2>& op2) { + ap_int_base<_AP_W, false> lop(*this); + 
ap_int_base<_AP_W2, false> hop(op2); + return lop < hop; + } + + template + INLINE bool operator<=(const ap_range_ref<_AP_W2, _AP_S2>& op2) { + ap_int_base<_AP_W, false> lop(*this); + ap_int_base<_AP_W2, false> hop(op2); + return lop <= hop; + } + + template + INLINE bool operator>(const ap_range_ref<_AP_W2, _AP_S2>& op2) { + return !(operator<=(op2)); + } + + template + INLINE bool operator>=(const ap_range_ref<_AP_W2, _AP_S2>& op2) { + return !(operator<(op2)); + } + + template + INLINE ap_range_ref<_AP_W, _AP_S>& operator|=( + const ap_range_ref<_AP_W2, _AP_S2>& op2) { + (this->d_bv).V |= (op2.d_bv).V; + return *this; + }; + + template + INLINE ap_range_ref<_AP_W, _AP_S>& operator|=( + const ap_int_base<_AP_W2, _AP_S2>& op2) { + (this->d_bv).V |= op2.V; + return *this; + }; + + template + INLINE ap_range_ref<_AP_W, _AP_S>& operator&=( + const ap_range_ref<_AP_W2, _AP_S2>& op2) { + (this->d_bv).V &= (op2.d_bv).V; + return *this; + }; + + template + INLINE ap_range_ref<_AP_W, _AP_S>& operator&=( + const ap_int_base<_AP_W2, _AP_S2>& op2) { + (this->d_bv).V &= op2.V; + return *this; + }; + + template + INLINE ap_range_ref<_AP_W, _AP_S>& operator^=( + const ap_range_ref<_AP_W2, _AP_S2>& op2) { + (this->d_bv).V ^= (op2.d_bv).V; + return *this; + }; + + template + INLINE ap_range_ref<_AP_W, _AP_S>& operator^=( + const ap_int_base<_AP_W2, _AP_S2>& op2) { + (this->d_bv).V ^= op2.V; + return *this; + }; + + INLINE ap_int_base<_AP_W, false> get() const { + ap_int_base<_AP_W, false> ret; + ret.V = _AP_ROOT_op_get_range(d_bv.V, l_index, h_index); + return ret; + } + + template + INLINE void set(const ap_int_base<_AP_W2, false>& val) { + d_bv.V = _AP_ROOT_op_set_range(d_bv.V, l_index, h_index, val.V); + } + + INLINE int length() const { + return h_index >= l_index ? h_index - l_index + 1 : l_index - h_index + 1; + } + + INLINE int to_int() const { + return (int)(_AP_ROOT_op_get_range(d_bv.V, l_index, h_index)); + } + + INLINE unsigned to_uint() const { + return (unsigned)(_AP_ROOT_op_get_range(d_bv.V, l_index, h_index)); + } + + INLINE long to_long() const { + return (long)(_AP_ROOT_op_get_range(d_bv.V, l_index, h_index)); + } + + INLINE unsigned long to_ulong() const { + return (unsigned long)(_AP_ROOT_op_get_range(d_bv.V, l_index, h_index)); + } + + INLINE ap_slong to_int64() const { + return (ap_slong)(_AP_ROOT_op_get_range(d_bv.V, l_index, h_index)); + } + + INLINE ap_ulong to_uint64() const { + return (ap_ulong)(_AP_ROOT_op_get_range(d_bv.V, l_index, h_index)); + } + + INLINE bool and_reduce() const { + bool ret = true; + bool reverse = l_index > h_index; + unsigned low = reverse ? h_index : l_index; + unsigned high = reverse ? l_index : h_index; + for (unsigned i = low; i != high; ++i) { +#ifdef __SYNTHESIS__ +#pragma HLS unroll +#endif + ret &= _AP_ROOT_op_get_bit(d_bv.V, i); + } + return ret; + } + + INLINE bool or_reduce() const { + bool ret = false; + bool reverse = l_index > h_index; + unsigned low = reverse ? h_index : l_index; + unsigned high = reverse ? l_index : h_index; + for (unsigned i = low; i != high; ++i) { +#ifdef __SYNTHESIS__ +#pragma HLS unroll +#endif + ret |= _AP_ROOT_op_get_bit(d_bv.V, i); + } + return ret; + } + + INLINE bool xor_reduce() const { + bool ret = false; + bool reverse = l_index > h_index; + unsigned low = reverse ? h_index : l_index; + unsigned high = reverse ? 
l_index : h_index; + for (unsigned i = low; i != high; ++i) { +#ifdef __SYNTHESIS__ +#pragma HLS unroll +#endif + ret ^= _AP_ROOT_op_get_bit(d_bv.V, i); + } + return ret; + } +#ifndef __SYNTHESIS__ + std::string to_string(signed char radix = 2) const { + ap_int_base<_AP_W, false> ret; + ret.V = _AP_ROOT_op_get_range(d_bv.V, l_index, h_index); + return ret.to_string(radix); + } +#else + // XXX HLS will delete this in synthesis + INLINE char* to_string(signed char radix = 2) const { + return 0; + } +#endif +}; // struct ap_range_ref + +// XXX apcc cannot handle global std::ios_base::Init() brought in by +#ifndef AP_AUTOCC +#ifndef __SYNTHESIS__ +template +INLINE std::ostream& operator<<(std::ostream& os, + const ap_range_ref<_AP_W, _AP_S>& x) { + std::ios_base::fmtflags ff = std::cout.flags(); + if (ff & std::cout.hex) { + os << x.to_string(16); // don't print sign + } else if (ff & std::cout.oct) { + os << x.to_string(8); // don't print sign + } else { + os << x.to_string(10); + } + return os; +} +#endif // ifndef __SYNTHESIS__ + +#ifndef __SYNTHESIS__ +template +INLINE std::istream& operator>>(std::istream& in, + ap_range_ref<_AP_W, _AP_S>& op) { + std::string str; + in >> str; + op = ap_int_base<_AP_W, _AP_S>(str.c_str()); + return in; +} +#endif // ifndef __SYNTHESIS__ +#endif // ifndef AP_AUTOCC + +/* Bit reference. + ---------------------------------------------------------------- +*/ +template +struct ap_bit_ref { + // struct ssdm_int or its sim model. + // TODO make it possible to reference to ap_fixed_base/ap_fixed/ap_ufixed + // and then we can retire af_bit_ref. + typedef ap_int_base<_AP_W, _AP_S> ref_type; + ref_type& d_bv; + int d_index; + + public: + // copy ctor + INLINE ap_bit_ref(const ap_bit_ref<_AP_W, _AP_S>& ref) + : d_bv(ref.d_bv), d_index(ref.d_index) {} + + INLINE ap_bit_ref(ref_type* bv, int index = 0) : d_bv(*bv), d_index(index) {} + + INLINE ap_bit_ref(const ref_type* bv, int index = 0) + : d_bv(*const_cast(bv)), d_index(index) {} + + INLINE operator bool() const { return _AP_ROOT_op_get_bit(d_bv.V, d_index); } + INLINE bool to_bool() const { return _AP_ROOT_op_get_bit(d_bv.V, d_index); } + + // assign op from hls supported C integral types. + // FIXME disabled to support sc_signal. + // NOTE this used to be unsigned long long. 
+ //template + //INLINE typename _ap_type::enable_if<_ap_type::is_integral::value, + // ap_bit_ref&>::type + //operator=(T val) { + // d_bv.V = _AP_ROOT_op_set_bit(d_bv.V, d_index, val); + // return *this; + //} +#define ASSIGN_WITH_CTYPE(_Tp) \ + INLINE ap_bit_ref& operator=(_Tp val) { \ + d_bv.V = _AP_ROOT_op_set_bit(d_bv.V, d_index, val); \ + return *this; \ + } + + ASSIGN_WITH_CTYPE(bool) + ASSIGN_WITH_CTYPE(char) + ASSIGN_WITH_CTYPE(signed char) + ASSIGN_WITH_CTYPE(unsigned char) + ASSIGN_WITH_CTYPE(short) + ASSIGN_WITH_CTYPE(unsigned short) + ASSIGN_WITH_CTYPE(int) + ASSIGN_WITH_CTYPE(unsigned int) + ASSIGN_WITH_CTYPE(long) + ASSIGN_WITH_CTYPE(unsigned long) + ASSIGN_WITH_CTYPE(ap_slong) + ASSIGN_WITH_CTYPE(ap_ulong) + +#undef ASSIGN_WITH_CTYPE + +#define ASSIGN_WITH_CTYPE_FP(_Tp) \ + INLINE ap_bit_ref& operator=(_Tp val) { \ + bool tmp_val = val; \ + d_bv.V = _AP_ROOT_op_set_bit(d_bv.V, d_index,tmp_val); \ + return *this; \ + } + + ASSIGN_WITH_CTYPE_FP(half) + ASSIGN_WITH_CTYPE_FP(float) + ASSIGN_WITH_CTYPE_FP(double) + +#undef ASSIGN_WITH_CTYPE_FP + + + template + INLINE ap_bit_ref& operator=(const ap_int_base<_AP_W2, _AP_S2>& val) { + return operator=((ap_ulong)(val.V != 0)); + } + + template + INLINE ap_bit_ref& operator=(const ap_range_ref<_AP_W2, _AP_S2>& val) { + return operator=((ap_int_base<_AP_W2, false>)val); + } + + // Be explicit to prevent it from being deleted, as field d_bv + // is of reference type. + INLINE ap_bit_ref& operator=(const ap_bit_ref& val) { + return operator=((ap_ulong)(bool)val); + } + + template + INLINE ap_bit_ref& operator=(const ap_bit_ref<_AP_W2, _AP_S2>& val) { + return operator=((ap_ulong)(bool)val); + } + + template + INLINE ap_bit_ref& operator=( + const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& val) { + return operator=((const ap_int_base<_AP_W2, false>)val); + } + + template + INLINE ap_bit_ref& operator=( + const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& val) { + return operator=((ap_ulong)(bool)val); + } + + template + INLINE ap_bit_ref& operator=( + const ap_concat_ref<_AP_W2, _AP_T3, _AP_W3, _AP_T3>& val) { + return operator=((const ap_int_base<_AP_W2 + _AP_W3, false>)val); + } + + template + INLINE ap_concat_ref<1, ap_bit_ref, _AP_W2, ap_int_base<_AP_W2, _AP_S2> > + operator,(ap_int_base<_AP_W2, _AP_S2> &a2) { + return ap_concat_ref<1, ap_bit_ref, _AP_W2, ap_int_base<_AP_W2, _AP_S2> >( + *this, a2); + } + + template + INLINE ap_concat_ref<1, ap_bit_ref, _AP_W2, ap_int_base<_AP_W2, _AP_S2> > + operator,(volatile ap_int_base<_AP_W2, _AP_S2> &a2) { + return ap_concat_ref<1, ap_bit_ref, _AP_W2, ap_int_base<_AP_W2, _AP_S2> >( + *this, const_cast&>(a2)); + } + + template + INLINE ap_concat_ref<1, ap_bit_ref, _AP_W2, ap_int_base<_AP_W2, _AP_S2> > + operator,(const ap_int_base<_AP_W2, _AP_S2> &a2) { + ap_int_base<_AP_W2, _AP_S2> op(a2); + return ap_concat_ref<1, ap_bit_ref, _AP_W2, ap_int_base<_AP_W2, _AP_S2> >( + *this, const_cast&>(op)); + } + + template + INLINE ap_concat_ref<1, ap_bit_ref, _AP_W2, ap_int_base<_AP_W2, _AP_S2> > + operator,(const volatile ap_int_base<_AP_W2, _AP_S2> &a2) { + ap_int_base<_AP_W2, _AP_S2> op(a2); + return ap_concat_ref<1, ap_bit_ref, _AP_W2, ap_int_base<_AP_W2, _AP_S2> >( + *this, const_cast&>(op)); + } + + template + INLINE ap_concat_ref<1, ap_bit_ref, _AP_W2, ap_range_ref<_AP_W2, _AP_S2> > + operator,(const ap_range_ref<_AP_W2, _AP_S2> &a2) { + return ap_concat_ref<1, ap_bit_ref, _AP_W2, ap_range_ref<_AP_W2, _AP_S2> >( + *this, const_cast&>(a2)); + } + + template + INLINE 
ap_concat_ref<1, ap_bit_ref, 1, ap_bit_ref<_AP_W2, _AP_S2> > operator,( + const ap_bit_ref<_AP_W2, _AP_S2> &a2) { + return ap_concat_ref<1, ap_bit_ref, 1, ap_bit_ref<_AP_W2, _AP_S2> >( + *this, const_cast&>(a2)); + } + + template + INLINE ap_concat_ref<1, ap_bit_ref, _AP_W2 + _AP_W3, + ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> > + operator,(const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> &a2) { + return ap_concat_ref<1, ap_bit_ref, _AP_W2 + _AP_W3, + ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> >( + *this, const_cast&>(a2)); + } + + template + INLINE ap_concat_ref< + 1, ap_bit_ref, _AP_W2, + af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > + operator,( + const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> &a2) { + return ap_concat_ref< + 1, ap_bit_ref, _AP_W2, + af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >( + *this, + const_cast< + af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>&>(a2)); + } + + template + INLINE ap_concat_ref<1, ap_bit_ref, 1, af_bit_ref<_AP_W2, _AP_I2, _AP_S2, + _AP_Q2, _AP_O2, _AP_N2> > + operator,( + const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> &a2) { + return ap_concat_ref<1, ap_bit_ref, 1, af_bit_ref<_AP_W2, _AP_I2, _AP_S2, + _AP_Q2, _AP_O2, _AP_N2> >( + *this, + const_cast&>( + a2)); + } + + template + INLINE bool operator==(const ap_bit_ref<_AP_W2, _AP_S2>& op) { + return get() == op.get(); + } + + template + INLINE bool operator!=(const ap_bit_ref<_AP_W2, _AP_S2>& op) { + return get() != op.get(); + } + + INLINE bool get() const { return _AP_ROOT_op_get_bit(d_bv.V, d_index); } + + INLINE bool get() { return _AP_ROOT_op_get_bit(d_bv.V, d_index); } + + template + INLINE void set(const ap_int_base<_AP_W3, false>& val) { + operator=(val); + } + + INLINE bool operator~() const { + bool bit = _AP_ROOT_op_get_bit(d_bv.V, d_index); + return bit ? false : true; + } + + INLINE int length() const { return 1; } + +#ifndef __SYNTHESIS__ + std::string to_string() const { return get() ? "1" : "0"; } +#else + // XXX HLS will delete this in synthesis + INLINE char* to_string() const { return 0; } +#endif +}; // struct ap_bit_ref + +/* ap_range_ref with int. + * ------------------------------------------------------------ + */ +// equality and relational operators. 
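// A sketch of what the overloads generated just below permit (editorial
// example, assuming ap_int.h's ap_uint with its range()/operator[] accessors):
//
//   ap_uint<8> x = 0x3C;            // 0b00111100
//   bool a = (x.range(5, 2) == 15); // ap_range_ref == int -> true
//   bool b = (x[7] < 1);            // ap_bit_ref  <  int  -> true
//   bool c = (0 != x[2]);           // int != ap_bit_ref   -> true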
+#define REF_REL_OP_WITH_INT(REL_OP, C_TYPE, _AP_W2, _AP_S2) \ + template \ + INLINE bool operator REL_OP(const ap_range_ref<_AP_W, _AP_S>& op, \ + C_TYPE op2) { \ + return ap_int_base<_AP_W, false>(op) \ + REL_OP ap_int_base<_AP_W2, _AP_S2>(op2); \ + } \ + template \ + INLINE bool operator REL_OP(const ap_bit_ref<_AP_W, _AP_S>& op, \ + C_TYPE op2) { \ + return bool(op) REL_OP op2; \ + } \ + template \ + INLINE bool operator REL_OP(C_TYPE op2, \ + const ap_bit_ref<_AP_W, _AP_S>& op) { \ + return op2 REL_OP bool(op); \ + } \ + template \ + INLINE bool operator REL_OP( \ + const ap_concat_ref<_AP_W, _AP_T, _AP_W1, _AP_T1>& op, C_TYPE op2) { \ + return ap_int_base<_AP_W + _AP_W1, false>(op) \ + REL_OP ap_int_base<_AP_W2, _AP_S2>(op2); \ + } + +// Make the line shorter than 5000 chars +#define REF_REL_WITH_INT_1(C_TYPE, _AP_WI, _AP_SI) \ + REF_REL_OP_WITH_INT(>, C_TYPE, _AP_WI, _AP_SI) \ + REF_REL_OP_WITH_INT(<, C_TYPE, _AP_WI, _AP_SI) \ + REF_REL_OP_WITH_INT(>=, C_TYPE, _AP_WI, _AP_SI) \ + REF_REL_OP_WITH_INT(<=, C_TYPE, _AP_WI, _AP_SI) + +REF_REL_WITH_INT_1(bool, 1, false) +REF_REL_WITH_INT_1(char, 8, CHAR_IS_SIGNED) +REF_REL_WITH_INT_1(signed char, 8, true) +REF_REL_WITH_INT_1(unsigned char, 8, false) +REF_REL_WITH_INT_1(short, _AP_SIZE_short, true) +REF_REL_WITH_INT_1(unsigned short, _AP_SIZE_short, false) +REF_REL_WITH_INT_1(int, _AP_SIZE_int, true) +REF_REL_WITH_INT_1(unsigned int, _AP_SIZE_int, false) +REF_REL_WITH_INT_1(long, _AP_SIZE_long, true) +REF_REL_WITH_INT_1(unsigned long, _AP_SIZE_long, false) +REF_REL_WITH_INT_1(ap_slong, _AP_SIZE_ap_slong, true) +REF_REL_WITH_INT_1(ap_ulong, _AP_SIZE_ap_slong, false) + +// Make the line shorter than 5000 chars +#define REF_REL_WITH_INT_2(C_TYPE, _AP_WI, _AP_SI) \ + REF_REL_OP_WITH_INT(==, C_TYPE, _AP_WI, _AP_SI) \ + REF_REL_OP_WITH_INT(!=, C_TYPE, _AP_WI, _AP_SI) + +REF_REL_WITH_INT_2(bool, 1, false) +REF_REL_WITH_INT_2(char, 8, CHAR_IS_SIGNED) +REF_REL_WITH_INT_2(signed char, 8, true) +REF_REL_WITH_INT_2(unsigned char, 8, false) +REF_REL_WITH_INT_2(short, _AP_SIZE_short, true) +REF_REL_WITH_INT_2(unsigned short, _AP_SIZE_short, false) +REF_REL_WITH_INT_2(int, _AP_SIZE_int, true) +REF_REL_WITH_INT_2(unsigned int, _AP_SIZE_int, false) +REF_REL_WITH_INT_2(long, _AP_SIZE_long, true) +REF_REL_WITH_INT_2(unsigned long, _AP_SIZE_long, false) +REF_REL_WITH_INT_2(ap_slong, _AP_SIZE_ap_slong, true) +REF_REL_WITH_INT_2(ap_ulong, _AP_SIZE_ap_slong, false) + +#undef REF_REL_OP_WITH_INT +#undef REF_REL_WITH_INT_1 +#undef REF_REL_WITH_INT_2 + +#define REF_BIN_OP_WITH_INT(BIN_OP, RTYPE, C_TYPE, _AP_W2, _AP_S2) \ + template \ + INLINE typename ap_int_base<_AP_W, false>::template RType<_AP_W2, \ + _AP_S2>::RTYPE \ + operator BIN_OP(const ap_range_ref<_AP_W, _AP_S>& op, C_TYPE op2) { \ + return ap_int_base<_AP_W, false>(op) \ + BIN_OP ap_int_base<_AP_W2, _AP_S2>(op2); \ + } \ + template \ + INLINE typename ap_int_base<_AP_W2, _AP_S2>::template RType<_AP_W, \ + false>::RTYPE \ + operator BIN_OP(C_TYPE op2, const ap_range_ref<_AP_W, _AP_S>& op) { \ + return ap_int_base<_AP_W2, _AP_S2>(op2) \ + BIN_OP ap_int_base<_AP_W, false>(op); \ + } + +// arithmetic operators. 
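// A sketch of the mixed arithmetic the generators below enable (editorial
// example; both sides are first widened to ap_int_base, so the RType result
// keeps full precision instead of wrapping at the native or slice width):
//
//   ap_uint<8> x = 200;
//   ap_int<34> s = x.range(7, 0) + 100;  // 300, no 8-bit wrap-around
//   ap_int<32> q = 1000 / x.range(7, 0); // int / ap_range_ref -> 5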
+#define REF_BIN_OP_WITH_INT_ARITH(C_TYPE, _AP_W2, _AP_S2) \
+  REF_BIN_OP_WITH_INT(+, plus, C_TYPE, (_AP_W2), (_AP_S2)) \
+  REF_BIN_OP_WITH_INT(-, minus, C_TYPE, (_AP_W2), (_AP_S2)) \
+  REF_BIN_OP_WITH_INT(*, mult, C_TYPE, (_AP_W2), (_AP_S2)) \
+  REF_BIN_OP_WITH_INT(/, div, C_TYPE, (_AP_W2), (_AP_S2)) \
+  REF_BIN_OP_WITH_INT(%, mod, C_TYPE, (_AP_W2), (_AP_S2))
+
+REF_BIN_OP_WITH_INT_ARITH(bool, 1, false)
+REF_BIN_OP_WITH_INT_ARITH(char, 8, CHAR_IS_SIGNED)
+REF_BIN_OP_WITH_INT_ARITH(signed char, 8, true)
+REF_BIN_OP_WITH_INT_ARITH(unsigned char, 8, false)
+REF_BIN_OP_WITH_INT_ARITH(short, _AP_SIZE_short, true)
+REF_BIN_OP_WITH_INT_ARITH(unsigned short, _AP_SIZE_short, false)
+REF_BIN_OP_WITH_INT_ARITH(int, _AP_SIZE_int, true)
+REF_BIN_OP_WITH_INT_ARITH(unsigned int, _AP_SIZE_int, false)
+REF_BIN_OP_WITH_INT_ARITH(long, _AP_SIZE_long, true)
+REF_BIN_OP_WITH_INT_ARITH(unsigned long, _AP_SIZE_long, false)
+REF_BIN_OP_WITH_INT_ARITH(ap_slong, _AP_SIZE_ap_slong, true)
+REF_BIN_OP_WITH_INT_ARITH(ap_ulong, _AP_SIZE_ap_slong, false)
+
+#undef REF_BIN_OP_WITH_INT_ARITH
+
+// bitwise and shift operators
+#define REF_BIN_OP_WITH_INT_BITS(C_TYPE, _AP_W2, _AP_S2) \
+  REF_BIN_OP_WITH_INT(&, logic, C_TYPE, (_AP_W2), (_AP_S2)) \
+  REF_BIN_OP_WITH_INT(|, logic, C_TYPE, (_AP_W2), (_AP_S2)) \
+  REF_BIN_OP_WITH_INT(^, logic, C_TYPE, (_AP_W2), (_AP_S2)) \
+  REF_BIN_OP_WITH_INT(>>, arg1, C_TYPE, (_AP_W2), (_AP_S2)) \
+  REF_BIN_OP_WITH_INT(<<, arg1, C_TYPE, (_AP_W2), (_AP_S2))
+
+REF_BIN_OP_WITH_INT_BITS(bool, 1, false)
+REF_BIN_OP_WITH_INT_BITS(char, 8, CHAR_IS_SIGNED)
+REF_BIN_OP_WITH_INT_BITS(signed char, 8, true)
+REF_BIN_OP_WITH_INT_BITS(unsigned char, 8, false)
+REF_BIN_OP_WITH_INT_BITS(short, _AP_SIZE_short, true)
+REF_BIN_OP_WITH_INT_BITS(unsigned short, _AP_SIZE_short, false)
+REF_BIN_OP_WITH_INT_BITS(int, _AP_SIZE_int, true)
+REF_BIN_OP_WITH_INT_BITS(unsigned int, _AP_SIZE_int, false)
+REF_BIN_OP_WITH_INT_BITS(long, _AP_SIZE_long, true)
+REF_BIN_OP_WITH_INT_BITS(unsigned long, _AP_SIZE_long, false)
+REF_BIN_OP_WITH_INT_BITS(ap_slong, _AP_SIZE_ap_slong, true)
+REF_BIN_OP_WITH_INT_BITS(ap_ulong, _AP_SIZE_ap_slong, false)
+
+#undef REF_BIN_OP_WITH_INT_BITS
+
+/* ap_range_ref with ap_range_ref
+ * ------------------------------------------------------------
+ */
+#define REF_BIN_OP(BIN_OP, RTYPE) \
+  template <int _AP_W, bool _AP_S, int _AP_W2, bool _AP_S2> \
+  INLINE \
+      typename ap_int_base<_AP_W, false>::template RType<_AP_W2, false>::RTYPE \
+      operator BIN_OP(const ap_range_ref<_AP_W, _AP_S>& lhs, \
+                      const ap_range_ref<_AP_W2, _AP_S2>& rhs) { \
+    return (lhs.operator ap_int_base<_AP_W, false>())BIN_OP( \
+        rhs.operator ap_int_base<_AP_W2, false>()); \
+  }
+
+REF_BIN_OP(+, plus)
+REF_BIN_OP(-, minus)
+REF_BIN_OP(*, mult)
+REF_BIN_OP(/, div)
+REF_BIN_OP(%, mod)
+REF_BIN_OP(&, logic)
+REF_BIN_OP(|, logic)
+REF_BIN_OP(^, logic)
+REF_BIN_OP(>>, arg1)
+REF_BIN_OP(<<, arg1)
+
+/* ap_concat_ref with ap_concat_ref.
+ * ------------------------------------------------------------
+ */
+
+//************************************************************************
+// Implement
+// ap_int_base = ap_concat_ref OP ap_concat_ref
+// for operators +, -, *, /, %, >>, <<, &, |, ^
+// Without these operators the operands are converted to int64 and
+// larger results lose information (higher order bits).
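// For example (editorial): with 70-bit operands
//   ap_uint<70> a, b, c, d;
//   ap_uint<141> r = (a, b) + (c, d);
// each concatenation is 140 bits wide; a fallback conversion through int64
// would silently drop the top 76 bits, while the overloads below return a
// full-width RType.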
+// +// operand OP +// / | +// left-concat right-concat +// / | / | +// +// +// _AP_LW1, _AP_LT1 (width and type of left-concat's left side) +// _AP_LW2, _AP_LT2 (width and type of left-concat's right side) +// Similarly for RHS of operand OP: _AP_RW1, AP_RW2, _AP_RT1, _AP_RT2 +// +// In Verilog 2001 result of concatenation is always unsigned even +// when both sides are signed. +//************************************************************************ + +#undef SYN_CONCAT_REF_BIN_OP + +#define SYN_CONCAT_REF_BIN_OP(BIN_OP, RTYPE) \ + template \ + INLINE typename ap_int_base<_AP_LW1 + _AP_LW2, false>::template RType< \ + _AP_RW1 + _AP_RW2, false>::RTYPE \ + operator BIN_OP( \ + const ap_concat_ref<_AP_LW1, _AP_LT1, _AP_LW2, _AP_LT2>& lhs, \ + const ap_concat_ref<_AP_RW1, _AP_RT1, _AP_RW2, _AP_RT2>& rhs) { \ + return lhs.get() BIN_OP rhs.get(); \ + } + +SYN_CONCAT_REF_BIN_OP(+, plus) +SYN_CONCAT_REF_BIN_OP(-, minus) +SYN_CONCAT_REF_BIN_OP(*, mult) +SYN_CONCAT_REF_BIN_OP(/, div) +SYN_CONCAT_REF_BIN_OP(%, mod) +SYN_CONCAT_REF_BIN_OP(&, logic) +SYN_CONCAT_REF_BIN_OP(|, logic) +SYN_CONCAT_REF_BIN_OP(^, logic) +SYN_CONCAT_REF_BIN_OP(>>, arg1) +SYN_CONCAT_REF_BIN_OP(<<, arg1) + +#undef SYN_CONCAT_REF_BIN_OP + +#define CONCAT_OP_WITH_INT(C_TYPE, _AP_WI, _AP_SI) \ + template \ + INLINE ap_int_base<_AP_W + _AP_WI, false> operator,( \ + const ap_int_base<_AP_W, _AP_S> &op1, C_TYPE op2) { \ + ap_int_base<_AP_WI + _AP_W, false> val(op2); \ + ap_int_base<_AP_WI + _AP_W, false> ret(op1); \ + ret <<= _AP_WI; \ + if (_AP_SI) { \ + val <<= _AP_W; \ + val >>= _AP_W; \ + } \ + ret |= val; \ + return ret; \ + } \ + template \ + INLINE ap_int_base<_AP_W + _AP_WI, false> operator,( \ + C_TYPE op1, const ap_int_base<_AP_W, _AP_S> &op2) { \ + ap_int_base<_AP_WI + _AP_W, false> val(op1); \ + ap_int_base<_AP_WI + _AP_W, false> ret(op2); \ + if (_AP_S) { \ + ret <<= _AP_WI; \ + ret >>= _AP_WI; \ + } \ + ret |= val << _AP_W; \ + return ret; \ + } \ + template \ + INLINE ap_int_base<_AP_W + _AP_WI, false> operator,( \ + const ap_range_ref<_AP_W, _AP_S> &op1, C_TYPE op2) { \ + ap_int_base<_AP_WI + _AP_W, false> val(op2); \ + ap_int_base<_AP_WI + _AP_W, false> ret(op1); \ + ret <<= _AP_WI; \ + if (_AP_SI) { \ + val <<= _AP_W; \ + val >>= _AP_W; \ + } \ + ret |= val; \ + return ret; \ + } \ + template \ + INLINE ap_int_base<_AP_W + _AP_WI, false> operator,( \ + C_TYPE op1, const ap_range_ref<_AP_W, _AP_S> &op2) { \ + ap_int_base<_AP_WI + _AP_W, false> val(op1); \ + ap_int_base<_AP_WI + _AP_W, false> ret(op2); \ + int len = op2.length(); \ + val <<= len; \ + ret |= val; \ + return ret; \ + } \ + template \ + INLINE ap_int_base<_AP_WI + 1, false> operator,( \ + const ap_bit_ref<_AP_W, _AP_S> &op1, C_TYPE op2) { \ + ap_int_base<_AP_WI + 1, false> val(op2); \ + val[_AP_WI] = op1; \ + return val; \ + } \ + template \ + INLINE ap_int_base<_AP_WI + 1, false> operator,( \ + C_TYPE op1, const ap_bit_ref<_AP_W, _AP_S> &op2) { \ + ap_int_base<_AP_WI + 1, false> val(op1); \ + val <<= 1; \ + val[0] = op2; \ + return val; \ + } \ + template \ + INLINE ap_int_base<_AP_W + _AP_W2 + _AP_WI, false> operator,( \ + const ap_concat_ref<_AP_W, _AP_T, _AP_W2, _AP_T2> &op1, C_TYPE op2) { \ + ap_int_base<_AP_WI + _AP_W + _AP_W2, _AP_SI> val(op2); \ + ap_int_base<_AP_WI + _AP_W + _AP_W2, _AP_SI> ret(op1); \ + if (_AP_SI) { \ + val <<= _AP_W + _AP_W2; \ + val >>= _AP_W + _AP_W2; \ + } \ + ret <<= _AP_WI; \ + ret |= val; \ + return ret; \ + } \ + template \ + INLINE ap_int_base<_AP_W + _AP_W2 + _AP_WI, false> operator,( \ + C_TYPE op1, const 
ap_concat_ref<_AP_W, _AP_T, _AP_W2, _AP_T2> &op2) { \ + ap_int_base<_AP_WI + _AP_W + _AP_W2, _AP_SI> val(op1); \ + ap_int_base<_AP_WI + _AP_W + _AP_W2, _AP_SI> ret(op2); \ + int len = op2.length(); \ + val <<= len; \ + ret |= val; \ + return ret; \ + } \ + template \ + INLINE ap_int_base<_AP_W + _AP_WI, false> operator,( \ + const af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> &op1, \ + C_TYPE op2) { \ + ap_int_base<_AP_WI + _AP_W, false> val(op2); \ + ap_int_base<_AP_WI + _AP_W, false> ret(op1); \ + if (_AP_SI) { \ + val <<= _AP_W; \ + val >>= _AP_W; \ + } \ + ret <<= _AP_WI; \ + ret |= val; \ + return ret; \ + } \ + template \ + INLINE ap_int_base<_AP_W + _AP_WI, false> operator,( \ + C_TYPE op1, \ + const af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> &op2) { \ + ap_int_base<_AP_WI + _AP_W, false> val(op1); \ + ap_int_base<_AP_WI + _AP_W, false> ret(op2); \ + int len = op2.length(); \ + val <<= len; \ + ret |= val; \ + return ret; \ + } \ + template \ + INLINE ap_int_base<1 + _AP_WI, false> operator,( \ + const af_bit_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> &op1, \ + C_TYPE op2) { \ + ap_int_base<_AP_WI + 1, _AP_SI> val(op2); \ + val[_AP_WI] = op1; \ + return val; \ + } \ + template \ + INLINE ap_int_base<1 + _AP_WI, false> operator,( \ + C_TYPE op1, \ + const af_bit_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> &op2) { \ + ap_int_base<_AP_WI + 1, _AP_SI> val(op1); \ + val <<= 1; \ + val[0] = op2; \ + return val; \ + } + +CONCAT_OP_WITH_INT(bool, 1, false) +CONCAT_OP_WITH_INT(char, 8, CHAR_IS_SIGNED) +CONCAT_OP_WITH_INT(signed char, 8, true) +CONCAT_OP_WITH_INT(unsigned char, 8, false) +CONCAT_OP_WITH_INT(short, _AP_SIZE_short, true) +CONCAT_OP_WITH_INT(unsigned short, _AP_SIZE_short, false) +CONCAT_OP_WITH_INT(int, _AP_SIZE_int, true) +CONCAT_OP_WITH_INT(unsigned int, _AP_SIZE_int, false) +CONCAT_OP_WITH_INT(long, _AP_SIZE_long, true) +CONCAT_OP_WITH_INT(unsigned long, _AP_SIZE_long, false) +CONCAT_OP_WITH_INT(ap_slong, _AP_SIZE_ap_slong, true) +CONCAT_OP_WITH_INT(ap_ulong, _AP_SIZE_ap_slong, false) + +#undef CONCAT_OP_WITH_INT + +#define CONCAT_SHIFT_WITH_INT(C_TYPE, OP) \ + template \ + INLINE ap_uint<_AP_W + _AP_W1> operator OP( \ + const ap_concat_ref<_AP_W, _AP_T, _AP_W1, _AP_T1> lhs, C_TYPE rhs) { \ + return ap_uint<_AP_W + _AP_W1>(lhs).get() OP int(rhs); \ + } + +// FIXME int(rhs) may loose precision. + +CONCAT_SHIFT_WITH_INT(int, <<) +CONCAT_SHIFT_WITH_INT(unsigned int, <<) +CONCAT_SHIFT_WITH_INT(long, <<) +CONCAT_SHIFT_WITH_INT(unsigned long, <<) +CONCAT_SHIFT_WITH_INT(ap_slong, <<) +CONCAT_SHIFT_WITH_INT(ap_ulong, <<) + +CONCAT_SHIFT_WITH_INT(int, >>) +CONCAT_SHIFT_WITH_INT(unsigned int, >>) +CONCAT_SHIFT_WITH_INT(long, >>) +CONCAT_SHIFT_WITH_INT(unsigned long, >>) +CONCAT_SHIFT_WITH_INT(ap_slong, >>) +CONCAT_SHIFT_WITH_INT(ap_ulong, >>) + +#endif // ifndef __cplusplus else +#endif // ifndef __AP_INT_REF_H__ else + +// -*- cpp -*- + +// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/include/ap_int_special.h b/include/ap_int_special.h new file mode 100644 index 0000000..11ab151 --- /dev/null +++ b/include/ap_int_special.h @@ -0,0 +1,251 @@ +/* +#- (c) Copyright 2011-2019 Xilinx, Inc. All rights reserved. +#- +#- This file contains confidential and proprietary information +#- of Xilinx, Inc. and is protected under U.S. and +#- international copyright and other intellectual property +#- laws. +#- +#- DISCLAIMER +#- This disclaimer is not a license and does not grant any +#- rights to the materials distributed herewith. 
Except as +#- otherwise provided in a valid license issued to you by +#- Xilinx, and to the maximum extent permitted by applicable +#- law: (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND +#- WITH ALL FAULTS, AND XILINX HEREBY DISCLAIMS ALL WARRANTIES +#- AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING +#- BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON- +#- INFRINGEMENT, OR FITNESS FOR ANY PARTICULAR PURPOSE; and +#- (2) Xilinx shall not be liable (whether in contract or tort, +#- including negligence, or under any other theory of +#- liability) for any loss or damage of any kind or nature +#- related to, arising under or in connection with these +#- materials, including for any direct, or any indirect, +#- special, incidental, or consequential loss or damage +#- (including loss of data, profits, goodwill, or any type of +#- loss or damage suffered as a result of any action brought +#- by a third party) even if such damage or loss was +#- reasonably foreseeable or Xilinx had been advised of the +#- possibility of the same. +#- +#- CRITICAL APPLICATIONS +#- Xilinx products are not designed or intended to be fail- +#- safe, or for use in any application requiring fail-safe +#- performance, such as life-support or safety devices or +#- systems, Class III medical devices, nuclear facilities, +#- applications related to the deployment of airbags, or any +#- other applications that could lead to death, personal +#- injury, or severe property or environmental damage +#- (individually and collectively, "Critical +#- Applications"). Customer assumes the sole risk and +#- liability of any use of Xilinx products in Critical +#- Applications, subject only to applicable laws and +#- regulations governing limitations on product liability. +#- +#- THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS +#- PART OF THIS FILE AT ALL TIMES. +#- ************************************************************************ + +*/ + +#ifndef AP_INT_SPECIAL_H +#define AP_INT_SPECIAL_H +#ifndef __SYNTHESIS__ +#include +#include +#endif +// FIXME AP_AUTOCC cannot handle many standard headers, so declare instead of +// include. +// #include +namespace std { +template class complex; +} + +/* + TODO: Modernize the code using C++11/C++14 + 1. constexpr http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0415r0.html + 2. move constructor +*/ + +namespace std { +/* + Specialize std::complex to zero initialization ap_int. + + To reduce the area cost, ap_int is not zero initialized, just like basic + types float or double. However, libstdc++ provides specialization for float, + double and long double, initializing image part to 0 when not specified. + + This has become a difficulty in switching legacy code from these C types to + ap_int. To ease the tranform of legacy code, we have to implement + specialization of std::complex<> for our type. + + As ap_int is a template, it is impossible to specialize only the methods + that causes default initialization of value type in std::complex<>. An + explicit full specialization of the template class has to be done, covering + all the member functions and operators of std::complex<> as specified + in standard 26.2.4 and 26.2.5. +*/ +template +struct complex > { + typedef ap_int<_AP_W> _Tp; + typedef _Tp value_type; + + // 26.2.4/1 + // Constructor without argument + // Default initialize, so that in dataflow, the variable is only written once. + complex() : _M_real(_Tp()), _M_imag(_Tp()) {} + // Constructor with ap_int. 
+ // Zero initialize image part when not specified, so that `C(1) == C(1,0)` + complex(const _Tp &__r, const _Tp &__i = _Tp(0)) + : _M_real(__r), _M_imag(__i) {} + + // Constructor with another complex number + template + complex(const complex<_Up> &__z) : _M_real(__z.real()), _M_imag(__z.imag()) {} + +#if __cplusplus >= 201103L + const _Tp& real() const { return _M_real; } + const _Tp& imag() const { return _M_imag; } +#else + _Tp& real() { return _M_real; } + const _Tp& real() const { return _M_real; } + _Tp& imag() { return _M_imag; } + const _Tp& imag() const { return _M_imag; } +#endif + + void real(_Tp __val) { _M_real = __val; } + + void imag(_Tp __val) { _M_imag = __val; } + + // Assign this complex number with ap_int. + // Zero initialize image poarrt, so that `C c; c = 1; c == C(1,0);` + complex<_Tp> &operator=(const _Tp __t) { + _M_real = __t; + _M_imag = _Tp(0); + return *this; + } + + // 26.2.5/1 + // Add ap_int to this complex number. + complex<_Tp> &operator+=(const _Tp &__t) { + _M_real += __t; + return *this; + } + + // 26.2.5/3 + // Subtract ap_int from this complex number. + complex<_Tp> &operator-=(const _Tp &__t) { + _M_real -= __t; + return *this; + } + + // 26.2.5/5 + // Multiply this complex number by ap_int. + complex<_Tp> &operator*=(const _Tp &__t) { + _M_real *= __t; + _M_imag *= __t; + return *this; + } + + // 26.2.5/7 + // Divide this complex number by ap_int. + complex<_Tp> &operator/=(const _Tp &__t) { + _M_real /= __t; + _M_imag /= __t; + return *this; + } + + // Assign complex number to this complex number. + template + complex<_Tp> &operator=(const complex<_Up> &__z) { + _M_real = __z.real(); + _M_imag = __z.imag(); + return *this; + } + + // 26.2.5/9 + // Add complex number to this. + template + complex<_Tp> &operator+=(const complex<_Up> &__z) { + _M_real += __z.real(); + _M_imag += __z.imag(); + return *this; + } + + // 26.2.5/11 + // Subtract complex number from this. + template + complex<_Tp> &operator-=(const complex<_Up> &__z) { + _M_real -= __z.real(); + _M_imag -= __z.imag(); + return *this; + } + + // 26.2.5/13 + // Multiply this by complex number. + template + complex<_Tp> &operator*=(const complex<_Up> &__z) { + const _Tp __r = _M_real * __z.real() - _M_imag * __z.imag(); + _M_imag = _M_real * __z.imag() + _M_imag * __z.real(); + _M_real = __r; + return *this; + } + + // 26.2.5/15 + // Divide this by complex number. + template + complex<_Tp> &operator/=(const complex<_Up> &__z) { + complex<_Tp> cj (__z.real(), -__z.imag()); + complex<_Tp> a = (*this) * cj; + complex<_Tp> b = cj * __z; + _M_real = a.real() / b.real(); + _M_imag = a.imag() / b.real(); + return *this; + } + + private: + _Tp _M_real; + _Tp _M_imag; + +}; // struct complex > + + +/* + Non-member operations + These operations are not required by standard in 26.2.6, but libstdc++ + defines them for + float, double or long double's specialization. +*/ +// Compare complex number with ap_int. +template +inline bool operator==(const complex > &__x, const ap_int<_AP_W> &__y) { + return __x.real() == __y && + __x.imag() == 0; +} + +// Compare ap_int with complex number. +template +inline bool operator==(const ap_int<_AP_W> &__x, const complex > &__y) { + return __x == __y.real() && + 0 == __y.imag(); +} + +// Compare complex number with ap_int. +template +inline bool operator!=(const complex > &__x, const ap_int<_AP_W> &__y) { + return __x.real() != __y || + __x.imag() != 0; +} + +// Compare ap_int with complex number. 
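// Taken together, the specialization behaves like the float/double ones
// (editorial sketch, assuming ap_int.h and <complex> are included):
//
//   std::complex<ap_int<16> > c;       // default ctor -> (0, 0)
//   c = 7;                             // scalar assign -> (7, 0)
//   bool eq = (c == ap_int<16>(7));    // true, via the overload above
//   c *= c;                            // -> (49, 0)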
+template +inline bool operator!=(const ap_int<_AP_W> &__x, const complex > &__y) { + return __x != __y.real() || + 0 != __y.imag(); +} + +} // namespace std + +#endif // ifndef AP_INT_SPECIAL_H + +// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/include/etc/ap_private.h b/include/etc/ap_private.h new file mode 100644 index 0000000..d7b8f3d --- /dev/null +++ b/include/etc/ap_private.h @@ -0,0 +1,7213 @@ +/* +#- (c) Copyright 2011-2019 Xilinx, Inc. All rights reserved. +#- +#- This file contains confidential and proprietary information +#- of Xilinx, Inc. and is protected under U.S. and +#- international copyright and other intellectual property +#- laws. +#- +#- DISCLAIMER +#- This disclaimer is not a license and does not grant any +#- rights to the materials distributed herewith. Except as +#- otherwise provided in a valid license issued to you by +#- Xilinx, and to the maximum extent permitted by applicable +#- law: (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND +#- WITH ALL FAULTS, AND XILINX HEREBY DISCLAIMS ALL WARRANTIES +#- AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING +#- BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON- +#- INFRINGEMENT, OR FITNESS FOR ANY PARTICULAR PURPOSE; and +#- (2) Xilinx shall not be liable (whether in contract or tort, +#- including negligence, or under any other theory of +#- liability) for any loss or damage of any kind or nature +#- related to, arising under or in connection with these +#- materials, including for any direct, or any indirect, +#- special, incidental, or consequential loss or damage +#- (including loss of data, profits, goodwill, or any type of +#- loss or damage suffered as a result of any action brought +#- by a third party) even if such damage or loss was +#- reasonably foreseeable or Xilinx had been advised of the +#- possibility of the same. +#- +#- CRITICAL APPLICATIONS +#- Xilinx products are not designed or intended to be fail- +#- safe, or for use in any application requiring fail-safe +#- performance, such as life-support or safety devices or +#- systems, Class III medical devices, nuclear facilities, +#- applications related to the deployment of airbags, or any +#- other applications that could lead to death, personal +#- injury, or severe property or environmental damage +#- (individually and collectively, "Critical +#- Applications"). Customer assumes the sole risk and +#- liability of any use of Xilinx products in Critical +#- Applications, subject only to applicable laws and +#- regulations governing limitations on product liability. +#- +#- THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS +#- PART OF THIS FILE AT ALL TIMES. +#- ************************************************************************ + +*/ + +#ifndef __AP_PRIVATE_H__ +#define __AP_PRIVATE_H__ + +// common macros and type declarations are now defined in ap_common.h, and +// ap_private becomes part of it. +#ifndef __AP_COMMON_H__ +#error "etc/ap_private.h cannot be included directly." +#endif + +// forward declarations +//template +//class ap_private; // moved to ap_common.h +template +struct _private_range_ref; +template +struct _private_bit_ref; + +// TODO clean up this part. 
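// The include discipline enforced by the guard above, in short (editorial
// note): ap_common.h is the only sanctioned entry point.
//
//   #include <ap_common.h>      // OK: pulls in etc/ap_private.h itself
//   #include <etc/ap_private.h> // alone: #error "cannot be included
//                               // directly."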
+#ifndef LLVM_SUPPORT_MATHEXTRAS_H +#define LLVM_SUPPORT_MATHEXTRAS_H + +#ifdef _MSC_VER +#if _MSC_VER <= 1500 +typedef __int8 int8_t; +typedef unsigned __int8 uint8_t; +typedef __int16 int16_t; +typedef unsigned __int16 uint16_t; +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +#else +#include +#endif +#else +#include +#endif + +// FIXME eventually, this should have nothing to do with half. +#include "hls_half.h" + +#ifndef INLINE +#define INLINE inline +// Enable to debug ap_int/ap_fixed +// #define INLINE __attribute__((weak)) +#endif + +// NOTE: The following support functions use the _32/_64 extensions instead of +// type overloading so that signed and unsigned integers can be used without +// ambiguity. +namespace AESL_std { +template +DataType INLINE min(DataType a, DataType b) { + return (a >= b) ? b : a; +} + +template +DataType INLINE max(DataType a, DataType b) { + return (a >= b) ? a : b; +} +} // namespace AESL_std + +// TODO clean up included headers. +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace ap_private_ops { +/// Hi_32 - This function returns the high 32 bits of a 64 bit value. +static INLINE uint32_t Hi_32(uint64_t Value) { + return static_cast(Value >> 32); +} + +/// Lo_32 - This function returns the low 32 bits of a 64 bit value. +static INLINE uint32_t Lo_32(uint64_t Value) { + return static_cast(Value); +} + +template +INLINE bool isNegative(const ap_private<_AP_W, false>& a) { + return false; +} + +template +INLINE bool isNegative(const ap_private<_AP_W, true>& a) { + enum { + APINT_BITS_PER_WORD = 64, + _AP_N = (_AP_W + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD + }; + static const uint64_t sign_mask = 1ULL << ((_AP_W - 1) % APINT_BITS_PER_WORD); + return (sign_mask & a.get_pVal(_AP_N - 1)) != 0; +} + +/// CountLeadingZeros_32 - this function performs the platform optimal form of +/// counting the number of zeros from the most significant bit to the first one +/// bit. Ex. CountLeadingZeros_32(0x00F000FF) == 8. +/// Returns 32 if the word is zero. +static INLINE unsigned CountLeadingZeros_32(uint32_t Value) { + unsigned Count; // result +#if __GNUC__ >= 4 +// PowerPC is defined for __builtin_clz(0) +#if !defined(__ppc__) && !defined(__ppc64__) + if (Value == 0) return 32; +#endif + Count = __builtin_clz(Value); +#else + if (Value == 0) return 32; + Count = 0; + // bisecton method for count leading zeros + for (unsigned Shift = 32 >> 1; Shift; Shift >>= 1) { + uint32_t Tmp = (Value) >> (Shift); + if (Tmp) { + Value = Tmp; + } else { + Count |= Shift; + } + } +#endif + return Count; +} + +/// CountLeadingZeros_64 - This function performs the platform optimal form +/// of counting the number of zeros from the most significant bit to the first +/// one bit (64 bit edition.) +/// Returns 64 if the word is zero. 
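// Worked example of the bisection fallback in CountLeadingZeros_32 above
// (editorial): for Value = 0x00F000FF,
//   Shift 16: Value >> 16 = 0x00F0 != 0 -> Value = 0x00F0
//   Shift  8: Value >>  8 = 0      == 0 -> Count |= 8
//   Shift  4: Value >>  4 = 0x000F != 0 -> Value = 0x000F
//   Shift  2: Value >>  2 = 0x0003 != 0 -> Value = 0x0003
//   Shift  1: Value >>  1 = 0x0001 != 0 -> Value = 0x0001
// giving Count == 8, matching the documented example.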
+static INLINE unsigned CountLeadingZeros_64(uint64_t Value) { + unsigned Count; // result +#if __GNUC__ >= 4 +// PowerPC is defined for __builtin_clzll(0) +#if !defined(__ppc__) && !defined(__ppc64__) + if (!Value) return 64; +#endif + Count = __builtin_clzll(Value); +#else + if (sizeof(long) == sizeof(int64_t)) { + if (!Value) return 64; + Count = 0; + // bisecton method for count leading zeros + for (unsigned Shift = 64 >> 1; Shift; Shift >>= 1) { + uint64_t Tmp = (Value) >> (Shift); + if (Tmp) { + Value = Tmp; + } else { + Count |= Shift; + } + } + } else { + // get hi portion + uint32_t Hi = Hi_32(Value); + + // if some bits in hi portion + if (Hi) { + // leading zeros in hi portion plus all bits in lo portion + Count = CountLeadingZeros_32(Hi); + } else { + // get lo portion + uint32_t Lo = Lo_32(Value); + // same as 32 bit value + Count = CountLeadingZeros_32(Lo) + 32; + } + } +#endif + return Count; +} + +/// CountTrailingZeros_64 - This function performs the platform optimal form +/// of counting the number of zeros from the least significant bit to the first +/// one bit (64 bit edition.) +/// Returns 64 if the word is zero. +static INLINE unsigned CountTrailingZeros_64(uint64_t Value) { +#if __GNUC__ >= 4 + return (Value != 0) ? __builtin_ctzll(Value) : 64; +#else + static const unsigned Mod67Position[] = { + 64, 0, 1, 39, 2, 15, 40, 23, 3, 12, 16, 59, 41, 19, 24, 54, 4, + 64, 13, 10, 17, 62, 60, 28, 42, 30, 20, 51, 25, 44, 55, 47, 5, 32, + 65, 38, 14, 22, 11, 58, 18, 53, 63, 9, 61, 27, 29, 50, 43, 46, 31, + 37, 21, 57, 52, 8, 26, 49, 45, 36, 56, 7, 48, 35, 6, 34, 33, 0}; + return Mod67Position[(uint64_t)(-(int64_t)Value & (int64_t)Value) % 67]; +#endif +} + +/// CountPopulation_64 - this function counts the number of set bits in a value, +/// (64 bit edition.) 
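// The fallback table above is a perfect hash (editorial note): Value & -Value
// isolates the lowest set bit 2^k, and since 67 is prime with 2 as a
// primitive root, the residues 2^k mod 67 are distinct for k = 0..63.
// For example, Value = 40 (0b101000): 40 & -40 = 8, 8 % 67 = 8, and
// Mod67Position[8] == 3, the index of the lowest set bit.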
+static INLINE unsigned CountPopulation_64(uint64_t Value) { +#if __GNUC__ >= 4 + return __builtin_popcountll(Value); +#else + uint64_t v = Value - (((Value) >> 1) & 0x5555555555555555ULL); + v = (v & 0x3333333333333333ULL) + (((v) >> 2) & 0x3333333333333333ULL); + v = (v + ((v) >> 4)) & 0x0F0F0F0F0F0F0F0FULL; + return unsigned((uint64_t)(v * 0x0101010101010101ULL) >> 56); +#endif +} + +static INLINE uint32_t countLeadingOnes_64(uint64_t __V, uint32_t skip) { + uint32_t Count = 0; + if (skip) (__V) <<= (skip); + while (__V && (__V & (1ULL << 63))) { + Count++; + (__V) <<= 1; + } + return Count; +} + +static INLINE std::string oct2Bin(char oct) { + switch (oct) { + case '\0': { + return ""; + } + case '.': { + return "."; + } + case '0': { + return "000"; + } + case '1': { + return "001"; + } + case '2': { + return "010"; + } + case '3': { + return "011"; + } + case '4': { + return "100"; + } + case '5': { + return "101"; + } + case '6': { + return "110"; + } + case '7': { + return "111"; + } + } + assert(0 && "Invalid character in digit string"); + return ""; +} + +static INLINE std::string hex2Bin(char hex) { + switch (hex) { + case '\0': { + return ""; + } + case '.': { + return "."; + } + case '0': { + return "0000"; + } + case '1': { + return "0001"; + } + case '2': { + return "0010"; + } + case '3': { + return "0011"; + } + case '4': { + return "0100"; + } + case '5': { + return "0101"; + } + case '6': { + return "0110"; + } + case '7': { + return "0111"; + } + case '8': { + return "1000"; + } + case '9': { + return "1001"; + } + case 'A': + case 'a': { + return "1010"; + } + case 'B': + case 'b': { + return "1011"; + } + case 'C': + case 'c': { + return "1100"; + } + case 'D': + case 'd': { + return "1101"; + } + case 'E': + case 'e': { + return "1110"; + } + case 'F': + case 'f': { + return "1111"; + } + } + assert(0 && "Invalid character in digit string"); + return ""; +} + +static INLINE uint32_t decode_digit(char cdigit, int radix) { + uint32_t digit = 0; + if (radix == 16) { +#define isxdigit(c) \ + (((c) >= '0' && (c) <= '9') || ((c) >= 'a' && (c) <= 'f') || \ + ((c) >= 'A' && (c) <= 'F')) +#define isdigit(c) ((c) >= '0' && (c) <= '9') + if (!isxdigit(cdigit)) assert(0 && "Invalid hex digit in string"); + if (isdigit(cdigit)) + digit = cdigit - '0'; + else if (cdigit >= 'a') + digit = cdigit - 'a' + 10; + else if (cdigit >= 'A') + digit = cdigit - 'A' + 10; + else + assert(0 && "huh? we shouldn't get here"); + } else if (isdigit(cdigit)) { + digit = cdigit - '0'; + } else { + assert(0 && "Invalid character in digit string"); + } +#undef isxdigit +#undef isdigit + return digit; +} + +// Determine the radix of "val". +static INLINE std::string parseString(const std::string& input, unsigned char& radix) { + size_t len = input.length(); + if (len == 0) { + if (radix == 0) radix = 10; + return input; + } + + size_t startPos = 0; + // Trim whitespace + while (input[startPos] == ' ' && startPos < len) startPos++; + while (input[len - 1] == ' ' && startPos < len) len--; + + std::string val = input.substr(startPos, len - startPos); + // std::cout << "val = " << val << "\n"; + len = val.length(); + startPos = 0; + + // If the length of the string is less than 2, then radix + // is decimal and there is no exponent. 
+ if (len < 2) { + if (radix == 0) radix = 10; + return val; + } + + bool isNegative = false; + std::string ans; + + // First check to see if we start with a sign indicator + if (val[0] == '-') { + ans = "-"; + ++startPos; + isNegative = true; + } else if (val[0] == '+') + ++startPos; + + if (len - startPos < 2) { + if (radix == 0) radix = 10; + return val; + } + + if (val.substr(startPos, 2) == "0x" || val.substr(startPos, 2) == "0X") { + // If we start with "0x", then the radix is hex. + radix = 16; + startPos += 2; + } else if (val.substr(startPos, 2) == "0b" || + val.substr(startPos, 2) == "0B") { + // If we start with "0b", then the radix is binary. + radix = 2; + startPos += 2; + } else if (val.substr(startPos, 2) == "0o" || + val.substr(startPos, 2) == "0O") { + // If we start with "0o", then the radix is octal. + radix = 8; + startPos += 2; + } else if (radix == 0) { + radix = 10; + } + + int exp = 0; + if (radix == 10) { + // If radix is decimal, then see if there is an + // exponent indicator. + size_t expPos = val.find('e'); + bool has_exponent = true; + if (expPos == std::string::npos) expPos = val.find('E'); + if (expPos == std::string::npos) { + // No exponent indicator, so the mantissa goes to the end. + expPos = len; + has_exponent = false; + } + // std::cout << "startPos = " << startPos << " " << expPos << "\n"; + + ans += val.substr(startPos, expPos - startPos); + if (has_exponent) { + // Parse the exponent. + std::istringstream iss(val.substr(expPos + 1, len - expPos - 1)); + iss >> exp; + } + } else { + // Check for a binary exponent indicator. + size_t expPos = val.find('p'); + bool has_exponent = true; + if (expPos == std::string::npos) expPos = val.find('P'); + if (expPos == std::string::npos) { + // No exponent indicator, so the mantissa goes to the end. + expPos = len; + has_exponent = false; + } + + // std::cout << "startPos = " << startPos << " " << expPos << "\n"; + + assert(startPos <= expPos); + // Convert to binary as we go. + for (size_t i = startPos; i < expPos; ++i) { + if (radix == 16) { + ans += hex2Bin(val[i]); + } else if (radix == 8) { + ans += oct2Bin(val[i]); + } else { // radix == 2 + ans += val[i]; + } + } + // End in binary + radix = 2; + if (has_exponent) { + // Parse the exponent. + std::istringstream iss(val.substr(expPos + 1, len - expPos - 1)); + iss >> exp; + } + } + if (exp == 0) return ans; + + size_t decPos = ans.find('.'); + if (decPos == std::string::npos) decPos = ans.length(); + if ((int)decPos + exp >= (int)ans.length()) { + int i = decPos; + for (; i < (int)ans.length() - 1; ++i) ans[i] = ans[i + 1]; + for (; i < (int)ans.length(); ++i) ans[i] = '0'; + for (; i < (int)decPos + exp; ++i) ans += '0'; + return ans; + } else if ((int)decPos + exp < (int)isNegative) { + std::string dupAns = "0."; + if (ans[0] == '-') dupAns = "-0."; + for (int i = 0; i < isNegative - (int)decPos - exp; ++i) dupAns += '0'; + for (size_t i = isNegative; i < ans.length(); ++i) + if (ans[i] != '.') dupAns += ans[i]; + return dupAns; + } + + if (exp > 0) + for (size_t i = decPos; i < decPos + exp; ++i) ans[i] = ans[i + 1]; + else { + if (decPos == ans.length()) ans += ' '; + for (int i = decPos; i > (int)decPos + exp; --i) ans[i] = ans[i - 1]; + } + ans[decPos + exp] = '.'; + return ans; +} + +/// sub_1 - This function subtracts a single "digit" (64-bit word), y, from +/// the multi-digit integer array, x[], propagating the borrowed 1 value until +/// no further borrowing is neeeded or it runs out of "digits" in x. 
The result +/// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted. +/// In other words, if y > x then this function returns 1, otherwise 0. +/// @returns the borrow out of the subtraction +static INLINE bool sub_1(uint64_t x[], uint32_t len, uint64_t y) { + for (uint32_t i = 0; i < len; ++i) { + uint64_t __X = x[i]; + x[i] -= y; + if (y > __X) + y = 1; // We have to "borrow 1" from next "digit" + else { + y = 0; // No need to borrow + break; // Remaining digits are unchanged so exit early + } + } + return (y != 0); +} + +/// add_1 - This function adds a single "digit" integer, y, to the multiple +/// "digit" integer array, x[]. x[] is modified to reflect the addition and +/// 1 is returned if there is a carry out, otherwise 0 is returned. +/// @returns the carry of the addition. +static INLINE bool add_1(uint64_t dest[], uint64_t x[], uint32_t len, + uint64_t y) { + for (uint32_t i = 0; i < len; ++i) { + dest[i] = y + x[i]; + if (dest[i] < y) + y = 1; // Carry one to next digit. + else { + y = 0; // No need to carry so exit early + break; + } + } + return (y != 0); +} + +/// add - This function adds the integer array x to the integer array Y and +/// places the result in dest. +/// @returns the carry out from the addition +/// @brief General addition of 64-bit integer arrays +static INLINE bool add(uint64_t* dest, const uint64_t* x, const uint64_t* y, + uint32_t destlen, uint32_t xlen, uint32_t ylen, + bool xsigned, bool ysigned) { + bool carry = false; + uint32_t len = AESL_std::min(xlen, ylen); + uint32_t i; + for (i = 0; i < len && i < destlen; ++i) { + uint64_t limit = + AESL_std::min(x[i], y[i]); // must come first in case dest == x + dest[i] = x[i] + y[i] + carry; + carry = dest[i] < limit || (carry && dest[i] == limit); + } + if (xlen > ylen) { + const uint64_t yext = ysigned && int64_t(y[ylen - 1]) < 0 ? -1 : 0; + for (i = ylen; i < xlen && i < destlen; i++) { + uint64_t limit = AESL_std::min(x[i], yext); + dest[i] = x[i] + yext + carry; + carry = (dest[i] < limit) || (carry && dest[i] == limit); + } + } else if (ylen > xlen) { + const uint64_t xext = xsigned && int64_t(x[xlen - 1]) < 0 ? -1 : 0; + for (i = xlen; i < ylen && i < destlen; i++) { + uint64_t limit = AESL_std::min(xext, y[i]); + dest[i] = xext + y[i] + carry; + carry = (dest[i] < limit) || (carry && dest[i] == limit); + } + } + return carry; +} + +/// @returns returns the borrow out. +/// @brief Generalized subtraction of 64-bit integer arrays. +static INLINE bool sub(uint64_t* dest, const uint64_t* x, const uint64_t* y, + uint32_t destlen, uint32_t xlen, uint32_t ylen, + bool xsigned, bool ysigned) { + bool borrow = false; + uint32_t i; + uint32_t len = AESL_std::min(xlen, ylen); + for (i = 0; i < len && i < destlen; ++i) { + uint64_t x_tmp = borrow ? x[i] - 1 : x[i]; + borrow = y[i] > x_tmp || (borrow && x[i] == 0); + dest[i] = x_tmp - y[i]; + } + if (xlen > ylen) { + const uint64_t yext = ysigned && int64_t(y[ylen - 1]) < 0 ? -1 : 0; + for (i = ylen; i < xlen && i < destlen; i++) { + uint64_t x_tmp = borrow ? x[i] - 1 : x[i]; + borrow = yext > x_tmp || (borrow && x[i] == 0); + dest[i] = x_tmp - yext; + } + } else if (ylen > xlen) { + const uint64_t xext = xsigned && int64_t(x[xlen - 1]) < 0 ? -1 : 0; + for (i = xlen; i < ylen && i < destlen; i++) { + uint64_t x_tmp = borrow ? 
xext - 1 : xext; + borrow = y[i] > x_tmp || (borrow && xext == 0); + dest[i] = x_tmp - y[i]; + } + } + return borrow; +} + +/// Subtracts the RHS ap_private from this ap_private +/// @returns this, after subtraction +/// @brief Subtraction assignment operator. + +/// Multiplies an integer array, x by a a uint64_t integer and places the result +/// into dest. +/// @returns the carry out of the multiplication. +/// @brief Multiply a multi-digit ap_private by a single digit (64-bit) integer. +static INLINE uint64_t mul_1(uint64_t dest[], const uint64_t x[], uint32_t len, + uint64_t y) { + // Split y into high 32-bit part (hy) and low 32-bit part (ly) + uint64_t ly = y & 0xffffffffULL, hy = (y) >> 32; + uint64_t carry = 0; + static const uint64_t two_power_32 = 1ULL << 32; + // For each digit of x. + for (uint32_t i = 0; i < len; ++i) { + // Split x into high and low words + uint64_t lx = x[i] & 0xffffffffULL; + uint64_t hx = (x[i]) >> 32; + // hasCarry - A flag to indicate if there is a carry to the next digit. + // hasCarry == 0, no carry + // hasCarry == 1, has carry + // hasCarry == 2, no carry and the calculation result == 0. + uint8_t hasCarry = 0; + dest[i] = carry + lx * ly; + // Determine if the add above introduces carry. + hasCarry = (dest[i] < carry) ? 1 : 0; + carry = hx * ly + ((dest[i]) >> 32) + (hasCarry ? two_power_32 : 0); + // The upper limit of carry can be (2^32 - 1)(2^32 - 1) + + // (2^32 - 1) + 2^32 = 2^64. + hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0); + + carry += (lx * hy) & 0xffffffffULL; + dest[i] = ((carry) << 32) | (dest[i] & 0xffffffffULL); + carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? two_power_32 : 0) + + ((carry) >> 32) + ((lx * hy) >> 32) + hx * hy; + } + return carry; +} + +/// Multiplies integer array x by integer array y and stores the result into +/// the integer array dest. Note that dest's size must be >= xlen + ylen in +/// order to +/// do a full precision computation. If it is not, then only the low-order words +/// are returned. +/// @brief Generalized multiplicate of integer arrays. +static INLINE void mul(uint64_t dest[], const uint64_t x[], uint32_t xlen, + const uint64_t y[], uint32_t ylen, uint32_t destlen) { + assert(xlen > 0); + assert(ylen > 0); + assert(destlen >= xlen + ylen); + if (xlen < destlen) dest[xlen] = mul_1(dest, x, xlen, y[0]); + for (uint32_t i = 1; i < ylen; ++i) { + uint64_t ly = y[i] & 0xffffffffULL, hy = (y[i]) >> 32; + uint64_t carry = 0, lx = 0, hx = 0; + for (uint32_t j = 0; j < xlen; ++j) { + lx = x[j] & 0xffffffffULL; + hx = (x[j]) >> 32; + // hasCarry - A flag to indicate if has carry. + // hasCarry == 0, no carry + // hasCarry == 1, has carry + // hasCarry == 2, no carry and the calculation result == 0. + uint8_t hasCarry = 0; + uint64_t resul = carry + lx * ly; + hasCarry = (resul < carry) ? 1 : 0; + carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + ((resul) >> 32); + hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0); + carry += (lx * hy) & 0xffffffffULL; + resul = ((carry) << 32) | (resul & 0xffffffffULL); + if (i + j < destlen) dest[i + j] += resul; + carry = + (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) + + ((carry) >> 32) + (dest[i + j] < resul ? 1 : 0) + ((lx * hy) >> 32) + + hx * hy; + } + if (i + xlen < destlen) dest[i + xlen] = carry; + } +} + +/// Implementation of Knuth's Algorithm D (Division of nonnegative integers) +/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The +/// variables here have the same names as in the algorithm. 
Comments explain
+/// the algorithm and any deviation from it.
+static INLINE void KnuthDiv(uint32_t* u, uint32_t* v, uint32_t* q, uint32_t* r,
+                            uint32_t m, uint32_t n) {
+  assert(u && "Must provide dividend");
+  assert(v && "Must provide divisor");
+  assert(q && "Must provide quotient");
+  assert(u != v && u != q && v != q && "Must use different memory");
+  assert(n > 1 && "n must be > 1");
+
+  // Knuth uses the value b as the base of the number system. In our case b
+  // is 2^32 so we just set it to -1u.
+  uint64_t b = uint64_t(1) << 32;
+
+  // DEBUG(cerr << "KnuthDiv: m=" << m << " n=" << n << '\n');
+  // DEBUG(cerr << "KnuthDiv: original:");
+  // DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << std::setbase(16) <<
+  // u[i]);
+  // DEBUG(cerr << " by");
+  // DEBUG(for (int i = n; i >0; i--) cerr << " " << std::setbase(16) <<
+  // v[i-1]);
+  // DEBUG(cerr << '\n');
+  // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
+  // u and v by d. Note that we have taken Knuth's advice here to use a power
+  // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
+  // 2 allows us to shift instead of multiply and it is easy to determine the
+  // shift amount from the leading zeros. We are basically normalizing the u
+  // and v so that its high bits are shifted to the top of v's range without
+  // overflow. Note that this can require an extra word in u so that u must
+  // be of length m+n+1.
+  uint32_t shift = CountLeadingZeros_32(v[n - 1]);
+  uint32_t v_carry = 0;
+  uint32_t u_carry = 0;
+  if (shift) {
+    for (uint32_t i = 0; i < m + n; ++i) {
+      uint32_t u_tmp = (u[i]) >> (32 - shift);
+      u[i] = ((u[i]) << (shift)) | u_carry;
+      u_carry = u_tmp;
+    }
+    for (uint32_t i = 0; i < n; ++i) {
+      uint32_t v_tmp = (v[i]) >> (32 - shift);
+      v[i] = ((v[i]) << (shift)) | v_carry;
+      v_carry = v_tmp;
+    }
+  }
+  u[m + n] = u_carry;
+  // DEBUG(cerr << "KnuthDiv: normal:");
+  // DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << std::setbase(16) <<
+  // u[i]);
+  // DEBUG(cerr << " by");
+  // DEBUG(for (int i = n; i >0; i--) cerr << " " << std::setbase(16) <<
+  // v[i-1]);
+  // DEBUG(cerr << '\n');
+
+  // D2. [Initialize j.] Set j to m. This is the loop counter over the places.
+  int j = m;
+  do {
+    // DEBUG(cerr << "KnuthDiv: quotient digit #" << j << '\n');
+    // D3. [Calculate q'.].
+    //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
+    //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
+    // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
+    // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
+    // on v[n-2] determines at high speed most of the cases in which the trial
+    // value qp is one too large, and it eliminates all cases where qp is two
+    // too large.
+    uint64_t dividend = ((uint64_t(u[j + n]) << 32) + u[j + n - 1]);
+    // DEBUG(cerr << "KnuthDiv: dividend == " << dividend << '\n');
+    uint64_t qp = dividend / v[n - 1];
+    uint64_t rp = dividend % v[n - 1];
+    if (qp == b || qp * v[n - 2] > b * rp + u[j + n - 2]) {
+      qp--;
+      rp += v[n - 1];
+      if (rp < b && (qp == b || qp * v[n - 2] > b * rp + u[j + n - 2])) qp--;
+    }
+    // DEBUG(cerr << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
+
+    // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
+    // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
+    // consists of a simple multiplication by a one-place number, combined with
+    // a subtraction.
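+    // (editorial aside) The same step in base 10, dividing 3142 by 53: the
+    // trial digit qp == 5 gives 5 * 53 = 265, subtracted from the leading
+    // digits 314 to leave 49; had qp been guessed one too high, the result
+    // would go negative and step D6 below adds the divisor back once.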
+ bool isNeg = false; + for (uint32_t i = 0; i < n; ++i) { + uint64_t u_tmp = uint64_t(u[j + i]) | ((uint64_t(u[j + i + 1])) << 32); + uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]); + bool borrow = subtrahend > u_tmp; + /*DEBUG(cerr << "KnuthDiv: u_tmp == " << u_tmp + << ", subtrahend == " << subtrahend + << ", borrow = " << borrow << '\n');*/ + + uint64_t result = u_tmp - subtrahend; + uint32_t k = j + i; + u[k++] = (uint32_t)(result & (b - 1)); // subtract low word + u[k++] = (uint32_t)((result) >> 32); // subtract high word + while (borrow && k <= m + n) { // deal with borrow to the left + borrow = u[k] == 0; + u[k]--; + k++; + } + isNeg |= borrow; + /*DEBUG(cerr << "KnuthDiv: u[j+i] == " << u[j+i] << ", u[j+i+1] == " << + u[j+i+1] << '\n');*/ + } + /*DEBUG(cerr << "KnuthDiv: after subtraction:"); + DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << u[i]); + DEBUG(cerr << '\n');*/ + // The digits (u[j+n]...u[j]) should be kept positive; if the result of + // this step is actually negative, (u[j+n]...u[j]) should be left as the + // true value plus b**(n+1), namely as the b's complement of + // the true value, and a "borrow" to the left should be remembered. + // + if (isNeg) { + bool carry = true; // true because b's complement is "complement + 1" + for (uint32_t i = 0; i <= m + n; ++i) { + u[i] = ~u[i] + carry; // b's complement + carry = carry && u[i] == 0; + } + } + /*DEBUG(cerr << "KnuthDiv: after complement:"); + DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << u[i]); + DEBUG(cerr << '\n');*/ + + // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was + // negative, go to step D6; otherwise go on to step D7. + q[j] = (uint32_t)qp; + if (isNeg) { + // D6. [Add back]. The probability that this step is necessary is very + // small, on the order of only 2/b. Make sure that test data accounts for + // this possibility. Decrease q[j] by 1 + q[j]--; + // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]). + // A carry will occur to the left of u[j+n], and it should be ignored + // since it cancels with the borrow that occurred in D4. + bool carry = false; + for (uint32_t i = 0; i < n; i++) { + uint32_t limit = AESL_std::min(u[j + i], v[i]); + u[j + i] += v[i] + carry; + carry = u[j + i] < limit || (carry && u[j + i] == limit); + } + u[j + n] += carry; + } + /*DEBUG(cerr << "KnuthDiv: after correction:"); + DEBUG(for (int i = m+n; i >=0; i--) cerr <<" " << u[i]); + DEBUG(cerr << "\nKnuthDiv: digit result = " << q[j] << '\n');*/ + + // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3. + } while (--j >= 0); + + /*DEBUG(cerr << "KnuthDiv: quotient:"); + DEBUG(for (int i = m; i >=0; i--) cerr <<" " << q[i]); + DEBUG(cerr << '\n');*/ + + // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired + // remainder may be obtained by dividing u[...] by d. If r is non-null we + // compute the remainder (urem uses this). + if (r) { + // The value d is expressed by the "shift" value above since we avoided + // multiplication by d by using a shift left. So, all we have to do is + // shift right here. 
+    if (shift) {
+      uint32_t carry = 0;
+      // DEBUG(cerr << "KnuthDiv: remainder:");
+      for (int i = n - 1; i >= 0; i--) {
+        r[i] = ((u[i]) >> (shift)) | carry;
+        carry = (u[i]) << (32 - shift);
+        // DEBUG(cerr << " " << r[i]);
+      }
+    } else {
+      for (int i = n - 1; i >= 0; i--) {
+        r[i] = u[i];
+        // DEBUG(cerr << " " << r[i]);
+      }
+    }
+    // DEBUG(cerr << '\n');
+  }
+  // DEBUG(cerr << std::setbase(10) << '\n');
+}
+
+template <int _AP_W, bool _AP_S>
+void divide(const ap_private<_AP_W, _AP_S>& LHS, uint32_t lhsWords,
+            const ap_private<_AP_W, _AP_S>& RHS, uint32_t rhsWords,
+            ap_private<_AP_W, _AP_S>* Quotient,
+            ap_private<_AP_W, _AP_S>* Remainder) {
+  assert(lhsWords >= rhsWords && "Fractional result");
+  enum { APINT_BITS_PER_WORD = 64 };
+  // First, compose the values into an array of 32-bit words instead of
+  // 64-bit words. This is a necessity of both the "short division" algorithm
+  // and the Knuth "classical algorithm" which requires there to be native
+  // operations for +, -, and * on an m bit value with an m*2 bit result. We
+  // can't use 64-bit operands here because we don't have native results of
+  // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
+  // work on big-endian machines.
+  uint64_t mask = ~0ull >> (sizeof(uint32_t) * 8);
+  uint32_t n = rhsWords * 2;
+  uint32_t m = (lhsWords * 2) - n;
+
+  // Allocate space for the temporary values we need either on the stack, if
+  // it will fit, or on the heap if it won't.
+  uint32_t SPACE[128];
+  uint32_t* __U = 0;
+  uint32_t* __V = 0;
+  uint32_t* __Q = 0;
+  uint32_t* __R = 0;
+  if ((Remainder ? 4 : 3) * n + 2 * m + 1 <= 128) {
+    __U = &SPACE[0];
+    __V = &SPACE[m + n + 1];
+    __Q = &SPACE[(m + n + 1) + n];
+    if (Remainder) __R = &SPACE[(m + n + 1) + n + (m + n)];
+  } else {
+    __U = new uint32_t[m + n + 1];
+    __V = new uint32_t[n];
+    __Q = new uint32_t[m + n];
+    if (Remainder) __R = new uint32_t[n];
+  }
+
+  // Initialize the dividend
+  memset(__U, 0, (m + n + 1) * sizeof(uint32_t));
+  for (unsigned i = 0; i < lhsWords; ++i) {
+    uint64_t tmp = LHS.get_pVal(i);
+    __U[i * 2] = (uint32_t)(tmp & mask);
+    __U[i * 2 + 1] = (tmp) >> (sizeof(uint32_t) * 8);
+  }
+  __U[m + n] = 0; // this extra word is for "spill" in the Knuth algorithm.
+
+  // Initialize the divisor
+  memset(__V, 0, (n) * sizeof(uint32_t));
+  for (unsigned i = 0; i < rhsWords; ++i) {
+    uint64_t tmp = RHS.get_pVal(i);
+    __V[i * 2] = (uint32_t)(tmp & mask);
+    __V[i * 2 + 1] = (tmp) >> (sizeof(uint32_t) * 8);
+  }
+
+  // initialize the quotient and remainder
+  memset(__Q, 0, (m + n) * sizeof(uint32_t));
+  if (Remainder) memset(__R, 0, n * sizeof(uint32_t));
+
+  // Now, adjust m and n for the Knuth division. n is the number of words in
+  // the divisor. m is the number of words by which the dividend exceeds the
+  // divisor (i.e. m+n is the length of the dividend). These sizes must not
+  // contain any zero words or the Knuth algorithm fails.
+  for (unsigned i = n; i > 0 && __V[i - 1] == 0; i--) {
+    n--;
+    m++;
+  }
+  for (unsigned i = m + n; i > 0 && __U[i - 1] == 0; i--) m--;
+
+  // If we're left with only a single word for the divisor, Knuth doesn't work
+  // so we implement the short division algorithm here. This is much simpler
+  // and faster because we are certain that we can divide a 64-bit quantity
+  // by a 32-bit quantity at hardware speed and short division is simply a
+  // series of such operations. This is just like doing short division but we
+  // are using base 2^32 instead of base 10.
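+  // (editorial aside) Short division in base 2^32 mirrors long division by
+  // hand: for a two-word dividend {hi, lo} and one-word divisor d,
+  //   q_hi = hi / d;  r = hi % d;
+  //   q_lo = ((uint64_t(r) << 32) | lo) / d;
+  // each step divides one 64-bit partial dividend by the 32-bit divisor.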
+ assert(n != 0 && "Divide by zero?"); + if (n == 1) { + uint32_t divisor = __V[0]; + uint32_t remainder = 0; + for (int i = m + n - 1; i >= 0; i--) { + uint64_t partial_dividend = (uint64_t(remainder)) << 32 | __U[i]; + if (partial_dividend == 0) { + __Q[i] = 0; + remainder = 0; + } else if (partial_dividend < divisor) { + __Q[i] = 0; + remainder = (uint32_t)partial_dividend; + } else if (partial_dividend == divisor) { + __Q[i] = 1; + remainder = 0; + } else { + __Q[i] = (uint32_t)(partial_dividend / divisor); + remainder = (uint32_t)(partial_dividend - (__Q[i] * divisor)); + } + } + if (__R) __R[0] = remainder; + } else { + // Now we're ready to invoke the Knuth classical divide algorithm. In this + // case n > 1. + KnuthDiv(__U, __V, __Q, __R, m, n); + } + + // If the caller wants the quotient + if (Quotient) { + // Set up the Quotient value's memory. + if (Quotient->BitWidth != LHS.BitWidth) { + if (Quotient->isSingleWord()) Quotient->set_VAL(0); + } else + Quotient->clear(); + + // The quotient is in Q. Reconstitute the quotient into Quotient's low + // order words. + if (lhsWords == 1) { + uint64_t tmp = + uint64_t(__Q[0]) | ((uint64_t(__Q[1])) << (APINT_BITS_PER_WORD / 2)); + Quotient->set_VAL(tmp); + } else { + assert(!Quotient->isSingleWord() && + "Quotient ap_private not large enough"); + for (unsigned i = 0; i < lhsWords; ++i) + Quotient->set_pVal( + i, uint64_t(__Q[i * 2]) | + ((uint64_t(__Q[i * 2 + 1])) << (APINT_BITS_PER_WORD / 2))); + } + Quotient->clearUnusedBits(); + } + + // If the caller wants the remainder + if (Remainder) { + // Set up the Remainder value's memory. + if (Remainder->BitWidth != RHS.BitWidth) { + if (Remainder->isSingleWord()) Remainder->set_VAL(0); + } else + Remainder->clear(); + + // The remainder is in R. Reconstitute the remainder into Remainder's low + // order words. + if (rhsWords == 1) { + uint64_t tmp = + uint64_t(__R[0]) | ((uint64_t(__R[1])) << (APINT_BITS_PER_WORD / 2)); + Remainder->set_VAL(tmp); + } else { + assert(!Remainder->isSingleWord() && + "Remainder ap_private not large enough"); + for (unsigned i = 0; i < rhsWords; ++i) + Remainder->set_pVal( + i, uint64_t(__R[i * 2]) | + ((uint64_t(__R[i * 2 + 1])) << (APINT_BITS_PER_WORD / 2))); + } + Remainder->clearUnusedBits(); + } + + // Clean up the memory we allocated. + if (__U != &SPACE[0]) { + delete[] __U; + delete[] __V; + delete[] __Q; + delete[] __R; + } +} + +template +void divide(const ap_private<_AP_W, _AP_S>& LHS, uint32_t lhsWords, + uint64_t RHS, ap_private<_AP_W, _AP_S>* Quotient, + ap_private<_AP_W, _AP_S>* Remainder) { + uint32_t rhsWords = 1; + assert(lhsWords >= rhsWords && "Fractional result"); + enum { APINT_BITS_PER_WORD = 64 }; + // First, compose the values into an array of 32-bit words instead of + // 64-bit words. This is a necessity of both the "short division" algorithm + // and the the Knuth "classical algorithm" which requires there to be native + // operations for +, -, and * on an m bit value with an m*2 bit result. We + // can't use 64-bit operands here because we don't have native results of + // 128-bits. Furthremore, casting the 64-bit values to 32-bit values won't + // work on large-endian machines. + uint64_t mask = ~0ull >> (sizeof(uint32_t) * 8); + uint32_t n = 2; + uint32_t m = (lhsWords * 2) - n; + + // Allocate space for the temporary values we need either on the stack, if + // it will fit, or on the heap if it won't. + uint32_t SPACE[128]; + uint32_t* __U = 0; + uint32_t* __V = 0; + uint32_t* __Q = 0; + uint32_t* __R = 0; + if ((Remainder ? 
4 : 3) * n + 2 * m + 1 <= 128) { + __U = &SPACE[0]; + __V = &SPACE[m + n + 1]; + __Q = &SPACE[(m + n + 1) + n]; + if (Remainder) __R = &SPACE[(m + n + 1) + n + (m + n)]; + } else { + __U = new uint32_t[m + n + 1]; + __V = new uint32_t[n]; + __Q = new uint32_t[m + n]; + if (Remainder) __R = new uint32_t[n]; + } + + // Initialize the dividend + memset(__U, 0, (m + n + 1) * sizeof(uint32_t)); + for (unsigned i = 0; i < lhsWords; ++i) { + uint64_t tmp = LHS.get_pVal(i); + __U[i * 2] = tmp & mask; + __U[i * 2 + 1] = (tmp) >> (sizeof(uint32_t) * 8); + } + __U[m + n] = 0; // this extra word is for "spill" in the Knuth algorithm. + + // Initialize the divisor + memset(__V, 0, (n) * sizeof(uint32_t)); + __V[0] = RHS & mask; + __V[1] = (RHS) >> (sizeof(uint32_t) * 8); + + // initialize the quotient and remainder + memset(__Q, 0, (m + n) * sizeof(uint32_t)); + if (Remainder) memset(__R, 0, n * sizeof(uint32_t)); + + // Now, adjust m and n for the Knuth division. n is the number of words in + // the divisor. m is the number of words by which the dividend exceeds the + // divisor (i.e. m+n is the length of the dividend). These sizes must not + // contain any zero words or the Knuth algorithm fails. + for (unsigned i = n; i > 0 && __V[i - 1] == 0; i--) { + n--; + m++; + } + for (unsigned i = m + n; i > 0 && __U[i - 1] == 0; i--) m--; + + // If we're left with only a single word for the divisor, Knuth doesn't work + // so we implement the short division algorithm here. This is much simpler + // and faster because we are certain that we can divide a 64-bit quantity + // by a 32-bit quantity at hardware speed and short division is simply a + // series of such operations. This is just like doing short division but we + // are using base 2^32 instead of base 10. + assert(n != 0 && "Divide by zero?"); + if (n == 1) { + uint32_t divisor = __V[0]; + uint32_t remainder = 0; + for (int i = m + n - 1; i >= 0; i--) { + uint64_t partial_dividend = (uint64_t(remainder)) << 32 | __U[i]; + if (partial_dividend == 0) { + __Q[i] = 0; + remainder = 0; + } else if (partial_dividend < divisor) { + __Q[i] = 0; + remainder = partial_dividend; + } else if (partial_dividend == divisor) { + __Q[i] = 1; + remainder = 0; + } else { + __Q[i] = partial_dividend / divisor; + remainder = partial_dividend - (__Q[i] * divisor); + } + } + if (__R) __R[0] = remainder; + } else { + // Now we're ready to invoke the Knuth classical divide algorithm. In this + // case n > 1. + KnuthDiv(__U, __V, __Q, __R, m, n); + } + + // If the caller wants the quotient + if (Quotient) { + // Set up the Quotient value's memory. + if (Quotient->BitWidth != LHS.BitWidth) { + if (Quotient->isSingleWord()) Quotient->set_VAL(0); + } else + Quotient->clear(); + + // The quotient is in Q. Reconstitute the quotient into Quotient's low + // order words. + if (lhsWords == 1) { + uint64_t tmp = + uint64_t(__Q[0]) | ((uint64_t(__Q[1])) << (APINT_BITS_PER_WORD / 2)); + Quotient->set_VAL(tmp); + } else { + assert(!Quotient->isSingleWord() && + "Quotient ap_private not large enough"); + for (unsigned i = 0; i < lhsWords; ++i) + Quotient->set_pVal( + i, uint64_t(__Q[i * 2]) | + ((uint64_t(__Q[i * 2 + 1])) << (APINT_BITS_PER_WORD / 2))); + } + Quotient->clearUnusedBits(); + } + + // If the caller wants the remainder + if (Remainder) { + // Set up the Remainder value's memory. + if (Remainder->BitWidth != 64 /* RHS.BitWidth */) { + if (Remainder->isSingleWord()) Remainder->set_VAL(0); + } else + Remainder->clear(); + + // The remainder is in __R. 
Reconstitute the remainder into Remainder's low + // order words. + if (rhsWords == 1) { + uint64_t tmp = + uint64_t(__R[0]) | ((uint64_t(__R[1])) << (APINT_BITS_PER_WORD / 2)); + Remainder->set_VAL(tmp); + } else { + assert(!Remainder->isSingleWord() && + "Remainder ap_private not large enough"); + for (unsigned i = 0; i < rhsWords; ++i) + Remainder->set_pVal( + i, uint64_t(__R[i * 2]) | + ((uint64_t(__R[i * 2 + 1])) << (APINT_BITS_PER_WORD / 2))); + } + Remainder->clearUnusedBits(); + } + + // Clean up the memory we allocated. + if (__U != &SPACE[0]) { + delete[] __U; + delete[] __V; + delete[] __Q; + delete[] __R; + } +} + +/// @brief Logical right-shift function. +template +INLINE ap_private<_AP_W, _AP_S, _AP_C> lshr( + const ap_private<_AP_W, _AP_S, _AP_C>& LHS, uint32_t shiftAmt) { + return LHS.lshr(shiftAmt); +} + +/// Left-shift the ap_private by shiftAmt. +/// @brief Left-shift function. +template +INLINE ap_private<_AP_W, _AP_S, _AP_C> shl( + const ap_private<_AP_W, _AP_S, _AP_C>& LHS, uint32_t shiftAmt) { + return LHS.shl(shiftAmt); +} + +} // namespace ap_private_ops + +#endif // LLVM_SUPPORT_MATHEXTRAS_H + +/// This enumeration just provides for internal constants used in this +/// translation unit. +enum { + MIN_INT_BITS = 1, ///< Minimum number of bits that can be specified + ///< Note that this must remain synchronized with IntegerType::MIN_INT_BITS + MAX_INT_BITS = (1 << 23) - 1 ///< Maximum number of bits that can be specified + ///< Note that this must remain synchronized with IntegerType::MAX_INT_BITS +}; + +//===----------------------------------------------------------------------===// +// ap_private Class +//===----------------------------------------------------------------------===// + +/// ap_private - This class represents arbitrary precision constant integral +/// values. +/// It is a functional replacement for common case unsigned integer type like +/// "unsigned", "unsigned long" or "uint64_t", but also allows non-byte-width +/// integer sizes and large integer value types such as 3-bits, 15-bits, or more +/// than 64-bits of precision. ap_private provides a variety of arithmetic +/// operators +/// and methods to manipulate integer values of any bit-width. It supports both +/// the typical integer arithmetic and comparison operations as well as bitwise +/// manipulation. +/// +/// The class has several invariants worth noting: +/// * All bit, byte, and word positions are zero-based. +/// * Once the bit width is set, it doesn't change except by the Truncate, +/// SignExtend, or ZeroExtend operations. +/// * All binary operators must be on ap_private instances of the same bit +/// width. +/// Attempting to use these operators on instances with different bit +/// widths will yield an assertion. +/// * The value is stored canonically as an unsigned value. For operations +/// where it makes a difference, there are both signed and unsigned variants +/// of the operation. For example, sdiv and udiv. However, because the bit +/// widths must be the same, operations such as Mul and Add produce the same +/// results regardless of whether the values are interpreted as signed or +/// not. +/// * In general, the class tries to follow the style of computation that LLVM +/// uses in its IR. This simplifies its use for LLVM. +/// +/// @brief Class for arbitrary precision integers. 
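+// A minimal usage sketch of the class documented above (illustrative only;
+// guarded out of compilation, and the expected strings simply follow from
+// the toString() implementation further below):
+#if 0
+#include <cassert>
+inline void ap_private_usage_sketch() {
+  ap_private<10, true> a(-3); // stored as a 10-bit two's-complement pattern
+  ap_private<10, true> b(7);
+  assert(a.to_string(10) == "-3");          // radix 10 honors the sign
+  assert(a.to_string(2) == "0b1111111101"); // raw 10-bit pattern of -3
+  assert((a + b).to_int() == 4);            // plus result widens to 11 bits
+}
+#endif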
+ +#if defined(_MSC_VER) +#if _MSC_VER < 1400 && !defined(for) +#define for if (0); else for +#endif +typedef unsigned __int64 ap_ulong; +typedef signed __int64 ap_slong; +#else +typedef unsigned long long ap_ulong; +typedef signed long long ap_slong; +#endif +template +struct valtype; + +template +struct valtype<_AP_N8, false> { + typedef uint64_t Type; +}; + +template +struct valtype<_AP_N8, true> { + typedef int64_t Type; +}; + +template <> +struct valtype<1, false> { + typedef unsigned char Type; +}; +template <> +struct valtype<2, false> { + typedef unsigned short Type; +}; +template <> +struct valtype<3, false> { + typedef unsigned int Type; +}; +template <> +struct valtype<4, false> { + typedef unsigned int Type; +}; +template <> +struct valtype<1, true> { + typedef signed char Type; +}; +template <> +struct valtype<2, true> { + typedef short Type; +}; +template <> +struct valtype<3, true> { + typedef int Type; +}; +template <> +struct valtype<4, true> { + typedef int Type; +}; + +template +struct ap_private_enable_if {}; +template <> +struct ap_private_enable_if { + static const bool isValid = true; +}; + +// When bitwidth < 64 +template +class ap_private<_AP_W, _AP_S, true> { + // SFINAE pattern. Only consider this class when _AP_W <= 64 + const static bool valid = ap_private_enable_if<_AP_W <= 64>::isValid; + +#ifdef _MSC_VER +#pragma warning(disable : 4521 4522) +#endif + public: + typedef typename valtype<(_AP_W + 7) / 8, _AP_S>::Type ValType; + typedef ap_private<_AP_W, _AP_S> Type; + template + struct RType { + enum { + mult_w = _AP_W + _AP_W2, + mult_s = _AP_S || _AP_S2, + plus_w = + AP_MAX(_AP_W + (_AP_S2 && !_AP_S), _AP_W2 + (_AP_S && !_AP_S2)) + 1, + plus_s = _AP_S || _AP_S2, + minus_w = + AP_MAX(_AP_W + (_AP_S2 && !_AP_S), _AP_W2 + (_AP_S && !_AP_S2)) + 1, + minus_s = true, + div_w = _AP_W + _AP_S2, + div_s = _AP_S || _AP_S2, + mod_w = AP_MIN(_AP_W, _AP_W2 + (!_AP_S2 && _AP_S)), + mod_s = _AP_S, + logic_w = AP_MAX(_AP_W + (_AP_S2 && !_AP_S), _AP_W2 + (_AP_S && !_AP_S2)), + logic_s = _AP_S || _AP_S2 + }; + typedef ap_private mult; + typedef ap_private plus; + typedef ap_private minus; + typedef ap_private logic; + typedef ap_private div; + typedef ap_private mod; + typedef ap_private<_AP_W, _AP_S> arg1; + typedef bool reduce; + }; + enum { APINT_BITS_PER_WORD = sizeof(uint64_t) * 8 }; + enum { + excess_bits = (_AP_W % APINT_BITS_PER_WORD) + ? APINT_BITS_PER_WORD - (_AP_W % APINT_BITS_PER_WORD) + : 0 + }; + static const uint64_t mask = ((uint64_t)~0ULL >> (excess_bits)); + static const uint64_t not_mask = ~mask; + static const uint64_t sign_bit_mask = 1ULL << (APINT_BITS_PER_WORD - 1); + template + struct sign_ext_mask { + static const uint64_t mask = ~0ULL << _AP_W1; + }; + static const int width = _AP_W; + + enum { + BitWidth = _AP_W, + _AP_N = 1, + }; + ValType VAL; ///< Used to store the <= 64 bits integer value. 
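+  // Worked example of the constants above (editorial illustration): assuming
+  // _AP_W == 5, the byte count (5 + 7) / 8 == 1 selects (un)signed char as
+  // ValType via valtype<>, and
+  //   excess_bits = 64 - 5      = 59
+  //   mask        = ~0ULL >> 59 = 0x1F
+  //   not_mask    = ~mask       = 0xFFFFFFFFFFFFFFE0
+  // so only the low 5 bits of VAL are ever significant (sign-extended when
+  // _AP_S is true).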
+#ifdef AP_CANARY + ValType CANARY; + void check_canary() { assert(CANARY == (ValType)0xDEADBEEFDEADBEEF); } + void set_canary() { CANARY = (ValType)0xDEADBEEFDEADBEEF; } +#else + void check_canary() {} + void set_canary() {} +#endif + + INLINE ValType& get_VAL(void) { return VAL; } + INLINE ValType get_VAL(void) const { return VAL; } + INLINE ValType get_VAL(void) const volatile { return VAL; } + INLINE void set_VAL(uint64_t value) { VAL = (ValType)value; } + INLINE ValType& get_pVal(int i) { return VAL; } + INLINE ValType get_pVal(int i) const { return VAL; } + INLINE const uint64_t* get_pVal() const { + assert(0 && "invalid usage"); + return 0; + } + INLINE ValType get_pVal(int i) const volatile { return VAL; } + INLINE uint64_t* get_pVal() const volatile { + assert(0 && "invalid usage"); + return 0; + } + INLINE void set_pVal(int i, uint64_t value) { VAL = (ValType)value; } + + INLINE uint32_t getBitWidth() const { return BitWidth; } + + template + ap_private<_AP_W, _AP_S>& operator=(const ap_private<_AP_W1, _AP_S1>& RHS) { + VAL = (ValType)(RHS.get_VAL()); + clearUnusedBits(); + return *this; + } + + template + ap_private<_AP_W, _AP_S>& operator=( + const volatile ap_private<_AP_W1, _AP_S1>& RHS) { + VAL = (ValType)(RHS.get_VAL()); // TODO check here about ap_private + clearUnusedBits(); + return *this; + } + + void operator=(const ap_private& RHS) volatile { + // Don't do anything for X = X + VAL = RHS.get_VAL(); // No need to check because no harm done by copying. + } + + ap_private& operator=(const ap_private& RHS) { + // Don't do anything for X = X + VAL = RHS.get_VAL(); // No need to check because no harm done by copying. + return *this; + } + + void operator=(const volatile ap_private& RHS) volatile { + // Don't do anything for X = X + VAL = RHS.get_VAL(); // No need to check because no harm done by copying. + } + + ap_private& operator=(const volatile ap_private& RHS) { + // Don't do anything for X = X + VAL = RHS.get_VAL(); // No need to check because no harm done by copying. + return *this; + } + + template + INLINE ap_private& operator=(const _private_range_ref<_AP_W2, _AP_S2>& op2) { + *this = ap_private<_AP_W2, false>(op2); + return *this; + } + +#define ASSIGN_OP_FROM_INT(C_TYPE) \ + INLINE ap_private& operator=(const C_TYPE v) { \ + set_canary(); \ + this->VAL = (ValType)v; \ + clearUnusedBits(); \ + check_canary(); \ + return *this; \ + } + +ASSIGN_OP_FROM_INT(bool) +ASSIGN_OP_FROM_INT(char) +ASSIGN_OP_FROM_INT(signed char) +ASSIGN_OP_FROM_INT(unsigned char) +ASSIGN_OP_FROM_INT(short) +ASSIGN_OP_FROM_INT(unsigned short) +ASSIGN_OP_FROM_INT(int) +ASSIGN_OP_FROM_INT(unsigned int) +ASSIGN_OP_FROM_INT(long) +ASSIGN_OP_FROM_INT(unsigned long) +ASSIGN_OP_FROM_INT(ap_slong) +ASSIGN_OP_FROM_INT(ap_ulong) +ASSIGN_OP_FROM_INT(half) +//FIXME cast half to integer ? +ASSIGN_OP_FROM_INT(float) +ASSIGN_OP_FROM_INT(double) +#undef ASSIGN_OP_FROM_INT + + // XXX This is a must to prevent pointer being converted to bool. + INLINE ap_private& operator=(const char* s) { + ap_private tmp(s); // XXX direct-initialization, as ctor is explicit. 
+    operator=(tmp);
+    return *this;
+  }
+
+ private:
+  explicit INLINE ap_private(uint64_t* val) : VAL(val[0]) {
+    set_canary();
+    clearUnusedBits();
+    check_canary();
+  }
+
+  INLINE bool isSingleWord() const { return true; }
+
+ public:
+  INLINE void fromString(const char* strStart, uint32_t slen, uint8_t radix) {
+    bool isNeg = strStart[0] == '-';
+    if (isNeg) {
+      strStart++;
+      slen--;
+    }
+
+    if (strStart[0] == '0' && (strStart[1] == 'b' || strStart[1] == 'B')) {
+      // if (radix == 0) radix = 2;
+      _AP_WARNING(radix != 2, "%s seems to have base %d, but %d given.", strStart, 2, radix);
+      strStart += 2;
+      slen -= 2;
+    } else if (strStart[0] == '0' && (strStart[1] == 'o' || strStart[1] == 'O')) {
+      // if (radix == 0) radix = 8;
+      _AP_WARNING(radix != 8, "%s seems to have base %d, but %d given.", strStart, 8, radix);
+      strStart += 2;
+      slen -= 2;
+    } else if (strStart[0] == '0' && (strStart[1] == 'x' || strStart[1] == 'X')) {
+      // if (radix == 0) radix = 16;
+      _AP_WARNING(radix != 16, "%s seems to have base %d, but %d given.", strStart, 16, radix);
+      strStart += 2;
+      slen -= 2;
+    } else if (strStart[0] == '0' && (strStart[1] == 'd' || strStart[1] == 'D')) {
+      // if (radix == 0) radix = 10;
+      _AP_WARNING(radix != 10, "%s seems to have base %d, but %d given.", strStart, 10, radix);
+      strStart += 2;
+      slen -= 2;
+    } else if (radix == 0) {
+      // radix = 2; // XXX default value
+    }
+
+    // Check our assumptions here
+    assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
+           "Radix should be 2, 8, 10, or 16!");
+    assert(strStart && "String is null?");
+
+    // Clear bits.
+    uint64_t tmpVAL = VAL = 0;
+
+    switch (radix) {
+      case 2:
+        // sscanf(strStart,"%b",&VAL);
+        // tmpVAL = *strStart =='1' ? ~0ULL : 0;
+        for (; *strStart; ++strStart) {
+          assert((*strStart == '0' || *strStart == '1') &&
+                 ("Wrong binary number"));
+          tmpVAL <<= 1;
+          tmpVAL |= (*strStart - '0');
+        }
+        break;
+      case 8:
+#ifdef _MSC_VER
+        sscanf_s(strStart, "%llo", &tmpVAL, slen + 1);
+#else
+#if defined(__x86_64__) && !defined(__MINGW32__) && !defined(__WIN32__)
+        sscanf(strStart, "%lo", &tmpVAL);
+#else
+        sscanf(strStart, "%llo", &tmpVAL);
+#endif //__x86_64__
+#endif //_MSC_VER
+        break;
+      case 10:
+#ifdef _MSC_VER
+        sscanf_s(strStart, "%llu", &tmpVAL, slen + 1);
+#else
+#if defined(__x86_64__) && !defined(__MINGW32__) && !defined(__WIN32__)
+        sscanf(strStart, "%lu", &tmpVAL);
+#else
+        sscanf(strStart, "%llu", &tmpVAL);
+#endif //__x86_64__
+#endif //_MSC_VER
+        break;
+      case 16:
+#ifdef _MSC_VER
+        sscanf_s(strStart, "%llx", &tmpVAL, slen + 1);
+#else
+#if defined(__x86_64__) && !defined(__MINGW32__) && !defined(__WIN32__)
+        sscanf(strStart, "%lx", &tmpVAL);
+#else
+        sscanf(strStart, "%llx", &tmpVAL);
+#endif //__x86_64__
+#endif //_MSC_VER
+        break;
+      default:
+        assert(false && "Unknown radix");
+        // error
+    }
+    VAL = isNeg ? (ValType)(-tmpVAL) : (ValType)(tmpVAL);
+
+    clearUnusedBits();
+  }
+
+ private:
+  INLINE ap_private(const std::string& val, uint8_t radix = 2) : VAL(0) {
+    assert(!val.empty() && "String empty?");
+    set_canary();
+    fromString(val.c_str(), val.size(), radix);
+    check_canary();
+  }
+
+  INLINE ap_private(const char strStart[], uint32_t slen, uint8_t radix)
+      : VAL(0) {
+    set_canary();
+    fromString(strStart, slen, radix);
+    check_canary();
+  }
+
+  INLINE ap_private(uint32_t numWords, const uint64_t bigVal[])
+      : VAL(bigVal[0]) {
+    set_canary();
+    clearUnusedBits();
+    check_canary();
+  }
+
+ public:
+  INLINE ap_private() {
+    set_canary();
+    clearUnusedBits();
+    check_canary();
+  }
+
+#define CTOR(TYPE) \
+  INLINE ap_private(TYPE v) : VAL((ValType)v) { \
+    set_canary(); \
+    clearUnusedBits(); \
+    check_canary(); \
+  }
+  CTOR(bool)
+  CTOR(char)
+  CTOR(signed char)
+  CTOR(unsigned char)
+  CTOR(short)
+  CTOR(unsigned short)
+  CTOR(int)
+  CTOR(unsigned int)
+  CTOR(long)
+  CTOR(unsigned long)
+  CTOR(ap_slong)
+  CTOR(ap_ulong)
+  CTOR(half)
+  CTOR(float)
+  CTOR(double)
+#undef CTOR
+
+  template <int _AP_W1, bool _AP_S1, bool _AP_OPT>
+  INLINE ap_private(const ap_private<_AP_W1, _AP_S1, _AP_OPT>& that)
+      : VAL((ValType)that.get_VAL()) {
+    set_canary();
+    clearUnusedBits();
+    check_canary();
+  }
+
+  template <int _AP_W1, bool _AP_S1, bool _AP_OPT>
+  INLINE ap_private(const volatile ap_private<_AP_W1, _AP_S1, _AP_OPT>& that)
+      : VAL((ValType)that.get_VAL()) {
+    set_canary();
+    clearUnusedBits();
+    check_canary();
+  }
+
+  explicit INLINE ap_private(const char* val) {
+    set_canary();
+    unsigned char radix = 10;
+    std::string str = ap_private_ops::parseString(val, radix); // will set radix.
+    std::string::size_type pos = str.find('.');
+    // trunc all fraction part, keeping only the integer part
+    if (pos != std::string::npos) str = str.substr(0, pos);
+
+    ap_private<_AP_W, _AP_S> ap_private_val(str, radix);
+    operator=(ap_private_val);
+    check_canary();
+  }
+
+  INLINE ap_private(const char* val, signed char rd) {
+    set_canary();
+    unsigned char radix = rd;
+    std::string str = ap_private_ops::parseString(val, radix); // will set radix.
+    std::string::size_type pos = str.find('.');
+    // trunc all fraction part, keeping only the integer part
+    if (pos != std::string::npos) str = str.substr(0, pos);
+
+    ap_private<_AP_W, _AP_S> ap_private_val(str, radix);
+    operator=(ap_private_val);
+    check_canary();
+  }
+
+  INLINE ~ap_private() { check_canary(); }
+
+  INLINE bool isNegative() const {
+    static const uint64_t sign_mask = 1ULL << (_AP_W - 1);
+    return _AP_S && (sign_mask & VAL);
+  }
+
+  INLINE bool isPositive() const { return !isNegative(); }
+
+  INLINE bool isStrictlyPositive() const { return !isNegative() && VAL != 0; }
+
+  INLINE bool isAllOnesValue() const { return (mask & VAL) == mask; }
+
+  INLINE bool operator==(const ap_private<_AP_W, _AP_S>& RHS) const {
+    return VAL == RHS.get_VAL();
+  }
+  INLINE bool operator==(const ap_private<_AP_W, !_AP_S>& RHS) const {
+    return (uint64_t)VAL == (uint64_t)RHS.get_VAL();
+  }
+
+  INLINE bool operator==(uint64_t Val) const { return ((uint64_t)VAL == Val); }
+  INLINE bool operator!=(uint64_t Val) const { return ((uint64_t)VAL != Val); }
+  INLINE bool operator!=(const ap_private<_AP_W, _AP_S>& RHS) const {
+    return VAL != RHS.get_VAL();
+  }
+  INLINE bool operator!=(const ap_private<_AP_W, !_AP_S>& RHS) const {
+    return (uint64_t)VAL != (uint64_t)RHS.get_VAL();
+  }
+
+  /// postfix increment.
+  const ap_private operator++(int) {
+    ap_private orig(*this);
+    VAL++;
+    clearUnusedBits();
+    return orig;
+  }
+
+  /// prefix increment.
+ const ap_private operator++() { + ++VAL; + clearUnusedBits(); + return *this; + } + + /// postfix decrement. + const ap_private operator--(int) { + ap_private orig(*this); + --VAL; + clearUnusedBits(); + return orig; + } + + /// prefix decrement. + const ap_private operator--() { + --VAL; + clearUnusedBits(); + return *this; + } + + /// one's complement. + INLINE ap_private<_AP_W + !_AP_S, true> operator~() const { + ap_private<_AP_W + !_AP_S, true> Result(*this); + Result.flip(); + return Result; + } + + /// two's complement. + INLINE typename RType<1, false>::minus operator-() const { + return ap_private<1, false>(0) - (*this); + } + + /// logic negation. + INLINE bool operator!() const { return !VAL; } + + INLINE std::string toString(uint8_t radix, bool wantSigned) const; + INLINE std::string toStringUnsigned(uint8_t radix = 10) const { + return toString(radix, false); + } + INLINE std::string toStringSigned(uint8_t radix = 10) const { + return toString(radix, true); + } + INLINE void clear() { VAL = 0; } + INLINE ap_private& clear(uint32_t bitPosition) { + VAL &= ~(1ULL << (bitPosition)); + clearUnusedBits(); + return *this; + } + + INLINE ap_private ashr(uint32_t shiftAmt) const { + if (_AP_S) + return ap_private((shiftAmt == BitWidth) ? 0 + : ((int64_t)VAL) >> (shiftAmt)); + else + return ap_private((shiftAmt == BitWidth) ? 0 + : ((uint64_t)VAL) >> (shiftAmt)); + } + + INLINE ap_private lshr(uint32_t shiftAmt) const { + return ap_private((shiftAmt == BitWidth) + ? ap_private(0) + : ap_private((VAL & mask) >> (shiftAmt))); + } + + INLINE ap_private shl(uint32_t shiftAmt) const +// just for clang compiler +#if defined(__clang__) && !defined(__CLANG_3_1__) + __attribute__((no_sanitize("undefined"))) +#endif + { + if (shiftAmt > BitWidth) { + if (!isNegative()) + return ap_private(0); + else + return ap_private(-1); + } + if (shiftAmt == BitWidth) + return ap_private(0); + else + return ap_private((VAL) << (shiftAmt)); + // return ap_private((shiftAmt == BitWidth) ? 
ap_private(0ULL) : + // ap_private(VAL << shiftAmt)); + } + + INLINE int64_t getSExtValue() const { return VAL; } + + // XXX XXX this function is used in CBE + INLINE uint64_t getZExtValue() const { return VAL & mask; } + + template + INLINE ap_private(const _private_range_ref<_AP_W2, _AP_S2>& ref) { + set_canary(); + *this = ref.get(); + check_canary(); + } + + template + INLINE ap_private(const _private_bit_ref<_AP_W2, _AP_S2>& ref) { + set_canary(); + *this = ((uint64_t)(bool)ref); + check_canary(); + } + +// template +// INLINE ap_private(const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3>& ref) { +// set_canary(); +// *this = ref.get(); +// check_canary(); +// } +// +// template +// INLINE ap_private( +// const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& val) { +// set_canary(); +// *this = ((val.operator ap_private<_AP_W2, false>())); +// check_canary(); +// } +// +// template +// INLINE ap_private( +// const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& val) { +// set_canary(); +// *this = (uint64_t)(bool)val; +// check_canary(); +// } + + INLINE void write(const ap_private<_AP_W, _AP_S>& op2) volatile { + *this = (op2); + } + + // Explicit conversions to C interger types + //----------------------------------------------------------- + INLINE operator ValType() const { return get_VAL(); } + + INLINE int to_uchar() const { return (unsigned char)get_VAL(); } + + INLINE int to_char() const { return (signed char)get_VAL(); } + + INLINE int to_ushort() const { return (unsigned short)get_VAL(); } + + INLINE int to_short() const { return (short)get_VAL(); } + + INLINE int to_int() const { + // ap_private<64 /* _AP_W */, _AP_S> res(V); + return (int)get_VAL(); + } + + INLINE unsigned to_uint() const { return (unsigned)get_VAL(); } + + INLINE long to_long() const { return (long)get_VAL(); } + + INLINE unsigned long to_ulong() const { return (unsigned long)get_VAL(); } + + INLINE ap_slong to_int64() const { return (ap_slong)get_VAL(); } + + INLINE ap_ulong to_uint64() const { return (ap_ulong)get_VAL(); } + + INLINE double to_double() const { + if (isNegative()) + return roundToDouble(true); + else + return roundToDouble(false); + } + + INLINE unsigned length() const { return _AP_W; } + + INLINE bool isMinValue() const { return VAL == 0; } + template + INLINE ap_private& operator&=(const ap_private<_AP_W1, _AP_S1>& RHS) { + VAL = (ValType)(((uint64_t)VAL) & RHS.get_VAL()); + clearUnusedBits(); + return *this; + } + + template + INLINE ap_private& operator|=(const ap_private<_AP_W1, _AP_S1>& RHS) { + VAL = (ValType)(((uint64_t)VAL) | RHS.get_VAL()); + clearUnusedBits(); + return *this; + } + + template + INLINE ap_private& operator^=(const ap_private<_AP_W1, _AP_S1>& RHS) { + VAL = (ValType)(((uint64_t)VAL) ^ RHS.get_VAL()); + clearUnusedBits(); + return *this; + } + + template + INLINE ap_private& operator*=(const ap_private<_AP_W1, _AP_S1>& RHS) { + VAL = (ValType)(((uint64_t)VAL) * RHS.get_VAL()); + clearUnusedBits(); + return *this; + } + + template + INLINE ap_private& operator+=(const ap_private<_AP_W1, _AP_S1>& RHS) { + VAL = (ValType)(((uint64_t)VAL) + RHS.get_VAL()); + clearUnusedBits(); + return *this; + } + + template + INLINE ap_private& operator-=(const ap_private<_AP_W1, _AP_S1>& RHS) { + VAL = (ValType)(((uint64_t)VAL) - RHS.get_VAL()); + clearUnusedBits(); + return *this; + } + + template + INLINE typename RType<_AP_W1, _AP_S1>::logic operator&( + const ap_private<_AP_W1, _AP_S1>& RHS) const { + if (RType<_AP_W1, _AP_S1>::logic_w <= 64) { + 
typename RType<_AP_W1, _AP_S1>::logic Ret(((uint64_t)VAL) & + RHS.get_VAL()); + return Ret; + } else { + typename RType<_AP_W1, _AP_S1>::logic Ret = *this; + return Ret & RHS; + } + } + + template + INLINE typename RType<_AP_W1, _AP_S1>::logic operator^( + const ap_private<_AP_W1, _AP_S1>& RHS) const { + if (RType<_AP_W1, _AP_S1>::logic_w <= 64) { + typename RType<_AP_W1, _AP_S1>::logic Ret(((uint64_t)VAL) ^ + RHS.get_VAL()); + return Ret; + } else { + typename RType<_AP_W1, _AP_S1>::logic Ret = *this; + return Ret ^ RHS; + } + } + + template + INLINE typename RType<_AP_W1, _AP_S1>::logic operator|( + const ap_private<_AP_W1, _AP_S1>& RHS) const { + if (RType<_AP_W1, _AP_S1>::logic_w <= 64) { + typename RType<_AP_W1, _AP_S1>::logic Ret(((uint64_t)VAL) | + RHS.get_VAL()); + return Ret; + } else { + typename RType<_AP_W1, _AP_S1>::logic Ret = *this; + return Ret | RHS; + } + } + + INLINE ap_private And(const ap_private& RHS) const { + return ap_private(VAL & RHS.get_VAL()); + } + + INLINE ap_private Or(const ap_private& RHS) const { + return ap_private(VAL | RHS.get_VAL()); + } + + INLINE ap_private Xor(const ap_private& RHS) const { + return ap_private(VAL ^ RHS.get_VAL()); + } +#if 1 + template + INLINE typename RType<_AP_W1, _AP_S1>::mult operator*( + const ap_private<_AP_W1, _AP_S1>& RHS) const { + if (RType<_AP_W1, _AP_S1>::mult_w <= 64) { + typename RType<_AP_W1, _AP_S1>::mult Result(((uint64_t)VAL) * + RHS.get_VAL()); + return Result; + } else { + typename RType<_AP_W1, _AP_S1>::mult Result(*this); + Result *= RHS; + return Result; + } + } +#endif + INLINE ap_private Mul(const ap_private& RHS) const { + return ap_private(VAL * RHS.get_VAL()); + } + + INLINE ap_private Add(const ap_private& RHS) const { + return ap_private(VAL + RHS.get_VAL()); + } + + INLINE ap_private Sub(const ap_private& RHS) const { + return ap_private(VAL - RHS.get_VAL()); + } + + INLINE ap_private& operator&=(uint64_t RHS) { + VAL &= (ValType)RHS; + clearUnusedBits(); + return *this; + } + INLINE ap_private& operator|=(uint64_t RHS) { + VAL |= (ValType)RHS; + clearUnusedBits(); + return *this; + } + INLINE ap_private& operator^=(uint64_t RHS) { + VAL ^= (ValType)RHS; + clearUnusedBits(); + return *this; + } + INLINE ap_private& operator*=(uint64_t RHS) { + VAL *= (ValType)RHS; + clearUnusedBits(); + return *this; + } + INLINE ap_private& operator+=(uint64_t RHS) { + VAL += (ValType)RHS; + clearUnusedBits(); + return *this; + } + INLINE ap_private& operator-=(uint64_t RHS) { + VAL -= (ValType)RHS; + clearUnusedBits(); + return *this; + } + + INLINE bool isMinSignedValue() const { + static const uint64_t min_mask = ~(~0ULL << (_AP_W - 1)); + return BitWidth == 1 ? VAL == 1 + : (ap_private_ops::isNegative<_AP_W>(*this) && + ((min_mask & VAL) == 0)); + } + + template + INLINE typename RType<_AP_W1, _AP_S1>::plus operator+( + const ap_private<_AP_W1, _AP_S1>& RHS) const { + if (RType<_AP_W1, _AP_S1>::plus_w <= 64) + return typename RType<_AP_W1, _AP_S1>::plus( + RType<_AP_W1, _AP_S1>::plus_s + ? 
int64_t(((uint64_t)VAL) + RHS.get_VAL()) + : uint64_t(((uint64_t)VAL) + RHS.get_VAL())); + typename RType<_AP_W1, _AP_S1>::plus Result = RHS; + Result += VAL; + return Result; + } + + template + INLINE typename RType<_AP_W1, _AP_S1>::minus operator-( + const ap_private<_AP_W1, _AP_S1>& RHS) const { + if (RType<_AP_W1, _AP_S1>::minus_w <= 64) + return typename RType<_AP_W1, _AP_S1>::minus( + int64_t(((uint64_t)VAL) - RHS.get_VAL())); + typename RType<_AP_W1, _AP_S1>::minus Result = *this; + Result -= RHS; + return Result; + } + + INLINE uint32_t countPopulation() const { + return ap_private_ops::CountPopulation_64(VAL); + } + INLINE uint32_t countLeadingZeros() const { + int remainder = BitWidth % 64; + int excessBits = (64 - remainder) % 64; + uint32_t Count = ap_private_ops::CountLeadingZeros_64(VAL); + if (Count) Count -= excessBits; + return AESL_std::min(Count, (uint32_t)_AP_W); + } + + /// HiBits - This function returns the high "numBits" bits of this ap_private. + INLINE ap_private<_AP_W, _AP_S> getHiBits(uint32_t numBits) const { + ap_private<_AP_W, _AP_S> ret(*this); + ret = (ret) >> (BitWidth - numBits); + return ret; + } + + /// LoBits - This function returns the low "numBits" bits of this ap_private. + INLINE ap_private<_AP_W, _AP_S> getLoBits(uint32_t numBits) const { + ap_private<_AP_W, _AP_S> ret(((uint64_t)VAL) << (BitWidth - numBits)); + ret = (ret) >> (BitWidth - numBits); + return ret; + // return ap_private(numBits, (VAL << (BitWidth - numBits))>> (BitWidth - + // numBits)); + } + + INLINE ap_private<_AP_W, _AP_S>& set(uint32_t bitPosition) { + VAL |= (1ULL << (bitPosition)); + clearUnusedBits(); + return *this; // clearUnusedBits(); + } + + INLINE void set() { + VAL = (ValType)~0ULL; + clearUnusedBits(); + } + + template + INLINE void set(const ap_private<_AP_W3, false>& val) { + operator=(ap_private<_AP_W3, _AP_S>(val)); + } + + INLINE void set(const ap_private& val) { operator=(val); } + + INLINE void clearUnusedBits(void) +// just for clang compiler +#if defined(__clang__) && !defined(__CLANG_3_1__) + __attribute__((no_sanitize("undefined"))) +#endif + { + enum { excess_bits = (_AP_W % 64) ? 64 - _AP_W % 64 : 0 }; + VAL = (ValType)( + _AP_S + ? ((((int64_t)VAL) << (excess_bits)) >> (excess_bits)) + : (excess_bits ? (((uint64_t)VAL) << (excess_bits)) >> (excess_bits) + : (uint64_t)VAL)); + } + + INLINE void clearUnusedBitsToZero(void) { + enum { excess_bits = (_AP_W % 64) ? 64 - _AP_W % 64 : 0 }; + static uint64_t mask = ~0ULL >> (excess_bits); + VAL &= mask; + } + + INLINE ap_private udiv(const ap_private& RHS) const { + return ap_private((uint64_t)VAL / RHS.get_VAL()); + } + + /// Signed divide this ap_private by ap_private RHS. + /// @brief Signed division function for ap_private. + INLINE ap_private sdiv(const ap_private& RHS) const { + if (isNegative()) + if (RHS.isNegative()) + return ((uint64_t)(0 - (*this))) / (uint64_t)(0 - RHS); + else + return 0 - ((uint64_t)(0 - (*this)) / (uint64_t)(RHS)); + else if (RHS.isNegative()) + return 0 - (this->udiv((ap_private)(0 - RHS))); + return this->udiv(RHS); + } + + template + INLINE ap_private urem(const ap_private<_AP_W, _AP_S2>& RHS) const { + assert(RHS.get_VAL() != 0 && "Divide by 0"); + return ap_private(((uint64_t)VAL) % ((uint64_t)RHS.get_VAL())); + } + + /// Signed remainder operation on ap_private. + /// @brief Function for signed remainder operation. 
+ template + INLINE ap_private srem(const ap_private<_AP_W, _AP_S2>& RHS) const { + if (isNegative()) { + ap_private lhs = 0 - (*this); + if (RHS.isNegative()) { + ap_private rhs = 0 - RHS; + return 0 - (lhs.urem(rhs)); + } else + return 0 - (lhs.urem(RHS)); + } else if (RHS.isNegative()) { + ap_private rhs = 0 - RHS; + return this->urem(rhs); + } + return this->urem(RHS); + } + + template + INLINE bool eq(const ap_private<_AP_W1, _AP_S1>& RHS) const { + return (*this) == RHS; + } + + template + INLINE bool ne(const ap_private<_AP_W1, _AP_S1>& RHS) const { + return !((*this) == RHS); + } + + /// Regards both *this and RHS as unsigned quantities and compares them for + /// the validity of the less-than relationship. + /// @returns true if *this < RHS when both are considered unsigned. + /// @brief Unsigned less than comparison + template + INLINE bool ult(const ap_private<_AP_W1, _AP_S1>& RHS) const { + if (_AP_W1 <= 64) { + uint64_t lhsZext = ((uint64_t(VAL)) << (64 - _AP_W)) >> (64 - _AP_W); + uint64_t rhsZext = + ((uint64_t(RHS.get_VAL())) << (64 - _AP_W1)) >> (64 - _AP_W1); + return lhsZext < rhsZext; + } else + return RHS.uge(*this); + } + + /// Regards both *this and RHS as signed quantities and compares them for + /// validity of the less-than relationship. + /// @returns true if *this < RHS when both are considered signed. + /// @brief Signed less than comparison + template + INLINE bool slt(const ap_private<_AP_W1, _AP_S1>& RHS) const +// just for clang compiler +#if defined(__clang__) && !defined(__CLANG_3_1__) + __attribute__((no_sanitize("undefined"))) +#endif + { + if (_AP_W1 <= 64) { + int64_t lhsSext = ((int64_t(VAL)) << (64 - _AP_W)) >> (64 - _AP_W); + int64_t rhsSext = + ((int64_t(RHS.get_VAL())) << (64 - _AP_W1)) >> (64 - _AP_W1); + return lhsSext < rhsSext; + } else + return RHS.sge(*this); + } + + /// Regards both *this and RHS as unsigned quantities and compares them for + /// validity of the less-or-equal relationship. + /// @returns true if *this <= RHS when both are considered unsigned. + /// @brief Unsigned less or equal comparison + template + INLINE bool ule(const ap_private<_AP_W1, _AP_S1>& RHS) const { + return ult(RHS) || eq(RHS); + } + + /// Regards both *this and RHS as signed quantities and compares them for + /// validity of the less-or-equal relationship. + /// @returns true if *this <= RHS when both are considered signed. + /// @brief Signed less or equal comparison + template + INLINE bool sle(const ap_private<_AP_W1, _AP_S1>& RHS) const { + return slt(RHS) || eq(RHS); + } + + /// Regards both *this and RHS as unsigned quantities and compares them for + /// the validity of the greater-than relationship. + /// @returns true if *this > RHS when both are considered unsigned. + /// @brief Unsigned greather than comparison + template + INLINE bool ugt(const ap_private<_AP_W1, _AP_S1>& RHS) const { + return !ult(RHS) && !eq(RHS); + } + + /// Regards both *this and RHS as signed quantities and compares them for + /// the validity of the greater-than relationship. + /// @returns true if *this > RHS when both are considered signed. + /// @brief Signed greather than comparison + template + INLINE bool sgt(const ap_private<_AP_W1, _AP_S1>& RHS) const { + return !slt(RHS) && !eq(RHS); + } + + /// Regards both *this and RHS as unsigned quantities and compares them for + /// validity of the greater-or-equal relationship. + /// @returns true if *this >= RHS when both are considered unsigned. 
+ /// @brief Unsigned greater or equal comparison + template + INLINE bool uge(const ap_private<_AP_W1, _AP_S1>& RHS) const { + return !ult(RHS); + } + + /// Regards both *this and RHS as signed quantities and compares them for + /// validity of the greater-or-equal relationship. + /// @returns true if *this >= RHS when both are considered signed. + /// @brief Signed greather or equal comparison + template + INLINE bool sge(const ap_private<_AP_W1, _AP_S1>& RHS) const { + return !slt(RHS); + } + + INLINE ap_private abs() const { + if (isNegative()) return -(*this); + return *this; + } + + INLINE ap_private<_AP_W, false> get() const { + ap_private<_AP_W, false> ret(*this); + return ret; + } + + INLINE static uint32_t getBitsNeeded(const char* str, uint32_t slen, + uint8_t radix) { + return _AP_W; + } + + INLINE uint32_t getActiveBits() const { + uint32_t bits = _AP_W - countLeadingZeros(); + return bits ? bits : 1; + } + + INLINE double roundToDouble(bool isSigned = false) const { + return isSigned ? double((int64_t)VAL) : double((uint64_t)VAL); + } + + /*Reverse the contents of ap_private instance. I.e. LSB becomes MSB and vise + * versa*/ + INLINE ap_private& reverse() { + for (int i = 0; i < _AP_W / 2; ++i) { + bool tmp = operator[](i); + if (operator[](_AP_W - 1 - i)) + set(i); + else + clear(i); + if (tmp) + set(_AP_W - 1 - i); + else + clear(_AP_W - 1 - i); + } + clearUnusedBits(); + return *this; + } + + /*Return true if the value of ap_private instance is zero*/ + INLINE bool iszero() const { return isMinValue(); } + + INLINE bool to_bool() const { return !iszero(); } + + /* x < 0 */ + INLINE bool sign() const { + if (isNegative()) return true; + return false; + } + + /* x[i] = !x[i] */ + INLINE void invert(int i) { + assert(i >= 0 && "Attempting to read bit with negative index"); + assert(i < _AP_W && "Attempting to read bit beyond MSB"); + flip(i); + } + + /* x[i] */ + INLINE bool test(int i) const { + assert(i >= 0 && "Attempting to read bit with negative index"); + assert(i < _AP_W && "Attempting to read bit beyond MSB"); + return operator[](i); + } + + // This is used for sc_lv and sc_bv, which is implemented by sc_uint + // Rotate an ap_private object n places to the left + INLINE void lrotate(int n) { + assert(n >= 0 && "Attempting to shift negative index"); + assert(n < _AP_W && "Shift value larger than bit width"); + operator=(shl(n) | lshr(_AP_W - n)); + } + + // This is used for sc_lv and sc_bv, which is implemented by sc_uint + // Rotate an ap_private object n places to the right + INLINE void rrotate(int n) { + assert(n >= 0 && "Attempting to shift negative index"); + assert(n < _AP_W && "Shift value larger than bit width"); + operator=(lshr(n) | shl(_AP_W - n)); + } + + // Set the ith bit into v + INLINE void set(int i, bool v) { + assert(i >= 0 && "Attempting to write bit with negative index"); + assert(i < _AP_W && "Attempting to write bit beyond MSB"); + v ? set(i) : clear(i); + } + + // Set the ith bit into v + INLINE void set_bit(int i, bool v) { + assert(i >= 0 && "Attempting to write bit with negative index"); + assert(i < _AP_W && "Attempting to write bit beyond MSB"); + v ? set(i) : clear(i); + } + + // Get the value of ith bit + INLINE bool get_bit(int i) const { + assert(i >= 0 && "Attempting to read bit with negative index"); + assert(i < _AP_W && "Attempting to read bit beyond MSB"); + return (((1ULL << i) & VAL) != 0); + } + + /// Toggle all bits. 
+  INLINE ap_private& flip() {
+    VAL = (ValType)((~0ULL ^ VAL) & mask);
+    clearUnusedBits();
+    return *this;
+  }
+
+  /// Toggles a given bit to its opposite value.
+  INLINE ap_private& flip(uint32_t bitPosition) {
+    assert(bitPosition < BitWidth && "Out of the bit-width range!");
+    set_bit(bitPosition, !get_bit(bitPosition));
+    return *this;
+  }
+
+  // complements every bit
+  INLINE void b_not() { flip(); }
+
+// Binary Arithmetic
+//-----------------------------------------------------------
+#define OP_BIN_AP(Sym, Rty, Fun) \
+  template <int _AP_W2, bool _AP_S2> \
+  INLINE typename RType<_AP_W2, _AP_S2>::Rty operator Sym( \
+      const ap_private<_AP_W2, _AP_S2>& op) const { \
+    typename RType<_AP_W2, _AP_S2>::Rty lhs(*this); \
+    typename RType<_AP_W2, _AP_S2>::Rty rhs(op); \
+    return lhs.Fun(rhs); \
+  }
+
+/// Bitwise and, or, xor
+// OP_BIN_AP(&,logic, And)
+// OP_BIN_AP(|,logic, Or)
+// OP_BIN_AP(^,logic, Xor)
+#undef OP_BIN_AP
+
+  template <int _AP_W2, bool _AP_S2>
+  INLINE typename RType<_AP_W2, _AP_S2>::div operator/(
+      const ap_private<_AP_W2, _AP_S2>& op) const {
+    ap_private<AP_MAX(_AP_W + (_AP_S || _AP_S2), _AP_W2 + (_AP_S || _AP_S2)),
+               (_AP_W > _AP_W2 ? _AP_S
+                               : (_AP_W2 > _AP_W ? _AP_S2 : _AP_S || _AP_S2))>
+        lhs = *this;
+    ap_private<AP_MAX(_AP_W + (_AP_S || _AP_S2), _AP_W2 + (_AP_S || _AP_S2)),
+               (_AP_W > _AP_W2 ? _AP_S
+                               : (_AP_W2 > _AP_W ? _AP_S2 : _AP_S || _AP_S2))>
+        rhs = op;
+    return typename RType<_AP_W2, _AP_S2>::div(
+        (_AP_S || _AP_S2) ? lhs.sdiv(rhs) : lhs.udiv(rhs));
+  }
+
+  template <int _AP_W2, bool _AP_S2>
+  INLINE typename RType<_AP_W2, _AP_S2>::mod operator%(
+      const ap_private<_AP_W2, _AP_S2>& op) const {
+    ap_private<AP_MAX(_AP_W + (_AP_S || _AP_S2), _AP_W2 + (_AP_S || _AP_S2)),
+               (_AP_W > _AP_W2 ? _AP_S
+                               : (_AP_W2 > _AP_W ? _AP_S2 : _AP_S || _AP_S2))>
+        lhs = *this;
+    ap_private<AP_MAX(_AP_W + (_AP_S || _AP_S2), _AP_W2 + (_AP_S || _AP_S2)),
+               (_AP_W > _AP_W2 ? _AP_S
+                               : (_AP_W2 > _AP_W ? _AP_S2 : _AP_S || _AP_S2))>
+        rhs = op;
+    typename RType<_AP_W2, _AP_S2>::mod res =
+        typename RType<_AP_W2, _AP_S2>::mod(_AP_S ? lhs.srem(rhs)
+                                                  : lhs.urem(rhs));
+    return res;
+  }
+
+#define OP_ASSIGN_AP_2(Sym) \
+  template <int _AP_W2, bool _AP_S2> \
+  INLINE ap_private<_AP_W, _AP_S>& operator Sym##=( \
+      const ap_private<_AP_W2, _AP_S2>& op) { \
+    *this = operator Sym(op); \
+    return *this; \
+  }
+
+  OP_ASSIGN_AP_2(/)
+  OP_ASSIGN_AP_2(%)
+#undef OP_ASSIGN_AP_2
+
+/// Bitwise assign: and, or, xor
+//-------------------------------------------------------------
+// OP_ASSIGN_AP(&)
+// OP_ASSIGN_AP(^)
+// OP_ASSIGN_AP(|)
+
+#define OP_LEFT_SHIFT_CTYPE(TYPE, SIGNED) \
+  INLINE ap_private operator<<(const TYPE op) const { \
+    if (op >= _AP_W) return ap_private(0); \
+    if (SIGNED && op < 0) return *this >> (0 - op); \
+    return shl(op); \
+  }
+
+  // OP_LEFT_SHIFT_CTYPE(bool, false)
+  OP_LEFT_SHIFT_CTYPE(char, CHAR_IS_SIGNED)
+  OP_LEFT_SHIFT_CTYPE(signed char, true)
+  OP_LEFT_SHIFT_CTYPE(unsigned char, false)
+  OP_LEFT_SHIFT_CTYPE(short, true)
+  OP_LEFT_SHIFT_CTYPE(unsigned short, false)
+  OP_LEFT_SHIFT_CTYPE(int, true)
+  OP_LEFT_SHIFT_CTYPE(unsigned int, false)
+  OP_LEFT_SHIFT_CTYPE(long, true)
+  OP_LEFT_SHIFT_CTYPE(unsigned long, false)
+  OP_LEFT_SHIFT_CTYPE(long long, true)
+  OP_LEFT_SHIFT_CTYPE(unsigned long long, false)
+  OP_LEFT_SHIFT_CTYPE(half, false)
+  OP_LEFT_SHIFT_CTYPE(float, false)
+  OP_LEFT_SHIFT_CTYPE(double, false)
+
+#undef OP_LEFT_SHIFT_CTYPE
+
+  template <int _AP_W2, bool _AP_S2>
+  INLINE ap_private operator<<(const ap_private<_AP_W2, _AP_S2>& op2) const {
+    if (_AP_S2 == false) {
+      uint32_t sh = op2.to_uint();
+      return *this << sh;
+    } else {
+      int sh = op2.to_int();
+      return *this << sh;
+    }
+  }
+
+#define OP_RIGHT_SHIFT_CTYPE(TYPE, SIGNED) \
+  INLINE ap_private operator>>(const TYPE op) const { \
+    if (op >= _AP_W) { \
+      if (isNegative()) \
+        return ap_private(-1); \
+      else \
+        return ap_private(0); \
+    } \
+    if ((SIGNED) && op < 0) return *this << (0 - op); \
+    if (_AP_S)
\ + return ashr(op); \ + else \ + return lshr(op); \ + } + + // OP_RIGHT_SHIFT_CTYPE(bool, false) + OP_RIGHT_SHIFT_CTYPE(char, CHAR_IS_SIGNED) + OP_RIGHT_SHIFT_CTYPE(signed char, true) + OP_RIGHT_SHIFT_CTYPE(unsigned char, false) + OP_RIGHT_SHIFT_CTYPE(short, true) + OP_RIGHT_SHIFT_CTYPE(unsigned short, false) + OP_RIGHT_SHIFT_CTYPE(int, true) + OP_RIGHT_SHIFT_CTYPE(unsigned int, false) + OP_RIGHT_SHIFT_CTYPE(long, true) + OP_RIGHT_SHIFT_CTYPE(unsigned long, false) + OP_RIGHT_SHIFT_CTYPE(unsigned long long, false) + OP_RIGHT_SHIFT_CTYPE(long long, true) + OP_RIGHT_SHIFT_CTYPE(half, false) + OP_RIGHT_SHIFT_CTYPE(float, false) + OP_RIGHT_SHIFT_CTYPE(double, false) + +#undef OP_RIGHT_SHIFT_CTYPE + + template + INLINE ap_private operator>>(const ap_private<_AP_W2, _AP_S2>& op2) const { + if (_AP_S2 == false) { + uint32_t sh = op2.to_uint(); + return *this >> sh; + } else { + int sh = op2.to_int(); + return *this >> sh; + } + } + + /// Shift assign + //----------------------------------------------------------------- + + //INLINE const ap_private& operator<<=(uint32_t shiftAmt) { + // VAL <<= shiftAmt; + // clearUnusedBits(); + // return *this; + //} + +#define OP_ASSIGN_AP(Sym) \ + template \ + INLINE ap_private& operator Sym##=(int op) { \ + *this = operator Sym(op); \ + clearUnusedBits(); \ + return *this; \ + } \ + INLINE ap_private& operator Sym##=(unsigned int op) { \ + *this = operator Sym(op); \ + clearUnusedBits(); \ + return *this; \ + } \ + template \ + INLINE ap_private& operator Sym##=(const ap_private<_AP_W2, _AP_S2>& op) { \ + *this = operator Sym(op); \ + clearUnusedBits(); \ + return *this; \ + } + + OP_ASSIGN_AP(>>) + OP_ASSIGN_AP(<<) +#undef OP_ASSIGN_AP + + /// Comparisons + //----------------------------------------------------------------- + template + INLINE bool operator==(const ap_private<_AP_W1, _AP_S1>& op) const { + enum { _AP_MAX_W = AP_MAX(AP_MAX(_AP_W, _AP_W1), 32) }; + ap_private<_AP_MAX_W, false> lhs(*this); + ap_private<_AP_MAX_W, false> rhs(op); + if (_AP_MAX_W <= 64) { + return (uint64_t)lhs.get_VAL() == (uint64_t)rhs.get_VAL(); + } else + return lhs == rhs; + } + + template + INLINE bool operator!=(const ap_private<_AP_W2, _AP_S2>& op) const { + return !(*this == op); + } + + template + INLINE bool operator>(const ap_private<_AP_W2, _AP_S2>& op) const { + enum { + _AP_MAX_W = AP_MAX(_AP_W + (_AP_S || _AP_S2), _AP_W2 + (_AP_S || _AP_S2)) + }; + ap_private<_AP_MAX_W, _AP_S> lhs(*this); + ap_private<_AP_MAX_W, _AP_S2> rhs(op); + // this will follow gcc rule for comparison + // between different bitwidth and signness + if (_AP_S == _AP_S2) + return _AP_S ? lhs.sgt(rhs) : lhs.ugt(rhs); + else if (_AP_W < 32 && _AP_W2 < 32) + // different signness but both bitwidth is less than 32 + return lhs.sgt(rhs); + else + // different signness but bigger bitwidth + // is greater or equal to 32 + if (_AP_S) + if (_AP_W2 >= _AP_W) + return lhs.ugt(rhs); + else + return lhs.sgt(rhs); + else if (_AP_W >= _AP_W2) + return lhs.ugt(rhs); + else + return lhs.sgt(rhs); + } + + template + INLINE bool operator<=(const ap_private<_AP_W2, _AP_S2>& op) const { + return !(*this > op); + } + + template + INLINE bool operator<(const ap_private<_AP_W2, _AP_S2>& op) const { + enum { + _AP_MAX_W = AP_MAX(_AP_W + (_AP_S || _AP_S2), _AP_W2 + (_AP_S || _AP_S2)) + }; + ap_private<_AP_MAX_W, _AP_S> lhs(*this); + ap_private<_AP_MAX_W, _AP_S2> rhs(op); + if (_AP_S == _AP_S2) + return _AP_S ? 
lhs.slt(rhs) : lhs.ult(rhs); + else if (_AP_W < 32 && _AP_W2 < 32) + return lhs.slt(rhs); + else if (_AP_S) + if (_AP_W2 >= _AP_W) + return lhs.ult(rhs); + else + return lhs.slt(rhs); + else if (_AP_W >= _AP_W2) + return lhs.ult(rhs); + else + return lhs.slt(rhs); + } + + template + INLINE bool operator>=(const ap_private<_AP_W2, _AP_S2>& op) const { + return !(*this < op); + } + + /// Bit and Part Select + //-------------------------------------------------------------- + // FIXME now _private_range_ref refs to _AP_ROOT_TYPE(struct ssdm_int). + INLINE _private_range_ref<_AP_W, _AP_S> operator()(int Hi, int Lo) { + return _private_range_ref<_AP_W, _AP_S>(this, Hi, Lo); + } + + INLINE _private_range_ref<_AP_W, _AP_S> operator()(int Hi, int Lo) const { + return _private_range_ref<_AP_W, _AP_S>( + const_cast*>(this), Hi, Lo); + } + + INLINE _private_range_ref<_AP_W, _AP_S> range(int Hi, int Lo) const { + return _private_range_ref<_AP_W, _AP_S>( + (const_cast*>(this)), Hi, Lo); + } + + INLINE _private_range_ref<_AP_W, _AP_S> range(int Hi, int Lo) { + return _private_range_ref<_AP_W, _AP_S>(this, Hi, Lo); + } + + INLINE _private_bit_ref<_AP_W, _AP_S> operator[](int index) { + return _private_bit_ref<_AP_W, _AP_S>(*this, index); + } + + template + INLINE _private_bit_ref<_AP_W, _AP_S> operator[]( + const ap_private<_AP_W2, _AP_S2>& index) { + return _private_bit_ref<_AP_W, _AP_S>(*this, index.to_int()); + } + + INLINE const _private_bit_ref<_AP_W, _AP_S> operator[](int index) const { + return _private_bit_ref<_AP_W, _AP_S>( + const_cast&>(*this), index); + } + + template + INLINE const _private_bit_ref<_AP_W, _AP_S> operator[]( + const ap_private<_AP_W2, _AP_S2>& index) const { + return _private_bit_ref<_AP_W, _AP_S>( + const_cast&>(*this), index.to_int()); + } + + INLINE _private_bit_ref<_AP_W, _AP_S> bit(int index) { + return _private_bit_ref<_AP_W, _AP_S>(*this, index); + } + + template + INLINE _private_bit_ref<_AP_W, _AP_S> bit(const ap_private<_AP_W2, _AP_S2>& index) { + return _private_bit_ref<_AP_W, _AP_S>(*this, index.to_int()); + } + + INLINE const _private_bit_ref<_AP_W, _AP_S> bit(int index) const { + return _private_bit_ref<_AP_W, _AP_S>( + const_cast&>(*this), index); + } + + template + INLINE const _private_bit_ref<_AP_W, _AP_S> bit( + const ap_private<_AP_W2, _AP_S2>& index) const { + return _private_bit_ref<_AP_W, _AP_S>( + const_cast&>(*this), index.to_int()); + } + +// template +// INLINE ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2, +// ap_private<_AP_W2, _AP_S2> > +// concat(const ap_private<_AP_W2, _AP_S2>& a2) const { +// return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2, +// ap_private<_AP_W2, _AP_S2> >( +// const_cast&>(*this), +// const_cast&>(a2)); +// } +// +// template +// INLINE ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2, +// ap_private<_AP_W2, _AP_S2> > +// concat(ap_private<_AP_W2, _AP_S2>& a2) { +// return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2, +// ap_private<_AP_W2, _AP_S2> >(*this, a2); +// } +// +// template +// INLINE ap_concat_ref<_AP_W, ap_private, _AP_W2, ap_private<_AP_W2, _AP_S2> > +// operator,(const ap_private<_AP_W2, _AP_S2> &a2) const { +// return ap_concat_ref<_AP_W, ap_private, _AP_W2, +// ap_private<_AP_W2, _AP_S2> >( +// const_cast&>(*this), +// const_cast&>(a2)); +// } +// +// template +// INLINE ap_concat_ref<_AP_W, ap_private, _AP_W2, ap_private<_AP_W2, _AP_S2> > +// operator,(const ap_private<_AP_W2, _AP_S2> &a2) { +// return ap_concat_ref<_AP_W, ap_private, _AP_W2, +// ap_private<_AP_W2, _AP_S2> 
>( +// *this, const_cast&>(a2)); +// } +// +// template +// INLINE ap_concat_ref<_AP_W, ap_private, _AP_W2, ap_private<_AP_W2, _AP_S2> > +// operator,(ap_private<_AP_W2, _AP_S2> &a2) const { +// return ap_concat_ref<_AP_W, ap_private, _AP_W2, +// ap_private<_AP_W2, _AP_S2> >( +// const_cast&>(*this), a2); +// } +// +// template +// INLINE ap_concat_ref<_AP_W, ap_private, _AP_W2, ap_private<_AP_W2, _AP_S2> > +// operator,(ap_private<_AP_W2, _AP_S2> &a2) { +// return ap_concat_ref<_AP_W, ap_private, _AP_W2, +// ap_private<_AP_W2, _AP_S2> >(*this, a2); +// } +// +// template +// INLINE ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2, +// _private_range_ref<_AP_W2, _AP_S2> > +// operator,(const _private_range_ref<_AP_W2, _AP_S2> &a2) const { +// return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2, +// _private_range_ref<_AP_W2, _AP_S2> >( +// const_cast&>(*this), +// const_cast<_private_range_ref<_AP_W2, _AP_S2>&>(a2)); +// } +// +// template +// INLINE ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2, +// _private_range_ref<_AP_W2, _AP_S2> > +// operator,(_private_range_ref<_AP_W2, _AP_S2> &a2) { +// return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2, +// _private_range_ref<_AP_W2, _AP_S2> >(*this, a2); +// } +// +// template +// INLINE ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, 1, +// _private_bit_ref<_AP_W2, _AP_S2> > +// operator,(const _private_bit_ref<_AP_W2, _AP_S2> &a2) const { +// return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, 1, +// _private_bit_ref<_AP_W2, _AP_S2> >( +// const_cast&>(*this), +// const_cast<_private_bit_ref<_AP_W2, _AP_S2>&>(a2)); +// } +// +// template +// INLINE ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, 1, +// _private_bit_ref<_AP_W2, _AP_S2> > +// operator,(_private_bit_ref<_AP_W2, _AP_S2> &a2) { +// return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, 1, +// _private_bit_ref<_AP_W2, _AP_S2> >(*this, a2); +// } +// +// template +// INLINE ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2 + _AP_W3, +// ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> > +// operator,(const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> &a2) const { +// return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2 + _AP_W3, +// ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> >( +// const_cast&>(*this), +// const_cast&>(a2)); +// } +// +// template +// INLINE ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2 + _AP_W3, +// ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> > +// operator,(ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> &a2) { +// return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2 + _AP_W3, +// ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> >(*this, +// a2); +// } +// +// template +// INLINE ap_concat_ref< +// _AP_W, ap_private, _AP_W2, +// af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > +// operator,(const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> +// &a2) const { +// return ap_concat_ref< +// _AP_W, ap_private, _AP_W2, +// af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >( +// const_cast&>(*this), +// const_cast< +// af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>&>(a2)); +// } +// +// template +// INLINE ap_concat_ref< +// _AP_W, ap_private, _AP_W2, +// af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > +// operator,(af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> &a2) { +// return ap_concat_ref< +// _AP_W, ap_private, _AP_W2, +// af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >(*this, +// a2); +// } +// +// template +// 
INLINE +// ap_concat_ref<_AP_W, ap_private, 1, +// af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > +// operator,(const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> +// &a2) const { +// return ap_concat_ref< +// _AP_W, ap_private, 1, +// af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >( +// const_cast&>(*this), +// const_cast&>( +// a2)); +// } +// +// template +// INLINE +// ap_concat_ref<_AP_W, ap_private, 1, +// af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > +// operator,( +// af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> &a2) { +// return ap_concat_ref< +// _AP_W, ap_private, 1, +// af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >(*this, a2); +// } +// +// template +// INLINE ap_private operator&( +// const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3>& a2) { +// return *this & a2.get(); +// } +// +// template +// INLINE ap_private operator|( +// const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3>& a2) { +// return *this | a2.get(); +// } +// +// template +// INLINE ap_private operator^( +// const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3>& a2) { +// return *this ^ a2.get(); +// } + + // Reduce operation + //----------------------------------------------------------- + INLINE bool and_reduce() const { return (VAL & mask) == mask; } + + INLINE bool nand_reduce() const { return (VAL & mask) != mask; } + + INLINE bool or_reduce() const { return (bool)VAL; } + + INLINE bool nor_reduce() const { return VAL == 0; } + + INLINE bool xor_reduce() const { + unsigned int i = countPopulation(); + return (i % 2) ? true : false; + } + + INLINE bool xnor_reduce() const { + unsigned int i = countPopulation(); + return (i % 2) ? false : true; + } + + INLINE std::string to_string(uint8_t radix = 2, bool sign = false) const { + return toString(radix, radix == 10 ? _AP_S : sign); + } +}; // End of class ap_private <_AP_W, _AP_S, true> + +template +std::string ap_private<_AP_W, _AP_S, true>::toString(uint8_t radix, + bool wantSigned) const { + assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) && + "Radix should be 2, 8, 10, or 16!"); + static const char* digits[] = {"0", "1", "2", "3", "4", "5", "6", "7", + "8", "9", "a", "b", "c", "d", "e", "f"}; + std::string result; + if (radix != 10) { + // For the 2, 8 and 16 bit cases, we can just shift instead of divide + // because the number of bits per digit (1,3 and 4 respectively) divides + // equaly. We just shift until there value is zero. + + // First, check for a zero value and just short circuit the logic below. + if (*this == (uint64_t)(0)) { + // Always generate a radix indicator because fixed-point + // formats require it. + switch (radix) { + case 2: + result = "0b0"; + break; + case 8: + result = "0o0"; + break; + case 16: + result = "0x0"; + break; + default: + assert("invalid radix" && 0); + } + } else { + ap_private<_AP_W, false, true> tmp(*this); + size_t insert_at = 0; + bool leading_zero = true; + if (wantSigned && isNegative()) { + // They want to print the signed version and it is a negative value + // Flip the bits and add one to turn it into the equivalent positive + // value and put a '-' in the result. 
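+        // For instance (illustrative): a 4-bit value 0b1101 (-3) flips to
+        // 0b0010, increments to 0b0011 (= 3), and the digits of 3 are then
+        // emitted after the leading '-'.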
+        tmp.flip();
+        tmp++;
+        result = "-";
+        insert_at = 1;
+        leading_zero = false;
+      }
+      switch (radix) {
+        case 2:
+          result += "0b";
+          break;
+        case 8:
+          result += "0o";
+          break;
+        case 16:
+          result += "0x";
+          break;
+        default:
+          assert("invalid radix" && 0);
+      }
+      insert_at += 2;
+
+      // Just shift tmp right for each digit width until it becomes zero
+      uint32_t shift = (radix == 16 ? 4 : (radix == 8 ? 3 : 1));
+      uint64_t mask = radix - 1;
+      ap_private<_AP_W, false, true> zero(0);
+      unsigned bits = 0;
+      bool msb = false;
+      while (tmp.ne(zero)) {
+        unsigned digit = (unsigned)(tmp.get_VAL() & mask);
+        result.insert(insert_at, digits[digit]);
+        tmp = tmp.lshr(shift);
+        bits++;
+        msb = (digit >> (shift - 1)) == 1;
+      }
+      bits *= shift;
+      if (bits < _AP_W && leading_zero && msb)
+        result.insert(insert_at, digits[0]);
+    }
+    return result;
+  }
+
+  ap_private<_AP_W, false, true> tmp(*this);
+  ap_private<6, false, true> divisor(radix);
+  ap_private<_AP_W, _AP_S, true> zero(0);
+  size_t insert_at = 0;
+  if (wantSigned && isNegative()) {
+    // They want to print the signed version and it is a negative value.
+    // Flip the bits and add one to turn it into the equivalent positive
+    // value and put a '-' in the result.
+    tmp.flip();
+    tmp++;
+    result = "-";
+    insert_at = 1;
+  }
+  if (tmp == ap_private<_AP_W, false, true>(0ULL))
+    result = "0";
+  else
+    while (tmp.ne(zero)) {
+      ap_private<_AP_W, false, true> APdigit = tmp % divisor;
+      ap_private<_AP_W, false, true> tmp2 = tmp / divisor;
+      uint32_t digit = (uint32_t)(APdigit.getZExtValue());
+      assert(digit < radix && "divide failed");
+      result.insert(insert_at, digits[digit]);
+      tmp = tmp2;
+    }
+  return result;
+
+} // End of ap_private<_AP_W, _AP_S, true>::toString()
+
+// bitwidth > 64
+template <int _AP_W, bool _AP_S>
+class ap_private<_AP_W, _AP_S, false> {
+  // SFINAE pattern. Only consider this class when _AP_W > 64
+  const static bool valid = ap_private_enable_if<(_AP_W > 64)>::isValid;
+
+#ifdef _MSC_VER
+#pragma warning(disable : 4521 4522)
+#endif
+ public:
+  enum { BitWidth = _AP_W, _AP_N = (_AP_W + 63) / 64 };
+  static const int width = _AP_W;
+
+ private:
+  /// This constructor is used only internally for speed of construction of
+  /// temporaries. It is unsafe for general use so it is not public.
+
+  /* Constructors */
+  /// Note that numWords can be smaller or larger than the corresponding bit
+  /// width but any extraneous bits will be dropped.
+  /// @param numWords the number of words in bigVal
+  /// @param bigVal a sequence of words to form the initial value of the
+  /// ap_private
+  /// @brief Construct an ap_private, initialized as bigVal[].
+  INLINE ap_private(uint32_t numWords, const uint64_t bigVal[]) {
+    set_canary();
+    assert(bigVal && "Null pointer detected!");
+    {
+      // Get memory, cleared to 0
+      memset(pVal, 0, _AP_N * sizeof(uint64_t));
+
+      // Calculate the number of words to copy
+      uint32_t words = AESL_std::min(numWords, _AP_N);
+      // Copy the words from bigVal to pVal
+      memcpy(pVal, bigVal, words * APINT_WORD_SIZE);
+      // Make sure unused high bits are cleared
+      if (words >= _AP_W) clearUnusedBits();
+    }
+    check_canary();
+  }
+
+  /// This constructor interprets Val as a string in the given radix. The
+  /// interpretation stops when the first character that is not suitable for
+  /// the radix is encountered. Acceptable radix values are 2, 8, 10 and 16.
+  /// It is an error for the value implied by the string to require more bits
+  /// than numBits.
+  /// @param val the string to be interpreted
+  /// @param radix the radix of Val to use for the interpretation
+  /// @brief Construct an ap_private from a string representation.
+  INLINE ap_private(const std::string& val, uint8_t radix = 2) {
+    set_canary();
+    assert(!val.empty() && "The input string is empty.");
+    const char* c_str = val.c_str();
+    fromString(c_str, val.size(), radix);
+    check_canary();
+  }
+
+  /// This constructor interprets the slen characters starting at StrStart as
+  /// a string in the given radix. The interpretation stops when the first
+  /// character that is not suitable for the radix is encountered. Acceptable
+  /// radix values are 2, 8, 10 and 16. It is an error for the value implied
+  /// by the string to require more bits than numBits.
+  /// @param strStart the start of the string to be interpreted
+  /// @param slen the maximum number of characters to interpret
+  /// @param radix the radix to use for the conversion
+  /// @brief Construct an ap_private from a string representation.
+  /// This method does not consider whether it is negative or not.
+  INLINE ap_private(const char strStart[], uint32_t slen, uint8_t radix) {
+    set_canary();
+    fromString(strStart, slen, radix);
+    check_canary();
+  }
+
+  INLINE void report() {
+    _AP_ERROR(_AP_W > MAX_MODE(AP_INT_MAX_W) * 1024,
+              "ap_%sint<%d>: Bitwidth exceeds the "
+              "default max value %d. Please use macro "
+              "AP_INT_MAX_W to set a larger max value.",
+              _AP_S ? "" : "u", _AP_W, MAX_MODE(AP_INT_MAX_W) * 1024);
+  }
+
+  /// This union is used to store the integer value. When the
+  /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal.
+  // uint64_t VAL; ///< Used to store the <= 64 bits integer value.
+  uint64_t pVal[_AP_N]; ///< Used to store the >64 bits integer value.
+#ifdef AP_CANARY
+  uint64_t CANARY;
+  INLINE void check_canary() { assert(CANARY == (uint64_t)0xDEADBEEFDEADBEEF); }
+  INLINE void set_canary() { CANARY = (uint64_t)0xDEADBEEFDEADBEEF; }
+#else
+  INLINE void check_canary() {}
+  INLINE void set_canary() {}
+#endif
+
+ public:
+  typedef typename valtype<8, _AP_S>::Type ValType;
+  typedef ap_private<_AP_W, _AP_S> Type;
+  // FIXME remove friend type?
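+  // The string-based constructors above are private helpers; user code
+  // reaches them through the public (const char*) constructors further
+  // below, which first call ap_private_ops::parseString to determine the
+  // radix of the literal.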
+  template <int _AP_W2, int _AP_I2, bool _AP_S2, ap_q_mode _AP_Q2,
+            ap_o_mode _AP_O2, int _AP_N2>
+  friend struct ap_fixed_base;
+  /// return type of variety of operations
+  //----------------------------------------------------------
+  template <int _AP_W2, bool _AP_S2>
+  struct RType {
+    enum {
+      mult_w = _AP_W + _AP_W2,
+      mult_s = _AP_S || _AP_S2,
+      plus_w =
+          AP_MAX(_AP_W + (_AP_S2 && !_AP_S), _AP_W2 + (_AP_S && !_AP_S2)) + 1,
+      plus_s = _AP_S || _AP_S2,
+      minus_w =
+          AP_MAX(_AP_W + (_AP_S2 && !_AP_S), _AP_W2 + (_AP_S && !_AP_S2)) + 1,
+      minus_s = true,
+      div_w = _AP_W + _AP_S2,
+      div_s = _AP_S || _AP_S2,
+      mod_w = AP_MIN(_AP_W, _AP_W2 + (!_AP_S2 && _AP_S)),
+      mod_s = _AP_S,
+      logic_w = AP_MAX(_AP_W + (_AP_S2 && !_AP_S), _AP_W2 + (_AP_S && !_AP_S2)),
+      logic_s = _AP_S || _AP_S2
+    };
+    typedef ap_private<mult_w, mult_s> mult;
+    typedef ap_private<plus_w, plus_s> plus;
+    typedef ap_private<minus_w, minus_s> minus;
+    typedef ap_private<logic_w, logic_s> logic;
+    typedef ap_private<div_w, div_s> div;
+    typedef ap_private<mod_w, mod_s> mod;
+    typedef ap_private<_AP_W, _AP_S> arg1;
+    typedef bool reduce;
+  };
+
+  INLINE uint64_t& get_VAL(void) { return pVal[0]; }
+  INLINE uint64_t get_VAL(void) const { return pVal[0]; }
+  INLINE uint64_t get_VAL(void) const volatile { return pVal[0]; }
+  INLINE void set_VAL(uint64_t value) { pVal[0] = value; }
+  INLINE uint64_t& get_pVal(int index) { return pVal[index]; }
+  INLINE uint64_t* get_pVal() { return pVal; }
+  INLINE const uint64_t* get_pVal() const { return pVal; }
+  INLINE uint64_t get_pVal(int index) const { return pVal[index]; }
+  INLINE uint64_t* get_pVal() const volatile { return pVal; }
+  INLINE uint64_t get_pVal(int index) const volatile { return pVal[index]; }
+  INLINE void set_pVal(int i, uint64_t value) { pVal[i] = value; }
+
+  /// This enum is used to hold the constants we needed for ap_private.
+  enum {
+    APINT_BITS_PER_WORD = sizeof(uint64_t) * 8, ///< Bits in a word
+    APINT_WORD_SIZE = sizeof(uint64_t)          ///< Byte size of a word
+  };
+
+  enum {
+    excess_bits = (_AP_W % APINT_BITS_PER_WORD)
+                      ? APINT_BITS_PER_WORD - (_AP_W % APINT_BITS_PER_WORD)
+                      : 0
+  };
+  static const uint64_t mask = ((uint64_t)~0ULL >> (excess_bits));
+
+ public:
+  // NOTE changed to explicit to be consistent with ap_private
+  explicit INLINE ap_private(const char* val) {
+    set_canary();
+    unsigned char radix = 10;
+    std::string str = ap_private_ops::parseString(val, radix); // determine radix.
+    std::string::size_type pos = str.find('.');
+    if (pos != std::string::npos) str = str.substr(pos);
+    ap_private ap_private_val(str, radix);
+    operator=(ap_private_val);
+    report();
+    check_canary();
+  }
+
+  INLINE ap_private(const char* val, unsigned char rd) {
+    set_canary();
+    unsigned char radix = rd;
+    std::string str = ap_private_ops::parseString(val, radix); // determine radix.
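+    // Usage sketch (illustrative only, not part of the header): this
+    // constructor builds a wide value from an explicitly radixed literal,
+    // e.g. ap_private<128, false> x("fedcba9876543210f", 16);
+    // The default-radix overload above determines the base from the
+    // literal instead.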
+    std::string::size_type pos = str.find('.');
+    if (pos != std::string::npos) str = str.substr(pos);
+    ap_private ap_private_val(str, radix);
+    operator=(ap_private_val);
+    report();
+    check_canary();
+  }
+
+  template <int _AP_W2, bool _AP_S2>
+  INLINE ap_private(const _private_range_ref<_AP_W2, _AP_S2>& ref) {
+    set_canary();
+    *this = ref.get();
+    report();
+    check_canary();
+  }
+
+  template <int _AP_W2, bool _AP_S2>
+  INLINE ap_private(const _private_bit_ref<_AP_W2, _AP_S2>& ref) {
+    set_canary();
+    *this = ((uint64_t)(bool)ref);
+    report();
+    check_canary();
+  }
+
+// template <int _AP_W2, typename _AP_T2, int _AP_W3, typename _AP_T3>
+// INLINE ap_private(const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3>& ref) {
+//   set_canary();
+//   *this = ref.get();
+//   report();
+//   check_canary();
+// }
+//
+// template <int _AP_W2, int _AP_I2, bool _AP_S2, ap_q_mode _AP_Q2,
+//           ap_o_mode _AP_O2, int _AP_N2>
+// INLINE ap_private(
+//     const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& val) {
+//   set_canary();
+//   *this = ((val.operator ap_private<_AP_W2, false>()));
+//   report();
+//   check_canary();
+// }
+//
+// template <int _AP_W2, int _AP_I2, bool _AP_S2, ap_q_mode _AP_Q2,
+//           ap_o_mode _AP_O2, int _AP_N2>
+// INLINE ap_private(
+//     const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& val) {
+//   set_canary();
+//   *this = (uint64_t)(bool)val;
+//   report();
+//   check_canary();
+// }
+
+  /// Simply makes *this a copy of that.
+  /// @brief Copy Constructor.
+  INLINE ap_private(const ap_private& that) {
+    set_canary();
+    memcpy(pVal, that.get_pVal(), _AP_N * APINT_WORD_SIZE);
+    clearUnusedBits();
+    check_canary();
+  }
+
+  template <int _AP_W1, bool _AP_S1>
+  INLINE ap_private(const ap_private<_AP_W1, _AP_S1, false>& that) {
+    set_canary();
+    operator=(that);
+    check_canary();
+  }
+
+  template <int _AP_W1, bool _AP_S1>
+  INLINE ap_private(const volatile ap_private<_AP_W1, _AP_S1, false>& that) {
+    set_canary();
+    operator=(const_cast<const ap_private<_AP_W1, _AP_S1, false>&>(that));
+    check_canary();
+  }
+
+  template <int _AP_W1, bool _AP_S1>
+  INLINE ap_private(const ap_private<_AP_W1, _AP_S1, true>& that) {
+    set_canary();
+    static const uint64_t that_sign_ext_mask =
+        (_AP_W1 == APINT_BITS_PER_WORD)
+            ? 0
+            : ~0ULL >> (_AP_W1 % APINT_BITS_PER_WORD)
+                          << (_AP_W1 % APINT_BITS_PER_WORD);
+    if (that.isNegative()) {
+      pVal[0] = that.get_VAL() | that_sign_ext_mask;
+      memset(pVal + 1, ~0, sizeof(uint64_t) * (_AP_N - 1));
+    } else {
+      pVal[0] = that.get_VAL();
+      memset(pVal + 1, 0, sizeof(uint64_t) * (_AP_N - 1));
+    }
+    clearUnusedBits();
+    check_canary();
+  }
+
+  template <int _AP_W1, bool _AP_S1>
+  INLINE ap_private(const volatile ap_private<_AP_W1, _AP_S1, true>& that) {
+    set_canary();
+    operator=(const_cast<const ap_private<_AP_W1, _AP_S1, true>&>(that));
+    check_canary();
+  }
+
+  /// @brief Destructor.
+  // virtual ~ap_private() {}
+  INLINE ~ap_private() { check_canary(); }
+
+  /// @name Constructors
+  /// @{
+
+  /// Default constructor that creates an uninitialized ap_private. This is
+  /// useful for object deserialization (pair this with the static method
+  /// Read).
+  INLINE ap_private() {
+    set_canary();
+    clearUnusedBits();
+    check_canary();
+  }
+
+  INLINE ap_private(uint64_t* val, uint32_t bits = _AP_W) { assert(0); }
+  INLINE ap_private(const uint64_t* const val, uint32_t bits) { assert(0); }
+
+/// If isSigned is true then val is treated as if it were a signed value
+/// (i.e. as an int64_t) and the appropriate sign extension to the bit width
+/// will be done. Otherwise, no sign extension occurs (high order bits beyond
+/// the range of val are zero filled).
+/// @param numBits the bit width of the constructed ap_private
+/// @param val the initial value of the ap_private
+/// @param isSigned how to treat signedness of val
+/// @brief Create a new ap_private of numBits width, initialized as val.
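+// For example (illustrative): with _AP_W = 128 (_AP_N = 2), constructing
+// from (int)-1 with isSigned == true fills the upper word with ~0ULL via
+// the memset below, while (unsigned int)0xFFFFFFFF leaves it zero.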
+#define CTOR(TYPE, SIGNED) \ + INLINE ap_private(TYPE val, bool isSigned = SIGNED) { \ + set_canary(); \ + pVal[0] = (ValType)val; \ + if (isSigned && int64_t(pVal[0]) < 0) { \ + memset(pVal + 1, ~0, sizeof(uint64_t) * (_AP_N - 1)); \ + } else { \ + memset(pVal + 1, 0, sizeof(uint64_t) * (_AP_N - 1)); \ + } \ + clearUnusedBits(); \ + check_canary(); \ + } + + CTOR(bool, false) + CTOR(char, CHAR_IS_SIGNED) + CTOR(signed char, true) + CTOR(unsigned char, false) + CTOR(short, true) + CTOR(unsigned short, false) + CTOR(int, true) + CTOR(unsigned int, false) + CTOR(long, true) + CTOR(unsigned long, false) + CTOR(ap_slong, true) + CTOR(ap_ulong, false) + CTOR(half, false) + CTOR(float, false) + CTOR(double, false) +#undef CTOR + + /// @returns true if the number of bits <= 64, false otherwise. + /// @brief Determine if this ap_private just has one word to store value. + INLINE bool isSingleWord() const { return false; } + + /// @returns the word position for the specified bit position. + /// @brief Determine which word a bit is in. + static INLINE uint32_t whichWord(uint32_t bitPosition) { + // return bitPosition / APINT_BITS_PER_WORD; + return (bitPosition) >> 6; + } + + /// @returns the bit position in a word for the specified bit position + /// in the ap_private. + /// @brief Determine which bit in a word a bit is in. + static INLINE uint32_t whichBit(uint32_t bitPosition) { + // return bitPosition % APINT_BITS_PER_WORD; + return bitPosition & 0x3f; + } + + /// bit at a specific bit position. This is used to mask the bit in the + /// corresponding word. + /// @returns a uint64_t with only bit at "whichBit(bitPosition)" set + /// @brief Get a single bit mask. + static INLINE uint64_t maskBit(uint32_t bitPosition) { + return 1ULL << (whichBit(bitPosition)); + } + + /// @returns the corresponding word for the specified bit position. + /// @brief Get the word corresponding to a bit position + INLINE uint64_t getWord(uint32_t bitPosition) const { + return pVal[whichWord(bitPosition)]; + } + + /// This method is used internally to clear the to "N" bits in the high order + /// word that are not used by the ap_private. This is needed after the most + /// significant word is assigned a value to ensure that those bits are + /// zero'd out. + /// @brief Clear unused high order bits + INLINE void clearUnusedBits(void) +// just for clang compiler +#if defined(__clang__) && !defined(__CLANG_3_1__) + __attribute__((no_sanitize("undefined"))) +#endif + { + pVal[_AP_N - 1] = + _AP_S ? ((((int64_t)pVal[_AP_N - 1]) << (excess_bits)) >> excess_bits) + : (excess_bits + ? ((pVal[_AP_N - 1]) << (excess_bits)) >> (excess_bits) + : pVal[_AP_N - 1]); + } + + INLINE void clearUnusedBitsToZero(void) { pVal[_AP_N - 1] &= mask; } + + INLINE void clearUnusedBitsToOne(void) { pVal[_AP_N - 1] |= mask; } + + /// This is used by the constructors that take string arguments. 
+  /// @brief Convert a char array into an ap_private
+  INLINE void fromString(const char* str, uint32_t slen, uint8_t radix) {
+    enum { numbits = _AP_W };
+    bool isNeg = str[0] == '-';
+    if (isNeg) {
+      str++;
+      slen--;
+    }
+
+    if (str[0] == '0' && (str[1] == 'b' || str[1] == 'B')) {
+      // if (radix == 0) radix = 2;
+      _AP_WARNING(radix != 2, "%s seems to have base %d, but %d given.", str, 2, radix);
+      str += 2;
+      slen -= 2;
+    } else if (str[0] == '0' && (str[1] == 'o' || str[1] == 'O')) {
+      // if (radix == 0) radix = 8;
+      _AP_WARNING(radix != 8, "%s seems to have base %d, but %d given.", str, 8, radix);
+      str += 2;
+      slen -= 2;
+    } else if (str[0] == '0' && (str[1] == 'x' || str[1] == 'X')) {
+      // if (radix == 0) radix = 16;
+      _AP_WARNING(radix != 16, "%s seems to have base %d, but %d given.", str, 16, radix);
+      str += 2;
+      slen -= 2;
+    } else if (str[0] == '0' && (str[1] == 'd' || str[1] == 'D')) {
+      // if (radix == 0) radix = 10;
+      _AP_WARNING(radix != 10, "%s seems to have base %d, but %d given.", str, 10, radix);
+      str += 2;
+      slen -= 2;
+    } else if (radix == 0) {
+      // radix = 2; // XXX default value
+    }
+
+    // Check our assumptions here
+    assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
+           "Radix should be 2, 8, 10, or 16!");
+    assert(str && "String is null?");
+
+    // skip any leading zero
+    while (*str == '0' && *(str + 1) != '\0') {
+      str++;
+      slen--;
+    }
+    assert((slen <= numbits || radix != 2) && "Insufficient bit width");
+    assert(((slen - 1) * 3 <= numbits || radix != 8) &&
+           "Insufficient bit width");
+    assert(((slen - 1) * 4 <= numbits || radix != 16) &&
+           "Insufficient bit width");
+    assert((((slen - 1) * 64) / 22 <= numbits || radix != 10) &&
+           "Insufficient bit width");
+
+    // clear bits
+    memset(pVal, 0, _AP_N * sizeof(uint64_t));
+
+    // Figure out if we can shift instead of multiply
+    uint32_t shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
+
+    // Set up an ap_private for the digit to add outside the loop so we don't
+    // constantly construct/destruct it.
+    uint64_t bigVal[_AP_N];
+    memset(bigVal, 0, _AP_N * sizeof(uint64_t));
+    ap_private<_AP_W, _AP_S> apdigit(getBitWidth(), bigVal);
+    ap_private<_AP_W, _AP_S> apradix(radix);
+
+    // Enter digit traversal loop
+    for (unsigned i = 0; i < slen; i++) {
+      // Get a digit
+      uint32_t digit = 0;
+      char cdigit = str[i];
+      if (radix == 16) {
+#define isxdigit(c) \
+  (((c) >= '0' && (c) <= '9') || ((c) >= 'a' && (c) <= 'f') || \
+   ((c) >= 'A' && (c) <= 'F'))
+#define isdigit(c) ((c) >= '0' && (c) <= '9')
+        if (!isxdigit(cdigit)) assert(0 && "Invalid hex digit in string");
+        if (isdigit(cdigit))
+          digit = cdigit - '0';
+        else if (cdigit >= 'a')
+          digit = cdigit - 'a' + 10;
+        else if (cdigit >= 'A')
+          digit = cdigit - 'A' + 10;
+        else
+          assert(0 && "huh? we shouldn't get here");
+      } else if (isdigit(cdigit)) {
+        digit = cdigit - '0';
+      } else if (cdigit != '\0') {
+        assert(0 && "Invalid character in digit string");
+      }
+#undef isxdigit
+#undef isdigit
+      // Shift or multiply the value by the radix
+      if (shift)
+        *this <<= shift;
+      else
+        *this *= apradix;
+
+      // Add in the digit we just interpreted
+      apdigit.set_VAL(digit);
+      *this += apdigit;
+    }
+    // If it's negative, put it in two's complement form
+    if (isNeg) {
+      (*this)--;
+      this->flip();
+    }
+    clearUnusedBits();
+  }
+
+  INLINE ap_private read() volatile { return *this; }
+
+  INLINE void write(const ap_private& op2) volatile { *this = (op2); }
+
+  INLINE operator ValType() const { return get_VAL(); }
+
+  INLINE int to_uchar() const { return (unsigned char)get_VAL(); }
+
+  INLINE int to_char() const { return (signed char)get_VAL(); }
+
+  INLINE int to_ushort() const { return (unsigned short)get_VAL(); }
+
+  INLINE int to_short() const { return (short)get_VAL(); }
+
+  INLINE int to_int() const { return (int)get_VAL(); }
+
+  INLINE unsigned to_uint() const { return (unsigned)get_VAL(); }
+
+  INLINE long to_long() const { return (long)get_VAL(); }
+
+  INLINE unsigned long to_ulong() const { return (unsigned long)get_VAL(); }
+
+  INLINE ap_slong to_int64() const { return (ap_slong)get_VAL(); }
+
+  INLINE ap_ulong to_uint64() const { return (ap_ulong)get_VAL(); }
+
+  INLINE double to_double() const {
+    if (isNegative())
+      return roundToDouble(true);
+    else
+      return roundToDouble(false);
+  }
+
+  INLINE unsigned length() const { return _AP_W; }
+
+  /* Reverse the contents of ap_private instance, i.e. LSB becomes MSB and
+   * vice versa. */
+  INLINE ap_private& reverse() {
+    for (int i = 0; i < _AP_W / 2; ++i) {
+      bool tmp = operator[](i);
+      if (operator[](_AP_W - 1 - i))
+        set(i);
+      else
+        clear(i);
+      if (tmp)
+        set(_AP_W - 1 - i);
+      else
+        clear(_AP_W - 1 - i);
+    }
+    clearUnusedBits();
+    return *this;
+  }
+
+  /* Return true if the value of ap_private instance is zero. */
+  INLINE bool iszero() const { return isMinValue(); }
+
+  INLINE bool to_bool() const { return !iszero(); }
+
+  /* x < 0 */
+  INLINE bool sign() const {
+    if (isNegative()) return true;
+    return false;
+  }
+
+  /* x[i] = !x[i] */
+  INLINE void invert(int i) {
+    assert(i >= 0 && "Attempting to read bit with negative index");
+    assert(i < _AP_W && "Attempting to read bit beyond MSB");
+    flip(i);
+  }
+
+  /* x[i] */
+  INLINE bool test(int i) const {
+    assert(i >= 0 && "Attempting to read bit with negative index");
+    assert(i < _AP_W && "Attempting to read bit beyond MSB");
+    return operator[](i);
+  }
+
+  // Set the ith bit to v
+  INLINE void set(int i, bool v) {
+    assert(i >= 0 && "Attempting to write bit with negative index");
+    assert(i < _AP_W && "Attempting to write bit beyond MSB");
+    v ? set(i) : clear(i);
+  }
+
+  // Set the ith bit to v
+  INLINE void set_bit(int i, bool v) {
+    assert(i >= 0 && "Attempting to write bit with negative index");
+    assert(i < _AP_W && "Attempting to write bit beyond MSB");
+    v ? set(i) : clear(i);
+  }
+
+  // FIXME different argument for different action?
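+  // Usage sketch (illustrative only, not part of the header):
+  //   ap_private<80, false> v(0);
+  //   v.set(79);           // raise the MSB
+  //   bool b = v.get(79);  // true
+  //   v.invert(79);        // back to zero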
+  INLINE ap_private& set(uint32_t bitPosition) {
+    pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
+    clearUnusedBits();
+    return *this;
+  }
+
+  INLINE void set() {
+    for (int i = 0; i < _AP_N; ++i) pVal[i] = ~0ULL;
+    clearUnusedBits();
+  }
+
+  // Get the value of ith bit
+  INLINE bool get(int i) const {
+    assert(i >= 0 && "Attempting to read bit with negative index");
+    assert(i < _AP_W && "Attempting to read bit beyond MSB");
+    return ((maskBit(i) & (pVal[whichWord(i)])) != 0);
+  }
+
+  // Get the value of ith bit
+  INLINE bool get_bit(int i) const {
+    assert(i >= 0 && "Attempting to read bit with negative index");
+    assert(i < _AP_W && "Attempting to read bit beyond MSB");
+    return ((maskBit(i) & (pVal[whichWord(i)])) != 0);
+  }
+
+  // This is used for sc_lv and sc_bv, which is implemented by sc_uint
+  // Rotate an ap_private object n places to the left
+  INLINE void lrotate(int n) {
+    assert(n >= 0 && "Attempting to shift negative index");
+    assert(n < _AP_W && "Shift value larger than bit width");
+    operator=(shl(n) | lshr(_AP_W - n));
+  }
+
+  // This is used for sc_lv and sc_bv, which is implemented by sc_uint
+  // Rotate an ap_private object n places to the right
+  INLINE void rrotate(int n) {
+    assert(n >= 0 && "Attempting to shift negative index");
+    assert(n < _AP_W && "Shift value larger than bit width");
+    operator=(lshr(n) | shl(_AP_W - n));
+  }
+
+  /// Set the given bit to 0 whose position is given as "bitPosition".
+  /// @brief Set a given bit to 0.
+  INLINE ap_private& clear(uint32_t bitPosition) {
+    pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
+    clearUnusedBits();
+    return *this;
+  }
+
+  /// @brief Set every bit to 0.
+  INLINE void clear() { memset(pVal, 0, _AP_N * APINT_WORD_SIZE); }
+
+  /// @brief Toggle every bit to its opposite value.
+  ap_private& flip() {
+    for (int i = 0; i < _AP_N; ++i) pVal[i] ^= ~0ULL;
+    clearUnusedBits();
+    return *this;
+  }
+
+  /// @brief Toggles a given bit to its opposite value.
+  INLINE ap_private& flip(uint32_t bitPosition) {
+    assert(bitPosition < BitWidth && "Out of the bit-width range!");
+    set_bit(bitPosition, !get_bit(bitPosition));
+    return *this;
+  }
+
+  // complements every bit
+  INLINE void b_not() { flip(); }
+
+  INLINE ap_private getLoBits(uint32_t numBits) const {
+    return ap_private_ops::lshr(ap_private_ops::shl(*this, _AP_W - numBits),
+                                _AP_W - numBits);
+  }
+
+  INLINE ap_private getHiBits(uint32_t numBits) const {
+    return ap_private_ops::lshr(*this, _AP_W - numBits);
+  }
+
+  // Binary Arithmetic
+  //-----------------------------------------------------------
+
+// template <int _AP_W2, typename _AP_T2, int _AP_W3, typename _AP_T3>
+// INLINE ap_private operator&(
+//     const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3>& a2) {
+//   return *this & a2.get();
+// }
+//
+// template <int _AP_W2, typename _AP_T2, int _AP_W3, typename _AP_T3>
+// INLINE ap_private operator|(
+//     const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3>& a2) {
+//   return *this | a2.get();
+// }
+//
+// template <int _AP_W2, typename _AP_T2, int _AP_W3, typename _AP_T3>
+// INLINE ap_private operator^(
+//     const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3>& a2) {
+//   return *this ^ a2.get();
+// }
+
+/// Arithmetic assign
+//-------------------------------------------------------------
+
+#define OP_BIN_LOGIC_ASSIGN_AP(Sym) \
+  template <int _AP_W1, bool _AP_S1> \
+  INLINE ap_private& operator Sym(const ap_private<_AP_W1, _AP_S1>& RHS) { \
+    const int _AP_N1 = ap_private<_AP_W1, _AP_S1>::_AP_N; \
+    uint32_t numWords = AESL_std::min((int)_AP_N, _AP_N1); \
+    uint32_t i; \
+    if (_AP_W != _AP_W1) \
+      fprintf(stderr, \
+              "Warning! Bitsize mismatch for ap_[u]int " #Sym " ap_[u]int.\n"); \
+    for (i = 0; i < numWords; ++i) pVal[i] Sym RHS.get_pVal(i); \
+    if (_AP_N1 < _AP_N) { \
+      uint64_t ext = RHS.isNegative() ? ~0ULL : 0; \
+      for (; i < _AP_N; i++) pVal[i] Sym ext; \
+    } \
+    clearUnusedBits(); \
+    return *this; \
+  }
+
+  OP_BIN_LOGIC_ASSIGN_AP(&=);
+  OP_BIN_LOGIC_ASSIGN_AP(|=);
+  OP_BIN_LOGIC_ASSIGN_AP(^=);
+#undef OP_BIN_LOGIC_ASSIGN_AP
+
+  /// Adds the RHS ap_private to this ap_private.
+  /// @returns this, after addition of RHS.
+  /// @brief Addition assignment operator.
+  template <int _AP_W1, bool _AP_S1>
+  INLINE ap_private& operator+=(const ap_private<_AP_W1, _AP_S1>& RHS) {
+    const int _AP_N1 = ap_private<_AP_W1, _AP_S1>::_AP_N;
+    uint64_t RHSpVal[_AP_N1];
+    for (int i = 0; i < _AP_N1; ++i) RHSpVal[i] = RHS.get_pVal(i);
+    ap_private_ops::add(pVal, pVal, RHSpVal, _AP_N, _AP_N, _AP_N1, _AP_S,
+                        _AP_S1);
+    clearUnusedBits();
+    return *this;
+  }
+
+  template <int _AP_W1, bool _AP_S1>
+  INLINE ap_private& operator-=(const ap_private<_AP_W1, _AP_S1>& RHS) {
+    const int _AP_N1 = ap_private<_AP_W1, _AP_S1>::_AP_N;
+    uint64_t RHSpVal[_AP_N1];
+    for (int i = 0; i < _AP_N1; ++i) RHSpVal[i] = RHS.get_pVal(i);
+    ap_private_ops::sub(pVal, pVal, RHSpVal, _AP_N, _AP_N, _AP_N1, _AP_S,
+                        _AP_S1);
+    clearUnusedBits();
+    return *this;
+  }
+
+  template <int _AP_W1, bool _AP_S1>
+  INLINE ap_private& operator*=(const ap_private<_AP_W1, _AP_S1>& RHS) {
+    // Get some bit facts about LHS and check for zero
+    uint32_t lhsBits = getActiveBits();
+    uint32_t lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
+    if (!lhsWords) {
+      // 0 * X ===> 0
+      return *this;
+    }
+
+    ap_private dupRHS = RHS;
+    // Get some bit facts about RHS and check for zero
+    uint32_t rhsBits = dupRHS.getActiveBits();
+    uint32_t rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
+    if (!rhsWords) {
+      // X * 0 ===> 0
+      clear();
+      return *this;
+    }
+
+    // Allocate space for the result
+    uint32_t destWords = rhsWords + lhsWords;
+    uint64_t* dest = (uint64_t*)malloc(destWords * sizeof(uint64_t));
+
+    // Perform the long multiply
+    ap_private_ops::mul(dest, pVal, lhsWords, dupRHS.get_pVal(), rhsWords,
+                        destWords);
+
+    // Copy result back into *this
+    clear();
+    uint32_t wordsToCopy = destWords >= _AP_N ? _AP_N : destWords;
+
+    memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
+
+    uint64_t ext = (isNegative() ^ RHS.isNegative()) ? ~0ULL : 0ULL;
+    for (int i = wordsToCopy; i < _AP_N; i++) pVal[i] = ext;
+    clearUnusedBits();
+    // free the dest array and return
+    free(dest);
+    return *this;
+  }
+
+#define OP_ASSIGN_AP(Sym) \
+  template <int _AP_W2, bool _AP_S2> \
+  INLINE ap_private& operator Sym##=(const ap_private<_AP_W2, _AP_S2>& op) { \
+    *this = operator Sym(op); \
+    return *this; \
+  }
+
+  OP_ASSIGN_AP(/)
+  OP_ASSIGN_AP(%)
+#undef OP_ASSIGN_AP
+
+#define OP_BIN_LOGIC_AP(Sym) \
+  template <int _AP_W1, bool _AP_S1> \
+  INLINE typename RType<_AP_W1, _AP_S1>::logic operator Sym( \
+      const ap_private<_AP_W1, _AP_S1>& RHS) const { \
+    enum { \
+      numWords = (RType<_AP_W1, _AP_S1>::logic_w + APINT_BITS_PER_WORD - 1) / \
+                 APINT_BITS_PER_WORD \
+    }; \
+    typename RType<_AP_W1, _AP_S1>::logic Result; \
+    uint32_t i; \
+    const int _AP_N1 = ap_private<_AP_W1, _AP_S1>::_AP_N; \
+    uint32_t min_N = std::min((int)_AP_N, _AP_N1); \
+    uint32_t max_N = std::max((int)_AP_N, _AP_N1); \
+    for (i = 0; i < min_N; ++i) \
+      Result.set_pVal(i, pVal[i] Sym RHS.get_pVal(i)); \
+    if (numWords > i) { \
+      uint64_t ext = ((_AP_N < _AP_N1 && isNegative()) || \
+                      (_AP_N1 < _AP_N && RHS.isNegative())) \
+                         ? ~0ULL \
+                         : 0; \
+      if (_AP_N > _AP_N1) \
+        for (; i < max_N; i++) Result.set_pVal(i, pVal[i] Sym ext); \
+      else \
+        for (; i < max_N; i++) Result.set_pVal(i, RHS.get_pVal(i) Sym ext); \
+      if (numWords > i) { \
+        uint64_t ext2 = ((_AP_N > _AP_N1 && isNegative()) || \
+                         (_AP_N1 > _AP_N && RHS.isNegative())) \
+                            ? ~0ULL \
+                            : 0; \
+        Result.set_pVal(i, ext Sym ext2); \
+      } \
+    } \
+    Result.clearUnusedBits(); \
+    return Result; \
+  }
+
+  OP_BIN_LOGIC_AP(|);
+  OP_BIN_LOGIC_AP(&);
+  OP_BIN_LOGIC_AP(^);
+
+#undef OP_BIN_LOGIC_AP
+
+  template <int _AP_W1, bool _AP_S1>
+  INLINE typename RType<_AP_W1, _AP_S1>::plus operator+(
+      const ap_private<_AP_W1, _AP_S1>& RHS) const {
+    typename RType<_AP_W1, _AP_S1>::plus Result, lhs(*this), rhs(RHS);
+    const int Result_AP_N = (RType<_AP_W1, _AP_S1>::plus_w + 63) / 64;
+    ap_private_ops::add(Result.get_pVal(), lhs.get_pVal(), rhs.get_pVal(),
+                        Result_AP_N, Result_AP_N, Result_AP_N, _AP_S, _AP_S1);
+    Result.clearUnusedBits();
+    return Result;
+  }
+
+  template <int _AP_W1, bool _AP_S1>
+  INLINE typename RType<_AP_W1, _AP_S1>::minus operator-(
+      const ap_private<_AP_W1, _AP_S1>& RHS) const {
+    typename RType<_AP_W1, _AP_S1>::minus Result, lhs(*this), rhs(RHS);
+    const int Result_AP_N = (RType<_AP_W1, _AP_S1>::minus_w + 63) / 64;
+    ap_private_ops::sub(Result.get_pVal(), lhs.get_pVal(), rhs.get_pVal(),
+                        Result_AP_N, Result_AP_N, Result_AP_N, _AP_S, _AP_S1);
+    Result.clearUnusedBits();
+    return Result;
+  }
+
+  template <int _AP_W1, bool _AP_S1>
+  INLINE typename RType<_AP_W1, _AP_S1>::mult operator*(
+      const ap_private<_AP_W1, _AP_S1>& RHS) const {
+    typename RType<_AP_W1, _AP_S1>::mult temp = *this;
+    temp *= RHS;
+    return temp;
+  }
+
+  template <int _AP_W2, bool _AP_S2>
+  INLINE typename RType<_AP_W2, _AP_S2>::div operator/(
+      const ap_private<_AP_W2, _AP_S2>& op) const {
+    ap_private<AP_MAX(_AP_W + (_AP_S || _AP_S2), _AP_W2 + (_AP_S || _AP_S2)),
+               (_AP_W > _AP_W2 ? _AP_S
+                               : (_AP_W2 > _AP_W ? _AP_S2 : _AP_S || _AP_S2))>
+        lhs = *this;
+    ap_private<AP_MAX(_AP_W + (_AP_S || _AP_S2), _AP_W2 + (_AP_S || _AP_S2)),
+               (_AP_W > _AP_W2 ? _AP_S
+                               : (_AP_W2 > _AP_W ? _AP_S2 : _AP_S || _AP_S2))>
+        rhs = op;
+    return typename RType<_AP_W2, _AP_S2>::div(
+        (_AP_S || _AP_S2) ? lhs.sdiv(rhs) : lhs.udiv(rhs));
+  }
+
+  template <int _AP_W2, bool _AP_S2>
+  INLINE typename RType<_AP_W2, _AP_S2>::mod operator%(
+      const ap_private<_AP_W2, _AP_S2>& op) const {
+    ap_private<AP_MAX(_AP_W + (_AP_S || _AP_S2), _AP_W2 + (_AP_S || _AP_S2)),
+               (_AP_W > _AP_W2 ? _AP_S
+                               : (_AP_W2 > _AP_W ? _AP_S2 : _AP_S || _AP_S2))>
+        lhs = *this;
+    ap_private<AP_MAX(_AP_W + (_AP_S || _AP_S2), _AP_W2 + (_AP_S || _AP_S2)),
+               (_AP_W > _AP_W2 ? _AP_S
+                               : (_AP_W2 > _AP_W ? _AP_S2 : _AP_S || _AP_S2))>
+        rhs = op;
+    typename RType<_AP_W2, _AP_S2>::mod res =
+        typename RType<_AP_W2, _AP_S2>::mod(_AP_S ?
lhs.srem(rhs) + : lhs.urem(rhs)); + return res; + } + +#define OP_LEFT_SHIFT_CTYPE(TYPE, SIGNED) \ + INLINE ap_private operator<<(const TYPE op) const { \ + if (op >= _AP_W) return ap_private(0); \ + if (SIGNED && op < 0) return *this >> (0 - op); \ + return shl(op); \ + } + + OP_LEFT_SHIFT_CTYPE(int, true) + // OP_LEFT_SHIFT_CTYPE(bool, false) + OP_LEFT_SHIFT_CTYPE(signed char, true) + OP_LEFT_SHIFT_CTYPE(unsigned char, false) + OP_LEFT_SHIFT_CTYPE(short, true) + OP_LEFT_SHIFT_CTYPE(unsigned short, false) + OP_LEFT_SHIFT_CTYPE(unsigned int, false) + OP_LEFT_SHIFT_CTYPE(long, true) + OP_LEFT_SHIFT_CTYPE(unsigned long, false) + OP_LEFT_SHIFT_CTYPE(unsigned long long, false) + OP_LEFT_SHIFT_CTYPE(long long, true) + OP_LEFT_SHIFT_CTYPE(half, false) + OP_LEFT_SHIFT_CTYPE(float, false) + OP_LEFT_SHIFT_CTYPE(double, false) +#undef OP_LEFT_SHIFT_CTYPE + + template + INLINE ap_private operator<<(const ap_private<_AP_W2, _AP_S2>& op2) const { + if (_AP_S2 == false) { + uint32_t sh = op2.to_uint(); + return *this << sh; + } else { + int sh = op2.to_int(); + return *this << sh; + } + } + +#define OP_RIGHT_SHIFT_CTYPE(TYPE, SIGNED) \ + INLINE ap_private operator>>(const TYPE op) const { \ + if (op >= _AP_W) { \ + if (isNegative()) \ + return ap_private(-1); \ + else \ + return ap_private(0); \ + } \ + if ((SIGNED) && op < 0) return *this << (0 - op); \ + if (_AP_S) \ + return ashr(op); \ + else \ + return lshr(op); \ + } + + // OP_RIGHT_SHIFT_CTYPE(bool, false) + OP_RIGHT_SHIFT_CTYPE(char, CHAR_IS_SIGNED) + OP_RIGHT_SHIFT_CTYPE(signed char, true) + OP_RIGHT_SHIFT_CTYPE(unsigned char, false) + OP_RIGHT_SHIFT_CTYPE(short, true) + OP_RIGHT_SHIFT_CTYPE(unsigned short, false) + OP_RIGHT_SHIFT_CTYPE(int, true) + OP_RIGHT_SHIFT_CTYPE(unsigned int, false) + OP_RIGHT_SHIFT_CTYPE(long, true) + OP_RIGHT_SHIFT_CTYPE(unsigned long, false) + OP_RIGHT_SHIFT_CTYPE(unsigned long long, false) + OP_RIGHT_SHIFT_CTYPE(long long, true) + OP_RIGHT_SHIFT_CTYPE(half, false) + OP_RIGHT_SHIFT_CTYPE(float, false) + OP_RIGHT_SHIFT_CTYPE(double, false) +#undef OP_RIGHT_SHIFT_CTYPE + + template + INLINE ap_private operator>>(const ap_private<_AP_W2, _AP_S2>& op2) const { + if (_AP_S2 == false) { + uint32_t sh = op2.to_uint(); + return *this >> sh; + } else { + int sh = op2.to_int(); + return *this >> sh; + } + } + + /// Shift assign + //------------------------------------------------------------------ + // TODO call clearUnusedBits ? +#define OP_ASSIGN_AP(Sym) \ + template \ + INLINE ap_private& operator Sym##=(int op) { \ + *this = operator Sym(op); \ + return *this; \ + } \ + INLINE ap_private& operator Sym##=(unsigned int op) { \ + *this = operator Sym(op); \ + return *this; \ + } \ + template \ + INLINE ap_private& operator Sym##=(const ap_private<_AP_W2, _AP_S2>& op) { \ + *this = operator Sym(op); \ + return *this; \ + } + OP_ASSIGN_AP(>>) + OP_ASSIGN_AP(<<) +#undef OP_ASSIGN_AP + + /// Comparisons + //----------------------------------------------------------------- + INLINE bool operator==(const ap_private& RHS) const { + // Get some facts about the number of bits used in the two operands. + uint32_t n1 = getActiveBits(); + uint32_t n2 = RHS.getActiveBits(); + + // If the number of bits isn't the same, they aren't equal + if (n1 != n2) return false; + + // If the number of bits fits in a word, we only need to compare the low + // word. 
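+    // (Both operands share the same _AP_W here, so equal active-bit counts
+    // that fit in one word imply every higher word is zero in both; checking
+    // the low word alone is then sufficient.)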
+ if (n1 <= APINT_BITS_PER_WORD) return pVal[0] == RHS.get_pVal(0); + + // Otherwise, compare everything + for (int i = whichWord(n1 - 1); i >= 0; --i) + if (pVal[i] != RHS.get_pVal(i)) return false; + return true; + } + + template + INLINE bool operator==(const ap_private<_AP_W2, _AP_S2>& op) const { + enum { + _AP_MAX_W = AP_MAX(_AP_W, _AP_W2), + }; + ap_private<_AP_MAX_W, false> lhs(*this); + ap_private<_AP_MAX_W, false> rhs(op); + return lhs == rhs; + } + + INLINE bool operator==(uint64_t Val) const { + uint32_t n = getActiveBits(); + if (n <= APINT_BITS_PER_WORD) + return pVal[0] == Val; + else + return false; + } + + template + INLINE bool operator!=(const ap_private<_AP_W2, _AP_S2>& op) const { + return !(*this == op); + } + + template + INLINE bool operator!=(const ap_private<_AP_W, _AP_S1>& RHS) const { + return !((*this) == RHS); + } + + INLINE bool operator!=(uint64_t Val) const { return !((*this) == Val); } + + template + INLINE bool operator<=(const ap_private<_AP_W2, _AP_S2>& op) const { + return !(*this > op); + } + + INLINE bool operator<(const ap_private& op) const { + return _AP_S ? slt(op) : ult(op); + } + + template + INLINE bool operator<(const ap_private<_AP_W2, _AP_S2>& op) const { + enum { + _AP_MAX_W = AP_MAX(_AP_W + (_AP_S || _AP_S2), _AP_W2 + (_AP_S || _AP_S2)) + }; + ap_private<_AP_MAX_W, _AP_S> lhs(*this); + ap_private<_AP_MAX_W, _AP_S2> rhs(op); + if (_AP_S == _AP_S2) + return _AP_S ? lhs.slt(rhs) : lhs.ult(rhs); + else if (_AP_S) + if (_AP_W2 >= _AP_W) + return lhs.ult(rhs); + else + return lhs.slt(rhs); + else if (_AP_W >= _AP_W2) + return lhs.ult(rhs); + else + return lhs.slt(rhs); + } + + template + INLINE bool operator>=(const ap_private<_AP_W2, _AP_S2>& op) const { + return !(*this < op); + } + + INLINE bool operator>(const ap_private& op) const { + return _AP_S ? sgt(op) : ugt(op); + } + + template + INLINE bool operator>(const ap_private<_AP_W2, _AP_S2>& op) const { + enum { + _AP_MAX_W = AP_MAX(_AP_W + (_AP_S || _AP_S2), _AP_W2 + (_AP_S || _AP_S2)) + }; + ap_private<_AP_MAX_W, _AP_S> lhs(*this); + ap_private<_AP_MAX_W, _AP_S2> rhs(op); + if (_AP_S == _AP_S2) + return _AP_S ? 
lhs.sgt(rhs) : lhs.ugt(rhs); + else if (_AP_S) + if (_AP_W2 >= _AP_W) + return lhs.ugt(rhs); + else + return lhs.sgt(rhs); + else if (_AP_W >= _AP_W2) + return lhs.ugt(rhs); + else + return lhs.sgt(rhs); + } + + /// Bit and Part Select + //-------------------------------------------------------------- + INLINE _private_range_ref<_AP_W, _AP_S> operator()(int Hi, int Lo) { + return _private_range_ref<_AP_W, _AP_S>(this, Hi, Lo); + } + + INLINE _private_range_ref<_AP_W, _AP_S> operator()(int Hi, int Lo) const { + return _private_range_ref<_AP_W, _AP_S>( + const_cast*>(this), Hi, Lo); + } + + INLINE _private_range_ref<_AP_W, _AP_S> range(int Hi, int Lo) const { + return _private_range_ref<_AP_W, _AP_S>( + (const_cast*>(this)), Hi, Lo); + } + + INLINE _private_range_ref<_AP_W, _AP_S> range(int Hi, int Lo) { + return _private_range_ref<_AP_W, _AP_S>(this, Hi, Lo); + } + + template + INLINE _private_range_ref<_AP_W, _AP_S> range( + const ap_private<_AP_W2, _AP_S2>& HiIdx, + const ap_private<_AP_W3, _AP_S3>& LoIdx) { + int Hi = HiIdx.to_int(); + int Lo = LoIdx.to_int(); + return _private_range_ref<_AP_W, _AP_S>(this, Hi, Lo); + } + + template + INLINE _private_range_ref<_AP_W, _AP_S> operator()( + const ap_private<_AP_W2, _AP_S2>& HiIdx, + const ap_private<_AP_W3, _AP_S3>& LoIdx) { + int Hi = HiIdx.to_int(); + int Lo = LoIdx.to_int(); + return _private_range_ref<_AP_W, _AP_S>(this, Hi, Lo); + } + + template + INLINE _private_range_ref<_AP_W, _AP_S> range( + const ap_private<_AP_W2, _AP_S2>& HiIdx, + const ap_private<_AP_W3, _AP_S3>& LoIdx) const { + int Hi = HiIdx.to_int(); + int Lo = LoIdx.to_int(); + return _private_range_ref<_AP_W, _AP_S>(const_cast(this), Hi, Lo); + } + + template + INLINE _private_range_ref<_AP_W, _AP_S> operator()( + const ap_private<_AP_W2, _AP_S2>& HiIdx, + const ap_private<_AP_W3, _AP_S3>& LoIdx) const { + int Hi = HiIdx.to_int(); + int Lo = LoIdx.to_int(); + return this->range(Hi, Lo); + } + + INLINE _private_bit_ref<_AP_W, _AP_S> operator[](int index) { + return _private_bit_ref<_AP_W, _AP_S>(*this, index); + } + + template + INLINE _private_bit_ref<_AP_W, _AP_S> operator[]( + const ap_private<_AP_W2, _AP_S2>& index) { + return _private_bit_ref<_AP_W, _AP_S>(*this, index.to_int()); + } + + template + INLINE const _private_bit_ref<_AP_W, _AP_S> operator[]( + const ap_private<_AP_W2, _AP_S2>& index) const { + return _private_bit_ref<_AP_W, _AP_S>( + const_cast&>(*this), index.to_int()); + } + + INLINE const _private_bit_ref<_AP_W, _AP_S> operator[](int index) const { + return _private_bit_ref<_AP_W, _AP_S>( + const_cast&>(*this), index); + } + + INLINE _private_bit_ref<_AP_W, _AP_S> bit(int index) { + return _private_bit_ref<_AP_W, _AP_S>(*this, index); + } + + template + INLINE _private_bit_ref<_AP_W, _AP_S> bit(const ap_private<_AP_W2, _AP_S2>& index) { + return _private_bit_ref<_AP_W, _AP_S>(*this, index.to_int()); + } + + INLINE const _private_bit_ref<_AP_W, _AP_S> bit(int index) const { + return _private_bit_ref<_AP_W, _AP_S>( + const_cast&>(*this), index); + } + + template + INLINE const _private_bit_ref<_AP_W, _AP_S> bit( + const ap_private<_AP_W2, _AP_S2>& index) const { + return _private_bit_ref<_AP_W, _AP_S>( + const_cast&>(*this), index.to_int()); + } + +// template +// INLINE ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2, +// ap_private<_AP_W2, _AP_S2> > +// concat(ap_private<_AP_W2, _AP_S2>& a2) { +// return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2, +// ap_private<_AP_W2, _AP_S2> >(*this, a2); +// } +// +// template +// INLINE 
ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2, +// ap_private<_AP_W2, _AP_S2> > +// concat(const ap_private<_AP_W2, _AP_S2>& a2) const { +// return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2, +// ap_private<_AP_W2, _AP_S2> >( +// const_cast&>(*this), +// const_cast&>(a2)); +// } +// +// template +// INLINE ap_concat_ref<_AP_W, ap_private, _AP_W2, ap_private<_AP_W2, _AP_S2> > +// operator,(ap_private<_AP_W2, _AP_S2> &a2) { +// return ap_concat_ref<_AP_W, ap_private, _AP_W2, +// ap_private<_AP_W2, _AP_S2> >(*this, a2); +// } +// +// template +// INLINE ap_concat_ref<_AP_W, ap_private, _AP_W2, ap_private<_AP_W2, _AP_S2> > +// operator,(ap_private<_AP_W2, _AP_S2> &a2) const { +// return ap_concat_ref<_AP_W, ap_private, _AP_W2, +// ap_private<_AP_W2, _AP_S2> >( +// const_cast&>(*this), a2); +// } +// +// template +// INLINE ap_concat_ref<_AP_W, ap_private, _AP_W2, ap_private<_AP_W2, _AP_S2> > +// operator,(const ap_private<_AP_W2, _AP_S2> &a2) { +// return ap_concat_ref<_AP_W, ap_private, _AP_W2, +// ap_private<_AP_W2, _AP_S2> >( +// *this, const_cast&>(a2)); +// } +// +// template +// INLINE ap_concat_ref<_AP_W, ap_private, _AP_W2, ap_private<_AP_W2, _AP_S2> > +// operator,(const ap_private<_AP_W2, _AP_S2> &a2) const { +// return ap_concat_ref<_AP_W, ap_private, _AP_W2, +// ap_private<_AP_W2, _AP_S2> >( +// const_cast&>(*this), +// const_cast&>(a2)); +// } +// +// template +// INLINE ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2, +// _private_range_ref<_AP_W2, _AP_S2> > +// operator,(const _private_range_ref<_AP_W2, _AP_S2> &a2) const { +// return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2, +// _private_range_ref<_AP_W2, _AP_S2> >( +// const_cast&>(*this), +// const_cast<_private_range_ref<_AP_W2, _AP_S2>&>(a2)); +// } +// +// template +// INLINE ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2, +// _private_range_ref<_AP_W2, _AP_S2> > +// operator,(_private_range_ref<_AP_W2, _AP_S2> &a2) { +// return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2, +// _private_range_ref<_AP_W2, _AP_S2> >(*this, a2); +// } +// +// template +// INLINE ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, 1, +// _private_bit_ref<_AP_W2, _AP_S2> > +// operator,(const _private_bit_ref<_AP_W2, _AP_S2> &a2) const { +// return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, 1, +// _private_bit_ref<_AP_W2, _AP_S2> >( +// const_cast&>(*this), +// const_cast<_private_bit_ref<_AP_W2, _AP_S2>&>(a2)); +// } +// +// template +// INLINE ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, 1, +// _private_bit_ref<_AP_W2, _AP_S2> > +// operator,(_private_bit_ref<_AP_W2, _AP_S2> &a2) { +// return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, 1, +// _private_bit_ref<_AP_W2, _AP_S2> >(*this, a2); +// } +// +// template +// INLINE ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2 + _AP_W3, +// ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> > +// operator,(const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> &a2) const { +// return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2 + _AP_W3, +// ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> >( +// const_cast&>(*this), +// const_cast&>(a2)); +// } +// +// template +// INLINE ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2 + _AP_W3, +// ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> > +// operator,(ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> &a2) { +// return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2 + _AP_W3, +// ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> >(*this, +// a2); +// } +// +// template +// INLINE 
ap_concat_ref< +// _AP_W, ap_private, _AP_W2, +// af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > +// operator,(const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> +// &a2) const { +// return ap_concat_ref< +// _AP_W, ap_private, _AP_W2, +// af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >( +// const_cast&>(*this), +// const_cast< +// af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>&>(a2)); +// } +// +// template +// INLINE ap_concat_ref< +// _AP_W, ap_private, _AP_W2, +// af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > +// operator,(af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> &a2) { +// return ap_concat_ref< +// _AP_W, ap_private, _AP_W2, +// af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >(*this, +// a2); +// } +// +// template +// INLINE +// ap_concat_ref<_AP_W, ap_private, 1, +// af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > +// operator,(const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> +// &a2) const { +// return ap_concat_ref< +// _AP_W, ap_private, 1, +// af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >( +// const_cast&>(*this), +// const_cast&>( +// a2)); +// } +// +// template +// INLINE +// ap_concat_ref<_AP_W, ap_private, 1, +// af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > +// operator,( +// af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> &a2) { +// return ap_concat_ref< +// _AP_W, ap_private, 1, +// af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >(*this, a2); +// } + + INLINE ap_private<_AP_W, false> get() const { + ap_private<_AP_W, false> ret(*this); + return ret; + } + + template + INLINE void set(const ap_private<_AP_W3, false>& val) { + operator=(ap_private<_AP_W3, _AP_S>(val)); + } + + /// + /// @name Value Tests + /// + /// This tests the high bit of this ap_private to determine if it is set. + /// @returns true if this ap_private is negative, false otherwise + /// @brief Determine sign of this ap_private. + INLINE bool isNegative() const { + // just for get rid of warnings + enum { shift = (_AP_W - APINT_BITS_PER_WORD * (_AP_N - 1) - 1) }; + static const uint64_t mask = 1ULL << (shift); + return _AP_S && (pVal[_AP_N - 1] & mask); + } + + /// This tests the high bit of the ap_private to determine if it is unset. + /// @brief Determine if this ap_private Value is positive (not negative). + INLINE bool isPositive() const { return !isNegative(); } + + /// This tests if the value of this ap_private is strictly positive (> 0). + /// @returns true if this ap_private is Positive and not zero. + /// @brief Determine if this ap_private Value is strictly positive. + INLINE bool isStrictlyPositive() const { + return isPositive() && (*this) != 0; + } + + /// This checks to see if the value has all bits of the ap_private are set or + /// not. + /// @brief Determine if all bits are set + INLINE bool isAllOnesValue() const { return countPopulation() == _AP_W; } + + /// This checks to see if the value of this ap_private is the maximum unsigned + /// value for the ap_private's bit width. + /// @brief Determine if this is the largest unsigned value. + INLINE bool isMaxValue() const { return countPopulation() == _AP_W; } + + /// This checks to see if the value of this ap_private is the maximum signed + /// value for the ap_private's bit width. + /// @brief Determine if this is the largest signed value. 
+ INLINE bool isMaxSignedValue() const { + return !isNegative() && countPopulation() == _AP_W - 1; + } + + /// This checks to see if the value of this ap_private is the minimum unsigned + /// value for the ap_private's bit width. + /// @brief Determine if this is the smallest unsigned value. + INLINE bool isMinValue() const { return countPopulation() == 0; } + + /// This checks to see if the value of this ap_private is the minimum signed + /// value for the ap_private's bit width. + /// @brief Determine if this is the smallest signed value. + INLINE bool isMinSignedValue() const { + return isNegative() && countPopulation() == 1; + } + + /// This function returns a pointer to the internal storage of the ap_private. + /// This is useful for writing out the ap_private in binary form without any + /// conversions. + INLINE const uint64_t* getRawData() const { return &pVal[0]; } + + // Square Root - this method computes and returns the square root of "this". + // Three mechanisms are used for computation. For small values (<= 5 bits), + // a table lookup is done. This gets some performance for common cases. For + // values using less than 52 bits, the value is converted to double and then + // the libc sqrt function is called. The result is rounded and then converted + // back to a uint64_t which is then used to construct the result. Finally, + // the Babylonian method for computing square roots is used. + INLINE ap_private sqrt() const { + // Determine the magnitude of the value. + uint32_t magnitude = getActiveBits(); + + // Use a fast table for some small values. This also gets rid of some + // rounding errors in libc sqrt for small values. + if (magnitude <= 5) { + static const uint8_t results[32] = { + /* 0 */ 0, + /* 1- 2 */ 1, 1, + /* 3- 6 */ 2, 2, 2, 2, + /* 7-12 */ 3, 3, 3, 3, 3, 3, + /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4, + /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + /* 31 */ 6}; + return ap_private<_AP_W, _AP_S>(/*BitWidth,*/ results[get_VAL()]); + } + + // If the magnitude of the value fits in less than 52 bits (the precision of + // an IEEE double precision floating point value), then we can use the + // libc sqrt function which will probably use a hardware sqrt computation. + // This should be faster than the algorithm below. + if (magnitude < 52) { +#ifdef _MSC_VER + // Amazingly, VC++ doesn't have round(). + return ap_private<_AP_W, _AP_S>(/*BitWidth,*/ + uint64_t(::sqrt(double(get_VAL()))) + + 0.5); +#else + return ap_private<_AP_W, _AP_S>(/*BitWidth,*/ + uint64_t( + ::round(::sqrt(double(get_VAL()))))); +#endif + } + + // Okay, all the short cuts are exhausted. We must compute it. The following + // is a classical Babylonian method for computing the square root. This code + // was adapted to APINt from a wikipedia article on such computations. + // See http://www.wikipedia.org/ and go to the page named + // Calculate_an_integer_square_root. + uint32_t nbits = BitWidth, i = 4; + ap_private<_AP_W, _AP_S> testy(16); + ap_private<_AP_W, _AP_S> x_old(/*BitWidth,*/ 1); + ap_private<_AP_W, _AP_S> x_new(0); + ap_private<_AP_W, _AP_S> two(/*BitWidth,*/ 2); + + // Select a good starting value using binary logarithms. 
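+    // (Illustrative note: the loop finds the smallest even i with
+    // *this <= 2^i, keeping testy == 2^i, and seeds x_old = 2^(i/2), an
+    // estimate no smaller than the true root, which the Babylonian
+    // iteration below then refines downward.)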
+ for (;; i += 2, testy = testy.shl(2)) + if (i >= nbits || this->ule(testy)) { + x_old = x_old.shl(i / 2); + break; + } + + // Use the Babylonian method to arrive at the integer square root: + for (;;) { + x_new = (this->udiv(x_old) + x_old).udiv(two); + if (x_old.ule(x_new)) break; + x_old = x_new; + } + + // Make sure we return the closest approximation + // NOTE: The rounding calculation below is correct. It will produce an + // off-by-one discrepancy with results from pari/gp. That discrepancy has + // been + // determined to be a rounding issue with pari/gp as it begins to use a + // floating point representation after 192 bits. There are no discrepancies + // between this algorithm and pari/gp for bit widths < 192 bits. + ap_private<_AP_W, _AP_S> square(x_old * x_old); + ap_private<_AP_W, _AP_S> nextSquare((x_old + 1) * (x_old + 1)); + if (this->ult(square)) + return x_old; + else if (this->ule(nextSquare)) { + ap_private<_AP_W, _AP_S> midpoint((nextSquare - square).udiv(two)); + ap_private<_AP_W, _AP_S> offset(*this - square); + if (offset.ult(midpoint)) + return x_old; + else + return x_old + 1; + } else + assert(0 && "Error in ap_private<_AP_W, _AP_S>::sqrt computation"); + return x_old + 1; + } + + /// + /// @Assignment Operators + /// + /// @returns *this after assignment of RHS. + /// @brief Copy assignment operator. + INLINE ap_private& operator=(const ap_private& RHS) { + if (this != &RHS) memcpy(pVal, RHS.get_pVal(), _AP_N * APINT_WORD_SIZE); + return *this; + } + INLINE ap_private& operator=(const volatile ap_private& RHS) { + if (this != &RHS) + for (int i = 0; i < _AP_N; ++i) pVal[i] = RHS.get_pVal(i); + return *this; + } + INLINE void operator=(const ap_private& RHS) volatile { + if (this != &RHS) + for (int i = 0; i < _AP_N; ++i) pVal[i] = RHS.get_pVal(i); + } + INLINE void operator=(const volatile ap_private& RHS) volatile { + if (this != &RHS) + for (int i = 0; i < _AP_N; ++i) pVal[i] = RHS.get_pVal(i); + } + + template + INLINE ap_private& operator=(const ap_private<_AP_W1, _AP_S1>& RHS) { + if (_AP_S1) + cpSextOrTrunc(RHS); + else + cpZextOrTrunc(RHS); + clearUnusedBits(); + return *this; + } + + template + INLINE ap_private& operator=(const volatile ap_private<_AP_W1, _AP_S1>& RHS) { + if (_AP_S1) + cpSextOrTrunc(RHS); + else + cpZextOrTrunc(RHS); + clearUnusedBits(); + return *this; + } + + template + INLINE ap_private& operator=(const _private_range_ref<_AP_W2, _AP_S2>& op2) { + *this = ap_private<_AP_W2, false>(op2); + return *this; + } + +#if 0 + template + INLINE ap_private& operator=(const ap_private<_AP_W1, _AP_S1, true>& RHS) { + static const uint64_t that_sign_ext_mask = (_AP_W1==APINT_BITS_PER_WORD)?0:~0ULL>>(_AP_W1%APINT_BITS_PER_WORD)<<(_AP_W1%APINT_BITS_PER_WORD); + if (RHS.isNegative()) { + pVal[0] = RHS.get_VAL() | that_sign_ext_mask; + memset(pVal+1,~0, APINT_WORD_SIZE*(_AP_N-1)); + } else { + pVal[0] = RHS.get_VAL(); + memset(pVal+1, 0, APINT_WORD_SIZE*(_AP_N-1)); + } + clearUnusedBits(); + return *this; + } + + template + INLINE ap_private& operator=(const volatile ap_private<_AP_W1, _AP_S1, true>& RHS) { + static const uint64_t that_sign_ext_mask = (_AP_W1==APINT_BITS_PER_WORD)?0:~0ULL>>(_AP_W1%APINT_BITS_PER_WORD)<<(_AP_W1%APINT_BITS_PER_WORD); + if (RHS.isNegative()) { + pVal[0] = RHS.get_VAL() | that_sign_ext_mask; + memset(pVal+1,~0, APINT_WORD_SIZE*(_AP_N-1)); + } else { + pVal[0] = RHS.get_VAL(); + memset(pVal+1, 0, APINT_WORD_SIZE*(_AP_N-1)); + } + clearUnusedBits(); + return *this; + } +#endif + +/// from all c types. 
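+// Usage sketch (illustrative): an assignment such as
+//   ap_private<128, true> x(0);
+//   x = -42;  // expands via ASSIGN_OP_FROM_INT(int, sizeof(int) * 8, true)
+// wraps the native value in a suitably sized temporary and then reuses the
+// converting operator= above.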
+#define ASSIGN_OP_FROM_INT(C_TYPE, _AP_W2, _AP_S2) \ + INLINE ap_private& operator=(const C_TYPE rhs) { \ + ap_private<(_AP_W2), (_AP_S2)> tmp = rhs; \ + operator=(tmp); \ + return *this; \ + } + + ASSIGN_OP_FROM_INT(bool, 1, false) + ASSIGN_OP_FROM_INT(char, 8, CHAR_IS_SIGNED) + ASSIGN_OP_FROM_INT(signed char, 8, true) + ASSIGN_OP_FROM_INT(unsigned char, 8, false) + ASSIGN_OP_FROM_INT(short, sizeof(short) * 8, true) + ASSIGN_OP_FROM_INT(unsigned short, sizeof(unsigned short) * 8, false) + ASSIGN_OP_FROM_INT(int, sizeof(int) * 8, true) + ASSIGN_OP_FROM_INT(unsigned int, sizeof(unsigned int) * 8, false) + ASSIGN_OP_FROM_INT(long, sizeof(long) * 8, true) + ASSIGN_OP_FROM_INT(unsigned long, sizeof(unsigned long) * 8, false) + ASSIGN_OP_FROM_INT(ap_slong, sizeof(ap_slong) * 8, true) + ASSIGN_OP_FROM_INT(ap_ulong, sizeof(ap_ulong) * 8, false) +#undef ASSIGN_OP_FROM_INT + + /// from c string. + // XXX this is a must, to prevent pointer being converted to bool. + INLINE ap_private& operator=(const char* s) { + ap_private tmp(s); // XXX direct initialization, as ctor is explicit. + operator=(tmp); + return *this; + } + + /// + /// @name Unary Operators + /// + /// @returns a new ap_private value representing *this incremented by one + /// @brief Postfix increment operator. + INLINE const ap_private operator++(int) { + ap_private API(*this); + ++(*this); + return API; + } + + /// @returns *this incremented by one + /// @brief Prefix increment operator. + INLINE ap_private& operator++() { + ap_private_ops::add_1(pVal, pVal, _AP_N, 1); + clearUnusedBits(); + return *this; + } + + /// @returns a new ap_private representing *this decremented by one. + /// @brief Postfix decrement operator. + INLINE const ap_private operator--(int) { + ap_private API(*this); + --(*this); + return API; + } + + /// @returns *this decremented by one. + /// @brief Prefix decrement operator. + INLINE ap_private& operator--() { + ap_private_ops::sub_1(pVal, _AP_N, 1); + clearUnusedBits(); + return *this; + } + + /// Performs a bitwise complement operation on this ap_private. + /// @returns an ap_private that is the bitwise complement of *this + /// @brief Unary bitwise complement operator. + INLINE ap_private<_AP_W + !_AP_S, true> operator~() const { + ap_private<_AP_W + !_AP_S, true> Result(*this); + Result.flip(); + return Result; + } + + /// Negates *this using two's complement logic. + /// @returns An ap_private value representing the negation of *this. + /// @brief Unary negation operator + INLINE typename RType<1, false>::minus operator-() const { + return ap_private<1, false>(0) - (*this); + } + + /// Performs logical negation operation on this ap_private. + /// @returns true if *this is zero, false otherwise. + /// @brief Logical negation operator. 
+  INLINE bool operator!() const {
+    for (int i = 0; i < _AP_N; ++i)
+      if (pVal[i]) return false;
+    return true;
+  }
+
+  template <bool _AP_S1>
+  INLINE ap_private<_AP_W, _AP_S || _AP_S1> And(
+      const ap_private<_AP_W, _AP_S1>& RHS) const {
+    return this->operator&(RHS);
+  }
+  template <bool _AP_S1>
+  INLINE ap_private Or(const ap_private<_AP_W, _AP_S1>& RHS) const {
+    return this->operator|(RHS);
+  }
+  template <bool _AP_S1>
+  INLINE ap_private Xor(const ap_private<_AP_W, _AP_S1>& RHS) const {
+    return this->operator^(RHS);
+  }
+
+  INLINE ap_private Mul(const ap_private& RHS) const {
+    ap_private Result(*this);
+    Result *= RHS;
+    return Result;
+  }
+
+  INLINE ap_private Add(const ap_private& RHS) const {
+    ap_private Result(0);
+    ap_private_ops::add(Result.get_pVal(), pVal, RHS.get_pVal(), _AP_N, _AP_N,
+                        _AP_N, _AP_S, _AP_S);
+    Result.clearUnusedBits();
+    return Result;
+  }
+
+  INLINE ap_private Sub(const ap_private& RHS) const {
+    ap_private Result(0);
+    ap_private_ops::sub(Result.get_pVal(), pVal, RHS.get_pVal(), _AP_N, _AP_N,
+                        _AP_N, _AP_S, _AP_S);
+    Result.clearUnusedBits();
+    return Result;
+  }
+
+  /// Arithmetic right-shift this ap_private by shiftAmt.
+  /// @brief Arithmetic right-shift function.
+  INLINE ap_private ashr(uint32_t shiftAmt) const {
+    assert(shiftAmt <= BitWidth && "Invalid shift amount, too big");
+    // Handle a degenerate case
+    if (shiftAmt == 0) return ap_private(*this);
+
+    // If all the bits were shifted out, the result is, technically,
+    // undefined. We return -1 if it was negative, 0 otherwise. We check this
+    // early to avoid issues in the algorithm below.
+    if (shiftAmt == BitWidth) {
+      if (isNegative())
+        return ap_private(-1);
+      else
+        return ap_private(0);
+    }
+
+    // Create some space for the result.
+    ap_private Retval(0);
+    uint64_t* val = Retval.get_pVal();
+
+    // Compute some values needed by the following shift algorithms
+    uint32_t wordShift =
+        shiftAmt % APINT_BITS_PER_WORD;                // bits to shift per word
+    uint32_t offset = shiftAmt / APINT_BITS_PER_WORD;  // word offset for shift
+    uint32_t breakWord = _AP_N - 1 - offset;           // last word affected
+    uint32_t bitsInWord = whichBit(BitWidth);  // how many bits in last word?
+    if (bitsInWord == 0) bitsInWord = APINT_BITS_PER_WORD;
+
+    // If we are shifting whole words, just move whole words
+    if (wordShift == 0) {
+      // Move the words containing significant bits
+      for (uint32_t i = 0; i <= breakWord; ++i)
+        val[i] = pVal[i + offset];  // move whole word
+
+      // Adjust the top significant word for sign bit fill, if negative
+      if (isNegative())
+        if (bitsInWord < APINT_BITS_PER_WORD)
+          val[breakWord] |= ~0ULL << (bitsInWord);  // set high bits
+    } else {
+      // Shift the low order words
+      for (uint32_t i = 0; i < breakWord; ++i) {
+        // This combines the shifted corresponding word with the low bits from
+        // the next word (shifted into this word's high bits).
+        val[i] = ((pVal[i + offset]) >> (wordShift));
+        val[i] |= ((pVal[i + offset + 1]) << (APINT_BITS_PER_WORD - wordShift));
+      }
+
+      // Shift the break word. In this case there are no bits from the next
+      // word to include in this word.
+      val[breakWord] = (pVal[breakWord + offset]) >> (wordShift);
+
+      // Deal with sign extension in the break word, and possibly the word
+      // before it.
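+      // Worked example (illustrative): for _AP_W = 128 (_AP_N = 2) and
+      // shiftAmt = 8: wordShift = 8, offset = 0, breakWord = 1; each low
+      // word combines its own bits shifted right by 8 with the bottom 8
+      // bits of the next word, and for a negative value the branch below
+      // ORs ~0ULL << 56 into the top word.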
+      if (isNegative()) {
+        if (wordShift > bitsInWord) {
+          if (breakWord > 0)
+            val[breakWord - 1] |=
+                ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord));
+          val[breakWord] |= ~0ULL;
+        } else
+          val[breakWord] |= (~0ULL << (bitsInWord - wordShift));
+      }
+    }
+
+    // Remaining words are 0 or -1, just assign them.
+    uint64_t fillValue = (isNegative() ? ~0ULL : 0);
+    for (int i = breakWord + 1; i < _AP_N; ++i) val[i] = fillValue;
+    Retval.clearUnusedBits();
+    return Retval;
+  }
+
+  /// Logical right-shift this ap_private by shiftAmt.
+  /// @brief Logical right-shift function.
+  INLINE ap_private lshr(uint32_t shiftAmt) const {
+    // If all the bits were shifted out, the result is 0. This avoids issues
+    // with shifting by the size of the integer type, which produces undefined
+    // results. We define these "undefined results" to always be 0.
+    if (shiftAmt == BitWidth) return ap_private(0);
+
+    // If none of the bits are shifted out, the result is *this. This avoids
+    // issues with shifting by the size of the integer type, which produces
+    // undefined results in the code below. This is also an optimization.
+    if (shiftAmt == 0) return ap_private(*this);
+
+    // Create some space for the result.
+    ap_private Retval(0);
+    uint64_t* val = Retval.get_pVal();
+
+    // If we are shifting less than a word, compute the shift with a simple
+    // carry
+    if (shiftAmt < APINT_BITS_PER_WORD) {
+      uint64_t carry = 0;
+      for (int i = _AP_N - 1; i >= 0; --i) {
+        val[i] = ((pVal[i]) >> (shiftAmt)) | carry;
+        carry = (pVal[i]) << (APINT_BITS_PER_WORD - shiftAmt);
+      }
+      Retval.clearUnusedBits();
+      return Retval;
+    }
+
+    // Compute some values needed by the remaining shift algorithms
+    uint32_t wordShift = shiftAmt % APINT_BITS_PER_WORD;
+    uint32_t offset = shiftAmt / APINT_BITS_PER_WORD;
+
+    // If we are shifting whole words, just move whole words
+    if (wordShift == 0) {
+      for (uint32_t i = 0; i < _AP_N - offset; ++i) val[i] = pVal[i + offset];
+      for (uint32_t i = _AP_N - offset; i < _AP_N; i++) val[i] = 0;
+      Retval.clearUnusedBits();
+      return Retval;
+    }
+
+    // Shift the low order words
+    uint32_t breakWord = _AP_N - offset - 1;
+    for (uint32_t i = 0; i < breakWord; ++i)
+      val[i] = ((pVal[i + offset]) >> (wordShift)) |
+               ((pVal[i + offset + 1]) << (APINT_BITS_PER_WORD - wordShift));
+    // Shift the break word.
+    val[breakWord] = (pVal[breakWord + offset]) >> (wordShift);
+
+    // Remaining words are 0
+    for (int i = breakWord + 1; i < _AP_N; ++i) val[i] = 0;
+    Retval.clearUnusedBits();
+    return Retval;
+  }
+
+  /// Left-shift this ap_private by shiftAmt.
+  /// @brief Left-shift function.
+  INLINE ap_private shl(uint32_t shiftAmt) const {
+    assert(shiftAmt <= BitWidth && "Invalid shift amount, too big");
+    // If all the bits were shifted out, the result is 0. This avoids issues
+    // with shifting by the size of the integer type, which produces undefined
+    // results. We define these "undefined results" to always be 0.
+    if (shiftAmt == BitWidth) return ap_private(0);
+
+    // If none of the bits are shifted out, the result is *this. This avoids a
+    // lshr by the word size in the loop below which can produce incorrect
+    // results. It also avoids the expensive computation below for a common
+    // case.
+    if (shiftAmt == 0) return ap_private(*this);
+
+    // Create some space for the result.
+    ap_private Retval(0);
+    uint64_t* val = Retval.get_pVal();
+    // If we are shifting less than a word, do it the easy way
+    if (shiftAmt < APINT_BITS_PER_WORD) {
+      uint64_t carry = 0;
+      for (int i = 0; i < _AP_N; i++) {
+        val[i] = ((pVal[i]) << (shiftAmt)) | carry;
+        carry = (pVal[i]) >> (APINT_BITS_PER_WORD - shiftAmt);
+      }
+      Retval.clearUnusedBits();
+      return Retval;
+    }
+
+    // Compute some values needed by the remaining shift algorithms
+    uint32_t wordShift = shiftAmt % APINT_BITS_PER_WORD;
+    uint32_t offset = shiftAmt / APINT_BITS_PER_WORD;
+
+    // If we are shifting whole words, just move whole words
+    if (wordShift == 0) {
+      for (uint32_t i = 0; i < offset; i++) val[i] = 0;
+      for (int i = offset; i < _AP_N; i++) val[i] = pVal[i - offset];
+      Retval.clearUnusedBits();
+      return Retval;
+    }
+
+    // Copy whole words from this to Result.
+    uint32_t i = _AP_N - 1;
+    for (; i > offset; --i)
+      val[i] = (pVal[i - offset]) << (wordShift) |
+               (pVal[i - offset - 1]) >> (APINT_BITS_PER_WORD - wordShift);
+    val[offset] = (pVal[0]) << (wordShift);
+    for (i = 0; i < offset; ++i) val[i] = 0;
+    Retval.clearUnusedBits();
+    return Retval;
+  }
+
+  INLINE ap_private rotl(uint32_t rotateAmt) const {
+    if (rotateAmt == 0) return ap_private(*this);
+    // Don't get too fancy, just use existing shift/or facilities. Note that
+    // shl/lshr return new values, so the results must be assigned back.
+    ap_private hi(*this);
+    ap_private lo(*this);
+    hi = hi.shl(rotateAmt);
+    lo = lo.lshr(BitWidth - rotateAmt);
+    return hi | lo;
+  }
+
+  INLINE ap_private rotr(uint32_t rotateAmt) const {
+    if (rotateAmt == 0) return ap_private(*this);
+    // Don't get too fancy, just use existing shift/or facilities. Note that
+    // shl/lshr return new values, so the results must be assigned back.
+    ap_private hi(*this);
+    ap_private lo(*this);
+    lo = lo.lshr(rotateAmt);
+    hi = hi.shl(BitWidth - rotateAmt);
+    return hi | lo;
+  }
+
+  /// Perform an unsigned divide operation on this ap_private by RHS. Both
+  /// this and RHS are treated as unsigned quantities for purposes of this
+  /// division.
+  /// @returns a new ap_private value containing the division result
+  /// @brief Unsigned division operation.
+  INLINE ap_private udiv(const ap_private& RHS) const {
+    // Get some facts about the LHS and RHS number of bits and words
+    uint32_t rhsBits = RHS.getActiveBits();
+    uint32_t rhsWords = !rhsBits ? 0 : (whichWord(rhsBits - 1) + 1);
+    assert(rhsWords && "Divided by zero???");
+    uint32_t lhsBits = this->getActiveBits();
+    uint32_t lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
+
+    // Deal with some degenerate cases
+    if (!lhsWords)
+      // 0 / X ===> 0
+      return ap_private(0);
+    else if (lhsWords < rhsWords || this->ult(RHS)) {
+      // X / Y ===> 0, iff X < Y
+      return ap_private(0);
+    } else if (*this == RHS) {
+      // X / X ===> 1
+      return ap_private(1);
+    } else if (lhsWords == 1 && rhsWords == 1) {
+      // All high words are zero, just use native divide
+      return ap_private(this->pVal[0] / RHS.get_pVal(0));
+    }
+
+    // We have to compute it the hard way. Invoke the Knuth divide algorithm.
+    ap_private Quotient(0); // to hold result.
+    ap_private_ops::divide(*this, lhsWords, RHS, rhsWords, &Quotient,
+                           (ap_private*)0);
+    return Quotient;
+  }
+
+  /// Signed divide this ap_private by ap_private RHS.
+  /// @brief Signed division function for ap_private.
+  INLINE ap_private sdiv(const ap_private& RHS) const {
+    if (isNegative())
+      if (RHS.isNegative())
+        return (-(*this)).udiv(-RHS);
+      else
+        return -((-(*this)).udiv(RHS));
+    else if (RHS.isNegative())
+      return -(this->udiv((ap_private)(-RHS)));
+    return this->udiv(RHS);
+  }
+
+  /// Perform an unsigned remainder operation on this ap_private with RHS
+  /// being the divisor. Both this and RHS are treated as unsigned quantities
+  /// for purposes of this operation. Note that this is a true remainder
+  /// operation and not a modulo operation because the sign follows the sign
+  /// of the dividend which is *this.
+  /// @returns a new ap_private value containing the remainder result
+  /// @brief Unsigned remainder operation.
+  INLINE ap_private urem(const ap_private& RHS) const {
+    // Get some facts about the LHS
+    uint32_t lhsBits = getActiveBits();
+    uint32_t lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
+
+    // Get some facts about the RHS
+    uint32_t rhsBits = RHS.getActiveBits();
+    uint32_t rhsWords = !rhsBits ? 0 : (whichWord(rhsBits - 1) + 1);
+    assert(rhsWords && "Performing remainder operation by zero ???");
+
+    // Check the degenerate cases
+    if (lhsWords == 0) {
+      // 0 % Y ===> 0
+      return ap_private(0);
+    } else if (lhsWords < rhsWords || this->ult(RHS)) {
+      // X % Y ===> X, iff X < Y
+      return *this;
+    } else if (*this == RHS) {
+      // X % X == 0;
+      return ap_private(0);
+    } else if (lhsWords == 1) {
+      // All high words are zero, just use native remainder
+      return ap_private(pVal[0] % RHS.get_pVal(0));
+    }
+
+    // We have to compute it the hard way. Invoke the Knuth divide algorithm.
+    ap_private Remainder(0);
+    ap_private_ops::divide(*this, lhsWords, RHS, rhsWords, (ap_private*)(0),
+                           &Remainder);
+    return Remainder;
+  }
+
+  INLINE ap_private urem(uint64_t RHS) const {
+    // Get some facts about the LHS
+    uint32_t lhsBits = getActiveBits();
+    uint32_t lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
+    // Get some facts about the RHS
+    uint32_t rhsWords = 1;
+    // !rhsBits ? 0 : (ap_private<_AP_W, _AP_S>::whichWord(rhsBits - 1) + 1);
+    assert(rhsWords && "Performing remainder operation by zero ???");
+    // Check the degenerate cases
+    if (lhsWords == 0) {
+      // 0 % Y ===> 0
+      return ap_private(0);
+    } else if (lhsWords < rhsWords || this->ult(RHS)) {
+      // X % Y ===> X, iff X < Y
+      return *this;
+    } else if (*this == RHS) {
+      // X % X == 0;
+      return ap_private(0);
+    } else if (lhsWords == 1) {
+      // All high words are zero, just use native remainder
+      return ap_private(pVal[0] % RHS);
+    }
+
+    // We have to compute it the hard way. Invoke the Knuth divide algorithm.
+    ap_private Remainder(0);
+    divide(*this, lhsWords, RHS, (ap_private*)(0), &Remainder);
+    return Remainder;
+  }
+
+  /// Signed remainder operation on ap_private.
+  /// @brief Function for signed remainder operation.
+  INLINE ap_private srem(const ap_private& RHS) const {
+    if (isNegative()) {
+      ap_private lhs = -(*this);
+      if (RHS.isNegative()) {
+        ap_private rhs = -RHS;
+        return -(lhs.urem(rhs));
+      } else
+        return -(lhs.urem(RHS));
+    } else if (RHS.isNegative()) {
+      ap_private rhs = -RHS;
+      return this->urem(rhs);
+    }
+    return this->urem(RHS);
+  }
+
+  /// Signed remainder operation on ap_private.
+  /// @brief Function for signed remainder operation.
+  INLINE ap_private srem(int64_t RHS) const {
+    if (isNegative())
+      if (RHS < 0)
+        return -((-(*this)).urem(-RHS));
+      else
+        return -((-(*this)).urem(RHS));
+    else if (RHS < 0)
+      return this->urem(-RHS);
+    return this->urem(RHS);
+  }
+
+  /// Compares this ap_private with RHS for the validity of the equality
+  /// relationship.
+  /// @returns true if *this == Val
+  /// @brief Equality comparison.
+  template <bool _AP_S1>
+  INLINE bool eq(const ap_private<_AP_W, _AP_S1>& RHS) const {
+    return (*this) == RHS;
+  }
+
+  /// Compares this ap_private with RHS for the validity of the inequality
+  /// relationship.
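+  /// (Illustrative, added for exposition: ne() is simply !eq(), so two 8-bit
+  /// values holding 5 and 7 satisfy ne() and fail eq().)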
+  /// @returns true if *this != Val
+  /// @brief Inequality comparison
+  template <bool _AP_S1>
+  INLINE bool ne(const ap_private<_AP_W, _AP_S1>& RHS) const {
+    return !((*this) == RHS);
+  }
+
+  /// Regards both *this and RHS as unsigned quantities and compares them for
+  /// the validity of the less-than relationship.
+  /// @returns true if *this < RHS when both are considered unsigned.
+  /// @brief Unsigned less than comparison
+  template <bool _AP_S1>
+  INLINE bool ult(const ap_private<_AP_W, _AP_S1>& RHS) const {
+    // Get active bit length of both operands
+    uint32_t n1 = getActiveBits();
+    uint32_t n2 = RHS.getActiveBits();
+
+    // If magnitude of LHS is less than RHS, return true.
+    if (n1 < n2) return true;
+
+    // If magnitude of RHS is greater than LHS, return false.
+    if (n2 < n1) return false;
+
+    // If they both fit in a word, just compare the low order word
+    if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD)
+      return pVal[0] < RHS.get_pVal(0);
+
+    // Otherwise, compare all words
+    uint32_t topWord = whichWord(AESL_std::max(n1, n2) - 1);
+    for (int i = topWord; i >= 0; --i) {
+      if (pVal[i] > RHS.get_pVal(i)) return false;
+      if (pVal[i] < RHS.get_pVal(i)) return true;
+    }
+    return false;
+  }
+
+  INLINE bool ult(uint64_t RHS) const {
+    // Get active bit length of both operands
+    uint32_t n1 = getActiveBits();
+    uint32_t n2 =
+        64 - ap_private_ops::CountLeadingZeros_64(RHS); // RHS.getActiveBits();
+
+    // If magnitude of LHS is less than RHS, return true.
+    if (n1 < n2) return true;
+
+    // If magnitude of RHS is greater than LHS, return false.
+    if (n2 < n1) return false;
+
+    // If they both fit in a word, just compare the low order word
+    if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD)
+      return pVal[0] < RHS;
+    // Not reachable: n2 is at most APINT_BITS_PER_WORD here, so n1 == n2
+    // implies both values fit in a word and the compare above has returned.
+    assert(0);
+    return false;
+  }
+
+  template <bool _AP_S1>
+  INLINE bool slt(const ap_private<_AP_W, _AP_S1>& RHS) const {
+    ap_private lhs(*this);
+    ap_private<_AP_W, _AP_S1> rhs(RHS);
+    bool lhsNeg = isNegative();
+    bool rhsNeg = rhs.isNegative();
+    if (lhsNeg) {
+      // Sign bit is set so perform two's complement to make it positive
+      lhs.flip();
+      lhs++;
+    }
+    if (rhsNeg) {
+      // Sign bit is set so perform two's complement to make it positive
+      rhs.flip();
+      rhs++;
+    }
+
+    // Now we have unsigned values to compare so do the comparison if necessary
+    // based on the negativeness of the values.
+    if (lhsNeg)
+      if (rhsNeg)
+        return lhs.ugt(rhs);
+      else
+        return true;
+    else if (rhsNeg)
+      return false;
+    else
+      return lhs.ult(rhs);
+  }
+
+  /// Regards both *this and RHS as unsigned quantities and compares them for
+  /// validity of the less-or-equal relationship.
+  /// @returns true if *this <= RHS when both are considered unsigned.
+  /// @brief Unsigned less or equal comparison
+  template <bool _AP_S1>
+  INLINE bool ule(const ap_private<_AP_W, _AP_S1>& RHS) const {
+    return ult(RHS) || eq(RHS);
+  }
+
+  /// Regards both *this and RHS as signed quantities and compares them for
+  /// validity of the less-or-equal relationship.
+  /// @returns true if *this <= RHS when both are considered signed.
+  /// @brief Signed less or equal comparison
+  template <bool _AP_S1>
+  INLINE bool sle(const ap_private<_AP_W, _AP_S1>& RHS) const {
+    return slt(RHS) || eq(RHS);
+  }
+
+  /// Regards both *this and RHS as unsigned quantities and compares them for
+  /// the validity of the greater-than relationship.
+  /// @returns true if *this > RHS when both are considered unsigned.
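+  /// (Illustrative, added for exposition: with 4-bit operands, 0b1001 is 9
+  /// read unsigned but -7 read signed, so ugt(0b0111) holds while the signed
+  /// counterpart sgt(0b0111) does not.)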
+  /// @brief Unsigned greater than comparison
+  template <bool _AP_S1>
+  INLINE bool ugt(const ap_private<_AP_W, _AP_S1>& RHS) const {
+    return !ult(RHS) && !eq(RHS);
+  }
+
+  /// Regards both *this and RHS as signed quantities and compares them for
+  /// the validity of the greater-than relationship.
+  /// @returns true if *this > RHS when both are considered signed.
+  /// @brief Signed greater than comparison
+  template <bool _AP_S1>
+  INLINE bool sgt(const ap_private<_AP_W, _AP_S1>& RHS) const {
+    return !slt(RHS) && !eq(RHS);
+  }
+
+  /// Regards both *this and RHS as unsigned quantities and compares them for
+  /// validity of the greater-or-equal relationship.
+  /// @returns true if *this >= RHS when both are considered unsigned.
+  /// @brief Unsigned greater or equal comparison
+  template <bool _AP_S1>
+  INLINE bool uge(const ap_private<_AP_W, _AP_S1>& RHS) const {
+    return !ult(RHS);
+  }
+
+  /// Regards both *this and RHS as signed quantities and compares them for
+  /// validity of the greater-or-equal relationship.
+  /// @returns true if *this >= RHS when both are considered signed.
+  /// @brief Signed greater or equal comparison
+  template <bool _AP_S1>
+  INLINE bool sge(const ap_private<_AP_W, _AP_S1>& RHS) const {
+    return !slt(RHS);
+  }
+
+  // Sign extend to a new width.
+  template <int _AP_W1, bool _AP_S1>
+  INLINE void cpSext(const ap_private<_AP_W1, _AP_S1>& that) {
+    assert(_AP_W1 < BitWidth && "Invalid ap_private SignExtend request");
+    assert(_AP_W1 <= MAX_INT_BITS && "Too many bits");
+    // If the sign bit isn't set, this is the same as zext.
+    if (!that.isNegative()) {
+      cpZext(that);
+      return;
+    }
+
+    // The sign bit is set. First, get some facts
+    enum { wordBits = _AP_W1 % APINT_BITS_PER_WORD };
+    const int _AP_N1 = ap_private<_AP_W1, _AP_S1>::_AP_N;
+    // Mask the high order word appropriately
+    if (_AP_N1 == _AP_N) {
+      enum { newWordBits = _AP_W % APINT_BITS_PER_WORD };
+      // The sign extension is confined to the most significant source word.
+      static const uint64_t mask = wordBits ? (~0ULL << (wordBits)) : 0ULL;
+      for (int i = 0; i < _AP_N; ++i) pVal[i] = that.get_pVal(i);
+      pVal[_AP_N - 1] |= mask;
+      return;
+    }
+
+    enum { newWordBits = _AP_W % APINT_BITS_PER_WORD };
+    // The sign extension is confined to the most significant source word.
+    static const uint64_t mask = wordBits ? (~0ULL << (wordBits)) : 0ULL;
+    int i;
+    for (i = 0; i < _AP_N1; ++i) pVal[i] = that.get_pVal(i);
+    pVal[i - 1] |= mask;
+    for (; i < _AP_N - 1; i++) pVal[i] = ~0ULL;
+    pVal[i] = ~0ULL;
+    clearUnusedBits();
+    return;
+  }
+
+  // Zero extend to a new width.
+  template <int _AP_W1, bool _AP_S1>
+  INLINE void cpZext(const ap_private<_AP_W1, _AP_S1>& that) {
+    assert(_AP_W1 < BitWidth && "Invalid ap_private ZeroExtend request");
+    assert(_AP_W1 <= MAX_INT_BITS && "Too many bits");
+    const int _AP_N1 = ap_private<_AP_W1, _AP_S1>::_AP_N;
+    int i = 0;
+    for (; i < _AP_N1; ++i) pVal[i] = that.get_pVal(i);
+    for (; i < _AP_N; ++i) pVal[i] = 0;
+    clearUnusedBits();
+  }
+
+  template <int _AP_W1, bool _AP_S1>
+  INLINE void cpZextOrTrunc(const ap_private<_AP_W1, _AP_S1>& that) {
+    if (BitWidth > _AP_W1)
+      cpZext(that);
+    else {
+      for (int i = 0; i < _AP_N; ++i) pVal[i] = that.get_pVal(i);
+      clearUnusedBits();
+    }
+  }
+
+  template <int _AP_W1, bool _AP_S1>
+  INLINE void cpSextOrTrunc(const ap_private<_AP_W1, _AP_S1>& that) {
+    if (BitWidth > _AP_W1)
+      cpSext(that);
+    else {
+      for (int i = 0; i < _AP_N; ++i) pVal[i] = that.get_pVal(i);
+      clearUnusedBits();
+    }
+  }
+
+  /// @}
+  /// @name Value Characterization Functions
+  /// @{
+
+  /// @returns the total number of bits.
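+  /// (Illustrative, added for exposition: an ap_private<42, true> reports
+  /// getBitWidth() == 42 whatever value it holds; contrast getActiveBits()
+  /// below, which depends on the stored value.)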
+  INLINE uint32_t getBitWidth() const { return BitWidth; }
+
+  /// Here one word's bit width equals that of uint64_t.
+  /// @returns the number of words to hold the integer value of this ap_private.
+  /// @brief Get the number of words.
+  INLINE uint32_t getNumWords() const {
+    return (BitWidth + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
+  }
+
+  /// This function returns the number of active bits which is defined as the
+  /// bit width minus the number of leading zeros. This is used in several
+  /// computations to see how "wide" the value is.
+  /// @brief Compute the number of active bits in the value
+  INLINE uint32_t getActiveBits() const {
+    uint32_t bits = BitWidth - countLeadingZeros();
+    return bits ? bits : 1;
+  }
+
+  /// This method attempts to return the value of this ap_private as a zero
+  /// extended uint64_t. The bit width must be <= 64 or the value must fit
+  /// within a uint64_t. Otherwise an assertion will result.
+  /// @brief Get zero extended value
+  INLINE uint64_t getZExtValue() const {
+    assert(getActiveBits() <= 64 && "Too many bits for uint64_t");
+    return *pVal;
+  }
+
+  /// This method attempts to return the value of this ap_private as a sign
+  /// extended int64_t. The bit width must be <= 64 or the value must fit
+  /// within an int64_t. Otherwise an assertion will result.
+  /// @brief Get sign extended value
+  INLINE int64_t getSExtValue() const {
+    assert(getActiveBits() <= 64 && "Too many bits for int64_t");
+    return int64_t(pVal[0]);
+  }
+
+  /// This method determines how many bits are required to hold the ap_private
+  /// equivalent of the string given by \p str of length \p slen.
+  /// @brief Get bits required for string value.
+  INLINE static uint32_t getBitsNeeded(const char* str, uint32_t slen,
+                                       uint8_t radix) {
+    assert(str != 0 && "Invalid value string");
+    assert(slen > 0 && "Invalid string length");
+
+    // Each computation below needs to know if it's negative
+    uint32_t isNegative = str[0] == '-';
+    if (isNegative) {
+      slen--;
+      str++;
+    }
+    // For radixes of power-of-two values, the bits required are accurately
+    // and easily computed
+    if (radix == 2) return slen + isNegative;
+    if (radix == 8) return slen * 3 + isNegative;
+    if (radix == 16) return slen * 4 + isNegative;
+
+    // Otherwise it must be radix == 10, the hard case
+    assert(radix == 10 && "Invalid radix");
+
+    // Convert to the actual binary value.
+    // ap_private<_AP_W, _AP_S> tmp(sufficient, str, slen, radix);
+
+    // Compute how many bits are required.
+    // return isNegative + tmp.logBase2() + 1;
+    return isNegative + slen * 4;
+  }
+
+  /// countLeadingZeros - This function is an ap_private version of the
+  /// countLeadingZeros_{32,64} functions in MathExtras.h. It counts the number
+  /// of zeros from the most significant bit to the first one bit.
+  /// @returns BitWidth if the value is zero.
+  /// @returns the number of zeros from the most significant bit to the first
+  /// one bit.
+  INLINE uint32_t countLeadingZeros() const {
+    enum {
+      msw_bits = (BitWidth % APINT_BITS_PER_WORD)
+                     ? (BitWidth % APINT_BITS_PER_WORD)
+                     : APINT_BITS_PER_WORD,
+      excessBits = APINT_BITS_PER_WORD - msw_bits
+    };
+    uint32_t Count = ap_private_ops::CountLeadingZeros_64(pVal[_AP_N - 1]);
+    if (Count >= excessBits) Count -= excessBits;
+    if (!pVal[_AP_N - 1]) {
+      for (int i = _AP_N - 1; i; --i) {
+        if (!pVal[i - 1])
+          Count += APINT_BITS_PER_WORD;
+        else {
+          Count += ap_private_ops::CountLeadingZeros_64(pVal[i - 1]);
+          break;
+        }
+      }
+    }
+    return Count;
+  }
+
+  /// countLeadingOnes - This function counts the number of contiguous 1 bits
+  /// in the high order bits. The count stops when the first 0 bit is reached.
+  /// @returns 0 if the high order bit is not set
+  /// @returns the number of 1 bits from the most significant to the least
+  /// @brief Count the number of leading one bits.
+  INLINE uint32_t countLeadingOnes() const {
+    if (isSingleWord())
+      return countLeadingOnes_64(get_VAL(), APINT_BITS_PER_WORD - BitWidth);
+
+    uint32_t highWordBits = BitWidth % APINT_BITS_PER_WORD;
+    uint32_t shift =
+        (highWordBits == 0 ? 0 : APINT_BITS_PER_WORD - highWordBits);
+    int i = _AP_N - 1;
+    uint32_t Count = countLeadingOnes_64(get_pVal(i), shift);
+    if (Count == highWordBits) {
+      for (i--; i >= 0; --i) {
+        if (get_pVal(i) == ~0ULL)
+          Count += APINT_BITS_PER_WORD;
+        else {
+          Count += countLeadingOnes_64(get_pVal(i), 0);
+          break;
+        }
+      }
+    }
+    return Count;
+  }
+
+  /// countTrailingZeros - This function is an ap_private version of the
+  /// countTrailingZeros_{32,64} functions in MathExtras.h. It counts
+  /// the number of zeros from the least significant bit to the first set bit.
+  /// @returns BitWidth if the value is zero.
+  /// @returns the number of zeros from the least significant bit to the first
+  /// one bit.
+  /// @brief Count the number of trailing zero bits.
+  INLINE uint32_t countTrailingZeros() const {
+    uint32_t Count = 0;
+    uint32_t i = 0;
+    for (; i < _AP_N && get_pVal(i) == 0; ++i) Count += APINT_BITS_PER_WORD;
+    if (i < _AP_N) Count += ap_private_ops::CountTrailingZeros_64(get_pVal(i));
+    return AESL_std::min(Count, BitWidth);
+  }
+
+  /// countPopulation - This function is an ap_private version of the
+  /// countPopulation_{32,64} functions in MathExtras.h. It counts the number
+  /// of 1 bits in the ap_private value.
+  /// @returns 0 if the value is zero.
+  /// @returns the number of set bits.
+  /// @brief Count the number of bits set.
+  INLINE uint32_t countPopulation() const {
+    uint32_t Count = 0;
+    for (int i = 0; i < _AP_N - 1; ++i)
+      Count += ap_private_ops::CountPopulation_64(pVal[i]);
+    Count += ap_private_ops::CountPopulation_64(pVal[_AP_N - 1] & mask);
+    return Count;
+  }
+
+  /// @}
+  /// @name Conversion Functions
+  /// @{
+
+  /// This is used internally to convert an ap_private to a string.
+  /// @brief Converts an ap_private to a std::string
+  INLINE std::string toString(uint8_t radix, bool wantSigned) const;
+
+  /// Considers the ap_private to be unsigned and converts it into a string in
+  /// the radix given. The radix can be 2, 8, 10 or 16.
+  /// @returns a character interpretation of the ap_private
+  /// @brief Convert unsigned ap_private to string representation.
+  INLINE std::string toStringUnsigned(uint8_t radix = 10) const {
+    return toString(radix, false);
+  }
+
+  /// Considers the ap_private to be signed and converts it into a string in
+  /// the radix given. The radix can be 2, 8, 10 or 16.
+  /// @returns a character interpretation of the ap_private
+  /// @brief Convert signed ap_private to string representation.
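+  /// (Illustrative, added for exposition: an ap_private<8, true> holding the
+  /// bit pattern 0xFF prints as "255" via toStringUnsigned(10) but as "-1"
+  /// via toStringSigned(10), the two's complement reading of the same bits.)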
+  INLINE std::string toStringSigned(uint8_t radix = 10) const {
+    return toString(radix, true);
+  }
+
+  /// @brief Converts this ap_private to a double value.
+  INLINE double roundToDouble(bool isSigned) const {
+    // Handle the simple case where the value is contained in one uint64_t.
+    if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
+      uint64_t val = pVal[0];
+      if (isSigned) {
+        int64_t sext = ((int64_t(val)) << (64 - BitWidth)) >> (64 - BitWidth);
+        return double(sext);
+      } else
+        return double(val);
+    }
+
+    // Determine if the value is negative.
+    bool isNeg = isSigned ? (*this)[BitWidth - 1] : false;
+
+    // Construct the absolute value if we're negative.
+    ap_private<_AP_W, _AP_S> Tmp(isNeg ? -(*this) : (*this));
+
+    // Figure out how many bits we're using.
+    uint32_t n = Tmp.getActiveBits();
+
+    // The exponent (without bias normalization) is just the number of bits
+    // we are using. Note that the sign bit is gone since we constructed the
+    // absolute value.
+    uint64_t exp = n;
+
+    // Return infinity for exponent overflow
+    if (exp > 1023) {
+      if (!isSigned || !isNeg)
+        return std::numeric_limits<double>::infinity();
+      else
+        return -std::numeric_limits<double>::infinity();
+    }
+    exp += 1023; // Increment for 1023 bias
+
+    // Number of bits in mantissa is 52. To obtain the mantissa value, we must
+    // extract the high 52 bits from the correct words in pVal.
+    uint64_t mantissa;
+    unsigned hiWord = whichWord(n - 1);
+    if (hiWord == 0) {
+      mantissa = Tmp.get_pVal(0);
+      if (n > 52)
+        (mantissa) >>= (n - 52); // shift down, we want the top 52 bits.
+    } else {
+      assert(hiWord > 0 && "High word is negative?");
+      uint64_t hibits = (Tmp.get_pVal(hiWord))
+                        << (52 - n % APINT_BITS_PER_WORD);
+      uint64_t lobits =
+          (Tmp.get_pVal(hiWord - 1)) >> (11 + n % APINT_BITS_PER_WORD);
+      mantissa = hibits | lobits;
+    }
+
+    // The leading bit of mantissa is implicit, so get rid of it.
+    uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
+    union {
+      double __D;
+      uint64_t __I;
+    } __T;
+    __T.__I = sign | ((exp) << 52) | mantissa;
+    return __T.__D;
+  }
+
+  /// @brief Converts this unsigned ap_private to a double value.
+  INLINE double roundToDouble() const { return roundToDouble(false); }
+
+  /// @brief Converts this signed ap_private to a double value.
+  INLINE double signedRoundToDouble() const { return roundToDouble(true); }
+
+  /// The conversion does not do a translation from integer to double, it just
+  /// re-interprets the bits as a double. Note that it is valid to do this on
+  /// any bit width. Exactly 64 bits will be translated.
+  /// @brief Converts ap_private bits to a double
+  INLINE double bitsToDouble() const {
+    union {
+      uint64_t __I;
+      double __D;
+    } __T;
+    __T.__I = pVal[0];
+    return __T.__D;
+  }
+
+  /// The conversion does not do a translation from integer to float, it just
+  /// re-interprets the bits as a float. Note that it is valid to do this on
+  /// any bit width. Exactly 32 bits will be translated.
+  /// @brief Converts ap_private bits to a float
+  INLINE float bitsToFloat() const {
+    union {
+      uint32_t __I;
+      float __F;
+    } __T;
+    __T.__I = uint32_t(pVal[0]);
+    return __T.__F;
+  }
+
+  /// The conversion does not do a translation from double to integer, it just
+  /// re-interprets the bits of the double. Note that it is valid to do this on
+  /// any bit width but bits from V may get truncated.
+  /// @brief Converts a double to ap_private bits.
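+  /// (Illustrative, added for exposition: doubleToBits(1.0) stores
+  /// 0x3FF0000000000000, the IEEE-754 encoding of 1.0, in the low word, and
+  /// a subsequent bitsToDouble() recovers 1.0 from that bit pattern.)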
+  INLINE ap_private& doubleToBits(double __V) {
+    union {
+      uint64_t __I;
+      double __D;
+    } __T;
+    __T.__D = __V;
+    pVal[0] = __T.__I;
+    return *this;
+  }
+
+  /// The conversion does not do a translation from float to integer, it just
+  /// re-interprets the bits of the float. Note that it is valid to do this on
+  /// any bit width but bits from V may get truncated.
+  /// @brief Converts a float to ap_private bits.
+  INLINE ap_private& floatToBits(float __V) {
+    union {
+      uint32_t __I;
+      float __F;
+    } __T;
+    __T.__F = __V;
+    pVal[0] = __T.__I;
+    return *this;
+  }
+
+  // Reduce operation
+  //-----------------------------------------------------------
+  INLINE bool and_reduce() const { return isMaxValue(); }
+
+  INLINE bool nand_reduce() const { return isMinValue(); }
+
+  INLINE bool or_reduce() const { return (bool)countPopulation(); }
+
+  INLINE bool nor_reduce() const { return countPopulation() == 0; }
+
+  INLINE bool xor_reduce() const {
+    unsigned int i = countPopulation();
+    return (i % 2) ? true : false;
+  }
+
+  INLINE bool xnor_reduce() const {
+    unsigned int i = countPopulation();
+    return (i % 2) ? false : true;
+  }
+  INLINE std::string to_string(uint8_t radix = 16, bool sign = false) const {
+    return toString(radix, radix == 10 ? _AP_S : sign);
+  }
+}; // End of class ap_private <_AP_W, _AP_S, false>
+
+namespace ap_private_ops {
+
+enum { APINT_BITS_PER_WORD = 64 };
+template <int _AP_W, bool _AP_S>
+INLINE bool operator==(uint64_t V1, const ap_private<_AP_W, _AP_S>& V2) {
+  return V2 == V1;
+}
+
+template <int _AP_W, bool _AP_S>
+INLINE bool operator!=(uint64_t V1, const ap_private<_AP_W, _AP_S>& V2) {
+  return V2 != V1;
+}
+
+template <int index, int _AP_W, bool _AP_S>
+INLINE bool get(const ap_private<_AP_W, _AP_S>& a) {
+  static const uint64_t mask = 1ULL << (index & 0x3f);
+  return ((mask & a.get_pVal((index) >> 6)) != 0);
+}
+
+template
+INLINE void set(ap_private<_AP_W, _AP_S>& a,
+                const ap_private& mark1 = 0,
+                const ap_private& mark2 = 0) {
+  enum {
+    APINT_BITS_PER_WORD = 64,
+    lsb_word = lsb_index / APINT_BITS_PER_WORD,
+    msb_word = msb_index / APINT_BITS_PER_WORD,
+    msb = msb_index % APINT_BITS_PER_WORD,
+    lsb = lsb_index % APINT_BITS_PER_WORD
+  };
+  if (msb_word == lsb_word) {
+    const uint64_t mask = ~0ULL >>
+                          (lsb) << (APINT_BITS_PER_WORD - msb + lsb - 1) >>
+                          (APINT_BITS_PER_WORD - msb - 1);
+    // a.set_pVal(msb_word, a.get_pVal(msb_word) | mask);
+    a.get_pVal(msb_word) |= mask;
+  } else {
+    const uint64_t lsb_mask = ~0ULL >> (lsb) << (lsb);
+    const uint64_t msb_mask = ~0ULL << (APINT_BITS_PER_WORD - msb - 1) >>
+                              (APINT_BITS_PER_WORD - msb - 1);
+    // a.set_pVal(lsb_word, a.get_pVal(lsb_word) | lsb_mask);
+    a.get_pVal(lsb_word) |= lsb_mask;
+    for (int i = lsb_word + 1; i < msb_word; i++) {
+      a.set_pVal(i, ~0ULL);
+      // a.get_pVal(i)=0;
+    }
+    // a.set_pVal(msb_word, a.get_pVal(msb_word) | msb_mask);
+    a.get_pVal(msb_word) |= msb_mask;
+  }
+  a.clearUnusedBits();
+}
+
+template
+INLINE void clear(ap_private<_AP_W, _AP_S>& a,
+                  const ap_private& mark1 = 0,
+                  const ap_private& mark2 = 0) {
+  enum {
+    APINT_BITS_PER_WORD = 64,
+    lsb_word = lsb_index / APINT_BITS_PER_WORD,
+    msb_word = msb_index / APINT_BITS_PER_WORD,
+    msb = msb_index % APINT_BITS_PER_WORD,
+    lsb = lsb_index % APINT_BITS_PER_WORD
+  };
+  if (msb_word == lsb_word) {
+    const uint64_t mask =
+        ~(~0ULL >> (lsb) << (APINT_BITS_PER_WORD - msb + lsb - 1) >>
+          (APINT_BITS_PER_WORD - msb - 1));
+    // a.set_pVal(msb_word, a.get_pVal(msb_word) & mask);
+    a.get_pVal(msb_word) &= mask;
+  } else {
+    const uint64_t lsb_mask = ~(~0ULL >> (lsb) << (lsb));
+    const uint64_t msb_mask = ~(~0ULL << (APINT_BITS_PER_WORD - msb - 1) >>
+                                (APINT_BITS_PER_WORD - msb - 1));
+    // a.set_pVal(lsb_word, a.get_pVal(lsb_word) & lsb_mask);
+    a.get_pVal(lsb_word) &= lsb_mask;
+    for (int i = lsb_word + 1; i < msb_word; i++) {
+      // a.set_pVal(i, 0);
+      a.get_pVal(i) = 0;
+    }
+    // a.set_pVal(msb_word, a.get_pVal(msb_word) & msb_mask);
+    a.get_pVal(msb_word) &= msb_mask;
+  }
+  a.clearUnusedBits();
+}
+
+template
+INLINE void set(ap_private<_AP_W, _AP_S>& a,
+                const ap_private& mark = 0) {
+  enum { APINT_BITS_PER_WORD = 64, word = index / APINT_BITS_PER_WORD };
+  static const uint64_t mask = 1ULL << (index % APINT_BITS_PER_WORD);
+  // a.set_pVal(word, a.get_pVal(word) | mask);
+  a.get_pVal(word) |= mask;
+  a.clearUnusedBits();
+}
+
+template
+INLINE void clear(ap_private<_AP_W, _AP_S>& a,
+                  const ap_private& mark = 0) {
+  enum { APINT_BITS_PER_WORD = 64, word = index / APINT_BITS_PER_WORD };
+  static const uint64_t mask = ~(1ULL << (index % APINT_BITS_PER_WORD));
+  // a.set_pVal(word, a.get_pVal(word) & mask);
+  a.get_pVal(word) &= mask;
+  a.clearUnusedBits();
+}
+
+} // End of ap_private_ops namespace
+
+template <int _AP_W, bool _AP_S>
+INLINE std::string ap_private<_AP_W, _AP_S, false>::toString(
+    uint8_t radix, bool wantSigned) const {
+  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
+         "Radix should be 2, 8, 10, or 16!");
+  static const char* digits[] = {"0", "1", "2", "3", "4", "5", "6", "7",
+                                 "8", "9", "A", "B", "C", "D", "E", "F"};
+  std::string result;
+
+  if (radix != 10) {
+    // For radix 2, 8 and 16 we can just shift instead of divide because the
+    // number of bits per digit (1, 3 and 4 respectively) divides equally.
+    // We just shift until the value is zero.
+
+    // First, check for a zero value and just short circuit the logic below.
+    if (*this == (uint64_t)(0))
+      result = "0";
+    else {
+      ap_private<_AP_W, false> tmp(*this);
+      size_t insert_at = 0;
+      bool leading_zero = true;
+      if (wantSigned && isNegative()) {
+        // They want to print the signed version and it is a negative value
+        // Flip the bits and add one to turn it into the equivalent positive
+        // value and put a '-' in the result.
+        tmp.flip();
+        tmp++;
+        tmp.clearUnusedBitsToZero();
+        result = "-";
+        insert_at = 1;
+        leading_zero = false;
+      }
+      switch (radix) {
+        case 2:
+          result += "0b";
+          break;
+        case 8:
+          result += "0o";
+          break;
+        case 16:
+          result += "0x";
+          break;
+        default:
+          assert("invalid radix" && 0);
+      }
+      insert_at += 2;
+      // Just shift tmp right for each digit width until it becomes zero
+      uint32_t shift = (radix == 16 ? 4 : (radix == 8 ? 3 : 1));
+      uint64_t mask = radix - 1;
+      ap_private<_AP_W, false> zero(0);
+      unsigned bits = 0;
+      while (tmp.ne(zero)) {
+        uint64_t digit = tmp.get_VAL() & mask;
+        result.insert(insert_at, digits[digit]);
+        tmp = tmp.lshr(shift);
+        ++bits;
+      }
+      bits *= shift;
+      if (bits < _AP_W && leading_zero) result.insert(insert_at, digits[0]);
+    }
+    return result;
+  }
+
+  ap_private<_AP_W, false> tmp(*this);
+  ap_private<_AP_W, false> divisor(radix);
+  ap_private<_AP_W, false> zero(0);
+  size_t insert_at = 0;
+  if (wantSigned && isNegative()) {
+    // They want to print the signed version and it is a negative value
+    // Flip the bits and add one to turn it into the equivalent positive
+    // value and put a '-' in the result.
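+    // (Illustrative, added for exposition: for an 8-bit value 0x80, i.e.
+    // -128 when signed, the negation below yields 128, the digit loop
+    // produces "128", and the '-' already placed in result makes it "-128".)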
+ tmp.flip(); + tmp++; + tmp.clearUnusedBitsToZero(); + result = "-"; + insert_at = 1; + } + if (tmp == ap_private<_AP_W, false>(0)) + result = "0"; + else + while (tmp.ne(zero)) { + ap_private<_AP_W, false> APdigit(0); + ap_private<_AP_W, false> tmp2(0); + ap_private_ops::divide(tmp, tmp.getNumWords(), divisor, + divisor.getNumWords(), &tmp2, &APdigit); + uint64_t digit = APdigit.getZExtValue(); + assert(digit < radix && "divide failed"); + result.insert(insert_at, digits[digit]); + tmp = tmp2; + } + + return result; +} // End of ap_private<_AP_W, _AP_S, false>::toString() + +template +std::ostream &operator<<(std::ostream &os, const ap_private<_AP_W, _AP_S> &x) { + std::ios_base::fmtflags ff = std::cout.flags(); + if (ff & std::cout.hex) { + os << x.toString(16, false); // don't print sign + } else if (ff & std::cout.oct) { + os << x.toString(8, false); // don't print sign + } else { + os << x.toString(10, _AP_S); + } + return os; +} + +// ------------------------------------------------------------ // +// XXX moved here from ap_int_sim.h XXX // +// ------------------------------------------------------------ // + +/// Concatination reference. +/// Proxy class which allows concatination to be used as rvalue(for reading) and +/// lvalue(for writing) +// ---------------------------------------------------------------- +// template +// struct ap_concat_ref { +//#ifdef _MSC_VER +//#pragma warning(disable : 4521 4522) +//#endif +// enum { +// _AP_WR = _AP_W1 + _AP_W2, +// }; +// _AP_T1& mbv1; +// _AP_T2& mbv2; +// +// INLINE ap_concat_ref(const ap_concat_ref<_AP_W1, _AP_T1, _AP_W2, _AP_T2>& +// ref) +// : mbv1(ref.mbv1), mbv2(ref.mbv2) {} +// +// INLINE ap_concat_ref(_AP_T1& bv1, _AP_T2& bv2) : mbv1(bv1), mbv2(bv2) {} +// +// template +// INLINE ap_concat_ref& operator=(const ap_private<_AP_W3, _AP_S3>& val) { +// ap_private<_AP_W1 + _AP_W2, false> vval(val); +// int W_ref1 = mbv1.length(); +// int W_ref2 = mbv2.length(); +// ap_private<_AP_W1, false> mask1(-1); +// mask1 >>= _AP_W1 - W_ref1; +// ap_private<_AP_W2, false> mask2(-1); +// mask2 >>= _AP_W2 - W_ref2; +// mbv1.set(ap_private<_AP_W1, false>((vval >> W_ref2) & mask1)); +// mbv2.set(ap_private<_AP_W2, false>(vval & mask2)); +// return *this; +// } +// +// INLINE ap_concat_ref& operator=(unsigned long long val) { +// ap_private<_AP_W1 + _AP_W2, false> tmpVal(val); +// return operator=(tmpVal); +// } +// +// template +// INLINE ap_concat_ref& operator=( +// const ap_concat_ref<_AP_W3, _AP_T3, _AP_W4, _AP_T4>& val) { +// ap_private<_AP_W1 + _AP_W2, false> tmpVal(val); +// return operator=(tmpVal); +// } +// +// INLINE ap_concat_ref& operator=( +// const ap_concat_ref<_AP_W1, _AP_T1, _AP_W2, _AP_T2>& val) { +// ap_private<_AP_W1 + _AP_W2, false> tmpVal(val); +// return operator=(tmpVal); +// } +// +// template +// INLINE ap_concat_ref& operator=(const _private_bit_ref<_AP_W3, _AP_S3>& +// val) { +// ap_private<_AP_W1 + _AP_W2, false> tmpVal(val); +// return operator=(tmpVal); +// } +// +// template +// INLINE ap_concat_ref& operator=(const _private_range_ref<_AP_W3, _AP_S3>& +// val) { +// ap_private<_AP_W1 + _AP_W2, false> tmpVal(val); +// return operator=(tmpVal); +// } +// +// template +// INLINE ap_concat_ref& operator=( +// const af_range_ref<_AP_W3, _AP_I3, _AP_S3, _AP_Q3, _AP_O3, _AP_N3>& val) +// { +// return operator=((const ap_private<_AP_W3, false>)(val)); +// } +// +// template +// INLINE ap_concat_ref& operator=( +// const ap_fixed_base<_AP_W3, _AP_I3, _AP_S3, _AP_Q3, _AP_O3, _AP_N3>& +// val) { +// return 
operator=(val.to_ap_private()); +// } +// +// template +// INLINE ap_concat_ref& operator=( +// const af_bit_ref<_AP_W3, _AP_I3, _AP_S3, _AP_Q3, _AP_O3, _AP_N3>& val) { +// return operator=((unsigned long long)(bool)(val)); +// } +// +// INLINE operator ap_private<_AP_WR, false>() const { return get(); } +// +// INLINE operator unsigned long long() const { return get().to_uint64(); } +// +// template +// INLINE ap_concat_ref<_AP_WR, ap_concat_ref, _AP_W3, +// _private_range_ref<_AP_W3, _AP_S3> > +// operator,(const _private_range_ref<_AP_W3, _AP_S3> &a2) { +// return ap_concat_ref<_AP_WR, ap_concat_ref, _AP_W3, +// _private_range_ref<_AP_W3, _AP_S3> >( +// *this, const_cast<_private_range_ref<_AP_W3, _AP_S3>&>(a2)); +// } +// +// template +// INLINE +// ap_concat_ref<_AP_WR, ap_concat_ref, _AP_W3, ap_private<_AP_W3, _AP_S3> +// > +// operator,(ap_private<_AP_W3, _AP_S3> &a2) { +// return ap_concat_ref<_AP_WR, ap_concat_ref, _AP_W3, +// ap_private<_AP_W3, _AP_S3> >(*this, a2); +// } +// +// template +// INLINE +// ap_concat_ref<_AP_WR, ap_concat_ref, _AP_W3, ap_private<_AP_W3, _AP_S3> +// > +// operator,(const ap_private<_AP_W3, _AP_S3> &a2) { +// return ap_concat_ref<_AP_WR, ap_concat_ref, _AP_W3, +// ap_private<_AP_W3, _AP_S3> >( +// *this, const_cast&>(a2)); +// } +// +// template +// INLINE ap_concat_ref<_AP_WR, ap_concat_ref, 1, _private_bit_ref<_AP_W3, +// _AP_S3> > +// operator,(const _private_bit_ref<_AP_W3, _AP_S3> &a2) { +// return ap_concat_ref<_AP_WR, ap_concat_ref, 1, _private_bit_ref<_AP_W3, +// _AP_S3> >( +// *this, const_cast<_private_bit_ref<_AP_W3, _AP_S3>&>(a2)); +// } +// +// template +// INLINE ap_concat_ref<_AP_WR, ap_concat_ref, _AP_W3 + _AP_W4, +// ap_concat_ref<_AP_W3, _AP_T3, _AP_W4, _AP_T4> > +// operator,(const ap_concat_ref<_AP_W3, _AP_T3, _AP_W4, _AP_T4> &a2) { +// return ap_concat_ref<_AP_WR, ap_concat_ref, _AP_W3 + _AP_W4, +// ap_concat_ref<_AP_W3, _AP_T3, _AP_W4, _AP_T4> >( +// *this, const_cast&>(a2)); +// } +// +// template +// INLINE ap_concat_ref< +// _AP_WR, ap_concat_ref, _AP_W3, +// af_range_ref<_AP_W3, _AP_I3, _AP_S3, _AP_Q3, _AP_O3, _AP_N3> > +// operator,( +// const af_range_ref<_AP_W3, _AP_I3, _AP_S3, _AP_Q3, _AP_O3, _AP_N3> &a2) +// { +// return ap_concat_ref< +// _AP_WR, ap_concat_ref, _AP_W3, +// af_range_ref<_AP_W3, _AP_I3, _AP_S3, _AP_Q3, _AP_O3, _AP_N3> >( +// *this, +// const_cast< +// af_range_ref<_AP_W3, _AP_I3, _AP_S3, _AP_Q3, _AP_O3, +// _AP_N3>&>(a2)); +// } +// +// template +// INLINE +// ap_concat_ref<_AP_WR, ap_concat_ref, 1, +// af_bit_ref<_AP_W3, _AP_I3, _AP_S3, _AP_Q3, _AP_O3, _AP_N3> +// > +// operator,(const af_bit_ref<_AP_W3, _AP_I3, _AP_S3, _AP_Q3, _AP_O3, +// _AP_N3> +// &a2) { +// return ap_concat_ref< +// _AP_WR, ap_concat_ref, 1, +// af_bit_ref<_AP_W3, _AP_I3, _AP_S3, _AP_Q3, _AP_O3, _AP_N3> >( +// *this, +// const_cast&>( +// a2)); +// } +// +// template +// INLINE ap_private operator&( +// const ap_private<_AP_W3, _AP_S3>& a2) { +// return get() & a2; +// } +// +// template +// INLINE ap_private operator|( +// const ap_private<_AP_W3, _AP_S3>& a2) { +// return get() | a2; +// } +// +// template +// INLINE ap_private operator^( +// const ap_private<_AP_W3, _AP_S3>& a2) { +// return ap_private(get() ^ a2); +// } +// +// INLINE const ap_private<_AP_WR, false> get() const { +// ap_private<_AP_W1 + _AP_W2, false> tmpVal = +// ap_private<_AP_W1 + _AP_W2, false>(mbv1.get()); +// ap_private<_AP_W1 + _AP_W2, false> tmpVal2 = +// ap_private<_AP_W1 + _AP_W2, false>(mbv2.get()); +// int W_ref2 = mbv2.length(); +// tmpVal <<= 
W_ref2; +// tmpVal |= tmpVal2; +// return tmpVal; +// } +// +// INLINE const ap_private<_AP_WR, false> get() { +// ap_private<_AP_W1 + _AP_W2, false> tmpVal = +// ap_private<_AP_W1 + _AP_W2, false>(mbv1.get()); +// ap_private<_AP_W1 + _AP_W2, false> tmpVal2 = +// ap_private<_AP_W1 + _AP_W2, false>(mbv2.get()); +// int W_ref2 = mbv2.length(); +// tmpVal <<= W_ref2; +// tmpVal |= tmpVal2; +// return tmpVal; +// } +// +// template +// INLINE void set(const ap_private<_AP_W3, false>& val) { +// ap_private<_AP_W1 + _AP_W2, false> vval(val); +// int W_ref1 = mbv1.length(); +// int W_ref2 = mbv2.length(); +// ap_private<_AP_W1, false> mask1(-1); +// mask1 >>= _AP_W1 - W_ref1; +// ap_private<_AP_W2, false> mask2(-1); +// mask2 >>= _AP_W2 - W_ref2; +// mbv1.set(ap_private<_AP_W1, false>((vval >> W_ref2) & mask1)); +// mbv2.set(ap_private<_AP_W2, false>(vval & mask2)); +// } +// +// INLINE int length() const { return mbv1.length() + mbv2.length(); } +// +// INLINE std::string to_string(uint8_t radix = 2) const { +// return get().to_string(radix); +// } +//}; // struct ap_concat_ref. + +/// Range(slice) reference +/// Proxy class, which allows part selection to be used as rvalue(for reading) +/// and lvalue(for writing) +//------------------------------------------------------------ +template +struct _private_range_ref { +#ifdef _MSC_VER +#pragma warning(disable : 4521 4522) +#endif + ap_private<_AP_W, _AP_S>& d_bv; + int l_index; + int h_index; + + public: + /// copy ctor. + INLINE _private_range_ref(const _private_range_ref<_AP_W, _AP_S>& ref) + : d_bv(ref.d_bv), l_index(ref.l_index), h_index(ref.h_index) {} + + /// direct ctor. + INLINE _private_range_ref(ap_private<_AP_W, _AP_S>* bv, int h, int l) + : d_bv(*bv), l_index(l), h_index(h) { + _AP_WARNING(h < 0 || l < 0, + "Higher bound (%d) and lower bound (%d) cannot be " + "negative.", + h, l); + _AP_WARNING(h >= _AP_W || l >= _AP_W, + "Higher bound (%d) or lower bound (%d) out of range (%d).", h, l, + _AP_W); + } + + /// compound or assignment. + template + INLINE _private_range_ref<_AP_W, _AP_S>& operator|=( + const _private_range_ref<_AP_W2, _AP_S2>& ref) { + _AP_WARNING((h_index - l_index) != (ref.h_index - ref.l_index), + "Bitsize mismach for ap_private<>.range() &= " + "ap_private<>.range()."); + this->d_bv |= ref.d_bv; + return *this; + } + + /// compound or assignment with root type. + template + INLINE _private_range_ref<_AP_W, _AP_S>& operator|=( + const _AP_ROOT_TYPE<_AP_W2, _AP_S2>& ref) { + _AP_WARNING((h_index - l_index + 1) != _AP_W2, + "Bitsize mismach for ap_private<>.range() |= _AP_ROOT_TYPE<>."); + this->d_bv |= ref.V; + return *this; + } + + /// compound and assignment. + template + INLINE _private_range_ref<_AP_W, _AP_S>& operator&=( + const _private_range_ref<_AP_W2, _AP_S2>& ref) { + _AP_WARNING((h_index - l_index) != (ref.h_index - ref.l_index), + "Bitsize mismach for ap_private<>.range() &= " + "ap_private<>.range()."); + this->d_bv &= ref.d_bv; + return *this; + }; + + /// compound and assignment with root type. + template + INLINE _private_range_ref<_AP_W, _AP_S>& operator&=( + const _AP_ROOT_TYPE<_AP_W2, _AP_S2>& ref) { + _AP_WARNING((h_index - l_index + 1) != _AP_W2, + "Bitsize mismach for ap_private<>.range() &= _AP_ROOT_TYPE<>."); + this->d_bv &= ref.V; + return *this; + } + + /// compound xor assignment. 
+ template + INLINE _private_range_ref<_AP_W, _AP_S>& operator^=( + const _private_range_ref<_AP_W2, _AP_S2>& ref) { + _AP_WARNING((h_index - l_index) != (ref.h_index - ref.l_index), + "Bitsize mismach for ap_private<>.range() ^= " + "ap_private<>.range()."); + this->d_bv ^= ref.d_bv; + return *this; + }; + + /// compound xor assignment with root type. + template + INLINE _private_range_ref<_AP_W, _AP_S>& operator^=( + const _AP_ROOT_TYPE<_AP_W2, _AP_S2>& ref) { + _AP_WARNING((h_index - l_index + 1) != _AP_W2, + "Bitsize mismach for ap_private<>.range() ^= _AP_ROOT_TYPE<>."); + this->d_bv ^= ref.V; + return *this; + } + + /// @name convertors. + // @{ + INLINE operator ap_private<_AP_W, false>() const { + ap_private<_AP_W, false> val(0); + if (h_index >= l_index) { + if (_AP_W > 64) { + val = d_bv; + ap_private<_AP_W, false> mask(-1); + mask >>= _AP_W - (h_index - l_index + 1); + val >>= l_index; + val &= mask; + } else { + const static uint64_t mask = (~0ULL >> (64 > _AP_W ? (64 - _AP_W) : 0)); + val = (d_bv >> l_index) & (mask >> (_AP_W - (h_index - l_index + 1))); + } + } else { + for (int i = 0, j = l_index; j >= 0 && j >= h_index; j--, i++) + if ((d_bv)[j]) val.set(i); + } + return val; + } + + INLINE operator unsigned long long() const { return to_uint64(); } + // @} + + template + INLINE _private_range_ref& operator=(const ap_private<_AP_W2, _AP_S2>& val) { + ap_private<_AP_W, false> vval = ap_private<_AP_W, false>(val); + if (l_index > h_index) { + for (int i = 0, j = l_index; j >= 0 && j >= h_index; j--, i++) + (vval)[i] ? d_bv.set(j) : d_bv.clear(j); + } else { + if (_AP_W > 64) { + ap_private<_AP_W, false> mask(-1); + if (l_index > 0) { + mask <<= l_index; + vval <<= l_index; + } + if (h_index < _AP_W - 1) { + ap_private<_AP_W, false> mask2(-1); + mask2 >>= _AP_W - h_index - 1; + mask &= mask2; + vval &= mask2; + } + mask.flip(); + d_bv &= mask; + d_bv |= vval; + } else { + unsigned shift = 64 - _AP_W; + uint64_t mask = ~0ULL >> (shift); + if (l_index > 0) { + vval = mask & vval << l_index; + mask = mask & mask << l_index; + } + if (h_index < _AP_W - 1) { + uint64_t mask2 = mask; + mask2 >>= (_AP_W - h_index - 1); + mask &= mask2; + vval &= mask2; + } + mask = ~mask; + d_bv &= mask; + d_bv |= vval; + } + } + return *this; + } // operator=(const ap_private<>&) + + INLINE _private_range_ref& operator=(unsigned long long val) { + const ap_private<_AP_W, _AP_S> vval = val; + return operator=(vval); + } + + template + INLINE _private_range_ref& operator=( + const _private_bit_ref<_AP_W2, _AP_S2>& val) { + return operator=((unsigned long long)(bool)val); + } + + template + INLINE _private_range_ref& operator=( + const _private_range_ref<_AP_W2, _AP_S2>& val) { + const ap_private<_AP_W, false> tmpVal(val); + return operator=(tmpVal); + } + +// template +// INLINE _private_range_ref& operator=( +// const ap_concat_ref<_AP_W3, _AP_T3, _AP_W4, _AP_T4>& val) { +// const ap_private<_AP_W, false> tmpVal(val); +// return operator=(tmpVal); +// } + + // TODO from ap_int_base, ap_bit_ref and ap_range_ref. 
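+  // (Illustrative sketch, added for exposition; the proxy is normally
+  // obtained from the owning ap_private, but constructing it directly shows
+  // the assignment semantics:
+  //   ap_private<8, false> v(0);
+  //   _private_range_ref<8, false>(&v, 3, 0) = 0x5; // low nibble  = 0101
+  //   _private_range_ref<8, false>(&v, 7, 4) = 0xA; // high nibble = 1010
+  // leaves v holding 0xA5; operator= rewrites only bits l_index..h_index.)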
+ + template + INLINE _private_range_ref& operator=( + const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& val) { + return operator=(val.to_ap_int_base().V); + } + + template + INLINE _private_range_ref& operator=( + const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& val) { + return operator=(val.operator ap_int_base<_AP_W2, false>().V); + } + + template + INLINE _private_range_ref& operator=( + const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& val) { + return operator=((unsigned long long)(bool)val); + } + +// template +// INLINE ap_concat_ref<_AP_W, _private_range_ref, _AP_W2, +// _private_range_ref<_AP_W2, _AP_S2> > +// operator,(const _private_range_ref<_AP_W2, _AP_S2> &a2) { +// return ap_concat_ref<_AP_W, _private_range_ref, _AP_W2, +// _private_range_ref<_AP_W2, _AP_S2> >( +// *this, const_cast<_private_range_ref<_AP_W2, _AP_S2>&>(a2)); +// } +// +// template +// INLINE ap_concat_ref<_AP_W, _private_range_ref, _AP_W2, +// ap_private<_AP_W2, _AP_S2> > +// operator,(ap_private<_AP_W2, _AP_S2> &a2) { +// return ap_concat_ref<_AP_W, _private_range_ref, _AP_W2, +// ap_private<_AP_W2, _AP_S2> >(*this, a2); +// } +// +// INLINE +// ap_concat_ref<_AP_W, _private_range_ref, _AP_W, ap_private<_AP_W, _AP_S> > +// operator,(ap_private<_AP_W, _AP_S>& a2) { +// return ap_concat_ref<_AP_W, _private_range_ref, _AP_W, +// ap_private<_AP_W, _AP_S> >(*this, a2); +// } +// +// template +// INLINE ap_concat_ref<_AP_W, _private_range_ref, 1, +// _private_bit_ref<_AP_W2, _AP_S2> > +// operator,(const _private_bit_ref<_AP_W2, _AP_S2> &a2) { +// return ap_concat_ref<_AP_W, _private_range_ref, 1, +// _private_bit_ref<_AP_W2, _AP_S2> >( +// *this, const_cast<_private_bit_ref<_AP_W2, _AP_S2>&>(a2)); +// } +// +// template +// INLINE ap_concat_ref<_AP_W, _private_range_ref, _AP_W2 + _AP_W3, +// ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> > +// operator,(const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> &a2) { +// return ap_concat_ref<_AP_W, _private_range_ref, _AP_W2 + _AP_W3, +// ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> >( +// *this, const_cast&>(a2)); +// } +// +// template +// INLINE ap_concat_ref< +// _AP_W, _private_range_ref, _AP_W2, +// af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > +// operator,( +// const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> &a2) { +// return ap_concat_ref< +// _AP_W, _private_range_ref, _AP_W2, +// af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >( +// *this, +// const_cast< +// af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>&>(a2)); +// } +// +// template +// INLINE +// ap_concat_ref<_AP_W, _private_range_ref, 1, +// af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > +// operator,(const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> +// &a2) { +// return ap_concat_ref< +// _AP_W, _private_range_ref, 1, +// af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >( +// *this, +// const_cast&>( +// a2)); +// } + + template + INLINE bool operator==(const _private_range_ref<_AP_W2, _AP_S2>& op2) { + ap_private<_AP_W, false> lhs = get(); + ap_private<_AP_W2, false> rhs = op2.get(); + return lhs == rhs; + } + + template + INLINE bool operator!=(const _private_range_ref<_AP_W2, _AP_S2>& op2) { + ap_private<_AP_W, false> lhs = get(); + ap_private<_AP_W2, false> rhs = op2.get(); + return lhs != rhs; + } + + template + INLINE bool operator>(const _private_range_ref<_AP_W2, _AP_S2>& op2) { + ap_private<_AP_W, false> lhs = get(); + 
ap_private<_AP_W2, false> rhs = op2.get(); + return lhs > rhs; + } + + template + INLINE bool operator>=(const _private_range_ref<_AP_W2, _AP_S2>& op2) { + ap_private<_AP_W, false> lhs = get(); + ap_private<_AP_W2, false> rhs = op2.get(); + return lhs >= rhs; + } + + template + INLINE bool operator<(const _private_range_ref<_AP_W2, _AP_S2>& op2) { + ap_private<_AP_W, false> lhs = get(); + ap_private<_AP_W2, false> rhs = op2.get(); + return lhs < rhs; + } + + template + INLINE bool operator<=(const _private_range_ref<_AP_W2, _AP_S2>& op2) { + ap_private<_AP_W, false> lhs = get(); + ap_private<_AP_W2, false> rhs = op2.get(); + return lhs <= rhs; + } + + template + INLINE void set(const ap_private<_AP_W2, false>& val) { + ap_private<_AP_W, _AP_S> vval = val; + if (l_index > h_index) { + for (int i = 0, j = l_index; j >= 0 && j >= h_index; j--, i++) + (vval)[i] ? d_bv.set(j) : d_bv.clear(j); + } else { + if (_AP_W > 64) { + ap_private<_AP_W, _AP_S> mask(-1); + if (l_index > 0) { + ap_private<_AP_W, false> mask1(-1); + mask1 >>= _AP_W - l_index; + mask1.flip(); + mask = mask1; + // vval&=mask1; + vval <<= l_index; + } + if (h_index < _AP_W - 1) { + ap_private<_AP_W, false> mask2(-1); + mask2 <<= h_index + 1; + mask2.flip(); + mask &= mask2; + vval &= mask2; + } + mask.flip(); + d_bv &= mask; + d_bv |= vval; + } else { + uint64_t mask = ~0ULL >> (64 - _AP_W); + if (l_index > 0) { + uint64_t mask1 = mask; + mask1 = mask & (mask1 >> (_AP_W - l_index)); + vval = mask & (vval << l_index); + mask = ~mask1 & mask; + // vval&=mask1; + } + if (h_index < _AP_W - 1) { + uint64_t mask2 = ~0ULL >> (64 - _AP_W); + mask2 = mask & (mask2 << (h_index + 1)); + mask &= ~mask2; + vval &= ~mask2; + } + d_bv &= (~mask & (~0ULL >> (64 - _AP_W))); + d_bv |= vval; + } + } + } + + INLINE ap_private<_AP_W, false> get() const { + ap_private<_AP_W, false> val(0); + if (h_index < l_index) { + for (int i = 0, j = l_index; j >= 0 && j >= h_index; j--, i++) + if ((d_bv)[j]) val.set(i); + } else { + val = d_bv; + val >>= l_index; + if (h_index < _AP_W - 1) { + if (_AP_W <= 64) { + const static uint64_t mask = + (~0ULL >> (64 > _AP_W ? (64 - _AP_W) : 0)); + val &= (mask >> (_AP_W - (h_index - l_index + 1))); + } else { + ap_private<_AP_W, false> mask(-1); + mask >>= _AP_W - (h_index - l_index + 1); + val &= mask; + } + } + } + return val; + } + + INLINE ap_private<_AP_W, false> get() { + ap_private<_AP_W, false> val(0); + if (h_index < l_index) { + for (int i = 0, j = l_index; j >= 0 && j >= h_index; j--, i++) + if ((d_bv)[j]) val.set(i); + } else { + val = d_bv; + val >>= l_index; + if (h_index < _AP_W - 1) { + if (_AP_W <= 64) { + static const uint64_t mask = ~0ULL >> (64 > _AP_W ? (64 - _AP_W) : 0); + return val &= ((mask) >> (_AP_W - (h_index - l_index + 1))); + } else { + ap_private<_AP_W, false> mask(-1); + mask >>= _AP_W - (h_index - l_index + 1); + val &= mask; + } + } + } + return val; + } + + INLINE int length() const { + return h_index >= l_index ? 
h_index - l_index + 1 : l_index - h_index + 1; + } + + INLINE int to_int() const { + ap_private<_AP_W, false> val = get(); + return val.to_int(); + } + + INLINE unsigned int to_uint() const { + ap_private<_AP_W, false> val = get(); + return val.to_uint(); + } + + INLINE long to_long() const { + ap_private<_AP_W, false> val = get(); + return val.to_long(); + } + + INLINE unsigned long to_ulong() const { + ap_private<_AP_W, false> val = get(); + return val.to_ulong(); + } + + INLINE ap_slong to_int64() const { + ap_private<_AP_W, false> val = get(); + return val.to_int64(); + } + + INLINE ap_ulong to_uint64() const { + ap_private<_AP_W, false> val = get(); + return val.to_uint64(); + } + + INLINE std::string to_string(uint8_t radix = 2) const { + return get().to_string(radix); + } + + INLINE bool and_reduce() { + bool ret = true; + bool reverse = l_index > h_index; + unsigned low = reverse ? h_index : l_index; + unsigned high = reverse ? l_index : h_index; + for (unsigned i = low; i != high; ++i) ret &= d_bv[i]; + return ret; + } + + INLINE bool or_reduce() { + bool ret = false; + bool reverse = l_index > h_index; + unsigned low = reverse ? h_index : l_index; + unsigned high = reverse ? l_index : h_index; + for (unsigned i = low; i != high; ++i) ret |= d_bv[i]; + return ret; + } + + INLINE bool xor_reduce() { + bool ret = false; + bool reverse = l_index > h_index; + unsigned low = reverse ? h_index : l_index; + unsigned high = reverse ? l_index : h_index; + for (unsigned i = low; i != high; ++i) ret ^= d_bv[i]; + return ret; + } +}; // struct _private_range_ref. + +/// Bit reference +/// Proxy class, which allows bit selection to be used as rvalue(for reading) +/// and lvalue(for writing) +//-------------------------------------------------------------- +template +struct _private_bit_ref { +#ifdef _MSC_VER +#pragma warning(disable : 4521 4522) +#endif + ap_private<_AP_W, _AP_S>& d_bv; + int d_index; + + public: + // copy ctor. + INLINE _private_bit_ref(const _private_bit_ref<_AP_W, _AP_S>& ref) + : d_bv(ref.d_bv), d_index(ref.d_index) {} + + // director ctor. 
+ INLINE _private_bit_ref(ap_private<_AP_W, _AP_S>& bv, int index = 0) + : d_bv(bv), d_index(index) { + _AP_WARNING(d_index < 0, "Index of bit vector (%d) cannot be negative.\n", + d_index); + _AP_WARNING(d_index >= _AP_W, + "Index of bit vector (%d) out of range (%d).\n", d_index, _AP_W); + } + + INLINE operator bool() const { return d_bv.get_bit(d_index); } + + INLINE bool to_bool() const { return operator bool(); } + + template + INLINE _private_bit_ref& operator=(const T& val) { + if (!!val) + d_bv.set(d_index); + else + d_bv.clear(d_index); + return *this; + } + +// template +// INLINE ap_concat_ref<1, _private_bit_ref, _AP_W2, ap_private<_AP_W2, +// _AP_S2> > +// operator,(ap_private<_AP_W2, _AP_S2> &a2) const { +// return ap_concat_ref<1, _private_bit_ref, _AP_W2, ap_private<_AP_W2, +// _AP_S2> >( +// const_cast<_private_bit_ref<_AP_W, _AP_S>&>(*this), a2); +// } +// +// template +// INLINE ap_concat_ref<1, _private_bit_ref, _AP_W2, +// _private_range_ref<_AP_W2, +// _AP_S2> > +// operator,(const _private_range_ref<_AP_W2, _AP_S2> &a2) const { +// return ap_concat_ref<1, _private_bit_ref, _AP_W2, +// _private_range_ref<_AP_W2, +// _AP_S2> >( +// const_cast<_private_bit_ref<_AP_W, _AP_S>&>(*this), +// const_cast<_private_range_ref<_AP_W2, _AP_S2>&>(a2)); +// } +// +// template +// INLINE ap_concat_ref<1, _private_bit_ref, 1, _private_bit_ref<_AP_W2, +// _AP_S2> > operator,( +// const _private_bit_ref<_AP_W2, _AP_S2> &a2) const { +// return ap_concat_ref<1, _private_bit_ref, 1, +// _private_bit_ref<_AP_W2, _AP_S2> >( +// const_cast<_private_bit_ref<_AP_W, _AP_S>&>(*this), +// const_cast<_private_bit_ref<_AP_W2, _AP_S2>&>(a2)); +// } +// +// INLINE ap_concat_ref<1, _private_bit_ref, 1, _private_bit_ref> +// operator,( +// const _private_bit_ref &a2) const { +// return ap_concat_ref<1, _private_bit_ref, 1, _private_bit_ref>( +// const_cast<_private_bit_ref<_AP_W, _AP_S>&>(*this), +// const_cast<_private_bit_ref&>(a2)); +// } +// +// template +// INLINE ap_concat_ref<1, _private_bit_ref, _AP_W2 + _AP_W3, +// ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> > +// operator,(const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> &a2) const { +// return ap_concat_ref<1, _private_bit_ref, _AP_W2 + _AP_W3, +// ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> >( +// const_cast<_private_bit_ref<_AP_W, _AP_S>&>(*this), +// const_cast&>(a2)); +// } +// +// template +// INLINE ap_concat_ref< +// 1, _private_bit_ref, _AP_W2, +// af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > +// operator,(const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, +// _AP_N2> +// &a2) const { +// return ap_concat_ref< +// 1, _private_bit_ref, _AP_W2, +// af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >( +// const_cast<_private_bit_ref<_AP_W, _AP_S>&>(*this), +// const_cast< +// af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, +// _AP_N2>&>(a2)); +// } +// +// template +// INLINE +// ap_concat_ref<1, _private_bit_ref, 1, +// af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, +// _AP_N2> > +// operator,(const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, +// _AP_N2> +// &a2) const { +// return ap_concat_ref<1, _private_bit_ref, 1, af_bit_ref<_AP_W2, +// _AP_I2, _AP_S2, +// _AP_Q2, _AP_O2, +// _AP_N2> >( +// const_cast<_private_bit_ref<_AP_W, _AP_S>&>(*this), +// const_cast&>( +// a2)); +// } + + template + INLINE bool operator==(const _private_bit_ref<_AP_W2, _AP_S2>& op) const { + return get() == op.get(); + } + + template + INLINE bool operator!=(const _private_bit_ref<_AP_W2, _AP_S2>& 
op) const { + return get() != op.get(); + } + + INLINE bool get() const { return operator bool(); } + + // template + // INLINE void set(const ap_private<_AP_W3, false>& val) { + // operator=(val); + // } + + // INLINE bool operator~() const { + // bool bit = (d_bv)[d_index]; + // return bit ? false : true; + // } + + INLINE int length() const { return 1; } + + // INLINE std::string to_string() const { + // bool val = get(); + // return val ? "1" : "0"; + // } + +}; // struct _private_bit_ref. + +// char a[100]; +// char* ptr = a; +// ap_int<2> n = 3; +// char* ptr2 = ptr + n*2; +// avoid ambiguous errors +#define OP_BIN_MIX_PTR(BIN_OP) \ + template \ + INLINE PTR_TYPE* operator BIN_OP(PTR_TYPE* i_op, \ + const ap_private<_AP_W, _AP_S>& op) { \ + typename ap_private<_AP_W, _AP_S>::ValType op2 = op; \ + return i_op BIN_OP op2; \ + } \ + template \ + INLINE PTR_TYPE* operator BIN_OP(const ap_private<_AP_W, _AP_S>& op, \ + PTR_TYPE* i_op) { \ + typename ap_private<_AP_W, _AP_S>::ValType op2 = op; \ + return op2 BIN_OP i_op; \ + } + +OP_BIN_MIX_PTR(+) +OP_BIN_MIX_PTR(-) +#undef OP_BIN_MIX_PTR + +// float OP ap_int +// when ap_int's width > 64, then trunc ap_int to ap_int<64> +#define OP_BIN_MIX_FLOAT(BIN_OP, C_TYPE) \ + template \ + INLINE C_TYPE operator BIN_OP(C_TYPE i_op, \ + const ap_private<_AP_W, _AP_S>& op) { \ + typename ap_private<_AP_W, _AP_S>::ValType op2 = op; \ + return i_op BIN_OP op2; \ + } \ + template \ + INLINE C_TYPE operator BIN_OP(const ap_private<_AP_W, _AP_S>& op, \ + C_TYPE i_op) { \ + typename ap_private<_AP_W, _AP_S>::ValType op2 = op; \ + return op2 BIN_OP i_op; \ + } + +#define OPS_MIX_FLOAT(C_TYPE) \ + OP_BIN_MIX_FLOAT(*, C_TYPE) \ + OP_BIN_MIX_FLOAT(/, C_TYPE) \ + OP_BIN_MIX_FLOAT(+, C_TYPE) \ + OP_BIN_MIX_FLOAT(-, C_TYPE) + +OPS_MIX_FLOAT(float) +OPS_MIX_FLOAT(double) +#undef OP_BIN_MIX_FLOAT +#undef OPS_MIX_FLOAT + +/// Operators mixing Integers with AP_Int +// ---------------------------------------------------------------- + +// partially specialize template argument _AP_C in order that: +// for _AP_W > 64, we will explicitly convert operand with native data type +// into corresponding ap_private +// for _AP_W <= 64, we will implicitly convert operand with ap_private into +// (unsigned) long long +#define OP_BIN_MIX_INT(BIN_OP, C_TYPE, _AP_WI, _AP_SI, RTYPE) \ + template \ + INLINE \ + typename ap_private<_AP_WI, _AP_SI>::template RType<_AP_W, _AP_S>::RTYPE \ + operator BIN_OP(C_TYPE i_op, const ap_private<_AP_W, _AP_S>& op) { \ + return ap_private<_AP_WI, _AP_SI>(i_op).operator BIN_OP(op); \ + } \ + template \ + INLINE \ + typename ap_private<_AP_W, _AP_S>::template RType<_AP_WI, _AP_SI>::RTYPE \ + operator BIN_OP(const ap_private<_AP_W, _AP_S>& op, C_TYPE i_op) { \ + return op.operator BIN_OP(ap_private<_AP_WI, _AP_SI>(i_op)); \ + } + +#define OP_REL_MIX_INT(REL_OP, C_TYPE, _AP_W2, _AP_S2) \ + template \ + INLINE bool operator REL_OP(const ap_private<_AP_W, _AP_S>& op, \ + C_TYPE op2) { \ + return op.operator REL_OP(ap_private<_AP_W2, _AP_S2>(op2)); \ + } \ + template \ + INLINE bool operator REL_OP(C_TYPE op2, \ + const ap_private<_AP_W, _AP_S, false>& op) { \ + return ap_private<_AP_W2, _AP_S2>(op2).operator REL_OP(op); \ + } + +#define OP_ASSIGN_MIX_INT(ASSIGN_OP, C_TYPE, _AP_W2, _AP_S2) \ + template \ + INLINE ap_private<_AP_W, _AP_S>& operator ASSIGN_OP( \ + ap_private<_AP_W, _AP_S>& op, C_TYPE op2) { \ + return op.operator ASSIGN_OP(ap_private<_AP_W2, _AP_S2>(op2)); \ + } + +#define OP_BIN_SHIFT_INT(BIN_OP, C_TYPE, _AP_WI, _AP_SI, RTYPE) \ + template \ 
+ C_TYPE operator BIN_OP(C_TYPE i_op, \ + const ap_private<_AP_W, _AP_S, false>& op) { \ + return i_op BIN_OP(op.get_VAL()); \ + } \ + template \ + INLINE \ + typename ap_private<_AP_W, _AP_S>::template RType<_AP_WI, _AP_SI>::RTYPE \ + operator BIN_OP(const ap_private<_AP_W, _AP_S>& op, C_TYPE i_op) { \ + return op.operator BIN_OP(i_op); \ + } + +#define OP_ASSIGN_RSHIFT_INT(ASSIGN_OP, C_TYPE, _AP_W2, _AP_S2) \ + template \ + INLINE ap_private<_AP_W, _AP_S>& operator ASSIGN_OP( \ + ap_private<_AP_W, _AP_S>& op, C_TYPE op2) { \ + op = op.operator>>(op2); \ + return op; \ + } + +#define OP_ASSIGN_LSHIFT_INT(ASSIGN_OP, C_TYPE, _AP_W2, _AP_S2) \ + template \ + INLINE ap_private<_AP_W, _AP_S>& operator ASSIGN_OP( \ + ap_private<_AP_W, _AP_S>& op, C_TYPE op2) { \ + op = op.operator<<(op2); \ + return op; \ + } + +#define OPS_MIX_INT(C_TYPE, _AP_W2, _AP_S2) \ + OP_BIN_MIX_INT(*, C_TYPE, (_AP_W2), (_AP_S2), mult) \ + OP_BIN_MIX_INT(+, C_TYPE, (_AP_W2), (_AP_S2), plus) \ + OP_BIN_MIX_INT(-, C_TYPE, (_AP_W2), (_AP_S2), minus) \ + OP_BIN_MIX_INT(/, C_TYPE, (_AP_W2), (_AP_S2), div) \ + OP_BIN_MIX_INT(%, C_TYPE, (_AP_W2), (_AP_S2), mod) \ + OP_BIN_MIX_INT(&, C_TYPE, (_AP_W2), (_AP_S2), logic) \ + OP_BIN_MIX_INT(|, C_TYPE, (_AP_W2), (_AP_S2), logic) \ + OP_BIN_MIX_INT (^, C_TYPE, (_AP_W2), (_AP_S2), logic) \ + OP_BIN_SHIFT_INT(>>, C_TYPE, (_AP_W2), (_AP_S2), arg1) \ + OP_BIN_SHIFT_INT(<<, C_TYPE, (_AP_W2), (_AP_S2), arg1) \ + \ + OP_ASSIGN_MIX_INT(+=, C_TYPE, (_AP_W2), (_AP_S2)) \ + OP_ASSIGN_MIX_INT(-=, C_TYPE, (_AP_W2), (_AP_S2)) \ + OP_ASSIGN_MIX_INT(*=, C_TYPE, (_AP_W2), (_AP_S2)) \ + OP_ASSIGN_MIX_INT(/=, C_TYPE, (_AP_W2), (_AP_S2)) \ + OP_ASSIGN_MIX_INT(%=, C_TYPE, (_AP_W2), (_AP_S2)) \ + OP_ASSIGN_MIX_INT(&=, C_TYPE, (_AP_W2), (_AP_S2)) \ + OP_ASSIGN_MIX_INT(|=, C_TYPE, (_AP_W2), (_AP_S2)) \ + OP_ASSIGN_MIX_INT(^=, C_TYPE, (_AP_W2), (_AP_S2)) \ + OP_ASSIGN_RSHIFT_INT(>>=, C_TYPE, (_AP_W2), (_AP_S2)) \ + OP_ASSIGN_LSHIFT_INT(<<=, C_TYPE, (_AP_W2), (_AP_S2)) \ + \ + OP_REL_MIX_INT(>, C_TYPE, (_AP_W2), (_AP_S2)) \ + OP_REL_MIX_INT(<, C_TYPE, (_AP_W2), (_AP_S2)) \ + OP_REL_MIX_INT(>=, C_TYPE, (_AP_W2), (_AP_S2)) \ + OP_REL_MIX_INT(<=, C_TYPE, (_AP_W2), (_AP_S2)) \ + OP_REL_MIX_INT(==, C_TYPE, (_AP_W2), (_AP_S2)) \ + OP_REL_MIX_INT(!=, C_TYPE, (_AP_W2), (_AP_S2)) + +OPS_MIX_INT(bool, 1, false) +OPS_MIX_INT(char, 8, CHAR_IS_SIGNED) +OPS_MIX_INT(signed char, 8, true) +OPS_MIX_INT(unsigned char, 8, false) +OPS_MIX_INT(short, sizeof(short) * 8, true) +OPS_MIX_INT(unsigned short, sizeof(unsigned short) * 8, false) +OPS_MIX_INT(int, sizeof(int) * 8, true) +OPS_MIX_INT(unsigned int, sizeof(unsigned int) * 8, false) +OPS_MIX_INT(long, sizeof(long) * 8, true) +OPS_MIX_INT(unsigned long, sizeof(unsigned long) * 8, false) +OPS_MIX_INT(ap_slong, sizeof(ap_slong) * 8, true) +OPS_MIX_INT(ap_ulong, sizeof(ap_ulong) * 8, false) + +#undef OP_BIN_MIX_INT +#undef OP_BIN_SHIFT_INT +#undef OP_ASSIGN_MIX_INT +#undef OP_ASSIGN_RSHIFT_INT +#undef OP_ASSIGN_LSHIFT_INT +#undef OP_REL_MIX_INT +#undef OPS_MIX_INT + +#define OP_BIN_MIX_RANGE(BIN_OP, RTYPE) \ + template \ + INLINE typename ap_private<_AP_W1, _AP_S1>::template RType<_AP_W2, \ + _AP_S2>::RTYPE \ + operator BIN_OP(const _private_range_ref<_AP_W1, _AP_S1>& op1, \ + const ap_private<_AP_W2, _AP_S2>& op2) { \ + return ap_private<_AP_W1, false>(op1).operator BIN_OP(op2); \ + } \ + template \ + INLINE typename ap_private<_AP_W1, _AP_S1>::template RType<_AP_W2, \ + _AP_S2>::RTYPE \ + operator BIN_OP(const ap_private<_AP_W1, _AP_S1>& op1, \ + const 
_private_range_ref<_AP_W2, _AP_S2>& op2) { \ + return op1.operator BIN_OP(ap_private<_AP_W2, false>(op2)); \ + } + +#define OP_ASSIGN_MIX_RANGE(ASSIGN_OP) \ + template \ + INLINE ap_private<_AP_W1, _AP_S1>& operator ASSIGN_OP( \ + ap_private<_AP_W1, _AP_S1>& op1, \ + const _private_range_ref<_AP_W2, _AP_S2>& op2) { \ + return op1.operator ASSIGN_OP(ap_private<_AP_W2, false>(op2)); \ + } \ + template \ + INLINE _private_range_ref<_AP_W1, _AP_S1>& operator ASSIGN_OP( \ + _private_range_ref<_AP_W1, _AP_S1>& op1, \ + ap_private<_AP_W2, _AP_S2>& op2) { \ + ap_private<_AP_W1, false> tmp(op1); \ + tmp.operator ASSIGN_OP(op2); \ + op1 = tmp; \ + return op1; \ + } + +#define OP_REL_MIX_RANGE(REL_OP) \ + template \ + INLINE bool operator REL_OP(const _private_range_ref<_AP_W1, _AP_S1>& op1, \ + const ap_private<_AP_W2, _AP_S2>& op2) { \ + return ap_private<_AP_W1, false>(op1).operator REL_OP(op2); \ + } \ + template \ + INLINE bool operator REL_OP(const ap_private<_AP_W1, _AP_S1>& op1, \ + const _private_range_ref<_AP_W2, _AP_S2>& op2) { \ + return op1.operator REL_OP(op2.operator ap_private<_AP_W2, false>()); \ + } + +OP_BIN_MIX_RANGE(+, plus) +OP_BIN_MIX_RANGE(-, minus) +OP_BIN_MIX_RANGE(*, mult) +OP_BIN_MIX_RANGE(/, div) +OP_BIN_MIX_RANGE(%, mod) +OP_BIN_MIX_RANGE(&, logic) +OP_BIN_MIX_RANGE(|, logic) +OP_BIN_MIX_RANGE(^, logic) +OP_BIN_MIX_RANGE(>>, arg1) +OP_BIN_MIX_RANGE(<<, arg1) +#undef OP_BIN_MIX_RANGE + +OP_ASSIGN_MIX_RANGE(+=) +OP_ASSIGN_MIX_RANGE(-=) +OP_ASSIGN_MIX_RANGE(*=) +OP_ASSIGN_MIX_RANGE(/=) +OP_ASSIGN_MIX_RANGE(%=) +OP_ASSIGN_MIX_RANGE(&=) +OP_ASSIGN_MIX_RANGE(|=) +OP_ASSIGN_MIX_RANGE(^=) +OP_ASSIGN_MIX_RANGE(>>=) +OP_ASSIGN_MIX_RANGE(<<=) +#undef OP_ASSIGN_MIX_RANGE + +OP_REL_MIX_RANGE(>) +OP_REL_MIX_RANGE(<) +OP_REL_MIX_RANGE(>=) +OP_REL_MIX_RANGE(<=) +OP_REL_MIX_RANGE(==) +OP_REL_MIX_RANGE(!=) +#undef OP_REL_MIX_RANGE + +#define OP_BIN_MIX_BIT(BIN_OP, RTYPE) \ + template \ + INLINE typename ap_private<1, false>::template RType<_AP_W2, _AP_S2>::RTYPE \ + operator BIN_OP(const _private_bit_ref<_AP_W1, _AP_S1>& op1, \ + const ap_private<_AP_W2, _AP_S2>& op2) { \ + return ap_private<1, false>(op1).operator BIN_OP(op2); \ + } \ + template \ + INLINE typename ap_private<_AP_W1, _AP_S1>::template RType<1, false>::RTYPE \ + operator BIN_OP(const ap_private<_AP_W1, _AP_S1>& op1, \ + const _private_bit_ref<_AP_W2, _AP_S2>& op2) { \ + return op1.operator BIN_OP(ap_private<1, false>(op2)); \ + } + +#define OP_ASSIGN_MIX_BIT(ASSIGN_OP) \ + template \ + INLINE ap_private<_AP_W1, _AP_S1>& operator ASSIGN_OP( \ + ap_private<_AP_W1, _AP_S1>& op1, \ + _private_bit_ref<_AP_W2, _AP_S2>& op2) { \ + return op1.operator ASSIGN_OP(ap_private<1, false>(op2)); \ + } \ + template \ + INLINE _private_bit_ref<_AP_W1, _AP_S1>& operator ASSIGN_OP( \ + _private_bit_ref<_AP_W1, _AP_S1>& op1, \ + ap_private<_AP_W2, _AP_S2>& op2) { \ + ap_private<1, false> tmp(op1); \ + tmp.operator ASSIGN_OP(op2); \ + op1 = tmp; \ + return op1; \ + } + +#define OP_REL_MIX_BIT(REL_OP) \ + template \ + INLINE bool operator REL_OP(const _private_bit_ref<_AP_W1, _AP_S1>& op1, \ + const ap_private<_AP_W2, _AP_S2>& op2) { \ + return ap_private<_AP_W1, false>(op1).operator REL_OP(op2); \ + } \ + template \ + INLINE bool operator REL_OP(const ap_private<_AP_W1, _AP_S1>& op1, \ + const _private_bit_ref<_AP_W2, _AP_S2>& op2) { \ + return op1.operator REL_OP(ap_private<1, false>(op2)); \ + } + +OP_ASSIGN_MIX_BIT(+=) +OP_ASSIGN_MIX_BIT(-=) +OP_ASSIGN_MIX_BIT(*=) +OP_ASSIGN_MIX_BIT(/=) +OP_ASSIGN_MIX_BIT(%=) +OP_ASSIGN_MIX_BIT(&=) 
+OP_ASSIGN_MIX_BIT(|=) +OP_ASSIGN_MIX_BIT(^=) +OP_ASSIGN_MIX_BIT(>>=) +OP_ASSIGN_MIX_BIT(<<=) +#undef OP_ASSIGN_MIX_BIT + +OP_BIN_MIX_BIT(+, plus) +OP_BIN_MIX_BIT(-, minus) +OP_BIN_MIX_BIT(*, mult) +OP_BIN_MIX_BIT(/, div) +OP_BIN_MIX_BIT(%, mod) +OP_BIN_MIX_BIT(&, logic) +OP_BIN_MIX_BIT(|, logic) +OP_BIN_MIX_BIT(^, logic) +OP_BIN_MIX_BIT(>>, arg1) +OP_BIN_MIX_BIT(<<, arg1) +#undef OP_BIN_MIX_BIT + +OP_REL_MIX_BIT(>) +OP_REL_MIX_BIT(<) +OP_REL_MIX_BIT(<=) +OP_REL_MIX_BIT(>=) +OP_REL_MIX_BIT(==) +OP_REL_MIX_BIT(!=) +#undef OP_REL_MIX_BIT + +#define REF_REL_OP_MIX_INT(REL_OP, C_TYPE, _AP_W2, _AP_S2) \ + template \ + INLINE bool operator REL_OP(const _private_range_ref<_AP_W, _AP_S>& op, \ + C_TYPE op2) { \ + return (ap_private<_AP_W, false>(op)) \ + . \ + operator REL_OP(ap_private<_AP_W2, _AP_S2>(op2)); \ + } \ + template \ + INLINE bool operator REL_OP(C_TYPE op2, \ + const _private_range_ref<_AP_W, _AP_S>& op) { \ + return ap_private<_AP_W2, _AP_S2>(op2).operator REL_OP( \ + ap_private<_AP_W, false>(op)); \ + } \ + template \ + INLINE bool operator REL_OP(const _private_bit_ref<_AP_W, _AP_S>& op, \ + C_TYPE op2) { \ + return (bool(op))REL_OP op2; \ + } \ + template \ + INLINE bool operator REL_OP(C_TYPE op2, \ + const _private_bit_ref<_AP_W, _AP_S>& op) { \ + return op2 REL_OP(bool(op)); \ + } + +#define REF_REL_MIX_INT(C_TYPE, _AP_W2, _AP_S2) \ + REF_REL_OP_MIX_INT(>, C_TYPE, (_AP_W2), (_AP_S2)) \ + REF_REL_OP_MIX_INT(<, C_TYPE, (_AP_W2), (_AP_S2)) \ + REF_REL_OP_MIX_INT(>=, C_TYPE, (_AP_W2), (_AP_S2)) \ + REF_REL_OP_MIX_INT(<=, C_TYPE, (_AP_W2), (_AP_S2)) \ + REF_REL_OP_MIX_INT(==, C_TYPE, (_AP_W2), (_AP_S2)) \ + REF_REL_OP_MIX_INT(!=, C_TYPE, (_AP_W2), (_AP_S2)) + +REF_REL_MIX_INT(bool, 1, false) +REF_REL_MIX_INT(char, 8, CHAR_IS_SIGNED) +REF_REL_MIX_INT(signed char, 8, true) +REF_REL_MIX_INT(unsigned char, 8, false) +REF_REL_MIX_INT(short, sizeof(short) * 8, true) +REF_REL_MIX_INT(unsigned short, sizeof(unsigned short) * 8, false) +REF_REL_MIX_INT(int, sizeof(int) * 8, true) +REF_REL_MIX_INT(unsigned int, sizeof(unsigned int) * 8, false) +REF_REL_MIX_INT(long, sizeof(long) * 8, true) +REF_REL_MIX_INT(unsigned long, sizeof(unsigned long) * 8, false) +REF_REL_MIX_INT(ap_slong, sizeof(ap_slong) * 8, true) +REF_REL_MIX_INT(ap_ulong, sizeof(ap_ulong) * 8, false) +#undef REF_REL_OP_MIX_INT +#undef REF_REL_MIX_INT + +#define REF_BIN_OP_MIX_INT(BIN_OP, RTYPE, C_TYPE, _AP_W2, _AP_S2) \ + template \ + INLINE \ + typename ap_private<_AP_W, false>::template RType<_AP_W2, _AP_S2>::RTYPE \ + operator BIN_OP(const _private_range_ref<_AP_W, _AP_S>& op, \ + C_TYPE op2) { \ + return (ap_private<_AP_W, false>(op)) \ + . 
\
+                      operator BIN_OP(ap_private<_AP_W2, _AP_S2>(op2)); \
+  } \
+  template <int _AP_W, bool _AP_S> \
+  INLINE \
+      typename ap_private<_AP_W2, _AP_S2>::template RType<_AP_W, false>::RTYPE \
+      operator BIN_OP(C_TYPE op2, \
+                      const _private_range_ref<_AP_W, _AP_S>& op) { \
+    return ap_private<_AP_W2, _AP_S2>(op2).operator BIN_OP( \
+        ap_private<_AP_W, false>(op)); \
+  }
+
+#define REF_BIN_MIX_INT(C_TYPE, _AP_W2, _AP_S2) \
+  REF_BIN_OP_MIX_INT(+, plus, C_TYPE, (_AP_W2), (_AP_S2)) \
+  REF_BIN_OP_MIX_INT(-, minus, C_TYPE, (_AP_W2), (_AP_S2)) \
+  REF_BIN_OP_MIX_INT(*, mult, C_TYPE, (_AP_W2), (_AP_S2)) \
+  REF_BIN_OP_MIX_INT(/, div, C_TYPE, (_AP_W2), (_AP_S2)) \
+  REF_BIN_OP_MIX_INT(%, mod, C_TYPE, (_AP_W2), (_AP_S2)) \
+  REF_BIN_OP_MIX_INT(&, logic, C_TYPE, (_AP_W2), (_AP_S2)) \
+  REF_BIN_OP_MIX_INT(|, logic, C_TYPE, (_AP_W2), (_AP_S2)) \
+  REF_BIN_OP_MIX_INT(^, logic, C_TYPE, (_AP_W2), (_AP_S2)) \
+  REF_BIN_OP_MIX_INT(>>, arg1, C_TYPE, (_AP_W2), (_AP_S2)) \
+  REF_BIN_OP_MIX_INT(<<, arg1, C_TYPE, (_AP_W2), (_AP_S2))
+
+REF_BIN_MIX_INT(bool, 1, false)
+REF_BIN_MIX_INT(char, 8, CHAR_IS_SIGNED)
+REF_BIN_MIX_INT(signed char, 8, true)
+REF_BIN_MIX_INT(unsigned char, 8, false)
+REF_BIN_MIX_INT(short, sizeof(short) * 8, true)
+REF_BIN_MIX_INT(unsigned short, sizeof(unsigned short) * 8, false)
+REF_BIN_MIX_INT(int, sizeof(int) * 8, true)
+REF_BIN_MIX_INT(unsigned int, sizeof(unsigned int) * 8, false)
+REF_BIN_MIX_INT(long, sizeof(long) * 8, true)
+REF_BIN_MIX_INT(unsigned long, sizeof(unsigned long) * 8, false)
+REF_BIN_MIX_INT(ap_slong, sizeof(ap_slong) * 8, true)
+REF_BIN_MIX_INT(ap_ulong, sizeof(ap_ulong) * 8, false)
+#undef REF_BIN_OP_MIX_INT
+#undef REF_BIN_MIX_INT
+
+#define REF_BIN_OP(BIN_OP, RTYPE) \
+  template <int _AP_W, bool _AP_S, int _AP_W2, bool _AP_S2> \
+  INLINE \
+      typename ap_private<_AP_W, false>::template RType<_AP_W2, false>::RTYPE \
+      operator BIN_OP(const _private_range_ref<_AP_W, _AP_S>& lhs, \
+                      const _private_range_ref<_AP_W2, _AP_S2>& rhs) { \
+    return ap_private<_AP_W, false>(lhs).operator BIN_OP( \
+        ap_private<_AP_W2, false>(rhs)); \
+  }
+
+REF_BIN_OP(+, plus)
+REF_BIN_OP(-, minus)
+REF_BIN_OP(*, mult)
+REF_BIN_OP(/, div)
+REF_BIN_OP(%, mod)
+REF_BIN_OP(&, logic)
+REF_BIN_OP(|, logic)
+REF_BIN_OP(^, logic)
+REF_BIN_OP(>>, arg1)
+REF_BIN_OP(<<, arg1)
+#undef REF_BIN_OP
+
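An illustrative sketch of what the REF_BIN_OP and REF_BIN_OP_MIX_INT overloads
above make legal (again assuming the ap_int/ap_uint front end over ap_private,
which this diff does not show; result widths follow the RType traits, and
range slices are treated as unsigned):

  #include <ap_int.h>
  ap_uint<8> a = 0xFF, b = 0x0F;
  int hi = a.range(7, 4) + b.range(3, 0);  // 0xF + 0xF = 30, slices zero-extend
  ap_uint<9> s = a.range(7, 0) + 1;        // 256: "plus" widens, no 8-bit wraparound

+//************************************************************************
+// Implement
+//      ap_private = ap_concat_ref OP ap_concat_ref
+// for operators +, -, *, /, %, >>, <<, &, |, ^
+// Without these operators the operands are converted to int64 and
+// larger results lose information (higher order bits).
+//
+//                  operand OP
+//                 /          \
+//       left-concat          right-concat
+//        /       \            /       \
+//
+// _AP_LW1, _AP_LT1 (width and type of left-concat's left side)
+// _AP_LW2, _AP_LT2 (width and type of left-concat's right side)
+// Similarly for RHS of operand OP: _AP_RW1, _AP_RW2, _AP_RT1, _AP_RT2
+//
+// In Verilog 2001 result of concatenation is always unsigned even
+// when both sides are signed.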
+//************************************************************************ + +#endif // ifndef __AP_PRIVATE_H__ diff --git a/include/floating_point_v7_0_bitacc_cmodel.h b/include/floating_point_v7_0_bitacc_cmodel.h new file mode 100644 index 0000000..4972cc4 --- /dev/null +++ b/include/floating_point_v7_0_bitacc_cmodel.h @@ -0,0 +1,371 @@ +//---------------------------------------------------------------------------- +// ____ ____ +// / /\/ / +// /___/ \ / Vendor: Xilinx +// \ \ \/ Version: 6.0 +// \ \ Filename: $RCSfile: floating_point_v7_0_bitacc_cmodel.h,v $ +// / / Date Last Modified: $Date: 2011/06/15 13:06:43 $ +// /___/ /\ Date Created: 2011 +// +// Device : All +// Library : floating_point_v7_0 +// Purpose : Header file for bit accurate model of Floating Point Operator +// Revision: $Revision: 1.6.6.2 $ +// +//------------------------------------------------------------------------------ +// (c) Copyright 2011-2012 Xilinx, Inc. All rights reserved. +// +// This file contains confidential and proprietary information +// of Xilinx, Inc. and is protected under U.S. and +// international copyright and other intellectual property +// laws. +// +// DISCLAIMER +// This disclaimer is not a license and does not grant any +// rights to the materials distributed herewith. Except as +// otherwise provided in a valid license issued to you by +// Xilinx, and to the maximum extent permitted by applicable +// law: (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND +// WITH ALL FAULTS, AND XILINX HEREBY DISCLAIMS ALL WARRANTIES +// AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING +// BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON- +// INFRINGEMENT, OR FITNESS FOR ANY PARTICULAR PURPOSE; and +// (2) Xilinx shall not be liable (whether in contract or tort, +// including negligence, or under any other theory of +// liability) for any loss or damage of any kind or nature +// related to, arising under or in connection with these +// materials, including for any direct, or any indirect, +// special, incidental, or consequential loss or damage +// (including loss of data, profits, goodwill, or any type of +// loss or damage suffered as a result of any action brought +// by a third party) even if such damage or loss was +// reasonably foreseeable or Xilinx had been advised of the +// possibility of the same. +// +// CRITICAL APPLICATIONS +// Xilinx products are not designed or intended to be fail- +// safe, or for use in any application requiring fail-safe +// performance, such as life-support or safety devices or +// systems, Class III medical devices, nuclear facilities, +// applications related to the deployment of airbags, or any +// other applications that could lead to death, personal +// injury, or severe property or environmental damage +// (individually and collectively, "Critical +// Applications"). Customer assumes the sole risk and +// liability of any use of Xilinx products in Critical +// Applications, subject only to applicable laws and +// regulations governing limitations on product liability. +// +// THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS +// PART OF THIS FILE AT ALL TIMES. 
+//------------------------------------------------------------------------------ + +#ifndef __xip_fpo_bitacc_cmodel_h +#define __xip_fpo_bitacc_cmodel_h + +#ifdef NT +#define __XIP_FPO_DLLIMPORT __declspec(dllimport) +#define __XIP_FPO_DLLEXPORT __declspec(dllexport) +#else +#define __XIP_FPO_DLLIMPORT +#define __XIP_FPO_DLLEXPORT +#endif + +// Only define __XIP_FPO_BUILD_DLL when building the C model DLL; do not define it when using the C model +#ifdef __XIP_FPO_BUILD_DLL +#define __XIP_FPO_DLL __XIP_FPO_DLLEXPORT +#else +#define __XIP_FPO_DLL __XIP_FPO_DLLIMPORT +#endif + +/* Extra define for functions with variable numbers of arguments */ +#define __XIP_FPO_SENTINEL_ATTR +#if defined (__GNUC__) +# if __GNUC__ >= 4 +# undef __XIP_FPO_SENTINEL_ATTR +# define __XIP_FPO_SENTINEL_ATTR __attribute__ ((sentinel)) +# endif +#endif + +/* Define Floating Point Operator core version number */ +#define XIP_FPO_VERSION_MAJOR 6 +#define XIP_FPO_VERSION_MINOR 2 +#define XIP_FPO_REVISION 0 +/* Version string does not include revision if revision is 0 (revision >0 reserved for future use) */ +#define XIP_FPO_VERSION_STRING "6.2" + +/* Use C99 exact width integer types for 64-bit integers and *_uj and *_sj functions */ +// For Windows platforms, stdint.h and inttypes.h are not present in Visual Studio 2005/2008 +// Therefore we define the required types ourselves +// For Linux platforms, we need to continue using stdint.h (and not re-define intmax_t, uintmax_t) +// because SysGen already uses this header. +#ifdef NT +typedef signed char xint8; +typedef signed short xint16; +typedef signed int xint32; +typedef signed long long xint64; +typedef unsigned char xuint8; +typedef unsigned short xuint16; +typedef unsigned int xuint32; +typedef unsigned long long xuint64; +typedef xint64 intmax_t; +typedef xuint64 uintmax_t; +#else +#include +typedef int8_t xint8; +typedef int16_t xint16; +typedef int32_t xint32; +typedef int64_t xint64; +typedef uint8_t xuint8; +typedef uint16_t xuint16; +typedef uint32_t xuint32; +typedef uint64_t xuint64; +#endif + +#include + +// REVISIT: included before mpfr.h to permit definition of prototypes for mpfr_printf +#include + +// Force MPFR to use intmax_t and uintmax_t types (the compiled libraries have the functions that use these) +#define MPFR_USE_INTMAX_T + +// Tell MPIR on Windows platforms that it is compiled into a DLL +#ifdef NT +#define __GMP_LIBGMP_DLL 1 +#endif + +/* Check if GMP is included, and try to include it (Works with local GMP) + Note that where MPIR is provided as a compatible alternative to GMP, + it also provides a gmp.h header file to allow MPIR and GMP to be easily interchanged. + Select the correct gmp.h by setting the -I option (includes path) for your compiler. 
*/
+#ifndef __GMP_H__
+#include <gmp.h>
+#endif
+
+/* Check if MPFR is included, and try to include it (Works with local MPFR) */
+#ifndef __MPFR_H
+#include <mpfr.h>
+#endif
+
+/* Precision of mantissa or exponent (bits) */
+typedef long xip_fpo_prec_t;
+
+/* Definition of sign */
+typedef int xip_fpo_sign_t;
+
+/* Definition of exponent */
+typedef long xip_fpo_exp_t;
+
+/* The main floating point number structure */
+typedef struct {
+  xip_fpo_prec_t _xip_fpo_exp_prec;
+  xip_fpo_prec_t _xip_fpo_mant_prec;
+  xip_fpo_sign_t _xip_fpo_sign;
+  xip_fpo_exp_t _xip_fpo_exp;
+  mp_limb_t *_xip_fpo_d;
+} __xip_fpo_struct;
+
+/* The main fixed point number structure */
+typedef struct {
+  xip_fpo_prec_t _xip_fpo_i_prec;
+  xip_fpo_prec_t _xip_fpo_frac_prec;
+  xint64 _xip_fpo_i;
+  xint64 _xip_fpo_frac;
+} __xip_fpo_fix_struct;
+
+/* User-visible types for floating point and fixed point numbers */
+typedef __xip_fpo_struct xip_fpo_t[1];
+typedef __xip_fpo_fix_struct xip_fpo_fix_t[1];
+
+/* Pointers to floating point and fixed point numbers, for function prototypes */
+typedef __xip_fpo_struct *xip_fpo_ptr;
+typedef const __xip_fpo_struct *xip_fpo_srcptr;
+typedef __xip_fpo_fix_struct *xip_fpo_fix_ptr;
+typedef const __xip_fpo_fix_struct *xip_fpo_fix_srcptr;
+
+/* Definition of exception flags - return type of most functions
+   Flags are as follows:
+   bit 0 : underflow
+   bit 1 : overflow
+   bit 2 : invalid operation
+   bit 3 : divide by zero
+   bit 4 : operation not supported
+   bit 5 : Accumulator Input Overflow
+   bit 6 : Accumulator Overflow
+ */
+typedef int xip_fpo_exc_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct xil_fpo_accum_state xil_fpo_accum_state;
+
+/* Information functions */
+__XIP_FPO_DLL const char * xip_fpo_get_version (void);
+
+/* Initialization functions */
+__XIP_FPO_DLL void xip_fpo_init2 (xip_fpo_ptr, xip_fpo_prec_t, xip_fpo_prec_t);
+__XIP_FPO_DLL void xip_fpo_fix_init2 (xip_fpo_fix_ptr, xip_fpo_prec_t, xip_fpo_prec_t);
+__XIP_FPO_DLL void xip_fpo_inits2 (xip_fpo_prec_t, xip_fpo_prec_t, xip_fpo_ptr, ...) __XIP_FPO_SENTINEL_ATTR;
+__XIP_FPO_DLL void xip_fpo_fix_inits2 (xip_fpo_prec_t, xip_fpo_prec_t, xip_fpo_fix_ptr, ...) __XIP_FPO_SENTINEL_ATTR;
+__XIP_FPO_DLL void xip_fpo_clear (xip_fpo_ptr);
+__XIP_FPO_DLL void xip_fpo_fix_clear (xip_fpo_fix_ptr);
+__XIP_FPO_DLL void xip_fpo_clears (xip_fpo_ptr, ...) __XIP_FPO_SENTINEL_ATTR;
+__XIP_FPO_DLL void xip_fpo_fix_clears (xip_fpo_fix_ptr, ...)
__XIP_FPO_SENTINEL_ATTR; +__XIP_FPO_DLL void xip_fpo_set_prec (xip_fpo_ptr, xip_fpo_prec_t, xip_fpo_prec_t); +__XIP_FPO_DLL void xip_fpo_fix_set_prec (xip_fpo_fix_ptr, xip_fpo_prec_t, xip_fpo_prec_t); +__XIP_FPO_DLL xip_fpo_prec_t xip_fpo_get_prec_mant (xip_fpo_ptr); +__XIP_FPO_DLL xip_fpo_prec_t xip_fpo_get_prec_exp (xip_fpo_ptr); +__XIP_FPO_DLL xip_fpo_prec_t xip_fpo_fix_get_prec_frac (xip_fpo_fix_ptr); +__XIP_FPO_DLL xip_fpo_prec_t xip_fpo_fix_get_prec_int (xip_fpo_fix_ptr); + +/* Assignment functions */ +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_set (xip_fpo_ptr, xip_fpo_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_fix_set (xip_fpo_fix_ptr, xip_fpo_fix_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_set_ui (xip_fpo_ptr, unsigned long); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_fix_set_ui (xip_fpo_fix_ptr, unsigned long); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_set_si (xip_fpo_ptr, long); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_fix_set_si (xip_fpo_fix_ptr, long); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_set_uj (xip_fpo_ptr, uintmax_t); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_fix_set_uj (xip_fpo_fix_ptr, uintmax_t); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_set_sj (xip_fpo_ptr, intmax_t); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_fix_set_sj (xip_fpo_fix_ptr, intmax_t); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_set_flt (xip_fpo_ptr, float); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_fix_set_flt (xip_fpo_fix_ptr, float); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_set_d (xip_fpo_ptr, double); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_fix_set_d (xip_fpo_fix_ptr, double); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_set_z (xip_fpo_ptr, mpz_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_fix_set_z (xip_fpo_fix_ptr, mpz_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_set_q (xip_fpo_ptr, mpq_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_fix_set_q (xip_fpo_fix_ptr, mpq_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_set_f (xip_fpo_ptr, mpf_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_fix_set_f (xip_fpo_fix_ptr, mpf_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_set_fr (xip_fpo_ptr, mpfr_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_fix_set_fr (xip_fpo_fix_ptr, mpfr_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_set_ui_2exp (xip_fpo_ptr, unsigned long, xip_fpo_exp_t); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_set_si_2exp (xip_fpo_ptr, long, xip_fpo_exp_t); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_set_uj_2exp (xip_fpo_ptr, uintmax_t, intmax_t); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_set_sj_2exp (xip_fpo_ptr, intmax_t, intmax_t); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_set_str (xip_fpo_ptr, const char *, int); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_fix_set_str (xip_fpo_fix_ptr, const char *, int); +__XIP_FPO_DLL void xip_fpo_set_nan (xip_fpo_ptr); +__XIP_FPO_DLL void xip_fpo_set_inf (xip_fpo_ptr, int); +__XIP_FPO_DLL void xip_fpo_set_zero (xip_fpo_ptr, int); + +/* Conversion functions */ +__XIP_FPO_DLL unsigned long xip_fpo_get_ui (xip_fpo_srcptr); +__XIP_FPO_DLL unsigned long xip_fpo_fix_get_ui (xip_fpo_fix_srcptr); +__XIP_FPO_DLL long xip_fpo_get_si (xip_fpo_srcptr); +__XIP_FPO_DLL long xip_fpo_fix_get_si (xip_fpo_fix_srcptr); +__XIP_FPO_DLL uintmax_t xip_fpo_get_uj (xip_fpo_srcptr); +__XIP_FPO_DLL uintmax_t xip_fpo_fix_get_uj (xip_fpo_fix_srcptr); +__XIP_FPO_DLL intmax_t xip_fpo_get_sj (xip_fpo_srcptr); +__XIP_FPO_DLL intmax_t xip_fpo_fix_get_sj (xip_fpo_fix_srcptr); +__XIP_FPO_DLL float xip_fpo_get_flt (xip_fpo_srcptr); +__XIP_FPO_DLL float xip_fpo_fix_get_flt (xip_fpo_fix_srcptr); +__XIP_FPO_DLL double xip_fpo_get_d (xip_fpo_srcptr); +__XIP_FPO_DLL double xip_fpo_fix_get_d 
(xip_fpo_fix_srcptr); +__XIP_FPO_DLL double xip_fpo_get_d_2exp (long *, xip_fpo_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_get_z (mpz_ptr, xip_fpo_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_fix_get_z (mpz_ptr, xip_fpo_fix_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_get_f (mpf_ptr, xip_fpo_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_fix_get_f (mpf_ptr, xip_fpo_fix_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_get_fr (mpfr_ptr, xip_fpo_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_fix_get_fr (mpfr_ptr, xip_fpo_fix_srcptr); +__XIP_FPO_DLL char * xip_fpo_get_str (char *, xip_fpo_exp_t *, int, int, xip_fpo_srcptr); +__XIP_FPO_DLL char * xip_fpo_fix_get_str (char *, int, xip_fpo_fix_srcptr); +__XIP_FPO_DLL void xip_fpo_free_str (char *); +__XIP_FPO_DLL void xip_fpo_fix_free_str (char *); +__XIP_FPO_DLL int xip_fpo_sizeinbase (xip_fpo_srcptr, int); +__XIP_FPO_DLL int xip_fpo_fix_sizeinbase (xip_fpo_fix_srcptr, int); + +/* Operation functions */ +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_add (xip_fpo_ptr, xip_fpo_srcptr, xip_fpo_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_add_flt (float *, float, float); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_add_d (double *, double, double); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_sub (xip_fpo_ptr, xip_fpo_srcptr, xip_fpo_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_sub_flt (float *, float, float); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_sub_d (double *, double, double); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_mul (xip_fpo_ptr, xip_fpo_srcptr, xip_fpo_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_mul_flt (float *, float, float); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_mul_d (double *, double, double); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_fma (xip_fpo_ptr, xip_fpo_srcptr, xip_fpo_srcptr, xip_fpo_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_fma_flt (float *, float, float, float); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_fma_d (double *, double, double, double); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_fms (xip_fpo_ptr, xip_fpo_srcptr, xip_fpo_srcptr, xip_fpo_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_fms_flt (float *, float, float, float); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_fms_d (double *, double, double, double); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_div (xip_fpo_ptr, xip_fpo_srcptr, xip_fpo_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_div_flt (float *, float, float); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_div_d (double *, double, double); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_rec (xip_fpo_ptr, xip_fpo_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_rec_flt (float *, float); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_rec_d (double *, double); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_abs (xip_fpo_ptr, xip_fpo_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_abs_flt (float *, float); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_abs_d (double *, double); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_log (xip_fpo_ptr, xip_fpo_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_log_flt (float *, float); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_log_d (double *, double); +__XIP_FPO_DLL int xip_fpo_exp_array (xip_fpo_t * , xip_fpo_t * , xip_fpo_exc_t *, unsigned long long); +__XIP_FPO_DLL void xip_fpo_exp_flt_array (float * , float * , xip_fpo_exc_t *, unsigned long long); +__XIP_FPO_DLL void xip_fpo_exp_d_array (double * , double * , xip_fpo_exc_t *, unsigned long long); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_exp (xip_fpo_ptr , xip_fpo_srcptr ); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_exp_flt (float * , float ); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_exp_d (double * , double ); +__XIP_FPO_DLL struct xil_fpo_accum_state * 
xip_fpo_accum_create_state (int , int , int , int , int); +__XIP_FPO_DLL void xip_fpo_accum_reset_state (struct xil_fpo_accum_state *); +__XIP_FPO_DLL void xip_fpo_accum_destroy_state (struct xil_fpo_accum_state *); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_accum_sample (xip_fpo_t, xip_fpo_t, bool, struct xil_fpo_accum_state *); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_accum_sample_flt (float *, float , bool, struct xil_fpo_accum_state *); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_accum_sample_d (double *, double , bool, struct xil_fpo_accum_state *); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_sqrt (xip_fpo_ptr, xip_fpo_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_sqrt_flt (float *, float); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_sqrt_d (double *, double); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_recsqrt (xip_fpo_ptr, xip_fpo_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_recsqrt_flt (float *, float); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_recsqrt_d (double *, double); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_unordered (int *, xip_fpo_srcptr, xip_fpo_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_unordered_flt (int *, float, float); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_unordered_d (int *, double, double); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_equal (int *, xip_fpo_srcptr, xip_fpo_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_equal_flt (int *, float, float); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_equal_d (int *, double, double); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_less (int *, xip_fpo_srcptr, xip_fpo_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_less_flt (int *, float, float); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_less_d (int *, double, double); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_lessequal (int *, xip_fpo_srcptr, xip_fpo_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_lessequal_flt (int *, float, float); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_lessequal_d (int *, double, double); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_greater (int *, xip_fpo_srcptr, xip_fpo_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_greater_flt (int *, float, float); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_greater_d (int *, double, double); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_greaterequal (int *, xip_fpo_srcptr, xip_fpo_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_greaterequal_flt (int *, float, float); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_greaterequal_d (int *, double, double); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_notequal (int *, xip_fpo_srcptr, xip_fpo_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_notequal_flt (int *, float, float); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_notequal_d (int *, double, double); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_condcode (int *, xip_fpo_srcptr, xip_fpo_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_condcode_flt (int *, float, float); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_condcode_d (int *, double, double); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_flttofix (xip_fpo_fix_ptr, xip_fpo_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_flttofix_int_flt (int *, float); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_flttofix_int_d (int *, double); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_fixtoflt (xip_fpo_ptr, xip_fpo_fix_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_fixtoflt_flt_int (float *, int); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_fixtoflt_d_int (double *, int); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_flttoflt (xip_fpo_ptr, xip_fpo_srcptr); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_flttoflt_flt_flt (float *, float); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_flttoflt_flt_d (float *, double); +__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_flttoflt_d_flt (double *, float); 
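
A minimal end-to-end sketch of this API, using only declarations from this
header. It assumes the (exponent, mantissa) precision order suggested by the
__xip_fpo_struct layout, single precision (8, 24), and a null sentinel for the
variadic inits2/clears calls as __XIP_FPO_SENTINEL_ATTR suggests; the flag
bits tested are the ones documented at the xip_fpo_exc_t typedef:

  #include "floating_point_v7_0_bitacc_cmodel.h"

  void fpo_example(void) {
    xip_fpo_t a, b, r;
    xip_fpo_exc_t exc;
    xip_fpo_inits2(8, 24, a, b, r, (xip_fpo_ptr) 0);
    xip_fpo_set_d(a, 1.5);
    xip_fpo_set_d(b, 2.25);
    exc = xip_fpo_add(r, a, b);      /* r = a + b = 3.75 */
    if (exc & 0x6) {                 /* bit 1: overflow, bit 2: invalid op */
      /* handle the exception */
    }
    double res = xip_fpo_get_d(r);
    (void) res;
    xip_fpo_clears(a, b, r, (xip_fpo_ptr) 0);
  }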
+__XIP_FPO_DLL xip_fpo_exc_t xip_fpo_flttoflt_d_d (double *, double);
+
+#ifdef __cplusplus
+} /* End of "C" linkage block */
+#endif
+
+#endif // __xip_fpo_bitacc_cmodel_h
+
diff --git a/include/gmp.h b/include/gmp.h
new file mode 100644
index 0000000..b09771c
--- /dev/null
+++ b/include/gmp.h
@@ -0,0 +1,2391 @@
+/* Definitions for GNU multiple precision functions.   -*- mode: c -*-
+
+Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
+2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+
+Copyright 2008 William Hart, Gonzalo Tornaria
+
+This file is part of the MPIR Library.
+
+The MPIR Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The MPIR Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#ifndef __GMP_H__
+
+#if defined (__cplusplus)
+#include <iosfwd>   /* for std::istream, std::ostream, std::string */
+#include <cstdio>
+#endif
+
+
+/* Instantiated by configure. */
+#if ! defined (__GMP_WITHIN_CONFIGURE)
+#define __GMP_BITS_PER_MP_LIMB 64
+#define __GMP_HAVE_HOST_CPU_FAMILY_power 0
+#define __GMP_HAVE_HOST_CPU_FAMILY_powerpc 0
+#define GMP_LIMB_BITS 64
+#define GMP_NAIL_BITS 0
+#endif
+#define GMP_NUMB_BITS (GMP_LIMB_BITS - GMP_NAIL_BITS)
+#define GMP_NUMB_MASK ((~ __GMP_CAST (mp_limb_t, 0)) >> GMP_NAIL_BITS)
+#define GMP_NUMB_MAX GMP_NUMB_MASK
+#define GMP_NAIL_MASK (~ GMP_NUMB_MASK)
+
+
+/* The following (everything under ifndef __GNU_MP__) must be identical in
+   mpir.h and mp.h to allow both to be included in an application or during
+   the library build. */
+#ifndef __GNU_MP__
+#define __GNU_MP__ 4
+
+#define __need_size_t  /* tell gcc stddef.h we only want size_t */
+#if defined (__cplusplus)
+#include <cstddef>     /* for size_t */
+#else
+#include <stddef.h>    /* for size_t */
+#endif
+#undef __need_size_t
+
+/* Instantiated by configure. */
+#if ! defined (__GMP_WITHIN_CONFIGURE)
+/* #undef _LONG_LONG_LIMB */
+#define __GMP_LIBGMP_DLL 0
+#endif
+
+/* #if defined(__GMP_WITHIN_CONFIGURE) && defined(_WIN64) */
+#ifdef __WIN64
+#define _LONG_LONG_LIMB 1
+#endif
+
+
+/* __STDC__ - some ANSI compilers define this only to 0, hence the use of
+   "defined" and not "__STDC__-0".  In particular Sun workshop C 5.0
+   sets __STDC__ to 0, but requires "##" for token pasting.
+
+   _AIX - gnu ansidecl.h asserts that all known AIX compilers are ANSI but
+   don't always define __STDC__.
+
+   __DECC - current versions of DEC C (5.9 for instance) for alpha are ANSI,
+   but don't define __STDC__ in their default mode.  Don't know if old
+   versions might have been K&R, but let's not worry about that unless
+   someone is still using one.
+
+   _mips - gnu ansidecl.h says the RISC/OS MIPS compiler is ANSI in SVR4
+   mode, but doesn't define __STDC__.
+
+   _MSC_VER - Microsoft C is ANSI, but __STDC__ is undefined unless the /Za
+   option is given (in which case it's 1).
+
+   _WIN32 - tested for by gnu ansidecl.h, no doubt on the assumption that
+   all w32 compilers are ansi.
+ + Note: This same set of tests is used by gen-psqr.c and + demos/expr/expr-impl.h, so if anything needs adding, then be sure to + update those too. */ + +#if defined (__STDC__) \ + || defined (__cplusplus) \ + || defined (_AIX) \ + || defined (__DECC) \ + || (defined (__mips) && defined (_SYSTYPE_SVR4)) \ + || defined (_MSC_VER) \ + || defined (_WIN32) +#define __GMP_HAVE_CONST 1 +#define __GMP_HAVE_PROTOTYPES 1 +#define __GMP_HAVE_TOKEN_PASTE 1 +#else +#define __GMP_HAVE_CONST 0 +#define __GMP_HAVE_PROTOTYPES 0 +#define __GMP_HAVE_TOKEN_PASTE 0 +#endif + + +#if __GMP_HAVE_CONST +#define __gmp_const const +#define __gmp_signed signed +#else +#define __gmp_const +#define __gmp_signed +#endif + + +/* __GMP_DECLSPEC supports Windows DLL versions of libmpir, and is empty in + all other circumstances. + + When compiling objects for libmpir, __GMP_DECLSPEC is an export directive, + or when compiling for an application it's an import directive. The two + cases are differentiated by __GMP_WITHIN_GMP defined by the GMP Makefiles + (and not defined from an application). + + __GMP_DECLSPEC_XX is similarly used for libmpirxx. __GMP_WITHIN_GMPXX + indicates when building libmpirxx, and in that case libmpirxx functions are + exports, but libmpir functions which might get called are imports. + + libmp.la uses __GMP_DECLSPEC, just as if it were libmpir.la. libmpir and + libmp don't call each other, so there's no conflict or confusion. + + Libtool DLL_EXPORT define is not used. + + There's no attempt to support GMP built both static and DLL. Doing so + would mean applications would have to tell us which of the two is going + to be used when linking, and that seems very tedious and error prone if + using GMP by hand, and equally tedious from a package since autoconf and + automake don't give much help. + + __GMP_DECLSPEC is required on all documented global functions and + variables, the various internals in gmp-impl.h etc can be left unadorned. + But internals used by the test programs or speed measuring programs + should have __GMP_DECLSPEC, and certainly constants or variables must + have it or the wrong address will be resolved. + + In gcc __declspec can go at either the start or end of a prototype. + + In Microsoft C __declspec must go at the start, or after the type like + void __declspec(...) *foo()". There's no __dllexport or anything to + guard against someone foolish #defining dllexport. _export used to be + available, but no longer. + + In Borland C _export still exists, but needs to go after the type, like + "void _export foo();". Would have to change the __GMP_DECLSPEC syntax to + make use of that. Probably more trouble than it's worth. 
*/ + +#if defined (__GNUC__) +#define __GMP_DECLSPEC_EXPORT __declspec(__dllexport__) +#define __GMP_DECLSPEC_IMPORT __declspec(__dllimport__) +#endif +#if defined (_MSC_VER) || defined (__BORLANDC__) +#define __GMP_DECLSPEC_EXPORT __declspec(dllexport) +#define __GMP_DECLSPEC_IMPORT __declspec(dllimport) +#endif +#ifdef __WATCOMC__ +#define __GMP_DECLSPEC_EXPORT __export +#define __GMP_DECLSPEC_IMPORT __import +#endif +#ifdef __IBMC__ +#define __GMP_DECLSPEC_EXPORT _Export +#define __GMP_DECLSPEC_IMPORT _Import +#endif + +#if __GMP_LIBGMP_DLL +#if __GMP_WITHIN_GMP +/* compiling to go into a DLL libmpir */ +#define __GMP_DECLSPEC __GMP_DECLSPEC_EXPORT +#else +/* compiling to go into an application which will link to a DLL libmpir */ +#define __GMP_DECLSPEC __GMP_DECLSPEC_IMPORT +#endif +#else +/* all other cases */ +#define __GMP_DECLSPEC +#endif + + +#ifdef __GMP_SHORT_LIMB +typedef unsigned int mp_limb_t; +typedef int mp_limb_signed_t; +#else +#ifdef _LONG_LONG_LIMB +typedef unsigned long long int mp_limb_t; +typedef long long int mp_limb_signed_t; +#else +typedef unsigned long int mp_limb_t; +typedef long int mp_limb_signed_t; +#endif +#endif + +#ifdef _WIN64 +typedef unsigned long long int mp_bitcnt_t; +#else +typedef unsigned long int mp_bitcnt_t; +#endif + +/* For reference, note that the name __mpz_struct gets into C++ mangled + function names, which means although the "__" suggests an internal, we + must leave this name for binary compatibility. */ +typedef struct +{ + int _mp_alloc; /* Number of *limbs* allocated and pointed + to by the _mp_d field. */ + int _mp_size; /* abs(_mp_size) is the number of limbs the + last field points to. If _mp_size is + negative this is a negative number. */ + mp_limb_t *_mp_d; /* Pointer to the limbs. */ +} __mpz_struct; + +#endif /* __GNU_MP__ */ + +typedef __mpz_struct mpz_t[1]; + +typedef mp_limb_t * mp_ptr; +typedef __gmp_const mp_limb_t * mp_srcptr; +#if defined( _WIN64) +#define __GMP_MP_SIZE_T_INT 0 +typedef long long int mp_size_t; +typedef long int mp_exp_t; +#else +#define __GMP_MP_SIZE_T_INT 0 +typedef long int mp_size_t; +typedef long int mp_exp_t; +#endif + +typedef struct +{ + __mpz_struct _mp_num; + __mpz_struct _mp_den; +} __mpq_struct; + +typedef __mpq_struct mpq_t[1]; + +typedef struct +{ + int _mp_prec; /* Max precision, in number of `mp_limb_t's. + Set by mpf_init and modified by + mpf_set_prec. The area pointed to by the + _mp_d field contains `prec' + 1 limbs. */ + int _mp_size; /* abs(_mp_size) is the number of limbs the + last field points to. If _mp_size is + negative this is a negative number. */ + mp_exp_t _mp_exp; /* Exponent, in the base of `mp_limb_t'. */ + mp_limb_t *_mp_d; /* Pointer to the limbs. */ +} __mpf_struct; + +typedef __mpf_struct mpf_t[1]; + +/* Available random number generation algorithms. */ +typedef enum +{ + GMP_RAND_ALG_DEFAULT = 0, + GMP_RAND_ALG_LC = GMP_RAND_ALG_DEFAULT /* Linear congruential. */ +} gmp_randalg_t; + +/* Random state struct. */ +typedef struct +{ + mpz_t _mp_seed; /* _mp_d member points to state of the generator. */ + gmp_randalg_t _mp_alg; /* Currently unused. */ + union { + void *_mp_lc; /* Pointer to function pointers structure. */ + } _mp_algdata; +} __gmp_randstate_struct; +typedef __gmp_randstate_struct gmp_randstate_t[1]; + +/* Types for function declarations in gmp files. */ +/* ??? Should not pollute user name space with these ??? 
*/
+typedef __gmp_const __mpz_struct *mpz_srcptr;
+typedef __mpz_struct *mpz_ptr;
+typedef __gmp_const __mpf_struct *mpf_srcptr;
+typedef __mpf_struct *mpf_ptr;
+typedef __gmp_const __mpq_struct *mpq_srcptr;
+typedef __mpq_struct *mpq_ptr;
+
+
+/* This is not wanted in mp.h, so put it outside the __GNU_MP__ common
+   section. */
+#if __GMP_LIBGMP_DLL
+#if __GMP_WITHIN_GMPXX
+/* compiling to go into a DLL libmpirxx */
+#define __GMP_DECLSPEC_XX __GMP_DECLSPEC_EXPORT
+#else
+/* compiling to go into an application which will link to a DLL libmpirxx */
+#define __GMP_DECLSPEC_XX __GMP_DECLSPEC_IMPORT
+#endif
+#else
+/* all other cases */
+#define __GMP_DECLSPEC_XX
+#endif
+
+
+#if __GMP_HAVE_PROTOTYPES
+#define __GMP_PROTO(x) x
+#else
+#define __GMP_PROTO(x) ()
+#endif
+
+#ifndef __MPN
+#if __GMP_HAVE_TOKEN_PASTE
+#define __MPN(x) __gmpn_##x
+#else
+#define __MPN(x) __gmpn_/**/x
+#endif
+#endif
+
+/* For reference, "defined(EOF)" cannot be used here.  In g++ 2.95.4,
+   <iostream> defines EOF but not FILE. */
+#if defined (FILE) \
+  || defined (H_STDIO) \
+  || defined (_H_STDIO)          /* AIX */ \
+  || defined (_STDIO_H)          /* glibc, Sun, SCO */ \
+  || defined (_STDIO_H_)         /* BSD, OSF */ \
+  || defined (__STDIO_H)         /* Borland */ \
+  || defined (__STDIO_H__)       /* IRIX */ \
+  || defined (_STDIO_INCLUDED)   /* HPUX */ \
+  || defined (_FILE_DEFINED)     /* Microsoft */ \
+  || defined (__STDIO__)         /* Apple MPW MrC */ \
+  || defined (_MSL_STDIO_H)      /* Metrowerks */ \
+  || defined (_STDIO_H_INCLUDED) /* QNX4 */ \
+  || defined (_ISO_STDIO_ISO_H)  /* Sun C++ */
+#define _GMP_H_HAVE_FILE 1
+#endif
+
+/* In ISO C, if a prototype involving "struct obstack *" is given without
+   that structure defined, then the struct is scoped down to just the
+   prototype, causing a conflict if it's subsequently defined for real.  So
+   only give prototypes if we've got obstack.h. */
+#if defined (_OBSTACK_H)   /* glibc */
+#define _GMP_H_HAVE_OBSTACK 1
+#endif
+
+/* The prototypes for gmp_vprintf etc are provided only if va_list is
+   available, via an application having included <stdarg.h> or <stdio.h>.
+   Usually va_list is a typedef so can't be tested directly, but C99
+   specifies that va_start is a macro (and it was normally a macro on past
+   systems too), so look for that.
+
+   <stdio.h> will define some sort of va_list for vprintf and vfprintf, but
+   let's not bother trying to use that since it's not standard and since
+   application uses for gmp_vprintf etc will almost certainly require the
+   whole <stdarg.h> or <stdio.h> anyway. */
+
+#ifdef va_start
+#define _GMP_H_HAVE_VA_LIST 1
+#endif
+
+/* Test for gcc >= maj.min, as per __GNUC_PREREQ in glibc */
+#if defined (__GNUC__) && defined (__GNUC_MINOR__)
+#define __GMP_GNUC_PREREQ(maj, min) \
+  ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
+#else
+#define __GMP_GNUC_PREREQ(maj, min)  0
+#endif
+
+/* "pure" is in gcc 2.96 and up, see "(gcc)Function Attributes".  Basically
+   it means a function does nothing but examine its arguments and memory
+   (global or via arguments) to generate a return value, but changes nothing
+   and has no side-effects.  __GMP_NO_ATTRIBUTE_CONST_PURE lets
+   tune/common.c etc turn this off when trying to write timing loops. */
+#if __GMP_GNUC_PREREQ (2,96) && ! defined (__GMP_NO_ATTRIBUTE_CONST_PURE)
+#define __GMP_ATTRIBUTE_PURE __attribute__ ((__pure__))
+#else
+#define __GMP_ATTRIBUTE_PURE
+#endif
+
+
+/* __GMP_CAST allows us to use static_cast in C++, so our macros are clean
+   to "g++ -Wold-style-cast".
+ + Casts in "extern inline" code within an extern "C" block don't induce + these warnings, so __GMP_CAST only needs to be used on documented + macros. */ + +#ifdef __cplusplus +#define __GMP_CAST(type, expr) (static_cast (expr)) +#else +#define __GMP_CAST(type, expr) ((type) (expr)) +#endif + + +/* An empty "throw ()" means the function doesn't throw any C++ exceptions, + this can save some stack frame info in applications. + + Currently it's given only on functions which never divide-by-zero etc, + don't allocate memory, and are expected to never need to allocate memory. + This leaves open the possibility of a C++ throw from a future GMP + exceptions scheme. + + mpz_set_ui etc are omitted to leave open the lazy allocation scheme + described in doc/tasks.html. mpz_get_d etc are omitted to leave open + exceptions for float overflows. + + Note that __GMP_NOTHROW must be given on any inlines the same as on their + prototypes (for g++ at least, where they're used together). Note also + that g++ 3.0 demands that __GMP_NOTHROW is before other attributes like + __GMP_ATTRIBUTE_PURE. */ + +#if defined (__cplusplus) +#define __GMP_NOTHROW throw () +#else +#define __GMP_NOTHROW +#endif + +/* PORTME: What other compilers have a useful "extern inline"? "static + inline" would be an acceptable substitute if the compiler (or linker) + discards unused statics. */ + +/* gcc has __inline__ in all modes, including strict ansi. Give a prototype + for an inline too, so as to correctly specify "dllimport" on windows, in + case the function is called rather than inlined. */ + +#ifdef __GNUC__ +#if defined(__APPLE_CC__) && (__APPLE_CC__ != 1) /* FSF GCC sets this flag to 1 on Apple machines */ + +#if ! (__APPLE_CC__ >= 5465 && __STDC_VERSION__ >= 199901L) +#define __GMP_EXTERN_INLINE extern __inline__ +#define __GMP_INLINE_PROTOTYPES 1 +#endif + +#else /*GNU CC*/ + +#if defined(__GNUC_STDC_INLINE__) || defined (__GNUC_GNU_INLINE__) +#define __GMP_EXTERN_INLINE extern __inline__ __attribute__((__gnu_inline__)) +#else +#define __GMP_EXTERN_INLINE extern __inline__ +#endif +#define __GMP_INLINE_PROTOTYPES 1 + +#endif +#endif + +/* DEC C (eg. version 5.9) supports "static __inline foo()", even in -std1 + strict ANSI mode. Inlining is done even when not optimizing (ie. -O0 + mode, which is the default), but an unnecessary local copy of foo is + emitted unless -O is used. "extern __inline" is accepted, but the + "extern" appears to be ignored, ie. it becomes a plain global function + but which is inlined within its file. Don't know if all old versions of + DEC C supported __inline, but as a start let's do the right thing for + current versions. */ +#ifdef __DECC +#define __GMP_EXTERN_INLINE static __inline +#endif + +/* SCO OpenUNIX 8 cc supports "static inline foo()" but not in -Xc strict + ANSI mode (__STDC__ is 1 in that mode). Inlining only actually takes + place under -O. Without -O "foo" seems to be emitted whether it's used + or not, which is wasteful. "extern inline foo()" isn't useful, the + "extern" is apparently ignored, so foo is inlined if possible but also + emitted as a global, which causes multiple definition errors when + building a shared libmpir. */ +#ifdef __SCO_VERSION__ +#if __SCO_VERSION__ > 400000000 && __STDC__ != 1 \ + && ! 
defined (__GMP_EXTERN_INLINE) +#define __GMP_EXTERN_INLINE static inline +#endif +#endif + +#if defined _MSC_VER +#define __GMP_EXTERN_INLINE static __inline +#endif + +/* C++ always has "inline" and since it's a normal feature the linker should + discard duplicate non-inlined copies, or if it doesn't then that's a + problem for everyone, not just GMP. */ +#if defined (__cplusplus) && ! defined (__GMP_EXTERN_INLINE) +#define __GMP_EXTERN_INLINE inline +#endif + +/* Don't do any inlining within a configure run, since if the compiler ends + up emitting copies of the code into the object file it can end up + demanding the various support routines (like mpn_popcount) for linking, + making the "alloca" test and perhaps others fail. And on hppa ia64 a + pre-release gcc 3.2 was seen not respecting the "extern" in "extern + __inline__", triggering this problem too. */ +#if defined (__GMP_WITHIN_CONFIGURE) && ! __GMP_WITHIN_CONFIGURE_INLINE +#undef __GMP_EXTERN_INLINE +#endif + +/* By default, don't give a prototype when there's going to be an inline + version. Note in particular that Cray C++ objects to the combination of + prototype and inline. */ +#ifdef __GMP_EXTERN_INLINE +#ifndef __GMP_INLINE_PROTOTYPES +#define __GMP_INLINE_PROTOTYPES 0 +#endif +#else +#define __GMP_INLINE_PROTOTYPES 1 +#endif + + +#define __GMP_ABS(x) ((x) >= 0 ? (x) : -(x)) +#define __GMP_MAX(h,i) ((h) > (i) ? (h) : (i)) + +/* __GMP_USHRT_MAX is not "~ (unsigned short) 0" because short is promoted + to int by "~". */ +#define __GMP_UINT_MAX (~ (unsigned) 0) +#define __GMP_ULONG_MAX (~ (unsigned long) 0) +#define __GMP_USHRT_MAX ((unsigned short) ~0) + + +/* __builtin_expect is in gcc 3.0, and not in 2.95. */ +#if __GMP_GNUC_PREREQ (3,0) +#define __GMP_LIKELY(cond) __builtin_expect ((cond) != 0, 1) +#define __GMP_UNLIKELY(cond) __builtin_expect ((cond) != 0, 0) +#else +#define __GMP_LIKELY(cond) (cond) +#define __GMP_UNLIKELY(cond) (cond) +#endif + +/* Allow direct user access to numerator and denominator of a mpq_t object. */ +#define mpq_numref(Q) (&((Q)->_mp_num)) +#define mpq_denref(Q) (&((Q)->_mp_den)) + + +#if defined (__cplusplus) +extern "C" { +using std::FILE; +#endif + +#define mp_set_memory_functions __gmp_set_memory_functions +__GMP_DECLSPEC void mp_set_memory_functions __GMP_PROTO ((void *(*) (size_t), + void *(*) (void *, size_t, size_t), + void (*) (void *, size_t))) __GMP_NOTHROW; + +#define mp_get_memory_functions __gmp_get_memory_functions +__GMP_DECLSPEC void mp_get_memory_functions __GMP_PROTO ((void *(**) (size_t), + void *(**) (void *, size_t, size_t), + void (**) (void *, size_t))) __GMP_NOTHROW; + +#define mp_bits_per_limb __gmp_bits_per_limb +__GMP_DECLSPEC extern __gmp_const int mp_bits_per_limb; + +#define gmp_errno __gmp_errno +__GMP_DECLSPEC extern int gmp_errno; + +#define gmp_version __gmp_version +__GMP_DECLSPEC extern __gmp_const char * __gmp_const gmp_version; + +#define mpir_version __mpir_version +__GMP_DECLSPEC extern __gmp_const char * __gmp_const mpir_version; + + +/**************** Random number routines. 
****************/ + +#define gmp_randinit_default __gmp_randinit_default +__GMP_DECLSPEC void gmp_randinit_default __GMP_PROTO ((gmp_randstate_t)); + +#define gmp_randinit_lc_2exp __gmp_randinit_lc_2exp +__GMP_DECLSPEC void gmp_randinit_lc_2exp __GMP_PROTO ((gmp_randstate_t, + mpz_srcptr, unsigned long int, + mp_bitcnt_t)); + +#define gmp_randinit_lc_2exp_size __gmp_randinit_lc_2exp_size +__GMP_DECLSPEC int gmp_randinit_lc_2exp_size __GMP_PROTO ((gmp_randstate_t, mp_bitcnt_t)); + +#define gmp_randinit_mt __gmp_randinit_mt +__GMP_DECLSPEC void gmp_randinit_mt __GMP_PROTO ((gmp_randstate_t)); + +#define gmp_randinit_set __gmp_randinit_set +__GMP_DECLSPEC void gmp_randinit_set __GMP_PROTO ((gmp_randstate_t, __gmp_const __gmp_randstate_struct *)); + +#define gmp_randseed __gmp_randseed +__GMP_DECLSPEC void gmp_randseed __GMP_PROTO ((gmp_randstate_t, mpz_srcptr)); + +#define gmp_randseed_ui __gmp_randseed_ui +__GMP_DECLSPEC void gmp_randseed_ui __GMP_PROTO ((gmp_randstate_t, unsigned long int)); + +#define gmp_randclear __gmp_randclear +__GMP_DECLSPEC void gmp_randclear __GMP_PROTO ((gmp_randstate_t)); + +#define gmp_urandomb_ui __gmp_urandomb_ui +__GMP_DECLSPEC unsigned long gmp_urandomb_ui __GMP_PROTO ((gmp_randstate_t, unsigned long)); + +#define gmp_urandomm_ui __gmp_urandomm_ui +__GMP_DECLSPEC unsigned long gmp_urandomm_ui __GMP_PROTO ((gmp_randstate_t, unsigned long)); + + +/**************** Formatted output routines. ****************/ + +#define gmp_asprintf __gmp_asprintf +__GMP_DECLSPEC int gmp_asprintf __GMP_PROTO ((char **, __gmp_const char *, ...)); + +#define gmp_fprintf __gmp_fprintf +#ifdef _GMP_H_HAVE_FILE +__GMP_DECLSPEC int gmp_fprintf __GMP_PROTO ((FILE *, __gmp_const char *, ...)); +#endif + +#define gmp_obstack_printf __gmp_obstack_printf +#if defined (_GMP_H_HAVE_OBSTACK) +__GMP_DECLSPEC int gmp_obstack_printf __GMP_PROTO ((struct obstack *, __gmp_const char *, ...)); +#endif + +#define gmp_obstack_vprintf __gmp_obstack_vprintf +#if defined (_GMP_H_HAVE_OBSTACK) && defined (_GMP_H_HAVE_VA_LIST) +__GMP_DECLSPEC int gmp_obstack_vprintf __GMP_PROTO ((struct obstack *, __gmp_const char *, va_list)); +#endif + +#define gmp_printf __gmp_printf +__GMP_DECLSPEC int gmp_printf __GMP_PROTO ((__gmp_const char *, ...)); + +#define gmp_snprintf __gmp_snprintf +__GMP_DECLSPEC int gmp_snprintf __GMP_PROTO ((char *, size_t, __gmp_const char *, ...)); + +#define gmp_sprintf __gmp_sprintf +__GMP_DECLSPEC int gmp_sprintf __GMP_PROTO ((char *, __gmp_const char *, ...)); + +#define gmp_vasprintf __gmp_vasprintf +#if defined (_GMP_H_HAVE_VA_LIST) +__GMP_DECLSPEC int gmp_vasprintf __GMP_PROTO ((char **, __gmp_const char *, va_list)); +#endif + +#define gmp_vfprintf __gmp_vfprintf +#if defined (_GMP_H_HAVE_FILE) && defined (_GMP_H_HAVE_VA_LIST) +__GMP_DECLSPEC int gmp_vfprintf __GMP_PROTO ((FILE *, __gmp_const char *, va_list)); +#endif + +#define gmp_vprintf __gmp_vprintf +#if defined (_GMP_H_HAVE_VA_LIST) +__GMP_DECLSPEC int gmp_vprintf __GMP_PROTO ((__gmp_const char *, va_list)); +#endif + +#define gmp_vsnprintf __gmp_vsnprintf +#if defined (_GMP_H_HAVE_VA_LIST) +__GMP_DECLSPEC int gmp_vsnprintf __GMP_PROTO ((char *, size_t, __gmp_const char *, va_list)); +#endif + +#define gmp_vsprintf __gmp_vsprintf +#if defined (_GMP_H_HAVE_VA_LIST) +__GMP_DECLSPEC int gmp_vsprintf __GMP_PROTO ((char *, __gmp_const char *, va_list)); +#endif + + +/**************** Formatted input routines. 
****************/ + +#define gmp_fscanf __gmp_fscanf +#ifdef _GMP_H_HAVE_FILE +__GMP_DECLSPEC int gmp_fscanf __GMP_PROTO ((FILE *, __gmp_const char *, ...)); +#endif + +#define gmp_scanf __gmp_scanf +__GMP_DECLSPEC int gmp_scanf __GMP_PROTO ((__gmp_const char *, ...)); + +#define gmp_sscanf __gmp_sscanf +__GMP_DECLSPEC int gmp_sscanf __GMP_PROTO ((__gmp_const char *, __gmp_const char *, ...)); + +#define gmp_vfscanf __gmp_vfscanf +#if defined (_GMP_H_HAVE_FILE) && defined (_GMP_H_HAVE_VA_LIST) +__GMP_DECLSPEC int gmp_vfscanf __GMP_PROTO ((FILE *, __gmp_const char *, va_list)); +#endif + +#define gmp_vscanf __gmp_vscanf +#if defined (_GMP_H_HAVE_VA_LIST) +__GMP_DECLSPEC int gmp_vscanf __GMP_PROTO ((__gmp_const char *, va_list)); +#endif + +#define gmp_vsscanf __gmp_vsscanf +#if defined (_GMP_H_HAVE_VA_LIST) +__GMP_DECLSPEC int gmp_vsscanf __GMP_PROTO ((__gmp_const char *, __gmp_const char *, va_list)); +#endif + + +/**************** Integer (i.e. Z) routines. ****************/ + +#define __GMP_BITS_PER_ULONG (8*sizeof(unsigned long)) + +#define _mpz_realloc __gmpz_realloc +#define mpz_realloc __gmpz_realloc +__GMP_DECLSPEC void *_mpz_realloc __GMP_PROTO ((mpz_ptr, mp_size_t)); + +#define mpz_abs __gmpz_abs +#define __GMP_MPZ_ABS_MIN_ALLOC(x,y) (__GMP_ABS(y->_mp_size)) +#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_abs) +__GMP_DECLSPEC void mpz_abs __GMP_PROTO ((mpz_ptr, mpz_srcptr)); +#endif + +#define __GMP_MPZ_ADD_MIN_ALLOC(x,y,z) (__GMP_MAX(__GMP_ABS(y->_mp_size),__GMP_ABS(z->_mp_size))+1) +#define mpz_add __gmpz_add +__GMP_DECLSPEC void mpz_add __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); + +#define __GMP_MPZ_ADD_UI_MIN_ALLOC(x,y,z) (__GMP_MAX(__GMP_ABS(y->_mp_size),1+(__GMP_BITS_PER_ULONG-1)/GMP_NUMB_BITS)+1) +#define mpz_add_ui __gmpz_add_ui +__GMP_DECLSPEC void mpz_add_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); + +#define mpz_addmul __gmpz_addmul +__GMP_DECLSPEC void mpz_addmul __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); + +#define mpz_addmul_ui __gmpz_addmul_ui +__GMP_DECLSPEC void mpz_addmul_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); + +#define mpz_and __gmpz_and +__GMP_DECLSPEC void mpz_and __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); + +#define mpz_array_init __gmpz_array_init +__GMP_DECLSPEC void mpz_array_init __GMP_PROTO ((mpz_ptr, mp_size_t, mp_size_t)); + +#define mpz_bin_ui __gmpz_bin_ui +__GMP_DECLSPEC void mpz_bin_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); + +#define mpz_bin_uiui __gmpz_bin_uiui +__GMP_DECLSPEC void mpz_bin_uiui __GMP_PROTO ((mpz_ptr, unsigned long int, unsigned long int)); + +#define mpz_cdiv_q __gmpz_cdiv_q +__GMP_DECLSPEC void mpz_cdiv_q __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); + +#define mpz_cdiv_q_2exp __gmpz_cdiv_q_2exp +__GMP_DECLSPEC void mpz_cdiv_q_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long)); + +#define mpz_cdiv_q_ui __gmpz_cdiv_q_ui +__GMP_DECLSPEC unsigned long int mpz_cdiv_q_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); + +#define mpz_cdiv_qr __gmpz_cdiv_qr +__GMP_DECLSPEC void mpz_cdiv_qr __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr)); + +#define mpz_cdiv_qr_ui __gmpz_cdiv_qr_ui +__GMP_DECLSPEC unsigned long int mpz_cdiv_qr_ui __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int)); + +#define mpz_cdiv_r __gmpz_cdiv_r +__GMP_DECLSPEC void mpz_cdiv_r __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); + +#define mpz_cdiv_r_2exp __gmpz_cdiv_r_2exp +__GMP_DECLSPEC void mpz_cdiv_r_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, 
mp_bitcnt_t)); + +#define mpz_cdiv_r_ui __gmpz_cdiv_r_ui +__GMP_DECLSPEC unsigned long int mpz_cdiv_r_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); + +#define mpz_cdiv_ui __gmpz_cdiv_ui +__GMP_DECLSPEC unsigned long int mpz_cdiv_ui __GMP_PROTO ((mpz_srcptr, unsigned long int)) __GMP_ATTRIBUTE_PURE; + +#define mpz_clear __gmpz_clear +__GMP_DECLSPEC void mpz_clear __GMP_PROTO ((mpz_ptr)); + +#define mpz_clears __gmpz_clears +__GMP_DECLSPEC void mpz_clears __GMP_PROTO ((mpz_ptr, ...)); + +#define mpz_clrbit __gmpz_clrbit +__GMP_DECLSPEC void mpz_clrbit __GMP_PROTO ((mpz_ptr, mp_bitcnt_t)); + +#define mpz_cmp __gmpz_cmp +__GMP_DECLSPEC int mpz_cmp __GMP_PROTO ((mpz_srcptr, mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpz_cmp_d __gmpz_cmp_d +__GMP_DECLSPEC int mpz_cmp_d __GMP_PROTO ((mpz_srcptr, double)) __GMP_ATTRIBUTE_PURE; + +#define _mpz_cmp_si __gmpz_cmp_si +__GMP_DECLSPEC int _mpz_cmp_si __GMP_PROTO ((mpz_srcptr, signed long int)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define _mpz_cmp_ui __gmpz_cmp_ui +__GMP_DECLSPEC int _mpz_cmp_ui __GMP_PROTO ((mpz_srcptr, unsigned long int)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpz_cmpabs __gmpz_cmpabs +__GMP_DECLSPEC int mpz_cmpabs __GMP_PROTO ((mpz_srcptr, mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpz_cmpabs_d __gmpz_cmpabs_d +__GMP_DECLSPEC int mpz_cmpabs_d __GMP_PROTO ((mpz_srcptr, double)) __GMP_ATTRIBUTE_PURE; + +#define mpz_cmpabs_ui __gmpz_cmpabs_ui +__GMP_DECLSPEC int mpz_cmpabs_ui __GMP_PROTO ((mpz_srcptr, unsigned long int)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpz_com __gmpz_com +__GMP_DECLSPEC void mpz_com __GMP_PROTO ((mpz_ptr, mpz_srcptr)); + +#define mpz_combit __gmpz_combit +__GMP_DECLSPEC void mpz_combit __GMP_PROTO ((mpz_ptr, mp_bitcnt_t)); + +#define mpz_congruent_p __gmpz_congruent_p +__GMP_DECLSPEC int mpz_congruent_p __GMP_PROTO ((mpz_srcptr, mpz_srcptr, mpz_srcptr)) __GMP_ATTRIBUTE_PURE; + +#define mpz_congruent_2exp_p __gmpz_congruent_2exp_p +__GMP_DECLSPEC int mpz_congruent_2exp_p __GMP_PROTO ((mpz_srcptr, mpz_srcptr, mp_bitcnt_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpz_congruent_ui_p __gmpz_congruent_ui_p +__GMP_DECLSPEC int mpz_congruent_ui_p __GMP_PROTO ((mpz_srcptr, unsigned long, unsigned long)) __GMP_ATTRIBUTE_PURE; + +#define mpz_divexact __gmpz_divexact +__GMP_DECLSPEC void mpz_divexact __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); + +#define mpz_divexact_ui __gmpz_divexact_ui +__GMP_DECLSPEC void mpz_divexact_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long)); + +#define mpz_divisible_p __gmpz_divisible_p +__GMP_DECLSPEC int mpz_divisible_p __GMP_PROTO ((mpz_srcptr, mpz_srcptr)) __GMP_ATTRIBUTE_PURE; + +#define mpz_divisible_ui_p __gmpz_divisible_ui_p +__GMP_DECLSPEC int mpz_divisible_ui_p __GMP_PROTO ((mpz_srcptr, unsigned long)) __GMP_ATTRIBUTE_PURE; + +#define mpz_divisible_2exp_p __gmpz_divisible_2exp_p +__GMP_DECLSPEC int mpz_divisible_2exp_p __GMP_PROTO ((mpz_srcptr, mp_bitcnt_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpz_dump __gmpz_dump +__GMP_DECLSPEC void mpz_dump __GMP_PROTO ((mpz_srcptr)); + +#define mpz_export __gmpz_export +__GMP_DECLSPEC void *mpz_export __GMP_PROTO ((void *, size_t *, int, size_t, int, size_t, mpz_srcptr)); + +#define mpz_fac_ui __gmpz_fac_ui +__GMP_DECLSPEC void mpz_fac_ui __GMP_PROTO ((mpz_ptr, unsigned long int)); + +#define mpz_fdiv_q __gmpz_fdiv_q +__GMP_DECLSPEC void mpz_fdiv_q __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); + +#define mpz_fdiv_q_2exp __gmpz_fdiv_q_2exp +__GMP_DECLSPEC 
void mpz_fdiv_q_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t)); + +#define mpz_fdiv_q_ui __gmpz_fdiv_q_ui +__GMP_DECLSPEC unsigned long int mpz_fdiv_q_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); + +#define mpz_fdiv_qr __gmpz_fdiv_qr +__GMP_DECLSPEC void mpz_fdiv_qr __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr)); + +#define mpz_fdiv_qr_ui __gmpz_fdiv_qr_ui +__GMP_DECLSPEC unsigned long int mpz_fdiv_qr_ui __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int)); + +#define mpz_fdiv_r __gmpz_fdiv_r +__GMP_DECLSPEC void mpz_fdiv_r __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); + +#define mpz_fdiv_r_2exp __gmpz_fdiv_r_2exp +__GMP_DECLSPEC void mpz_fdiv_r_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t)); + +#define mpz_fdiv_r_ui __gmpz_fdiv_r_ui +__GMP_DECLSPEC unsigned long int mpz_fdiv_r_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); + +#define mpz_fdiv_ui __gmpz_fdiv_ui +__GMP_DECLSPEC unsigned long int mpz_fdiv_ui __GMP_PROTO ((mpz_srcptr, unsigned long int)) __GMP_ATTRIBUTE_PURE; + +#define mpz_fib_ui __gmpz_fib_ui +__GMP_DECLSPEC void mpz_fib_ui __GMP_PROTO ((mpz_ptr, unsigned long int)); + +#define mpz_fib2_ui __gmpz_fib2_ui +__GMP_DECLSPEC void mpz_fib2_ui __GMP_PROTO ((mpz_ptr, mpz_ptr, unsigned long int)); + +#define mpz_fits_sint_p __gmpz_fits_sint_p +__GMP_DECLSPEC int mpz_fits_sint_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpz_fits_slong_p __gmpz_fits_slong_p +__GMP_DECLSPEC int mpz_fits_slong_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpz_fits_sshort_p __gmpz_fits_sshort_p +__GMP_DECLSPEC int mpz_fits_sshort_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpz_fits_uint_p __gmpz_fits_uint_p +#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_fits_uint_p) +__GMP_DECLSPEC int mpz_fits_uint_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; +#endif + +#define mpz_fits_ulong_p __gmpz_fits_ulong_p +#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_fits_ulong_p) +__GMP_DECLSPEC int mpz_fits_ulong_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; +#endif + +#define mpz_fits_ushort_p __gmpz_fits_ushort_p +#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_fits_ushort_p) +__GMP_DECLSPEC int mpz_fits_ushort_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; +#endif + +#define mpz_gcd __gmpz_gcd +__GMP_DECLSPEC void mpz_gcd __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); + +#define mpz_gcd_ui __gmpz_gcd_ui +__GMP_DECLSPEC unsigned long int mpz_gcd_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); + +#define mpz_gcdext __gmpz_gcdext +__GMP_DECLSPEC void mpz_gcdext __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr)); + +#define mpz_get_d __gmpz_get_d +__GMP_DECLSPEC double mpz_get_d __GMP_PROTO ((mpz_srcptr)) __GMP_ATTRIBUTE_PURE; + +#define mpz_get_d_2exp __gmpz_get_d_2exp +__GMP_DECLSPEC double mpz_get_d_2exp __GMP_PROTO ((signed long int *, mpz_srcptr)); + +#define mpz_get_si __gmpz_get_si +__GMP_DECLSPEC /* signed */ long int mpz_get_si __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpz_get_str __gmpz_get_str +__GMP_DECLSPEC char *mpz_get_str __GMP_PROTO ((char *, int, mpz_srcptr)); + +#define mpz_get_ui __gmpz_get_ui +#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_get_ui) +__GMP_DECLSPEC unsigned long int mpz_get_ui __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; +#endif + +#define mpz_getlimbn 
__gmpz_getlimbn +#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_getlimbn) +__GMP_DECLSPEC mp_limb_t mpz_getlimbn __GMP_PROTO ((mpz_srcptr, mp_size_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; +#endif + +#define mpz_hamdist __gmpz_hamdist +__GMP_DECLSPEC mp_bitcnt_t mpz_hamdist __GMP_PROTO ((mpz_srcptr, mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpz_import __gmpz_import +__GMP_DECLSPEC void mpz_import __GMP_PROTO ((mpz_ptr, size_t, int, size_t, int, size_t, __gmp_const void *)); + +#define mpz_init __gmpz_init +__GMP_DECLSPEC void mpz_init __GMP_PROTO ((mpz_ptr)); + +#define mpz_init2 __gmpz_init2 +__GMP_DECLSPEC void mpz_init2 __GMP_PROTO ((mpz_ptr, mp_bitcnt_t)); + +#define mpz_inits __gmpz_inits +__GMP_DECLSPEC void mpz_inits __GMP_PROTO ((mpz_ptr, ...)); + +#define mpz_init_set __gmpz_init_set +__GMP_DECLSPEC void mpz_init_set __GMP_PROTO ((mpz_ptr, mpz_srcptr)); + +#define mpz_init_set_d __gmpz_init_set_d +__GMP_DECLSPEC void mpz_init_set_d __GMP_PROTO ((mpz_ptr, double)); + +#define mpz_init_set_si __gmpz_init_set_si +__GMP_DECLSPEC void mpz_init_set_si __GMP_PROTO ((mpz_ptr, signed long int)); + +#define mpz_init_set_str __gmpz_init_set_str +__GMP_DECLSPEC int mpz_init_set_str __GMP_PROTO ((mpz_ptr, __gmp_const char *, int)); + +#define mpz_init_set_ui __gmpz_init_set_ui +__GMP_DECLSPEC void mpz_init_set_ui __GMP_PROTO ((mpz_ptr, unsigned long int)); + +#define mpz_inp_raw __gmpz_inp_raw +#ifdef _GMP_H_HAVE_FILE +__GMP_DECLSPEC size_t mpz_inp_raw __GMP_PROTO ((mpz_ptr, FILE *)); +#endif + +#define mpz_inp_str __gmpz_inp_str +#ifdef _GMP_H_HAVE_FILE +__GMP_DECLSPEC size_t mpz_inp_str __GMP_PROTO ((mpz_ptr, FILE *, int)); +#endif + +#define mpz_invert __gmpz_invert +__GMP_DECLSPEC int mpz_invert __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); + +#define mpz_ior __gmpz_ior +__GMP_DECLSPEC void mpz_ior __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); + +#define mpz_jacobi __gmpz_jacobi +__GMP_DECLSPEC int mpz_jacobi __GMP_PROTO ((mpz_srcptr, mpz_srcptr)) __GMP_ATTRIBUTE_PURE; + +#define mpz_kronecker mpz_jacobi /* alias */ + +#define mpz_kronecker_si __gmpz_kronecker_si +__GMP_DECLSPEC int mpz_kronecker_si __GMP_PROTO ((mpz_srcptr, long)) __GMP_ATTRIBUTE_PURE; + +#define mpz_kronecker_ui __gmpz_kronecker_ui +__GMP_DECLSPEC int mpz_kronecker_ui __GMP_PROTO ((mpz_srcptr, unsigned long)) __GMP_ATTRIBUTE_PURE; + +#define mpz_si_kronecker __gmpz_si_kronecker +__GMP_DECLSPEC int mpz_si_kronecker __GMP_PROTO ((long, mpz_srcptr)) __GMP_ATTRIBUTE_PURE; + +#define mpz_ui_kronecker __gmpz_ui_kronecker +__GMP_DECLSPEC int mpz_ui_kronecker __GMP_PROTO ((unsigned long, mpz_srcptr)) __GMP_ATTRIBUTE_PURE; + +#define mpz_lcm __gmpz_lcm +__GMP_DECLSPEC void mpz_lcm __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); + +#define mpz_lcm_ui __gmpz_lcm_ui +__GMP_DECLSPEC void mpz_lcm_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long)); + +#define mpz_legendre mpz_jacobi /* alias */ + +#define mpz_lucnum_ui __gmpz_lucnum_ui +__GMP_DECLSPEC void mpz_lucnum_ui __GMP_PROTO ((mpz_ptr, unsigned long int)); + +#define mpz_lucnum2_ui __gmpz_lucnum2_ui +__GMP_DECLSPEC void mpz_lucnum2_ui __GMP_PROTO ((mpz_ptr, mpz_ptr, unsigned long int)); + +#define mpz_millerrabin __gmpz_millerrabin +__GMP_DECLSPEC int mpz_millerrabin __GMP_PROTO ((mpz_srcptr, int)) __GMP_ATTRIBUTE_PURE; + +#define mpz_mod __gmpz_mod +__GMP_DECLSPEC void mpz_mod __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); + +#define mpz_mod_ui mpz_fdiv_r_ui /* same as fdiv_r because divisor unsigned */ + +#define 
__GMP_MPZ_MUL_MIN_ALLOC(x,y,z) (__GMP_ABS(y->_mp_size)+__GMP_ABS(z->_mp_size)+1) +#define mpz_mul __gmpz_mul +__GMP_DECLSPEC void mpz_mul __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); + +#define mpz_mul_2exp __gmpz_mul_2exp +__GMP_DECLSPEC void mpz_mul_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t)); + +#define __GMP_MPZ_MUL_SI_MIN_ALLOC(x,y,z) (__GMP_ABS(y->_mp_size)+(__GMP_BITS_PER_ULONG-1)/GMP_NUMB_BITS+1) +#define mpz_mul_si __gmpz_mul_si +__GMP_DECLSPEC void mpz_mul_si __GMP_PROTO ((mpz_ptr, mpz_srcptr, long int)); + +#define __GMP_MPZ_MUL_UI_MIN_ALLOC(x,y,z) (__GMP_ABS(y->_mp_size)+(__GMP_BITS_PER_ULONG-1)/GMP_NUMB_BITS+1) +#define mpz_mul_ui __gmpz_mul_ui +__GMP_DECLSPEC void mpz_mul_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); + +#define mpz_neg __gmpz_neg +#define __GMP_MPZ_NEG_MIN_ALLOC(x,y) (__GMP_ABS(y->_mp_size)) +#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_neg) +__GMP_DECLSPEC void mpz_neg __GMP_PROTO ((mpz_ptr, mpz_srcptr)); +#endif + +#define mpz_nextprime __gmpz_nextprime +__GMP_DECLSPEC void mpz_nextprime __GMP_PROTO ((mpz_ptr, mpz_srcptr)); + +#define mpz_next_likely_prime __gmpz_next_likely_prime +__GMP_DECLSPEC void mpz_next_likely_prime __GMP_PROTO ((mpz_ptr, mpz_srcptr,gmp_randstate_t)); + +#define mpz_out_raw __gmpz_out_raw +#ifdef _GMP_H_HAVE_FILE +__GMP_DECLSPEC size_t mpz_out_raw __GMP_PROTO ((FILE *, mpz_srcptr)); +#endif + +#define mpz_out_str __gmpz_out_str +#ifdef _GMP_H_HAVE_FILE +__GMP_DECLSPEC size_t mpz_out_str __GMP_PROTO ((FILE *, int, mpz_srcptr)); +#endif + +#define mpz_perfect_power_p __gmpz_perfect_power_p +__GMP_DECLSPEC int mpz_perfect_power_p __GMP_PROTO ((mpz_srcptr)) __GMP_ATTRIBUTE_PURE; + +#define mpz_perfect_square_p __gmpz_perfect_square_p +#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_perfect_square_p) +__GMP_DECLSPEC int mpz_perfect_square_p __GMP_PROTO ((mpz_srcptr)) __GMP_ATTRIBUTE_PURE; +#endif + +#define mpz_popcount __gmpz_popcount +#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_popcount) +__GMP_DECLSPEC mp_bitcnt_t mpz_popcount __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; +#endif + +#define mpz_pow_ui __gmpz_pow_ui +__GMP_DECLSPEC void mpz_pow_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); + +#define mpz_powm __gmpz_powm +__GMP_DECLSPEC void mpz_powm __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr)); + +#define mpz_powm_ui __gmpz_powm_ui +__GMP_DECLSPEC void mpz_powm_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int, mpz_srcptr)); + +#define mpz_probab_prime_p __gmpz_probab_prime_p +__GMP_DECLSPEC int mpz_probab_prime_p __GMP_PROTO ((mpz_srcptr, int)) __GMP_ATTRIBUTE_PURE; + +#define mpz_probable_prime_p __gmpz_probable_prime_p +__GMP_DECLSPEC int mpz_probable_prime_p __GMP_PROTO ((mpz_srcptr,gmp_randstate_t, int,unsigned long)); + +#define mpz_likely_prime_p __gmpz_likely_prime_p +__GMP_DECLSPEC int mpz_likely_prime_p __GMP_PROTO ((mpz_srcptr,gmp_randstate_t, unsigned long)); + +#define mpz_realloc2 __gmpz_realloc2 +__GMP_DECLSPEC void mpz_realloc2 __GMP_PROTO ((mpz_ptr, mp_bitcnt_t)); + +#define mpz_remove __gmpz_remove +__GMP_DECLSPEC unsigned long int mpz_remove __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); + +#define mpz_root __gmpz_root +__GMP_DECLSPEC int mpz_root __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); + +#define mpz_nthroot __gmpz_nthroot +__GMP_DECLSPEC void mpz_nthroot __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); + +#define mpz_rootrem __gmpz_rootrem +__GMP_DECLSPEC void mpz_rootrem __GMP_PROTO 
((mpz_ptr,mpz_ptr, mpz_srcptr, unsigned long int)); + +#define mpz_rrandomb __gmpz_rrandomb +__GMP_DECLSPEC void mpz_rrandomb __GMP_PROTO ((mpz_ptr, gmp_randstate_t, mp_bitcnt_t)); + +#define mpz_scan0 __gmpz_scan0 +__GMP_DECLSPEC mp_bitcnt_t mpz_scan0 __GMP_PROTO ((mpz_srcptr, mp_bitcnt_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpz_scan1 __gmpz_scan1 +__GMP_DECLSPEC mp_bitcnt_t mpz_scan1 __GMP_PROTO ((mpz_srcptr, mp_bitcnt_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define __GMP_MPZ_SET_MIN_ALLOC(x,y) __GMP_ABS(y->_mp_size) +#define mpz_set __gmpz_set +__GMP_DECLSPEC void mpz_set __GMP_PROTO ((mpz_ptr, mpz_srcptr)); + +#define mpz_set_d __gmpz_set_d +__GMP_DECLSPEC void mpz_set_d __GMP_PROTO ((mpz_ptr, double)); + +#define mpz_set_f __gmpz_set_f +__GMP_DECLSPEC void mpz_set_f __GMP_PROTO ((mpz_ptr, mpf_srcptr)); + +#define mpz_set_q __gmpz_set_q +#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_set_q) +__GMP_DECLSPEC void mpz_set_q __GMP_PROTO ((mpz_ptr, mpq_srcptr)); +#endif + +#define __GMP_MPZ_SET_SI_MIN_ALLOC(x,y) (1+(__GMP_BITS_PER_ULONG-1)/GMP_NUMB_BITS) +#define mpz_set_si __gmpz_set_si +__GMP_DECLSPEC void mpz_set_si __GMP_PROTO ((mpz_ptr, signed long int)); + +#define mpz_set_str __gmpz_set_str +__GMP_DECLSPEC int mpz_set_str __GMP_PROTO ((mpz_ptr, __gmp_const char *, int)); + +#define __GMP_MPZ_SET_UI_MIN_ALLOC(x,y) (1+(__GMP_BITS_PER_ULONG-1)/GMP_NUMB_BITS) +#define mpz_set_ui __gmpz_set_ui +__GMP_DECLSPEC void mpz_set_ui __GMP_PROTO ((mpz_ptr, unsigned long int)); + +#define mpz_setbit __gmpz_setbit +__GMP_DECLSPEC void mpz_setbit __GMP_PROTO ((mpz_ptr, mp_bitcnt_t)); + +#define mpz_size __gmpz_size +#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_size) +__GMP_DECLSPEC size_t mpz_size __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; +#endif + +#define mpz_sizeinbase __gmpz_sizeinbase +__GMP_DECLSPEC size_t mpz_sizeinbase __GMP_PROTO ((mpz_srcptr, int)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpz_sqrt __gmpz_sqrt +__GMP_DECLSPEC void mpz_sqrt __GMP_PROTO ((mpz_ptr, mpz_srcptr)); + +#define mpz_sqrtrem __gmpz_sqrtrem +__GMP_DECLSPEC void mpz_sqrtrem __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr)); + +#define __GMP_MPZ_SUB_MIN_ALLOC(x,y,z) (__GMP_MAX(__GMP_ABS(y->_mp_size),__GMP_ABS(z->_mp_size))+1) +#define mpz_sub __gmpz_sub +__GMP_DECLSPEC void mpz_sub __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); + +#define __GMP_MPZ_SUB_UI_MIN_ALLOC(x,y,z) (__GMP_MAX(__GMP_ABS(y->_mp_size),1+(__GMP_BITS_PER_ULONG-1)/GMP_NUMB_BITS)+1) +#define mpz_sub_ui __gmpz_sub_ui +__GMP_DECLSPEC void mpz_sub_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); + +#define __GMP_MPZ_UI_SUB_MIN_ALLOC(x,y,z) (__GMP_MAX(__GMP_ABS(z->_mp_size),1+(__GMP_BITS_PER_ULONG-1)/GMP_NUMB_BITS)+1) +#define mpz_ui_sub __gmpz_ui_sub +__GMP_DECLSPEC void mpz_ui_sub __GMP_PROTO ((mpz_ptr, unsigned long int, mpz_srcptr)); + +#define mpz_submul __gmpz_submul +__GMP_DECLSPEC void mpz_submul __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); + +#define mpz_submul_ui __gmpz_submul_ui +__GMP_DECLSPEC void mpz_submul_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); + +#define mpz_swap __gmpz_swap +__GMP_DECLSPEC void mpz_swap __GMP_PROTO ((mpz_ptr, mpz_ptr)) __GMP_NOTHROW; + +#define mpz_tdiv_ui __gmpz_tdiv_ui +__GMP_DECLSPEC unsigned long int mpz_tdiv_ui __GMP_PROTO ((mpz_srcptr, unsigned long int)) __GMP_ATTRIBUTE_PURE; + +#define mpz_tdiv_q __gmpz_tdiv_q +__GMP_DECLSPEC void mpz_tdiv_q __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); + +#define mpz_tdiv_q_2exp 
__gmpz_tdiv_q_2exp +__GMP_DECLSPEC void mpz_tdiv_q_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t)); + +#define mpz_tdiv_q_ui __gmpz_tdiv_q_ui +__GMP_DECLSPEC unsigned long int mpz_tdiv_q_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); + +#define mpz_tdiv_qr __gmpz_tdiv_qr +__GMP_DECLSPEC void mpz_tdiv_qr __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr)); + +#define mpz_tdiv_qr_ui __gmpz_tdiv_qr_ui +__GMP_DECLSPEC unsigned long int mpz_tdiv_qr_ui __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int)); + +#define mpz_tdiv_r __gmpz_tdiv_r +__GMP_DECLSPEC void mpz_tdiv_r __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); + +#define mpz_tdiv_r_2exp __gmpz_tdiv_r_2exp +__GMP_DECLSPEC void mpz_tdiv_r_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t)); + +#define mpz_tdiv_r_ui __gmpz_tdiv_r_ui +__GMP_DECLSPEC unsigned long int mpz_tdiv_r_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); + +#define mpz_tstbit __gmpz_tstbit +__GMP_DECLSPEC int mpz_tstbit __GMP_PROTO ((mpz_srcptr, mp_bitcnt_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpz_ui_pow_ui __gmpz_ui_pow_ui +__GMP_DECLSPEC void mpz_ui_pow_ui __GMP_PROTO ((mpz_ptr, unsigned long int, unsigned long int)); + +#define mpz_urandomb __gmpz_urandomb +__GMP_DECLSPEC void mpz_urandomb __GMP_PROTO ((mpz_ptr, gmp_randstate_t, mp_bitcnt_t)); + +#define mpz_urandomm __gmpz_urandomm +__GMP_DECLSPEC void mpz_urandomm __GMP_PROTO ((mpz_ptr, gmp_randstate_t, mpz_srcptr)); + +#define mpz_xor __gmpz_xor +#define mpz_eor __gmpz_xor +__GMP_DECLSPEC void mpz_xor __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); + + +/**************** Rational (i.e. Q) routines. ****************/ + +#define mpq_abs __gmpq_abs +#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpq_abs) +__GMP_DECLSPEC void mpq_abs __GMP_PROTO ((mpq_ptr, mpq_srcptr)); +#endif + +#define mpq_add __gmpq_add +__GMP_DECLSPEC void mpq_add __GMP_PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr)); + +#define mpq_canonicalize __gmpq_canonicalize +__GMP_DECLSPEC void mpq_canonicalize __GMP_PROTO ((mpq_ptr)); + +#define mpq_clear __gmpq_clear +__GMP_DECLSPEC void mpq_clear __GMP_PROTO ((mpq_ptr)); + +#define mpq_clears __gmpq_clears +__GMP_DECLSPEC void mpq_clears __GMP_PROTO ((mpq_ptr, ...)); + +#define mpq_cmp __gmpq_cmp +__GMP_DECLSPEC int mpq_cmp __GMP_PROTO ((mpq_srcptr, mpq_srcptr)) __GMP_ATTRIBUTE_PURE; + +#define _mpq_cmp_si __gmpq_cmp_si +__GMP_DECLSPEC int _mpq_cmp_si __GMP_PROTO ((mpq_srcptr, long, unsigned long)) __GMP_ATTRIBUTE_PURE; + +#define _mpq_cmp_ui __gmpq_cmp_ui +__GMP_DECLSPEC int _mpq_cmp_ui __GMP_PROTO ((mpq_srcptr, unsigned long int, unsigned long int)) __GMP_ATTRIBUTE_PURE; + +#define mpq_div __gmpq_div +__GMP_DECLSPEC void mpq_div __GMP_PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr)); + +#define mpq_div_2exp __gmpq_div_2exp +__GMP_DECLSPEC void mpq_div_2exp __GMP_PROTO ((mpq_ptr, mpq_srcptr, mp_bitcnt_t)); + +#define mpq_equal __gmpq_equal +__GMP_DECLSPEC int mpq_equal __GMP_PROTO ((mpq_srcptr, mpq_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpq_get_num __gmpq_get_num +__GMP_DECLSPEC void mpq_get_num __GMP_PROTO ((mpz_ptr, mpq_srcptr)); + +#define mpq_get_den __gmpq_get_den +__GMP_DECLSPEC void mpq_get_den __GMP_PROTO ((mpz_ptr, mpq_srcptr)); + +#define mpq_get_d __gmpq_get_d +__GMP_DECLSPEC double mpq_get_d __GMP_PROTO ((mpq_srcptr)) __GMP_ATTRIBUTE_PURE; + +#define mpq_get_str __gmpq_get_str +__GMP_DECLSPEC char *mpq_get_str __GMP_PROTO ((char *, int, mpq_srcptr)); + +#define mpq_init __gmpq_init +__GMP_DECLSPEC void mpq_init 
__GMP_PROTO ((mpq_ptr)); + +#define mpq_inits __gmpq_inits +__GMP_DECLSPEC void mpq_inits __GMP_PROTO ((mpq_ptr, ...)); + +#define mpq_inp_str __gmpq_inp_str +#ifdef _GMP_H_HAVE_FILE +__GMP_DECLSPEC size_t mpq_inp_str __GMP_PROTO ((mpq_ptr, FILE *, int)); +#endif + +#define mpq_inv __gmpq_inv +__GMP_DECLSPEC void mpq_inv __GMP_PROTO ((mpq_ptr, mpq_srcptr)); + +#define mpq_mul __gmpq_mul +__GMP_DECLSPEC void mpq_mul __GMP_PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr)); + +#define mpq_mul_2exp __gmpq_mul_2exp +__GMP_DECLSPEC void mpq_mul_2exp __GMP_PROTO ((mpq_ptr, mpq_srcptr, mp_bitcnt_t)); + +#define mpq_neg __gmpq_neg +#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpq_neg) +__GMP_DECLSPEC void mpq_neg __GMP_PROTO ((mpq_ptr, mpq_srcptr)); +#endif + +#define mpq_out_str __gmpq_out_str +#ifdef _GMP_H_HAVE_FILE +__GMP_DECLSPEC size_t mpq_out_str __GMP_PROTO ((FILE *, int, mpq_srcptr)); +#endif + +#define mpq_set __gmpq_set +__GMP_DECLSPEC void mpq_set __GMP_PROTO ((mpq_ptr, mpq_srcptr)); + +#define mpq_set_d __gmpq_set_d +__GMP_DECLSPEC void mpq_set_d __GMP_PROTO ((mpq_ptr, double)); + +#define mpq_set_den __gmpq_set_den +__GMP_DECLSPEC void mpq_set_den __GMP_PROTO ((mpq_ptr, mpz_srcptr)); + +#define mpq_set_f __gmpq_set_f +__GMP_DECLSPEC void mpq_set_f __GMP_PROTO ((mpq_ptr, mpf_srcptr)); + +#define mpq_set_num __gmpq_set_num +__GMP_DECLSPEC void mpq_set_num __GMP_PROTO ((mpq_ptr, mpz_srcptr)); + +#define mpq_set_si __gmpq_set_si +__GMP_DECLSPEC void mpq_set_si __GMP_PROTO ((mpq_ptr, signed long int, unsigned long int)); + +#define mpq_set_str __gmpq_set_str +__GMP_DECLSPEC int mpq_set_str __GMP_PROTO ((mpq_ptr, __gmp_const char *, int)); + +#define mpq_set_ui __gmpq_set_ui +__GMP_DECLSPEC void mpq_set_ui __GMP_PROTO ((mpq_ptr, unsigned long int, unsigned long int)); + +#define mpq_set_z __gmpq_set_z +__GMP_DECLSPEC void mpq_set_z __GMP_PROTO ((mpq_ptr, mpz_srcptr)); + +#define mpq_sub __gmpq_sub +__GMP_DECLSPEC void mpq_sub __GMP_PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr)); + +#define mpq_swap __gmpq_swap +__GMP_DECLSPEC void mpq_swap __GMP_PROTO ((mpq_ptr, mpq_ptr)) __GMP_NOTHROW; + + +/**************** Float (i.e. F) routines. 
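+
+   A short sketch (names illustrative, precision choice arbitrary):
+   mpf_t variables pick up the default precision at init time, and
+   in-place operands are permitted.
+
+     mpf_set_default_prec (256);   // in bits
+     mpf_t x;
+     mpf_init (x);                 // uses the default precision
+     mpf_set_ui (x, 2);
+     mpf_sqrt (x, x);              // sqrt(2), computed in place
+     gmp_printf ("%.40Ff\n", x);
+     mpf_clear (x);
+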
****************/ + +#define mpf_abs __gmpf_abs +__GMP_DECLSPEC void mpf_abs __GMP_PROTO ((mpf_ptr, mpf_srcptr)); + +#define mpf_add __gmpf_add +__GMP_DECLSPEC void mpf_add __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr)); + +#define mpf_add_ui __gmpf_add_ui +__GMP_DECLSPEC void mpf_add_ui __GMP_PROTO ((mpf_ptr, mpf_srcptr, unsigned long int)); +#define mpf_ceil __gmpf_ceil +__GMP_DECLSPEC void mpf_ceil __GMP_PROTO ((mpf_ptr, mpf_srcptr)); + +#define mpf_clear __gmpf_clear +__GMP_DECLSPEC void mpf_clear __GMP_PROTO ((mpf_ptr)); + +#define mpf_clears __gmpf_clears +__GMP_DECLSPEC void mpf_clears __GMP_PROTO ((mpf_ptr, ...)); + +#define mpf_cmp __gmpf_cmp +__GMP_DECLSPEC int mpf_cmp __GMP_PROTO ((mpf_srcptr, mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpf_cmp_d __gmpf_cmp_d +__GMP_DECLSPEC int mpf_cmp_d __GMP_PROTO ((mpf_srcptr, double)) __GMP_ATTRIBUTE_PURE; + +#define mpf_cmp_si __gmpf_cmp_si +__GMP_DECLSPEC int mpf_cmp_si __GMP_PROTO ((mpf_srcptr, signed long int)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpf_cmp_ui __gmpf_cmp_ui +__GMP_DECLSPEC int mpf_cmp_ui __GMP_PROTO ((mpf_srcptr, unsigned long int)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpf_div __gmpf_div +__GMP_DECLSPEC void mpf_div __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr)); + +#define mpf_div_2exp __gmpf_div_2exp +__GMP_DECLSPEC void mpf_div_2exp __GMP_PROTO ((mpf_ptr, mpf_srcptr, mp_bitcnt_t)); + +#define mpf_div_ui __gmpf_div_ui +__GMP_DECLSPEC void mpf_div_ui __GMP_PROTO ((mpf_ptr, mpf_srcptr, unsigned long int)); + +#define mpf_dump __gmpf_dump +__GMP_DECLSPEC void mpf_dump __GMP_PROTO ((mpf_srcptr)); + +#define mpf_eq __gmpf_eq +__GMP_DECLSPEC int mpf_eq __GMP_PROTO ((mpf_srcptr, mpf_srcptr, unsigned long int)) __GMP_ATTRIBUTE_PURE; + +#define mpf_fits_sint_p __gmpf_fits_sint_p +__GMP_DECLSPEC int mpf_fits_sint_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpf_fits_slong_p __gmpf_fits_slong_p +__GMP_DECLSPEC int mpf_fits_slong_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpf_fits_sshort_p __gmpf_fits_sshort_p +__GMP_DECLSPEC int mpf_fits_sshort_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpf_fits_uint_p __gmpf_fits_uint_p +__GMP_DECLSPEC int mpf_fits_uint_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpf_fits_ulong_p __gmpf_fits_ulong_p +__GMP_DECLSPEC int mpf_fits_ulong_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpf_fits_ushort_p __gmpf_fits_ushort_p +__GMP_DECLSPEC int mpf_fits_ushort_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpf_floor __gmpf_floor +__GMP_DECLSPEC void mpf_floor __GMP_PROTO ((mpf_ptr, mpf_srcptr)); + +#define mpf_get_d __gmpf_get_d +__GMP_DECLSPEC double mpf_get_d __GMP_PROTO ((mpf_srcptr)) __GMP_ATTRIBUTE_PURE; + +#define mpf_get_d_2exp __gmpf_get_d_2exp +__GMP_DECLSPEC double mpf_get_d_2exp __GMP_PROTO ((signed long int *, mpf_srcptr)); + +#define mpf_get_default_prec __gmpf_get_default_prec +__GMP_DECLSPEC mp_bitcnt_t mpf_get_default_prec __GMP_PROTO ((void)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpf_get_prec __gmpf_get_prec +__GMP_DECLSPEC mp_bitcnt_t mpf_get_prec __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpf_get_si __gmpf_get_si +__GMP_DECLSPEC long mpf_get_si __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpf_get_str __gmpf_get_str +__GMP_DECLSPEC char *mpf_get_str __GMP_PROTO ((char *, mp_exp_t *, int, size_t, 
mpf_srcptr)); + +#define mpf_get_ui __gmpf_get_ui +__GMP_DECLSPEC unsigned long mpf_get_ui __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpf_init __gmpf_init +__GMP_DECLSPEC void mpf_init __GMP_PROTO ((mpf_ptr)); + +#define mpf_init2 __gmpf_init2 +__GMP_DECLSPEC void mpf_init2 __GMP_PROTO ((mpf_ptr, mp_bitcnt_t)); + +#define mpf_inits __gmpf_inits +__GMP_DECLSPEC void mpf_inits __GMP_PROTO ((mpf_ptr, ...)); + +#define mpf_init_set __gmpf_init_set +__GMP_DECLSPEC void mpf_init_set __GMP_PROTO ((mpf_ptr, mpf_srcptr)); + +#define mpf_init_set_d __gmpf_init_set_d +__GMP_DECLSPEC void mpf_init_set_d __GMP_PROTO ((mpf_ptr, double)); + +#define mpf_init_set_si __gmpf_init_set_si +__GMP_DECLSPEC void mpf_init_set_si __GMP_PROTO ((mpf_ptr, signed long int)); + +#define mpf_init_set_str __gmpf_init_set_str +__GMP_DECLSPEC int mpf_init_set_str __GMP_PROTO ((mpf_ptr, __gmp_const char *, int)); + +#define mpf_init_set_ui __gmpf_init_set_ui +__GMP_DECLSPEC void mpf_init_set_ui __GMP_PROTO ((mpf_ptr, unsigned long int)); + +#define mpf_inp_str __gmpf_inp_str +#ifdef _GMP_H_HAVE_FILE +__GMP_DECLSPEC size_t mpf_inp_str __GMP_PROTO ((mpf_ptr, FILE *, int)); +#endif + +#define mpf_integer_p __gmpf_integer_p +__GMP_DECLSPEC int mpf_integer_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpf_mul __gmpf_mul +__GMP_DECLSPEC void mpf_mul __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr)); + +#define mpf_mul_2exp __gmpf_mul_2exp +__GMP_DECLSPEC void mpf_mul_2exp __GMP_PROTO ((mpf_ptr, mpf_srcptr, mp_bitcnt_t)); + +#define mpf_mul_ui __gmpf_mul_ui +__GMP_DECLSPEC void mpf_mul_ui __GMP_PROTO ((mpf_ptr, mpf_srcptr, unsigned long int)); + +#define mpf_neg __gmpf_neg +__GMP_DECLSPEC void mpf_neg __GMP_PROTO ((mpf_ptr, mpf_srcptr)); + +#define mpf_out_str __gmpf_out_str +#ifdef _GMP_H_HAVE_FILE +__GMP_DECLSPEC size_t mpf_out_str __GMP_PROTO ((FILE *, int, size_t, mpf_srcptr)); +#endif + +#define mpf_pow_ui __gmpf_pow_ui +__GMP_DECLSPEC void mpf_pow_ui __GMP_PROTO ((mpf_ptr, mpf_srcptr, unsigned long int)); + +#define mpf_random2 __gmpf_random2 +__GMP_DECLSPEC void mpf_random2 __GMP_PROTO ((mpf_ptr, mp_size_t, mp_exp_t)); + +#define mpf_rrandomb __gmpf_rrandomb +__GMP_DECLSPEC void mpf_rrandomb __GMP_PROTO ((mpf_ptr, gmp_randstate_t, mp_size_t, mp_exp_t)); + +#define mpf_reldiff __gmpf_reldiff +__GMP_DECLSPEC void mpf_reldiff __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr)); + +#define mpf_set __gmpf_set +__GMP_DECLSPEC void mpf_set __GMP_PROTO ((mpf_ptr, mpf_srcptr)); + +#define mpf_set_d __gmpf_set_d +__GMP_DECLSPEC void mpf_set_d __GMP_PROTO ((mpf_ptr, double)); + +#define mpf_set_default_prec __gmpf_set_default_prec +__GMP_DECLSPEC void mpf_set_default_prec __GMP_PROTO ((mp_bitcnt_t)) __GMP_NOTHROW; + +#define mpf_set_prec __gmpf_set_prec +__GMP_DECLSPEC void mpf_set_prec __GMP_PROTO ((mpf_ptr, mp_bitcnt_t)); + +#define mpf_set_prec_raw __gmpf_set_prec_raw +__GMP_DECLSPEC void mpf_set_prec_raw __GMP_PROTO ((mpf_ptr, mp_bitcnt_t)) __GMP_NOTHROW; + +#define mpf_set_q __gmpf_set_q +__GMP_DECLSPEC void mpf_set_q __GMP_PROTO ((mpf_ptr, mpq_srcptr)); + +#define mpf_set_si __gmpf_set_si +__GMP_DECLSPEC void mpf_set_si __GMP_PROTO ((mpf_ptr, signed long int)); + +#define mpf_set_str __gmpf_set_str +__GMP_DECLSPEC int mpf_set_str __GMP_PROTO ((mpf_ptr, __gmp_const char *, int)); + +#define mpf_set_ui __gmpf_set_ui +__GMP_DECLSPEC void mpf_set_ui __GMP_PROTO ((mpf_ptr, unsigned long int)); + +#define mpf_set_z __gmpf_set_z +__GMP_DECLSPEC void mpf_set_z __GMP_PROTO ((mpf_ptr, 
mpz_srcptr)); + +#define mpf_size __gmpf_size +__GMP_DECLSPEC size_t mpf_size __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpf_sqrt __gmpf_sqrt +__GMP_DECLSPEC void mpf_sqrt __GMP_PROTO ((mpf_ptr, mpf_srcptr)); + +#define mpf_sqrt_ui __gmpf_sqrt_ui +__GMP_DECLSPEC void mpf_sqrt_ui __GMP_PROTO ((mpf_ptr, unsigned long int)); + +#define mpf_sub __gmpf_sub +__GMP_DECLSPEC void mpf_sub __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr)); + +#define mpf_sub_ui __gmpf_sub_ui +__GMP_DECLSPEC void mpf_sub_ui __GMP_PROTO ((mpf_ptr, mpf_srcptr, unsigned long int)); + +#define mpf_swap __gmpf_swap +__GMP_DECLSPEC void mpf_swap __GMP_PROTO ((mpf_ptr, mpf_ptr)) __GMP_NOTHROW; + +#define mpf_trunc __gmpf_trunc +__GMP_DECLSPEC void mpf_trunc __GMP_PROTO ((mpf_ptr, mpf_srcptr)); + +#define mpf_ui_div __gmpf_ui_div +__GMP_DECLSPEC void mpf_ui_div __GMP_PROTO ((mpf_ptr, unsigned long int, mpf_srcptr)); + +#define mpf_ui_sub __gmpf_ui_sub +__GMP_DECLSPEC void mpf_ui_sub __GMP_PROTO ((mpf_ptr, unsigned long int, mpf_srcptr)); + +#define mpf_urandomb __gmpf_urandomb +__GMP_DECLSPEC void mpf_urandomb __GMP_PROTO ((mpf_t, gmp_randstate_t, mp_bitcnt_t)); + + +/************ Low level positive-integer (i.e. N) routines. ************/ + +/* This is ugly, but we need to make user calls reach the prefixed function. */ + +#define mpn_add __MPN(add) +#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_add) +__GMP_DECLSPEC mp_limb_t mpn_add __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr,mp_size_t)); +#endif + +#define mpn_add_1 __MPN(add_1) +#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_add_1) +__GMP_DECLSPEC mp_limb_t mpn_add_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)) __GMP_NOTHROW; +#endif + +#define mpn_add_n __MPN(add_n) +__GMP_DECLSPEC mp_limb_t mpn_add_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); + +#define mpn_addmul_1 __MPN(addmul_1) +__GMP_DECLSPEC mp_limb_t mpn_addmul_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)); + +#define mpn_bdivmod __MPN(bdivmod) +__GMP_DECLSPEC mp_limb_t mpn_bdivmod __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, unsigned long int)); + +#define mpn_divrem __MPN(divrem) +__GMP_DECLSPEC mp_limb_t mpn_divrem __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr, mp_size_t)); + +#define mpn_mulmod_2expp1 __MPN(mulmod_2expp1) +__GMP_DECLSPEC int mpn_mulmod_2expp1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr,int,unsigned long, mp_ptr)); + +#define mpn_mulmod_2expm1 __MPN(mulmod_2expm1) +__GMP_DECLSPEC void mpn_mulmod_2expm1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr,unsigned long, mp_ptr)); + +#define mpn_cmp __MPN(cmp) +#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_cmp) +__GMP_DECLSPEC int mpn_cmp __GMP_PROTO ((mp_srcptr, mp_srcptr, mp_size_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; +#endif + +#define mpn_divexact_by3(dst,src,size) \ + mpn_divexact_by3c (dst, src, size, __GMP_CAST (mp_limb_t, 0)) + +#define mpn_divexact_by3c __MPN(divexact_by3c) +__GMP_DECLSPEC mp_limb_t mpn_divexact_by3c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)); + +#define mpn_divmod_1(qp,np,nsize,dlimb) \ + mpn_divrem_1 (qp, __GMP_CAST (mp_size_t, 0), np, nsize, dlimb) + +#define mpn_divrem_1 __MPN(divrem_1) +__GMP_DECLSPEC mp_limb_t mpn_divrem_1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t)); + +#define mpn_divrem_2 __MPN(divrem_2) +__GMP_DECLSPEC mp_limb_t mpn_divrem_2 __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr)); + +#define mpn_invert __MPN(invert) 
+__GMP_DECLSPEC void mpn_invert __GMP_PROTO ((mp_ptr xp, mp_srcptr ap, mp_size_t n)); + +#define mpn_sb_divappr_q __MPN(sb_divappr_q) +__GMP_DECLSPEC mp_limb_t mpn_sb_divappr_q __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_size_t nn, + mp_srcptr dp, mp_size_t dn, mp_limb_t dip)); + +#define mpn_dc_divappr_q_n __MPN(dc_divappr_q_n) +__GMP_DECLSPEC mp_limb_t mpn_dc_divappr_q_n __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, + mp_limb_t dip, mp_ptr tp)); + +#define mpn_dc_bdiv_q_n __MPN(dc_bdiv_q_n) +__GMP_DECLSPEC void mpn_dc_bdiv_q_n __GMP_PROTO ((mp_ptr qp, mp_ptr wp, mp_ptr np, mp_srcptr dp, mp_size_t n, + mp_limb_t dinv, mp_ptr scratch)); + +#define mpn_inv_divappr_q_n __MPN(inv_divappr_q_n) +__GMP_DECLSPEC mp_limb_t mpn_inv_divappr_q_n __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, + mp_srcptr dip)); + +#define mpn_dc_divappr_q __MPN(dc_divappr_q) +__GMP_DECLSPEC mp_limb_t mpn_dc_divappr_q __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t n, + mp_limb_t dinv)); + +#define mpn_dc_div_q __MPN(dc_div_q) +__GMP_DECLSPEC mp_limb_t mpn_dc_div_q __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_size_t nn, + mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)); + +#define mpn_inv_divappr_q __MPN(inv_divappr_q) +__GMP_DECLSPEC mp_limb_t mpn_inv_divappr_q __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t n, + mp_srcptr dinv)); + +#define mpn_inv_div_q __MPN(inv_div_q) +__GMP_DECLSPEC mp_limb_t mpn_inv_div_q __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_size_t nn, + mp_srcptr dp, mp_size_t dn, mp_srcptr dinv)); + +#define mpn_inv_div_qr __MPN(inv_div_qr) +__GMP_DECLSPEC mp_limb_t mpn_inv_div_qr __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_size_t nn, + mp_srcptr dp, mp_size_t dn, mp_srcptr dinv)); + +#define mpn_inv_div_qr_n __MPN(inv_div_qr_n) +__GMP_DECLSPEC mp_limb_t mpn_inv_div_qr_n __GMP_PROTO ((mp_ptr qp, mp_ptr np, + mp_srcptr dp, mp_size_t dn, mp_srcptr dinv)); + +#define mpn_dc_div_qr __MPN(dc_div_qr) +__GMP_DECLSPEC mp_limb_t mpn_dc_div_qr __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_size_t nn, + mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)); + +#define mpn_dc_div_qr_n __MPN(dc_div_qr_n) +__GMP_DECLSPEC mp_limb_t mpn_dc_div_qr_n __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, + mp_limb_t dinv, mp_ptr tp)); + +#define mpn_sb_div_q __MPN(sb_div_q) +__GMP_DECLSPEC mp_limb_t mpn_sb_div_q __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_size_t nn, + mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)); + +#define mpn_sb_bdiv_q __MPN(sb_bdiv_q) +__GMP_DECLSPEC void mpn_sb_bdiv_q __GMP_PROTO ((mp_ptr qp, mp_ptr wp, mp_ptr np, mp_size_t nn, + mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)); + +#define mpn_dc_bdiv_q __MPN(dc_bdiv_q) +__GMP_DECLSPEC void mpn_dc_bdiv_q __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_size_t nn, + mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)); + +#define mpn_dc_bdiv_qr __MPN(dc_bdiv_qr) +__GMP_DECLSPEC mp_limb_t mpn_dc_bdiv_qr __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_size_t nn, + mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)); + +#define mpn_dc_bdiv_qr_n __MPN(dc_bdiv_qr_n) +__GMP_DECLSPEC mp_limb_t mpn_dc_bdiv_qr_n __GMP_PROTO ((mp_ptr qp, mp_ptr np, + mp_srcptr dp, mp_size_t n, mp_limb_t dinv, mp_ptr tp)); + +#define mpn_sb_div_qr __MPN(sb_div_qr) +__GMP_DECLSPEC mp_limb_t mpn_sb_div_qr __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_size_t nn, + mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)); + +#define mpn_sb_bdiv_qr __MPN(sb_bdiv_qr) +__GMP_DECLSPEC mp_limb_t mpn_sb_bdiv_qr __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_size_t nn, + mp_srcptr dp, mp_size_t dn, 
mp_limb_t dinv)); + +#define mpn_tdiv_q __MPN(tdiv_q) +__GMP_DECLSPEC void mpn_tdiv_q __GMP_PROTO ((mp_ptr qp, mp_srcptr np, mp_size_t nn, + mp_srcptr dp, mp_size_t dn)); + +#define mpn_divexact __MPN(divexact) +__GMP_DECLSPEC void mpn_divexact __GMP_PROTO ((mp_ptr qp, + mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)); + +#define mpn_redc_1 __MPN(redc_1) +__GMP_DECLSPEC void mpn_redc_1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)); + +#define mpn_gcd __MPN(gcd) +__GMP_DECLSPEC mp_size_t mpn_gcd __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t)); + +#define mpn_gcd_1 __MPN(gcd_1) +__GMP_DECLSPEC mp_limb_t mpn_gcd_1 __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE; + +#define mpn_gcdext __MPN(gcdext) +__GMP_DECLSPEC mp_size_t mpn_gcdext __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t)); + +#define mpn_get_str __MPN(get_str) +__GMP_DECLSPEC size_t mpn_get_str __GMP_PROTO ((unsigned char *, int, mp_ptr, mp_size_t)); + +#define mpn_hamdist __MPN(hamdist) +__GMP_DECLSPEC mp_bitcnt_t mpn_hamdist __GMP_PROTO ((mp_srcptr, mp_srcptr, mp_size_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpn_lshift __MPN(lshift) +__GMP_DECLSPEC mp_limb_t mpn_lshift __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int)); + +#define mpn_mod_1 __MPN(mod_1) +__GMP_DECLSPEC mp_limb_t mpn_mod_1 __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE; + +#define mpn_mul __MPN(mul) +__GMP_DECLSPEC mp_limb_t mpn_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)); + +#define mpn_mul_1 __MPN(mul_1) +__GMP_DECLSPEC mp_limb_t mpn_mul_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)); + +#define mpn_mul_n __MPN(mul_n) +__GMP_DECLSPEC void mpn_mul_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); + +#define mpn_sqr __MPN(sqr) +__GMP_DECLSPEC void mpn_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t)); + +#define mpn_neg_n __MPN(neg_n) +#define mpn_neg __MPN(neg_n) +__GMP_DECLSPEC mp_limb_t mpn_neg_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t)); + +#define mpn_com_n __MPN(com_n) +#define mpn_com __MPN(com_n) +__GMP_DECLSPEC void mpn_com_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t)); + +#define mpn_perfect_square_p __MPN(perfect_square_p) +__GMP_DECLSPEC int mpn_perfect_square_p __GMP_PROTO ((mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE; + +#define mpn_popcount __MPN(popcount) +__GMP_DECLSPEC mp_bitcnt_t mpn_popcount __GMP_PROTO ((mp_srcptr, mp_size_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE; + +#define mpn_pow_1 __MPN(pow_1) +__GMP_DECLSPEC mp_size_t mpn_pow_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr)); + +/* undocumented now, but retained here for upward compatibility */ +#define mpn_preinv_mod_1 __MPN(preinv_mod_1) +__GMP_DECLSPEC mp_limb_t mpn_preinv_mod_1 __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE; + +#define mpn_random __MPN(random) +__GMP_DECLSPEC void mpn_random __GMP_PROTO ((mp_ptr, mp_size_t)); + +#define mpn_random2 __MPN(random2) +__GMP_DECLSPEC void mpn_random2 __GMP_PROTO ((mp_ptr, mp_size_t)); + +#define mpn_urandomb __MPN(urandomb) +__GMP_DECLSPEC void mpn_urandomb __GMP_PROTO ((mp_ptr, gmp_randstate_t, unsigned long)); + +#define mpn_urandomm __MPN(urandomm) +__GMP_DECLSPEC void mpn_urandomm __GMP_PROTO ((mp_ptr, gmp_randstate_t, mp_srcptr, mp_size_t)); + +#define mpn_randomb __MPN(randomb) +__GMP_DECLSPEC void mpn_randomb __GMP_PROTO ((mp_ptr, gmp_randstate_t, mp_size_t)); + +#define mpn_rrandom __MPN(rrandom) 
+__GMP_DECLSPEC void mpn_rrandom __GMP_PROTO ((mp_ptr, gmp_randstate_t, mp_size_t)); + +#define mpn_rshift __MPN(rshift) +__GMP_DECLSPEC mp_limb_t mpn_rshift __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int)); + +#define mpn_scan0 __MPN(scan0) +__GMP_DECLSPEC mp_bitcnt_t mpn_scan0 __GMP_PROTO ((mp_srcptr, mp_bitcnt_t)) __GMP_ATTRIBUTE_PURE; + +#define mpn_scan1 __MPN(scan1) +__GMP_DECLSPEC mp_bitcnt_t mpn_scan1 __GMP_PROTO ((mp_srcptr, mp_bitcnt_t)) __GMP_ATTRIBUTE_PURE; + +#define mpn_set_str __MPN(set_str) +__GMP_DECLSPEC mp_size_t mpn_set_str __GMP_PROTO ((mp_ptr, __gmp_const unsigned char *, size_t, int)); + +#define mpn_sqrtrem __MPN(sqrtrem) +__GMP_DECLSPEC mp_size_t mpn_sqrtrem __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t)); + +#define mpn_sub __MPN(sub) +#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_sub) +__GMP_DECLSPEC mp_limb_t mpn_sub __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr,mp_size_t)); +#endif + +#define mpn_sub_1 __MPN(sub_1) +#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_sub_1) +__GMP_DECLSPEC mp_limb_t mpn_sub_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)) __GMP_NOTHROW; +#endif + +#define mpn_sub_n __MPN(sub_n) +__GMP_DECLSPEC mp_limb_t mpn_sub_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); + +#define mpn_submul_1 __MPN(submul_1) +__GMP_DECLSPEC mp_limb_t mpn_submul_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)); + +#define mpn_tdiv_qr __MPN(tdiv_qr) +__GMP_DECLSPEC void mpn_tdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)); + +#define mpn_and_n __MPN(and_n) +__GMP_DECLSPEC void mpn_and_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); +#define mpn_andn_n __MPN(andn_n) +__GMP_DECLSPEC void mpn_andn_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); +#define mpn_nand_n __MPN(nand_n) +__GMP_DECLSPEC void mpn_nand_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); +#define mpn_ior_n __MPN(ior_n) +__GMP_DECLSPEC void mpn_ior_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); +#define mpn_iorn_n __MPN(iorn_n) +__GMP_DECLSPEC void mpn_iorn_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); +#define mpn_nior_n __MPN(nior_n) +__GMP_DECLSPEC void mpn_nior_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); +#define mpn_xor_n __MPN(xor_n) +__GMP_DECLSPEC void mpn_xor_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); +#define mpn_xnor_n __MPN(xnor_n) +__GMP_DECLSPEC void mpn_xnor_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); + +#define mpn_copyi __MPN(copyi) +__GMP_DECLSPEC void mpn_copyi __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t)); +#define mpn_copyd __MPN(copyd) +__GMP_DECLSPEC void mpn_copyd __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t)); +#define mpn_zero __MPN(zero) +__GMP_DECLSPEC void mpn_zero __GMP_PROTO ((mp_ptr, mp_size_t)); + +/**************** mpz inlines ****************/ + +/* The following are provided as inlines where possible, but always exist as + library functions too, for binary compatibility. + + Within gmp itself this inlining generally isn't relied on, since it + doesn't get done for all compilers, whereas if something is worth + inlining then it's worth arranging always. + + There are two styles of inlining here. When the same bit of code is + wanted for the inline as for the library version, then __GMP_FORCE_foo + arranges for that code to be emitted and the __GMP_EXTERN_INLINE + directive suppressed, eg. mpz_fits_uint_p. 
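+
+   (A sketch of that first style, assuming a library-side source file
+   whose name here is hypothetical:
+
+     // fits_uint_p.c -- force the out-of-line library copy
+     #define __GMP_FORCE_mpz_fits_uint_p 1
+     #include "mpir.h"
+
+   With the macro defined, the conditionals below drop the
+   __GMP_EXTERN_INLINE qualifier, so the same body is emitted as an
+   ordinary external function.)
+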
When a different bit of code + is wanted for the inline than for the library version, then + __GMP_FORCE_foo arranges the inline to be suppressed, eg. mpz_abs. */ + +#if defined (__GMP_EXTERN_INLINE) && ! defined (__GMP_FORCE_mpz_abs) +__GMP_EXTERN_INLINE void +mpz_abs (mpz_ptr __gmp_w, mpz_srcptr __gmp_u) +{ + if (__gmp_w != __gmp_u) + mpz_set (__gmp_w, __gmp_u); + __gmp_w->_mp_size = __GMP_ABS (__gmp_w->_mp_size); +} +#endif + +#if GMP_NAIL_BITS == 0 +#define __GMPZ_FITS_UTYPE_P(z,maxval) \ + mp_size_t __gmp_n = z->_mp_size; \ + mp_ptr __gmp_p = z->_mp_d; \ + return (__gmp_n == 0 || (__gmp_n == 1 && __gmp_p[0] <= maxval)); +#else +#define __GMPZ_FITS_UTYPE_P(z,maxval) \ + mp_size_t __gmp_n = z->_mp_size; \ + mp_ptr __gmp_p = z->_mp_d; \ + return (__gmp_n == 0 || (__gmp_n == 1 && __gmp_p[0] <= maxval) \ + || (__gmp_n == 2 && __gmp_p[1] <= ((mp_limb_t) maxval >> GMP_NUMB_BITS))); +#endif + +#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_fits_uint_p) +#if ! defined (__GMP_FORCE_mpz_fits_uint_p) +__GMP_EXTERN_INLINE +#endif +int +mpz_fits_uint_p (mpz_srcptr __gmp_z) __GMP_NOTHROW +{ + __GMPZ_FITS_UTYPE_P (__gmp_z, __GMP_UINT_MAX); +} +#endif + +#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_fits_ulong_p) +#if ! defined (__GMP_FORCE_mpz_fits_ulong_p) +__GMP_EXTERN_INLINE +#endif +int +mpz_fits_ulong_p (mpz_srcptr __gmp_z) __GMP_NOTHROW +{ + __GMPZ_FITS_UTYPE_P (__gmp_z, __GMP_ULONG_MAX); +} +#endif + +#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_fits_ushort_p) +#if ! defined (__GMP_FORCE_mpz_fits_ushort_p) +__GMP_EXTERN_INLINE +#endif +int +mpz_fits_ushort_p (mpz_srcptr __gmp_z) __GMP_NOTHROW +{ + __GMPZ_FITS_UTYPE_P (__gmp_z, __GMP_USHRT_MAX); +} +#endif + +#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_get_ui) +#if ! defined (__GMP_FORCE_mpz_get_ui) +__GMP_EXTERN_INLINE +#endif +unsigned long +mpz_get_ui (mpz_srcptr __gmp_z) __GMP_NOTHROW +{ + mp_ptr __gmp_p = __gmp_z->_mp_d; + mp_size_t __gmp_n = __gmp_z->_mp_size; + mp_limb_t __gmp_l = __gmp_p[0]; + /* This is a "#if" rather than a plain "if" so as to avoid gcc warnings + about "<< GMP_NUMB_BITS" exceeding the type size, and to avoid Borland + C++ 6.0 warnings about condition always true for something like + "__GMP_ULONG_MAX < GMP_NUMB_MASK". */ +#if GMP_NAIL_BITS == 0 || defined (_LONG_LONG_LIMB) + /* limb==long and no nails, or limb==longlong, one limb is enough */ + return (unsigned long)(__gmp_n != 0 ? __gmp_l : 0); +#else + /* limb==long and nails, need two limbs when available */ + __gmp_n = __GMP_ABS (__gmp_n); + if (__gmp_n <= 1) + return (__gmp_n != 0 ? __gmp_l : 0); + else + return __gmp_l + (__gmp_p[1] << GMP_NUMB_BITS); +#endif +} +#endif + +#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_getlimbn) +#if ! defined (__GMP_FORCE_mpz_getlimbn) +__GMP_EXTERN_INLINE +#endif +mp_limb_t +mpz_getlimbn (mpz_srcptr __gmp_z, mp_size_t __gmp_n) __GMP_NOTHROW +{ + mp_limb_t __gmp_result = 0; + if (__GMP_LIKELY (__gmp_n >= 0 && __gmp_n < __GMP_ABS (__gmp_z->_mp_size))) + __gmp_result = __gmp_z->_mp_d[__gmp_n]; + return __gmp_result; +} +#endif + +#if defined (__GMP_EXTERN_INLINE) && ! defined (__GMP_FORCE_mpz_neg) +__GMP_EXTERN_INLINE void +mpz_neg (mpz_ptr __gmp_w, mpz_srcptr __gmp_u) +{ + if (__gmp_w != __gmp_u) + mpz_set (__gmp_w, __gmp_u); + __gmp_w->_mp_size = - __gmp_w->_mp_size; +} +#endif + +#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_perfect_square_p) +#if ! 
defined (__GMP_FORCE_mpz_perfect_square_p) +__GMP_EXTERN_INLINE +#endif +int +mpz_perfect_square_p (mpz_srcptr __gmp_a) +{ + mp_size_t __gmp_asize; + int __gmp_result; + + __gmp_asize = __gmp_a->_mp_size; + __gmp_result = (__gmp_asize >= 0); /* zero is a square, negatives are not */ + if (__GMP_LIKELY (__gmp_asize > 0)) + __gmp_result = mpn_perfect_square_p (__gmp_a->_mp_d, __gmp_asize); + return __gmp_result; +} +#endif + +#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_popcount) +#if ! defined (__GMP_FORCE_mpz_popcount) +__GMP_EXTERN_INLINE +#endif +mp_bitcnt_t +mpz_popcount (mpz_srcptr __gmp_u) __GMP_NOTHROW +{ + mp_size_t __gmp_usize; + mp_bitcnt_t __gmp_result; + + __gmp_usize = __gmp_u->_mp_size; + __gmp_result = (__gmp_usize < 0 ? __GMP_ULONG_MAX : 0); + if (__GMP_LIKELY (__gmp_usize > 0)) + __gmp_result = mpn_popcount (__gmp_u->_mp_d, __gmp_usize); + return __gmp_result; +} +#endif + +#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_set_q) +#if ! defined (__GMP_FORCE_mpz_set_q) +__GMP_EXTERN_INLINE +#endif +void +mpz_set_q (mpz_ptr __gmp_w, mpq_srcptr __gmp_u) +{ + mpz_tdiv_q (__gmp_w, mpq_numref (__gmp_u), mpq_denref (__gmp_u)); +} +#endif + +#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_size) +#if ! defined (__GMP_FORCE_mpz_size) +__GMP_EXTERN_INLINE +#endif +size_t +mpz_size (mpz_srcptr __gmp_z) __GMP_NOTHROW +{ + return __GMP_ABS (__gmp_z->_mp_size); +} +#endif + + +/**************** mpq inlines ****************/ + +#if defined (__GMP_EXTERN_INLINE) && ! defined (__GMP_FORCE_mpq_abs) +__GMP_EXTERN_INLINE void +mpq_abs (mpq_ptr __gmp_w, mpq_srcptr __gmp_u) +{ + if (__gmp_w != __gmp_u) + mpq_set (__gmp_w, __gmp_u); + __gmp_w->_mp_num._mp_size = __GMP_ABS (__gmp_w->_mp_num._mp_size); +} +#endif + +#if defined (__GMP_EXTERN_INLINE) && ! defined (__GMP_FORCE_mpq_neg) +__GMP_EXTERN_INLINE void +mpq_neg (mpq_ptr __gmp_w, mpq_srcptr __gmp_u) +{ + if (__gmp_w != __gmp_u) + mpq_set (__gmp_w, __gmp_u); + __gmp_w->_mp_num._mp_size = - __gmp_w->_mp_num._mp_size; +} +#endif + + +/**************** mpn inlines ****************/ + +/* The comments with __GMPN_ADD_1 below apply here too. + + The test for FUNCTION returning 0 should predict well. If it's assumed + {yp,ysize} will usually have a random number of bits then the high limb + won't be full and a carry out will occur a good deal less than 50% of the + time. + + ysize==0 isn't a documented feature, but is used internally in a few + places. + + Producing cout last stops it using up a register during the main part of + the calculation, though gcc (as of 3.0) on an "if (mpn_add (...))" + doesn't seem able to move the true and false legs of the conditional up + to the two places cout is generated. 
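+
+   (An illustrative worst case, assuming a nail-free build so that ~0
+   is a valid limb value: a one-limb addend whose carry ripples through
+   every limb of the larger operand.
+
+     mp_limb_t x[3] = { ~(mp_limb_t) 0, ~(mp_limb_t) 0, ~(mp_limb_t) 0 };
+     mp_limb_t y[1] = { 1 };
+     mp_limb_t w[3];
+     mp_limb_t cy = mpn_add (w, x, 3, y, 1);   // w is all zero, cy == 1
+
+   Random data rarely looks like this, which is why the carry test
+   above predicts well.)
+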
*/ + +#define __GMPN_AORS(cout, wp, xp, xsize, yp, ysize, FUNCTION, TEST) \ + do { \ + mp_size_t __gmp_i; \ + mp_limb_t __gmp_x; \ + \ + /* ASSERT ((ysize) >= 0); */ \ + /* ASSERT ((xsize) >= (ysize)); */ \ + /* ASSERT (MPN_SAME_OR_SEPARATE2_P (wp, xsize, xp, xsize)); */ \ + /* ASSERT (MPN_SAME_OR_SEPARATE2_P (wp, xsize, yp, ysize)); */ \ + \ + __gmp_i = (ysize); \ + if (__gmp_i != 0) \ + { \ + if (FUNCTION (wp, xp, yp, __gmp_i)) \ + { \ + do \ + { \ + if (__gmp_i >= (xsize)) \ + { \ + (cout) = 1; \ + goto __gmp_done; \ + } \ + __gmp_x = (xp)[__gmp_i]; \ + } \ + while (TEST); \ + } \ + } \ + if ((wp) != (xp)) \ + __GMPN_COPY_REST (wp, xp, xsize, __gmp_i); \ + (cout) = 0; \ + __gmp_done: \ + ; \ + } while (0) + +#define __GMPN_ADD(cout, wp, xp, xsize, yp, ysize) \ + __GMPN_AORS (cout, wp, xp, xsize, yp, ysize, mpn_add_n, \ + (((wp)[__gmp_i++] = (__gmp_x + 1) & GMP_NUMB_MASK) == 0)) +#define __GMPN_SUB(cout, wp, xp, xsize, yp, ysize) \ + __GMPN_AORS (cout, wp, xp, xsize, yp, ysize, mpn_sub_n, \ + (((wp)[__gmp_i++] = (__gmp_x - 1) & GMP_NUMB_MASK), __gmp_x == 0)) + + +/* The use of __gmp_i indexing is designed to ensure a compile time src==dst + remains nice and clear to the compiler, so that __GMPN_COPY_REST can + disappear, and the load/add/store gets a chance to become a + read-modify-write on CISC CPUs. + + Alternatives: + + Using a pair of pointers instead of indexing would be possible, but gcc + isn't able to recognise compile-time src==dst in that case, even when the + pointers are incremented more or less together. Other compilers would + very likely have similar difficulty. + + gcc could use "if (__builtin_constant_p(src==dst) && src==dst)" or + similar to detect a compile-time src==dst. This works nicely on gcc + 2.95.x, it's not good on gcc 3.0 where __builtin_constant_p(p==p) seems + to be always false, for a pointer p. But the current code form seems + good enough for src==dst anyway. + + gcc on x86 as usual doesn't give particularly good flags handling for the + carry/borrow detection. It's tempting to want some multi instruction asm + blocks to help it, and this was tried, but in truth there's only a few + instructions to save and any gain is all too easily lost by register + juggling setting up for the asm. 
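+
+   (The src==dst case this indexing keeps visible, as a sketch with a
+   hypothetical array -- an in-place increment:
+
+     mp_limb_t a[2] = { 7, 0 };
+     mp_limb_t cy = mpn_add_1 (a, a, 2, 1);   // a[0] becomes 8, cy == 0
+
+   With dst and src identical, each load/add/store can fold into a
+   read-modify-write as described.)
+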
*/ + +#if GMP_NAIL_BITS == 0 +#define __GMPN_AORS_1(cout, dst, src, n, v, OP, CB) \ + do { \ + mp_size_t __gmp_i; \ + mp_limb_t __gmp_x, __gmp_r; \ + \ + /* ASSERT ((n) >= 1); */ \ + /* ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, n)); */ \ + \ + __gmp_x = (src)[0]; \ + __gmp_r = __gmp_x OP (v); \ + (dst)[0] = __gmp_r; \ + if (CB (__gmp_r, __gmp_x, (v))) \ + { \ + (cout) = 1; \ + for (__gmp_i = 1; __gmp_i < (n);) \ + { \ + __gmp_x = (src)[__gmp_i]; \ + __gmp_r = __gmp_x OP 1; \ + (dst)[__gmp_i] = __gmp_r; \ + ++__gmp_i; \ + if (!CB (__gmp_r, __gmp_x, 1)) \ + { \ + if ((src) != (dst)) \ + __GMPN_COPY_REST (dst, src, n, __gmp_i); \ + (cout) = 0; \ + break; \ + } \ + } \ + } \ + else \ + { \ + if ((src) != (dst)) \ + __GMPN_COPY_REST (dst, src, n, 1); \ + (cout) = 0; \ + } \ + } while (0) +#endif + +#if GMP_NAIL_BITS >= 1 +#define __GMPN_AORS_1(cout, dst, src, n, v, OP, CB) \ + do { \ + mp_size_t __gmp_i; \ + mp_limb_t __gmp_x, __gmp_r; \ + \ + /* ASSERT ((n) >= 1); */ \ + /* ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, n)); */ \ + \ + __gmp_x = (src)[0]; \ + __gmp_r = __gmp_x OP (v); \ + (dst)[0] = __gmp_r & GMP_NUMB_MASK; \ + if (__gmp_r >> GMP_NUMB_BITS != 0) \ + { \ + (cout) = 1; \ + for (__gmp_i = 1; __gmp_i < (n);) \ + { \ + __gmp_x = (src)[__gmp_i]; \ + __gmp_r = __gmp_x OP 1; \ + (dst)[__gmp_i] = __gmp_r & GMP_NUMB_MASK; \ + ++__gmp_i; \ + if (__gmp_r >> GMP_NUMB_BITS == 0) \ + { \ + if ((src) != (dst)) \ + __GMPN_COPY_REST (dst, src, n, __gmp_i); \ + (cout) = 0; \ + break; \ + } \ + } \ + } \ + else \ + { \ + if ((src) != (dst)) \ + __GMPN_COPY_REST (dst, src, n, 1); \ + (cout) = 0; \ + } \ + } while (0) +#endif + +#define __GMPN_ADDCB(r,x,y) ((r) < (y)) +#define __GMPN_SUBCB(r,x,y) ((x) < (y)) + +#define __GMPN_ADD_1(cout, dst, src, n, v) \ + __GMPN_AORS_1(cout, dst, src, n, v, +, __GMPN_ADDCB) +#define __GMPN_SUB_1(cout, dst, src, n, v) \ + __GMPN_AORS_1(cout, dst, src, n, v, -, __GMPN_SUBCB) + + +/* Compare {xp,size} and {yp,size}, setting "result" to positive, zero or + negative. size==0 is allowed. On random data usually only one limb will + need to be examined to get a result, so it's worth having it inline. */ +#define __GMPN_CMP(result, xp, yp, size) \ + do { \ + mp_size_t __gmp_i; \ + mp_limb_t __gmp_x, __gmp_y; \ + \ + /* ASSERT ((size) >= 0); */ \ + \ + (result) = 0; \ + __gmp_i = (size); \ + while (--__gmp_i >= 0) \ + { \ + __gmp_x = (xp)[__gmp_i]; \ + __gmp_y = (yp)[__gmp_i]; \ + if (__gmp_x != __gmp_y) \ + { \ + /* Cannot use __gmp_x - __gmp_y, may overflow an "int" */ \ + (result) = (__gmp_x > __gmp_y ? 1 : -1); \ + break; \ + } \ + } \ + } while (0) + + +#if defined (__GMPN_COPY) && ! defined (__GMPN_COPY_REST) +#define __GMPN_COPY_REST(dst, src, size, start) \ + do { \ + /* ASSERT ((start) >= 0); */ \ + /* ASSERT ((start) <= (size)); */ \ + __GMPN_COPY ((dst)+(start), (src)+(start), (size)-(start)); \ + } while (0) +#endif + +/* Copy {src,size} to {dst,size}, starting at "start". This is designed to + keep the indexing dst[j] and src[j] nice and simple for __GMPN_ADD_1, + __GMPN_ADD, etc. */ +#if ! defined (__GMPN_COPY_REST) +#define __GMPN_COPY_REST(dst, src, size, start) \ + do { \ + mp_size_t __gmp_j; \ + /* ASSERT ((size) >= 0); */ \ + /* ASSERT ((start) >= 0); */ \ + /* ASSERT ((start) <= (size)); */ \ + /* ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); */ \ + for (__gmp_j = (start); __gmp_j < (size); __gmp_j++) \ + (dst)[__gmp_j] = (src)[__gmp_j]; \ + } while (0) +#endif + +/* Enhancement: Use some of the smarter code from gmp-impl.h. 
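+   (For scale, the portable fallback above is only a limb loop; an
+   illustrative call with hypothetical arrays:
+
+     mp_limb_t s[4] = { 1, 2, 3, 4 };
+     mp_limb_t d[4];
+     __GMPN_COPY (d, s, 4);            // copies d[0..3]
+     __GMPN_COPY_REST (d, s, 4, 2);    // copies only d[2] and d[3]
+
+   so a native copy could well beat it.)
+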
Maybe use + mpn_copyi if there's a native version, and if we don't mind demanding + binary compatibility for it (on targets which use it). */ + +#if ! defined (__GMPN_COPY) +#define __GMPN_COPY(dst, src, size) __GMPN_COPY_REST (dst, src, size, 0) +#endif + + +#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpn_add) +#if ! defined (__GMP_FORCE_mpn_add) +__GMP_EXTERN_INLINE +#endif +mp_limb_t +mpn_add (mp_ptr __gmp_wp, mp_srcptr __gmp_xp, mp_size_t __gmp_xsize, mp_srcptr __gmp_yp, mp_size_t __gmp_ysize) +{ + mp_limb_t __gmp_c; + __GMPN_ADD (__gmp_c, __gmp_wp, __gmp_xp, __gmp_xsize, __gmp_yp, __gmp_ysize); + return __gmp_c; +} +#endif + +#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpn_add_1) +#if ! defined (__GMP_FORCE_mpn_add_1) +__GMP_EXTERN_INLINE +#endif +mp_limb_t +mpn_add_1 (mp_ptr __gmp_dst, mp_srcptr __gmp_src, mp_size_t __gmp_size, mp_limb_t __gmp_n) __GMP_NOTHROW +{ + mp_limb_t __gmp_c; + __GMPN_ADD_1 (__gmp_c, __gmp_dst, __gmp_src, __gmp_size, __gmp_n); + return __gmp_c; +} +#endif + +#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpn_cmp) +#if ! defined (__GMP_FORCE_mpn_cmp) +__GMP_EXTERN_INLINE +#endif +int +mpn_cmp (mp_srcptr __gmp_xp, mp_srcptr __gmp_yp, mp_size_t __gmp_size) __GMP_NOTHROW +{ + int __gmp_result; + __GMPN_CMP (__gmp_result, __gmp_xp, __gmp_yp, __gmp_size); + return __gmp_result; +} +#endif + +#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpn_sub) +#if ! defined (__GMP_FORCE_mpn_sub) +__GMP_EXTERN_INLINE +#endif +mp_limb_t +mpn_sub (mp_ptr __gmp_wp, mp_srcptr __gmp_xp, mp_size_t __gmp_xsize, mp_srcptr __gmp_yp, mp_size_t __gmp_ysize) +{ + mp_limb_t __gmp_c; + __GMPN_SUB (__gmp_c, __gmp_wp, __gmp_xp, __gmp_xsize, __gmp_yp, __gmp_ysize); + return __gmp_c; +} +#endif + +#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpn_sub_1) +#if ! defined (__GMP_FORCE_mpn_sub_1) +__GMP_EXTERN_INLINE +#endif +mp_limb_t +mpn_sub_1 (mp_ptr __gmp_dst, mp_srcptr __gmp_src, mp_size_t __gmp_size, mp_limb_t __gmp_n) __GMP_NOTHROW +{ + mp_limb_t __gmp_c; + __GMPN_SUB_1 (__gmp_c, __gmp_dst, __gmp_src, __gmp_size, __gmp_n); + return __gmp_c; +} +#endif + +#if defined (__cplusplus) +} +#endif + + +/* Allow faster testing for negative, zero, and positive. */ +#define mpz_sgn(Z) ((Z)->_mp_size < 0 ? -1 : (Z)->_mp_size > 0) +#define mpf_sgn(F) ((F)->_mp_size < 0 ? -1 : (F)->_mp_size > 0) +#define mpq_sgn(Q) ((Q)->_mp_num._mp_size < 0 ? -1 : (Q)->_mp_num._mp_size > 0) + +/* When using GCC, optimize certain common comparisons. */ +#if defined (__GNUC__) +#define mpz_cmp_ui(Z,UI) \ + (__builtin_constant_p (UI) && (UI) == 0 \ + ? mpz_sgn (Z) : _mpz_cmp_ui (Z,UI)) +#define mpz_cmp_si(Z,SI) \ + (__builtin_constant_p (SI) && (SI) == 0 ? mpz_sgn (Z) \ + : __builtin_constant_p (SI) && (SI) > 0 \ + ? _mpz_cmp_ui (Z, __GMP_CAST (unsigned long int, SI)) \ + : _mpz_cmp_si (Z,SI)) +#define mpq_cmp_ui(Q,NUI,DUI) \ + (__builtin_constant_p (NUI) && (NUI) == 0 \ + ? mpq_sgn (Q) : _mpq_cmp_ui (Q,NUI,DUI)) +#define mpq_cmp_si(q,n,d) \ + (__builtin_constant_p ((n) >= 0) && (n) >= 0 \ + ? mpq_cmp_ui (q, __GMP_CAST (unsigned long, n), d) \ + : _mpq_cmp_si (q, n, d)) +#else +#define mpz_cmp_ui(Z,UI) _mpz_cmp_ui (Z,UI) +#define mpz_cmp_si(Z,UI) _mpz_cmp_si (Z,UI) +#define mpq_cmp_ui(Q,NUI,DUI) _mpq_cmp_ui (Q,NUI,DUI) +#define mpq_cmp_si(q,n,d) _mpq_cmp_si(q,n,d) +#endif + + +/* Using "&" rather than "&&" means these can come out branch-free. Every + mpz_t has at least one limb allocated, so fetching the low limb is always + allowed. 
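+
+   As an illustration (not part of the original commentary): for z == 0 the
+   size member is 0, so mpz_odd_p (z) below evaluates as
+
+       (0 != 0) & (int) z->_mp_d[0]   which is   0
+
+   i.e. the size test masks off the fetched limb without any conditional
+   branch, even though _mp_d[0] then holds no meaningful digit.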
*/ +#define mpz_odd_p(z) (((z)->_mp_size != 0) & __GMP_CAST (int, (z)->_mp_d[0])) +#define mpz_even_p(z) (! mpz_odd_p (z)) + + +/**************** C++ routines ****************/ + +#ifdef __cplusplus +__GMP_DECLSPEC_XX std::ostream& operator<< (std::ostream &, mpz_srcptr); +__GMP_DECLSPEC_XX std::ostream& operator<< (std::ostream &, mpq_srcptr); +__GMP_DECLSPEC_XX std::ostream& operator<< (std::ostream &, mpf_srcptr); +__GMP_DECLSPEC_XX std::istream& operator>> (std::istream &, mpz_ptr); +__GMP_DECLSPEC_XX std::istream& operator>> (std::istream &, mpq_ptr); +__GMP_DECLSPEC_XX std::istream& operator>> (std::istream &, mpf_ptr); +#endif + +/* Source-level compatibility with GMP 1. */ +#define mpz_mdiv mpz_fdiv_q +#define mpz_mdivmod mpz_fdiv_qr +#define mpz_mmod mpz_fdiv_r +#define mpz_mdiv_ui mpz_fdiv_q_ui +#define mpz_mdivmod_ui(q,r,n,d) \ + (((r) == 0) ? mpz_fdiv_q_ui (q,n,d) : mpz_fdiv_qr_ui (q,r,n,d)) +#define mpz_mmod_ui(r,n,d) \ + (((r) == 0) ? mpz_fdiv_ui (n,d) : mpz_fdiv_r_ui (r,n,d)) + +#define gmp_randinit(x,y,z) gmp_randinit_lc_2exp_size(x,z) + +typedef __mpz_struct MP_INT; /* gmp 1 source compatibility */ +typedef __mpq_struct MP_RAT; /* gmp 1 source compatibility */ + +#define mpz_div mpz_fdiv_q +#define mpz_divmod mpz_fdiv_qr +#define mpz_div_ui mpz_fdiv_q_ui +#define mpz_divmod_ui mpz_fdiv_qr_ui +#define mpz_div_2exp mpz_fdiv_q_2exp +#define mpz_mod_2exp mpz_fdiv_r_2exp + +enum +{ + GMP_ERROR_NONE = 0, + GMP_ERROR_UNSUPPORTED_ARGUMENT = 1, + GMP_ERROR_DIVISION_BY_ZERO = 2, + GMP_ERROR_SQRT_OF_NEGATIVE = 4, + GMP_ERROR_INVALID_ARGUMENT = 8 +}; + +/* Major version number is the value of __GNU_MP__ too, above and in mp.h. */ +#define __GNU_MP_VERSION 5 +#define __GNU_MP_VERSION_MINOR 0 +#define __GNU_MP_VERSION_PATCHLEVEL 1 +#define GMP_VERSION "5.0.1" + +#define __MPIR_VERSION 2 +#define __MPIR_VERSION_MINOR 2 +#define __MPIR_VERSION_PATCHLEVEL 1 +#if defined( _MSC_VER ) +#define _MSC_MPIR_VERSION "2.2.1" +#endif + +/* These are for programs like MPFR to use the same CC and CFLAGS as MPIR */ + +#if ! defined (__GMP_WITHIN_CONFIGURE) +#define __GMP_CC "gcc" +#define __GMP_CFLAGS "-O3" +#define __MPIR_CC "gcc -std=gnu99" +#define __MPIR_CFLAGS "-O3" +#endif + +#define __GMP_H__ +#endif /* __GMP_H__ */ + diff --git a/include/hls_fpo.h b/include/hls_fpo.h new file mode 100644 index 0000000..80b7ed3 --- /dev/null +++ b/include/hls_fpo.h @@ -0,0 +1,665 @@ +/* -*- c -*-*/ +/* +#- (c) Copyright 2011-2019 Xilinx, Inc. All rights reserved. +#- +#- This file contains confidential and proprietary information +#- of Xilinx, Inc. and is protected under U.S. and +#- international copyright and other intellectual property +#- laws. +#- +#- DISCLAIMER +#- This disclaimer is not a license and does not grant any +#- rights to the materials distributed herewith. 
Except as +#- otherwise provided in a valid license issued to you by +#- Xilinx, and to the maximum extent permitted by applicable +#- law: (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND +#- WITH ALL FAULTS, AND XILINX HEREBY DISCLAIMS ALL WARRANTIES +#- AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING +#- BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON- +#- INFRINGEMENT, OR FITNESS FOR ANY PARTICULAR PURPOSE; and +#- (2) Xilinx shall not be liable (whether in contract or tort, +#- including negligence, or under any other theory of +#- liability) for any loss or damage of any kind or nature +#- related to, arising under or in connection with these +#- materials, including for any direct, or any indirect, +#- special, incidental, or consequential loss or damage +#- (including loss of data, profits, goodwill, or any type of +#- loss or damage suffered as a result of any action brought +#- by a third party) even if such damage or loss was +#- reasonably foreseeable or Xilinx had been advised of the +#- possibility of the same. +#- +#- CRITICAL APPLICATIONS +#- Xilinx products are not designed or intended to be fail- +#- safe, or for use in any application requiring fail-safe +#- performance, such as life-support or safety devices or +#- systems, Class III medical devices, nuclear facilities, +#- applications related to the deployment of airbags, or any +#- other applications that could lead to death, personal +#- injury, or severe property or environmental damage +#- (individually and collectively, "Critical +#- Applications"). Customer assumes the sole risk and +#- liability of any use of Xilinx products in Critical +#- Applications, subject only to applicable laws and +#- regulations governing limitations on product liability. +#- +#- THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS +#- PART OF THIS FILE AT ALL TIMES. +#- ************************************************************************ + + * + * + */ + + +#ifndef __AESL_FPO_H__ +#define __AESL_FPO_H__ + +#ifndef __SYNTHESIS__ +#include +#endif + +#include + +#if defined __arm__ && !(defined HLS_NO_XIL_FPO_LIB) +#warning "Xilinx Floating Point Operator IP core does not provide simulation models for ARM architecture. Automatically defining HLS_NO_XIL_FPO_LIB in order to avoid this library dependency, although bit-accurate simulation of some functions is no longer possible. You can make this warning go away by adding this define yourself before including any other files." 
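+/* Restating the fallback as code (illustrative, equivalent to the #warning
+ * above): auto-defining the macro here behaves exactly as if the user had
+ * written, before including any HLS header,
+ *
+ *     #define HLS_NO_XIL_FPO_LIB
+ *
+ * which makes the HLS_FPO_* macros below expand to the plain math.h and
+ * operator forms instead of the bit-accurate xil_fpo_* simulation models. */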
+#define HLS_NO_XIL_FPO_LIB +#endif + +#ifdef __SYNTHESIS__ +#define HLS_FPO_SQRTF(a) __builtin_sqrtf(a) +#define HLS_FPO_SQRT(a) __builtin_sqrt(a) +#define HLS_FPO_RECSQRTF(a) (1.0f/__builtin_sqrtf(a)) +#define HLS_FPO_RECSQRT(a) (1.0/__builtin_sqrt(a)) +#define HLS_FPO_ABSF(a) __builtin_fabsf(a) +#define HLS_FPO_ABS(a) __builtin_fabs(a) +#define HLS_FPO_LOGF(a) __builtin_logf(a) +#define HLS_FPO_LOG(a) __builtin_log(a) +#define HLS_FPO_EXPF(a) __builtin_expf(a) +#define HLS_FPO_EXP(a) __builtin_exp(a) +#else// csim +#ifdef HLS_NO_XIL_FPO_LIB +#define HLS_FPO_SQRTF(a) sqrtf(a) +#define HLS_FPO_SQRT(a) sqrt(a) +#define HLS_FPO_RECSQRTF(a) (1.0f/sqrtf(a)) +#define HLS_FPO_RECSQRT(a) (1.0/sqrt(a)) +#define HLS_FPO_ABSF(a) fabsf(a) +#define HLS_FPO_ABS(a) fabs(a) +#define HLS_FPO_LOGF(a) logf(a) +#define HLS_FPO_LOG(a) log(a) +#define HLS_FPO_EXPF(a) expf(a) +#define HLS_FPO_EXP(a) exp(a) +#else +#define HLS_FPO_SQRTF(a) xil_fpo_sqrt_flt(a) +#define HLS_FPO_SQRT(a) xil_fpo_sqrt_d(a) +#define HLS_FPO_RECSQRTF(a) xil_fpo_recsqrt_flt(a) +#define HLS_FPO_RECSQRT(a) xil_fpo_recsqrt_d(a) +#define HLS_FPO_ABSF(a) xil_fpo_abs_flt(a) +#define HLS_FPO_ABS(a) xil_fpo_abs_d(a) +#define HLS_FPO_LOGF(a) xil_fpo_log_flt(a) +#define HLS_FPO_LOG(a) xil_fpo_log_d(a) +#define HLS_FPO_EXPF(a) xil_fpo_exp_flt(a) +#define HLS_FPO_EXP(a) xil_fpo_exp_d(a) +#endif //HLS_NO_XIL_FPO_LIB +#endif //__SYNTHESIS__ + +#if (defined __SYNTHESIS__ || defined HLS_NO_XIL_FPO_LIB) +#define HLS_FPO_ADDF(a,b) ((a) + (b)) +#define HLS_FPO_ADD(a,b) ((a) + (b)) +#define HLS_FPO_SUBF(a,b) ((a) - (b)) +#define HLS_FPO_SUB(a,b) ((a) - (b)) +#define HLS_FPO_MULF(a,b) ((a) * (b)) +#define HLS_FPO_MUL(a,b) ((a) * (b)) +#define HLS_FPO_DIVF(a,b) ((a)/(b)) +#define HLS_FPO_DIV(a,b) ((a)/(b)) +#define HLS_FPO_RECF(a) (1.0f/(a)) +#define HLS_FPO_RECIPF(a) HLS_FPO_RECF(a) +#define HLS_FPO_REC(a) (1.0/(a)) +#define HLS_FPO_RECIP(a) HLS_FPO_REC(a) +#define HLS_FPO_RSQRTF(a) HLS_FPO_RECSQRTF(a) +#define HLS_FPO_RSQRT(a) HLS_FPO_RECSQRT(a) +//#define HLS_FPO_UNORDEREDF(a,b) +//#define HLS_FPO_UNORDERED(a,b) +#define HLS_FPO_EQUALF(a,b) ((a) == (b)) +#define HLS_FPO_EQUAL(a,b) ((a) == (b)) +#define HLS_FPO_LESSF(a,b) ((a) < (b)) +#define HLS_FPO_LESS(a,b) ((a) < (b)) +#define HLS_FPO_LESSEQUALF(a,b) ((a) <= (b)) +#define HLS_FPO_LESSEQUAL(a,b) ((a) <= (b)) +#define HLS_FPO_GREATERF(a,b) ((a) > (b)) +#define HLS_FPO_GREATER(a,b) ((a) > (b)) +#define HLS_FPO_GREATEREQUALF(a,b) ((a) >= (b)) +#define HLS_FPO_GREATEREQUAL(a,b) ((a) >= (b)) +#define HLS_FPO_NOTEQUALF(a,b) ((a) != (b)) +#define HLS_FPO_NOTEQUAL(a,b) ((a) != (b)) +//#define HLS_FPO_CONDCODEF(a,b) +//#define HLS_FPO_CONDCODE(a,b) +#define HLS_FPO_FTOI(a) ((int)(a)) +#define HLS_FPO_DTOI(a) ((int)(a)) +#define HLS_FPO_ITOF(a) ((float)(a)) +#define HLS_FPO_ITOD(a) ((double)(a)) +#define HLS_FPO_FTOF(a) ((float)(a)) +#define HLS_FPO_DTOF(a) ((float)(a)) +#define HLS_FPO_FTOD(a) ((double)(a)) +#define HLS_FPO_DTOD(a) ((double)(a)) +#else +#define HLS_FPO_ADDF(a,b) xil_fpo_add_flt(a,b) +#define HLS_FPO_ADD(a,b) xil_fpo_add_d(a,b) +#define HLS_FPO_SUBF(a,b) xil_fpo_sub_flt(a,b) +#define HLS_FPO_SUB(a,b) xil_fpo_sub_d(a,b) +#define HLS_FPO_MULF(a,b) xil_fpo_mul_flt(a,b) +#define HLS_FPO_MUL(a,b) xil_fpo_mul_d(a,b) +#define HLS_FPO_DIVF(a,b) xil_fpo_div_flt(a,b) +#define HLS_FPO_DIV(a,b) xil_fpo_div_d(a,b) +#define HLS_FPO_RECF(a) xil_fpo_rec_flt(a) +#define HLS_FPO_RECIPF(a) HLS_FPO_RECF(a) +#define HLS_FPO_REC(a) xil_fpo_rec_d(a) +#define HLS_FPO_RECIP(a) HLS_FPO_REC(a) +#define HLS_FPO_RSQRTF(a) 
HLS_FPO_RECSQRTF(a) +#define HLS_FPO_RSQRT(a) HLS_FPO_RECSQRT(a) +#define HLS_FPO_UNORDEREDF(a,b) xil_fpo_unordered_flt(a,b) +#define HLS_FPO_UNORDERED(a,b) xil_fpo_unordered_d(a,b) +#define HLS_FPO_EQUALF(a,b) xil_fpo_equal_flt(a,b) +#define HLS_FPO_EQUAL(a,b) xil_fpo_equal_d(a,b) +#define HLS_FPO_LESSF(a,b) xil_fpo_less_flt(a,b) +#define HLS_FPO_LESS(a,b) xil_fpo_less_d(a,b) +#define HLS_FPO_LESSEQUALF(a,b) xil_fpo_lessequal_flt(a,b) +#define HLS_FPO_LESSEQUAL(a,b) xil_fpo_lessequal_d(a,b) +#define HLS_FPO_GREATERF(a,b) xil_fpo_greater_flt(a,b) +#define HLS_FPO_GREATER(a,b) xil_fpo_greater_d(a,b) +#define HLS_FPO_GREATEREQUALF(a,b) xil_fpo_greaterequal_flt(a,b) +#define HLS_FPO_GREATEREQUAL(a,b) xil_fpo_greaterequal_d(a,b) +#define HLS_FPO_NOTEQUALF(a,b) xil_fpo_notequal_flt(a,b) +#define HLS_FPO_NOTEQUAL(a,b) xil_fpo_notequal_d(a,b) +#define HLS_FPO_CONDCODEF(a,b) xil_fpo_condcode_flt(a,b) +#define HLS_FPO_CONDCODE(a,b) xil_fpo_condcode_d(a,b) +#define HLS_FPO_FTOI(a) xil_fpo_flttofix_int_flt(a) +#define HLS_FPO_DTOI(a) xil_fpo_flttofix_int_d(a) +#define HLS_FPO_ITOF(a) xil_fpo_fixtoflt_flt_int(a) +#define HLS_FPO_ITOD(a) xil_fpo_fixtoflt_d_int(a) +#define HLS_FPO_FTOF(a) xil_fpo_flttoflt_flt_flt(a) +#define HLS_FPO_DTOF(a) xil_fpo_flttoflt_flt_d(a) +#define HLS_FPO_FTOD(a) xil_fpo_flttoflt_d_flt(a) +#define HLS_FPO_DTOD(a) xil_fpo_flttoflt_d_d(a) + + +#include "floating_point_v7_0_bitacc_cmodel.h" // Must include before GMP and MPFR +#include "gmp.h" +#include "mpfr.h" + + //////////////////////////////////////////////////////////////////////// + // Operation functions: add + //////////////////////////////////////////////////////////////////////// + +inline float xil_fpo_add_flt(float a, float b) +{ + float res_flt = 0.0f; + + // xip_fpo_add_flt + xip_fpo_add_flt(&res_flt, a, b); // normal operation + return res_flt; +} + +inline double xil_fpo_add_d(double a, double b) +{ + double res_d = 0.0; + + // xip_fpo_add_d + xip_fpo_add_d(&res_d, a, b); // normal operation + return res_d; +} + + //////////////////////////////////////////////////////////////////////// + // Operation functions: subtract + //////////////////////////////////////////////////////////////////////// + +inline float xil_fpo_sub_flt(float a, float b) +{ + float res_flt = 0.0f; + + // xip_fpo_sub_flt + xip_fpo_sub_flt(&res_flt, a, b); // normal operation + return res_flt; +} + +inline double xil_fpo_sub_d(double a, double b) +{ + double res_d = 0.0; + + // xip_fpo_sub_d + xip_fpo_sub_d(&res_d, a, b); // normal operation + return res_d; +} + + //////////////////////////////////////////////////////////////////////// + // Operation functions: multiply + //////////////////////////////////////////////////////////////////////// + +inline float xil_fpo_mul_flt(float a, float b) +{ + float res_flt = 0.0f; + + // xip_fpo_mul_flt + xip_fpo_mul_flt(&res_flt, a, b); // normal operation + return res_flt; +} + +inline double xil_fpo_mul_d(double a, double b) +{ + double res_d = 0.0; + + // xip_fpo_mul_d + xip_fpo_mul_d(&res_d, a, b); // normal operation + return res_d; +} + + //////////////////////////////////////////////////////////////////////// + // Operation functions: divide + //////////////////////////////////////////////////////////////////////// + +inline float xil_fpo_div_flt(float a, float b) +{ + float res_flt = 0.0f; + + // xip_fpo_div_flt + xip_fpo_div_flt(&res_flt, a, b); // normal operation + return res_flt; +} + +inline double xil_fpo_div_d(double a, double b) +{ + double res_d = 0.0; + + // xip_fpo_div_d + 
xip_fpo_div_d(&res_d, a, b); // normal operation + return res_d; +} + + //////////////////////////////////////////////////////////////////////// + // Operation functions: reciprocal + //////////////////////////////////////////////////////////////////////// + +inline float xil_fpo_rec_flt(float a) +{ + float res_flt = 0.0f; + + // xip_fpo_rec_flt + xip_fpo_rec_flt(&res_flt, a); // normal operation + return res_flt; +} + +inline double xil_fpo_rec_d(double a) +{ + double res_d = 0.0; + + // xip_fpo_rec_d + xip_fpo_rec_d(&res_d, a); // normal operation + return res_d; +} + + //////////////////////////////////////////////////////////////////////// + // Operation functions: square root + //////////////////////////////////////////////////////////////////////// + +inline float xil_fpo_sqrt_flt(float a) +{ +// printf("Testing operation functions: square root\n"); + float res_flt = 0.0f; + + // xip_fpo_sqrt_flt + xip_fpo_sqrt_flt(&res_flt, a); // normal operation +// printf("float = sqrtf(a), and got res_flt=%f\n", res_flt); + return res_flt; +} + +inline double xil_fpo_sqrt_d(double a) +{ + double res_d = 0.0; + + // xip_fpo_sqrt_d + xip_fpo_sqrt_d(&res_d, a); // normal operation +// printf("double = sqrt(a), and got res_d=%f\n", res_d); + return res_d; +} + + //////////////////////////////////////////////////////////////////////// + // Operation functions: reciprocal square root + //////////////////////////////////////////////////////////////////////// + +inline float xil_fpo_recsqrt_flt(float a) +{ + float res_flt = 0.0f; + + // xip_fpo_recsqrt_flt + xip_fpo_recsqrt_flt(&res_flt, a); // normal operation + return res_flt; +} + +inline double xil_fpo_recsqrt_d(double a) +{ + double res_d = 0.0; + + // xip_fpo_recsqrt_d + xip_fpo_recsqrt_d(&res_d, a); // normal operation + return res_d; +} + + //////////////////////////////////////////////////////////////////////// + // Operation functions: absolute value + //////////////////////////////////////////////////////////////////////// + +inline float xil_fpo_abs_flt(float a) +{ + float res_flt = 0.0f; + + xip_fpo_abs_flt(&res_flt, a); + return res_flt; +} + +inline double xil_fpo_abs_d(double a) +{ + double res_d = 0.0; + + xip_fpo_abs_d(&res_d, a); + return res_d; +} + + //////////////////////////////////////////////////////////////////////// + // Operation functions: logarithm + //////////////////////////////////////////////////////////////////////// + +inline float xil_fpo_log_flt(float a) +{ + float res_flt = 0.0f; + + // xip_fpo_log_flt + xip_fpo_log_flt(&res_flt, a); // normal operation + return res_flt; +} + +inline double xil_fpo_log_d(double a) +{ + double res_d = 0.0; + + // xip_fpo_log_d + xip_fpo_log_d(&res_d, a); // normal operation + return res_d; +} + + //////////////////////////////////////////////////////////////////////// + // Operation functions: Exponential + //////////////////////////////////////////////////////////////////////// + +inline float xil_fpo_exp_flt(float a) +{ + float res_flt = 0.0f; + + // xip_fpo_exp_flt + xip_fpo_exp_flt(&res_flt, a); // normal operation + return res_flt; +} + +inline double xil_fpo_exp_d(double a) +{ + double res_d = 0.0; + + // xip_fpo_exp_d + xip_fpo_exp_d(&res_d, a); // normal operation + return res_d; +} + + //////////////////////////////////////////////////////////////////////// + // Operation functions: compare unordered + //////////////////////////////////////////////////////////////////////// + +inline int xil_fpo_unordered_flt(float a, float b) +{ + int res_int = 0; + + // 
xip_fpo_unordered_flt + xip_fpo_unordered_flt(&res_int, a, b); // normal operation + return res_int; +} + +inline int xil_fpo_unordered_d(double a, double b) +{ + int res_int = 0; + + // xip_fpo_unordered_d + xip_fpo_unordered_d(&res_int, a, b); // normal operation + return res_int; +} + + //////////////////////////////////////////////////////////////////////// + // Operation functions: compare equal + //////////////////////////////////////////////////////////////////////// + + +inline int xil_fpo_equal_flt(float a, float b) +{ + int res_int = 0; + + // xip_fpo_equal_flt + xip_fpo_equal_flt(&res_int, a, b); // normal operation + return res_int; +} + +inline int xil_fpo_equal_d(double a, double b) +{ + int res_int = 0; + + // xip_fpo_equal_d + xip_fpo_equal_d(&res_int, a, b); // normal operation + return res_int; +} + + //////////////////////////////////////////////////////////////////////// + // Operation functions: compare less than + //////////////////////////////////////////////////////////////////////// + +inline int xil_fpo_less_flt(float a, float b) +{ + int res_int = 0; + + // xip_fpo_less_flt + xip_fpo_less_flt(&res_int, a, b); // normal operation + return res_int; +} + +inline int xil_fpo_less_d(double a, double b) +{ + int res_int = 0; + + // xip_fpo_less_d + xip_fpo_less_d(&res_int, a, b); // normal operation + return res_int; +} + + //////////////////////////////////////////////////////////////////////// + // Operation functions: compare less than or equal + //////////////////////////////////////////////////////////////////////// + +inline int xil_fpo_lessequal_flt(float a, float b) +{ + int res_int = 0; + + // xip_fpo_lessequal_flt + xip_fpo_lessequal_flt(&res_int, a, b); // normal operation + return res_int; +} + +inline int xil_fpo_lessequal_d(double a, double b) +{ + int res_int = 0; + + // xip_fpo_lessequal_d + xip_fpo_lessequal_d(&res_int, a, b); // normal operation + return res_int; +} + + //////////////////////////////////////////////////////////////////////// + // Operation functions: compare greater than + //////////////////////////////////////////////////////////////////////// + +inline int xil_fpo_greater_flt(float a, float b) +{ + int res_int = 0; + + // xip_fpo_greater_flt + xip_fpo_greater_flt(&res_int, a, b); // normal operation + return res_int; +} + +inline int xil_fpo_greater_d(double a, double b) +{ + int res_int = 0; + + // xip_fpo_greater_d + xip_fpo_greater_d(&res_int, a, b); // normal operation + return res_int; +} + + //////////////////////////////////////////////////////////////////////// + // Operation functions: compare greater than or equal + //////////////////////////////////////////////////////////////////////// + +inline int xil_fpo_greaterequal_flt(float a, float b) +{ + int res_int = 0; + + // xip_fpo_greaterequal_flt + xip_fpo_greaterequal_flt(&res_int, a, b); // normal operation + return res_int; +} + +inline int xil_fpo_greaterequal_d(double a, double b) +{ + int res_int = 0; + + // xip_fpo_greaterequal_d + xip_fpo_greaterequal_d(&res_int, a, b); // normal operation + return res_int; +} + + //////////////////////////////////////////////////////////////////////// + // Operation functions: compare not equal + //////////////////////////////////////////////////////////////////////// + +inline int xil_fpo_notequal_flt(float a, float b) +{ + int res_int = 0; + + // xip_fpo_notequal_flt + xip_fpo_notequal_flt(&res_int, a, b); // normal operation + return res_int; +} + +inline int xil_fpo_notequal_d(double a, double b) +{ + int res_int = 0; + + 
xip_fpo_notequal_d(&res_int, a, b); // normal operation + return res_int; +} + + //////////////////////////////////////////////////////////////////////// + // Operation functions: compare condition code + //////////////////////////////////////////////////////////////////////// + +inline int xil_fpo_condcode_flt(float a, float b) +{ + int res_int = 0; + + // xip_fpo_condcode_flt + xip_fpo_condcode_flt(&res_int, a, b); // normal operation + return res_int; +} + +inline int xil_fpo_condcode_d(double a, double b) +{ + int res_int = 0; + + // xip_fpo_condcode_d + xip_fpo_condcode_d(&res_int, a, b); // normal operation + return res_int; +} + + //////////////////////////////////////////////////////////////////////// + // Conversion functions: conversion code + //////////////////////////////////////////////////////////////////////// +inline int xil_fpo_flttofix_int_flt(float a) +{ + int res_int = 0; + + // xip_fpo_flttofix_int_flt + xip_fpo_flttofix_int_flt(&res_int, a); // normal operation + return res_int; +} + +inline int xil_fpo_flttofix_int_d(double a) +{ + int res_int = 0; + + // xip_fpo_flttofix_int_d + xip_fpo_flttofix_int_d(&res_int, a); // normal operation + return res_int; +} + +inline float xil_fpo_fixtoflt_flt_int(int a) +{ + float res_flt = 0.0f; + + // xip_fpo_fixtoflt_flt_int + xip_fpo_fixtoflt_flt_int(&res_flt, a); // normal operation + return res_flt; +} + +inline double xil_fpo_fixtoflt_d_int(int a) +{ + double res_d = 0.0; + + // xip_fpo_fixtoflt_d_int + xip_fpo_fixtoflt_d_int(&res_d, a); // normal operation + return res_d; +} + +inline float xil_fpo_flttoflt_flt_flt(float a) +{ + float res_flt = 0.0f; + + // xip_fpo_flttoflt_flt_flt + xip_fpo_flttoflt_flt_flt(&res_flt, a); // normal operation + return res_flt; +} + +inline float xil_fpo_flttoflt_flt_d(double a) +{ + float res_flt = 0.0f; + + // xip_fpo_flttoflt_flt_d + xip_fpo_flttoflt_flt_d(&res_flt, a); // normal operation + return res_flt; +} + +inline double xil_fpo_flttoflt_d_flt(float a) +{ + double res_d = 0.0; + + // xip_fpo_flttoflt_d_flt + xip_fpo_flttoflt_d_flt(&res_d, a); // normal operation + return res_d; +} + +inline double xil_fpo_flttoflt_d_d(double a) +{ + double res_d = 0.0; + + // xip_fpo_flttoflt_d_d + xip_fpo_flttoflt_d_d(&res_d, a); // normal operation + return res_d; +} + +#endif +#endif /* #ifndef __AESL_FPO_H__*/ + + diff --git a/include/hls_half.h b/include/hls_half.h new file mode 100644 index 0000000..0c17568 --- /dev/null +++ b/include/hls_half.h @@ -0,0 +1,3348 @@ +// half - IEEE 754-based half-precision floating point library. +// +// Copyright (c) 2012-2013 Christian Rau +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR +// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +// Version 1.11.0 + +/// \file +/// Main header file for half precision functionality. + +#ifndef __HLS_HALF_H__ +#define __HLS_HALF_H__ + +#ifndef __cplusplus +#ifndef __SYNTHESIS__ +#error C++ is required to include this header file +#endif +#endif +#ifndef __SYNTHESIS__ +#include +#endif + +#ifdef __SYNTHESIS__ + +#ifndef _HLS_HALF_DEFINED_ +typedef __fp16 half; +#endif + +#else // AESL_SYN + +// XXX work-around old version of GMP with C++11 used by fpo. +#include +#include "hls_fpo.h" + +//Forward declaration of ap_fixed_base. +#include "ap_decl.h" + +#ifdef HLS_NO_XIL_FPO_LIB +//#warning "Xilinx Floating Point Operator IP core does not provide simulation models for ARM architecture. Then there may be mismatch between simulation model and FPGA implementation" + +#else + +// Fiddle an MPFR variable to contain the same information as an xip_fpo_t variable without allocating memory +// m is the mpfr_t variable (destination), x is the xip_fpo_t variable (source) +// First variant: copy all information +#define XIP_FPO_2_MPFR(m, x) \ +(m)->_mpfr_prec = (x)->_xip_fpo_mant_prec; \ +(m)->_mpfr_sign = (x)->_xip_fpo_sign; \ +(m)->_mpfr_exp = (x)->_xip_fpo_exp; \ +(m)->_mpfr_d = (x)->_xip_fpo_d; + +#endif + +/// Combined gcc version number. +#define HALF_GNUC_VERSION (__GNUC__*100+__GNUC_MINOR__) + +//check C++11 language features +#if defined(__clang__) //clang + #if __has_feature(cxx_static_assert) && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) + #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 + #endif + #if __has_feature(cxx_constexpr) && !defined(HALF_ENABLE_CPP11_CONSTEXPR) + #define HALF_ENABLE_CPP11_CONSTEXPR 1 + #endif + #if __has_feature(cxx_noexcept) && !defined(HALF_ENABLE_CPP11_NOEXCEPT) + #define HALF_ENABLE_CPP11_NOEXCEPT 1 + #endif + #if __has_feature(cxx_user_literals) && !defined(HALF_ENABLE_CPP11_USER_LITERALS) + #define HALF_ENABLE_CPP11_USER_LITERALS 1 + #endif + #if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) && !defined(HALF_ENABLE_CPP11_LONG_LONG) + #define HALF_ENABLE_CPP11_LONG_LONG 1 + #endif +/*#elif defined(__INTEL_COMPILER) //Intel C++ + #if __INTEL_COMPILER >= 1100 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) ???????? + #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 + #endif + #if __INTEL_COMPILER >= 1300 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) ???????? + #define HALF_ENABLE_CPP11_CONSTEXPR 1 + #endif + #if __INTEL_COMPILER >= 1300 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) ???????? + #define HALF_ENABLE_CPP11_NOEXCEPT 1 + #endif + #if __INTEL_COMPILER >= 1100 && !defined(HALF_ENABLE_CPP11_LONG_LONG) ???????? 
+ #define HALF_ENABLE_CPP11_LONG_LONG 1 + #endif*/ +#elif defined(__GNUC__) //gcc + #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L + #if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) + #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 + #endif + #if HALF_GNUC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) + #define HALF_ENABLE_CPP11_CONSTEXPR 1 + #endif + #if HALF_GNUC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) + #define HALF_ENABLE_CPP11_NOEXCEPT 1 + #endif + #if HALF_GNUC_VERSION >= 407 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) + #define HALF_ENABLE_CPP11_USER_LITERALS 1 + #endif + #if !defined(HALF_ENABLE_CPP11_LONG_LONG) + #define HALF_ENABLE_CPP11_LONG_LONG 1 + #endif + #endif +#elif defined(_MSC_VER) //Visual C++ + #if _MSC_VER >= 1600 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) + #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 + #endif + #if _MSC_VER >= 1310 && !defined(HALF_ENABLE_CPP11_LONG_LONG) + #define HALF_ENABLE_CPP11_LONG_LONG 1 + #endif + #define HALF_POP_WARNINGS 1 + #pragma warning(push) + #pragma warning(disable : 4099 4127 4146) //struct vs class, constant in if, negative unsigned +#endif + +//check C++11 library features +#include +#if defined(_LIBCPP_VERSION) //libc++ + #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 + #ifndef HALF_ENABLE_CPP11_TYPE_TRAITS + #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 + #endif + #ifndef HALF_ENABLE_CPP11_CSTDINT + #define HALF_ENABLE_CPP11_CSTDINT 1 + #endif + #ifndef HALF_ENABLE_CPP11_CMATH + #define HALF_ENABLE_CPP11_CMATH 1 + #endif + #ifndef HALF_ENABLE_CPP11_HASH + #define HALF_ENABLE_CPP11_HASH 1 + #endif + #endif +#elif defined(__GLIBCXX__) //libstdc++ + #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 + #ifdef __clang__ + #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS) + #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 + #endif + #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CSTDINT) + #define HALF_ENABLE_CPP11_CSTDINT 1 + #endif + #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CMATH) + #define HALF_ENABLE_CPP11_CMATH 1 + #endif + #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_HASH) + #define HALF_ENABLE_CPP11_HASH 1 + #endif + #else + #if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CSTDINT) + #define HALF_ENABLE_CPP11_CSTDINT 1 + #endif + #if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CMATH) + #define HALF_ENABLE_CPP11_CMATH 1 + #endif + #if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_HASH) + #define HALF_ENABLE_CPP11_HASH 1 + #endif + #endif + #endif +#elif defined(_CPPLIB_VER) //Dinkumware/Visual C++ + #if _CPPLIB_VER >= 520 + #ifndef HALF_ENABLE_CPP11_TYPE_TRAITS + #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 + #endif + #ifndef HALF_ENABLE_CPP11_CSTDINT + #define HALF_ENABLE_CPP11_CSTDINT 1 + #endif + #ifndef HALF_ENABLE_CPP11_HASH + #define HALF_ENABLE_CPP11_HASH 1 + #endif + #endif + #if _CPPLIB_VER >= 610 + #ifndef HALF_ENABLE_CPP11_CMATH + #define HALF_ENABLE_CPP11_CMATH 1 + #endif + #endif +#endif +#undef HALF_GNUC_VERSION + +//support constexpr +#if HALF_ENABLE_CPP11_CONSTEXPR + #define HALF_CONSTEXPR constexpr + #define HALF_CONSTEXPR_CONST constexpr +#else + #define HALF_CONSTEXPR + #define HALF_CONSTEXPR_CONST const +#endif + +//support noexcept +#if HALF_ENABLE_CPP11_NOEXCEPT + #define HALF_NOEXCEPT noexcept + #define HALF_NOTHROW noexcept +#else + #define HALF_NOEXCEPT + #define HALF_NOTHROW throw() +#endif + +#include +#include +#include +#include 
+//#include
+#include
+#if HALF_ENABLE_CPP11_TYPE_TRAITS
+    #include
+#endif
+#if HALF_ENABLE_CPP11_CSTDINT
+    #include
+#endif
+#if HALF_ENABLE_CPP11_HASH
+    #include
+#endif
+
+
+/// Default rounding mode.
+/// This specifies the rounding mode used for all conversions between [half](\ref half)s and `float`s as well as
+/// for the half_cast() if not specifying a rounding mode explicitly. It can be redefined (before including half.hpp) to one
+/// of the standard rounding modes using their respective constants or the equivalent values of `std::float_round_style`:
+///
+/// `std::float_round_style`         | value | rounding
+/// ---------------------------------|-------|-------------------------
+/// `std::round_indeterminate`       | -1    | fastest
+/// `std::round_toward_zero`         | 0     | toward zero
+/// `std::round_to_nearest`          | 1     | to nearest (default here)
+/// `std::round_toward_infinity`     | 2     | toward positive infinity
+/// `std::round_toward_neg_infinity` | 3     | toward negative infinity
+///
+/// In this header it is set to `1` (`std::round_to_nearest`), so conversions round to the nearest representable
+/// half-precision value. Upstream half.hpp defaults to `-1` (`std::round_indeterminate`), which uses truncation (round
+/// toward zero, but with overflows set to infinity) and is the fastest rounding mode possible. It can even be set to
+/// `std::numeric_limits<float>::round_style` to synchronize the rounding mode with that of the underlying single-precision
+/// implementation.
+#ifndef HALF_ROUND_STYLE
+    #define HALF_ROUND_STYLE 1 // = std::round_to_nearest
+#endif
+
+/// Tie-breaking behaviour for round to nearest.
+/// This specifies if ties in round to nearest should be resolved by rounding to the nearest even value. Upstream half.hpp
+/// defines this to `0`, giving the faster but slightly more biased behaviour of rounding away from zero in half-way cases
+/// (and thus equal to the round() function); this header defines it to `1` for the more IEEE-conformant
+/// round-half-to-even behaviour.
+#ifndef HALF_ROUND_TIES_TO_EVEN
+    #define HALF_ROUND_TIES_TO_EVEN 1 // ties to even
+#endif
+
+/// Value signaling overflow.
+/// In correspondence with `HUGE_VAL[F|L]` from `<cmath>` this symbol expands to a positive value signaling the overflow of an
+/// operation, in particular it just evaluates to positive infinity.
+#define HUGE_VALH std::numeric_limits<half>::infinity()
+
+/// Fast half-precision fma function.
+/// This symbol is only defined if the fma() function generally executes as fast as, or faster than, a separate
+/// half-precision multiplication followed by an addition. Due to the internal single-precision implementation of all
+/// arithmetic operations, this is in fact always the case.
+#define FP_FAST_FMAH 1
+
+#ifndef FP_ILOGB0
+    #define FP_ILOGB0 INT_MIN
+#endif
+#ifndef FP_ILOGBNAN
+    #define FP_ILOGBNAN INT_MAX
+#endif
+#ifndef FP_SUBNORMAL
+    #define FP_SUBNORMAL 0
+#endif
+#ifndef FP_ZERO
+    #define FP_ZERO 1
+#endif
+#ifndef FP_NAN
+    #define FP_NAN 2
+#endif
+#ifndef FP_INFINITE
+    #define FP_INFINITE 3
+#endif
+#ifndef FP_NORMAL
+    #define FP_NORMAL 4
+#endif
+
+
+/// Main namespace for half precision functionality.
+/// This namespace contains all the functionality provided by the library.
+    class half;
+
+    /// \internal
+    /// \brief Implementation details.
+    namespace detail
+    {
+    #if HALF_ENABLE_CPP11_TYPE_TRAITS
+        /// Conditional type.
+        template<bool B,typename T,typename F> struct conditional : std::conditional<B,T,F> {};
+
+        /// Helper for tag dispatching.
+        template<bool B> struct bool_type : std::integral_constant<bool,B> {};
+        using std::true_type;
+        using std::false_type;
+
+        /// Type traits for floating point types.
+        template<typename T> struct is_float : std::is_floating_point<T> {};
+    #else
+        /// Conditional type.
+        template<bool B,typename T,typename F> struct conditional { typedef T type; };
+        template<typename T,typename F> struct conditional<false,T,F> { typedef F type; };
+
+        /// Helper for tag dispatching.
+        template<bool B> struct bool_type {};
+        typedef bool_type<true> true_type;
+        typedef bool_type<false> false_type;
+
+        /// Type traits for floating point types.
+        template<typename T> struct is_float : false_type {};
+        template<typename T> struct is_float<const T> : is_float<T> {};
+        template<typename T> struct is_float<volatile T> : is_float<T> {};
+        template<typename T> struct is_float<const volatile T> : is_float<T> {};
+        template<> struct is_float<float> : true_type {};
+        template<> struct is_float<double> : true_type {};
+        template<> struct is_float<long double> : true_type {};
+    #endif
+
+    #if HALF_ENABLE_CPP11_CSTDINT
+        /// Unsigned integer of (at least) 16 bits width.
+        typedef std::uint_least16_t uint16;
+
+        /// Unsigned integer of (at least) 32 bits width.
+        typedef std::uint_least32_t uint32;
+
+        /// Fastest signed integer capable of holding all values of type uint16.
+        typedef std::int_fast32_t int17;
+    #else
+        /// Unsigned integer of (at least) 16 bits width.
+        typedef unsigned short uint16;
+
+        /// Unsigned integer of (at least) 32 bits width.
+        typedef conditional<std::numeric_limits<unsigned int>::digits>=32,unsigned int,unsigned long>::type uint32;
+
+        /// Fastest signed integer capable of holding all values of type uint16.
+        typedef conditional<std::numeric_limits<int>::digits>=16,int,long>::type int17;
+    #endif
+
+        /// Tag type for binary construction.
+        struct binary_t {};
+
+        /// Tag for binary construction.
+        HALF_CONSTEXPR_CONST binary_t binary = binary_t();
+
+        /// Temporary half-precision expression.
+        /// This class represents a half-precision expression which just stores a single-precision value internally.
+        struct expr
+        {
+            /// Conversion constructor.
+            /// \param f single-precision value to convert
+            explicit HALF_CONSTEXPR expr(float f) : value_(f) {}
+
+            /// Conversion to single-precision.
+            /// \return single precision value representing expression value
+            HALF_CONSTEXPR operator float() const { return value_; }
+
+        private:
+            /// Internal expression value stored in single-precision.
+            float value_;
+        };
+
+        /// SFINAE helper for generic half-precision functions.
+        /// This class template has to be specialized for each valid combination of argument types to provide a corresponding
+        /// `type` member equivalent to \a T.
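+        /// As a hedged illustration (assuming the specializations listed below): enable<expr,half,half>::type
+        /// exists and is expr, so an operator declared as
+        ///
+        ///     template<typename T,typename U> typename enable<expr,T,U>::type operator+(T x, U y);
+        ///
+        /// drops out of overload resolution (SFINAE) for any T/U combination that is not specialized.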
+ /// \tparam T type to return + template struct enable {}; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef float type; }; + template struct enable { typedef float type; }; + template struct enable { typedef double type; }; + template struct enable { typedef double type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + template struct enable { typedef T type; }; + + /// Return type for specialized generic 2-argument half-precision functions. + /// This class template has to be specialized for each valid combination of argument types to provide a corresponding + /// `type` member denoting the appropriate return type. + /// \tparam T first argument type + /// \tparam U first argument type + template struct result : enable {}; + template<> struct result { typedef half type; }; + + /// \name Classification helpers + /// \{ + + /// Check for infinity. + /// \tparam T argument type (builtin floating point type) + /// \param arg value to query + /// \retval true if infinity + /// \retval false else + template bool builtin_isinf(T arg) + { + #if HALF_ENABLE_CPP11_CMATH + return std::isinf(arg); + #elif defined(_MSC_VER) + return !_finite(static_cast(arg)) && !_isnan(static_cast(arg)); + #else + return arg == std::numeric_limits::infinity() || arg == -std::numeric_limits::infinity(); + #endif + } + + /// Check for NaN. + /// \tparam T argument type (builtin floating point type) + /// \param arg value to query + /// \retval true if not a number + /// \retval false else + template bool builtin_isnan(T arg) + { + #if HALF_ENABLE_CPP11_CMATH + return std::isnan(arg); + #elif defined(_MSC_VER) + return _isnan(static_cast(arg)) != 0; + #else + return arg != arg; + #endif + } + + /// Check sign. 
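+ /// (Illustrative note, not in the original documentation: the non-C++11
+ /// fallback below needs the extra `T(1)/arg < T()` test only to classify
+ /// negative zero, since -0.0 < 0.0 is false while 1.0/-0.0 is -infinity.)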
+ /// \tparam T argument type (builtin floating point type) + /// \param arg value to query + /// \retval true if signbit set + /// \retval false else + template bool builtin_signbit(T arg) + { + #if HALF_ENABLE_CPP11_CMATH + return std::signbit(arg); + #else + return arg < T() || (arg == T() && T(1)/arg < T()); + #endif + } + + /// \} + /// \name Conversion + /// \{ + + /// Convert IEEE single-precision to half-precision. + /// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf). + /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding + /// \param value single-precision value + /// \return binary representation of half-precision value + template uint16 float2half_impl(float value, true_type) + { + #if HALF_ENABLE_CPP11_STATIC_ASSERT + static_assert(std::numeric_limits::is_iec559, "float to half conversion needs IEEE 754 conformant 'float' type"); + static_assert(sizeof(uint32)==sizeof(float), "float to half conversion needs unsigned integer type of exactly the size of a 'float'"); + #endif + static const uint16 base_table[512] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, + 0x0200, 0x0400, 0x0800, 0x0C00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400, 0x3800, 0x3C00, + 0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800, 0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, + 0x8200, 0x8400, 0x8800, 0x8C00, 0x9000, 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400, 0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00, + 0xC000, 0xC400, 0xC800, 0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00, 0xF000, 0xF400, 0xF800, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00 }; + static const unsigned char shift_table[512] = { + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13 }; + uint32 bits;// = *reinterpret_cast(&value); //violating strict aliasing! + std::memcpy(&bits, &value, sizeof(float)); + uint16 hbits = base_table[bits>>23] + static_cast((bits&0x7FFFFF)>>shift_table[bits>>23]); + if(R == std::round_to_nearest) + hbits += (((bits&0x7FFFFF)>>(shift_table[bits>>23]-1))|(((bits>>23)&0xFF)==102)) & ((hbits&0x7C00)!=0x7C00) + #if HALF_ROUND_TIES_TO_EVEN + & (((((static_cast(1)<<(shift_table[bits>>23]-1))-1)&bits)!=0)|hbits) + #endif + ; + else if(R == std::round_toward_zero) + hbits -= ((hbits&0x7FFF)==0x7C00) & ~shift_table[bits>>23]; + else if(R == std::round_toward_infinity) + hbits += ((((bits&0x7FFFFF&((static_cast(1)<<(shift_table[bits>>23]))-1))!=0)|(((bits>>23)<=102)& + ((bits>>23)!=0)))&(hbits<0x7C00)) - ((hbits==0xFC00)&((bits>>23)!=511)); + else if(R == std::round_toward_neg_infinity) + hbits += ((((bits&0x7FFFFF&((static_cast(1)<<(shift_table[bits>>23]))-1))!=0)|(((bits>>23)<=358)& + ((bits>>23)!=256)))&(hbits<0xFC00)&(hbits>>15)) - ((hbits==0x7C00)&((bits>>23)!=255)); + return hbits; + } + + /// Convert non-IEEE single-precision to half-precision. + /// \param value single-precision value + /// \return binary representation of half-precision value + template uint16 float2half_impl(float value, false_type) + { + uint16 hbits = builtin_signbit(value) << 15; + if(value == 0.0f) + return hbits; + if(builtin_isnan(value)) + return hbits | 0x7FFF; + if(builtin_isinf(value)) + return hbits | 0x7C00; + int exp; + std::frexp(value, &exp); + if(exp > 16) + { + if(R == std::round_toward_zero) + return hbits | 0x7BFF; + else if(R == std::round_toward_infinity) + return hbits | 0x7C00 - (hbits>>15); + else if(R == std::round_toward_neg_infinity) + return hbits | 0x7BFF + (hbits>>15); + return hbits | 0x7C00; + } + if(exp < -13) + value = std::ldexp(value, 24); + else + { + value = std::ldexp(value, 11-exp); + hbits |= ((exp+14)<<10); + } + int ival = static_cast(value); + hbits |= static_cast(std::abs(ival)&0x3FF); + if(R == std::round_to_nearest) + { + float diff = std::abs(value-static_cast(ival)); + #if HALF_ROUND_TIES_TO_EVEN + hbits += (diff>0.5f) | ((diff==0.5f)&hbits); + #else + hbits += diff >= 0.5f; + #endif + } + else if(R == std::round_toward_infinity) + hbits += value > static_cast(ival); + else if(R == std::round_toward_neg_infinity) + hbits += value < static_cast(ival); + return hbits; + } + + /// Convert single-precision to half-precision. + /// \param value single-precision value + /// \return binary representation of half-precision value + template uint16 float2half(float value) + { + return float2half_impl(value, bool_type::is_iec559&&sizeof(uint32)==sizeof(float)>()); + } + + /// Convert integer to half-precision floating point. 
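+ /// As a worked illustration (not part of the original documentation): for value 7,
+ /// m is shifted from 7 up to 0x700 while exp drops from 25 to 17, so
+ /// bits = (17<<10)|(0x700&0x3FF) = 0x4700, which is 2^(17-15) * (1 + 0x300/0x400) = 7.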
+ /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding + /// \tparam S `true` if value negative, `false` else + /// \tparam T type to convert (builtin integer type) + /// \param value non-negative integral value + /// \return binary representation of half-precision value + template uint16 int2half_impl(T value) + { + if(S) + value = -value; + uint16 bits = S << 15; + if(value > 65504) + { + if(R == std::round_toward_infinity) + bits |= 0x7C00 - S; + else if(R == std::round_toward_neg_infinity) + bits |= 0x7BFF + S; + else + bits |= 0x7BFF + (R!=std::round_toward_zero); + } + else if(value) + { + unsigned int m = value, exp = 25; + for(; m<0x400; m<<=1,--exp) ; + for(; m>0x7FF; m>>=1,++exp) ; + bits |= (exp<<10) | (m&0x3FF); + if(exp > 25) + { + if(R == std::round_to_nearest) + bits += (value>>(exp-26)) & 1 + #if HALF_ROUND_TIES_TO_EVEN + & (((((1<<(exp-26))-1)&value)!=0)|bits) + #endif + ; + else if(R == std::round_toward_infinity) + bits += ((value&((1<<(exp-25))-1))!=0) & !S; + else if(R == std::round_toward_neg_infinity) + bits += ((value&((1<<(exp-25))-1))!=0) & S; + } + } + return bits; + } + + /// Convert integer to half-precision floating point. + /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding + /// \tparam T type to convert (builtin integer type) + /// \param value integral value + /// \return binary representation of half-precision value + template uint16 int2half(T value) + { + return (value<0) ? int2half_impl(value) : int2half_impl(value); + } + + /// Convert half-precision to IEEE single-precision. + /// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf). + /// \param value binary representation of half-precision value + /// \return single-precision value + inline float half2float_impl(uint16 value, true_type) + { + #if HALF_ENABLE_CPP11_STATIC_ASSERT + static_assert(std::numeric_limits::is_iec559, "half to float conversion needs IEEE 754 conformant 'float' type"); + static_assert(sizeof(uint32)==sizeof(float), "half to float conversion needs unsigned integer type of exactly the size of a 'float'"); + #endif + static const uint32 mantissa_table[2048] = { + 0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000, 0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000, 0x35400000, 0x35500000, 0x35600000, 0x35700000, + 0x35800000, 0x35880000, 0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000, 0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000, 0x35F00000, 0x35F80000, + 0x36000000, 0x36040000, 0x36080000, 0x360C0000, 0x36100000, 0x36140000, 0x36180000, 0x361C0000, 0x36200000, 0x36240000, 0x36280000, 0x362C0000, 0x36300000, 0x36340000, 0x36380000, 0x363C0000, + 0x36400000, 0x36440000, 0x36480000, 0x364C0000, 0x36500000, 0x36540000, 0x36580000, 0x365C0000, 0x36600000, 0x36640000, 0x36680000, 0x366C0000, 0x36700000, 0x36740000, 0x36780000, 0x367C0000, + 0x36800000, 0x36820000, 0x36840000, 0x36860000, 0x36880000, 0x368A0000, 0x368C0000, 0x368E0000, 0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369A0000, 0x369C0000, 0x369E0000, + 0x36A00000, 0x36A20000, 0x36A40000, 0x36A60000, 0x36A80000, 0x36AA0000, 0x36AC0000, 0x36AE0000, 0x36B00000, 0x36B20000, 0x36B40000, 0x36B60000, 0x36B80000, 0x36BA0000, 0x36BC0000, 0x36BE0000, + 0x36C00000, 0x36C20000, 0x36C40000, 0x36C60000, 0x36C80000, 0x36CA0000, 0x36CC0000, 0x36CE0000, 0x36D00000, 0x36D20000, 0x36D40000, 0x36D60000, 0x36D80000, 
0x36DA0000, 0x36DC0000, 0x36DE0000, + 0x36E00000, 0x36E20000, 0x36E40000, 0x36E60000, 0x36E80000, 0x36EA0000, 0x36EC0000, 0x36EE0000, 0x36F00000, 0x36F20000, 0x36F40000, 0x36F60000, 0x36F80000, 0x36FA0000, 0x36FC0000, 0x36FE0000, + 0x37000000, 0x37010000, 0x37020000, 0x37030000, 0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000, 0x370A0000, 0x370B0000, 0x370C0000, 0x370D0000, 0x370E0000, 0x370F0000, + 0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000, 0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371A0000, 0x371B0000, 0x371C0000, 0x371D0000, 0x371E0000, 0x371F0000, + 0x37200000, 0x37210000, 0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000, 0x37280000, 0x37290000, 0x372A0000, 0x372B0000, 0x372C0000, 0x372D0000, 0x372E0000, 0x372F0000, + 0x37300000, 0x37310000, 0x37320000, 0x37330000, 0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000, 0x373A0000, 0x373B0000, 0x373C0000, 0x373D0000, 0x373E0000, 0x373F0000, + 0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000, 0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374A0000, 0x374B0000, 0x374C0000, 0x374D0000, 0x374E0000, 0x374F0000, + 0x37500000, 0x37510000, 0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000, 0x37580000, 0x37590000, 0x375A0000, 0x375B0000, 0x375C0000, 0x375D0000, 0x375E0000, 0x375F0000, + 0x37600000, 0x37610000, 0x37620000, 0x37630000, 0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000, 0x376A0000, 0x376B0000, 0x376C0000, 0x376D0000, 0x376E0000, 0x376F0000, + 0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000, 0x37760000, 0x37770000, 0x37780000, 0x37790000, 0x377A0000, 0x377B0000, 0x377C0000, 0x377D0000, 0x377E0000, 0x377F0000, + 0x37800000, 0x37808000, 0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000, 0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000, 0x37870000, 0x37878000, + 0x37880000, 0x37888000, 0x37890000, 0x37898000, 0x378A0000, 0x378A8000, 0x378B0000, 0x378B8000, 0x378C0000, 0x378C8000, 0x378D0000, 0x378D8000, 0x378E0000, 0x378E8000, 0x378F0000, 0x378F8000, + 0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000, 0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000, 0x37960000, 0x37968000, 0x37970000, 0x37978000, + 0x37980000, 0x37988000, 0x37990000, 0x37998000, 0x379A0000, 0x379A8000, 0x379B0000, 0x379B8000, 0x379C0000, 0x379C8000, 0x379D0000, 0x379D8000, 0x379E0000, 0x379E8000, 0x379F0000, 0x379F8000, + 0x37A00000, 0x37A08000, 0x37A10000, 0x37A18000, 0x37A20000, 0x37A28000, 0x37A30000, 0x37A38000, 0x37A40000, 0x37A48000, 0x37A50000, 0x37A58000, 0x37A60000, 0x37A68000, 0x37A70000, 0x37A78000, + 0x37A80000, 0x37A88000, 0x37A90000, 0x37A98000, 0x37AA0000, 0x37AA8000, 0x37AB0000, 0x37AB8000, 0x37AC0000, 0x37AC8000, 0x37AD0000, 0x37AD8000, 0x37AE0000, 0x37AE8000, 0x37AF0000, 0x37AF8000, + 0x37B00000, 0x37B08000, 0x37B10000, 0x37B18000, 0x37B20000, 0x37B28000, 0x37B30000, 0x37B38000, 0x37B40000, 0x37B48000, 0x37B50000, 0x37B58000, 0x37B60000, 0x37B68000, 0x37B70000, 0x37B78000, + 0x37B80000, 0x37B88000, 0x37B90000, 0x37B98000, 0x37BA0000, 0x37BA8000, 0x37BB0000, 0x37BB8000, 0x37BC0000, 0x37BC8000, 0x37BD0000, 0x37BD8000, 0x37BE0000, 0x37BE8000, 0x37BF0000, 0x37BF8000, + 0x37C00000, 0x37C08000, 0x37C10000, 0x37C18000, 0x37C20000, 0x37C28000, 0x37C30000, 0x37C38000, 0x37C40000, 0x37C48000, 0x37C50000, 0x37C58000, 0x37C60000, 0x37C68000, 0x37C70000, 0x37C78000, + 0x37C80000, 0x37C88000, 
0x37C90000, 0x37C98000, 0x37CA0000, 0x37CA8000, 0x37CB0000, 0x37CB8000, 0x37CC0000, 0x37CC8000, 0x37CD0000, 0x37CD8000, 0x37CE0000, 0x37CE8000, 0x37CF0000, 0x37CF8000, + 0x37D00000, 0x37D08000, 0x37D10000, 0x37D18000, 0x37D20000, 0x37D28000, 0x37D30000, 0x37D38000, 0x37D40000, 0x37D48000, 0x37D50000, 0x37D58000, 0x37D60000, 0x37D68000, 0x37D70000, 0x37D78000, + 0x37D80000, 0x37D88000, 0x37D90000, 0x37D98000, 0x37DA0000, 0x37DA8000, 0x37DB0000, 0x37DB8000, 0x37DC0000, 0x37DC8000, 0x37DD0000, 0x37DD8000, 0x37DE0000, 0x37DE8000, 0x37DF0000, 0x37DF8000, + 0x37E00000, 0x37E08000, 0x37E10000, 0x37E18000, 0x37E20000, 0x37E28000, 0x37E30000, 0x37E38000, 0x37E40000, 0x37E48000, 0x37E50000, 0x37E58000, 0x37E60000, 0x37E68000, 0x37E70000, 0x37E78000, + 0x37E80000, 0x37E88000, 0x37E90000, 0x37E98000, 0x37EA0000, 0x37EA8000, 0x37EB0000, 0x37EB8000, 0x37EC0000, 0x37EC8000, 0x37ED0000, 0x37ED8000, 0x37EE0000, 0x37EE8000, 0x37EF0000, 0x37EF8000, + 0x37F00000, 0x37F08000, 0x37F10000, 0x37F18000, 0x37F20000, 0x37F28000, 0x37F30000, 0x37F38000, 0x37F40000, 0x37F48000, 0x37F50000, 0x37F58000, 0x37F60000, 0x37F68000, 0x37F70000, 0x37F78000, + 0x37F80000, 0x37F88000, 0x37F90000, 0x37F98000, 0x37FA0000, 0x37FA8000, 0x37FB0000, 0x37FB8000, 0x37FC0000, 0x37FC8000, 0x37FD0000, 0x37FD8000, 0x37FE0000, 0x37FE8000, 0x37FF0000, 0x37FF8000, + 0x38000000, 0x38004000, 0x38008000, 0x3800C000, 0x38010000, 0x38014000, 0x38018000, 0x3801C000, 0x38020000, 0x38024000, 0x38028000, 0x3802C000, 0x38030000, 0x38034000, 0x38038000, 0x3803C000, + 0x38040000, 0x38044000, 0x38048000, 0x3804C000, 0x38050000, 0x38054000, 0x38058000, 0x3805C000, 0x38060000, 0x38064000, 0x38068000, 0x3806C000, 0x38070000, 0x38074000, 0x38078000, 0x3807C000, + 0x38080000, 0x38084000, 0x38088000, 0x3808C000, 0x38090000, 0x38094000, 0x38098000, 0x3809C000, 0x380A0000, 0x380A4000, 0x380A8000, 0x380AC000, 0x380B0000, 0x380B4000, 0x380B8000, 0x380BC000, + 0x380C0000, 0x380C4000, 0x380C8000, 0x380CC000, 0x380D0000, 0x380D4000, 0x380D8000, 0x380DC000, 0x380E0000, 0x380E4000, 0x380E8000, 0x380EC000, 0x380F0000, 0x380F4000, 0x380F8000, 0x380FC000, + 0x38100000, 0x38104000, 0x38108000, 0x3810C000, 0x38110000, 0x38114000, 0x38118000, 0x3811C000, 0x38120000, 0x38124000, 0x38128000, 0x3812C000, 0x38130000, 0x38134000, 0x38138000, 0x3813C000, + 0x38140000, 0x38144000, 0x38148000, 0x3814C000, 0x38150000, 0x38154000, 0x38158000, 0x3815C000, 0x38160000, 0x38164000, 0x38168000, 0x3816C000, 0x38170000, 0x38174000, 0x38178000, 0x3817C000, + 0x38180000, 0x38184000, 0x38188000, 0x3818C000, 0x38190000, 0x38194000, 0x38198000, 0x3819C000, 0x381A0000, 0x381A4000, 0x381A8000, 0x381AC000, 0x381B0000, 0x381B4000, 0x381B8000, 0x381BC000, + 0x381C0000, 0x381C4000, 0x381C8000, 0x381CC000, 0x381D0000, 0x381D4000, 0x381D8000, 0x381DC000, 0x381E0000, 0x381E4000, 0x381E8000, 0x381EC000, 0x381F0000, 0x381F4000, 0x381F8000, 0x381FC000, + 0x38200000, 0x38204000, 0x38208000, 0x3820C000, 0x38210000, 0x38214000, 0x38218000, 0x3821C000, 0x38220000, 0x38224000, 0x38228000, 0x3822C000, 0x38230000, 0x38234000, 0x38238000, 0x3823C000, + 0x38240000, 0x38244000, 0x38248000, 0x3824C000, 0x38250000, 0x38254000, 0x38258000, 0x3825C000, 0x38260000, 0x38264000, 0x38268000, 0x3826C000, 0x38270000, 0x38274000, 0x38278000, 0x3827C000, + 0x38280000, 0x38284000, 0x38288000, 0x3828C000, 0x38290000, 0x38294000, 0x38298000, 0x3829C000, 0x382A0000, 0x382A4000, 0x382A8000, 0x382AC000, 0x382B0000, 0x382B4000, 0x382B8000, 0x382BC000, + 0x382C0000, 0x382C4000, 0x382C8000, 0x382CC000, 0x382D0000, 0x382D4000, 0x382D8000, 
0x382DC000, 0x382E0000, 0x382E4000, 0x382E8000, 0x382EC000, 0x382F0000, 0x382F4000, 0x382F8000, 0x382FC000, + 0x38300000, 0x38304000, 0x38308000, 0x3830C000, 0x38310000, 0x38314000, 0x38318000, 0x3831C000, 0x38320000, 0x38324000, 0x38328000, 0x3832C000, 0x38330000, 0x38334000, 0x38338000, 0x3833C000, + 0x38340000, 0x38344000, 0x38348000, 0x3834C000, 0x38350000, 0x38354000, 0x38358000, 0x3835C000, 0x38360000, 0x38364000, 0x38368000, 0x3836C000, 0x38370000, 0x38374000, 0x38378000, 0x3837C000, + 0x38380000, 0x38384000, 0x38388000, 0x3838C000, 0x38390000, 0x38394000, 0x38398000, 0x3839C000, 0x383A0000, 0x383A4000, 0x383A8000, 0x383AC000, 0x383B0000, 0x383B4000, 0x383B8000, 0x383BC000, + 0x383C0000, 0x383C4000, 0x383C8000, 0x383CC000, 0x383D0000, 0x383D4000, 0x383D8000, 0x383DC000, 0x383E0000, 0x383E4000, 0x383E8000, 0x383EC000, 0x383F0000, 0x383F4000, 0x383F8000, 0x383FC000, + 0x38400000, 0x38404000, 0x38408000, 0x3840C000, 0x38410000, 0x38414000, 0x38418000, 0x3841C000, 0x38420000, 0x38424000, 0x38428000, 0x3842C000, 0x38430000, 0x38434000, 0x38438000, 0x3843C000, + 0x38440000, 0x38444000, 0x38448000, 0x3844C000, 0x38450000, 0x38454000, 0x38458000, 0x3845C000, 0x38460000, 0x38464000, 0x38468000, 0x3846C000, 0x38470000, 0x38474000, 0x38478000, 0x3847C000, + 0x38480000, 0x38484000, 0x38488000, 0x3848C000, 0x38490000, 0x38494000, 0x38498000, 0x3849C000, 0x384A0000, 0x384A4000, 0x384A8000, 0x384AC000, 0x384B0000, 0x384B4000, 0x384B8000, 0x384BC000, + 0x384C0000, 0x384C4000, 0x384C8000, 0x384CC000, 0x384D0000, 0x384D4000, 0x384D8000, 0x384DC000, 0x384E0000, 0x384E4000, 0x384E8000, 0x384EC000, 0x384F0000, 0x384F4000, 0x384F8000, 0x384FC000, + 0x38500000, 0x38504000, 0x38508000, 0x3850C000, 0x38510000, 0x38514000, 0x38518000, 0x3851C000, 0x38520000, 0x38524000, 0x38528000, 0x3852C000, 0x38530000, 0x38534000, 0x38538000, 0x3853C000, + 0x38540000, 0x38544000, 0x38548000, 0x3854C000, 0x38550000, 0x38554000, 0x38558000, 0x3855C000, 0x38560000, 0x38564000, 0x38568000, 0x3856C000, 0x38570000, 0x38574000, 0x38578000, 0x3857C000, + 0x38580000, 0x38584000, 0x38588000, 0x3858C000, 0x38590000, 0x38594000, 0x38598000, 0x3859C000, 0x385A0000, 0x385A4000, 0x385A8000, 0x385AC000, 0x385B0000, 0x385B4000, 0x385B8000, 0x385BC000, + 0x385C0000, 0x385C4000, 0x385C8000, 0x385CC000, 0x385D0000, 0x385D4000, 0x385D8000, 0x385DC000, 0x385E0000, 0x385E4000, 0x385E8000, 0x385EC000, 0x385F0000, 0x385F4000, 0x385F8000, 0x385FC000, + 0x38600000, 0x38604000, 0x38608000, 0x3860C000, 0x38610000, 0x38614000, 0x38618000, 0x3861C000, 0x38620000, 0x38624000, 0x38628000, 0x3862C000, 0x38630000, 0x38634000, 0x38638000, 0x3863C000, + 0x38640000, 0x38644000, 0x38648000, 0x3864C000, 0x38650000, 0x38654000, 0x38658000, 0x3865C000, 0x38660000, 0x38664000, 0x38668000, 0x3866C000, 0x38670000, 0x38674000, 0x38678000, 0x3867C000, + 0x38680000, 0x38684000, 0x38688000, 0x3868C000, 0x38690000, 0x38694000, 0x38698000, 0x3869C000, 0x386A0000, 0x386A4000, 0x386A8000, 0x386AC000, 0x386B0000, 0x386B4000, 0x386B8000, 0x386BC000, + 0x386C0000, 0x386C4000, 0x386C8000, 0x386CC000, 0x386D0000, 0x386D4000, 0x386D8000, 0x386DC000, 0x386E0000, 0x386E4000, 0x386E8000, 0x386EC000, 0x386F0000, 0x386F4000, 0x386F8000, 0x386FC000, + 0x38700000, 0x38704000, 0x38708000, 0x3870C000, 0x38710000, 0x38714000, 0x38718000, 0x3871C000, 0x38720000, 0x38724000, 0x38728000, 0x3872C000, 0x38730000, 0x38734000, 0x38738000, 0x3873C000, + 0x38740000, 0x38744000, 0x38748000, 0x3874C000, 0x38750000, 0x38754000, 0x38758000, 0x3875C000, 0x38760000, 0x38764000, 0x38768000, 0x3876C000, 
0x38770000, 0x38774000, 0x38778000, 0x3877C000, + 0x38780000, 0x38784000, 0x38788000, 0x3878C000, 0x38790000, 0x38794000, 0x38798000, 0x3879C000, 0x387A0000, 0x387A4000, 0x387A8000, 0x387AC000, 0x387B0000, 0x387B4000, 0x387B8000, 0x387BC000, + 0x387C0000, 0x387C4000, 0x387C8000, 0x387CC000, 0x387D0000, 0x387D4000, 0x387D8000, 0x387DC000, 0x387E0000, 0x387E4000, 0x387E8000, 0x387EC000, 0x387F0000, 0x387F4000, 0x387F8000, 0x387FC000, + 0x38000000, 0x38002000, 0x38004000, 0x38006000, 0x38008000, 0x3800A000, 0x3800C000, 0x3800E000, 0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801A000, 0x3801C000, 0x3801E000, + 0x38020000, 0x38022000, 0x38024000, 0x38026000, 0x38028000, 0x3802A000, 0x3802C000, 0x3802E000, 0x38030000, 0x38032000, 0x38034000, 0x38036000, 0x38038000, 0x3803A000, 0x3803C000, 0x3803E000, + 0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804A000, 0x3804C000, 0x3804E000, 0x38050000, 0x38052000, 0x38054000, 0x38056000, 0x38058000, 0x3805A000, 0x3805C000, 0x3805E000, + 0x38060000, 0x38062000, 0x38064000, 0x38066000, 0x38068000, 0x3806A000, 0x3806C000, 0x3806E000, 0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807A000, 0x3807C000, 0x3807E000, + 0x38080000, 0x38082000, 0x38084000, 0x38086000, 0x38088000, 0x3808A000, 0x3808C000, 0x3808E000, 0x38090000, 0x38092000, 0x38094000, 0x38096000, 0x38098000, 0x3809A000, 0x3809C000, 0x3809E000, + 0x380A0000, 0x380A2000, 0x380A4000, 0x380A6000, 0x380A8000, 0x380AA000, 0x380AC000, 0x380AE000, 0x380B0000, 0x380B2000, 0x380B4000, 0x380B6000, 0x380B8000, 0x380BA000, 0x380BC000, 0x380BE000, + 0x380C0000, 0x380C2000, 0x380C4000, 0x380C6000, 0x380C8000, 0x380CA000, 0x380CC000, 0x380CE000, 0x380D0000, 0x380D2000, 0x380D4000, 0x380D6000, 0x380D8000, 0x380DA000, 0x380DC000, 0x380DE000, + 0x380E0000, 0x380E2000, 0x380E4000, 0x380E6000, 0x380E8000, 0x380EA000, 0x380EC000, 0x380EE000, 0x380F0000, 0x380F2000, 0x380F4000, 0x380F6000, 0x380F8000, 0x380FA000, 0x380FC000, 0x380FE000, + 0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810A000, 0x3810C000, 0x3810E000, 0x38110000, 0x38112000, 0x38114000, 0x38116000, 0x38118000, 0x3811A000, 0x3811C000, 0x3811E000, + 0x38120000, 0x38122000, 0x38124000, 0x38126000, 0x38128000, 0x3812A000, 0x3812C000, 0x3812E000, 0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813A000, 0x3813C000, 0x3813E000, + 0x38140000, 0x38142000, 0x38144000, 0x38146000, 0x38148000, 0x3814A000, 0x3814C000, 0x3814E000, 0x38150000, 0x38152000, 0x38154000, 0x38156000, 0x38158000, 0x3815A000, 0x3815C000, 0x3815E000, + 0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816A000, 0x3816C000, 0x3816E000, 0x38170000, 0x38172000, 0x38174000, 0x38176000, 0x38178000, 0x3817A000, 0x3817C000, 0x3817E000, + 0x38180000, 0x38182000, 0x38184000, 0x38186000, 0x38188000, 0x3818A000, 0x3818C000, 0x3818E000, 0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819A000, 0x3819C000, 0x3819E000, + 0x381A0000, 0x381A2000, 0x381A4000, 0x381A6000, 0x381A8000, 0x381AA000, 0x381AC000, 0x381AE000, 0x381B0000, 0x381B2000, 0x381B4000, 0x381B6000, 0x381B8000, 0x381BA000, 0x381BC000, 0x381BE000, + 0x381C0000, 0x381C2000, 0x381C4000, 0x381C6000, 0x381C8000, 0x381CA000, 0x381CC000, 0x381CE000, 0x381D0000, 0x381D2000, 0x381D4000, 0x381D6000, 0x381D8000, 0x381DA000, 0x381DC000, 0x381DE000, + 0x381E0000, 0x381E2000, 0x381E4000, 0x381E6000, 0x381E8000, 0x381EA000, 0x381EC000, 0x381EE000, 0x381F0000, 0x381F2000, 0x381F4000, 0x381F6000, 0x381F8000, 0x381FA000, 0x381FC000, 0x381FE000, + 0x38200000, 
0x38202000, 0x38204000, 0x38206000, 0x38208000, 0x3820A000, 0x3820C000, 0x3820E000, 0x38210000, 0x38212000, 0x38214000, 0x38216000, 0x38218000, 0x3821A000, 0x3821C000, 0x3821E000, + 0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822A000, 0x3822C000, 0x3822E000, 0x38230000, 0x38232000, 0x38234000, 0x38236000, 0x38238000, 0x3823A000, 0x3823C000, 0x3823E000, + 0x38240000, 0x38242000, 0x38244000, 0x38246000, 0x38248000, 0x3824A000, 0x3824C000, 0x3824E000, 0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825A000, 0x3825C000, 0x3825E000, + 0x38260000, 0x38262000, 0x38264000, 0x38266000, 0x38268000, 0x3826A000, 0x3826C000, 0x3826E000, 0x38270000, 0x38272000, 0x38274000, 0x38276000, 0x38278000, 0x3827A000, 0x3827C000, 0x3827E000, + 0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828A000, 0x3828C000, 0x3828E000, 0x38290000, 0x38292000, 0x38294000, 0x38296000, 0x38298000, 0x3829A000, 0x3829C000, 0x3829E000, + 0x382A0000, 0x382A2000, 0x382A4000, 0x382A6000, 0x382A8000, 0x382AA000, 0x382AC000, 0x382AE000, 0x382B0000, 0x382B2000, 0x382B4000, 0x382B6000, 0x382B8000, 0x382BA000, 0x382BC000, 0x382BE000, + 0x382C0000, 0x382C2000, 0x382C4000, 0x382C6000, 0x382C8000, 0x382CA000, 0x382CC000, 0x382CE000, 0x382D0000, 0x382D2000, 0x382D4000, 0x382D6000, 0x382D8000, 0x382DA000, 0x382DC000, 0x382DE000, + 0x382E0000, 0x382E2000, 0x382E4000, 0x382E6000, 0x382E8000, 0x382EA000, 0x382EC000, 0x382EE000, 0x382F0000, 0x382F2000, 0x382F4000, 0x382F6000, 0x382F8000, 0x382FA000, 0x382FC000, 0x382FE000, + 0x38300000, 0x38302000, 0x38304000, 0x38306000, 0x38308000, 0x3830A000, 0x3830C000, 0x3830E000, 0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831A000, 0x3831C000, 0x3831E000, + 0x38320000, 0x38322000, 0x38324000, 0x38326000, 0x38328000, 0x3832A000, 0x3832C000, 0x3832E000, 0x38330000, 0x38332000, 0x38334000, 0x38336000, 0x38338000, 0x3833A000, 0x3833C000, 0x3833E000, + 0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834A000, 0x3834C000, 0x3834E000, 0x38350000, 0x38352000, 0x38354000, 0x38356000, 0x38358000, 0x3835A000, 0x3835C000, 0x3835E000, + 0x38360000, 0x38362000, 0x38364000, 0x38366000, 0x38368000, 0x3836A000, 0x3836C000, 0x3836E000, 0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837A000, 0x3837C000, 0x3837E000, + 0x38380000, 0x38382000, 0x38384000, 0x38386000, 0x38388000, 0x3838A000, 0x3838C000, 0x3838E000, 0x38390000, 0x38392000, 0x38394000, 0x38396000, 0x38398000, 0x3839A000, 0x3839C000, 0x3839E000, + 0x383A0000, 0x383A2000, 0x383A4000, 0x383A6000, 0x383A8000, 0x383AA000, 0x383AC000, 0x383AE000, 0x383B0000, 0x383B2000, 0x383B4000, 0x383B6000, 0x383B8000, 0x383BA000, 0x383BC000, 0x383BE000, + 0x383C0000, 0x383C2000, 0x383C4000, 0x383C6000, 0x383C8000, 0x383CA000, 0x383CC000, 0x383CE000, 0x383D0000, 0x383D2000, 0x383D4000, 0x383D6000, 0x383D8000, 0x383DA000, 0x383DC000, 0x383DE000, + 0x383E0000, 0x383E2000, 0x383E4000, 0x383E6000, 0x383E8000, 0x383EA000, 0x383EC000, 0x383EE000, 0x383F0000, 0x383F2000, 0x383F4000, 0x383F6000, 0x383F8000, 0x383FA000, 0x383FC000, 0x383FE000, + 0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840A000, 0x3840C000, 0x3840E000, 0x38410000, 0x38412000, 0x38414000, 0x38416000, 0x38418000, 0x3841A000, 0x3841C000, 0x3841E000, + 0x38420000, 0x38422000, 0x38424000, 0x38426000, 0x38428000, 0x3842A000, 0x3842C000, 0x3842E000, 0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843A000, 0x3843C000, 0x3843E000, + 0x38440000, 0x38442000, 0x38444000, 0x38446000, 0x38448000, 0x3844A000, 
0x3844C000, 0x3844E000, 0x38450000, 0x38452000, 0x38454000, 0x38456000, 0x38458000, 0x3845A000, 0x3845C000, 0x3845E000, + 0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846A000, 0x3846C000, 0x3846E000, 0x38470000, 0x38472000, 0x38474000, 0x38476000, 0x38478000, 0x3847A000, 0x3847C000, 0x3847E000, + 0x38480000, 0x38482000, 0x38484000, 0x38486000, 0x38488000, 0x3848A000, 0x3848C000, 0x3848E000, 0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849A000, 0x3849C000, 0x3849E000, + 0x384A0000, 0x384A2000, 0x384A4000, 0x384A6000, 0x384A8000, 0x384AA000, 0x384AC000, 0x384AE000, 0x384B0000, 0x384B2000, 0x384B4000, 0x384B6000, 0x384B8000, 0x384BA000, 0x384BC000, 0x384BE000, + 0x384C0000, 0x384C2000, 0x384C4000, 0x384C6000, 0x384C8000, 0x384CA000, 0x384CC000, 0x384CE000, 0x384D0000, 0x384D2000, 0x384D4000, 0x384D6000, 0x384D8000, 0x384DA000, 0x384DC000, 0x384DE000, + 0x384E0000, 0x384E2000, 0x384E4000, 0x384E6000, 0x384E8000, 0x384EA000, 0x384EC000, 0x384EE000, 0x384F0000, 0x384F2000, 0x384F4000, 0x384F6000, 0x384F8000, 0x384FA000, 0x384FC000, 0x384FE000, + 0x38500000, 0x38502000, 0x38504000, 0x38506000, 0x38508000, 0x3850A000, 0x3850C000, 0x3850E000, 0x38510000, 0x38512000, 0x38514000, 0x38516000, 0x38518000, 0x3851A000, 0x3851C000, 0x3851E000, + 0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852A000, 0x3852C000, 0x3852E000, 0x38530000, 0x38532000, 0x38534000, 0x38536000, 0x38538000, 0x3853A000, 0x3853C000, 0x3853E000, + 0x38540000, 0x38542000, 0x38544000, 0x38546000, 0x38548000, 0x3854A000, 0x3854C000, 0x3854E000, 0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855A000, 0x3855C000, 0x3855E000, + 0x38560000, 0x38562000, 0x38564000, 0x38566000, 0x38568000, 0x3856A000, 0x3856C000, 0x3856E000, 0x38570000, 0x38572000, 0x38574000, 0x38576000, 0x38578000, 0x3857A000, 0x3857C000, 0x3857E000, + 0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858A000, 0x3858C000, 0x3858E000, 0x38590000, 0x38592000, 0x38594000, 0x38596000, 0x38598000, 0x3859A000, 0x3859C000, 0x3859E000, + 0x385A0000, 0x385A2000, 0x385A4000, 0x385A6000, 0x385A8000, 0x385AA000, 0x385AC000, 0x385AE000, 0x385B0000, 0x385B2000, 0x385B4000, 0x385B6000, 0x385B8000, 0x385BA000, 0x385BC000, 0x385BE000, + 0x385C0000, 0x385C2000, 0x385C4000, 0x385C6000, 0x385C8000, 0x385CA000, 0x385CC000, 0x385CE000, 0x385D0000, 0x385D2000, 0x385D4000, 0x385D6000, 0x385D8000, 0x385DA000, 0x385DC000, 0x385DE000, + 0x385E0000, 0x385E2000, 0x385E4000, 0x385E6000, 0x385E8000, 0x385EA000, 0x385EC000, 0x385EE000, 0x385F0000, 0x385F2000, 0x385F4000, 0x385F6000, 0x385F8000, 0x385FA000, 0x385FC000, 0x385FE000, + 0x38600000, 0x38602000, 0x38604000, 0x38606000, 0x38608000, 0x3860A000, 0x3860C000, 0x3860E000, 0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861A000, 0x3861C000, 0x3861E000, + 0x38620000, 0x38622000, 0x38624000, 0x38626000, 0x38628000, 0x3862A000, 0x3862C000, 0x3862E000, 0x38630000, 0x38632000, 0x38634000, 0x38636000, 0x38638000, 0x3863A000, 0x3863C000, 0x3863E000, + 0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864A000, 0x3864C000, 0x3864E000, 0x38650000, 0x38652000, 0x38654000, 0x38656000, 0x38658000, 0x3865A000, 0x3865C000, 0x3865E000, + 0x38660000, 0x38662000, 0x38664000, 0x38666000, 0x38668000, 0x3866A000, 0x3866C000, 0x3866E000, 0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867A000, 0x3867C000, 0x3867E000, + 0x38680000, 0x38682000, 0x38684000, 0x38686000, 0x38688000, 0x3868A000, 0x3868C000, 0x3868E000, 0x38690000, 0x38692000, 0x38694000, 
0x38696000, 0x38698000, 0x3869A000, 0x3869C000, 0x3869E000, + 0x386A0000, 0x386A2000, 0x386A4000, 0x386A6000, 0x386A8000, 0x386AA000, 0x386AC000, 0x386AE000, 0x386B0000, 0x386B2000, 0x386B4000, 0x386B6000, 0x386B8000, 0x386BA000, 0x386BC000, 0x386BE000, + 0x386C0000, 0x386C2000, 0x386C4000, 0x386C6000, 0x386C8000, 0x386CA000, 0x386CC000, 0x386CE000, 0x386D0000, 0x386D2000, 0x386D4000, 0x386D6000, 0x386D8000, 0x386DA000, 0x386DC000, 0x386DE000, + 0x386E0000, 0x386E2000, 0x386E4000, 0x386E6000, 0x386E8000, 0x386EA000, 0x386EC000, 0x386EE000, 0x386F0000, 0x386F2000, 0x386F4000, 0x386F6000, 0x386F8000, 0x386FA000, 0x386FC000, 0x386FE000, + 0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870A000, 0x3870C000, 0x3870E000, 0x38710000, 0x38712000, 0x38714000, 0x38716000, 0x38718000, 0x3871A000, 0x3871C000, 0x3871E000, + 0x38720000, 0x38722000, 0x38724000, 0x38726000, 0x38728000, 0x3872A000, 0x3872C000, 0x3872E000, 0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873A000, 0x3873C000, 0x3873E000, + 0x38740000, 0x38742000, 0x38744000, 0x38746000, 0x38748000, 0x3874A000, 0x3874C000, 0x3874E000, 0x38750000, 0x38752000, 0x38754000, 0x38756000, 0x38758000, 0x3875A000, 0x3875C000, 0x3875E000, + 0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876A000, 0x3876C000, 0x3876E000, 0x38770000, 0x38772000, 0x38774000, 0x38776000, 0x38778000, 0x3877A000, 0x3877C000, 0x3877E000, + 0x38780000, 0x38782000, 0x38784000, 0x38786000, 0x38788000, 0x3878A000, 0x3878C000, 0x3878E000, 0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879A000, 0x3879C000, 0x3879E000, + 0x387A0000, 0x387A2000, 0x387A4000, 0x387A6000, 0x387A8000, 0x387AA000, 0x387AC000, 0x387AE000, 0x387B0000, 0x387B2000, 0x387B4000, 0x387B6000, 0x387B8000, 0x387BA000, 0x387BC000, 0x387BE000, + 0x387C0000, 0x387C2000, 0x387C4000, 0x387C6000, 0x387C8000, 0x387CA000, 0x387CC000, 0x387CE000, 0x387D0000, 0x387D2000, 0x387D4000, 0x387D6000, 0x387D8000, 0x387DA000, 0x387DC000, 0x387DE000, + 0x387E0000, 0x387E2000, 0x387E4000, 0x387E6000, 0x387E8000, 0x387EA000, 0x387EC000, 0x387EE000, 0x387F0000, 0x387F2000, 0x387F4000, 0x387F6000, 0x387F8000, 0x387FA000, 0x387FC000, 0x387FE000 }; + static const uint32 exponent_table[64] = { + 0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000, 0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000, 0x06000000, 0x06800000, 0x07000000, 0x07800000, + 0x08000000, 0x08800000, 0x09000000, 0x09800000, 0x0A000000, 0x0A800000, 0x0B000000, 0x0B800000, 0x0C000000, 0x0C800000, 0x0D000000, 0x0D800000, 0x0E000000, 0x0E800000, 0x0F000000, 0x47800000, + 0x80000000, 0x80800000, 0x81000000, 0x81800000, 0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000, 0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000, + 0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8A000000, 0x8A800000, 0x8B000000, 0x8B800000, 0x8C000000, 0x8C800000, 0x8D000000, 0x8D800000, 0x8E000000, 0x8E800000, 0x8F000000, 0xC7800000 }; + static const unsigned short offset_table[64] = { + 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, + 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024 }; + uint32 bits = mantissa_table[offset_table[value>>10]+(value&0x3FF)] + exponent_table[value>>10]; 
+//    uint32 bits = mantissa_table[(((value&0x7C00)!=0)<<10)+(value&0x3FF)] + exponent_table[value>>10];
+//    return *reinterpret_cast<float*>(&bits); //violating strict aliasing!
+    float out;
+    std::memcpy(&out, &bits, sizeof(float));
+    return out;
+  }
+
+  /// Convert half-precision to non-IEEE single-precision.
+  /// \param value binary representation of half-precision value
+  /// \return single-precision value
+  inline float half2float_impl(uint16 value, false_type)
+  {
+    float out;
+    int abs = value & 0x7FFF;
+    if(abs > 0x7C00)
+      out = std::numeric_limits<float>::has_quiet_NaN ? std::numeric_limits<float>::quiet_NaN() : 0.0f;
+    else if(abs == 0x7C00)
+      out = std::numeric_limits<float>::has_infinity ? std::numeric_limits<float>::infinity() : std::numeric_limits<float>::max();
+    else if(abs > 0x3FF)
+      out = std::ldexp(static_cast<float>((value&0x3FF)|0x400), (abs>>10)-25);
+    else
+      out = std::ldexp(static_cast<float>(abs), -24);
+    return (value&0x8000) ? -out : out;
+  }
+
+  /// Convert half-precision to single-precision.
+  /// \param value binary representation of half-precision value
+  /// \return single-precision value
+  inline float half2float(uint16 value)
+  {
+    return half2float_impl(value, bool_type<std::numeric_limits<float>::is_iec559&&sizeof(uint32)==sizeof(float)>());
+  }
+
+  /// Convert half-precision floating point to integer.
+  /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding
+  /// \tparam E `true` for round to even, `false` for round away from zero
+  /// \tparam T type to convert to (builtin integer type with at least 16 bits precision, excluding any implicit sign bits)
+  /// \param value binary representation of half-precision value
+  /// \return integral value
+  template<std::float_round_style R,bool E,typename T> T half2int_impl(uint16 value)
+  {
+    unsigned int e = value & 0x7FFF;
+    if(e >= 0x7C00)
+      return (value&0x8000) ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
+    if(e < 0x3800)
+    {
+      if(R == std::round_toward_infinity)
+        return T(~(value>>15)&(e!=0));
+      else if(R == std::round_toward_neg_infinity)
+        return -T(value>0x8000);
+      return T();
+    }
+    int17 m = (value&0x3FF) | 0x400;
+    e >>= 10;
+    if(e < 25)
+    {
+      if(R == std::round_indeterminate || R == std::round_toward_zero)
+        m >>= 25 - e;
+      else
+      {
+        if(R == std::round_to_nearest)
+          m += (1<<(24-e)) - (~(m>>(25-e))&E);
+        else if(R == std::round_toward_infinity)
+          m += ((value>>15)-1) & ((1<<(25-e))-1U);
+        else if(R == std::round_toward_neg_infinity)
+          m += -(value>>15) & ((1<<(25-e))-1U);
+        m >>= 25 - e;
+      }
+    }
+    else
+      m <<= e - 25;
+//    if(std::numeric_limits<T>::digits < 16)
+//      return std::min(std::max(m, static_cast<int17>(std::numeric_limits<T>::min())), static_cast<int17>(std::numeric_limits<T>::max()));
+    return static_cast<T>((value&0x8000) ? -m : m);
+  }
+
+  /// Convert half-precision floating point to integer.
+  /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding
+  /// \tparam T type to convert to (builtin integer type with at least 16 bits precision, excluding any implicit sign bits)
+  /// \param value binary representation of half-precision value
+  /// \return integral value
+  template<std::float_round_style R,typename T> T half2int(uint16 value) { return half2int_impl<R,HALF_ROUND_TIES_TO_EVEN,T>(value); }
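[Editor's note] The table lookup above is just a precomputed form of the ldexp arithmetic used by the non-IEEE fallback, so the two decode paths must agree on ordinary values. A minimal standalone sketch (not part of the header; the file name and helper are illustrative, and inf/NaN handling is omitted) that decodes a half bit pattern the same way as the fallback:

// decode_half.cpp -- illustrative only; mirrors half2float_impl(value, false_type).
#include <cmath>
#include <cstdint>
#include <cstdio>

static float decode_half(uint16_t value) {
    int abs = value & 0x7FFF;                 // patterns with abs >= 0x7C00 (inf/NaN) not handled here
    float out;
    if (abs > 0x3FF)                          // normal: implicit leading 1, exponent bias 15
        out = std::ldexp(static_cast<float>((value & 0x3FF) | 0x400), (abs >> 10) - 25);
    else                                      // subnormal or zero: units of 2^-24
        out = std::ldexp(static_cast<float>(abs), -24);
    return (value & 0x8000) ? -out : out;
}

int main() {
    std::printf("%f\n", decode_half(0x3C00)); // 1.0
    std::printf("%f\n", decode_half(0xC000)); // -2.0
    std::printf("%g\n", decode_half(0x0001)); // 2^-24, smallest subnormal
    return 0;
}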
+  /// Convert half-precision floating point to integer using round-to-nearest-away-from-zero.
+  /// \tparam T type to convert to (builtin integer type with at least 16 bits precision, excluding any implicit sign bits)
+  /// \param value binary representation of half-precision value
+  /// \return integral value
+  template<typename T> T half2int_up(uint16 value) { return half2int_impl<std::round_to_nearest,0,T>(value); }
+
+  /// Round half-precision number to nearest integer value.
+  /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding
+  /// \tparam E `true` for round to even, `false` for round away from zero
+  /// \param value binary representation of half-precision value
+  /// \return half-precision bits for nearest integral value
+  template<std::float_round_style R,bool E> uint16 round_half_impl(uint16 value)
+  {
+    unsigned int e = value & 0x7FFF;
+    uint16 result = value;
+    if(e < 0x3C00)
+    {
+      result &= 0x8000;
+      if(R == std::round_to_nearest)
+        result |= 0x3C00U & -(e>=(0x3800+E));
+      else if(R == std::round_toward_infinity)
+        result |= 0x3C00U & -(~(value>>15)&(e!=0));
+      else if(R == std::round_toward_neg_infinity)
+        result |= 0x3C00U & -(value>0x8000);
+    }
+    else if(e < 0x6400)
+    {
+      e = 25 - (e>>10);
+      unsigned int mask = (1<<e) - 1;
+      if(R == std::round_to_nearest)
+        result += (1<<(e-1)) - (~(result>>e)&E);
+      else if(R == std::round_toward_infinity)
+        result += mask & ((value>>15)-1);
+      else if(R == std::round_toward_neg_infinity)
+        result += mask & -(value>>15);
+      result &= ~mask;
+    }
+    return result;
+  }
+
+  /// Round half-precision number to nearest integer value.
+  /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding
+  /// \param value binary representation of half-precision value
+  /// \return half-precision bits for nearest integral value
+  template<std::float_round_style R> uint16 round_half(uint16 value) { return round_half_impl<R,HALF_ROUND_TIES_TO_EVEN>(value); }
+
+  /// Round half-precision number to nearest integer value using round-to-nearest-away-from-zero.
+  /// \param value binary representation of half-precision value
+  /// \return half-precision bits for nearest integral value
+  inline uint16 round_half_up(uint16 value) { return round_half_impl<std::round_to_nearest,0>(value); }
+  /// \}
+
+  struct functions;
+  template<typename T> struct unary_specialized;
+  template<typename T,typename U> struct binary_specialized;
+  template<typename T,typename U,std::float_round_style R> struct half_caster;
+  }
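[Editor's note] The round-to-nearest-even branch above is easiest to see on concrete bit patterns. A self-contained sketch (illustrative only, independent of the header) mirroring round_half_impl<std::round_to_nearest,1> for the one range that actually has fractional mantissa bits:

// round_half_even.cpp -- illustrative only; values below 1.0 and non-finite inputs omitted.
#include <cstdint>
#include <cstdio>

static uint16_t round_half_even(uint16_t value) {
    unsigned e = value & 0x7FFF;
    uint16_t result = value;
    if (e >= 0x3C00 && e < 0x6400) {                         // 1.0 <= |x| < 2048
        e = 25 - (e >> 10);                                  // number of fractional mantissa bits
        unsigned mask = (1u << e) - 1;
        result += (1u << (e - 1)) - (~(result >> e) & 1u);   // add half an ulp, ties to even
        result &= static_cast<uint16_t>(~mask);              // clear the fraction
    }
    return result;
}

int main() {
    std::printf("0x%04X\n", round_half_even(0x3E00)); // 1.5 -> 0x4000 (2.0)
    std::printf("0x%04X\n", round_half_even(0x4100)); // 2.5 -> 0x4000 (2.0), tie goes to even
    return 0;
}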
+
+  /// Half-precision floating point type.
+  /// This class implements an IEEE-conformant half-precision floating point type with the usual arithmetic operators and
+  /// conversions. It is implicitly convertible to single-precision floating point, which causes arithmetic expressions and
+  /// functions with mixed-type operands to be of the most precise operand type. Additionally all arithmetic operations
+  /// (and many mathematical functions) are carried out in single-precision internally. All conversions from single- to
+  /// half-precision are done using truncation (round towards zero), but temporary results inside chained arithmetic
+  /// expressions are kept in single-precision as long as possible (while of course still maintaining a strong half-precision type).
+  ///
+  /// According to the C++98/03 definition, the half type is not a POD type. But according to C++11's less strict and
+  /// extended definitions it is both a standard layout type and a trivially copyable type (even if not a POD type), which
+  /// means it can be standard-conformantly copied using raw binary copies. In this context a few words about the actual
+  /// size of the type are in order. Although the half represents an IEEE 16-bit type, it does not necessarily have to be of
+  /// exactly 16-bits size. But on any reasonable implementation the actual binary representation of this type will most
+  /// probably not involve any additional "magic" or padding beyond the simple binary representation of the underlying 16-bit
+  /// IEEE number, even if not strictly guaranteed by the standard. But even then it only has an actual size of 16 bits if
+  /// your C++ implementation supports an unsigned integer type of exactly 16 bits width. This should be the case on
+  /// nearly any reasonable platform.
+  ///
+  /// So if your C++ implementation is not totally exotic or imposes special alignment requirements, it is a reasonable
+  /// assumption that the data of a half is just comprised of the 2 bytes of the underlying IEEE representation.
+  class half
+  {
+    friend struct detail::functions;
+    friend struct detail::unary_specialized<half>;
+    friend struct detail::binary_specialized<half,half>;
+    template<typename,typename,std::float_round_style> friend struct detail::half_caster;
+    friend struct std::numeric_limits<half>;
+  #if HALF_ENABLE_CPP11_HASH
+    friend struct std::hash<half>;
+  #endif
+
+  public:
+    /// Default constructor.
+    /// This initializes the half to 0. Although this does not match the builtin types' default-initialization semantics
+    /// and may be less efficient than no initialization, it is needed to provide proper value-initialization semantics.
+    HALF_CONSTEXPR half() : data_() {}
+
+    /// Copy constructor.
+    /// \tparam T type of concrete half expression
+    /// \param rhs half expression to copy from
+    half(detail::expr rhs) : data_(detail::float2half<round_style>(rhs)) {
+#ifndef HLS_NO_XIL_FPO_LIB
+      xip_fpo_half_set_flt(rhs);
+#endif
+    }
+
+    /// Conversion constructor.
+    /// \param rhs float to convert
+    half(float rhs) : data_(detail::float2half<round_style>(rhs)) {
+#ifndef HLS_NO_XIL_FPO_LIB
+      xip_fpo_half_set_flt(rhs);
+#endif
+    }
+
+    /// Conversion constructor from AP types.
+    template <int _AP_W, int _AP_I, bool _AP_S, ap_q_mode _AP_Q, ap_o_mode _AP_O, int _AP_N>
+    half(const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& rhs) {
+      std::cout << "WARNING: explicit method ap_fixed::to_half() should be used to convert ap_fixed to half." << std::endl;
+      *this = rhs.to_half();
+    }
+
+    /// Conversion to single-precision.
+    /// \return single precision value representing expression value
+    operator float() const {
+#ifdef HLS_NO_XIL_FPO_LIB
+      return detail::half2float(data_);
+#else
+      return xip_fpo_half_get_flt();
+#endif
+    }
+
+    /// Assignment operator.
+    /// \tparam T type of concrete half expression
+    /// \param rhs half expression to copy from
+    /// \return reference to this half
+    half& operator=(detail::expr rhs) { return *this = static_cast<float>(rhs); }
+
+    /// Arithmetic assignment.
+    /// \tparam T type of concrete half expression
+    /// \param rhs half expression to add
+    /// \return reference to this half
+    template<typename T> typename detail::enable<half&,T>::type operator+=(T rhs) { return *this = *this + rhs; }
+
+    /// Arithmetic assignment.
+    /// \tparam T type of concrete half expression
+    /// \param rhs half expression to subtract
+    /// \return reference to this half
+    template<typename T> typename detail::enable<half&,T>::type operator-=(T rhs) { return *this = *this - rhs; }
+
+    /// Arithmetic assignment.
+    /// \tparam T type of concrete half expression
+    /// \param rhs half expression to multiply with
+    /// \return reference to this half
+    template<typename T> typename detail::enable<half&,T>::type operator*=(T rhs) { return *this = *this * rhs; }
+
+    /// Arithmetic assignment.
+    /// \tparam T type of concrete half expression
+    /// \param rhs half expression to divide by
+    /// \return reference to this half
+    template<typename T> typename detail::enable<half&,T>::type operator/=(T rhs) { return *this = *this / rhs; }
+
+#if 0
+    /* disable to avoid ambiguous overload. */
+    /// Assignment operator.
+    /// \param rhs single-precision value to copy from
+    /// \return reference to this half
+    half& operator=(float rhs) {
+#ifdef HLS_NO_XIL_FPO_LIB
+      data_ = detail::float2half<round_style>(rhs);
+#else
+      xip_fpo_half_set_flt(rhs);
+#endif
+      return *this;
+    }
+#endif
+
+    /// Arithmetic assignment.
+    /// \param rhs single-precision value to add
+    /// \return reference to this half
+    //half& operator+=(float rhs) { data_ = detail::float2half<round_style>(detail::half2float(data_)+rhs); return *this; }
+
+    /// Arithmetic assignment.
+    /// \param rhs single-precision value to subtract
+    /// \return reference to this half
+    //half& operator-=(float rhs) { data_ = detail::float2half<round_style>(detail::half2float(data_)-rhs); return *this; }
+
+    /// Arithmetic assignment.
+    /// \param rhs single-precision value to multiply with
+    /// \return reference to this half
+    //half& operator*=(float rhs) { data_ = detail::float2half<round_style>(detail::half2float(data_)*rhs); return *this; }
+
+    /// Arithmetic assignment.
+    /// \param rhs single-precision value to divide by
+    /// \return reference to this half
+    //half& operator/=(float rhs) { data_ = detail::float2half<round_style>(detail::half2float(data_)/rhs); return *this; }
+
+    /// Prefix increment.
+    /// \return incremented half value
+    half& operator++() { return *this += 1.0f; }
+
+    /// Prefix decrement.
+    /// \return decremented half value
+    half& operator--() { return *this -= 1.0f; }
+
+    /// Postfix increment.
+    /// \return non-incremented half value
+    half operator++(int) { half out(*this); ++*this; return out; }
+
+    /// Postfix decrement.
+    /// \return non-decremented half value
+    half operator--(int) { half out(*this); --*this; return out; }
+
+    /// Get half internal uint16 representation.
+    /// \return packed uint16 data
+    detail::uint16 get_bits() { return data_; }
+
+    /// Set half internal uint16 representation.
+    /// \return void
+    void set_bits(detail::uint16 bits) { data_ = bits; }
+
+#ifndef HLS_NO_XIL_FPO_LIB
+    /// Assign value from half data to xip_fpo struct.
+    /// \return xip_fpo exceptions
+    xip_fpo_exc_t xip_fpo_get_data(xip_fpo_ptr op) const {
+      int exc = 0;
+      op->_xip_fpo_sign = ((data_ & 0x8000) ? -1 : 1);
+      op->_xip_fpo_exp = ((data_ & 0x7C00) >> 10) - 14;
+      *(op->_xip_fpo_d) = ((mp_limb_t)(data_ & 0x3FF) + (mp_limb_t)(0x400)) << (8*sizeof(*(op->_xip_fpo_d)) - 11);
+      if ((data_ & 0x7C00) == 0) { // subnormal
+        exc |= 0x1;
+        xip_fpo_set_zero(op, op->_xip_fpo_sign);
+      } else if ((data_ & 0x7FFF) == 0x7C00) { // infinity
+        exc |= 0x2;
+        xip_fpo_set_inf(op, op->_xip_fpo_sign);
+      } else if ((data_ & 0x7FFF) > 0x7C00) { // NaN
+        exc |= 0x4;
+        xip_fpo_set_nan(op);
+      }
+      return exc;
+    }
+
+    /// Convert to float through the Xilinx FPO library.
+    float xip_fpo_half_get_flt() const {
+      xip_fpo_t op;
+      xip_fpo_init2(op, 5, 11);
+      xip_fpo_exc_t exc = xip_fpo_get_data(op);
+      float res;
+      if (exc & 0x1) {
+        res = (op->_xip_fpo_sign > 0 ? 0.0f : -0.0f);
+      } else if (exc & 0x2) {
+        res = (op->_xip_fpo_sign > 0 ? std::numeric_limits<float>::infinity() : -std::numeric_limits<float>::infinity());
+      } else if (exc & 0x4) {
+        res = std::numeric_limits<float>::quiet_NaN();
+      } else {
+        res = xip_fpo_get_flt(op);
+      }
+      xip_fpo_clear(op);
+      return res;
+    }
+
+    /// Assign value from xip_fpo struct to half data.
+    /// \return void
+    void xip_fpo_set_data(xip_fpo_ptr op) {
+      mpfr_t fr;
+      XIP_FPO_2_MPFR(fr, op);
+      data_ = 0;
+      data_ |= (op->_xip_fpo_sign == 1 ? 0 : 1) << 15;
+      if (mpfr_zero_p(fr)) {
+        data_ &= 0x8000;
+      } else if (mpfr_inf_p(fr)) {
+        data_ |= 0x7C00;
+      } else if (mpfr_nan_p(fr)) {
+        data_ |= 0x7E00;
+      } else {
+        data_ |= (op->_xip_fpo_exp + 14) << 10;
+        data_ |= (*(op->_xip_fpo_d) << 1) >> (8*sizeof(*(op->_xip_fpo_d)) - 10);
+      }
+    }
+
+    /// Assignment from float through the Xilinx FPO library.
+    void xip_fpo_half_set_flt(float rhs) {
+      xip_fpo_t op;
+      xip_fpo_init2(op, 5, 11);
+      xip_fpo_set_flt(op, rhs);
+      xip_fpo_set_data(op);
+      xip_fpo_clear(op);
+    }
+#endif
+
+  private:
+    /// Rounding mode to use (always `std::round_indeterminate`)
+    static const std::float_round_style round_style = (std::float_round_style)(HALF_ROUND_STYLE);
+
+    /// Constructor.
+    /// \param bits binary representation to set half to
+    HALF_CONSTEXPR half(detail::binary_t, detail::uint16 bits) : data_(bits) {}
+
+    /// Internal binary representation
+    detail::uint16 data_;
+  };
+
+  // half simulation model with Xilinx Floating Point Operator IP core
+#ifndef HLS_NO_XIL_FPO_LIB
+  template <typename F>
+  half math_function_1arg(F f, half x) {
+    half res;
+    xip_fpo_t rop, xop;
+    xip_fpo_inits2(5, 11, rop, xop, (xip_fpo_ptr)0);
+    x.xip_fpo_get_data(xop);
+    f(rop, xop);
+    res.xip_fpo_set_data(rop);
+    xip_fpo_clears(rop, xop, (xip_fpo_ptr)0);
+    return res;
+  }
+
+  template <typename F>
+  half binary_operator(F f, half x, half y) {
+    half res;
+    xip_fpo_t op, xop, yop;
+    xip_fpo_inits2(5, 11, op, xop, yop, (xip_fpo_ptr)0);
+    x.xip_fpo_get_data(xop);
+    y.xip_fpo_get_data(yop);
+    f(op, xop, yop);
+    res.xip_fpo_set_data(op);
+    xip_fpo_clears(op, xop, yop, (xip_fpo_ptr)0);
+    return res;
+  }
+
+  // perform comparison
+  template <typename F>
+  bool binary_operator_compare(F f, half x, half y) {
+    int res;
+    xip_fpo_t xop, yop;
+    xip_fpo_inits2(5, 11, xop, yop, (xip_fpo_ptr)0);
+    x.xip_fpo_get_data(xop);
+    y.xip_fpo_get_data(yop);
+    f(&res, xop, yop);
+    xip_fpo_clears(xop, yop, (xip_fpo_ptr)0);
+    return res;
+  }
+#endif
+
+#if HALF_ENABLE_CPP11_USER_LITERALS
+  /// Library-defined half-precision literals.
+  /// Import this namespace to enable half-precision floating point literals:
+  /// ~~~~{.cpp}
+  /// using namespace literal;
+  /// half h = 4.2_h;
+  /// ~~~~
+  namespace literal
+  {
+    /// Half literal.
+    /// While this returns an actual half-precision value, half literals can unfortunately not be constant expressions due
+    /// to the rather involved single-to-half conversion.
+    /// \param value literal value
+    /// \return half with given value (if representable)
+    inline half operator "" _h(long double value) { return half(static_cast<float>(value)); }
+  }
+#endif
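[Editor's note] The class above can be exercised from a small host-side driver. A hypothetical usage sketch (not part of the header; it assumes the header installs as "hls_half.h", software simulation, and C++11 user literals enabled):

// half_demo.cpp -- hypothetical test driver, illustrative only.
#include <cstdio>
#include "hls_half.h"   // assumed install name of this header

int main() {
    half a(1.5f);                  // conversion constructor from float
    half b(0.25f);
    a += b;                        // arithmetic round-trips through wider precision
#if HALF_ENABLE_CPP11_USER_LITERALS
    using namespace literal;
    half c = 4.2_h;                // user-defined literal from the namespace above
    (void)c;
#endif
    std::printf("a = %f, bits = 0x%04X\n",
                static_cast<float>(a),                 // implicit half -> float also works
                static_cast<unsigned>(a.get_bits()));  // raw 16-bit payload
    return 0;
}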
+
+  namespace detail
+  {
+    /// Wrapper implementing unspecialized half-precision functions.
+    struct functions
+    {
+      /// Addition implementation.
+      /// \param x first operand
+      /// \param y second operand
+      /// \return Half-precision sum
+#ifdef HLS_NO_XIL_FPO_LIB
+      /// \return Half-precision sum stored in single-precision
+      static expr plus(float x, float y) { return expr(x+y); }
+#else
+      template <typename T1, typename T2>
+      static half plus(T1 x, T2 y) { return binary_operator(xip_fpo_add, x, y); }
+      static float plus(float x, half y) { return HLS_FPO_ADDF(x,y); }
+      static float plus(half x, float y) { return HLS_FPO_ADDF(x,y); }
+      static double plus(double x, half y) { return HLS_FPO_ADD(x,y); }
+      static double plus(half x, double y) { return HLS_FPO_ADD(x,y); }
+#endif
+
+      /// Subtraction implementation.
+      /// \param x first operand
+      /// \param y second operand
+      /// \return Half-precision difference
+#ifdef HLS_NO_XIL_FPO_LIB
+      /// \return Half-precision difference stored in single-precision
+      static expr minus(float x, float y) { return expr(x-y); }
+#else
+      template <typename T1, typename T2>
+      static half minus(T1 x, T2 y) { return binary_operator(xip_fpo_sub, x, y); }
+      static float minus(float x, half y) { return HLS_FPO_SUBF(x,y); }
+      static float minus(half x, float y) { return HLS_FPO_SUBF(x,y); }
+      static double minus(double x, half y) { return HLS_FPO_SUB(x,y); }
+      static double minus(half x, double y) { return HLS_FPO_SUB(x,y); }
+#endif
+
+      /// Multiplication implementation.
+      /// \param x first operand
+      /// \param y second operand
+      /// \return Half-precision product
+#ifdef HLS_NO_XIL_FPO_LIB
+      /// \return Half-precision product stored in single-precision
+      static expr multiplies(float x, float y) { return expr(x*y); }
+#else
+      template <typename T1, typename T2>
+      static half multiplies(T1 x, T2 y) { return binary_operator(xip_fpo_mul, x, y); }
+      static float multiplies(float x, half y) { return HLS_FPO_MULF(x,y); }
+      static float multiplies(half x, float y) { return HLS_FPO_MULF(x,y); }
+      static double multiplies(double x, half y) { return HLS_FPO_MUL(x,y); }
+      static double multiplies(half x, double y) { return HLS_FPO_MUL(x,y); }
+#endif
+
+      /// Division implementation.
+      /// \param x first operand
+      /// \param y second operand
+      /// \return Half-precision quotient
+#ifdef HLS_NO_XIL_FPO_LIB
+      /// \return Half-precision quotient stored in single-precision
+      static expr divides(float x, float y) { return expr(x/y); }
+#else
+      template <typename T1, typename T2>
+      static half divides(T1 x, T2 y) { return binary_operator(xip_fpo_div, x, y); }
+      static float divides(float x, half y) { return HLS_FPO_DIVF(x,y); }
+      static float divides(half x, float y) { return HLS_FPO_DIVF(x,y); }
+      static double divides(double x, half y) { return HLS_FPO_DIV(x,y); }
+      static double divides(half x, double y) { return HLS_FPO_DIV(x,y); }
+#endif
+
+      /// Output implementation.
+      /// \param out stream to write to
+      /// \param arg value to write
+      /// \return reference to stream
+      template<typename charT,typename traits> static std::basic_ostream<charT,traits>& write(std::basic_ostream<charT,traits> &out, float arg) { return out << arg; }
+
+      /// Input implementation.
+      /// \param in stream to read from
+      /// \param arg half to read into
+      /// \return reference to stream
+      template<typename charT,typename traits> static std::basic_istream<charT,traits>& read(std::basic_istream<charT,traits> &in, half &arg)
+      {
+        float f;
+        if(in >> f)
+          arg = f;
+        return in;
+      }
+
+      /// Modulo implementation.
+      /// \param x first operand
+      /// \param y second operand
+      /// \return Half-precision division remainder stored in single-precision
+      static expr fmod(float x, float y) { return expr(std::fmod(x, y)); }
+
+      /// Remainder implementation.
+      /// \param x first operand
+      /// \param y second operand
+      /// \return Half-precision division remainder stored in single-precision
+      static expr remainder(float x, float y)
+      {
+      #if HALF_ENABLE_CPP11_CMATH
+        return expr(std::remainder(x, y));
+      #else
+        if(builtin_isnan(x) || builtin_isnan(y))
+          return expr(std::numeric_limits<float>::quiet_NaN());
+        float ax = std::fabs(x), ay = std::fabs(y);
+        if(ax >= 65536.0f || ay < std::ldexp(1.0f, -24))
+          return expr(std::numeric_limits<float>::quiet_NaN());
+        if(ay >= 65536.0f)
+          return expr(x);
+        if(ax == ay)
+          return expr(builtin_signbit(x) ? -0.0f : 0.0f);
+        ax = std::fmod(ax, ay+ay);
+        float y2 = 0.5f * ay;
+        if(ax > y2)
+        {
+          ax -= ay;
+          if(ax >= y2)
+            ax -= ay;
+        }
+        return expr(builtin_signbit(x) ? -ax : ax);
+      #endif
+      }
+
+      /// Remainder implementation.
+      /// \param x first operand
+      /// \param y second operand
+      /// \param quo address to store quotient bits at
+      /// \return Half-precision division remainder stored in single-precision
+      static expr remquo(float x, float y, int *quo)
+      {
+      #if HALF_ENABLE_CPP11_CMATH
+        return expr(std::remquo(x, y, quo));
+      #else
+        if(builtin_isnan(x) || builtin_isnan(y))
+          return expr(std::numeric_limits<float>::quiet_NaN());
+        bool sign = builtin_signbit(x), qsign = static_cast<bool>(sign^builtin_signbit(y));
+        float ax = std::fabs(x), ay = std::fabs(y);
+        if(ax >= 65536.0f || ay < std::ldexp(1.0f, -24))
+          return expr(std::numeric_limits<float>::quiet_NaN());
+        if(ay >= 65536.0f)
+          return expr(x);
+        if(ax == ay)
+          return *quo = qsign ? -1 : 1, expr(sign ? -0.0f : 0.0f);
+        ax = std::fmod(ax, 8.0f*ay);
+        int cquo = 0;
+        if(ax >= 4.0f * ay)
+        {
+          ax -= 4.0f * ay;
+          cquo += 4;
+        }
+        if(ax >= 2.0f * ay)
+        {
+          ax -= 2.0f * ay;
+          cquo += 2;
+        }
+        float y2 = 0.5f * ay;
+        if(ax > y2)
+        {
+          ax -= ay;
+          ++cquo;
+          if(ax >= y2)
+          {
+            ax -= ay;
+            ++cquo;
+          }
+        }
+        return *quo = qsign ? -cquo : cquo, expr(sign ? -ax : ax);
+      #endif
+      }
+
+      /// Positive difference implementation.
+      /// \param x first operand
+      /// \param y second operand
+      /// \return Positive difference stored in single-precision
+      static expr fdim(float x, float y)
+      {
+      #if HALF_ENABLE_CPP11_CMATH
+        return expr(std::fdim(x, y));
+      #else
+        return expr((x<=y) ? 0.0f : (x-y));
+      #endif
+      }
+
+      static expr maxmag(float x, float y)
+      {
+        if (fabs(y)>fabs(x)) return expr(y);
+        else return expr(x);
+      }
+
+      static expr minmag(float x, float y)
+      {
+        if (fabs(y)<fabs(x)) return expr(y);
+        else return expr(x);
+      }
+
+      /// Exponential implementation.
+      /// \param arg function argument
+      /// \return function value stored in single-precision
+      static expr exp(float arg) { return expr(std::exp(arg)); }
+
+      /// Exponential-minus-one implementation.
+      /// \param arg function argument
+      /// \return function value stored in single-precision
+      static expr expm1(float arg)
+      {
+      #if HALF_ENABLE_CPP11_CMATH
+        return expr(std::expm1(arg));
+      #else
+        return expr(static_cast<float>(std::exp(static_cast<double>(arg))-1.0));
+      #endif
+      }
+
+      /// Binary exponential implementation.
+      /// \param arg function argument
+      /// \return function value stored in single-precision
+      static expr exp2(float arg)
+      {
+      #if HALF_ENABLE_CPP11_CMATH
+        return expr(std::exp2(arg));
+      #else
+        return expr(static_cast<float>(std::exp(arg*0.69314718055994530941723212145818)));
+      #endif
+      }
+
+      /// Logarithm implementation.
+      /// \param arg function argument
+      /// \return function value stored in single-precision
+      static expr log(float arg) { return expr(std::log(arg)); }
+
+      /// Common logarithm implementation.
+      /// \param arg function argument
+      /// \return function value stored in single-precision
+      static expr log10(float arg) { return expr(std::log10(arg)); }
+
+      /// Logarithm-plus-one implementation.
+      /// \param arg function argument
+      /// \return function value stored in single-precision
+      static expr log1p(float arg)
+      {
+      #if HALF_ENABLE_CPP11_CMATH
+        return expr(std::log1p(arg));
+      #else
+        return expr(static_cast<float>(std::log(1.0+arg)));
+      #endif
+      }
+
+      /// Binary logarithm implementation.
+      /// \param arg function argument
+      /// \return function value stored in single-precision
+      static expr log2(float arg)
+      {
+      #if HALF_ENABLE_CPP11_CMATH
+        return expr(std::log2(arg));
+      #else
+        return expr(static_cast<float>(std::log(static_cast<double>(arg))*1.4426950408889634073599246810019));
+      #endif
+      }
+
+      static expr logb(float arg)
+      {
+      #if HALF_ENABLE_CPP11_CMATH
+        return expr(std::logb(arg));
+      #else
+        return expr(static_cast<float>(std::log(static_cast<double>(fabs(arg)))*1.4426950408889634073599246810019));
+      #endif
+      }
+
+      /// Square root implementation.
+      /// \param arg function argument
+      /// \return function value stored in single-precision
+      static expr sqrt(float arg) { return expr(std::sqrt(arg)); }
+
+      /// Cubic root implementation.
+      /// \param arg function argument
+      /// \return function value stored in single-precision
+      static expr cbrt(float arg)
+      {
+      #if HALF_ENABLE_CPP11_CMATH
+        return expr(std::cbrt(arg));
+      #else
+        if(builtin_isnan(arg) || builtin_isinf(arg))
+          return expr(arg);
+        return expr(builtin_signbit(arg) ? -static_cast<float>(std::pow(std::fabs(static_cast<double>(arg)), 1.0/3.0)) :
+          static_cast<float>(std::pow(static_cast<double>(arg), 1.0/3.0)));
+      #endif
+      }
+
+      /// Hypotenuse implementation.
+      /// \param x first argument
+      /// \param y second argument
+      /// \return function value stored in single-precision
+      static expr hypot(float x, float y)
+      {
+      #if HALF_ENABLE_CPP11_CMATH
+        return expr(std::hypot(x, y));
+      #else
+        return expr((builtin_isinf(x) || builtin_isinf(y)) ? std::numeric_limits<float>::infinity() :
+          static_cast<float>(std::sqrt(static_cast<double>(x)*x+static_cast<double>(y)*y)));
+      #endif
+      }
+
+      /// Power implementation.
+      /// \param base value to exponentiate
+      /// \param exp power to exponentiate to
+      /// \return function value stored in single-precision
+      static expr pow(float base, float exp) { return expr(std::pow(base, exp)); }
+      static expr powr(float base, float exp) { return expr(std::pow(base, exp)); }
+      static expr pown(float base, int exp) { return expr(std::pow(base, exp)); }
+      static expr rootn(float base, int exp) { return expr(std::pow(base, float(float(1)/float(exp)))); }
+
+      /// Sine implementation.
+      /// \param arg function argument
+      /// \return function value stored in single-precision
+      static expr sin(float arg) { return expr(std::sin(arg)); }
+
+      /// Cosine implementation.
+      /// \param arg function argument
+      /// \return function value stored in single-precision
+      static expr cos(float arg) { return expr(std::cos(arg)); }
+
+      /// Tangent implementation.
+      /// \param arg function argument
+      /// \return function value stored in single-precision
+      static expr tan(float arg) { return expr(std::tan(arg)); }
+
+      /// Arc sine implementation.
+      /// \param arg function argument
+      /// \return function value stored in single-precision
+      static expr asin(float arg) { return expr(std::asin(arg)); }
+
+      /// Arc cosine implementation.
+      /// \param arg function argument
+      /// \return function value stored in single-precision
+      static expr acos(float arg) { return expr(std::acos(arg)); }
+
+      /// Arc tangent implementation.
+      /// \param arg function argument
+      /// \return function value stored in single-precision
+      static expr atan(float arg) { return expr(std::atan(arg)); }
+
+      /// Arc tangent implementation.
+      /// \param x first argument
+      /// \param y second argument
+      /// \return function value stored in single-precision
+      static expr atan2(float x, float y) { return expr(std::atan2(x, y)); }
+
+      /// Hyperbolic sine implementation.
+      /// \param arg function argument
+      /// \return function value stored in single-precision
+      static expr sinh(float arg) { return expr(std::sinh(arg)); }
+
+      /// Hyperbolic cosine implementation.
+      /// \param arg function argument
+      /// \return function value stored in single-precision
+      static expr cosh(float arg) { return expr(std::cosh(arg)); }
+
+      /// Hyperbolic tangent implementation.
+      /// \param arg function argument
+      /// \return function value stored in single-precision
+      static expr tanh(float arg) { return expr(std::tanh(arg)); }
+
+      /// Hyperbolic area sine implementation.
+      /// \param arg function argument
+      /// \return function value stored in single-precision
+      static expr asinh(float arg)
+      {
+      #if HALF_ENABLE_CPP11_CMATH
+        return expr(std::asinh(arg));
+      #else
+        return expr((arg==-std::numeric_limits<float>::infinity()) ? arg : static_cast<float>(std::log(arg+std::sqrt(arg*arg+1.0))));
+      #endif
+      }
+
+      /// Hyperbolic area cosine implementation.
+      /// \param arg function argument
+      /// \return function value stored in single-precision
+      static expr acosh(float arg)
+      {
+      #if HALF_ENABLE_CPP11_CMATH
+        return expr(std::acosh(arg));
+      #else
+        return expr((arg<-1.0f) ? std::numeric_limits<float>::quiet_NaN() : static_cast<float>(std::log(arg+std::sqrt(arg*arg-1.0))));
+      #endif
+      }
+
+      /// Hyperbolic area tangent implementation.
+      /// \param arg function argument
+      /// \return function value stored in single-precision
+      static expr atanh(float arg)
+      {
+      #if HALF_ENABLE_CPP11_CMATH
+        return expr(std::atanh(arg));
+      #else
+        return expr(static_cast<float>(0.5*std::log((1.0+arg)/(1.0-arg))));
+      #endif
+      }
+
+      /// Error function implementation.
+      /// \param arg function argument
+      /// \return function value stored in single-precision
+      static expr erf(float arg)
+      {
+      #if HALF_ENABLE_CPP11_CMATH
+        return expr(std::erf(arg));
+      #else
+        return expr(static_cast<float>(erf(static_cast<double>(arg))));
+      #endif
+      }
+
+      /// Complementary error function implementation.
+      /// \param arg function argument
+      /// \return function value stored in single-precision
+      static expr erfc(float arg)
+      {
+      #if HALF_ENABLE_CPP11_CMATH
+        return expr(std::erfc(arg));
+      #else
+        return expr(static_cast<float>(1.0-erf(static_cast<double>(arg))));
+      #endif
+      }
+
+      /// Gamma logarithm implementation.
+      /// \param arg function argument
+      /// \return function value stored in single-precision
+      static expr lgamma(float arg)
+      {
+      #if HALF_ENABLE_CPP11_CMATH
+        return expr(std::lgamma(arg));
+      #else
+        if(builtin_isinf(arg))
+          return expr(std::numeric_limits<float>::infinity());
+        double z = static_cast<double>(arg);
+        if(z < 0)
+        {
+          double i, f = std::modf(-z, &i);
+          if(f == 0.0)
+            return expr(std::numeric_limits<float>::infinity());
+          return expr(static_cast<float>(1.1447298858494001741434273513531-
+            std::log(std::abs(std::sin(3.1415926535897932384626433832795*f)))-lgamma(1.0-z)));
+        }
+//        if(z < 8.0)
+          return expr(static_cast<float>(lgamma(static_cast<double>(arg))));
+        return expr(static_cast<float>(0.5*(1.8378770664093454835606594728112-std::log(z))+z*(std::log(z+1.0/(12.0*z-1.0/(10.0*z)-1.0))-1.0)));
+      #endif
+      }
+
+      /// Gamma implementation.
+      /// \param arg function argument
+      /// \return function value stored in single-precision
+      static expr tgamma(float arg)
+      {
+      #if HALF_ENABLE_CPP11_CMATH
+        return expr(std::tgamma(arg));
+      #else
+        double z = static_cast<double>(arg);
+        if(z == 0.0)
+          return builtin_signbit(z) ? expr(-std::numeric_limits<float>::infinity()) : expr(std::numeric_limits<float>::infinity());
+        if(z < 0.0)
+        {
+          double i, f = std::modf(-z, &i);
+          if(f == 0.0)
+            return expr(std::numeric_limits<float>::quiet_NaN());
+          double sign = (std::fmod(i, 2.0)==0.0) ? -1.0 : 1.0;
+          return expr(static_cast<float>(sign*3.1415926535897932384626433832795/
+            (std::sin(3.1415926535897932384626433832795*f)*std::exp(lgamma(1.0-z)))));
+        }
+        if(builtin_isinf(arg))
+          return expr(arg);
+//        if(arg < 8.0f)
+          return expr(static_cast<float>(std::exp(lgamma(z))));
+        return expr(static_cast<float>(std::sqrt(6.283185307179586476925286766559/z)*
+          std::pow(0.36787944117144232159552377016146*(z+1.0/(12.0*z-1.0/(10.0*z))), z)));
+      #endif
+      }
+
+      /// Floor implementation.
+      /// \param arg value to round
+      /// \return rounded value
+      static half floor(half arg) { return half(binary, round_half<std::round_toward_neg_infinity>(arg.data_)); }
+
+      /// Ceiling implementation.
+      /// \param arg value to round
+      /// \return rounded value
+      static half ceil(half arg) { return half(binary, round_half<std::round_toward_infinity>(arg.data_)); }
+
+      /// Truncation implementation.
+      /// \param arg value to round
+      /// \return rounded value
+      static half trunc(half arg) { return half(binary, round_half<std::round_toward_zero>(arg.data_)); }
+
+      /// Nearest integer implementation.
+      /// \param arg value to round
+      /// \return rounded value
+      static half round(half arg) { return half(binary, round_half_up(arg.data_)); }
+
+      /// Nearest integer implementation.
+      /// \param arg value to round
+      /// \return rounded value
+      static long lround(half arg) { return detail::half2int_up<long>(arg.data_); }
+
+      /// Nearest integer implementation.
+      /// \param arg value to round
+      /// \return rounded value
+      static half rint(half arg) { return half(binary, round_half<half::round_style>(arg.data_)); }
+
+      /// Nearest integer implementation.
+      /// \param arg value to round
+      /// \return rounded value
+      static long lrint(half arg) { return detail::half2int<half::round_style,long>(arg.data_); }
+
+      #if HALF_ENABLE_CPP11_LONG_LONG
+      /// Nearest integer implementation.
+      /// \param arg value to round
+      /// \return rounded value
+      static long long llround(half arg) { return detail::half2int_up<long long>(arg.data_); }
+
+      /// Nearest integer implementation.
+      /// \param arg value to round
+      /// \return rounded value
+      static long long llrint(half arg) { return detail::half2int<half::round_style,long long>(arg.data_); }
+      #endif
+
+      /// Decompression implementation.
+      /// \param arg number to decompress
+      /// \param exp address to store exponent at
+      /// \return normalized significand
+      static half frexp(half arg, int *exp)
+      {
+        unsigned int m = arg.data_ & 0x7FFF;
+        if(m >= 0x7C00 || !m)
+          return *exp = 0, arg;
+        int e = m >> 10;
+        if(!e)
+          for(m<<=1; m<0x400; m<<=1,--e) ;
+        return *exp = e-14, half(binary, static_cast<uint16>((arg.data_&0x8000)|0x3800|(m&0x3FF)));
+      }
+
+      /// Decompression implementation.
+      /// \param arg number to decompress
+      /// \param iptr address to store integer part at
+      /// \return fractional part
+      static half modf(half arg, half *iptr)
+      {
+        unsigned int e = arg.data_ & 0x7C00;
+        if(e > 0x6000)
+          return *iptr = arg, (e==0x7C00&&(arg.data_&0x3FF)) ? arg : half(binary, arg.data_&0x8000);
+        if(e < 0x3C00)
+          return iptr->data_ = arg.data_ & 0x8000, arg;
+        e >>= 10;
+        unsigned int mask = (1<<(25-e)) - 1, m = arg.data_ & mask;
+        iptr->data_ = arg.data_ & ~mask;
+        if(!m)
+          return half(binary, arg.data_&0x8000);
+        for(; m<0x400; m<<=1,--e) ;
+        return half(binary, static_cast<uint16>((arg.data_&0x8000)|(e<<10)|(m&0x3FF)));
+      }
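[Editor's note] The half-precision frexp above follows the usual C convention, arg == significand * 2^exp with the significand in [0.5, 1); it forces the stored exponent field to 14 (0x3800) and keeps the sign and mantissa bits. A tiny standalone illustration of the same convention on float (file name illustrative):

// frexp_demo.cpp -- illustrative only.
#include <cmath>
#include <cstdio>

int main() {
    int e;
    float s = std::frexp(6.0f, &e);            // s = 0.75, e = 3
    std::printf("6.0 = %g * 2^%d\n", s, e);
    // The half version behaves the same way: for half bits 0x4600 (6.0) it
    // returns bits 0x3A00 (0.75) and stores 3 through the exp pointer.
    return 0;
}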
+      /// Scaling implementation.
+      /// \param arg number to scale
+      /// \param exp power of two to scale by
+      /// \return scaled number
+      static half scalbln(half arg, long exp)
+      {
+        long e = arg.data_ & 0x7C00;
+        if(e == 0x7C00)
+          return arg;
+        unsigned int m = arg.data_ & 0x3FF;
+        if(e >>= 10)
+          m |= 0x400;
+        else
+        {
+          if(!m)
+            return arg;
+          for(m<<=1; m<0x400; m<<=1,--e) ;
+        }
+        e += exp;
+        uint16 value = arg.data_ & 0x8000;
+        if(e > 30)
+        {
+          if(half::round_style == std::round_toward_zero)
+            value |= 0x7BFF;
+          else if(half::round_style == std::round_toward_infinity)
+            value |= 0x7C00 - (value>>15);
+          else if(half::round_style == std::round_toward_neg_infinity)
+            value |= 0x7BFF + (value>>15);
+          else
+            value |= 0x7C00;
+        }
+        else if(e > 0)
+          value |= (e<<10) | (m&0x3FF);
+        else if(e > -11)
+        {
+          if(half::round_style == std::round_to_nearest)
+          {
+            m += 1 << -e;
+          #if HALF_ROUND_TIES_TO_EVEN
+            m -= (m>>(1-e)) & 1;
+          #endif
+          }
+          else if(half::round_style == std::round_toward_infinity)
+            m += ((value>>15)-1) & ((1<<(1-e))-1U);
+          else if(half::round_style == std::round_toward_neg_infinity)
+            m += -(value>>15) & ((1<<(1-e))-1U);
+          value |= m >> (1-e);
+        }
+        else if(half::round_style == std::round_toward_infinity)
+          value |= ((value>>15)-1) & 1;
+        else if(half::round_style == std::round_toward_neg_infinity)
+          value |= value >> 15;
+        return half(binary, value);
+      }
+
+      /// Exponent implementation.
+      /// \param arg number to query
+      /// \return floating point exponent
+      static int ilogb(half arg)
+      {
+        int exp = arg.data_ & 0x7FFF;
+        if(!exp)
+          return FP_ILOGB0;
+        if(exp < 0x7C00)
+        {
+          if(!(exp>>=10))
+            for(unsigned int m=(arg.data_&0x3FF); m<0x200; m<<=1,--exp) ;
+          return exp - 15;
+        }
+        if(exp > 0x7C00)
+          return FP_ILOGBNAN;
+        return INT_MAX;
+      }
+
+      /// Enumeration implementation.
+      /// \param from number to increase/decrease
+      /// \param to direction to enumerate into
+      /// \return next representable number
+      static half nextafter(half from, half to)
+      {
+        uint16 fabs = from.data_ & 0x7FFF, tabs = to.data_ & 0x7FFF;
+        if(fabs > 0x7C00)
+          return from;
+        if(tabs > 0x7C00 || from.data_ == to.data_ || !(fabs|tabs))
+          return to;
+        if(!fabs)
+          return half(binary, (to.data_&0x8000)+1);
+        bool lt = (signbit(from) ? (static_cast<int17>(0x8000)-from.data_) : static_cast<int17>(from.data_)) <
+          (signbit(to) ? (static_cast<int17>(0x8000)-to.data_) : static_cast<int17>(to.data_));
+        return half(binary, from.data_+(((from.data_>>15)^static_cast<uint16>(lt))<<1)-1);
+      }
+
+      /// Enumeration implementation.
+      /// \param from number to increase/decrease
+      /// \param to direction to enumerate into
+      /// \return next representable number
+      static half nexttoward(half from, long double to)
+      {
+        if(isnan(from))
+          return from;
+        long double lfrom = static_cast<long double>(from);
+        if(builtin_isnan(to) || lfrom == to)
+          return half(static_cast<float>(to));
+        if(!(from.data_&0x7FFF))
+          return half(binary, (static_cast<uint16>(builtin_signbit(to))<<15)+1);
+        return half(binary, from.data_+(((from.data_>>15)^static_cast<uint16>(lfrom<to))<<1)-1);
+      }
+
+      /// Classification implementation.
+      /// \param arg value to classify
+      /// \return floating point classification
+      static int fpclassify(half arg)
+      {
+        unsigned int abs = arg.data_ & 0x7FFF;
+        if(abs > 0x7C00)
+          return FP_NAN;
+        if(abs == 0x7C00)
+          return FP_INFINITE;
+        if(abs > 0x3FF)
+          return FP_NORMAL;
+        return abs ? FP_SUBNORMAL : FP_ZERO;
+      }
+
+      /// Classification implementation.
+      /// \param arg value to classify
+      /// \retval true if finite number
+      /// \retval false else
+      static bool isfinite(half arg) { return (arg.data_&0x7C00) != 0x7C00; }
+
+      /// Classification implementation.
+      /// \param arg value to classify
+      /// \retval true if infinite number
+      /// \retval false else
+      static bool isinf(half arg) { return (arg.data_&0x7FFF) == 0x7C00; }
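[Editor's note] The classifications above, and the ones that follow, reduce to plain bit tests on the 16-bit payload: an all-ones exponent field selects infinity (zero mantissa) or NaN (nonzero mantissa). A standalone sketch of the same predicates (illustrative only, independent of the header):

// half_classify.cpp -- illustrative only; mirrors the bit tests used in this header.
#include <cstdint>
#include <cstdio>

static bool h_isinf(uint16_t h)    { return (h & 0x7FFF) == 0x7C00; } // exp all ones, mantissa 0
static bool h_isnan(uint16_t h)    { return (h & 0x7FFF) >  0x7C00; } // exp all ones, mantissa != 0
static bool h_isfinite(uint16_t h) { return (h & 0x7C00) != 0x7C00; }
static bool h_signbit(uint16_t h)  { return (h & 0x8000) != 0; }

int main() {
    std::printf("%d %d %d %d\n",
                h_isinf(0x7C00),      // +inf -> 1
                h_isnan(0x7E00),      // quiet NaN -> 1
                h_isfinite(0xFBFF),   // -65504, largest finite magnitude -> 1
                h_signbit(0x8000));   // -0.0 -> 1
    return 0;
}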
+ /// \param arg value to classify + /// \retval true if not a number + /// \retval false else + static bool isnan(half arg) { return (arg.data_&0x7FFF) > 0x7C00; } + + /// Classification implementation. + /// \param arg value to classify + /// \retval true if normal number + /// \retval false else + static bool isnormal(half arg) { return ((arg.data_&0x7C00)!=0) & ((arg.data_&0x7C00)!=0x7C00); } + + /// Sign bit implementation. + /// \param arg value to check + /// \retval true if signed + /// \retval false if unsigned + static bool signbit(half arg) { return (arg.data_&0x8000) != 0; } + + /// Comparison implementation. + /// \param x first operand + /// \param y second operand + /// \retval true if operands equal + /// \retval false else +#ifdef HLS_NO_XIL_FPO_LIB + static bool isequal(half x, half y) { return (x.data_==y.data_ || !((x.data_|y.data_)&0x7FFF)) && !isnan(x); } +#else + template + static bool isequal(T1 x, T2 y) { return binary_operator_compare(xip_fpo_equal, x, y); } + static bool isequal(float x, half y) { return HLS_FPO_EQUALF(x,y); } + static bool isequal(half x, float y) { return HLS_FPO_EQUALF(x,y); } + static bool isequal(double x, half y) { return HLS_FPO_EQUAL(x,y); } + static bool isequal(half x, double y) { return HLS_FPO_EQUAL(x,y); } +#endif + + /// Comparison implementation. + /// \param x first operand + /// \param y second operand + /// \retval true if operands not equal + /// \retval false else +#ifdef HLS_NO_XIL_FPO_LIB + static bool isnotequal(half x, half y) { return (x.data_!=y.data_ && ((x.data_|y.data_)&0x7FFF)) || isnan(x); } +#else + template + static bool isnotequal(T1 x, T2 y) { return binary_operator_compare(xip_fpo_notequal, x, y); } + static bool isnotequal(float x, half y) { return HLS_FPO_NOTEQUALF(x,y); } + static bool isnotequal(half x, float y) { return HLS_FPO_NOTEQUALF(x,y); } + static bool isnotequal(double x, half y) { return HLS_FPO_NOTEQUAL(x,y); } + static bool isnotequal(half x, double y) { return HLS_FPO_NOTEQUAL(x,y); } +#endif + + /// Comparison implementation. + /// \param x first operand + /// \param y second operand + /// \retval true if \a x > \a y + /// \retval false else +#ifdef HLS_NO_XIL_FPO_LIB + static bool isgreater(half x, half y) { return !isnan(x) && !isnan(y) && ((signbit(x) ? (static_cast(0x8000)-x.data_) : + static_cast(x.data_)) > (signbit(y) ? (static_cast(0x8000)-y.data_) : static_cast(y.data_))); } +#else + template + static bool isgreater(T1 x, T2 y) { return binary_operator_compare(xip_fpo_greater, x, y); } + static bool isgreater(float x, half y) { return HLS_FPO_GREATERF(x,y); } + static bool isgreater(half x, float y) { return HLS_FPO_GREATERF(x,y); } + static bool isgreater(double x, half y) { return HLS_FPO_GREATER(x,y); } + static bool isgreater(half x, double y) { return HLS_FPO_GREATER(x,y); } +#endif + + /// Comparison implementation. + /// \param x first operand + /// \param y second operand + /// \retval true if \a x >= \a y + /// \retval false else +#ifdef HLS_NO_XIL_FPO_LIB + static bool isgreaterequal(half x, half y) { return !isnan(x) && !isnan(y) && ((signbit(x) ? (static_cast(0x8000)-x.data_) : + static_cast(x.data_)) >= (signbit(y) ? 
(static_cast(0x8000)-y.data_) : static_cast(y.data_))); } +#else + template + static bool isgreaterequal(T1 x, T2 y) { return binary_operator_compare(xip_fpo_greaterequal, x, y); } + static bool isgreaterequal(float x, half y) { return HLS_FPO_GREATEREQUALF(x,y); } + static bool isgreaterequal(half x, float y) { return HLS_FPO_GREATEREQUALF(x,y); } + static bool isgreaterequal(double x, half y) { return HLS_FPO_GREATEREQUAL(x,y); } + static bool isgreaterequal(half x, double y) { return HLS_FPO_GREATEREQUAL(x,y); } +#endif + + /// Comparison implementation. + /// \param x first operand + /// \param y second operand + /// \retval true if \a x < \a y + /// \retval false else +#ifdef HLS_NO_XIL_FPO_LIB + static bool isless(half x, half y) { return !isnan(x) && !isnan(y) && ((signbit(x) ? (static_cast(0x8000)-x.data_) : + static_cast(x.data_)) < (signbit(y) ? (static_cast(0x8000)-y.data_) : static_cast(y.data_))); } +#else + template + static bool isless(T1 x, T2 y) { return binary_operator_compare(xip_fpo_less, x, y); } + static bool isless(float x, half y) { return HLS_FPO_LESSF(x,y); } + static bool isless(half x, float y) { return HLS_FPO_LESSF(x,y); } + static bool isless(double x, half y) { return HLS_FPO_LESS(x,y); } + static bool isless(half x, double y) { return HLS_FPO_LESS(x,y); } +#endif + + /// Comparison implementation. + /// \param x first operand + /// \param y second operand + /// \retval true if \a x <= \a y + /// \retval false else +#ifdef HLS_NO_XIL_FPO_LIB + static bool islessequal(half x, half y) { return !isnan(x) && !isnan(y) && ((signbit(x) ? (static_cast(0x8000)-x.data_) : + static_cast(x.data_)) <= (signbit(y) ? (static_cast(0x8000)-y.data_) : static_cast(y.data_))); } +#else + template + static bool islessequal(T1 x, T2 y) { return binary_operator_compare(xip_fpo_lessequal, x, y); } + static bool islessequal(float x, half y) { return HLS_FPO_LESSEQUALF(x,y); } + static bool islessequal(half x, float y) { return HLS_FPO_LESSEQUALF(x,y); } + static bool islessequal(double x, half y) { return HLS_FPO_LESSEQUAL(x,y); } + static bool islessequal(half x, double y) { return HLS_FPO_LESSEQUAL(x,y); } +#endif + + /// Comparison implementation. + /// \param x first operand + /// \param y second operand + /// \retval true neither \a x > \a y nor \a x < \a y + /// \retval false else + static bool islessgreater(half x, half y) + { + if(isnan(x) || isnan(y)) + return false; + //int17 a = signbit(x) ? (static_cast(0x8000)-x.data_) : static_cast(x.data_); + //int17 b = signbit(y) ? (static_cast(0x8000)-y.data_) : static_cast(y.data_); + //return a < b || a > b; + return isless(x, y) || isgreater(x, y); + } + + /// Comparison implementation. + /// \param x first operand + /// \param y second operand + /// \retval true if operand unordered + /// \retval false else + static bool isunordered(half x, half y) { return isnan(x) || isnan(y); } + + private: + static double erf(double arg) + { + if(builtin_isinf(arg)) + return (arg<0.0) ? -1.0 : 1.0; + double x2 = static_cast(arg) * static_cast(arg), ax2 = 0.147 * x2; + double value = std::sqrt(1.0-std::exp(-x2*(1.2732395447351626861510701069801+ax2)/(1.0+ax2))); + return builtin_signbit(arg) ? 
-value : value; + } + + static double lgamma(double arg) + { + double v = 1.0; + for(; arg<8.0; ++arg) v *= arg; + double w = 1.0 / (arg * arg); + return (((((((-0.02955065359477124183006535947712*w+0.00641025641025641025641025641026)*w+ + -0.00191752691752691752691752691753)*w+8.4175084175084175084175084175084e-4)*w+ + -5.952380952380952380952380952381e-4)*w+7.9365079365079365079365079365079e-4)*w+ + -0.00277777777777777777777777777778)*w+0.08333333333333333333333333333333)/arg + + 0.91893853320467274178032973640562 - std::log(v) - arg + (arg-0.5) * std::log(arg); + } + }; + + /// Wrapper for unary half-precision functions needing specialization for individual argument types. + /// \tparam T argument type + template struct unary_specialized + { + /// Negation implementation. + /// \param arg value to negate + /// \return negated value + static HALF_CONSTEXPR half negate(half arg) { return half(binary, arg.data_^0x8000); } + + /// Absolute value implementation. + /// \param arg function argument + /// \return absolute value + static half fabs(half arg) { return half(binary, arg.data_&0x7FFF); } + }; + template<> struct unary_specialized + { + static HALF_CONSTEXPR expr negate(float arg) { return expr(-arg); } + static expr fabs(float arg) { return expr(std::fabs(arg)); } + }; + + /// Wrapper for binary half-precision functions needing specialization for individual argument types. + /// \tparam T first argument type + /// \tparam U first argument type + template struct binary_specialized + { + /// Minimum implementation. + /// \param x first operand + /// \param y second operand + /// \return minimum value + static expr fmin(float x, float y) + { + #if HALF_ENABLE_CPP11_CMATH + return expr(std::fmin(x, y)); + #else + if(builtin_isnan(x)) + return expr(y); + if(builtin_isnan(y)) + return expr(x); + return expr(std::min(x, y)); + #endif + } + + /// Maximum implementation. + /// \param x first operand + /// \param y second operand + /// \return maximum value + static expr fmax(float x, float y) + { + #if HALF_ENABLE_CPP11_CMATH + return expr(std::fmax(x, y)); + #else + if(builtin_isnan(x)) + return expr(y); + if(builtin_isnan(y)) + return expr(x); + return expr(std::max(x, y)); + #endif + } + }; + template<> struct binary_specialized + { + static half fmin(half x, half y) + { + if(functions::isnan(x)) + return y; + if(functions::isnan(y)) + return x; + return ((functions::signbit(x) ? (static_cast(0x8000)-x.data_) : static_cast(x.data_)) > + (functions::signbit(y) ? (static_cast(0x8000)-y.data_) : static_cast(y.data_))) ? y : x; + } + static half fmax(half x, half y) + { + if(functions::isnan(x)) + return y; + if(functions::isnan(y)) + return x; + return ((functions::signbit(x) ? (static_cast(0x8000)-x.data_) : static_cast(x.data_)) < + (functions::signbit(y) ? (static_cast(0x8000)-y.data_) : static_cast(y.data_))) ? y : x; + } + }; + + /// Helper class for half casts. + /// This class template has to be specialized for all valid cast argument to define an appropriate static `cast` member + /// function and a corresponding `type` member denoting its return type. 
+ /// \tparam T destination type + /// \tparam U source type + /// \tparam R rounding mode to use + template struct half_caster {}; + template struct half_caster + { + #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS + static_assert(std::is_arithmetic::value, "half_cast from non-arithmetic type unsupported"); + #endif + + typedef half type; + static half cast(U arg) { return cast_impl(arg, is_float()); }; + + private: + static half cast_impl(U arg, true_type) { return half(binary, float2half(static_cast(arg))); } + static half cast_impl(U arg, false_type) { return half(binary, int2half(arg)); } + }; + template struct half_caster + { + #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS + static_assert(std::is_arithmetic::value, "half_cast to non-arithmetic type unsupported"); + #endif + + typedef T type; + template static T cast(U arg) { return cast_impl(arg, is_float()); } + + private: + static T cast_impl(float arg, true_type) { return static_cast(arg); } + static T cast_impl(half arg, false_type) { return half2int(arg.data_); } + }; + template struct half_caster : public half_caster {}; + template struct half_caster + { + typedef half type; + static half cast(half arg) { return arg; } + }; + template struct half_caster : public half_caster {}; + + /// \name Comparison operators + /// \{ + + /// the behavior of comparison operators' simulation model is same with STD/following implmentation + /// so no need to call FPO model + + /// Comparison for equality. + /// \param x first operand + /// \param y second operand + /// \retval true if operands equal + /// \retval false else + template typename enable::type operator==(T x, U y) { return functions::isequal(x, y); } + + /// Comparison for inequality. + /// \param x first operand + /// \param y second operand + /// \retval true if operands not equal + /// \retval false else + template typename enable::type operator!=(T x, U y) { return functions::isnotequal(x, y); } + + /// Comparison for less than. + /// \param x first operand + /// \param y second operand + /// \retval true if \a x less than \a y + /// \retval false else + template typename enable::type operator<(T x, U y) { return functions::isless(x, y); } + + /// Comparison for greater than. + /// \param x first operand + /// \param y second operand + /// \retval true if \a x greater than \a y + /// \retval false else + template typename enable::type operator>(T x, U y) { return functions::isgreater(x, y); } + + /// Comparison for less equal. + /// \param x first operand + /// \param y second operand + /// \retval true if \a x less equal \a y + /// \retval false else + template typename enable::type operator<=(T x, U y) { return functions::islessequal(x, y); } + + /// Comparison for greater equal. + /// \param x first operand + /// \param y second operand + /// \retval true if \a x greater equal \a y + /// \retval false else + template typename enable::type operator>=(T x, U y) { return functions::isgreaterequal(x, y); } + + /// \} + /// \name Arithmetic operators + /// \{ + + /// Add halfs. + /// \param x left operand + /// \param y right operand + /// \return sum of half expressions +#ifdef HLS_NO_XIL_FPO_LIB + template typename enable::type operator+(T x, U y) { return functions::plus(x, y); } +#else + template typename enable::type operator+(T x, U y) { return functions::plus(x, y); } +#endif + + /// Subtract halfs. 
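+ /// A usage sketch (illustrative, not from the original header): with the
+ /// plain C++ model, arithmetic on half operands is carried out in single
+ /// precision and yields an expression object that rounds back to half on
+ /// assignment:
+ /// \code
+ /// half a(2.5f), b(1.0f);
+ /// half d = a - b; // evaluated as float, stored as half(1.5)
+ /// \endcode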
+ /// \param x left operand + /// \param y right operand + /// \return difference of half expressions +#ifdef HLS_NO_XIL_FPO_LIB + template typename enable::type operator-(T x, U y) { return functions::minus(x, y); } +#else + template typename enable::type operator-(T x, U y) { return functions::minus(x, y); } +#endif + + /// Multiply halfs. + /// \param x left operand + /// \param y right operand + /// \return product of half expressions +#ifdef HLS_NO_XIL_FPO_LIB + template typename enable::type operator*(T x, U y) { return functions::multiplies(x, y); } +#else + template typename enable::type operator*(T x, U y) { return functions::multiplies(x, y); } +#endif + + /// Divide halfs. + /// \param x left operand + /// \param y right operand + /// \return quotient of half expressions +#ifdef HLS_NO_XIL_FPO_LIB + template typename enable::type operator/(T x, U y) { return functions::divides(x, y); } +#else + template typename enable::type operator/(T x, U y) { return functions::divides(x, y); } +#endif + + /// Identity. + /// \param arg operand + /// \return uncahnged operand + template HALF_CONSTEXPR typename enable::type operator+(T arg) { return arg; } + + /// Negation. + /// \param arg operand + /// \return negated operand + template HALF_CONSTEXPR typename enable::type operator-(T arg) { return unary_specialized::negate(arg); } + + /// \} + /// \name Input and output + /// \{ + + /// Output operator. + /// \param out output stream to write into + /// \param arg half expression to write + /// \return reference to output stream + template typename enable&,T>::type + operator<<(std::basic_ostream &out, T arg) { return functions::write(out, arg); } + + /// Input operator. + /// \param in input stream to read from + /// \param arg half to read into + /// \return reference to input stream + template std::basic_istream& + operator>>(std::basic_istream &in, half &arg) { return functions::read(in, arg); } + + /// \} + /// \name Basic mathematical operations + /// \{ + + /// Absolute value. + /// \param arg operand + /// \return absolute value of \a arg +// template typename enable::type abs(T arg) { return unary_specialized::fabs(arg); } + inline half abs(half arg) { return unary_specialized::fabs(arg); } + inline expr abs(expr arg) { return unary_specialized::fabs(arg); } + + /// Absolute value. + /// \param arg operand + /// \return absolute value of \a arg +// template typename enable::type fabs(T arg) { return unary_specialized::fabs(arg); } + inline half fabs(half arg) { return unary_specialized::fabs(arg); } + inline expr fabs(expr arg) { return unary_specialized::fabs(arg); } + + /// Remainder of division. + /// \param x first operand + /// \param y second operand + /// \return remainder of floating point division. +// template typename enable::type fmod(T x, U y) { return functions::fmod(x, y); } + inline expr fmod(half x, half y) { return functions::fmod(x, y); } + inline expr fmod(half x, expr y) { return functions::fmod(x, y); } + inline expr fmod(expr x, half y) { return functions::fmod(x, y); } + inline expr fmod(expr x, expr y) { return functions::fmod(x, y); } + + /// Remainder of division. + /// \param x first operand + /// \param y second operand + /// \return remainder of floating point division. 
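+ /// Unlike fmod, the IEEE remainder rounds the quotient to the nearest
+ /// integer, so the result may be negative (an illustrative sketch, not from
+ /// the original header):
+ /// \code
+ /// half r1 = fmod(half(5.5f), half(2.0f));      // 1.5
+ /// half r2 = remainder(half(5.5f), half(2.0f)); // -0.5, since 5.5 == 3*2.0 - 0.5
+ /// \endcode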
+// template typename enable::type remainder(T x, U y) { return functions::remainder(x, y); } + inline expr remainder(half x, half y) { return functions::remainder(x, y); } + inline expr remainder(half x, expr y) { return functions::remainder(x, y); } + inline expr remainder(expr x, half y) { return functions::remainder(x, y); } + inline expr remainder(expr x, expr y) { return functions::remainder(x, y); } + + /// Remainder of division. + /// \param x first operand + /// \param y second operand + /// \param quo address to store some bits of quotient at + /// \return remainder of floating point division. +// template typename enable::type remquo(T x, U y, int *quo) { return functions::remquo(x, y, quo); } + inline expr remquo(half x, half y, int *quo) { return functions::remquo(x, y, quo); } + inline expr remquo(half x, expr y, int *quo) { return functions::remquo(x, y, quo); } + inline expr remquo(expr x, half y, int *quo) { return functions::remquo(x, y, quo); } + inline expr remquo(expr x, expr y, int *quo) { return functions::remquo(x, y, quo); } + + /// Fused multiply add. + /// \param x first operand + /// \param y second operand + /// \param z third operand + /// \return ( \a x * \a y ) + \a z rounded as one operation. +// template typename enable::type fma(T x, U y, V z) { return functions::fma(x, y, z); } + inline expr fma(half x, half y, half z) { return functions::fma(x, y, z); } + inline expr fma(half x, half y, expr z) { return functions::fma(x, y, z); } + inline expr fma(half x, expr y, half z) { return functions::fma(x, y, z); } + inline expr fma(half x, expr y, expr z) { return functions::fma(x, y, z); } + inline expr fma(expr x, half y, half z) { return functions::fma(x, y, z); } + inline expr fma(expr x, half y, expr z) { return functions::fma(x, y, z); } + inline expr fma(expr x, expr y, half z) { return functions::fma(x, y, z); } + inline expr fma(expr x, expr y, expr z) { return functions::fma(x, y, z); } + + inline expr mad(half x, half y, half z) { return functions::fma(x, y, z); } + inline expr mad(half x, half y, expr z) { return functions::fma(x, y, z); } + inline expr mad(half x, expr y, half z) { return functions::fma(x, y, z); } + inline expr mad(half x, expr y, expr z) { return functions::fma(x, y, z); } + inline expr mad(expr x, half y, half z) { return functions::fma(x, y, z); } + inline expr mad(expr x, half y, expr z) { return functions::fma(x, y, z); } + inline expr mad(expr x, expr y, half z) { return functions::fma(x, y, z); } + inline expr mad(expr x, expr y, expr z) { return functions::fma(x, y, z); } + + /// Maximum of half expressions. + /// \param x first operand + /// \param y second operand + /// \return maximum of operands +// template typename result::type fmax(T x, U y) { return binary_specialized::fmax(x, y); } + inline half fmax(half x, half y) { return binary_specialized::fmax(x, y); } + inline expr fmax(half x, expr y) { return binary_specialized::fmax(x, y); } + inline expr fmax(expr x, half y) { return binary_specialized::fmax(x, y); } + inline expr fmax(expr x, expr y) { return binary_specialized::fmax(x, y); } + + /// Minimum of half expressions. 
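+ /// In contrast to the raw comparison operators, fmin and fmax are
+ /// NaN-avoiding: if exactly one operand is NaN, the other operand is
+ /// returned (an illustrative sketch, not from the original header):
+ /// \code
+ /// half lo = fmin(nanh(""), half(2.0f)); // lo == 2.0, the non-NaN operand
+ /// \endcode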
+ /// \param x first operand + /// \param y second operand + /// \return minimum of operands +// template typename result::type fmin(T x, U y) { return binary_specialized::fmin(x, y); } + inline half fmin(half x, half y) { return binary_specialized::fmin(x, y); } + inline expr fmin(half x, expr y) { return binary_specialized::fmin(x, y); } + inline expr fmin(expr x, half y) { return binary_specialized::fmin(x, y); } + inline expr fmin(expr x, expr y) { return binary_specialized::fmin(x, y); } + + /// Positive difference. + /// \param x first operand + /// \param y second operand + /// \return \a x - \a y or 0 if difference negative +// template typename enable::type fdim(T x, U y) { return functions::fdim(x, y); } + inline expr fdim(half x, half y) { return functions::fdim(x, y); } + inline expr fdim(half x, expr y) { return functions::fdim(x, y); } + inline expr fdim(expr x, half y) { return functions::fdim(x, y); } + inline expr fdim(expr x, expr y) { return functions::fdim(x, y); } + + inline expr maxmag(half x, half y) { return functions::maxmag(x, y); } + inline expr maxmag(half x, expr y) { return functions::maxmag(x, y); } + inline expr maxmag(expr x, half y) { return functions::maxmag(x, y); } + inline expr maxmag(expr x, expr y) { return functions::maxmag(x, y); } + + inline expr minmag(half x, half y) { return functions::minmag(x, y); } + inline expr minmag(half x, expr y) { return functions::minmag(x, y); } + inline expr minmag(expr x, half y) { return functions::minmag(x, y); } + inline expr minmag(expr x, expr y) { return functions::minmag(x, y); } + + /// Get NaN value. + /// \param arg descriptive string (ignored) + /// \return quiet NaN + inline half nanh(const char *arg) { return functions::nanh(arg); } + + /// \} + /// \name Exponential functions + /// \{ + + /// Exponential function. + /// \param arg function argument + /// \return e raised to \a arg +// template typename enable::type exp(T arg) { return functions::exp(arg); } + inline expr exp(half arg) { return functions::exp(arg); } + inline expr exp(expr arg) { return functions::exp(arg); } + + /// Exponential minus one. + /// \param arg function argument + /// \return e raised to \a arg subtracted by 1 +// template typename enable::type expm1(T arg) { return functions::expm1(arg); } + inline expr expm1(half arg) { return functions::expm1(arg); } + inline expr expm1(expr arg) { return functions::expm1(arg); } + + /// Binary exponential. + /// \param arg function argument + /// \return 2 raised to \a arg +// template typename enable::type exp2(T arg) { return functions::exp2(arg); } + inline expr exp2(half arg) { return functions::exp2(arg); } + inline expr exp2(expr arg) { return functions::exp2(arg); } + + /// Natural logorithm. + /// \param arg function argument + /// \return logarithm of \a arg to base e +// template typename enable::type log(T arg) { return functions::log(arg); } + inline expr log(half arg) { return functions::log(arg); } + inline expr log(expr arg) { return functions::log(arg); } + + /// Common logorithm. + /// \param arg function argument + /// \return logarithm of \a arg to base 10 +// template typename enable::type log10(T arg) { return functions::log10(arg); } + inline expr log10(half arg) { return functions::log10(arg); } + inline expr log10(expr arg) { return functions::log10(arg); } + + /// Natural logorithm. 
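+ /// For small arguments log1p(x) is more accurate than log(half(1.0f) + x),
+ /// which first rounds 1 + x to half precision (an illustrative sketch, not
+ /// from the original header):
+ /// \code
+ /// half y = log1p(half(0.0625f)); // ~0.0606
+ /// \endcode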
+ /// \param arg function argument + /// \return logarithm of \a arg plus 1 to base e +// template typename enable::type log1p(T arg) { return functions::log1p(arg); } + inline expr log1p(half arg) { return functions::log1p(arg); } + inline expr log1p(expr arg) { return functions::log1p(arg); } + + /// Binary logorithm. + /// \param arg function argument + /// \return logarithm of \a arg to base 2 +// template typename enable::type log2(T arg) { return functions::log2(arg); } + inline expr log2(half arg) { return functions::log2(arg); } + inline expr log2(expr arg) { return functions::log2(arg); } + + /// \} + /// \name Power functions + /// \{ + + /// Square root. + /// \param arg function argument + /// \return square root of \a arg +// template typename enable::type sqrt(T arg) { return functions::sqrt(arg); } +#ifdef HLS_NO_XIL_FPO_LIB + inline expr sqrt(half arg) { return functions::sqrt(arg); } +#else + inline half sqrt(half arg) { return math_function_1arg(xip_fpo_sqrt, arg); } +#endif + inline expr sqrt(expr arg) { return functions::sqrt(arg); } + + /// Cubic root. + /// \param arg function argument + /// \return cubic root of \a arg +// template typename enable::type cbrt(T arg) { return functions::cbrt(arg); } + inline expr cbrt(half arg) { return functions::cbrt(arg); } + inline expr cbrt(expr arg) { return functions::cbrt(arg); } + + /// Hypotenuse function. + /// \param x first argument + /// \param y second argument + /// \return square root of sum of squares without internal over- or underflows +// template typename enable::type hypot(T x, U y) { return functions::hypot(x, y); } + inline expr hypot(half x, half y) { return functions::hypot(x, y); } + inline expr hypot(half x, expr y) { return functions::hypot(x, y); } + inline expr hypot(expr x, half y) { return functions::hypot(x, y); } + inline expr hypot(expr x, expr y) { return functions::hypot(x, y); } + + /// Power function. + /// \param base first argument + /// \param exp second argument + /// \return \a base raised to \a exp +// template typename enable::type pow(T base, U exp) { return functions::pow(base, exp); } + inline expr pow(half base, half exp) { return functions::pow(base, exp); } + inline expr pow(half base, expr exp) { return functions::pow(base, exp); } + inline expr pow(expr base, half exp) { return functions::pow(base, exp); } + inline expr pow(expr base, expr exp) { return functions::pow(base, exp); } + inline expr powr(half base, half exp) { return functions::powr(base, exp); } + inline expr powr(half base, expr exp) { return functions::powr(base, exp); } + inline expr powr(expr base, half exp) { return functions::powr(base, exp); } + inline expr powr(expr base, expr exp) { return functions::powr(base, exp); } + inline expr pown(half base, int exp) { return functions::pown(base, exp); } + inline expr pown(expr base, int exp) { return functions::pown(base, exp); } + + /// \} + /// \name Trigonometric functions + /// \{ + + /// Sine function. + /// \param arg function argument + /// \return sine value of \a arg +// template typename enable::type sin(T arg) { return functions::sin(arg); } + inline expr sin(half arg) { return functions::sin(arg); } + inline expr sin(expr arg) { return functions::sin(arg); } + + /// Cosine function. 
+ /// \param arg function argument + /// \return cosine value of \a arg +// template typename enable::type cos(T arg) { return functions::cos(arg); } + inline expr cos(half arg) { return functions::cos(arg); } + inline expr cos(expr arg) { return functions::cos(arg); } + + /// Tangent function. + /// \param arg function argument + /// \return tangent value of \a arg +// template typename enable::type tan(T arg) { return functions::tan(arg); } + inline expr tan(half arg) { return functions::tan(arg); } + inline expr tan(expr arg) { return functions::tan(arg); } + + /// Arc sine. + /// \param arg function argument + /// \return arc sine value of \a arg +// template typename enable::type asin(T arg) { return functions::asin(arg); } + inline expr asin(half arg) { return functions::asin(arg); } + inline expr asin(expr arg) { return functions::asin(arg); } + + /// Arc cosine function. + /// \param arg function argument + /// \return arc cosine value of \a arg +// template typename enable::type acos(T arg) { return functions::acos(arg); } + inline expr acos(half arg) { return functions::acos(arg); } + inline expr acos(expr arg) { return functions::acos(arg); } + + /// Arc tangent function. + /// \param arg function argument + /// \return arc tangent value of \a arg +// template typename enable::type atan(T arg) { return functions::atan(arg); } + inline expr atan(half arg) { return functions::atan(arg); } + inline expr atan(expr arg) { return functions::atan(arg); } + + /// Arc tangent function. + /// \param x first argument + /// \param y second argument + /// \return arc tangent value +// template typename enable::type atan2(T x, U y) { return functions::atan2(x, y); } + inline expr atan2(half x, half y) { return functions::atan2(x, y); } + inline expr atan2(half x, expr y) { return functions::atan2(x, y); } + inline expr atan2(expr x, half y) { return functions::atan2(x, y); } + inline expr atan2(expr x, expr y) { return functions::atan2(x, y); } + + /// \} + /// \name Hyperbolic functions + /// \{ + + /// Hyperbolic sine. + /// \param arg function argument + /// \return hyperbolic sine value of \a arg +// template typename enable::type sinh(T arg) { return functions::sinh(arg); } + inline expr sinh(half arg) { return functions::sinh(arg); } + inline expr sinh(expr arg) { return functions::sinh(arg); } + + /// Hyperbolic cosine. + /// \param arg function argument + /// \return hyperbolic cosine value of \a arg +// template typename enable::type cosh(T arg) { return functions::cosh(arg); } + inline expr cosh(half arg) { return functions::cosh(arg); } + inline expr cosh(expr arg) { return functions::cosh(arg); } + + /// Hyperbolic tangent. + /// \param arg function argument + /// \return hyperbolic tangent value of \a arg +// template typename enable::type tanh(T arg) { return functions::tanh(arg); } + inline expr tanh(half arg) { return functions::tanh(arg); } + inline expr tanh(expr arg) { return functions::tanh(arg); } + + /// Hyperbolic area sine. + /// \param arg function argument + /// \return area sine value of \a arg +// template typename enable::type asinh(T arg) { return functions::asinh(arg); } + inline expr asinh(half arg) { return functions::asinh(arg); } + inline expr asinh(expr arg) { return functions::asinh(arg); } + + /// Hyperbolic area cosine. 
+ /// \param arg function argument + /// \return area cosine value of \a arg +// template typename enable::type acosh(T arg) { return functions::acosh(arg); } + inline expr acosh(half arg) { return functions::acosh(arg); } + inline expr acosh(expr arg) { return functions::acosh(arg); } + + /// Hyperbolic area tangent. + /// \param arg function argument + /// \return area tangent value of \a arg +// template typename enable::type atanh(T arg) { return functions::atanh(arg); } + inline expr atanh(half arg) { return functions::atanh(arg); } + inline expr atanh(expr arg) { return functions::atanh(arg); } + + /// \} + /// \name Error and gamma functions + /// \{ + + /// Error function. + /// \param arg function argument + /// \return error function value of \a arg +// template typename enable::type erf(T arg) { return functions::erf(arg); } + inline expr erf(half arg) { return functions::erf(arg); } + inline expr erf(expr arg) { return functions::erf(arg); } + + /// Complementary error function. + /// \param arg function argument + /// \return 1 minus error function value of \a arg +// template typename enable::type erfc(T arg) { return functions::erfc(arg); } + inline expr erfc(half arg) { return functions::erfc(arg); } + inline expr erfc(expr arg) { return functions::erfc(arg); } + + /// Natural logarithm of gamma function. + /// \param arg function argument + /// \return natural logarith of gamma function for \a arg +// template typename enable::type lgamma(T arg) { return functions::lgamma(arg); } + inline expr lgamma_r(half arg, int *signgamp) { return functions::lgamma(arg); } + inline expr lgamma_r(expr arg, int *signgamp) { return functions::lgamma(arg); } + inline expr lgamma(half arg) { return functions::lgamma(arg); } + inline expr lgamma(expr arg) { return functions::lgamma(arg); } + + /// Gamma function. + /// \param arg function argument + /// \return gamma function value of \a arg +// template typename enable::type tgamma(T arg) { return functions::tgamma(arg); } + inline expr tgamma(half arg) { return functions::tgamma(arg); } + inline expr tgamma(expr arg) { return functions::tgamma(arg); } + + /// \} + /// \name Rounding + /// \{ + + /// Nearest integer not less than half value. + /// \param arg half to round + /// \return nearest integer not less than \a arg +// template typename enable::type ceil(T arg) { return functions::ceil(arg); } + inline half ceil(half arg) { return functions::ceil(arg); } + inline half ceil(expr arg) { return functions::ceil(arg); } + + /// Nearest integer not greater than half value. + /// \param arg half to round + /// \return nearest integer not greater than \a arg +// template typename enable::type floor(T arg) { return functions::floor(arg); } + inline half floor(half arg) { return functions::floor(arg); } + inline half floor(expr arg) { return functions::floor(arg); } + + /// Nearest integer not greater in magnitude than half value. + /// \param arg half to round + /// \return nearest integer not greater in magnitude than \a arg +// template typename enable::type trunc(T arg) { return functions::trunc(arg); } + inline half trunc(half arg) { return functions::trunc(arg); } + inline half trunc(expr arg) { return functions::trunc(arg); } + + /// Nearest integer. 
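+ /// The rounding family differs only in the direction and tie handling (an
+ /// illustrative sketch, not from the original header):
+ /// \code
+ /// half x(2.5f);
+ /// half a = floor(x); // 2.0
+ /// half b = ceil(x);  // 3.0
+ /// half c = trunc(x); // 2.0
+ /// half d = round(x); // 3.0, half-way cases round away from zero
+ /// \endcode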
+ /// \param arg half to round + /// \return nearest integer, rounded away from zero in half-way cases +// template typename enable::type round(T arg) { return functions::round(arg); } + inline half round(half arg) { return functions::round(arg); } + inline half round(expr arg) { return functions::round(arg); } + + /// Nearest integer. + /// \param arg half to round + /// \return nearest integer, rounded away from zero in half-way cases +// template typename enable::type lround(T arg) { return functions::lround(arg); } + inline long lround(half arg) { return functions::lround(arg); } + inline long lround(expr arg) { return functions::lround(arg); } + + /// Nearest integer using half's internal rounding mode. + /// \param arg half expression to round + /// \return nearest integer using default rounding mode +// template typename enable::type nearbyint(T arg) { return functions::nearbyint(arg); } + inline half nearbyint(half arg) { return functions::rint(arg); } + inline half nearbyint(expr arg) { return functions::rint(arg); } + + /// Nearest integer using half's internal rounding mode. + /// \param arg half expression to round + /// \return nearest integer using default rounding mode +// template typename enable::type rint(T arg) { return functions::rint(arg); } + inline half rint(half arg) { return functions::rint(arg); } + inline half rint(expr arg) { return functions::rint(arg); } + + /// Nearest integer using half's internal rounding mode. + /// \param arg half expression to round + /// \return nearest integer using default rounding mode +// template typename enable::type lrint(T arg) { return functions::lrint(arg); } + inline long lrint(half arg) { return functions::lrint(arg); } + inline long lrint(expr arg) { return functions::lrint(arg); } + #if HALF_ENABLE_CPP11_LONG_LONG + /// Nearest integer. + /// \param arg half to round + /// \return nearest integer, rounded away from zero in half-way cases +// template typename enable::type llround(T arg) { return functions::llround(arg); } + inline long long llround(half arg) { return functions::llround(arg); } + inline long long llround(expr arg) { return functions::llround(arg); } + + /// Nearest integer using half's internal rounding mode. + /// \param arg half expression to round + /// \return nearest integer using default rounding mode +// template typename enable::type llrint(T arg) { return functions::llrint(arg); } + inline long long llrint(half arg) { return functions::llrint(arg); } + inline long long llrint(expr arg) { return functions::llrint(arg); } + #endif + + /// \} + /// \name Floating point manipulation + /// \{ + + /// Decompress floating point number. + /// \param arg number to decompress + /// \param exp address to store exponent at + /// \return significant in range [0.5, 1) +// template typename enable::type frexp(T arg, int *exp) { return functions::frexp(arg, exp); } + inline half frexp(half arg, int *exp) { return functions::frexp(arg, exp); } + inline half frexp(expr arg, int *exp) { return functions::frexp(arg, exp); } + + /// Multiply by power of two. + /// \param arg number to modify + /// \param exp power of two to multiply with + /// \return \a arg multplied by 2 raised to \a exp +// template typename enable::type ldexp(T arg, int exp) { return functions::scalbln(arg, exp); } + inline half ldexp(half arg, int exp) { return functions::scalbln(arg, exp); } + inline half ldexp(expr arg, int exp) { return functions::scalbln(arg, exp); } + + /// Extract integer and fractional parts. 
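+ /// Both parts keep the sign of the argument, and the decomposition is exact
+ /// (an illustrative sketch, not from the original header):
+ /// \code
+ /// half i;
+ /// half f = modf(half(-2.75f), &i); // i == -2.0, f == -0.75
+ /// \endcode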
+ /// \param arg number to decompress + /// \param iptr address to store integer part at + /// \return fractional part +// template typename enable::type modf(T arg, half *iptr) { return functions::modf(arg, iptr); } + inline half modf(half arg, half *iptr) { return functions::modf(arg, iptr); } + inline half modf(expr arg, half *iptr) { return functions::modf(arg, iptr); } + + /// Multiply by power of two. + /// \param arg number to modify + /// \param exp power of two to multiply with + /// \return \a arg multplied by 2 raised to \a exp +// template typename enable::type scalbn(T arg, int exp) { return functions::scalbln(arg, exp); } + inline half scalbn(half arg, int exp) { return functions::scalbln(arg, exp); } + inline half scalbn(expr arg, int exp) { return functions::scalbln(arg, exp); } + + /// Multiply by power of two. + /// \param arg number to modify + /// \param exp power of two to multiply with + /// \return \a arg multplied by 2 raised to \a exp +// template typename enable::type scalbln(T arg, long exp) { return functions::scalbln(arg, exp); } + inline half scalbln(half arg, long exp) { return functions::scalbln(arg, exp); } + inline half scalbln(expr arg, long exp) { return functions::scalbln(arg, exp); } + + /// Extract exponent. + /// \param arg number to query + /// \return floating point exponent + /// \retval FP_ILOGB0 for zero + /// \retval FP_ILOGBNAN for NaN + /// \retval MAX_INT for infinity +// template typename enable::type ilogb(T arg) { return functions::ilogb(arg); } + inline int ilogb(half arg) { return functions::ilogb(arg); } + inline int ilogb(expr arg) { return functions::ilogb(arg); } + + /// Extract exponent. + /// \param arg number to query + /// \return floating point exponent +// template typename enable::type logb(T arg) { return functions::logb(arg); } + inline half logb(half arg) { return functions::logb(arg); } + inline half logb(expr arg) { return functions::logb(arg); } + + /// Next representable value. + /// \param from value to compute next representable value for + /// \param to direction towards which to compute next value + /// \return next representable value after \a from in direction towards \a to +// template typename enable::type nextafter(T from, U to) { return functions::nextafter(from, to); } + inline half nextafter(half from, half to) { return functions::nextafter(from, to); } + inline half nextafter(half from, expr to) { return functions::nextafter(from, to); } + inline half nextafter(expr from, half to) { return functions::nextafter(from, to); } + inline half nextafter(expr from, expr to) { return functions::nextafter(from, to); } + + /// Next representable value. + /// \param from value to compute next representable value for + /// \param to direction towards which to compute next value + /// \return next representable value after \a from in direction towards \a to +// template typename enable::type nexttoward(T from, long double to) { return functions::nexttoward(from, to); } + inline half nexttoward(half from, long double to) { return functions::nexttoward(from, to); } + inline half nexttoward(expr from, long double to) { return functions::nexttoward(from, to); } + + /// Take sign. 
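+ /// Only the sign bit is copied, so the operation is exact for all inputs,
+ /// including NaN and infinity (an illustrative sketch, not from the original
+ /// header):
+ /// \code
+ /// half m = copysign(half(3.0f), half(-0.5f)); // m == -3.0
+ /// \endcode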
+ /// \param x value to change sign for + /// \param y value to take sign from + /// \return value equal to \a x in magnitude and to \a y in sign +// template typename enable::type copysign(T x, U y) { return functions::copysign(x, y); } + inline half copysign(half x, half y) { return functions::copysign(x, y); } + inline half copysign(half x, expr y) { return functions::copysign(x, y); } + inline half copysign(expr x, half y) { return functions::copysign(x, y); } + inline half copysign(expr x, expr y) { return functions::copysign(x, y); } + + /// \} + /// \name Floating point classification + /// \{ + + + /// Classify floating point value. + /// \param arg number to classify + /// \retval FP_ZERO for positive and negative zero + /// \retval FP_SUBNORMAL for subnormal numbers + /// \retval FP_INFINITY for positive and negative infinity + /// \retval FP_NAN for NaNs + /// \retval FP_NORMAL for all other (normal) values +// template typename enable::type fpclassify(T arg) { return functions::fpclassify(arg); } + inline int fpclassify(half arg) { return functions::fpclassify(arg); } + inline int fpclassify(expr arg) { return functions::fpclassify(arg); } + + /// Check if finite number. + /// \param arg number to check + /// \retval true if neither infinity nor NaN + /// \retval false else +// template typename enable::type isfinite(T arg) { return functions::isfinite(arg); } + inline bool isfinite(half arg) { return functions::isfinite(arg); } + inline bool isfinite(expr arg) { return functions::isfinite(arg); } + + /// Check for infinity. + /// \param arg number to check + /// \retval true for positive or negative infinity + /// \retval false else +// template typename enable::type isinf(T arg) { return functions::isinf(arg); } + inline bool isinf(half arg) { return functions::isinf(arg); } + inline bool isinf(expr arg) { return functions::isinf(arg); } + + /// Check for NaN. + /// \param arg number to check + /// \retval true for NaNs + /// \retval false else +// template typename enable::type isnan(T arg) { return functions::isnan(arg); } + inline bool isnan(half arg) { return functions::isnan(arg); } + inline bool isnan(expr arg) { return functions::isnan(arg); } + + /// Check if normal number. + /// \param arg number to check + /// \retval true if normal number + /// \retval false if either subnormal, zero, infinity or NaN +// template typename enable::type isnormal(T arg) { return functions::isnormal(arg); } + inline bool isnormal(half arg) { return functions::isnormal(arg); } + inline bool isnormal(expr arg) { return functions::isnormal(arg); } + + /// Check sign. + /// \param arg number to check + /// \retval true for negative number + /// \retval false for positive number +// template typename enable::type signbit(T arg) { return functions::signbit(arg); } + inline bool signbit(half arg) { return functions::signbit(arg); } + inline bool signbit(expr arg) { return functions::signbit(arg); } + + /// \} + /// \name Comparison + /// \{ + + /// Comparison for greater than. 
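+ /// Like its std counterpart, this predicate is quiet: it returns false
+ /// whenever an operand is NaN (an illustrative sketch, not from the original
+ /// header):
+ /// \code
+ /// bool g = isgreater(nanh(""), half(1.0f)); // false, NaN compares unordered
+ /// \endcode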
+ /// \param x first operand + /// \param y second operand + /// \retval true if \a x greater than \a y + /// \retval false else +// template typename enable::type isgreater(T x, U y) { return functions::isgreater(x, y); } + inline bool isgreater(half x, half y) { return functions::isgreater(x, y); } + inline bool isgreater(half x, expr y) { return functions::isgreater(x, y); } + inline bool isgreater(expr x, half y) { return functions::isgreater(x, y); } + inline bool isgreater(expr x, expr y) { return functions::isgreater(x, y); } + + /// Comparison for greater equal. + /// \param x first operand + /// \param y second operand + /// \retval true if \a x greater equal \a y + /// \retval false else +// template typename enable::type isgreaterequal(T x, U y) { return functions::isgreaterequal(x, y); } + inline bool isgreaterequal(half x, half y) { return functions::isgreaterequal(x, y); } + inline bool isgreaterequal(half x, expr y) { return functions::isgreaterequal(x, y); } + inline bool isgreaterequal(expr x, half y) { return functions::isgreaterequal(x, y); } + inline bool isgreaterequal(expr x, expr y) { return functions::isgreaterequal(x, y); } + + /// Comparison for less than. + /// \param x first operand + /// \param y second operand + /// \retval true if \a x less than \a y + /// \retval false else +// template typename enable::type isless(T x, U y) { return functions::isless(x, y); } + inline bool isless(half x, half y) { return functions::isless(x, y); } + inline bool isless(half x, expr y) { return functions::isless(x, y); } + inline bool isless(expr x, half y) { return functions::isless(x, y); } + inline bool isless(expr x, expr y) { return functions::isless(x, y); } + + /// Comparison for less equal. + /// \param x first operand + /// \param y second operand + /// \retval true if \a x less equal \a y + /// \retval false else +// template typename enable::type islessequal(T x, U y) { return functions::islessequal(x, y); } + inline bool islessequal(half x, half y) { return functions::islessequal(x, y); } + inline bool islessequal(half x, expr y) { return functions::islessequal(x, y); } + inline bool islessequal(expr x, half y) { return functions::islessequal(x, y); } + inline bool islessequal(expr x, expr y) { return functions::islessequal(x, y); } + + /// Comarison for less or greater. + /// \param x first operand + /// \param y second operand + /// \retval true if either less or greater + /// \retval false else +// template typename enable::type islessgreater(T x, U y) { return functions::islessgreater(x, y); } + inline bool islessgreater(half x, half y) { return functions::islessgreater(x, y); } + inline bool islessgreater(half x, expr y) { return functions::islessgreater(x, y); } + inline bool islessgreater(expr x, half y) { return functions::islessgreater(x, y); } + inline bool islessgreater(expr x, expr y) { return functions::islessgreater(x, y); } + + /// Check if unordered. 
+ /// \param x first operand
+ /// \param y second operand
+ /// \retval true if unordered (one or two NaN operands)
+ /// \retval false else
+// template<typename T,typename U> typename enable<bool,T,U>::type isunordered(T x, U y) { return functions::isunordered(x, y); }
+ inline bool isunordered(half x, half y) { return functions::isunordered(x, y); }
+ inline bool isunordered(half x, expr y) { return functions::isunordered(x, y); }
+ inline bool isunordered(expr x, half y) { return functions::isunordered(x, y); }
+ inline bool isunordered(expr x, expr y) { return functions::isunordered(x, y); }
+
+ /// \name Casting
+ /// \{
+
+ /// Cast to or from half-precision floating point number.
+ /// This casts between [half](\ref half) and any built-in arithmetic type. Floating point types are
+ /// converted via an explicit cast to/from `float` (using the rounding mode of the built-in single precision
+ /// implementation) and thus any possible warnings due to an otherwise implicit conversion to/from `float` will be
+ /// suppressed. Integer types are converted directly using the given rounding mode, without any roundtrip over `float`
+ /// that a `static_cast` would otherwise do. It uses the default rounding mode.
+ ///
+ /// Using this cast with neither of the two types being a [half](\ref half) or with any of the two types
+ /// not being a built-in arithmetic type (apart from [half](\ref half), of course) results in a compiler
+ /// error and casting between [half](\ref half)s is just a no-op.
+ /// \tparam T destination type (half or built-in arithmetic type)
+ /// \tparam U source type (half or built-in arithmetic type)
+ /// \param arg value to cast
+ /// \return \a arg converted to destination type
+ template<typename T,typename U> typename half_caster<T,U>::type half_cast(U arg) { return half_caster<T,U>::cast(arg); }
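+
+ // Illustrative usage sketch (not part of the original header): half_cast
+ // converts directly between half and built-in arithmetic types, e.g.
+ //
+ //   half h = half_cast<half>(4.2);                        // double -> half
+ //   int i = half_cast<int>(h);                            // half -> int, no float roundtrip
+ //   half t = half_cast<half,std::round_toward_zero>(4.2); // explicit rounding mode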
+
+ /// Cast to or from half-precision floating point number.
+ /// This casts between [half](\ref half) and any built-in arithmetic type. Floating point types are
+ /// converted via an explicit cast to/from `float` (using the rounding mode of the built-in single precision
+ /// implementation) and thus any possible warnings due to an otherwise implicit conversion to/from `float` will be
+ /// suppressed. Integer types are converted directly using the given rounding mode, without any roundtrip over `float`
+ /// that a `static_cast` would otherwise do.
+ ///
+ /// Using this cast with neither of the two types being a [half](\ref half) or with any of the two types
+ /// not being a built-in arithmetic type (apart from [half](\ref half), of course) results in a compiler
+ /// error and casting between [half](\ref half)s is just a no-op.
+ /// \tparam T destination type (half or built-in arithmetic type)
+ /// \tparam R rounding mode to use.
+ /// \tparam U source type (half or built-in arithmetic type)
+ /// \param arg value to cast
+ /// \return \a arg converted to destination type
+ template<typename T,std::float_round_style R,typename U> typename half_caster<T,U,R>::type half_cast(U arg)
+ { return half_caster<T,U,R>::cast(arg); }
+ /// \}
+ }
+
+ using detail::operator==;
+ using detail::operator!=;
+ using detail::operator<;
+ using detail::operator>;
+ using detail::operator<=;
+ using detail::operator>=;
+ using detail::operator+;
+ using detail::operator-;
+ using detail::operator*;
+ using detail::operator/;
+ using detail::operator<<;
+ using detail::operator>>;
+
+// using detail::abs;
+// using detail::fabs;
+// using detail::fmod;
+// using detail::remainder;
+// using detail::remquo;
+// using detail::fma;
+// using detail::fmax;
+// using detail::fmin;
+// using detail::fdim;
+// using detail::nanh;
+// using detail::exp;
+// using detail::expm1;
+// using detail::exp2;
+// using detail::log;
+// using detail::log10;
+// using detail::log1p;
+// using detail::log2;
+// using detail::sqrt;
+// using detail::cbrt;
+// using detail::hypot;
+// //using detail::pow;
+// using detail::sin;
+// using detail::cos;
+// using detail::tan;
+// using detail::asin;
+// using detail::acos;
+// using detail::atan;
+// using detail::atan2;
+// using detail::sinh;
+// using detail::cosh;
+// using detail::tanh;
+// using detail::asinh;
+// using detail::acosh;
+// using detail::atanh;
+// using detail::erf;
+// using detail::erfc;
+// using detail::lgamma;
+// using detail::tgamma;
+// using detail::ceil;
+// using detail::floor;
+// using detail::trunc;
+// using detail::round;
+// using detail::lround;
+// using detail::nearbyint;
+// using detail::rint;
+// using detail::lrint;
+// #if HALF_ENABLE_CPP11_LONG_LONG
+// using detail::llround;
+// using detail::llrint;
+// #endif
+// using detail::frexp;
+// using detail::ldexp;
+// using detail::modf;
+// using detail::scalbn;
+// using detail::scalbln;
+// using detail::ilogb;
+// using detail::logb;
+// using detail::nextafter;
+// using detail::nexttoward;
+// using detail::copysign;
+ using detail::fpclassify;
+ using detail::isfinite;
+ using detail::isinf;
+ using detail::isnan;
+ using detail::isnormal;
+ using detail::signbit;
+ using detail::isgreater;
+ using detail::isgreaterequal;
+ using detail::isless;
+ using detail::islessequal;
+ using detail::islessgreater;
+ using detail::isunordered;
+
+ using detail::half_cast;
+
+/// Extensions to the C++ standard library.
+namespace std
+{
+ /// Numeric limits for half-precision floats.
+ /// Because of the underlying single-precision implementation of many operations, it inherits some properties from
+ /// `std::numeric_limits<float>`.
+ template<> struct numeric_limits<half> : public numeric_limits<float>
+ {
+ public:
+ /// Supports signed values.
+ static HALF_CONSTEXPR_CONST bool is_signed = true;
+
+ /// Is not exact.
+ static HALF_CONSTEXPR_CONST bool is_exact = false;
+
+ /// Doesn't provide modulo arithmetic.
+ static HALF_CONSTEXPR_CONST bool is_modulo = false;
+
+ /// IEEE conformant.
+ static HALF_CONSTEXPR_CONST bool is_iec559 = true;
+
+ /// Supports infinity.
+ static HALF_CONSTEXPR_CONST bool has_infinity = true;
+
+ /// Supports quiet NaNs.
+ static HALF_CONSTEXPR_CONST bool has_quiet_NaN = true;
+
+ /// Supports subnormal values.
+ static HALF_CONSTEXPR_CONST float_denorm_style has_denorm = denorm_present;
+
+ /// Rounding mode.
+ /// Due to the mix of internal single-precision computations (using the rounding mode of the underlying + /// single-precision implementation) with explicit truncation of the single-to-half conversions, the actual rounding + /// mode is indeterminate. + static HALF_CONSTEXPR_CONST float_round_style round_style = (std::numeric_limits::round_style== + half::round_style) ? half::round_style : round_indeterminate; + + /// Significant digits. + static HALF_CONSTEXPR_CONST int digits = 11; + + /// Significant decimal digits. + static HALF_CONSTEXPR_CONST int digits10 = 3; + + /// Required decimal digits to represent all possible values. + static HALF_CONSTEXPR_CONST int max_digits10 = 5; + + /// Number base. + static HALF_CONSTEXPR_CONST int radix = 2; + + /// One more than smallest exponent. + static HALF_CONSTEXPR_CONST int min_exponent = -13; + + /// Smallest normalized representable power of 10. + static HALF_CONSTEXPR_CONST int min_exponent10 = -4; + + /// One more than largest exponent + static HALF_CONSTEXPR_CONST int max_exponent = 16; + + /// Largest finitely representable power of 10. + static HALF_CONSTEXPR_CONST int max_exponent10 = 4; + + /// Smallest positive normal value. + static HALF_CONSTEXPR half min() HALF_NOTHROW { return half(detail::binary, 0x0400); } + + /// Smallest finite value. + static HALF_CONSTEXPR half lowest() HALF_NOTHROW { return half(detail::binary, 0xFBFF); } + + /// Largest finite value. + static HALF_CONSTEXPR half max() HALF_NOTHROW { return half(detail::binary, 0x7BFF); } + + /// Difference between one and next representable value. + static HALF_CONSTEXPR half epsilon() HALF_NOTHROW { return half(detail::binary, 0x1400); } + + /// Maximum rounding error. + static HALF_CONSTEXPR half round_error() HALF_NOTHROW + { return half(detail::binary, (round_style==std::round_to_nearest) ? 0x3800 : 0x3C00); } + + /// Positive infinity. + static HALF_CONSTEXPR half infinity() HALF_NOTHROW { return half(detail::binary, 0x7C00); } + + /// Quiet NaN. + static HALF_CONSTEXPR half quiet_NaN() HALF_NOTHROW { return half(detail::binary, 0x7FFF); } + + /// Signalling NaN. + static HALF_CONSTEXPR half signaling_NaN() HALF_NOTHROW { return half(detail::binary, 0x7DFF); } + + /// Smallest positive subnormal value. + static HALF_CONSTEXPR half denorm_min() HALF_NOTHROW { return half(detail::binary, 0x0001); } + }; + +#if HALF_ENABLE_CPP11_HASH + /// Hash function for half-precision floats. + /// This is only defined if C++11 `std::hash` is supported and enabled. + template<> struct hash //: unary_function + { + /// Type of function argument. + typedef half argument_type; + + /// Function return type. + typedef size_t result_type; + + /// Compute hash function. 
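+ /// Positive and negative zero hash to the same value, since the sign bit of
+ /// -0 is cleared before hashing. When C++11 `std::hash` support is enabled,
+ /// it can be used directly (an illustrative sketch, not from the original
+ /// header):
+ /// \code
+ /// std::hash<half> hh;
+ /// std::size_t s = hh(half(1.5f));
+ /// \endcode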
+ /// \param arg half to hash + /// \return hash value + result_type operator()(argument_type arg) const + { return hash()(static_cast(arg.data_)&-(arg.data_!=0x8000)); } + }; +#endif +} + + +#undef HALF_CONSTEXPR +#undef HALF_CONSTEXPR_CONST +#undef HALF_NOEXCEPT +#undef HALF_NOTHROW +#ifdef HALF_POP_WARNINGS + #pragma warning(pop) + #undef HALF_POP_WARNINGS +#endif + +#endif // AESL_SYN + +// implemented in lib_hlsm.cpp +//extern int __signbit(half a_re); +extern half half_nan(const char *tagp); +// extern int __isfinite(half t_in); +// extern int __isinf(half t_in); +// extern int __isnan(half t_in); +// extern int __isnormal(half t_in); +// extern int __fpclassify(half t_in); +extern half half_atan(half t); +extern half half_atan2(half y, half x); +extern half half_copysign(half x, half y); +//extern half copysign(half x, half y); +extern half half_fabs(half x); +//extern half fabs(half x); +extern half half_abs(half x); +extern half half_fma(half x, half y, half z); +extern half half_mad(half x, half y, half z); +extern half half_frexp (half x, int* exp); +extern half half_ldexp (half x, int exp); +extern half half_fmax(half x, half y); +//extern half fmax(half x, half y); +extern half half_fmin(half x, half y); +//extern half fmin(half x, half y); +extern half half_asin(half t_in); +extern half half_acos(half t_in); +extern half half_sin(half t_in); +extern half half_cos(half t_in); +extern void half_sincos(half x, half *sin, half *cos); +extern half half_sinh(half t_in); +extern half half_cosh(half t_in); +extern half half_sinpi(half t_in); +extern half half_cospi(half t_in); +extern half half_recip(half x); +extern half half_sqrt(half x); +extern half half_rsqrt(half x); +extern half half_cbrt(half x); +extern half half_hypot(half x, half y); +extern half half_log(half x); +extern half half_log10(half x); +extern half half_log2(half x); +extern half half_logb(half x); +extern half half_log1p(half x); +extern int half_ilogb(half x); +extern half half_exp(half x); +extern half half_exp10(half x); +extern half half_exp2(half x); +extern half half_expm1(half x); +extern half half_pow(half x, half y); +extern half half_powr(half x, half y); +extern half half_pown(half x, int y); +extern half half_rootn(half x, int y); +extern half half_floor(half x); +//half floor(half x) +extern half half_ceil(half x); +//half ceil(half x) +extern half half_trunc(half x); +// half trunc(half x) +extern half half_round(half x); +//half round(half x) +extern half half_nearbyint(half x); +extern half half_rint(half x); +extern long int half_lrint(half x); +extern long long int half_llrint(half x); +extern long int half_lround(half x); +extern long long int half_llround(half x); +extern half half_modf(half x, half *intpart); +// half modf(half x, half *intpart) +extern half half_fract(half x, half *intpart); +extern half half_nextafter(half x, half y); +extern half half_fmod(half x, half y); +extern half half_remainder(half x, half y); +extern half half_remquo(half x, half y, int* quo); +extern half half_divide(half x, half y); +#endif + +// vim: ts=4:sw=4:tw=4:noexpandtab: + diff --git a/include/hls_stream.h b/include/hls_stream.h new file mode 100644 index 0000000..65e89be --- /dev/null +++ b/include/hls_stream.h @@ -0,0 +1,268 @@ +/* +#- (c) Copyright 2011-2019 Xilinx, Inc. All rights reserved. +#- +#- This file contains confidential and proprietary information +#- of Xilinx, Inc. and is protected under U.S. and +#- international copyright and other intellectual property +#- laws. 
+#- +#- DISCLAIMER +#- This disclaimer is not a license and does not grant any +#- rights to the materials distributed herewith. Except as +#- otherwise provided in a valid license issued to you by +#- Xilinx, and to the maximum extent permitted by applicable +#- law: (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND +#- WITH ALL FAULTS, AND XILINX HEREBY DISCLAIMS ALL WARRANTIES +#- AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING +#- BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON- +#- INFRINGEMENT, OR FITNESS FOR ANY PARTICULAR PURPOSE; and +#- (2) Xilinx shall not be liable (whether in contract or tort, +#- including negligence, or under any other theory of +#- liability) for any loss or damage of any kind or nature +#- related to, arising under or in connection with these +#- materials, including for any direct, or any indirect, +#- special, incidental, or consequential loss or damage +#- (including loss of data, profits, goodwill, or any type of +#- loss or damage suffered as a result of any action brought +#- by a third party) even if such damage or loss was +#- reasonably foreseeable or Xilinx had been advised of the +#- possibility of the same. +#- +#- CRITICAL APPLICATIONS +#- Xilinx products are not designed or intended to be fail- +#- safe, or for use in any application requiring fail-safe +#- performance, such as life-support or safety devices or +#- systems, Class III medical devices, nuclear facilities, +#- applications related to the deployment of airbags, or any +#- other applications that could lead to death, personal +#- injury, or severe property or environmental damage +#- (individually and collectively, "Critical +#- Applications"). Customer assumes the sole risk and +#- liability of any use of Xilinx products in Critical +#- Applications, subject only to applicable laws and +#- regulations governing limitations on product liability. +#- +#- THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS +#- PART OF THIS FILE AT ALL TIMES. +#- ************************************************************************ + + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef X_HLS_STREAM_SIM_H +#define X_HLS_STREAM_SIM_H + +/* + * This file contains a C++ model of hls::stream. + * It defines C simulation model. 
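+ *
+ * (Editor's illustration, not part of the original header; dut() stands
+ * for an assumed user function taking stream arguments.)
+ *
+ *   hls::stream<int> in("in"), out("out");
+ *   in.write(5);               // blocking write; in C sim full() is always false
+ *   dut(in, out);
+ *   int v = out.read();        // blocking read
+ *   int w;
+ *   bool got = out.read_nb(w); // nonblocking read, returns false when empty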
+ */
+#ifndef __cplusplus
+
+#error C++ is required to include this header file
+
+#else
+
+//////////////////////////////////////////////
+// C level simulation models for hls::stream
+//////////////////////////////////////////////
+#include <iostream>
+#include <typeinfo>
+#include <string>
+#include <sstream>
+#include <deque>
+
+#ifdef HLS_STREAM_THREAD_SAFE
+#include <mutex>
+#include <condition_variable>
+#endif
+
+#ifndef _MSC_VER
+#include <cxxabi.h>
+#include <stdlib.h>
+#endif
+
+namespace hls {
+
+template<typename __STREAM_T__>
+class stream
+{
+  protected:
+    std::string _name;
+    std::deque<__STREAM_T__> _data; // container for the elements
+#ifdef HLS_STREAM_THREAD_SAFE
+    std::mutex _mutex;
+    std::condition_variable _condition_var;
+#endif
+
+  public:
+    /// Constructors
+    // Keep consistent with the synthesis model's constructors
+    stream() {
+        static unsigned _counter = 1;
+        std::stringstream ss;
+#ifndef _MSC_VER
+        char* _demangle_name = abi::__cxa_demangle(typeid(*this).name(), 0, 0, 0);
+        if (_demangle_name) {
+            _name = _demangle_name;
+            free(_demangle_name);
+        }
+        else {
+            _name = "hls_stream";
+        }
+#else
+        _name = typeid(*this).name();
+#endif
+
+        ss << _counter++;
+        _name += "." + ss.str();
+    }
+
+    stream(const std::string name) {
+        // default constructor,
+        // capacity set to predefined maximum
+        _name = name;
+    }
+
+  /// Make copy constructor and assignment operator private
+  private:
+    stream(const stream< __STREAM_T__ >& chn):
+        _name(chn._name), _data(chn._data) {
+    }
+
+    stream& operator = (const stream< __STREAM_T__ >& chn) {
+        _name = chn._name;
+        _data = chn._data;
+        return *this;
+    }
+
+  public:
+    /// Overload >> and << operators to implement read() and write()
+    void operator >> (__STREAM_T__& rdata) {
+        read(rdata);
+    }
+
+    void operator << (const __STREAM_T__& wdata) {
+        write(wdata);
+    }
+
+
+  public:
+    /// Destructor
+    /// Check status of the queue
+    virtual ~stream() {
+        if (!_data.empty())
+        {
+            std::cout << "WARNING: Hls::stream '"
+                      << _name
+                      << "' contains leftover data,"
+                      << " which may result in RTL simulation hanging."
+                      << std::endl;
+        }
+    }
+
+    /// Status of the queue
+    bool empty() {
+#ifdef HLS_STREAM_THREAD_SAFE
+        std::lock_guard<std::mutex> lg(_mutex);
+#endif
+        return _data.empty();
+    }
+
+    bool full() const { return false; }
+
+    /// Blocking read
+    void read(__STREAM_T__& head) {
+        head = read();
+    }
+
+#ifdef HLS_STREAM_THREAD_SAFE
+    __STREAM_T__ read() {
+        std::unique_lock<std::mutex> ul(_mutex);
+        while (_data.empty()) {
+            _condition_var.wait(ul);
+        }
+
+        __STREAM_T__ elem;
+        elem = _data.front();
+        _data.pop_front();
+        return elem;
+    }
+#else
+    __STREAM_T__ read() {
+        __STREAM_T__ elem;
+#ifdef HLS_STREAM_WAIT_FOR_DATA_IN_BLOCKING_READ
+        while(_data.empty())
+            ;
+#endif
+        if (_data.empty()) {
+            std::cout << "WARNING: Hls::stream '"
+                      << _name
+                      << "' is read while empty,"
+                      << " which may result in RTL simulation hanging."
+                      << std::endl;
+            elem = __STREAM_T__();
+        } else {
+            elem = _data.front();
+            _data.pop_front();
+        }
+        return elem;
+    }
+#endif
+
+    /// Blocking write
+    void write(const __STREAM_T__& tail) {
+#ifdef HLS_STREAM_THREAD_SAFE
+        std::unique_lock<std::mutex> ul(_mutex);
+#endif
+        _data.push_back(tail);
+#ifdef HLS_STREAM_THREAD_SAFE
+        _condition_var.notify_one();
+#endif
+    }
+
+    /// Nonblocking read
+    bool read_nb(__STREAM_T__& head) {
+#ifdef HLS_STREAM_THREAD_SAFE
+        std::lock_guard<std::mutex> lg(_mutex);
+#endif
+        bool is_empty = _data.empty();
+        if (is_empty) {
+            head = __STREAM_T__();
+        } else {
+            __STREAM_T__ elem(_data.front());
+            _data.pop_front();
+            head = elem;
+        }
+        return !is_empty;
+    }
+
+    /// Nonblocking write
+    bool write_nb(const __STREAM_T__& tail) {
+        bool is_full = full();
+        write(tail);
+        return !is_full;
+    }
+
+    /// Fifo size
+    size_t size() {
+        return _data.size();
+    }
+};
+
+} // namespace hls
+
+#endif // __cplusplus
+#endif // X_HLS_STREAM_SIM_H
+
+
diff --git a/include/mpfr.h b/include/mpfr.h
new file mode 100644
index 0000000..d9da855
--- /dev/null
+++ b/include/mpfr.h
@@ -0,0 +1,945 @@
+/* mpfr.h -- Include file for mpfr.
+
+Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+Contributed by the Arenaire and Cacao projects, INRIA.
+
+This file is part of the GNU MPFR Library.
+
+The GNU MPFR Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MPFR Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
+http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
+51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#ifndef __MPFR_H
+#define __MPFR_H
+
+/* Define MPFR version number */
+#define MPFR_VERSION_MAJOR 3
+#define MPFR_VERSION_MINOR 0
+#define MPFR_VERSION_PATCHLEVEL 1
+#define MPFR_VERSION_STRING "3.0.1-p4"
+
+/* Macros dealing with MPFR VERSION */
+#define MPFR_VERSION_NUM(a,b,c) (((a) << 16L) | ((b) << 8) | (c))
+#define MPFR_VERSION \
+MPFR_VERSION_NUM(MPFR_VERSION_MAJOR,MPFR_VERSION_MINOR,MPFR_VERSION_PATCHLEVEL)
+
+/* Check if GMP is included, and try to include it (Works with local GMP) */
+#ifndef __GMP_H__
+# include <gmp.h>
+#endif
+
+/* Check if stdio.h is included or if the user wants FILE */
+#if defined (_GMP_H_HAVE_FILE) || defined (MPFR_USE_FILE)
+# define _MPFR_H_HAVE_FILE 1
+#endif
+
+#if defined (_GMP_H_HAVE_VA_LIST)
+# define _MPFR_H_HAVE_VA_LIST 1
+#endif
+
+/* Check if <stdint.h> or <inttypes.h> is included or if the user
+   explicitly wants intmax_t. Automatic detection is done by
+   checking:
+   - INTMAX_C and UINTMAX_C, but not if the compiler is a C++ one
+     (as suggested by Patrick Pelissier) because the test does not
+     work well in this case. See:
+     http://websympa.loria.fr/wwsympa/arc/mpfr/2010-02/msg00025.html
+     We do not check INTMAX_MAX and UINTMAX_MAX because under Solaris,
+     these macros are always defined by <limits.h> (i.e. even when
+     <stdint.h> and <inttypes.h> are not included).
+   - _STDINT_H (defined by the glibc) and _STDINT_H_ (defined under
+     Mac OS X), but this test may not work with all implementations.
+     Portable software should not rely on these tests.
+*/
+#if (defined (INTMAX_C) && defined (UINTMAX_C) && !defined(__cplusplus)) || \
+  defined (MPFR_USE_INTMAX_T) || defined (_STDINT_H) || defined (_STDINT_H_)
+# define _MPFR_H_HAVE_INTMAX_T 1
+#endif
+
+/* Avoid some problems with macro expansion if the user defines macros
+   with the same name as keywords. By convention, identifiers and macro
+   names starting with mpfr_ are reserved by MPFR. */
+typedef void mpfr_void;
+typedef int mpfr_int;
+typedef unsigned int mpfr_uint;
+typedef long mpfr_long;
+typedef unsigned long mpfr_ulong;
+typedef size_t mpfr_size_t;
+
+/* Definition of rounding modes (DON'T USE MPFR_RNDNA!).
+   Warning! Changing the contents of this enum should be seen as an
+   interface change since the old and the new types are not compatible
+   (the integer type compatible with the enumerated type can even change,
+   see ISO C99, 6.7.2.2#4), and in Makefile.am, AGE should be set to 0.
+
+   MPFR_RNDU must appear just before MPFR_RNDD (see
+   MPFR_IS_RNDUTEST_OR_RNDDNOTTEST in mpfr-impl.h).
+
+   MPFR_RNDF has been added, though not implemented yet, in order to avoid
+   breaking the ABI once faithful rounding gets implemented.
+
+   If you change the order of the rounding modes, please update the routines
+   in texceptions.c which assume 0=RNDN, 1=RNDZ, 2=RNDU, 3=RNDD, 4=RNDA.
+*/
+typedef enum {
+  MPFR_RNDN=0,  /* round to nearest, with ties to even */
+  MPFR_RNDZ,    /* round toward zero */
+  MPFR_RNDU,    /* round toward +Inf */
+  MPFR_RNDD,    /* round toward -Inf */
+  MPFR_RNDA,    /* round away from zero */
+  MPFR_RNDF,    /* faithful rounding (not implemented yet) */
+  MPFR_RNDNA=-1 /* round to nearest, with ties away from zero (mpfr_round) */
+} mpfr_rnd_t;
+
+/* kept for compatibility with MPFR 2.4.x and before */
+#define GMP_RNDN MPFR_RNDN
+#define GMP_RNDZ MPFR_RNDZ
+#define GMP_RNDU MPFR_RNDU
+#define GMP_RNDD MPFR_RNDD
+
+/* Define precision : 1 (short), 2 (int) or 3 (long) (DON'T USE IT!) */
+#ifndef _MPFR_PREC_FORMAT
+# if __GMP_MP_SIZE_T_INT == 1
+#  define _MPFR_PREC_FORMAT 2
+# else
+#  define _MPFR_PREC_FORMAT 3
+# endif
+#endif
+
+/* Let's make mpfr_prec_t signed in order to avoid problems due to the
+   usual arithmetic conversions when mixing mpfr_prec_t and mpfr_exp_t
+   in an expression (for error analysis) if casts are forgotten. */
+#if   _MPFR_PREC_FORMAT == 1
+typedef short mpfr_prec_t;
+typedef unsigned short mpfr_uprec_t;
+#elif _MPFR_PREC_FORMAT == 2
+typedef int mpfr_prec_t;
+typedef unsigned int mpfr_uprec_t;
+#elif _MPFR_PREC_FORMAT == 3
+typedef long mpfr_prec_t;
+typedef unsigned long mpfr_uprec_t;
+#else
+# error "Invalid MPFR Prec format"
+#endif
+
+/* Definition of precision limits without needing <limits.h> */
+/* Note: the casts allow the expression to yield the wanted behavior
+   for _MPFR_PREC_FORMAT == 1 (due to integer promotion rules). */
+#define MPFR_PREC_MIN 2
+#define MPFR_PREC_MAX ((mpfr_prec_t)((mpfr_uprec_t)(~(mpfr_uprec_t)0)>>1))
+
+/* Definition of sign */
+typedef int mpfr_sign_t;
+
+/* Definition of the exponent: same as in GMP.
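+   (Editor's note: mp_exp_t is GMP's signed exponent type, typically a
+   long, so the range of mpfr_exp_t is inherited from the GMP build.)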
*/
+typedef mp_exp_t mpfr_exp_t;
+
+/* Definition of the standard exponent limits */
+#define MPFR_EMAX_DEFAULT ((mpfr_exp_t) (((mpfr_ulong) 1 << 30) - 1))
+#define MPFR_EMIN_DEFAULT (-(MPFR_EMAX_DEFAULT))
+
+/* Definition of the main structure */
+typedef struct {
+  mpfr_prec_t  _mpfr_prec;
+  mpfr_sign_t  _mpfr_sign;
+  mpfr_exp_t   _mpfr_exp;
+  mp_limb_t   *_mpfr_d;
+} __mpfr_struct;
+
+/* Compatibility with previous types of MPFR */
+#ifndef mp_rnd_t
+# define mp_rnd_t mpfr_rnd_t
+#endif
+#ifndef mp_prec_t
+# define mp_prec_t mpfr_prec_t
+#endif
+
+/*
+   The represented number is
+     _sign*(_d[k-1]/B+_d[k-2]/B^2+...+_d[0]/B^k)*2^_exp
+   where k=ceil(_mp_prec/GMP_NUMB_BITS) and B=2^GMP_NUMB_BITS.
+
+   For the msb (most significant bit) normalized representation, we must have
+     _d[k-1]>=B/2, unless the number is singular.
+
+   We must also have the last k*GMP_NUMB_BITS-_prec bits set to zero.
+*/
+
+typedef __mpfr_struct mpfr_t[1];
+typedef __mpfr_struct *mpfr_ptr;
+typedef __gmp_const __mpfr_struct *mpfr_srcptr;
+
+/* For those who need a direct and fast access to the sign field.
+   However it is not in the API, thus use it at your own risk: it might
+   not be supported, or change name, in further versions!
+   Unfortunately, it must be defined here (instead of MPFR's internal
+   header file mpfr-impl.h) because it is used by some macros below.
+*/
+#define MPFR_SIGN(x) ((x)->_mpfr_sign)
+
+/* Stack interface */
+typedef enum {
+  MPFR_NAN_KIND = 0,
+  MPFR_INF_KIND = 1, MPFR_ZERO_KIND = 2, MPFR_REGULAR_KIND = 3
+} mpfr_kind_t;
+
+/* GMP defines:
+    + size_t:                Standard size_t
+    + __GMP_ATTRIBUTE_PURE   Attribute for math functions.
+    + __GMP_NOTHROW          For C++: can't throw.
+    + __GMP_EXTERN_INLINE    Attribute for inline function.
+    * __gmp_const            const (Support for K&R compilers only for mpfr.h).
+    + __GMP_DECLSPEC_EXPORT  compiling to go into a DLL
+    + __GMP_DECLSPEC_IMPORT  compiling to go into an application
+*/
+/* Extra MPFR defines */
+#define __MPFR_SENTINEL_ATTR
+#if defined (__GNUC__)
+# if __GNUC__ >= 4
+#  undef __MPFR_SENTINEL_ATTR
+#  define __MPFR_SENTINEL_ATTR __attribute__ ((sentinel))
+# endif
+#endif
+
+/* Prototypes: Support of K&R compiler */
+#if defined (__GMP_PROTO)
+# define _MPFR_PROTO __GMP_PROTO
+#elif defined (__STDC__) || defined (__cplusplus)
+# define _MPFR_PROTO(x) x
+#else
+# define _MPFR_PROTO(x) ()
+#endif
+/* Support for WINDOWS Dll:
+   Check if we are inside a MPFR build, and if so export the functions.
+   Otherwise does the same thing as GMP */
+#if defined(__MPFR_WITHIN_MPFR) && __GMP_LIBGMP_DLL
+# define __MPFR_DECLSPEC __GMP_DECLSPEC_EXPORT
+#else
+# define __MPFR_DECLSPEC __GMP_DECLSPEC
+#endif
+
+/* Note: In order to be declared, some functions need a specific
+   system header to be included *before* "mpfr.h". If the user
+   forgets to include the header, the MPFR function prototype in
+   the user object file is not correct. To avoid wrong results,
+   we raise a linker error in that case by changing their internal
+   name in the library (prefixed by __gmpfr instead of mpfr). See
+   the lines of the form "#define mpfr_xxx __gmpfr_xxx" below.
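+
+   (Editor's note: for example, mpfr_out_str below is only declared when a
+   FILE-providing header such as stdio.h has been included before mpfr.h,
+   via the _MPFR_H_HAVE_FILE test above; the "#define mpfr_out_str
+   __gmpfr_out_str" line then maps calls onto the renamed library symbol.)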
*/ + +#if defined (__cplusplus) +extern "C" { +#endif + +__MPFR_DECLSPEC __gmp_const char * mpfr_get_version _MPFR_PROTO ((void)); +__MPFR_DECLSPEC __gmp_const char * mpfr_get_patches _MPFR_PROTO ((void)); +__MPFR_DECLSPEC int mpfr_buildopt_tls_p _MPFR_PROTO ((void)); +__MPFR_DECLSPEC int mpfr_buildopt_decimal_p _MPFR_PROTO ((void)); + +__MPFR_DECLSPEC mpfr_exp_t mpfr_get_emin _MPFR_PROTO ((void)); +__MPFR_DECLSPEC int mpfr_set_emin _MPFR_PROTO ((mpfr_exp_t)); +__MPFR_DECLSPEC mpfr_exp_t mpfr_get_emin_min _MPFR_PROTO ((void)); +__MPFR_DECLSPEC mpfr_exp_t mpfr_get_emin_max _MPFR_PROTO ((void)); +__MPFR_DECLSPEC mpfr_exp_t mpfr_get_emax _MPFR_PROTO ((void)); +__MPFR_DECLSPEC int mpfr_set_emax _MPFR_PROTO ((mpfr_exp_t)); +__MPFR_DECLSPEC mpfr_exp_t mpfr_get_emax_min _MPFR_PROTO ((void)); +__MPFR_DECLSPEC mpfr_exp_t mpfr_get_emax_max _MPFR_PROTO ((void)); + +__MPFR_DECLSPEC void mpfr_set_default_rounding_mode _MPFR_PROTO((mpfr_rnd_t)); +__MPFR_DECLSPEC mpfr_rnd_t mpfr_get_default_rounding_mode _MPFR_PROTO((void)); +__MPFR_DECLSPEC __gmp_const char * + mpfr_print_rnd_mode _MPFR_PROTO((mpfr_rnd_t)); + +__MPFR_DECLSPEC void mpfr_clear_flags _MPFR_PROTO ((void)); +__MPFR_DECLSPEC void mpfr_clear_underflow _MPFR_PROTO ((void)); +__MPFR_DECLSPEC void mpfr_clear_overflow _MPFR_PROTO ((void)); +__MPFR_DECLSPEC void mpfr_clear_nanflag _MPFR_PROTO ((void)); +__MPFR_DECLSPEC void mpfr_clear_inexflag _MPFR_PROTO ((void)); +__MPFR_DECLSPEC void mpfr_clear_erangeflag _MPFR_PROTO ((void)); + +__MPFR_DECLSPEC void mpfr_set_underflow _MPFR_PROTO ((void)); +__MPFR_DECLSPEC void mpfr_set_overflow _MPFR_PROTO ((void)); +__MPFR_DECLSPEC void mpfr_set_nanflag _MPFR_PROTO ((void)); +__MPFR_DECLSPEC void mpfr_set_inexflag _MPFR_PROTO ((void)); +__MPFR_DECLSPEC void mpfr_set_erangeflag _MPFR_PROTO ((void)); + +__MPFR_DECLSPEC int mpfr_underflow_p _MPFR_PROTO ((void)); +__MPFR_DECLSPEC int mpfr_overflow_p _MPFR_PROTO ((void)); +__MPFR_DECLSPEC int mpfr_nanflag_p _MPFR_PROTO ((void)); +__MPFR_DECLSPEC int mpfr_inexflag_p _MPFR_PROTO ((void)); +__MPFR_DECLSPEC int mpfr_erangeflag_p _MPFR_PROTO ((void)); + +__MPFR_DECLSPEC int + mpfr_check_range _MPFR_PROTO ((mpfr_ptr, int, mpfr_rnd_t)); + +__MPFR_DECLSPEC void mpfr_init2 _MPFR_PROTO ((mpfr_ptr, mpfr_prec_t)); +__MPFR_DECLSPEC void mpfr_init _MPFR_PROTO ((mpfr_ptr)); +__MPFR_DECLSPEC void mpfr_clear _MPFR_PROTO ((mpfr_ptr)); + +__MPFR_DECLSPEC void + mpfr_inits2 _MPFR_PROTO ((mpfr_prec_t, mpfr_ptr, ...)) __MPFR_SENTINEL_ATTR; +__MPFR_DECLSPEC void + mpfr_inits _MPFR_PROTO ((mpfr_ptr, ...)) __MPFR_SENTINEL_ATTR; +__MPFR_DECLSPEC void + mpfr_clears _MPFR_PROTO ((mpfr_ptr, ...)) __MPFR_SENTINEL_ATTR; + +__MPFR_DECLSPEC int + mpfr_prec_round _MPFR_PROTO ((mpfr_ptr, mpfr_prec_t, mpfr_rnd_t)); +__MPFR_DECLSPEC int + mpfr_can_round _MPFR_PROTO ((mpfr_srcptr, mpfr_exp_t, mpfr_rnd_t, mpfr_rnd_t, + mpfr_prec_t)); +__MPFR_DECLSPEC mpfr_prec_t mpfr_min_prec _MPFR_PROTO ((mpfr_srcptr)); + +__MPFR_DECLSPEC mpfr_exp_t mpfr_get_exp _MPFR_PROTO ((mpfr_srcptr)); +__MPFR_DECLSPEC int mpfr_set_exp _MPFR_PROTO ((mpfr_ptr, mpfr_exp_t)); +__MPFR_DECLSPEC mpfr_prec_t mpfr_get_prec _MPFR_PROTO((mpfr_srcptr)); +__MPFR_DECLSPEC void mpfr_set_prec _MPFR_PROTO((mpfr_ptr, mpfr_prec_t)); +__MPFR_DECLSPEC void mpfr_set_prec_raw _MPFR_PROTO((mpfr_ptr, mpfr_prec_t)); +__MPFR_DECLSPEC void mpfr_set_default_prec _MPFR_PROTO((mpfr_prec_t)); +__MPFR_DECLSPEC mpfr_prec_t mpfr_get_default_prec _MPFR_PROTO((void)); + +__MPFR_DECLSPEC int mpfr_set_d _MPFR_PROTO ((mpfr_ptr, double, mpfr_rnd_t)); +__MPFR_DECLSPEC 
int mpfr_set_flt _MPFR_PROTO ((mpfr_ptr, float, mpfr_rnd_t)); +#ifdef MPFR_WANT_DECIMAL_FLOATS +__MPFR_DECLSPEC int mpfr_set_decimal64 _MPFR_PROTO ((mpfr_ptr, _Decimal64, + mpfr_rnd_t)); +#endif +__MPFR_DECLSPEC int + mpfr_set_ld _MPFR_PROTO ((mpfr_ptr, long double, mpfr_rnd_t)); +__MPFR_DECLSPEC int + mpfr_set_z _MPFR_PROTO ((mpfr_ptr, mpz_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int + mpfr_set_z_2exp _MPFR_PROTO ((mpfr_ptr, mpz_srcptr, mpfr_exp_t, mpfr_rnd_t)); +__MPFR_DECLSPEC void mpfr_set_nan _MPFR_PROTO ((mpfr_ptr)); +__MPFR_DECLSPEC void mpfr_set_inf _MPFR_PROTO ((mpfr_ptr, int)); +__MPFR_DECLSPEC void mpfr_set_zero _MPFR_PROTO ((mpfr_ptr, int)); +__MPFR_DECLSPEC int + mpfr_set_f _MPFR_PROTO ((mpfr_ptr, mpf_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int + mpfr_get_f _MPFR_PROTO ((mpf_ptr, mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_set_si _MPFR_PROTO ((mpfr_ptr, long, mpfr_rnd_t)); +__MPFR_DECLSPEC int + mpfr_set_ui _MPFR_PROTO ((mpfr_ptr, unsigned long, mpfr_rnd_t)); +__MPFR_DECLSPEC int + mpfr_set_si_2exp _MPFR_PROTO ((mpfr_ptr, long, mpfr_exp_t, mpfr_rnd_t)); +__MPFR_DECLSPEC int + mpfr_set_ui_2exp _MPFR_PROTO ((mpfr_ptr,unsigned long,mpfr_exp_t,mpfr_rnd_t)); +__MPFR_DECLSPEC int + mpfr_set_q _MPFR_PROTO ((mpfr_ptr, mpq_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int + mpfr_set_str _MPFR_PROTO ((mpfr_ptr, __gmp_const char *, int, mpfr_rnd_t)); +__MPFR_DECLSPEC int + mpfr_init_set_str _MPFR_PROTO ((mpfr_ptr, __gmp_const char *, int, + mpfr_rnd_t)); +__MPFR_DECLSPEC int + mpfr_set4 _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, mpfr_rnd_t, int)); +__MPFR_DECLSPEC int + mpfr_abs _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int + mpfr_set _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_neg _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_signbit _MPFR_PROTO ((mpfr_srcptr)); +__MPFR_DECLSPEC int + mpfr_setsign _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, int, mpfr_rnd_t)); +__MPFR_DECLSPEC int + mpfr_copysign _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, mpfr_srcptr, mpfr_rnd_t)); + +#ifdef _MPFR_H_HAVE_INTMAX_T +#define mpfr_set_sj __gmpfr_set_sj +#define mpfr_set_sj_2exp __gmpfr_set_sj_2exp +#define mpfr_set_uj __gmpfr_set_uj +#define mpfr_set_uj_2exp __gmpfr_set_uj_2exp +#define mpfr_get_sj __gmpfr_mpfr_get_sj +#define mpfr_get_uj __gmpfr_mpfr_get_uj +__MPFR_DECLSPEC int mpfr_set_sj _MPFR_PROTO ((mpfr_t, intmax_t, mpfr_rnd_t)); +__MPFR_DECLSPEC int + mpfr_set_sj_2exp _MPFR_PROTO ((mpfr_t, intmax_t, intmax_t, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_set_uj _MPFR_PROTO ((mpfr_t, uintmax_t, mpfr_rnd_t)); +__MPFR_DECLSPEC int + mpfr_set_uj_2exp _MPFR_PROTO ((mpfr_t, uintmax_t, intmax_t, mpfr_rnd_t)); +__MPFR_DECLSPEC intmax_t mpfr_get_sj _MPFR_PROTO ((mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC uintmax_t mpfr_get_uj _MPFR_PROTO ((mpfr_srcptr, mpfr_rnd_t)); +#endif + +__MPFR_DECLSPEC mpfr_exp_t mpfr_get_z_2exp _MPFR_PROTO ((mpz_ptr, mpfr_srcptr)); +__MPFR_DECLSPEC float mpfr_get_flt _MPFR_PROTO ((mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC double mpfr_get_d _MPFR_PROTO ((mpfr_srcptr, mpfr_rnd_t)); +#ifdef MPFR_WANT_DECIMAL_FLOATS +__MPFR_DECLSPEC _Decimal64 mpfr_get_decimal64 _MPFR_PROTO ((mpfr_srcptr, + mpfr_rnd_t)); +#endif +__MPFR_DECLSPEC long double mpfr_get_ld _MPFR_PROTO ((mpfr_srcptr, + mpfr_rnd_t)); +__MPFR_DECLSPEC double mpfr_get_d1 _MPFR_PROTO ((mpfr_srcptr)); +__MPFR_DECLSPEC double mpfr_get_d_2exp _MPFR_PROTO ((long*, mpfr_srcptr, + mpfr_rnd_t)); +__MPFR_DECLSPEC long double mpfr_get_ld_2exp _MPFR_PROTO ((long*, 
mpfr_srcptr, + mpfr_rnd_t)); +__MPFR_DECLSPEC long mpfr_get_si _MPFR_PROTO ((mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC unsigned long mpfr_get_ui _MPFR_PROTO ((mpfr_srcptr, + mpfr_rnd_t)); +__MPFR_DECLSPEC char*mpfr_get_str _MPFR_PROTO ((char*, mpfr_exp_t*, int, size_t, + mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_get_z _MPFR_PROTO ((mpz_ptr z, mpfr_srcptr f, + mpfr_rnd_t)); + +__MPFR_DECLSPEC void mpfr_free_str _MPFR_PROTO ((char *)); + +__MPFR_DECLSPEC int mpfr_urandom _MPFR_PROTO ((mpfr_ptr, gmp_randstate_t, + mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_urandomb _MPFR_PROTO ((mpfr_ptr, gmp_randstate_t)); + +__MPFR_DECLSPEC void mpfr_nextabove _MPFR_PROTO ((mpfr_ptr)); +__MPFR_DECLSPEC void mpfr_nextbelow _MPFR_PROTO ((mpfr_ptr)); +__MPFR_DECLSPEC void mpfr_nexttoward _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr)); + +#ifdef _MPFR_H_HAVE_FILE +#define mpfr_inp_str __gmpfr_inp_str +#define mpfr_out_str __gmpfr_out_str +__MPFR_DECLSPEC size_t mpfr_inp_str _MPFR_PROTO ((mpfr_ptr, FILE*, int, + mpfr_rnd_t)); +__MPFR_DECLSPEC size_t mpfr_out_str _MPFR_PROTO ((FILE*, int, size_t, + mpfr_srcptr, mpfr_rnd_t)); +#define mpfr_fprintf __gmpfr_fprintf +__MPFR_DECLSPEC int mpfr_fprintf _MPFR_PROTO ((FILE*, __gmp_const char*, + ...)); +#endif +__MPFR_DECLSPEC int mpfr_printf _MPFR_PROTO ((__gmp_const char*, ...)); +__MPFR_DECLSPEC int mpfr_asprintf _MPFR_PROTO ((char**, __gmp_const char*, + ...)); +__MPFR_DECLSPEC int mpfr_sprintf _MPFR_PROTO ((char*, __gmp_const char*, + ...)); +__MPFR_DECLSPEC int mpfr_snprintf _MPFR_PROTO ((char*, size_t, + __gmp_const char*, ...)); + +#ifdef _MPFR_H_HAVE_VA_LIST +#ifdef _MPFR_H_HAVE_FILE +#define mpfr_vfprintf __gmpfr_vfprintf +__MPFR_DECLSPEC int mpfr_vfprintf _MPFR_PROTO ((FILE*, __gmp_const char*, + va_list)); +#endif /* _MPFR_H_HAVE_FILE */ +#define mpfr_vprintf __gmpfr_vprintf +#define mpfr_vasprintf __gmpfr_vasprintf +#define mpfr_vsprintf __gmpfr_vsprintf +#define mpfr_vsnprintf __gmpfr_vsnprintf +__MPFR_DECLSPEC int mpfr_vprintf _MPFR_PROTO ((__gmp_const char*, va_list)); +__MPFR_DECLSPEC int mpfr_vasprintf _MPFR_PROTO ((char**, __gmp_const char*, + va_list)); +__MPFR_DECLSPEC int mpfr_vsprintf _MPFR_PROTO ((char*, __gmp_const char*, + va_list)); +__MPFR_DECLSPEC int mpfr_vsnprintf _MPFR_PROTO ((char*, size_t, + __gmp_const char*, va_list)); +#endif /* _MPFR_H_HAVE_VA_LIST */ + +__MPFR_DECLSPEC int mpfr_pow _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_pow_si _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + long int, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_pow_ui _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + unsigned long int, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_ui_pow_ui _MPFR_PROTO ((mpfr_ptr, unsigned long int, + unsigned long int, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_ui_pow _MPFR_PROTO ((mpfr_ptr, unsigned long int, + mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_pow_z _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + mpz_srcptr, mpfr_rnd_t)); + +__MPFR_DECLSPEC int mpfr_sqrt _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_sqrt_ui _MPFR_PROTO ((mpfr_ptr, unsigned long, + mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_rec_sqrt _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + mpfr_rnd_t)); + +__MPFR_DECLSPEC int mpfr_add _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_sub _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_mul _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_div 
_MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + mpfr_srcptr, mpfr_rnd_t)); + +__MPFR_DECLSPEC int mpfr_add_ui _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + unsigned long, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_sub_ui _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + unsigned long, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_ui_sub _MPFR_PROTO ((mpfr_ptr, unsigned long, + mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_mul_ui _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + unsigned long, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_div_ui _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + unsigned long, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_ui_div _MPFR_PROTO ((mpfr_ptr, unsigned long, + mpfr_srcptr, mpfr_rnd_t)); + +__MPFR_DECLSPEC int mpfr_add_si _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + long int, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_sub_si _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + long int, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_si_sub _MPFR_PROTO ((mpfr_ptr, long int, + mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_mul_si _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + long int, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_div_si _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + long int, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_si_div _MPFR_PROTO ((mpfr_ptr, long int, + mpfr_srcptr, mpfr_rnd_t)); + +__MPFR_DECLSPEC int mpfr_add_d _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + double, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_sub_d _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + double, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_d_sub _MPFR_PROTO ((mpfr_ptr, double, + mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_mul_d _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + double, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_div_d _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + double, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_d_div _MPFR_PROTO ((mpfr_ptr, double, + mpfr_srcptr, mpfr_rnd_t)); + +__MPFR_DECLSPEC int mpfr_sqr _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr,mpfr_rnd_t)); + +__MPFR_DECLSPEC int mpfr_const_pi _MPFR_PROTO ((mpfr_ptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_const_log2 _MPFR_PROTO ((mpfr_ptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_const_euler _MPFR_PROTO ((mpfr_ptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_const_catalan _MPFR_PROTO ((mpfr_ptr, mpfr_rnd_t)); + +__MPFR_DECLSPEC int mpfr_agm _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, mpfr_srcptr, + mpfr_rnd_t)); + +__MPFR_DECLSPEC int mpfr_log _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_log2 _MPFR_PROTO ((mpfr_ptr,mpfr_srcptr,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_log10 _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_log1p _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + mpfr_rnd_t)); + +__MPFR_DECLSPEC int mpfr_exp _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_exp2 _MPFR_PROTO ((mpfr_ptr,mpfr_srcptr,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_exp10 _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_expm1 _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_eint _MPFR_PROTO ((mpfr_ptr,mpfr_srcptr,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_li2 _MPFR_PROTO ((mpfr_ptr,mpfr_srcptr,mpfr_rnd_t)); + +__MPFR_DECLSPEC int mpfr_cmp _MPFR_PROTO ((mpfr_srcptr, mpfr_srcptr)); +__MPFR_DECLSPEC int mpfr_cmp3 _MPFR_PROTO ((mpfr_srcptr, mpfr_srcptr, int)); +__MPFR_DECLSPEC int mpfr_cmp_d _MPFR_PROTO ((mpfr_srcptr, double)); +__MPFR_DECLSPEC int mpfr_cmp_ld _MPFR_PROTO ((mpfr_srcptr, long double)); +__MPFR_DECLSPEC int mpfr_cmpabs _MPFR_PROTO ((mpfr_srcptr, mpfr_srcptr)); +__MPFR_DECLSPEC int mpfr_cmp_ui _MPFR_PROTO ((mpfr_srcptr, 
unsigned long)); +__MPFR_DECLSPEC int mpfr_cmp_si _MPFR_PROTO ((mpfr_srcptr, long)); +__MPFR_DECLSPEC int mpfr_cmp_ui_2exp _MPFR_PROTO ((mpfr_srcptr, unsigned long, + mpfr_exp_t)); +__MPFR_DECLSPEC int mpfr_cmp_si_2exp _MPFR_PROTO ((mpfr_srcptr, long, + mpfr_exp_t)); +__MPFR_DECLSPEC void mpfr_reldiff _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_eq _MPFR_PROTO((mpfr_srcptr, mpfr_srcptr, + unsigned long)); +__MPFR_DECLSPEC int mpfr_sgn _MPFR_PROTO ((mpfr_srcptr)); + +__MPFR_DECLSPEC int mpfr_mul_2exp _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + unsigned long, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_div_2exp _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + unsigned long, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_mul_2ui _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + unsigned long, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_div_2ui _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + unsigned long, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_mul_2si _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + long, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_div_2si _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + long, mpfr_rnd_t)); + +__MPFR_DECLSPEC int mpfr_rint _MPFR_PROTO((mpfr_ptr,mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_round _MPFR_PROTO((mpfr_ptr, mpfr_srcptr)); +__MPFR_DECLSPEC int mpfr_trunc _MPFR_PROTO((mpfr_ptr, mpfr_srcptr)); +__MPFR_DECLSPEC int mpfr_ceil _MPFR_PROTO((mpfr_ptr, mpfr_srcptr)); +__MPFR_DECLSPEC int mpfr_floor _MPFR_PROTO((mpfr_ptr, mpfr_srcptr)); +__MPFR_DECLSPEC int mpfr_rint_round _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_rint_trunc _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_rint_ceil _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_rint_floor _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_frac _MPFR_PROTO ((mpfr_ptr,mpfr_srcptr,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_modf _MPFR_PROTO ((mpfr_ptr, mpfr_ptr, mpfr_srcptr, + mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_remquo _MPFR_PROTO ((mpfr_ptr, long*, mpfr_srcptr, + mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_remainder _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_fmod _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + mpfr_srcptr, mpfr_rnd_t)); + +__MPFR_DECLSPEC int mpfr_fits_ulong_p _MPFR_PROTO((mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_fits_slong_p _MPFR_PROTO((mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_fits_uint_p _MPFR_PROTO((mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_fits_sint_p _MPFR_PROTO((mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_fits_ushort_p _MPFR_PROTO((mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_fits_sshort_p _MPFR_PROTO((mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_fits_uintmax_p _MPFR_PROTO((mpfr_srcptr,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_fits_intmax_p _MPFR_PROTO((mpfr_srcptr, mpfr_rnd_t)); + +__MPFR_DECLSPEC void mpfr_extract _MPFR_PROTO ((mpz_ptr, mpfr_srcptr, + unsigned int)); +__MPFR_DECLSPEC void mpfr_swap _MPFR_PROTO ((mpfr_ptr, mpfr_ptr)); +__MPFR_DECLSPEC void mpfr_dump _MPFR_PROTO ((mpfr_srcptr)); + +__MPFR_DECLSPEC int mpfr_nan_p _MPFR_PROTO((mpfr_srcptr)); +__MPFR_DECLSPEC int mpfr_inf_p _MPFR_PROTO((mpfr_srcptr)); +__MPFR_DECLSPEC int mpfr_number_p _MPFR_PROTO((mpfr_srcptr)); +__MPFR_DECLSPEC int mpfr_integer_p _MPFR_PROTO ((mpfr_srcptr)); +__MPFR_DECLSPEC int mpfr_zero_p _MPFR_PROTO ((mpfr_srcptr)); +__MPFR_DECLSPEC int mpfr_regular_p _MPFR_PROTO ((mpfr_srcptr)); + 
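+
+/* (Editor's illustration, not part of the original header.) The
+   classification predicates above follow IEEE-754 semantics; e.g. after
+       mpfr_t x;  mpfr_init2 (x, 24);  mpfr_set_nan (x);
+   one has mpfr_nan_p (x) != 0 and mpfr_number_p (x) == 0, and the
+   comparison predicates below treat NaN as unordered, so
+   mpfr_equal_p (x, x) == 0 as well. */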
+__MPFR_DECLSPEC int mpfr_greater_p _MPFR_PROTO ((mpfr_srcptr, mpfr_srcptr)); +__MPFR_DECLSPEC int mpfr_greaterequal_p _MPFR_PROTO ((mpfr_srcptr, + mpfr_srcptr)); +__MPFR_DECLSPEC int mpfr_less_p _MPFR_PROTO ((mpfr_srcptr, mpfr_srcptr)); +__MPFR_DECLSPEC int mpfr_lessequal_p _MPFR_PROTO ((mpfr_srcptr, mpfr_srcptr)); +__MPFR_DECLSPEC int mpfr_lessgreater_p _MPFR_PROTO((mpfr_srcptr,mpfr_srcptr)); +__MPFR_DECLSPEC int mpfr_equal_p _MPFR_PROTO ((mpfr_srcptr, mpfr_srcptr)); +__MPFR_DECLSPEC int mpfr_unordered_p _MPFR_PROTO ((mpfr_srcptr, mpfr_srcptr)); + +__MPFR_DECLSPEC int mpfr_atanh _MPFR_PROTO((mpfr_ptr,mpfr_srcptr,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_acosh _MPFR_PROTO((mpfr_ptr,mpfr_srcptr,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_asinh _MPFR_PROTO((mpfr_ptr,mpfr_srcptr,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_cosh _MPFR_PROTO((mpfr_ptr,mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_sinh _MPFR_PROTO((mpfr_ptr,mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_tanh _MPFR_PROTO((mpfr_ptr,mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_sinh_cosh _MPFR_PROTO ((mpfr_ptr, mpfr_ptr, + mpfr_srcptr, mpfr_rnd_t)); + +__MPFR_DECLSPEC int mpfr_sech _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_csch _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_coth _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr,mpfr_rnd_t)); + +__MPFR_DECLSPEC int mpfr_acos _MPFR_PROTO ((mpfr_ptr,mpfr_srcptr,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_asin _MPFR_PROTO ((mpfr_ptr,mpfr_srcptr,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_atan _MPFR_PROTO ((mpfr_ptr,mpfr_srcptr,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_sin _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_sin_cos _MPFR_PROTO ((mpfr_ptr, mpfr_ptr, + mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_cos _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_tan _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_atan2 _MPFR_PROTO ((mpfr_ptr,mpfr_srcptr,mpfr_srcptr, + mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_sec _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_csc _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_cot _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr,mpfr_rnd_t)); + +__MPFR_DECLSPEC int mpfr_hypot _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_erf _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_erfc _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_cbrt _MPFR_PROTO ((mpfr_ptr,mpfr_srcptr,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_root _MPFR_PROTO ((mpfr_ptr,mpfr_srcptr,unsigned long,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_gamma _MPFR_PROTO((mpfr_ptr,mpfr_srcptr,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_lngamma _MPFR_PROTO((mpfr_ptr,mpfr_srcptr,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_lgamma _MPFR_PROTO((mpfr_ptr,int*,mpfr_srcptr,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_digamma _MPFR_PROTO((mpfr_ptr,mpfr_srcptr,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_zeta _MPFR_PROTO ((mpfr_ptr,mpfr_srcptr,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_zeta_ui _MPFR_PROTO ((mpfr_ptr,unsigned long,mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_fac_ui _MPFR_PROTO ((mpfr_ptr, unsigned long int, + mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_j0 _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_j1 _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_jn _MPFR_PROTO ((mpfr_ptr, long, mpfr_srcptr, + 
mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_y0 _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_y1 _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_yn _MPFR_PROTO ((mpfr_ptr, long, mpfr_srcptr, + mpfr_rnd_t)); + +__MPFR_DECLSPEC int mpfr_ai _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, mpfr_rnd_t)); + +__MPFR_DECLSPEC int mpfr_min _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, mpfr_srcptr, + mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_max _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, mpfr_srcptr, + mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_dim _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, mpfr_srcptr, + mpfr_rnd_t)); + +__MPFR_DECLSPEC int mpfr_mul_z _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + mpz_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_div_z _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + mpz_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_add_z _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + mpz_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_sub_z _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + mpz_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_cmp_z _MPFR_PROTO ((mpfr_srcptr, mpz_srcptr)); + +__MPFR_DECLSPEC int mpfr_mul_q _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + mpq_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_div_q _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + mpq_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_add_q _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + mpq_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_sub_q _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, + mpq_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_cmp_q _MPFR_PROTO ((mpfr_srcptr, mpq_srcptr)); + +__MPFR_DECLSPEC int mpfr_cmp_f _MPFR_PROTO ((mpfr_srcptr, mpf_srcptr)); + +__MPFR_DECLSPEC int mpfr_fma _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, mpfr_srcptr, + mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_fms _MPFR_PROTO ((mpfr_ptr, mpfr_srcptr, mpfr_srcptr, + mpfr_srcptr, mpfr_rnd_t)); +__MPFR_DECLSPEC int mpfr_sum _MPFR_PROTO ((mpfr_ptr, mpfr_ptr *__gmp_const, + unsigned long, mpfr_rnd_t)); + +__MPFR_DECLSPEC void mpfr_free_cache _MPFR_PROTO ((void)); + +__MPFR_DECLSPEC int mpfr_subnormalize _MPFR_PROTO ((mpfr_ptr, int, + mpfr_rnd_t)); + +__MPFR_DECLSPEC int mpfr_strtofr _MPFR_PROTO ((mpfr_ptr, __gmp_const char *, + char **, int, mpfr_rnd_t)); + +__MPFR_DECLSPEC size_t mpfr_custom_get_size _MPFR_PROTO ((mpfr_prec_t)); +__MPFR_DECLSPEC void mpfr_custom_init _MPFR_PROTO ((void *, mpfr_prec_t)); +__MPFR_DECLSPEC void * mpfr_custom_get_significand _MPFR_PROTO ((mpfr_srcptr)); +__MPFR_DECLSPEC mpfr_exp_t mpfr_custom_get_exp _MPFR_PROTO ((mpfr_srcptr)); +__MPFR_DECLSPEC void mpfr_custom_move _MPFR_PROTO ((mpfr_ptr, void *)); +__MPFR_DECLSPEC void mpfr_custom_init_set _MPFR_PROTO ((mpfr_ptr, int, + mpfr_exp_t, mpfr_prec_t, void *)); +__MPFR_DECLSPEC int mpfr_custom_get_kind _MPFR_PROTO ((mpfr_srcptr)); + +#if defined (__cplusplus) +} +#endif + +/* DON'T USE THIS! (For MPFR-public macros only, see below.) + The mpfr_sgn macro uses the fact that __MPFR_EXP_NAN and __MPFR_EXP_ZERO + are the smallest values. + FIXME: In the following macros, the cast of an unsigned type with MSB set + to the signed type mpfr_exp_t yields an integer overflow, which can give + unexpected results with future compilers and aggressive optimisations. + Why not working only with signed types, using INT_MIN and LONG_MIN? 
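+
+   (Editor's note: the three special exponents defined below are the three
+   smallest mpfr_exp_t values, with __MPFR_EXP_ZERO < __MPFR_EXP_NAN <
+   __MPFR_EXP_INF, which is why the mpfr_regular_p macro below reduces to
+   the single comparison _mpfr_exp > __MPFR_EXP_INF.)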
*/
+#if __GMP_MP_SIZE_T_INT
+#define __MPFR_EXP_NAN  ((mpfr_exp_t)((~((~(mpfr_uint)0)>>1))+2))
+#define __MPFR_EXP_ZERO ((mpfr_exp_t)((~((~(mpfr_uint)0)>>1))+1))
+#define __MPFR_EXP_INF  ((mpfr_exp_t)((~((~(mpfr_uint)0)>>1))+3))
+#else
+#define __MPFR_EXP_NAN  ((mpfr_exp_t)((~((~(mpfr_ulong)0)>>1))+2))
+#define __MPFR_EXP_ZERO ((mpfr_exp_t)((~((~(mpfr_ulong)0)>>1))+1))
+#define __MPFR_EXP_INF  ((mpfr_exp_t)((~((~(mpfr_ulong)0)>>1))+3))
+#endif
+
+/* Define MPFR_USE_EXTENSION to avoid "gcc -pedantic" warnings. */
+#ifndef MPFR_EXTENSION
+# if defined(MPFR_USE_EXTENSION)
+#  define MPFR_EXTENSION __extension__
+# else
+#  define MPFR_EXTENSION
+# endif
+#endif
+
+/* Warning! This macro doesn't work with K&R C (e.g., compare the "gcc -E"
+   output with and without -traditional) and shouldn't be used internally.
+   For public use only, but see the MPFR manual. */
+#define MPFR_DECL_INIT(_x, _p)                                        \
+  MPFR_EXTENSION mp_limb_t __gmpfr_local_tab_##_x[((_p)-1)/GMP_NUMB_BITS+1]; \
+  MPFR_EXTENSION mpfr_t _x = {{(_p),1,__MPFR_EXP_NAN,__gmpfr_local_tab_##_x}}
+
+/* Fast access macros to replace function interface.
+   If the USER doesn't want to use the macro interface, that is fine,
+   even though the macro interface produces faster and smaller code. */
+#ifndef MPFR_USE_NO_MACRO
+
+/* Inlining these functions is both faster and smaller */
+#define mpfr_nan_p(_x)      ((_x)->_mpfr_exp == __MPFR_EXP_NAN)
+#define mpfr_inf_p(_x)      ((_x)->_mpfr_exp == __MPFR_EXP_INF)
+#define mpfr_zero_p(_x)     ((_x)->_mpfr_exp == __MPFR_EXP_ZERO)
+#define mpfr_regular_p(_x)  ((_x)->_mpfr_exp >  __MPFR_EXP_INF)
+#define mpfr_sgn(_x)                                               \
+  ((_x)->_mpfr_exp < __MPFR_EXP_INF ?                              \
+   (mpfr_nan_p (_x) ? mpfr_set_erangeflag () : (mpfr_void) 0), 0 : \
+   MPFR_SIGN (_x))
+
+/* Prevent them from being used as lvalues */
+#define MPFR_VALUE_OF(x)  (0 ? (x) : (x))
+#define mpfr_get_prec(_x) MPFR_VALUE_OF((_x)->_mpfr_prec)
+#define mpfr_get_exp(_x)  MPFR_VALUE_OF((_x)->_mpfr_exp)
+/* Note: if need be, the MPFR_VALUE_OF can be used for other expressions
+   (of any type). Thanks to Wojtek Lerch and Tim Rentsch for the idea. */
+
+#define mpfr_round(a,b) mpfr_rint((a), (b), MPFR_RNDNA)
+#define mpfr_trunc(a,b) mpfr_rint((a), (b), MPFR_RNDZ)
+#define mpfr_ceil(a,b)  mpfr_rint((a), (b), MPFR_RNDU)
+#define mpfr_floor(a,b) mpfr_rint((a), (b), MPFR_RNDD)
+
+#define mpfr_cmp_ui(b,i) mpfr_cmp_ui_2exp((b),(i),0)
+#define mpfr_cmp_si(b,i) mpfr_cmp_si_2exp((b),(i),0)
+#define mpfr_set(a,b,r)  mpfr_set4(a,b,r,MPFR_SIGN(b))
+#define mpfr_abs(a,b,r)  mpfr_set4(a,b,r,1)
+#define mpfr_copysign(a,b,c,r) mpfr_set4(a,b,r,MPFR_SIGN(c))
+#define mpfr_setsign(a,b,s,r)  mpfr_set4(a,b,r,(s) ? -1 : 1)
+#define mpfr_signbit(x)  (MPFR_SIGN(x) < 0)
+#define mpfr_cmp(b, c)   mpfr_cmp3(b, c, 1)
+#define mpfr_mul_2exp(y,x,n,r) mpfr_mul_2ui((y),(x),(n),(r))
+#define mpfr_div_2exp(y,x,n,r) mpfr_div_2ui((y),(x),(n),(r))
+
+
+/* When using GCC, optimize certain common comparisons and assignments.
+
+   Remove ICC since it defines __GNUC__ but produces a
+   huge number of warnings if you use this code.
+   VL: I couldn't reproduce a single warning when enabling these macros
+   with icc 10.1 20080212 on Itanium. But with this version, __ICC isn't
+   defined (__INTEL_COMPILER is, though), so that these macros are enabled
+   anyway. Checking with other ICC versions is needed. Possibly detect
+   whether warnings are produced or not with a configure test.
+
+   Remove C++ too, since it complains too much. */
+/* Added casts to improve robustness in case of undefined behavior and
+   compiler extensions based on UB (in particular -fwrapv).
+   MPFR doesn't
+   use such extensions, but these macros will be used by 3rd-party code,
+   where such extensions may be required.
+   Moreover casts to unsigned long have been added to avoid warnings in
+   programs that use MPFR and are compiled with -Wconversion; such casts
+   are OK since if X is a constant expression, then (unsigned long) X is
+   also a constant expression, so that the optimizations still work. The
+   warnings are probably related to the following two bugs:
+     http://gcc.gnu.org/bugzilla/show_bug.cgi?id=4210
+     http://gcc.gnu.org/bugzilla/show_bug.cgi?id=38470 (possibly a variant)
+   and the casts could be removed once these bugs are fixed.
+   Casts shouldn't be used on the generic calls (to the ..._2exp functions),
+   where implicit conversions are performed. Indeed, having at least one
+   implicit conversion in the macro allows the compiler to emit diagnostics
+   when normally expected, for instance in the following call:
+     mpfr_set_ui (x, "foo", MPFR_RNDN);
+   If this is not possible (for future macros), one of the tricks described
+   on http://groups.google.com/group/comp.std.c/msg/e92abd24bf9eaf7b could
+   be used. */
+#if defined (__GNUC__) && !defined(__ICC) && !defined(__cplusplus)
+#if (__GNUC__ >= 2)
+#undef mpfr_cmp_ui
+/* We use the fact that mpfr_sgn on NaN sets the erange flag and returns 0.
+   But warning! mpfr_sgn is specified as a macro in the API, thus the macro
+   mustn't be used if side effects are possible, like here. */
+#define mpfr_cmp_ui(_f,_u)                                   \
+  (__builtin_constant_p (_u) && (mpfr_ulong) (_u) == 0 ?     \
+   (mpfr_sgn) (_f) :                                         \
+   mpfr_cmp_ui_2exp ((_f), (_u), 0))
+#undef mpfr_cmp_si
+#define mpfr_cmp_si(_f,_s)                                   \
+  (__builtin_constant_p (_s) && (mpfr_long) (_s) >= 0 ?      \
+   mpfr_cmp_ui ((_f), (mpfr_ulong) (mpfr_long) (_s)) :       \
+   mpfr_cmp_si_2exp ((_f), (_s), 0))
+#if __GNUC__ > 2 || __GNUC_MINOR__ >= 95
+#undef mpfr_set_ui
+#define mpfr_set_ui(_f,_u,_r)                                \
+  (__builtin_constant_p (_u) && (mpfr_ulong) (_u) == 0 ?     \
+   __extension__ ({                                          \
+     mpfr_ptr _p = (_f);                                     \
+     _p->_mpfr_sign = 1;                                     \
+     _p->_mpfr_exp = __MPFR_EXP_ZERO;                        \
+     (mpfr_void) (_r); 0; }) :                               \
+   mpfr_set_ui_2exp ((_f), (_u), 0, (_r)))
+#endif
+#undef mpfr_set_si
+#define mpfr_set_si(_f,_s,_r)                                \
+  (__builtin_constant_p (_s) && (mpfr_long) (_s) >= 0 ?      \
+   mpfr_set_ui ((_f), (mpfr_ulong) (mpfr_long) (_s), (_r)) : \
+   mpfr_set_si_2exp ((_f), (_s), 0, (_r)))
+#endif
+#endif
+
+/* Macro version of mpfr_stack interface for fast access */
+#define mpfr_custom_get_size(p) ((mpfr_size_t)                          \
+       (((p)+GMP_NUMB_BITS-1)/GMP_NUMB_BITS*sizeof (mp_limb_t)))
+#define mpfr_custom_init(m,p) do {} while (0)
+#define mpfr_custom_get_significand(x) ((mpfr_void*)((x)->_mpfr_d))
+#define mpfr_custom_get_exp(x) ((x)->_mpfr_exp)
+#define mpfr_custom_move(x,m) do { ((x)->_mpfr_d = (mp_limb_t*)(m)); } while (0)
+#define mpfr_custom_init_set(x,k,e,p,m) do {                   \
+  mpfr_ptr _x = (x);                                           \
+  mpfr_exp_t _e;                                               \
+  mpfr_kind_t _t;                                              \
+  mpfr_int _s, _k;                                             \
+  _k = (k);                                                    \
+  if (_k >= 0) {                                               \
+    _t = (mpfr_kind_t) _k;                                     \
+    _s = 1;                                                    \
+  } else {                                                     \
+    _t = (mpfr_kind_t) - _k; /* use _k, not k: (k) was already evaluated */ \
+    _s = -1;                                                   \
+  }                                                            \
+  _e = _t == MPFR_REGULAR_KIND ? (e) :                         \
+    _t == MPFR_NAN_KIND ? __MPFR_EXP_NAN :                     \
+    _t == MPFR_INF_KIND ? __MPFR_EXP_INF : __MPFR_EXP_ZERO;    \
+  _x->_mpfr_prec = (p);                                        \
+  _x->_mpfr_sign = _s;                                         \
+  _x->_mpfr_exp  = _e;                                         \
+  _x->_mpfr_d    = (mp_limb_t*) (m);                           \
+ } while (0)
+#define mpfr_custom_get_kind(x)                                         \
+  ( (x)->_mpfr_exp >  __MPFR_EXP_INF ?                                  \
+    (mpfr_int) MPFR_REGULAR_KIND * MPFR_SIGN (x)                        \
+  : (x)->_mpfr_exp == __MPFR_EXP_INF ?                                  \
+    (mpfr_int) MPFR_INF_KIND * MPFR_SIGN (x)                            \
+  : (x)->_mpfr_exp == __MPFR_EXP_NAN ? (mpfr_int) MPFR_NAN_KIND         \
+  : (mpfr_int) MPFR_ZERO_KIND * MPFR_SIGN (x) )
+
+
+#endif /* MPFR_USE_NO_MACRO */
+
+/* These are defined to be macros */
+#define mpfr_init_set_si(x, i, rnd) \
+ ( mpfr_init(x), mpfr_set_si((x), (i), (rnd)) )
+#define mpfr_init_set_ui(x, i, rnd) \
+ ( mpfr_init(x), mpfr_set_ui((x), (i), (rnd)) )
+#define mpfr_init_set_d(x, d, rnd) \
+ ( mpfr_init(x), mpfr_set_d((x), (d), (rnd)) )
+#define mpfr_init_set_ld(x, d, rnd) \
+ ( mpfr_init(x), mpfr_set_ld((x), (d), (rnd)) )
+#define mpfr_init_set_z(x, i, rnd) \
+ ( mpfr_init(x), mpfr_set_z((x), (i), (rnd)) )
+#define mpfr_init_set_q(x, i, rnd) \
+ ( mpfr_init(x), mpfr_set_q((x), (i), (rnd)) )
+#define mpfr_init_set(x, y, rnd) \
+ ( mpfr_init(x), mpfr_set((x), (y), (rnd)) )
+#define mpfr_init_set_f(x, y, rnd) \
+ ( mpfr_init(x), mpfr_set_f((x), (y), (rnd)) )
+
+/* Compatibility layer -- obsolete functions and macros */
+#define mpfr_cmp_abs mpfr_cmpabs
+#define mpfr_round_prec(x,r,p) mpfr_prec_round(x,p,r)
+#define __gmp_default_rounding_mode (mpfr_get_default_rounding_mode())
+#define __mpfr_emin (mpfr_get_emin())
+#define __mpfr_emax (mpfr_get_emax())
+#define __mpfr_default_fp_bit_precision (mpfr_get_default_fp_bit_precision())
+#define MPFR_EMIN_MIN mpfr_get_emin_min()
+#define MPFR_EMIN_MAX mpfr_get_emin_max()
+#define MPFR_EMAX_MIN mpfr_get_emax_min()
+#define MPFR_EMAX_MAX mpfr_get_emax_max()
+#define mpfr_version (mpfr_get_version())
+#ifndef mpz_set_fr
+# define mpz_set_fr mpfr_get_z
+#endif
+#define mpfr_add_one_ulp(x,r) \
+ (mpfr_sgn (x) > 0 ? mpfr_nextabove (x) : mpfr_nextbelow (x))
+#define mpfr_sub_one_ulp(x,r) \
+ (mpfr_sgn (x) > 0 ? mpfr_nextbelow (x) : mpfr_nextabove (x))
+#define mpfr_get_z_exp mpfr_get_z_2exp
+#define mpfr_custom_get_mantissa mpfr_custom_get_significand
+
+#endif /* __MPFR_H */
+
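
Editor's note: a minimal end-to-end sketch of driving the interface declared
in mpfr.h above (illustrative only; assumes MPFR and GMP are installed and
linked with -lmpfr -lgmp). stdio.h is included before mpfr.h so that the
FILE-based mpfr_out_str is declared, per the _MPFR_H_HAVE_FILE mechanism in
the header:

    #include <stdio.h>
    #include <gmp.h>
    #include <mpfr.h>

    int main(void)
    {
        mpfr_t x;
        mpfr_init_set_ui(x, 2, MPFR_RNDN);   /* x = 2 at default precision */
        mpfr_sqrt(x, x, MPFR_RNDN);          /* x = sqrt(2), round to nearest */
        mpfr_out_str(stdout, 10, 0, x, MPFR_RNDN);
        putchar('\n');
        mpfr_clear(x);
        mpfr_free_cache();                   /* free cached constants */
        return 0;
    }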