feat: add debug macros and config source files

2024-09-24 16:43:46 +08:00 · 2024-09-24 16:43:46 +08:00 · 03869b9959
commit 03869b9959
parent df550439a1
62 changed files with 1398 additions and 4450 deletions
--- a/src/common/bench/bench.c
+++ b/src/common/bench/bench.c
@ -1,5 +1,6 @@
 #include <am.h>
 #include <klib.h>
 #include <bench_debug.h>
 #include <klib-macros.h>
 uint64_t uptime()
@ -8,6 +9,41 @@ uint64_t uptime()
 }
 /*
  * Render a duration given in microseconds as text such as
  * "1 h 2 min 3 s 4.005 ms", omitting leading units that are zero.
  *
  * us: elapsed time in microseconds.
  * Returns a pointer to a static buffer that is overwritten by the next call,
  * so the result must be consumed (or copied) before calling again.
  */
 char *format_time(uint64_t us) {
   static char buf[128];
   uint64_t ms = us / 1000;
   uint64_t s = ms / 1000;
   uint64_t min = s / 60;
   uint64_t h = min / 60;
   us %= 1000;
   ms %= 1000;
   s %= 60;
   min %= 60;
   /*
    * "%ld" consumes a long, which need not be as wide as uint64_t (e.g. on
    * 32-bit targets); passing uint64_t directly can make the formatter read
    * the wrong argument bytes. Cast explicitly so the argument types match
    * the conversion specifiers.
    */
   const long hours = (long)h;
   const long minutes = (long)min;
   const long seconds = (long)s;
   const long millis = (long)ms;
   int len;
   if (h > 0) {
     len = bench_sprintf(buf, "%ld h %ld min %ld s %ld.000 ms", hours, minutes,
                         seconds, millis);
   } else if (min > 0) {
     len = bench_sprintf(buf, "%ld min %ld s, %ld.000 ms", minutes, seconds,
                         millis);
   } else if (s > 0) {
     len = bench_sprintf(buf, "%ld s, %ld.000 ms", seconds, millis);
   } else {
     len = bench_sprintf(buf, "%ld.000 ms", millis);
   }
   /*
    * The text ends in "000 ms"; buf[len - 4] is the last '0'. Overwrite the
    * placeholder digits right-to-left with the microsecond remainder (0..999).
    * This relies on bench_sprintf returning the number of characters written.
    */
   char *p = &buf[len - 4];
   while (us > 0) {
     *p-- = (char)('0' + us % 10);
     us /= 10;
   }
   return buf;
 }
 /* char *format_time(uint64_t us) {
  static char buf[32];
  uint64_t ms = us / 1000;
  us -= ms * 1000;
@ -19,7 +55,7 @@ char *format_time(uint64_t us) {
    us /= 10;
  }
  return buf;
-}
+} */
 // FNV hash
 uint32_t checksum(void *start, void *end) {
--- a/src/common/bench/bench_printf.c
+++ b/src/common/bench/bench_printf.c
@ -295,7 +295,6 @@ int bench_vsprintf(char *buf, const char *fmt, va_list args)
                case '0': flags |= ZEROPAD; goto repeat;
            }
        field_width = -1;
        if ('0' <= *fmt && *fmt <= '9')
            field_width = skip_atoi(&fmt);
--- a/src/common/bench/bench_malloc.c
+++ b/src/common/bench/bench_malloc.c
@ -50,8 +50,8 @@ static void *malloc_base() {
    return b;
 }
-//We need this function because we use variable `heap`
+//We need this function because the variable `heap` is used
-//and it is initialized in run time.
+//and initialized at runtime.
 void bench_malloc_init() {
  program_break = (intptr_t)heap.start;
 }
--- a/src/common/bench/include/bench_debug.h
+++ b/src/common/bench/include/bench_debug.h
@ -0,0 +1,35 @@
 #ifndef __BENCH_DEBUG_H
 #define __BENCH_DEBUG_H
 #include <stdarg.h>
 /* ANSI SGR escape sequences: one colour per log level, plus the reset code. */
 #define ANSI_ERROR "\33[1;31m"
 #define ANSI_WARN  "\33[1;93m"
 #define ANSI_INFO  "\33[1;34m"
 #define ANSI_DEBUG "\33[1;32m"
 #define ANSI_TRACE "\33[1;90m"
 #define ANSI_NONE  "\33[0m"
 /* Severity levels; a larger number means a more verbose message. */
 #define LOG_LEVEL_ERROR 0
 #define LOG_LEVEL_WARN  1
 #define LOG_LEVEL_INFO  2
 #define LOG_LEVEL_DEBUG 3
 #define LOG_LEVEL_TRACE 4
 /* Compile-time verbosity threshold; override with -DLOG_LEVEL=<n>. */
 #ifndef LOG_LEVEL
 #define LOG_LEVEL LOG_LEVEL_INFO
 #endif
 /* Surround the string literal `str` with the colour `fmt` and a reset. */
 #define ANSI_FMT(str, fmt) fmt str ANSI_NONE
 /*
  * BENCH_LOG(level, str, ...): print `str` (a printf-style format string
  * literal) through bench_printf, coloured for `level`, when `level` is at or
  * below the LOG_LEVEL threshold. `level` is one of the bare tokens ERROR,
  * WARN, INFO, DEBUG or TRACE; it is pasted onto the LOG_LEVEL_ and ANSI_
  * prefixes, so the underscore before ## is required.
  * The macro expands to a single statement and needs a trailing semicolon.
  */
 #define BENCH_LOG(level, str, ...) \
   do { \
     if (LOG_LEVEL >= LOG_LEVEL_##level) { \
       bench_printf(ANSI_FMT(str, ANSI_##level), ##__VA_ARGS__); \
     } \
   } while (0)
 int bench_vsprintf(char *buf, const char *fmt, va_list args);
 int bench_sprintf(char *s, const char *fmt, ...);
 int bench_printf(const char *fmt, ...);
 #endif
--- a/src/common/bench/include/bench_printf.h
+++ b/src/common/bench/include/bench_printf.h
@ -1,9 +0,0 @@
 #ifndef __BENCH_DEBUG_H
 #define __BENCH_DEBUG_H
 #include <stdarg.h>
 int bench_vsprintf(char *buf, const char *fmt, va_list args);
 int bench_sprintf( char *s, const char *fmt, ... );
 int bench_printf(const char *fmt, ... );
 #endif
--- a/src/gemm/gemm.c
+++ b/src/gemm/gemm.c
@ -1,57 +1,58 @@
 #include <bench_debug.h>
 #include <gemm.h>
-#define A(i,j) a[(j)*lda + (i)]
+#define A(i, j) a[(j) * lda + (i)]
-void serial_init(int m, int n, double * a, int lda){
+void serial_init(int m, int n, double *a, int lda) {
  int count = 1;
-    for(int j=0;j<n;j++){
+  for (int j = 0; j < n; j++) {
-        for(int i=0;i<m;i++)
+    for (int i = 0; i < m; i++)
-            A(i,j) = count++;
+      A(i, j) = count++;
  }
 }
-void random_init(int m, int n, double * a, int lda){
+void random_init(int m, int n, double *a, int lda) {
-    for(int j=0;j<n;j++){
+  for (int j = 0; j < n; j++) {
-        for(int i=0;i<m;i++)
+    for (int i = 0; i < m; i++)
-            A(i,j) = 2.0 * rand() - 1.0;
+      A(i, j) = 2.0 * rand() - 1.0;
  }
 }
 extern bench_gemm_config config;
-int main(){
+int main() {
  bench_malloc_init();
  int m = config.m;
  int n = config.n;
  int k = config.k;
-    //TODO: calculate the memory size.
+  // TODO: calculate the memory size.
-    double * A = (double*)bench_malloc(m*k*sizeof(double));
+  double *A = (double *)bench_malloc(m * k * sizeof(double));
-    double * B = (double*)bench_malloc(k*n*sizeof(double));
+  double *B = (double *)bench_malloc(k * n * sizeof(double));
-    double * C = (double*)bench_malloc(m*n*sizeof(double));
+  double *C = (double *)bench_malloc(m * n * sizeof(double));
  assert(A);
  assert(B);
  assert(C);
-    memset(A,0,m*k*sizeof(double));
+  memset(A, 0, m * k * sizeof(double));
-    memset(B,0,k*n*sizeof(double));
+  memset(B, 0, k * n * sizeof(double));
-    memset(C,0,m*n*sizeof(double));
+  memset(C, 0, m * n * sizeof(double));
  uint64_t start_time, end_time;
  srand(1556);
-    random_init(m,k,A,m);
+  random_init(m, k, A, m);
-    random_init(k,n,B,k);
+  random_init(k, n, B, k);
  start_time = uptime();
-    matmul(m,n,k,A,m,B,k,C,m);
+  matmul(m, n, k, A, m, B, k, C, m);
  end_time = uptime();
  bench_free(A);
  bench_free(B);
  bench_free(C);
-    printf("time: %s ms \n", format_time(end_time - start_time));
+  bench_printf("time: %s \n", format_time(end_time - start_time));
  return 0;
 }
--- a/src/gemm/include/gemm.h
+++ b/src/gemm/include/gemm.h
@ -6,6 +6,7 @@
 #include <stdint.h>
 #define TEST
 typedef struct {
  uint32_t m;
  uint32_t n;
--- a/src/gemm/matmul.c
+++ b/src/gemm/matmul.c
@ -1,4 +1,4 @@
-#include "gemm.h"
+#include <gemm.h>
 #define A(i,j) a[(j)*lda+(i)]
--- a/src/linpack/Makefile
+++ b/src/linpack/Makefile
@ -1,5 +1,6 @@
 NAME = linpack
 mainargs ?= ref
 BENCH_LIBS = bench openlibm soft-fp
--- a/src/linpack/configs/ref-config.c
+++ b/src/linpack/configs/ref-config.c
@ -0,0 +1,4 @@
 #include <linpack.h>
 bench_linpack_config config = { 270 };
--- a/src/linpack/configs/test-config.c
+++ b/src/linpack/configs/test-config.c
@ -0,0 +1,4 @@
 #include <linpack.h>
 bench_linpack_config config = { 100 };
--- a/src/linpack/configs/train-config.c
+++ b/src/linpack/configs/train-config.c
@ -0,0 +1,4 @@
 #include <linpack.h>
 bench_linpack_config config = { 80 };
--- a/src/linpack/include/linpack.h
+++ b/src/linpack/include/linpack.h
@ -0,0 +1,43 @@
 #ifndef __LINPACK_H__
 #define __LINPACK_H__
 /*
  * Decimal digits of precision for float and double. Guarded so that a
  * translation unit which already included <float.h> does not get a macro
  * redefinition.
  */
 #ifndef FLT_DIG
 #define FLT_DIG 6
 #endif
 #ifndef DBL_DIG
 #define DBL_DIG 15
 #endif
 /*
  * Precision selection: single precision (SP) is the default; build with -DDP
  * to select double precision instead. Exactly one of the two may be active,
  * because each one defines ZERO, ONE, PREC, BASE10DIG and REAL.
  */
 #if !defined(SP) && !defined(DP)
 #define SP
 #endif
 #if defined(SP) && defined(DP)
 #error "linpack.h: define at most one of SP and DP"
 #endif
 #ifdef SP
 #define ZERO 0.0
 #define ONE 1.0
 #define PREC "Single"
 #define BASE10DIG FLT_DIG
 typedef float REAL;
 #endif
 #ifdef DP
 #define ZERO 0.0e0
 #define ONE 1.0e0
 #define PREC "Double"
 #define BASE10DIG DBL_DIG
 typedef double REAL;
 #endif
 /* 2022-07-26: Macro defined for memreq variable to resolve warnings
  *             during malloc check
  */
 #define MEM_T long
 /* Per-input-set benchmark parameters; one `config` object is linked in. */
 typedef struct {
   int arsize; /* array size used by the LINPACK driver */
 } bench_linpack_config;
 #endif
--- a/src/linpack/linpack.c
+++ b/src/linpack/linpack.c
@ -31,42 +31,12 @@
 #include <am.h>
 #include <bench.h>
 #include <bench_malloc.h>
 #include <bench_debug.h>
 #include <klib-macros.h>
 #include <klib.h>
 #include <linpack.h>
-#define FLT_DIG 6
+extern bench_linpack_config config;
 #define DBL_DIG 15
 #define SP
 #ifndef SP
 #ifndef DP
 #define DP
 #endif
 #endif
 #ifdef SP
 #define ZERO 0.0
 #define ONE 1.0
 #define PREC "Single"
 #define BASE10DIG FLT_DIG
 typedef float REAL;
 #endif
 #ifdef DP
 #define ZERO 0.0e0
 #define ONE 1.0e0
 #define PREC "Double"
 #define BASE10DIG DBL_DIG
 typedef double REAL;
 #endif
 /* 2022-07-26: Macro defined for memreq variable to resolve warnings
 *             during malloc check
 */
 #define MEM_T long
 static REAL linpack (long nreps, int arsize);
 static void matgen (REAL *a, int lda, int n, REAL *b, REAL *norma);
@ -81,110 +51,69 @@ static REAL ddot_ur (int n, REAL *dx, int incx, REAL *dy, int incy);
 static void dscal_ur (int n, REAL da, REAL *dx, int incx);
 static int idamax (int n, REAL *dx, int incx);
 static REAL second (void);
-static double
+static inline double fabs(double x) { return x < 0 ? -x : x; }
 fabs (double x)
 {
  return x < 0 ? -x : x;
 }
 static void *mempool = NULL;
-int
+int main(int argc, char **argv)
 main (int argc, char **argv)
 {
-  ioe_init ();
+  ioe_init();
-  bench_malloc_init ();
+  bench_malloc_init();
  int arsize;
  long arsize2d, nreps;
  volatile size_t malloc_arg;
  volatile MEM_T memreq;
-  arsize = 270;
+  arsize = config.arsize;
  arsize2d = (long)arsize * (long)arsize;
-  memreq = arsize2d * sizeof (REAL) + (long)arsize * sizeof (REAL)
+  memreq = arsize2d * sizeof(REAL) + (long)arsize * sizeof(REAL) +
-           + (long)arsize * sizeof (int);
+           (long)arsize * sizeof(int);
  malloc_arg = (size_t)memreq;
  uint64_t start_time, end_time;
-  if ((MEM_T)malloc_arg != memreq
+  if ((MEM_T)malloc_arg != memreq ||
-      || (mempool = bench_malloc (malloc_arg)) == NULL)
+      (mempool = bench_malloc(malloc_arg)) == NULL) {
-    {
+      BENCH_LOG(ERROR, "Not enough memory available for given array size.\n");
      // printf("Not enough memory available for given array size.\n");
    return 1;
  }
  // printf("LINPACK benchmark, %s precision.\n", PREC);
  // printf("Machine precision:  %d digits.\n", BASE10DIG);
  // printf("Array size %d X %d.\n", arsize, arsize);
  // printf("Memory required:  %ldK.\n", (memreq + 512L) >> 10);
  // printf("Average rolled and unrolled performance:\n\n");
  // printf("    Reps Time(s) DGEFA   DGESL  OVERHEAD    KFLOPS\n");
  // printf("----------------------------------------------------\n");
  nreps = 1;
-  start_time = uptime ();
+  start_time = uptime();
-  while (linpack (nreps, arsize) < 10.)
+  while (linpack(nreps, arsize) < 10.) {
    {
    nreps *= 2;
  }
-  end_time = uptime ();
+  end_time = uptime();
-  bench_free (mempool);
+  bench_free(mempool);
-  printf ("time: %s ms\n", format_time (end_time - start_time));
+  BENCH_LOG(INFO, "time: %s", format_time(end_time - start_time));
  return 0;
 }
-REAL
+REAL linpack(long nreps, int arsize)
 linpack (long nreps, int arsize)
 {
  REAL *a, *b;
-  REAL norma, t1, kflops, tdgesl, tdgefa, totalt, toverhead, ops;
+  REAL norma, totalt;
  int *ipvt, n, info, lda;
  long i, arsize2d;
  lda = arsize;
  n = arsize / 2;
  arsize2d = (long)arsize * (long)arsize;
  ops = ((2.0 * n * n * n) / 3.0 + 2.0 * n * n);
  a = (REAL *)mempool;
  b = a + arsize2d;
  ipvt = (int *)&b[arsize];
-  tdgesl = 0;
+  totalt = second();
-  tdgefa = 0;
+  for (i = 0; i < nreps; i++) {
-  totalt = second ();
+    matgen(a, lda, n, b, &norma);
-  for (i = 0; i < nreps; i++)
+    dgefa(a, lda, n, ipvt, &info, 1);
-    {
+    dgesl(a, lda, n, ipvt, b, 0, 1);
      matgen (a, lda, n, b, &norma);
      t1 = second ();
      dgefa (a, lda, n, ipvt, &info, 1);
      tdgefa += second () - t1;
      t1 = second ();
      dgesl (a, lda, n, ipvt, b, 0, 1);
      tdgesl += second () - t1;
  }
-  for (i = 0; i < nreps; i++)
+  for (i = 0; i < nreps; i++) {
-    {
+    matgen(a, lda, n, b, &norma);
-      matgen (a, lda, n, b, &norma);
+    dgefa(a, lda, n, ipvt, &info, 0);
-      t1 = second ();
+    dgesl(a, lda, n, ipvt, b, 0, 0);
      dgefa (a, lda, n, ipvt, &info, 0);
      tdgefa += second () - t1;
      t1 = second ();
      dgesl (a, lda, n, ipvt, b, 0, 0);
      tdgesl += second () - t1;
  }
-  totalt = second () - totalt;
+  totalt = second() - totalt;
  if (totalt < 0.5 || tdgefa + tdgesl < 0.2)
    return (0.);
  kflops = 2. * nreps * ops / (1000. * (tdgefa + tdgesl));
  toverhead = totalt - tdgefa - tdgesl;
  if (tdgefa < 0.)
    tdgefa = 0.;
  if (tdgesl < 0.)
    tdgesl = 0.;
  if (toverhead < 0.)
    toverhead = 0.;
  // printf("%8ld %6.2f %6.2f%% %6.2f%% %6.2f%%  %9.3f\n", nreps, totalt,
  // 100. * tdgefa / totalt, 100. * tdgesl / totalt,
  // 100. * toverhead / totalt, kflops);
  return (totalt);
 }
@ -193,8 +122,7 @@ linpack (long nreps, int arsize)
 ** We would like to declare a[][lda], but c does not allow it.  In this
 ** function, references to a[i][j] are written a[lda*i+j].
 */
-static void
+static void matgen(REAL *a, int lda, int n, REAL *b, REAL *norma)
 matgen (REAL *a, int lda, int n, REAL *b, REAL *norma)
 {
  int init, i, j;
@ -202,8 +130,7 @@ matgen (REAL *a, int lda, int n, REAL *b, REAL *norma)
  init = 1325;
  *norma = 0.0;
  for (j = 0; j < n; j++)
-    for (i = 0; i < n; i++)
+    for (i = 0; i < n; i++) {
      {
      init = (int)((long)3125 * (long)init % 65536L);
      a[lda * j + i] = (init - 32768.0) / 16384.0;
      *norma = (a[lda * j + i] > *norma) ? a[lda * j + i] : *norma;
@ -266,8 +193,7 @@ matgen (REAL *a, int lda, int n, REAL *b, REAL *norma)
 **   blas daxpy,dscal,idamax
 **
 */
-static void
+static void dgefa(REAL *a, int lda, int n, int *ipvt, int *info, int roll)
 dgefa (REAL *a, int lda, int n, int *ipvt, int *info, int roll)
 {
  REAL t;
@ -275,30 +201,26 @@ dgefa (REAL *a, int lda, int n, int *ipvt, int *info, int roll)
  /* gaussian elimination with partial pivoting */
-  if (roll)
+  if (roll) {
    {
    *info = 0;
    nm1 = n - 1;
    if (nm1 >= 0)
-        for (k = 0; k < nm1; k++)
+      for (k = 0; k < nm1; k++) {
          {
        kp1 = k + 1;
        /* find l = pivot index */
-            l = idamax (n - k, &a[lda * k + k], 1) + k;
+        l = idamax(n - k, &a[lda * k + k], 1) + k;
        ipvt[k] = l;
        /* zero pivot implies this column already
           triangularized */
-            if (a[lda * k + l] != ZERO)
+        if (a[lda * k + l] != ZERO) {
              {
          /* interchange if necessary */
-                if (l != k)
+          if (l != k) {
                  {
            t = a[lda * k + l];
            a[lda * k + l] = a[lda * k + k];
            a[lda * k + k] = t;
@ -307,53 +229,45 @@ dgefa (REAL *a, int lda, int n, int *ipvt, int *info, int roll)
          /* compute multipliers */
          t = -ONE / a[lda * k + k];
-                dscal_r (n - (k + 1), t, &a[lda * k + k + 1], 1);
+          dscal_r(n - (k + 1), t, &a[lda * k + k + 1], 1);
          /* row elimination with column indexing */
-                for (j = kp1; j < n; j++)
+          for (j = kp1; j < n; j++) {
                  {
            t = a[lda * j + l];
-                    if (l != k)
+            if (l != k) {
                      {
              a[lda * j + l] = a[lda * j + k];
              a[lda * j + k] = t;
            }
-                    daxpy_r (n - (k + 1), t, &a[lda * k + k + 1], 1,
+            daxpy_r(n - (k + 1), t, &a[lda * k + k + 1], 1, &a[lda * j + k + 1],
-                             &a[lda * j + k + 1], 1);
+                    1);
          }
-              }
+        } else
            else
          (*info) = k;
      }
    ipvt[n - 1] = n - 1;
    if (a[lda * (n - 1) + (n - 1)] == ZERO)
      (*info) = n - 1;
-    }
+  } else {
  else
    {
    *info = 0;
    nm1 = n - 1;
    if (nm1 >= 0)
-        for (k = 0; k < nm1; k++)
+      for (k = 0; k < nm1; k++) {
          {
        kp1 = k + 1;
        /* find l = pivot index */
-            l = idamax (n - k, &a[lda * k + k], 1) + k;
+        l = idamax(n - k, &a[lda * k + k], 1) + k;
        ipvt[k] = l;
        /* zero pivot implies this column already
           triangularized */
-            if (a[lda * k + l] != ZERO)
+        if (a[lda * k + l] != ZERO) {
              {
          /* interchange if necessary */
-                if (l != k)
+          if (l != k) {
                  {
            t = a[lda * k + l];
            a[lda * k + l] = a[lda * k + k];
            a[lda * k + k] = t;
@ -362,23 +276,20 @@ dgefa (REAL *a, int lda, int n, int *ipvt, int *info, int roll)
          /* compute multipliers */
          t = -ONE / a[lda * k + k];
-                dscal_ur (n - (k + 1), t, &a[lda * k + k + 1], 1);
+          dscal_ur(n - (k + 1), t, &a[lda * k + k + 1], 1);
          /* row elimination with column indexing */
-                for (j = kp1; j < n; j++)
+          for (j = kp1; j < n; j++) {
                  {
            t = a[lda * j + l];
-                    if (l != k)
+            if (l != k) {
                      {
              a[lda * j + l] = a[lda * j + k];
              a[lda * j + k] = t;
            }
-                    daxpy_ur (n - (k + 1), t, &a[lda * k + k + 1], 1,
+            daxpy_ur(n - (k + 1), t, &a[lda * k + k + 1], 1,
                     &a[lda * j + k + 1], 1);
          }
-              }
+        } else
            else
          (*info) = k;
      }
    ipvt[n - 1] = n - 1;
@ -448,133 +359,109 @@ dgefa (REAL *a, int lda, int n, int *ipvt, int *info, int roll)
 **
 **   blas daxpy,ddot
 */
-static void
+static void dgesl(REAL *a, int lda, int n, int *ipvt, REAL *b, int job,
-dgesl (REAL *a, int lda, int n, int *ipvt, REAL *b, int job, int roll)
+                  int roll)
 {
  REAL t;
  int k, kb, l, nm1;
-  if (roll)
+  if (roll) {
    {
    nm1 = n - 1;
-      if (job == 0)
+    if (job == 0) {
        {
      /* job = 0 , solve  a * x = b   */
      /* first solve  l*y = b         */
      if (nm1 >= 1)
-            for (k = 0; k < nm1; k++)
+        for (k = 0; k < nm1; k++) {
              {
          l = ipvt[k];
          t = b[l];
-                if (l != k)
+          if (l != k) {
                  {
            b[l] = b[k];
            b[k] = t;
          }
-                daxpy_r (n - (k + 1), t, &a[lda * k + k + 1], 1, &b[k + 1], 1);
+          daxpy_r(n - (k + 1), t, &a[lda * k + k + 1], 1, &b[k + 1], 1);
        }
      /* now solve  u*x = y */
-          for (kb = 0; kb < n; kb++)
+      for (kb = 0; kb < n; kb++) {
            {
        k = n - (kb + 1);
        b[k] = b[k] / a[lda * k + k];
        t = -b[k];
-              daxpy_r (k, t, &a[lda * k + 0], 1, &b[0], 1);
+        daxpy_r(k, t, &a[lda * k + 0], 1, &b[0], 1);
      }
-        }
+    } else {
      else
        {
      /* job = nonzero, solve  trans(a) * x = b  */
      /* first solve  trans(u)*y = b             */
-          for (k = 0; k < n; k++)
+      for (k = 0; k < n; k++) {
-            {
+        t = ddot_r(k, &a[lda * k + 0], 1, &b[0], 1);
              t = ddot_r (k, &a[lda * k + 0], 1, &b[0], 1);
        b[k] = (b[k] - t) / a[lda * k + k];
      }
      /* now solve trans(l)*x = y     */
      if (nm1 >= 1)
-            for (kb = 1; kb < nm1; kb++)
+        for (kb = 1; kb < nm1; kb++) {
              {
          k = n - (kb + 1);
-                b[k] = b[k]
+          b[k] =
-                       + ddot_r (n - (k + 1), &a[lda * k + k + 1], 1,
+              b[k] + ddot_r(n - (k + 1), &a[lda * k + k + 1], 1, &b[k + 1], 1);
                                 &b[k + 1], 1);
          l = ipvt[k];
-                if (l != k)
+          if (l != k) {
                  {
            t = b[l];
            b[l] = b[k];
            b[k] = t;
          }
        }
    }
-    }
+  } else {
  else
    {
    nm1 = n - 1;
-      if (job == 0)
+    if (job == 0) {
        {
      /* job = 0 , solve  a * x = b   */
      /* first solve  l*y = b         */
      if (nm1 >= 1)
-            for (k = 0; k < nm1; k++)
+        for (k = 0; k < nm1; k++) {
              {
          l = ipvt[k];
          t = b[l];
-                if (l != k)
+          if (l != k) {
                  {
            b[l] = b[k];
            b[k] = t;
          }
-                daxpy_ur (n - (k + 1), t, &a[lda * k + k + 1], 1, &b[k + 1],
+          daxpy_ur(n - (k + 1), t, &a[lda * k + k + 1], 1, &b[k + 1], 1);
                          1);
        }
      /* now solve  u*x = y */
-          for (kb = 0; kb < n; kb++)
+      for (kb = 0; kb < n; kb++) {
            {
        k = n - (kb + 1);
        b[k] = b[k] / a[lda * k + k];
        t = -b[k];
-              daxpy_ur (k, t, &a[lda * k + 0], 1, &b[0], 1);
+        daxpy_ur(k, t, &a[lda * k + 0], 1, &b[0], 1);
      }
-        }
+    } else {
      else
        {
      /* job = nonzero, solve  trans(a) * x = b  */
      /* first solve  trans(u)*y = b             */
-          for (k = 0; k < n; k++)
+      for (k = 0; k < n; k++) {
-            {
+        t = ddot_ur(k, &a[lda * k + 0], 1, &b[0], 1);
              t = ddot_ur (k, &a[lda * k + 0], 1, &b[0], 1);
        b[k] = (b[k] - t) / a[lda * k + k];
      }
      /* now solve trans(l)*x = y     */
      if (nm1 >= 1)
-            for (kb = 1; kb < nm1; kb++)
+        for (kb = 1; kb < nm1; kb++) {
              {
          k = n - (kb + 1);
-                b[k] = b[k]
+          b[k] =
-                       + ddot_ur (n - (k + 1), &a[lda * k + k + 1], 1,
+              b[k] + ddot_ur(n - (k + 1), &a[lda * k + k + 1], 1, &b[k + 1], 1);
                                  &b[k + 1], 1);
          l = ipvt[k];
-                if (l != k)
+          if (l != k) {
                  {
            t = b[l];
            b[l] = b[k];
            b[k] = t;
@ -589,8 +476,7 @@ dgesl (REAL *a, int lda, int n, int *ipvt, REAL *b, int job, int roll)
 ** Jack Dongarra, linpack, 3/11/78.
 ** ROLLED version
 */
-static void
+static void daxpy_r(int n, REAL da, REAL *dx, int incx, REAL *dy, int incy)
 daxpy_r (int n, REAL da, REAL *dx, int incx, REAL *dy, int incy)
 {
  int i, ix, iy;
@ -600,8 +486,7 @@ daxpy_r (int n, REAL da, REAL *dx, int incx, REAL *dy, int incy)
  if (da == ZERO)
    return;
-  if (incx != 1 || incy != 1)
+  if (incx != 1 || incy != 1) {
    {
    /* code for unequal increments or equal increments != 1 */
@ -611,8 +496,7 @@ daxpy_r (int n, REAL da, REAL *dx, int incx, REAL *dy, int incy)
      ix = (-n + 1) * incx + 1;
    if (incy < 0)
      iy = (-n + 1) * incy + 1;
-      for (i = 0; i < n; i++)
+    for (i = 0; i < n; i++) {
        {
      dy[iy] = dy[iy] + da * dx[ix];
      ix = ix + incx;
      iy = iy + incy;
@ -631,8 +515,7 @@ daxpy_r (int n, REAL da, REAL *dx, int incx, REAL *dy, int incy)
 ** Jack Dongarra, linpack, 3/11/78.
 ** ROLLED version
 */
-static REAL
+static REAL ddot_r(int n, REAL *dx, int incx, REAL *dy, int incy)
 ddot_r (int n, REAL *dx, int incx, REAL *dy, int incy)
 {
  REAL dtemp;
@ -643,8 +526,7 @@ ddot_r (int n, REAL *dx, int incx, REAL *dy, int incy)
  if (n <= 0)
    return (ZERO);
-  if (incx != 1 || incy != 1)
+  if (incx != 1 || incy != 1) {
    {
    /* code for unequal increments or equal increments != 1 */
@ -654,8 +536,7 @@ ddot_r (int n, REAL *dx, int incx, REAL *dy, int incy)
      ix = (-n + 1) * incx;
    if (incy < 0)
      iy = (-n + 1) * incy;
-      for (i = 0; i < n; i++)
+    for (i = 0; i < n; i++) {
        {
      dtemp = dtemp + dx[ix] * dy[iy];
      ix = ix + incx;
      iy = iy + incy;
@ -675,16 +556,14 @@ ddot_r (int n, REAL *dx, int incx, REAL *dy, int incy)
 ** Jack Dongarra, linpack, 3/11/78.
 ** ROLLED version
 */
-static void
+static void dscal_r(int n, REAL da, REAL *dx, int incx)
 dscal_r (int n, REAL da, REAL *dx, int incx)
 {
  int i, nincx;
  if (n <= 0)
    return;
-  if (incx != 1)
+  if (incx != 1) {
    {
    /* code for increment not equal to 1 */
@ -705,8 +584,7 @@ dscal_r (int n, REAL da, REAL *dx, int incx)
 ** Jack Dongarra, linpack, 3/11/78.
 ** UNROLLED version
 */
-static void
+static void daxpy_ur(int n, REAL da, REAL *dx, int incx, REAL *dy, int incy)
 daxpy_ur (int n, REAL da, REAL *dx, int incx, REAL *dy, int incy)
 {
  int i, ix, iy, m;
@ -716,8 +594,7 @@ daxpy_ur (int n, REAL da, REAL *dx, int incx, REAL *dy, int incy)
  if (da == ZERO)
    return;
-  if (incx != 1 || incy != 1)
+  if (incx != 1 || incy != 1) {
    {
    /* code for unequal increments or equal increments != 1 */
@ -727,8 +604,7 @@ daxpy_ur (int n, REAL da, REAL *dx, int incx, REAL *dy, int incy)
      ix = (-n + 1) * incx + 1;
    if (incy < 0)
      iy = (-n + 1) * incy + 1;
-      for (i = 0; i < n; i++)
+    for (i = 0; i < n; i++) {
        {
      dy[iy] = dy[iy] + da * dx[ix];
      ix = ix + incx;
      iy = iy + incy;
@ -739,15 +615,13 @@ daxpy_ur (int n, REAL da, REAL *dx, int incx, REAL *dy, int incy)
  /* code for both increments equal to 1 */
  m = n % 4;
-  if (m != 0)
+  if (m != 0) {
    {
    for (i = 0; i < m; i++)
      dy[i] = dy[i] + da * dx[i];
    if (n < 4)
      return;
  }
-  for (i = m; i < n; i = i + 4)
+  for (i = m; i < n; i = i + 4) {
    {
    dy[i] = dy[i] + da * dx[i];
    dy[i + 1] = dy[i + 1] + da * dx[i + 1];
    dy[i + 2] = dy[i + 2] + da * dx[i + 2];
@ -760,8 +634,7 @@ daxpy_ur (int n, REAL da, REAL *dx, int incx, REAL *dy, int incy)
 ** Jack Dongarra, linpack, 3/11/78.
 ** UNROLLED version
 */
-static REAL
+static REAL ddot_ur(int n, REAL *dx, int incx, REAL *dy, int incy)
 ddot_ur (int n, REAL *dx, int incx, REAL *dy, int incy)
 {
  REAL dtemp;
@ -772,8 +645,7 @@ ddot_ur (int n, REAL *dx, int incx, REAL *dy, int incy)
  if (n <= 0)
    return (ZERO);
-  if (incx != 1 || incy != 1)
+  if (incx != 1 || incy != 1) {
    {
    /* code for unequal increments or equal increments != 1 */
@ -783,8 +655,7 @@ ddot_ur (int n, REAL *dx, int incx, REAL *dy, int incy)
      ix = (-n + 1) * incx;
    if (incy < 0)
      iy = (-n + 1) * incy;
-      for (i = 0; i < n; i++)
+    for (i = 0; i < n; i++) {
        {
      dtemp = dtemp + dx[ix] * dy[iy];
      ix = ix + incx;
      iy = iy + incy;
@ -795,18 +666,16 @@ ddot_ur (int n, REAL *dx, int incx, REAL *dy, int incy)
  /* code for both increments equal to 1 */
  m = n % 5;
-  if (m != 0)
+  if (m != 0) {
    {
    for (i = 0; i < m; i++)
      dtemp = dtemp + dx[i] * dy[i];
    if (n < 5)
      return (dtemp);
  }
-  for (i = m; i < n; i = i + 5)
+  for (i = m; i < n; i = i + 5) {
-    {
+    dtemp = dtemp + dx[i] * dy[i] + dx[i + 1] * dy[i + 1] +
-      dtemp = dtemp + dx[i] * dy[i] + dx[i + 1] * dy[i + 1]
+            dx[i + 2] * dy[i + 2] + dx[i + 3] * dy[i + 3] +
-              + dx[i + 2] * dy[i + 2] + dx[i + 3] * dy[i + 3]
+            dx[i + 4] * dy[i + 4];
              + dx[i + 4] * dy[i + 4];
  }
  return (dtemp);
 }
@ -816,16 +685,14 @@ ddot_ur (int n, REAL *dx, int incx, REAL *dy, int incy)
 ** Jack Dongarra, linpack, 3/11/78.
 ** UNROLLED version
 */
-static void
+static void dscal_ur(int n, REAL da, REAL *dx, int incx)
 dscal_ur (int n, REAL da, REAL *dx, int incx)
 {
  int i, m, nincx;
  if (n <= 0)
    return;
-  if (incx != 1)
+  if (incx != 1) {
    {
    /* code for increment not equal to 1 */
@ -838,15 +705,13 @@ dscal_ur (int n, REAL da, REAL *dx, int incx)
  /* code for increment equal to 1 */
  m = n % 5;
-  if (m != 0)
+  if (m != 0) {
    {
    for (i = 0; i < m; i++)
      dx[i] = da * dx[i];
    if (n < 5)
      return;
  }
-  for (i = m; i < n; i = i + 5)
+  for (i = m; i < n; i = i + 5) {
    {
    dx[i] = da * dx[i];
    dx[i + 1] = da * dx[i + 1];
    dx[i + 2] = da * dx[i + 2];
@ -859,8 +724,7 @@ dscal_ur (int n, REAL da, REAL *dx, int incx)
 ** Finds the index of element having max. absolute value.
 ** Jack Dongarra, linpack, 3/11/78.
 */
-static int
+static int idamax(int n, REAL *dx, int incx)
 idamax (int n, REAL *dx, int incx)
 {
  REAL dmax;
@ -870,44 +734,37 @@ idamax (int n, REAL *dx, int incx)
    return (-1);
  if (n == 1)
    return (0);
-  if (incx != 1)
+  if (incx != 1) {
    {
    /* code for increment not equal to 1 */
    ix = 1;
-      dmax = fabs ((double)dx[0]);
+    dmax = fabs((double)dx[0]);
    ix = ix + incx;
-      for (i = 1; i < n; i++)
+    for (i = 1; i < n; i++) {
-        {
+      if (fabs((double)dx[ix]) > dmax) {
          if (fabs ((double)dx[ix]) > dmax)
            {
        itemp = i;
-              dmax = fabs ((double)dx[ix]);
+        dmax = fabs((double)dx[ix]);
      }
      ix = ix + incx;
    }
-    }
+  } else {
  else
    {
    /* code for increment equal to 1 */
    itemp = 0;
-      dmax = fabs ((double)dx[0]);
+    dmax = fabs((double)dx[0]);
    for (i = 1; i < n; i++)
-        if (fabs ((double)dx[i]) > dmax)
+      if (fabs((double)dx[i]) > dmax) {
          {
        itemp = i;
-            dmax = fabs ((double)dx[i]);
+        dmax = fabs((double)dx[i]);
      }
  }
  return (itemp);
 }
-static REAL
+static REAL second(void)
 second (void)
 {
-  return ((REAL)(uptime () / 1000));
+  return ((REAL)(uptime() / 1000));
 }
--- a/src/mcf/Makefile
+++ b/src/mcf/Makefile
@ -1,9 +1,10 @@
 NAME = mcf
 mainargs ?= ref
 BENCH_LIBS = bench openlibm soft-fp
-SRCS = 	main.c mcf.c pqueue.c $(shell realpath ./test-gen/test.c)
+SRCS = 	main.c mcf.c pqueue.c ./configs/$(mainargs)-config.c
 INC_PATH += 	../common/openlibm/include \
 			../common/openlibm/src \
@ -12,7 +13,6 @@ INC_PATH += 	../common/openlibm/include \
 include $(AM_HOME)/Makefile
 BENCH_LINKAGE = $(addsuffix -$(ARCH).a, $(join \
 					 $(addsuffix /build/, $(addprefix $(WORK_DIR)/../common/, $(BENCH_LIBS))), \
 					 $(BENCH_LIBS) ))
--- a/src/mcf/configs/ref-config.c
+++ b/src/mcf/configs/ref-config.c
@ -0,0 +1,93 @@
 #include "input.h"
 /*
  * Row counts of the node_buf, edge_buf and demands_buf tables defined below
  * (14, 25 and 40 rows respectively). They are plain constants, not derived
  * from the arrays, so update them whenever a table row is added or removed.
  */
 const int nodes_num   =    14;
 const int edges_num   =    25;
 const int demands_num =    40;
 node_t node_buf[]={
 	{    0,     0,     0,     6},
 	{    1,     0,     0,     1},
 	{    2,     0,     0,     1},
 	{    3,     0,     0,     1},
 	{    4,     0,     0,     6},
 	{    5,     0,     0,     1},
 	{    6,     0,     0,     1},
 	{    7,     0,     0,     1},
 	{    8,     0,     0,     2},
 	{    9,     0,     0,     1},
 	{   10,     0,     0,     1},
 	{   11,     0,     0,     2},
 	{   12,     0,     0,     1},
 	{   13,     0,     0,     1},
 };
 edge_t edge_buf[]={
 	{    0,     0,     1,   101,   122},
 	{    1,     1,     2,   179,   377},
 	{    2,     2,     3,   124,   202},
 	{    3,     3,     4,   125,   261},
 	{    4,     4,     5,   182,   423},
 	{    5,     5,     6,   184,   405},
 	{    6,     6,     7,   140,   259},
 	{    7,     7,     8,   118,   398},
 	{    8,     8,     9,   128,   228},
 	{    9,     9,    10,   186,   238},
 	{   10,    10,    11,   172,   236},
 	{   11,    11,    12,   187,   350},
 	{   12,    12,    13,   163,   217},
 	{   13,     0,     4,   180,   181},
 	{   14,     0,     6,     5,   249},
 	{   15,     0,     7,   108,   427},
 	{   16,     0,    12,   155,   139},
 	{   17,     0,     8,     3,   322},
 	{   18,     4,     7,   106,   182},
 	{   19,     4,     9,    81,   345},
 	{   20,     4,     5,   212,   289},
 	{   21,     4,     6,   166,   419},
 	{   22,     4,    10,   198,    30},
 	{   23,     8,    12,   221,   308},
 	{   24,    11,    12,   179,   235},
 };
 demands_t demands_buf[]={
 	{    0,     0,    13,    10},
 	{    1,     0,     1,    96},
 	{    2,     1,     6,    78},
 	{    3,     1,     9,    95},
 	{    4,     3,     5,    35},
 	{    5,     3,    10,    77},
 	{    6,     3,    13,    38},
 	{    7,     3,     9,    98},
 	{    8,     3,    11,    92},
 	{    9,     3,     6,    29},
 	{   10,     3,     4,    38},
 	{   11,     4,    10,    73},
 	{   12,     4,     5,     6},
 	{   13,     4,    12,    28},
 	{   14,     4,    13,     4},
 	{   15,     4,     8,    56},
 	{   16,     4,     9,    22},
 	{   17,     4,     7,    48},
 	{   18,     4,     6,    29},
 	{   19,     5,     9,    35},
 	{   20,     5,    13,    39},
 	{   21,     5,    12,    77},
 	{   22,     5,     8,    42},
 	{   23,     5,    10,    63},
 	{   24,     6,    12,     7},
 	{   25,     6,    10,    25},
 	{   26,     6,    11,    18},
 	{   27,     6,     8,    29},
 	{   28,     6,    13,    36},
 	{   29,     6,     9,    45},
 	{   30,     7,    11,    36},
 	{   31,     7,    13,    95},
 	{   32,     7,    12,    68},
 	{   33,     7,     8,    33},
 	{   34,     7,    10,    11},
 	{   35,     8,    13,    82},
 	{   36,     8,    10,     7},
 	{   37,     9,    10,    25},
 	{   38,     9,    11,    84},
 	{   39,    10,    13,    78},
 };
--- a/src/mcf/configs/test-config.c
+++ b/src/mcf/configs/test-config.c
--- a/src/mcf/configs/train-config.c
+++ b/src/mcf/configs/train-config.c
@ -0,0 +1,31 @@
 #include "input.h"
 /*
  * Element counts for the tables defined below. Each value matches the number
  * of initializer rows in node_buf, edge_buf and demands_buf respectively and
  * has to be updated together with them.
  */
 const int nodes_num   =     6;
 const int edges_num   =     5;
 const int demands_num =     6;
 /*
  * Node table: six rows, the first column numbers the nodes 0..5.
  * NOTE(review): the meaning of the other three columns (0, 0, 1 in every
  * row) is not visible here -- see node_t in input.h.
  */
 node_t node_buf[]={
 	{    0,     0,     0,     1},
 	{    1,     0,     0,     1},
 	{    2,     0,     0,     1},
 	{    3,     0,     0,     1},
 	{    4,     0,     0,     1},
 	{    5,     0,     0,     1},
 };
 /*
  * Edge table: five rows, the first column numbers the edges 0..4.
  * NOTE(review): columns two and three look like the two endpoint nodes of
  * each edge (0-1, 1-2, 2-3, 3-4, 4-5), and the last two columns are
  * presumably per-edge attributes such as capacity and cost -- confirm
  * against edge_t in input.h.
  */
 edge_t edge_buf[]={
 	{    0,     0,     1,   164,   484},
 	{    1,     1,     2,   193,   186},
 	{    2,     2,     3,   167,   274},
 	{    3,     3,     4,   180,   133},
 	{    4,     4,     5,   129,   348},
 };
 /*
  * Demand table: six rows, the first column numbers the demands 0..5.
  * NOTE(review): the remaining columns look like {source node, destination
  * node, requested amount} -- confirm against demands_t in input.h.
  */
 demands_t demands_buf[]={
 	{    0,     0,     5,    10},
 	{    1,     0,     2,    52},
 	{    2,     0,     4,    13},
 	{    3,     1,     5,    20},
 	{    4,     1,     2,    72},
 	{    5,     1,     3,    44},
 };
--- a/src/mcf/include/input.h
+++ b/src/mcf/include/input.h
@ -1,5 +1,5 @@
-#ifndef __test_h__
+#ifndef __TEST_H__
-#define __test_h__
+#define __TEST_H__
 extern const int nodes_num;
 extern const int edges_num;
--- a/src/mcf/main.c
+++ b/src/mcf/main.c
@ -1,6 +1,6 @@
 #include <am.h>
 #include <klib-macros.h>
-#include <bench_printf.h>
+#include <bench_debug.h>
 #include <mcf.h>
 #include <input.h>
@ -18,7 +18,7 @@ int main(char *args)
    uint64_t start_time, end_time;
    start_time = uptime();
-    bench_printf("\nRandomized rounded paths: size: %d\n", sizeof(size_t));
+    BENCH_LOG(DEBUG, "\nRandomized rounded paths: size: %d", sizeof(size_t));
    for(demands_select = 0; demands_select < demands_num; demands_select++)
    {
        // (1) run MCF solver;
@ -52,7 +52,7 @@ int main(char *args)
        free_topology(&mcf);
    }
    end_time = uptime();
-    bench_printf("time: %s ms \n", format_time(end_time - start_time));
+    BENCH_LOG(INFO, "time: %s", format_time(end_time - start_time));
    return 0;
 }
--- a/src/mcf/mcf.c
+++ b/src/mcf/mcf.c
@ -1,5 +1,5 @@
 #include <klib.h>
-#include <bench_printf.h>
+#include <bench_debug.h>
 #include <stdio.h>
 #include <pqueue.h>
 #include <mcf.h>
@ -90,13 +90,13 @@ bool parse_options(MCF *mcf, char *arg, float epsilon)
    else if (strcmp(arg, "MCMCF") == 0) {
        mcf->_problem_type = MCMCF_TYPE;
    } else {
-        bench_printf("Error:  -problem_type must be MCF or MCMCF.\n");
+        BENCH_LOG(ERROR, "Error:  -problem_type must be MCF or MCMCF.\n");
        assert(0);
    }
    mcf->_epsilon1 = epsilon;
    if (mcf->_epsilon1 <= 0 || mcf->_epsilon1 >= 1) {
-        bench_printf("Error:  -epsilon option requires a float in (0,1).\n");
+        BENCH_LOG(ERROR, "Error:  -epsilon option requires a float in (0,1).\n");
        assert(0);
    }
@ -1035,7 +1035,7 @@ void print_routing_paths(MCF *mcf_v)
    for ( int i = 0; i < mcf_v->no_commodity; i++) {
        // printf("Commodity %d: %d -> %d: ", i, 
        //        mcf_v->_commodities[i].src, mcf_v->_commodities[i].dest);
-        bench_printf("Commodity %d: %d -> %d: ", demands_select, 
+        BENCH_LOG(DEBUG, "Commodity %d: %d -> %d: ", demands_select, 
               mcf_v->_commodities[i].src, mcf_v->_commodities[i].dest);
--- a/src/mcf/pqueue.c
+++ b/src/mcf/pqueue.c
@ -3,7 +3,7 @@
 // #include <float.h>
 #include <klib.h>
 #include <pqueue.h>
-#include <bench_printf.h>
+#include <bench_debug.h>
 ////////////////////////////////////////////////////////////////////////////////
 //
--- a/src/stream/Makefile
+++ b/src/stream/Makefile
@ -1,7 +1,9 @@
 NAME = stream
 mainargs ?= ref
 BENCH_LIBS = bench openlibm soft-fp
-SRCS = stream.c
+SRCS = stream.c ./configs/$(mainargs)-config.c
 INC_PATH += 	../common/openlibm/include \
 			../common/openlibm/src \
--- a/src/stream/configs/ref-config.c
+++ b/src/stream/configs/ref-config.c
@ -0,0 +1,3 @@
 #include <stream.h>
 /*
  * STREAM "ref" workload: number of elements in each work array.
  * Defined const so that the definition has the same type as the
  * `extern const bench_stream_config config;` declaration in stream.c;
  * a qualifier mismatch between the two translation units is undefined
  * behaviour.
  */
 const bench_stream_config config = {200000};
--- a/src/stream/configs/test-config.c
+++ b/src/stream/configs/test-config.c
@ -0,0 +1,4 @@
 #include <stream.h>
 /*
  * STREAM "test" workload: number of elements in each work array.
  * Defined const so that the definition has the same type as the
  * `extern const bench_stream_config config;` declaration in stream.c;
  * a qualifier mismatch between the two translation units is undefined
  * behaviour.
  */
 const bench_stream_config config = {100000};
--- a/src/stream/configs/train-config.c
+++ b/src/stream/configs/train-config.c
@ -0,0 +1,4 @@
 #include <stream.h>
 /*
  * STREAM "train" workload: number of elements in each work array.
  * Defined const so that the definition has the same type as the
  * `extern const bench_stream_config config;` declaration in stream.c;
  * a qualifier mismatch between the two translation units is undefined
  * behaviour.
  */
 const bench_stream_config config = {10000};
--- a/src/stream/include/stream.h
+++ b/src/stream/include/stream.h
@ -0,0 +1,4 @@
 #ifndef BENCH_STREAM_H
 #define BENCH_STREAM_H

 /*
  * Per-workload settings for the STREAM benchmark. One instance named
  * `config` is defined by the selected configs/<mainargs>-config.c file and
  * read by main() in stream.c.
  */
 typedef struct {
   /* Number of elements allocated for each of the work arrays a, b and c. */
   unsigned int stream_array_size;
 } bench_stream_config;

 #endif /* BENCH_STREAM_H */
--- a/src/stream/stream.c
+++ b/src/stream/stream.c
@ -42,9 +42,11 @@
 /*----------------------------------------------------------------------------*/
 #include <am.h>
 #include <bench.h>
-#include <klib.h>
+#include <bench_malloc.h>
-#include <klib-macros.h>
+#include <stream.h>
 #include <float.h>
 #include <klib-macros.h>
 #include <klib.h>
 /*-----------------------------------------------------------------------
 * INSTRUCTIONS:
 *
@ -70,7 +72,8 @@
 *           Example: most versions of Windows have a 10 millisecond timer
 *               granularity.  20 "ticks" at 10 ms/tic is 200 milliseconds.
 *               If the chip is capable of 10 GB/s, it moves 2 GB in 200 msec.
- *               This means the each array must be at least 1 GB, or 128M elements.
+ *               This means the each array must be at least 1 GB, or 128M
 *elements.
 *
 *      Version 5.10 increases the default array size from 2 million
 *          elements to 10 million elements in response to the increasing
@ -85,12 +88,12 @@
 *          code for the (many) compilers that support preprocessor definitions
 *          on the compile line.  E.g.,
 *                gcc -O -DSTREAM_ARRAY_SIZE=100000000 stream.c -o stream.100M
- *          will override the default size of 10M with a new size of 100M elements
+ *          will override the default size of 10M with a new size of 100M
- *          per array.
+ *elements per array.
 */
-#if (STREAM_ARRAY_SIZE+0) > 0
+#if (STREAM_ARRAY_SIZE + 0) > 0
 #else
-#   define STREAM_ARRAY_SIZE 200000
+#define STREAM_ARRAY_SIZE 200000
 #endif
 /*  2) STREAM runs each kernel "NTIMES" times and reports the *best* result
 *         for any iteration after the first, therefore the minimum value
@ -102,24 +105,24 @@
 *         code using, for example, "-DNTIMES=7".
 */
 #ifdef NTIMES
-#if NTIMES<=1
+#if NTIMES <= 1
-#   define NTIMES	10
+#define NTIMES 10
 #endif
 #endif
 #ifndef NTIMES
-#   define NTIMES	2
+#define NTIMES 2
 #endif
 /*  Users are allowed to modify the "OFFSET" variable, which *may* change the
 *         relative alignment of the arrays (though compilers may change the
- *         effective offset by making the arrays non-contiguous on some systems). 
+ *         effective offset by making the arrays non-contiguous on some
- *      Use of non-zero values for OFFSET can be especially helpful if the
+ * systems). Use of non-zero values for OFFSET can be especially helpful if the
 *         STREAM_ARRAY_SIZE is set to a value close to a large power of 2.
 *      OFFSET can also be set on the compile line without changing the source
 *         code using, for example, "-DOFFSET=56".
 */
 #ifndef OFFSET
-#   define OFFSET	0
+#define OFFSET 0
 #endif
 /*
@ -133,84 +136,88 @@
 *     This is known to work on many, many systems....
 *
 *     To use multiple cores, you need to tell the compiler to obey the OpenMP
- *       directives in the code.  This varies by compiler, but a common example is
+ *       directives in the code.  This varies by compiler, but a common example
- *            gcc -O -fopenmp stream.c -o stream_omp
+ *is gcc -O -fopenmp stream.c -o stream_omp The environment variable
- *       The environment variable OMP_NUM_THREADS allows runtime control of the 
+ *OMP_NUM_THREADS allows runtime control of the number of threads/cores used
- *         number of threads/cores used when the resulting "stream_omp" program
+ *when the resulting "stream_omp" program is executed.
 *         is executed.
 *
 *     To run with single-precision variables and arithmetic, simply add
 *         -DSTREAM_TYPE=float
 *     to the compile line.
- *     Note that this changes the minimum array sizes required --- see (1) above.
+ *     Note that this changes the minimum array sizes required --- see (1)
 *above.
 *
- *     The preprocessor directive "TUNED" does not do much -- it simply causes the 
+ *     The preprocessor directive "TUNED" does not do much -- it simply causes
- *       code to call separate functions to execute each kernel.  Trivial versions
+ *the code to call separate functions to execute each kernel.  Trivial versions
 *       of these functions are provided, but they are *not* tuned -- they just
 *       provide predefined interfaces to be replaced with tuned code.
 *
 *
 *	4) Optional: Mail the results to mccalpin@cs.virginia.edu
 *	   Be sure to include info that will help me understand:
- *		a) the computer hardware configuration (e.g., processor model, memory type)
+ *		a) the computer hardware configuration (e.g., processor model,
- *		b) the compiler name/version and compilation flags
+ *memory type) b) the compiler name/version and compilation flags c) any
- *      c) any run-time information (such as OMP_NUM_THREADS)
+ *run-time information (such as OMP_NUM_THREADS) d) all of the output from the
- *		d) all of the output from the test case.
+ *test case.
 *
 * Thanks!
 *
 *-----------------------------------------------------------------------*/
-# define HLINE "-------------------------------------------------------------\n"
+#define HLINE "-------------------------------------------------------------\n"
 #define DIS_OPENMP
-# ifndef MIN
+#ifndef MIN
-# define MIN(x,y) ((x)<(y)?(x):(y))
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
-# endif
+#endif
-# ifndef MAX
+#ifndef MAX
-# define MAX(x,y) ((x)>(y)?(x):(y))
+#define MAX(x, y) ((x) > (y) ? (x) : (y))
-# endif
+#endif
 #ifndef STREAM_TYPE
 #define STREAM_TYPE double
 #endif
-static STREAM_TYPE	a[STREAM_ARRAY_SIZE+OFFSET],
+// static STREAM_TYPE a[STREAM_ARRAY_SIZE + OFFSET], b[STREAM_ARRAY_SIZE + OFFSET],
-			b[STREAM_ARRAY_SIZE+OFFSET],
+//  c[STREAM_ARRAY_SIZE + OFFSET];
 			c[STREAM_ARRAY_SIZE+OFFSET];
 static double avgtime[4] = {0}, maxtime[4] = {0},
-		mintime[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
+              mintime[4] = {FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX};
-static char	*label[4] = {"Copy:      ", "Scale:     ",
+static char *label[4] = {
-    "Add:       ", "Triad:     "};
+    "Copy:      ", "Scale:     ", "Add:       ", "Triad:     "};
-static double	bytes[4] = {
+// static double bytes[4] = {2 * sizeof(STREAM_TYPE) * STREAM_ARRAY_SIZE,
-    2 * sizeof(STREAM_TYPE) * STREAM_ARRAY_SIZE,
+//                        2 * sizeof(STREAM_TYPE) * STREAM_ARRAY_SIZE,
-    2 * sizeof(STREAM_TYPE) * STREAM_ARRAY_SIZE,
+//                        3 * sizeof(STREAM_TYPE) * STREAM_ARRAY_SIZE,
-    3 * sizeof(STREAM_TYPE) * STREAM_ARRAY_SIZE,
+//                        3 * sizeof(STREAM_TYPE) * STREAM_ARRAY_SIZE};
    3 * sizeof(STREAM_TYPE) * STREAM_ARRAY_SIZE
    };
 extern double mysecond();
 extern void checkSTREAMresults();
 #ifdef TUNED
 extern void tuned_STREAM_Copy();
 extern void tuned_STREAM_Scale(STREAM_TYPE scalar);
 extern void tuned_STREAM_Add();
 extern void tuned_STREAM_Triad(STREAM_TYPE scalar);
 #endif
 #ifndef DIS_OPENMP
 #ifdef _OPENMP
 extern int omp_get_num_threads();
 #endif
 #endif
-int
+
-main()
+extern const bench_stream_config config;
-    {
+
 int main() {
  int asize = config.stream_array_size;
  bench_malloc_init();
  STREAM_TYPE *a = bench_malloc(sizeof(STREAM_TYPE) * (asize + OFFSET));
  STREAM_TYPE *b = bench_malloc(sizeof(STREAM_TYPE) * (asize + OFFSET));
  STREAM_TYPE *c = bench_malloc(sizeof(STREAM_TYPE) * (asize + OFFSET));
  double bytes[4] = {2 * sizeof(STREAM_TYPE) * asize,
                          2 * sizeof(STREAM_TYPE) * asize,
                          3 * sizeof(STREAM_TYPE) * asize,
                          3 * sizeof(STREAM_TYPE) * asize};
  STREAM_TYPE *vptr[] = {a, b, c};
  int quantum, checktick();
-    int			BytesPerWord;
+  // int BytesPerWord;
  int k;
  size_t j;
  STREAM_TYPE scalar;
@ -218,32 +225,36 @@ main()
  /* --- SETUP --- determine precision and check timing --- */
-    printf(HLINE);
+  // printf(HLINE);
-    printf("STREAM version $Revision: 5.10 $\n");
+  // printf("STREAM version $Revision: 5.10 $\n");
-    printf(HLINE);
+  // printf(HLINE);
-    BytesPerWord = sizeof(STREAM_TYPE);
+  // BytesPerWord = sizeof(STREAM_TYPE);
-    printf("This system uses %d bytes per array element.\n",
+  // printf("This system uses %d bytes per array element.\n", BytesPerWord);
 	BytesPerWord);
-    printf(HLINE);
+  // printf(HLINE);
 #ifdef N
-    printf("*****  WARNING: ******\n");
+  /* printf("*****  WARNING: ******\n");
-    printf("      It appears that you set the preprocessor variable N when compiling this code.\n");
+  printf("      It appears that you set the preprocessor variable N when "
-    printf("      This version of the code uses the preprocesor variable STREAM_ARRAY_SIZE to control the array size\n");
+         "compiling this code.\n");
-    printf("      Reverting to default value of STREAM_ARRAY_SIZE=%.0f\n",(double) STREAM_ARRAY_SIZE);
+  printf("      This version of the code uses the preprocesor variable "
-    printf("*****  WARNING: ******\n");
+         "STREAM_ARRAY_SIZE to control the array size\n");
  printf("      Reverting to default value of STREAM_ARRAY_SIZE=%.0f\n",
         (double)STREAM_ARRAY_SIZE);
  printf("*****  WARNING: ******\n"); */
 #endif
-
+  /*
-    printf("Array size = %.0f (elements), Offset = %d (elements)\n" , (double) STREAM_ARRAY_SIZE, OFFSET);
+  printf("Array size = %.0f (elements), Offset = %d (elements)\n",
         (double)STREAM_ARRAY_SIZE, OFFSET);
  printf("Memory per array = %.1f MiB (= %.1f GiB).\n",
-	BytesPerWord * ( (double) STREAM_ARRAY_SIZE / 1024.0/1024.0),
+         BytesPerWord * ((double)STREAM_ARRAY_SIZE / 1024.0 / 1024.0),
-	BytesPerWord * ( (double) STREAM_ARRAY_SIZE / 1024.0/1024.0/1024.0));
+         BytesPerWord * ((double)STREAM_ARRAY_SIZE / 1024.0 / 1024.0 / 1024.0));
  printf("Total memory required = %.1f MiB (= %.1f GiB).\n",
-	(3.0 * BytesPerWord) * ( (double) STREAM_ARRAY_SIZE / 1024.0/1024.),
+         (3.0 * BytesPerWord) * ((double)STREAM_ARRAY_SIZE / 1024.0 / 1024.),
-	(3.0 * BytesPerWord) * ( (double) STREAM_ARRAY_SIZE / 1024.0/1024./1024.));
+         (3.0 * BytesPerWord) *
             ((double)STREAM_ARRAY_SIZE / 1024.0 / 1024. / 1024.));
  printf("Each kernel will be executed %d times.\n", NTIMES);
  printf(" The *best* time for each kernel (excluding the first iteration)\n");
-    printf(" will be used to compute the reported bandwidth.\n");
+  printf(" will be used to compute the reported bandwidth.\n"); */
 #ifndef DIS_OPENMP
 #ifdef _OPENMP
@ -253,7 +264,7 @@ main()
 #pragma omp master
    {
      k = omp_get_num_threads();
-	    printf ("Number of Threads requested = %i\n",k);
+      printf("Number of Threads requested = %i\n", k);
    }
  }
 #endif
@ -265,7 +276,7 @@ main()
 #pragma omp parallel
 #pragma omp atomic
  k++;
-    printf ("Number of Threads counted = %i\n",k);
+  printf("Number of Threads counted = %i\n", k);
 #endif
 #endif
@ -273,7 +284,7 @@ main()
 #ifndef DIS_OPENMP
 #pragma omp parallel for
 #endif
-    for (j=0; j<STREAM_ARRAY_SIZE; j++) {
+  for (j = 0; j < asize; j++) {
    a[j] = 1.0;
    b[j] = 2.0;
    c[j] = 0.0;
@ -281,9 +292,10 @@ main()
  printf(HLINE);
-    if  ( (quantum = checktick()) >= 1) 
+  if ((quantum = checktick()) >= 1)
    printf("Your clock granularity/precision appears to be "
-	    "%d microseconds.\n", quantum);
+           "%d microseconds.\n",
           quantum);
  else {
    printf("Your clock granularity appears to be "
           "less than one microsecond.\n");
@ -294,13 +306,14 @@ main()
 #ifndef DIS_OPENMP
 #pragma omp parallel for
 #endif
-    for (j = 0; j < STREAM_ARRAY_SIZE; j++)
+  for (j = 0; j < asize; j++)
    a[j] = 2.0E0 * a[j];
  t = 1.0E6 * (mysecond() - t);
  printf("Each test below will take on the order"
-	" of %d microseconds.\n", (int) t  );
+         " of %d microseconds.\n",
-    printf("   (= %d clock ticks)\n", (int) (t/quantum) );
+         (int)t);
  printf("   (= %d clock ticks)\n", (int)(t / quantum));
  printf("Increase the size of the arrays if this shows that\n");
  printf("you are not getting at least 20 clock ticks per test.\n");
@ -314,63 +327,45 @@ main()
  /*	--- MAIN LOOP --- repeat test cases NTIMES times --- */
  scalar = 3.0;
-    for (k=0; k<NTIMES; k++)
+  for (k = 0; k < NTIMES; k++) {
 	{
    times[0][k] = mysecond();
 #ifdef TUNED
        tuned_STREAM_Copy();
 #else
 #ifndef DIS_OPENMP
 #pragma omp parallel for
 #endif
-	for (j=0; j<STREAM_ARRAY_SIZE; j++)
+    for (j = 0; j < asize; j++)
      c[j] = a[j];
 #endif
    times[0][k] = mysecond() - times[0][k];
    times[1][k] = mysecond();
 #ifdef TUNED
        tuned_STREAM_Scale(scalar);
 #else
 #ifndef DIS_OPENMP
 #pragma omp parallel for
 #endif
-	for (j=0; j<STREAM_ARRAY_SIZE; j++)
+    for (j = 0; j < asize; j++)
-	    b[j] = scalar*c[j];
+      b[j] = scalar * c[j];
 #endif
    times[1][k] = mysecond() - times[1][k];
    times[2][k] = mysecond();
 #ifdef TUNED
        tuned_STREAM_Add();
 #else
 #ifndef DIS_OPENMP
 #pragma omp parallel for
 #endif
-	for (j=0; j<STREAM_ARRAY_SIZE; j++)
+    for (j = 0; j < asize; j++)
-	    c[j] = a[j]+b[j];
+      c[j] = a[j] + b[j];
 #endif
    times[2][k] = mysecond() - times[2][k];
    times[3][k] = mysecond();
 #ifdef TUNED
        tuned_STREAM_Triad(scalar);
 #else
 #ifndef DIS_OPENMP
 #pragma omp parallel for
 #endif
-	for (j=0; j<STREAM_ARRAY_SIZE; j++)
+    for (j = 0; j < asize; j++)
-	    a[j] = b[j]+scalar*c[j];
+      a[j] = b[j] + scalar * c[j];
 #endif
    times[3][k] = mysecond() - times[3][k];
  }
  /*	--- SUMMARY --- */
-    for (k=1; k<NTIMES; k++) /* note -- skip first iteration */
+  for (k = 1; k < NTIMES; k++) /* note -- skip first iteration */
 	{
 	for (j=0; j<4; j++)
  {
    for (j = 0; j < 4; j++) {
      avgtime[j] = avgtime[j] + times[j][k];
      mintime[j] = MIN(mintime[j], times[j][k]);
      maxtime[j] = MAX(maxtime[j], times[j][k]);
@ -378,49 +373,41 @@ main()
  }
  printf("Function    Best Rate MB/s  Avg time     Min time     Max time\n");
-    for (j=0; j<4; j++) {
+  for (j = 0; j < 4; j++) {
-		avgtime[j] = avgtime[j]/(double)(NTIMES-1);
+    avgtime[j] = avgtime[j] / (double)(NTIMES - 1);
    printf("%s%12.1f  %11.6f  %11.6f  %11.6f\n", label[j],
-	       1.0E-06 * bytes[j]/mintime[j],
+           1.0E-06 * bytes[j] / mintime[j], avgtime[j], mintime[j], maxtime[j]);
 	       avgtime[j],
 	       mintime[j],
 	       maxtime[j]);
  }
  printf(HLINE);
  /* --- Check Results --- */
-    checkSTREAMresults();
+  checkSTREAMresults(asize, &vptr);
  printf(HLINE);
-    double total_time = avgtime[0]\
+  double total_time = avgtime[0] + avgtime[1] + avgtime[2] + avgtime[3];
                        + avgtime[1]\
                        + avgtime[2]\
                        + avgtime[3];
  printf("time: %s ms\n", format_time((uint64_t)(total_time * 1000)));
  return 0;
 }
-# define	M	20
+#define M 20
-int
+int checktick() {
 checktick()
    {
  int i, minDelta, Delta;
  double t1, t2, timesfound[M];
-/*  Collect a sequence of M unique time values from the system. */
+  /*  Collect a sequence of M unique time values from the system. */
  for (i = 0; i < M; i++) {
    t1 = mysecond();
-	while( ((t2=mysecond()) - t1) < 1.0E-6 )
+    while (((t2 = mysecond()) - t1) < 1.0E-6)
      ;
    timesfound[i] = t1 = t2;
  }
-/*
+  /*
   * Determine the minimum difference between these M values.
   * This result will be our estimate (in microseconds) for the
   * clock granularity.
@ -428,14 +415,12 @@ checktick()
  minDelta = 1000000;
  for (i = 1; i < M; i++) {
-	Delta = (int)( 1.0E6 * (timesfound[i]-timesfound[i-1]));
+    Delta = (int)(1.0E6 * (timesfound[i] - timesfound[i - 1]));
-	minDelta = MIN(minDelta, MAX(Delta,0));
+    minDelta = MIN(minDelta, MAX(Delta, 0));
  }
-   return(minDelta);
+  return (minDelta);
-    }
+}
 /* A gettimeofday routine to give access to the wall
   clock timer on most UNIX-like systems.  */
@ -443,23 +428,19 @@ checktick()
 /* This function has been modified from the original version to ensure
 * ANSI compliance, due to the deprecation of the "timezone" struct. */
-
+double mysecond() { return ((double)uptime() / 1000); }
 double mysecond()
 {
        return ((double)uptime() / 1000);
 }
 #ifndef abs
 #define abs(a) ((a) >= 0 ? (a) : -(a))
 #endif
-void checkSTREAMresults ()
+void checkSTREAMresults(int asize, STREAM_TYPE **vptr) {
-{
+  assert(vptr);
-	STREAM_TYPE aj,bj,cj,scalar;
+  STREAM_TYPE aj, bj, cj, scalar;
-	STREAM_TYPE aSumErr,bSumErr,cSumErr;
+  STREAM_TYPE aSumErr, bSumErr, cSumErr;
-	STREAM_TYPE aAvgErr,bAvgErr,cAvgErr;
+  STREAM_TYPE aAvgErr, bAvgErr, cAvgErr;
  double epsilon;
  size_t j;
-	int	k,ierr,err;
+  int k, ierr, err;
  /* reproduce initialization */
  aj = 1.0;
@ -469,147 +450,112 @@ void checkSTREAMresults ()
  aj = 2.0E0 * aj;
  /* now execute timing loop */
  scalar = 3.0;
-	for (k=0; k<NTIMES; k++)
+  for (k = 0; k < NTIMES; k++) {
        {
    cj = aj;
-            bj = scalar*cj;
+    bj = scalar * cj;
-            cj = aj+bj;
+    cj = aj + bj;
-            aj = bj+scalar*cj;
+    aj = bj + scalar * cj;
  }
  /* accumulate deltas between observed and expected results */
  aSumErr = 0.0;
  bSumErr = 0.0;
  cSumErr = 0.0;
-	for (j=0; j<STREAM_ARRAY_SIZE; j++) {
+  for (j = 0; j < asize; j++) {
-		aSumErr += abs(a[j] - aj);
+    aSumErr += abs(vptr[0][j] - aj);
-		bSumErr += abs(b[j] - bj);
+    bSumErr += abs(vptr[1][j] - bj);
-		cSumErr += abs(c[j] - cj);
+    cSumErr += abs(vptr[2][j] - cj);
-		/* if (j == 417) printf("Index 417: c[j]: %f, cj: %f\n",c[j],cj);	*/ /* MCCALPIN */
+    /* if (j == 417) printf("Index 417: c[j]: %f, cj: %f\n",c[j],cj);	*/ /* MCCALPIN
                                                                            */
  }
-	aAvgErr = aSumErr / (STREAM_TYPE) STREAM_ARRAY_SIZE;
+  aAvgErr = aSumErr / (STREAM_TYPE)asize;
-	bAvgErr = bSumErr / (STREAM_TYPE) STREAM_ARRAY_SIZE;
+  bAvgErr = bSumErr / (STREAM_TYPE)asize;
-	cAvgErr = cSumErr / (STREAM_TYPE) STREAM_ARRAY_SIZE;
+  cAvgErr = cSumErr / (STREAM_TYPE)asize;
  if (sizeof(STREAM_TYPE) == 4) {
    epsilon = 1.e-6;
-	}
+  } else if (sizeof(STREAM_TYPE) == 8) {
 	else if (sizeof(STREAM_TYPE) == 8) {
    epsilon = 1.e-13;
-	}
+  } else {
-	else {
+    printf("WEIRD: sizeof(STREAM_TYPE) = %lu\n", sizeof(STREAM_TYPE));
 		printf("WEIRD: sizeof(STREAM_TYPE) = %lu\n",sizeof(STREAM_TYPE));
    epsilon = 1.e-6;
  }
  err = 0;
-	if (abs(aAvgErr/aj) > epsilon) {
+  if (abs(aAvgErr / aj) > epsilon) {
    err++;
-		printf ("Failed Validation on array a[], AvgRelAbsErr > epsilon (%e)\n",epsilon);
+    printf("Failed Validation on array a[], AvgRelAbsErr > epsilon (%e)\n",
-		printf ("     Expected Value: %e, AvgAbsErr: %e, AvgRelAbsErr: %e\n",aj,aAvgErr,abs(aAvgErr)/aj);
+           epsilon);
    printf("     Expected Value: %e, AvgAbsErr: %e, AvgRelAbsErr: %e\n", aj,
           aAvgErr, abs(aAvgErr) / aj);
    ierr = 0;
-		for (j=0; j<STREAM_ARRAY_SIZE; j++) {
+    for (j = 0; j < asize; j++) {
-			if (abs(a[j]/aj-1.0) > epsilon) {
+      if (abs(vptr[0][j] / aj - 1.0) > epsilon) {
        ierr++;
 #ifdef VERBOSE
        if (ierr < 10) {
-					printf("         array a: index: %ld, expected: %e, observed: %e, relative error: %e\n",
+          printf("         array a: index: %ld, expected: %e, observed: %e, "
-						j,aj,a[j],abs((aj-a[j])/aAvgErr));
+                 "relative error: %e\n",
                 j, aj, vptr[0][j], abs((aj - vptr[0][j]) / aAvgErr));
        }
 #endif
      }
    }
-		printf("     For array a[], %d errors were found.\n",ierr);
+    printf("     For array a[], %d errors were found.\n", ierr);
  }
-	if (abs(bAvgErr/bj) > epsilon) {
+  if (abs(bAvgErr / bj) > epsilon) {
    err++;
-		printf ("Failed Validation on array b[], AvgRelAbsErr > epsilon (%e)\n",epsilon);
+    printf("Failed Validation on array b[], AvgRelAbsErr > epsilon (%e)\n",
-		printf ("     Expected Value: %e, AvgAbsErr: %e, AvgRelAbsErr: %e\n",bj,bAvgErr,abs(bAvgErr)/bj);
+           epsilon);
-		printf ("     AvgRelAbsErr > Epsilon (%e)\n",epsilon);
+    printf("     Expected Value: %e, AvgAbsErr: %e, AvgRelAbsErr: %e\n", bj,
           bAvgErr, abs(bAvgErr) / bj);
    printf("     AvgRelAbsErr > Epsilon (%e)\n", epsilon);
    ierr = 0;
-		for (j=0; j<STREAM_ARRAY_SIZE; j++) {
+    for (j = 0; j < asize; j++) {
-			if (abs(b[j]/bj-1.0) > epsilon) {
+      if (abs(vptr[1][j] / bj - 1.0) > epsilon) {
        ierr++;
 #ifdef VERBOSE
        if (ierr < 10) {
-					printf("         array b: index: %ld, expected: %e, observed: %e, relative error: %e\n",
+          printf("         array b: index: %ld, expected: %e, observed: %e, "
-						j,bj,b[j],abs((bj-b[j])/bAvgErr));
+                 "relative error: %e\n",
                 j, bj, vptr[1][j], abs((bj - vptr[1][j]) / bAvgErr));
        }
 #endif
      }
    }
-		printf("     For array b[], %d errors were found.\n",ierr);
+    printf("     For array b[], %d errors were found.\n", ierr);
  }
-	if (abs(cAvgErr/cj) > epsilon) {
+  if (abs(cAvgErr / cj) > epsilon) {
    err++;
-		printf ("Failed Validation on array c[], AvgRelAbsErr > epsilon (%e)\n",epsilon);
+    printf("Failed Validation on array c[], AvgRelAbsErr > epsilon (%e)\n",
-		printf ("     Expected Value: %e, AvgAbsErr: %e, AvgRelAbsErr: %e\n",cj,cAvgErr,abs(cAvgErr)/cj);
+           epsilon);
-		printf ("     AvgRelAbsErr > Epsilon (%e)\n",epsilon);
+    printf("     Expected Value: %e, AvgAbsErr: %e, AvgRelAbsErr: %e\n", cj,
           cAvgErr, abs(cAvgErr) / cj);
    printf("     AvgRelAbsErr > Epsilon (%e)\n", epsilon);
    ierr = 0;
-		for (j=0; j<STREAM_ARRAY_SIZE; j++) {
+    for (j = 0; j < asize; j++) {
-			if (abs(c[j]/cj-1.0) > epsilon) {
+      if (abs(vptr[2][j] / cj - 1.0) > epsilon) {
        ierr++;
 #ifdef VERBOSE
        if (ierr < 10) {
-					printf("         array c: index: %ld, expected: %e, observed: %e, relative error: %e\n",
+          printf("         array c: index: %ld, expected: %e, observed: %e, "
-						j,cj,c[j],abs((cj-c[j])/cAvgErr));
+                 "relative error: %e\n",
                 j, cj, c[j], abs((cj - c[j]) / cAvgErr));
        }
 #endif
      }
    }
-		printf("     For array c[], %d errors were found.\n",ierr);
+    printf("     For array c[], %d errors were found.\n", ierr);
  }
  if (err == 0) {
-		printf ("Solution Validates: avg error less than %e on all three arrays\n",epsilon);
+    printf("Solution Validates: avg error less than %e on all three arrays\n",
           epsilon);
  }
 #ifdef VERBOSE
-	printf ("Results Validation Verbose Results: \n");
+  printf("Results Validation Verbose Results: \n");
-	printf ("    Expected a(1), b(1), c(1): %f %f %f \n",aj,bj,cj);
+  printf("    Expected a(1), b(1), c(1): %f %f %f \n", aj, bj, cj);
-	printf ("    Observed a(1), b(1), c(1): %f %f %f \n",a[1],b[1],c[1]);
+  printf("    Observed a(1), b(1), c(1): %f %f %f \n", vptr[0][1], vptr[1][1], vptr[2][1]);
-	printf ("    Rel Errors on a, b, c:     %e %e %e \n",abs(aAvgErr/aj),abs(bAvgErr/bj),abs(cAvgErr/cj));
+  printf("    Rel Errors on a, b, c:     %e %e %e \n", abs(aAvgErr / aj),
         abs(bAvgErr / bj), abs(cAvgErr / cj));
 #endif
 }
 #ifdef TUNED
 /* stubs for "tuned" versions of the kernels */
 void tuned_STREAM_Copy()
 {
 	size_t j;
 #ifndef DIS_OPENMP
 #pragma omp parallel for
 #endif
        for (j=0; j<STREAM_ARRAY_SIZE; j++)
            c[j] = a[j];
 }
 void tuned_STREAM_Scale(STREAM_TYPE scalar)
 {
 	size_t j;
 #ifndef DIS_OPENMP
 #pragma omp parallel for
 #endif
 	for (j=0; j<STREAM_ARRAY_SIZE; j++)
 	    b[j] = scalar*c[j];
 }
 void tuned_STREAM_Add()
 {
 	size_t j;
 #ifndef DIS_OPENMP
 #pragma omp parallel for
 #endif
 	for (j=0; j<STREAM_ARRAY_SIZE; j++)
 	    c[j] = a[j]+b[j];
 }
 void tuned_STREAM_Triad(STREAM_TYPE scalar)
 {
 	size_t j;
 #ifndef DIS_OPENMP
 #pragma omp parallel for
 #endif
 	for (j=0; j<STREAM_ARRAY_SIZE; j++)
 	    a[j] = b[j]+scalar*c[j];
 }
 /* end of stubs for the "tuned" versions of the kernels */
 #endif
--- a/src/tcc/Makefile
+++ b/src/tcc/Makefile
@ -1,27 +1,16 @@
 NAME = riscv-tcc
 mainargs ?= ref
 BENCH_LIBS = bench openlibm soft-fp
-SRCS := tcc.c resources.S my_qsort.c fs.c
+SRCS := tcc.c my_qsort.c fs.c ./configs/$(mainargs)-config.c ./resources/resources-$(mainargs).S
 INC_PATH += 	../common/openlibm/include \
 			../common/openlibm/src \
 			./include \
 			../common/bench/include
 ifeq ($(mainargs), test)
 CFLAGS += -D__BENCH_TEST__
 ASFLAGS += -D__BENCH_TEST__
 else ifeq ($(mainargs), train)
 CFLAGS += -D__BENCH_TRAIN__
 ASFLAGS += -D__BENCH_TRAIN__
 else ifeq ($(mainargs), huge)
 CFLAGS += -D__BENCH_HUGE__
 ASFLAGS += -D__BENCH_HUGE__
 else
 CFLAGS += -D__BENCH_REF__
 ASFLAGS += -D__BENCH_REF__
 endif
 include $(AM_HOME)/Makefile
--- a/src/tcc/config.h
+++ b/src/tcc/config.h
@ -15,3 +15,7 @@
 #define ONE_SOURCE 1
 // #define SDE_RISCV32_DEV 1
 // #define __SIZEOF_POINTER__ 4
 //
 /*
  * Per-workload settings for the tcc benchmark; main() in tcc.c reads the
  * single instance named `config`.
  */
 typedef struct bench_tcc_config {
     /* Number of file_table entries handed to fs_init(). */
     int file_count;
 } bench_tcc_config;
--- a/src/tcc/configs/ref-config.c
+++ b/src/tcc/configs/ref-config.c
--- a/src/tcc/configs/test-config.c
+++ b/src/tcc/configs/test-config.c
@ -0,0 +1,25 @@
 #include <fs.h>
 #include "../config.h"
 /*
  * Files exposed to the benchmark's in-memory file system for the "test"
  * workload; main() in tcc.c passes this table to fs_init().
  * Row order follows the .incbin order in resources/resources-test.S.
  * NOTE(review): the second and third fields look like the file size in bytes
  * and the byte offset inside the ramdisk image (each offset equals the sum of
  * the preceding sizes) -- confirm the field names against Finfo in fs.h.
  */
 Finfo file_table[] = {
   {"/share/test.c", 336, 0, NULL, NULL},
   {"/share/test", 752, 336, NULL, NULL},
   {"/share/trm.c", 273, 1088, NULL, NULL},
 };
 /*
  * Command line handed to the embedded tcc for the "test" workload
  * (main() in tcc.c uses it in place of the real argc/argv): build
  * /share/trm.c and /share/test.c into the static output /share/test.
  */
 char *tcc_argv1[]={
     "./tcc",
     "/share/trm.c",
     "/share/test.c",
     "-ffreestanding",
     "-nostdlib",
     "-o",
     "/share/test",
     "-Wl,-Ttext=0x80000000",
     "-O2",
     "-static"
 };
 /* Derived from the array so the count cannot drift from the argument list. */
 int tcc_argc1 = (int)(sizeof(tcc_argv1) / sizeof(tcc_argv1[0]));
 /*
  * file_count for the "test" workload: equals the number of rows in
  * file_table above and is what main() in tcc.c passes to fs_init().
  * Keep it in step with the table when files are added or removed.
  */
 bench_tcc_config config = {3};
--- a/src/tcc/configs/train-config.c
+++ b/src/tcc/configs/train-config.c
@ -0,0 +1,24 @@
 #include <fs.h>
 #include "../config.h"
 /*
  * Files exposed to the benchmark's in-memory file system for the "train"
  * workload; main() in tcc.c passes this table to fs_init().
  * Row order follows the .incbin order in resources/resources-train.S.
  * NOTE(review): the second and third fields look like the file size in bytes
  * and the byte offset inside the ramdisk image (each offset equals the sum of
  * the preceding sizes) -- confirm the field names against Finfo in fs.h.
  * NOTE(review): /share/train is listed with the same size (106) as
  * /share/trap.h -- confirm it matches the actual size of input/train.
  */
 Finfo file_table[] = {
   {"/share/trm.c", 273, 0, NULL, NULL},
   {"/share/trap.h", 106, 273, NULL, NULL},
   {"/share/train.c", 1521, 379, NULL, NULL},
   {"/share/train", 106, 1900, NULL, NULL},
 };
 /*
  * Command line handed to the embedded tcc for the "train" workload
  * (main() in tcc.c uses it in place of the real argc/argv): build
  * /share/trm.c and /share/train.c into the static output /share/train.
  */
 char *tcc_argv1[]={
     "./tcc",
     "/share/trm.c",
     "/share/train.c",
     "-ffreestanding",
     "-nostdlib",
     "-o",
     "/share/train",
     "-Wl,-Ttext=0x80000000",
     "-O2",
     "-static"
 };
 /* Derived from the array so the count cannot drift from the argument list. */
 int tcc_argc1 = (int)(sizeof(tcc_argv1) / sizeof(tcc_argv1[0]));
 /*
  * file_count for the "train" workload: equals the number of rows in
  * file_table above and is what main() in tcc.c passes to fs_init().
  * Keep it in step with the table when files are added or removed.
  */
 bench_tcc_config config = {4};
--- a/src/tcc/fs.c
+++ b/src/tcc/fs.c
@ -18,8 +18,6 @@ int fs_init(Finfo *list, size_t count) {
  return 0;
 }
 size_t ramdisk_read(void *buf, size_t offset, size_t len)
 {
  assert(offset + len <= RAMDISK_SIZE);
@ -40,7 +38,7 @@ int fs_open(const char *pathname, int flags, int mode)
  //printf("ex1 addr is %x\n",  (uint32_t)ramdisk_start + 336);
  for(int fs_num = 0; fs_num < file_count; fs_num ++)
  {
-    if(strcmp(pathname, file_table[fs_num].name) == 0) // 匹配成功
+    if(strcmp(pathname, file_table[fs_num].name) == 0)
    {
      file_table[fs_num].open_offset = 0;
      return fs_num;
--- a/src/tcc/resources/resources-ref.S
+++ b/src/tcc/resources/resources-ref.S
--- a/src/tcc/resources/resources-test.S
+++ b/src/tcc/resources/resources-test.S
@ -0,0 +1,7 @@
 /*
  * Ramdisk image for the "test" workload: the listed files are embedded
  * back to back between the ramdisk_start and ramdisk_end symbols.
  * The .incbin order fixes each file's position inside the image and has to
  * stay in step with file_table in configs/test-config.c
  * (test.c at 0, test at 336, trm.c at 1088).
  */
 .section .data
 .global ramdisk_start, ramdisk_end
 ramdisk_start:
 .incbin "input/test.c"
 .incbin "input/test"
 .incbin "input/trm.c"
 ramdisk_end:
--- a/src/tcc/resources/resources-train.S
+++ b/src/tcc/resources/resources-train.S
@ -0,0 +1,8 @@
 /*
  * Ramdisk image for the "train" workload: the listed files are embedded
  * back to back between the ramdisk_start and ramdisk_end symbols.
  * The .incbin order fixes each file's position inside the image and has to
  * stay in step with file_table in configs/train-config.c
  * (trm.c at 0, trap.h at 273, train.c at 379, train at 1900).
  */
 .section .data
 .global ramdisk_start, ramdisk_end
 ramdisk_start:
 .incbin "input/trm.c"
 .incbin "input/trap.h"
 .incbin "input/train.c"
 .incbin "input/train"
 ramdisk_end:
--- a/src/tcc/tcc.c
+++ b/src/tcc/tcc.c
@ -26,49 +26,6 @@
 #include "tcctools.c"
 #if defined (__BENCH_TEST__)
 static Finfo file_table[] = {
  {"/share/test.c", 336, 0, NULL, NULL},
  {"/share/test", 752, 336, NULL, NULL},
  {"/share/trm.c", 273, 1088, NULL, NULL},
 };
 int tcc_argc1 = 10;
 char *tcc_argv1[]={
    "./tcc",
    "/share/trm.c",
    "/share/test.c",
    "-ffreestanding",
    "-nostdlib",
    "-o",
    "/share/test",
    "-Wl,-Ttext=0x80000000",
    "-O2",
    "-static"
 };
 #elif defined (__BENCH_TRAIN__)
 static Finfo file_table[] = {
  {"/share/trm.c", 273, 0, NULL, NULL},
  {"/share/trap.h", 106, 273, NULL, NULL},
  {"/share/train.c", 1521, 379, NULL, NULL},
  {"/share/train", 106, 1900, NULL, NULL},
 };
 int tcc_argc1 = 10;
 char *tcc_argv1[]={
    "./tcc",
    "/share/trm.c",
    "/share/train.c",
    "-ffreestanding",
    "-nostdlib",
    "-o",
    "/share/train",
    "-Wl,-Ttext=0x80000000",
    "-O2",
    "-static"
 };
 #elif defined (__BENCH_HUGE__)
 #else //default to ref
 #endif
 static void set_environment(TCCState *s)
 {
    char * path;
@ -115,11 +72,13 @@ static char *default_outputfile(TCCState *s, const char *first_file)
    return tcc_strdup(buf);
 }
 extern bench_tcc_config config;
 int main(int argc0, char **argv0)
 {
-  fs_init(file_table, 4);
+  extern Finfo file_table[];
  fs_init(file_table, config.file_count);
  bench_malloc_init();
    TCCState *s, *s1;
    int ret, opt, n = 0, t = 0, done;
@ -128,10 +87,10 @@ int main(int argc0, char **argv0)
    int argc; char **argv;
    int ppfp = FD_STDOUT;
-
+    extern int tcc_argc1;
    extern char *tcc_argv1[];
    start_time = uptime();
 redo:
    // argc = argc0, argv = argv0;
    argc = tcc_argc1, argv = tcc_argv1;
    s = s1 = tcc_new();
 #ifdef CONFIG_TCC_SWITCHES /* predefined options */
--- a/src/tcc/tcc.h
+++ b/src/tcc/tcc.h
@ -1885,4 +1885,5 @@ PUB_FUNC void tcc_exit_state(TCCState *s1);
 #else
 # define TCC_STATE_VAR(sym) s1->sym
 # define TCC_SET_STATE(fn) (tcc_enter_state(s1),fn)
 #endif
--- a/src/whetstone/Makefile
+++ b/src/whetstone/Makefile
@ -1,6 +1,8 @@
 NAME = whetstone
-SRCS = whetstone.c
+mainargs ?= ref
 SRCS = whetstone.c ./configs/$(mainargs)-config.c
 BENCH_LIBS = bench openlibm soft-fp
--- a/src/whetstone/configs/ref-config.c
+++ b/src/whetstone/configs/ref-config.c
@ -0,0 +1,5 @@
 #include <whestone.h>
 /*
  * Workload parameters for the "ref" input set; the Makefile picks this
  * file through ./configs/$(mainargs)-config.c.
  * NOTE(review): the meaning of the single field comes from whestone.h --
  * presumably an iteration count; confirm.
  */
 bench_whestone_config config = {200};
--- a/src/whetstone/configs/test-config.c
+++ b/src/whetstone/configs/test-config.c
@ -0,0 +1,3 @@
 #include <whestone.h>
 /*
  * Workload parameters for the "test" input set; the Makefile picks this
  * file through ./configs/$(mainargs)-config.c.
  * NOTE(review): the meaning of the single field comes from whestone.h --
  * presumably an iteration count; confirm.
  */
 bench_whestone_config config = {30};
--- a/src/whetstone/configs/train-config.c
+++ b/src/whetstone/configs/train-config.c
@ -0,0 +1,4 @@
 #include <whestone.h>
 /*
  * Workload parameters for the "train" input set; the Makefile picks this
  * file through ./configs/$(mainargs)-config.c.
  * NOTE(review): the meaning of the single field comes from whestone.h --
  * presumably an iteration count; confirm.
  */
 bench_whestone_config config = {10};
--- a/src/whetstone/include/cdefs-compat.h
+++ b/src/whetstone/include/cdefs-compat.h
@ -1,105 +0,0 @@
 #ifndef _CDEFS_COMPAT_H_
 #define	_CDEFS_COMPAT_H_
 #if !defined(__BEGIN_DECLS)
 #if defined(__cplusplus)
 #define	__BEGIN_DECLS	extern "C" {
 #define	__END_DECLS	}
 #else
 #define	__BEGIN_DECLS
 #define	__END_DECLS
 #endif
 #endif /* !defined(__BEGIN_DECLS) */
 #ifdef __GNUC__
 #if defined(__strong_alias) && defined(__NetBSD__)
 #define openlibm_strong_reference(sym,alias) __strong_alias(alias,sym)
 #elif defined(__strong_reference)
 #define openlibm_strong_reference(sym,alias) __strong_reference(sym,alias)
 #else
 #ifdef __APPLE__
 #define openlibm_strong_reference(sym,aliassym) openlibm_weak_reference(sym,aliassym)
 #else
 #define openlibm_strong_reference(sym,aliassym)	\
 	OLM_DLLEXPORT extern __typeof (aliassym) aliassym __attribute__ ((__alias__ (#sym)));
 #endif /* __APPLE__ */
 #endif /* __strong_reference */
 #ifdef __wasm__
 #define openlibm_weak_reference(sym,alias) openlibm_strong_reference(sym,alias)
 #elif defined(__weak_alias) && defined(__NetBSD__)
 #define openlibm_weak_reference(sym,alias) __weak_alias(alias,sym)
 #elif defined(__weak_reference)
 #define openlibm_weak_reference(sym,alias) __weak_reference(sym,alias)
 #else
 #ifdef __ELF__
 #ifdef __STDC__
 #define openlibm_weak_reference(sym,alias)	\
 	__asm__(".weak " #alias);	\
 	__asm__(".equ "  #alias ", " #sym)
 #ifdef __warn_references
 #define openlibm_warn_references(sym,msg) __warn_references(sym,msg)
 #else
 #define openlibm_warn_references(sym,msg)	\
 	__asm__(".section .gnu.warning." #sym);	\
 	__asm__(".asciz \"" msg "\"");	\
 	__asm__(".previous")
 #endif /* __warn_references */
 #else
 #define openlibm_weak_reference(sym,alias)	\
 	__asm__(".weak alias");		\
 	__asm__(".equ alias, sym")
 #ifdef __warn_references
 #define openlibm_warn_references(sym,msg) __warn_references(sym,msg)
 #else
 #define openlibm_warn_references(sym,msg)	\
 	__asm__(".section .gnu.warning.sym"); \
 	__asm__(".asciz \"msg\"");	\
 	__asm__(".previous")
 #endif	/* __warn_references */
 #endif	/* __STDC__ */
 #elif defined(__clang__) /* CLANG */
 #if defined(_WIN32) && defined(_X86_)
 #define openlibm_asm_symbol_prefix "_"
 #else
 #define openlibm_asm_symbol_prefix ""
 #endif
 #ifdef __STDC__
 #define openlibm_weak_reference(sym,alias)     \
    __asm__(".weak_reference " openlibm_asm_symbol_prefix #alias); \
    __asm__(".set " openlibm_asm_symbol_prefix #alias ", " openlibm_asm_symbol_prefix #sym)
 #else
 #define openlibm_weak_reference(sym,alias)     \
    __asm__(".weak_reference openlibm_asm_symbol_prefix/**/alias");\
    __asm__(".set openlibm_asm_symbol_prefix/**/alias, openlibm_asm_symbol_prefix/**/sym")
 #endif
 #else	/* !__ELF__ */
 #ifdef __STDC__
 #define openlibm_weak_reference(sym,alias)	\
 	__asm__(".stabs \"_" #alias "\",11,0,0,0");	\
 	__asm__(".stabs \"_" #sym "\",1,0,0,0")
 #ifdef __warn_references
 #define openlibm_warn_references(sym,msg) __warn_references(sym,msg)
 #else
 #define openlibm_warn_references(sym,msg)	\
 	__asm__(".stabs \"" msg "\",30,0,0,0");		\
 	__asm__(".stabs \"_" #sym "\",1,0,0,0")
 #endif /* __warn_references */
 #else
 #define openlibm_weak_reference(sym,alias)	\
 	__asm__(".stabs \"_/**/alias\",11,0,0,0");	\
 	__asm__(".stabs \"_/**/sym\",1,0,0,0")
 #ifdef __warn_references
 #define openlibm_warn_references(sym,msg) __warn_references(sym,msg)
 #else
 #define openlibm_warn_references(sym,msg)	\
 	__asm__(".stabs msg,30,0,0,0");			\
 	__asm__(".stabs \"_/**/sym\",1,0,0,0")
 #endif	/* __warn_references */
 #endif	/* __STDC__ */
 #endif	/* __ELF__ */
 #endif  /* __weak_reference */
 #endif	/* __GNUC__ */
 #endif /* _CDEFS_COMPAT_H_ */
--- a/src/whetstone/include/e_rem_pio2l.h
+++ b/src/whetstone/include/e_rem_pio2l.h
@ -1,144 +0,0 @@
 /* From: @(#)e_rem_pio2.c 1.4 95/01/18 */
 /*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2008 Steven G. Kargl, David Schultz, Bruce D. Evans.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice 
 * is preserved.
 * ====================================================
 *
 * Optimized by Bruce D. Evans.
 */
 #include "cdefs-compat.h"
 //__FBSDID("$FreeBSD: src/lib/msun/ld128/e_rem_pio2l.h,v 1.2 2011/05/30 19:41:28 kargl Exp $");
 /* ld128 version of __ieee754_rem_pio2l(x,y)
 * 
 * return the remainder of x rem pi/2 in y[0]+y[1] 
 * use __kernel_rem_pio2()
 */
 #include <float.h>
 #include <openlibm_math.h>
 #include "math_private.h"
 #include "fpmath.h"
 #define	BIAS	(LDBL_MAX_EXP - 1)
 /*
 * XXX need to verify that nonzero integer multiples of pi/2 within the
 * range get no closer to a long double than 2**-140, or that
 * ilogb(x) + ilogb(min_delta) < 45 - -140.
 */
 /*
 * invpio2:  113 bits of 2/pi
 * pio2_1:   first  68 bits of pi/2
 * pio2_1t:  pi/2 - pio2_1
 * pio2_2:   second 68 bits of pi/2
 * pio2_2t:  pi/2 - (pio2_1+pio2_2)
 * pio2_3:   third  68 bits of pi/2
 * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
 */
 static const double
 zero =  0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
 two24 =  1.67772160000000000000e+07; /* 0x41700000, 0x00000000 */
 static const long double
 invpio2 =  6.3661977236758134307553505349005747e-01L,	/*  0x145f306dc9c882a53f84eafa3ea6a.0p-113 */
 pio2_1  =  1.5707963267948966192292994253909555e+00L,	/*  0x1921fb54442d18469800000000000.0p-112 */
 pio2_1t =  2.0222662487959507323996846200947577e-21L,	/*  0x13198a2e03707344a4093822299f3.0p-181 */
 pio2_2  =  2.0222662487959507323994779168837751e-21L,	/*  0x13198a2e03707344a400000000000.0p-181 */
 pio2_2t =  2.0670321098263988236496903051604844e-43L,	/*  0x127044533e63a0105df531d89cd91.0p-254 */
 pio2_3  =  2.0670321098263988236499468110329591e-43L,	/*  0x127044533e63a0105e00000000000.0p-254 */
 pio2_3t = -2.5650587247459238361625433492959285e-65L;	/* -0x159c4ec64ddaeb5f78671cbfb2210.0p-327 */
 //VBS
 //static inline __always_inline int
 //__ieee754_rem_pio2l(long double x, long double *y)
 /*
  * Argument reduction for the long double trig functions: computes the
  * remainder of x modulo pi/2 and stores it as the pair y[0] + y[1]
  * (y[0] is the leading part, y[1] the correction term).
  *
  * x  value to reduce.
  * y  output array of at least two long doubles.
  *
  * Returns the multiple of pi/2 that was removed (negated for negative
  * large arguments), or 0 with y[0] = y[1] = x-x when x is inf or NaN.
  *
  * Medium-sized arguments are reduced directly with up to three rounds
  * against successively finer pieces of pi/2; everything larger is split
  * into 24-bit chunks and handed to __kernel_rem_pio2().
  *
  * NOTE(review): n is an int64_t but the function returns int, so the
  * result is narrowed on return.
  */
 static inline int
 __ieee754_rem_pio2l(long double x, long double *y)
 {
 	union IEEEl2bits u,u1;
 	long double z,w,t,r,fn;
 	double tx[5],ty[3];
 	int64_t n;
 	int e0,ex,i,j,nx;
 	int16_t expsign;
 	u.e = x;
 	expsign = u.xbits.expsign;
 	ex = expsign & 0x7fff;	/* biased exponent with the sign bit stripped */
 	if (ex < BIAS + 45 || ex == BIAS + 45 &&
 	    u.bits.manh < 0x921fb54442d1LL) {
 	    /* |x| ~< 2^45*(pi/2), medium size */
 	    /* Use a specialized rint() to get fn.  Assume round-to-nearest. */
 	    fn = x*invpio2+0x1.8p112;
 	    fn = fn-0x1.8p112;
 #ifdef HAVE_EFFICIENT_I64RINT
 	    n  = i64rint(fn);
 #else
 	    n  = fn;
 #endif
 	    r  = x-fn*pio2_1;
 	    w  = fn*pio2_1t;	/* 1st round good to 180 bit */
 	    {
 		union IEEEl2bits u2;
 	        int ex1;
 	        j  = ex;
 	        y[0] = r-w; 
 		u2.e = y[0];
 		ex1 = u2.xbits.expsign & 0x7fff;
 	        /* exponent drop from x to y[0] decides whether to refine */
 	        i = j-ex1;
 	        if(i>51) {  /* 2nd iteration needed, good to 248 */
 		    t  = r;
 		    w  = fn*pio2_2;	
 		    r  = t-w;
 		    w  = fn*pio2_2t-((t-r)-w);	
 		    y[0] = r-w;
 		    u2.e = y[0];
 		    ex1 = u2.xbits.expsign & 0x7fff;
 		    i = j-ex1;
 		    if(i>119) {	/* 3rd iteration needed, 316 bits acc */
 		    	t  = r;	/* will cover all possible cases */
 		    	w  = fn*pio2_3;	
 		    	r  = t-w;
 		    	w  = fn*pio2_3t-((t-r)-w);	
 		    	y[0] = r-w;
 		    }
 		}
 	    }
 	    y[1] = (r-y[0])-w;
 	    return n;
 	}
    /* 
     * all other (large) arguments
     */
 	if(ex==0x7fff) {		/* x is inf or NaN */
 	    y[0]=y[1]=x-x; return 0;
 	}
    /* set z = scalbn(|x|,ilogb(x)-23) */
 	u1.e = x;
 	e0 = ex - BIAS - 23;		/* e0 = ilogb(|x|)-23; */
 	u1.xbits.expsign = ex - e0;
 	z = u1.e;
 	/* peel off four integer chunks, scaling the remainder by 2^24 each time */
 	for(i=0;i<4;i++) {
 		tx[i] = (double)((int32_t)(z));
 		z     = (z-tx[i])*two24;
 	}
 	tx[4] = z;
 	nx = 5;
 	while(tx[nx-1]==zero) nx--;	/* skip zero term */
 	n  =  __kernel_rem_pio2(tx,ty,e0,nx,3);
 	/* fold the three doubles into a leading part r and a correction w */
 	t = (long double)ty[2] + ty[1];
 	r = t + ty[0];
 	w = ty[0] - (r - t);
 	if(expsign<0) {y[0] = -r; y[1] = -w; return -n;}
 	y[0] = r; y[1] = w; return n;
 }
--- a/src/whetstone/include/invtrig.h
+++ b/src/whetstone/include/invtrig.h
@ -1,113 +0,0 @@
 /*-
 * Copyright (c) 2008 David Schultz <das@FreeBSD.ORG>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/lib/msun/ld128/invtrig.h,v 1.1 2008/07/31 22:41:26 das Exp $
 */
 #include <float.h>
 #include "fpmath.h"
 #define	BIAS		(LDBL_MAX_EXP - 1)
 #define	MANH_SIZE	(LDBL_MANH_SIZE + 1)
 /* Approximation thresholds. */
 #define	ASIN_LINEAR	(BIAS - 56)	/* 2**-56 */
 #define	ACOS_CONST	(BIAS - 113)	/* 2**-113 */
 #define	ATAN_CONST	(BIAS + 113)	/* 2**113 */
 #define	ATAN_LINEAR	(BIAS - 56)	/* 2**-56 */
 /* 0.95 */
 #define	THRESH	((0xe666666666666666ULL>>(64-(MANH_SIZE-1)))|LDBL_NBIT)
 /* Constants shared by the long double inverse trig functions. */
 #define	pS0	_ItL_pS0
 #define	pS1	_ItL_pS1
 #define	pS2	_ItL_pS2
 #define	pS3	_ItL_pS3
 #define	pS4	_ItL_pS4
 #define	pS5	_ItL_pS5
 #define	pS6	_ItL_pS6
 #define	pS7	_ItL_pS7
 #define	pS8	_ItL_pS8
 #define	pS9	_ItL_pS9
 #define	qS1	_ItL_qS1
 #define	qS2	_ItL_qS2
 #define	qS3	_ItL_qS3
 #define	qS4	_ItL_qS4
 #define	qS5	_ItL_qS5
 #define	qS6	_ItL_qS6
 #define	qS7	_ItL_qS7
 #define	qS8	_ItL_qS8
 #define	qS9	_ItL_qS9
 #define	atanhi	_ItL_atanhi
 #define	atanlo	_ItL_atanlo
 #define	aT	_ItL_aT
 #define	pi_lo	_ItL_pi_lo
 #define	pio2_hi	atanhi[3]
 #define	pio2_lo	atanlo[3]
 #define	pio4_hi	atanhi[1]
 /* Constants shared by the long double inverse trig functions. */
 extern const long double pS0, pS1, pS2, pS3, pS4, pS5, pS6, pS7, pS8, pS9;
 extern const long double qS1, qS2, qS3, qS4, qS5, qS6, qS7, qS8, qS9;
 extern const long double atanhi[], atanlo[], aT[];
 extern const long double pi_lo;
 /*
  * Evaluates x * (pS0 + pS1*x + ... + pS9*x^9) with Horner's scheme,
  * working from the highest coefficient down.
  */
 static inline long double
 P(long double x)
 {
 	long double acc = pS9;

 	acc = pS8 + x * acc;
 	acc = pS7 + x * acc;
 	acc = pS6 + x * acc;
 	acc = pS5 + x * acc;
 	acc = pS4 + x * acc;
 	acc = pS3 + x * acc;
 	acc = pS2 + x * acc;
 	acc = pS1 + x * acc;
 	acc = pS0 + x * acc;
 	return (x * acc);
 }
 /*
  * Evaluates 1 + qS1*x + qS2*x^2 + ... + qS9*x^9 with Horner's scheme,
  * working from the highest coefficient down.
  */
 static inline long double
 Q(long double x)
 {
 	long double acc = qS9;

 	acc = qS8 + x * acc;
 	acc = qS7 + x * acc;
 	acc = qS6 + x * acc;
 	acc = qS5 + x * acc;
 	acc = qS4 + x * acc;
 	acc = qS3 + x * acc;
 	acc = qS2 + x * acc;
 	acc = qS1 + x * acc;
 	return (1.0 + x * acc);
 }
 /*
  * Horner evaluation over the even-indexed entries of aT[]:
  * aT[0] + aT[2]*x + aT[4]*x^2 + ... + aT[22]*x^11.
  */
 static inline long double
 T_even(long double x)
 {
 	long double acc = aT[22];
 	int k;

 	/* walk the even coefficients from aT[20] down to aT[0] */
 	for (k = 20; k >= 0; k -= 2)
 		acc = aT[k] + x * acc;
 	return (acc);
 }
 /*
  * Horner evaluation over the odd-indexed entries of aT[]:
  * aT[1] + aT[3]*x + aT[5]*x^2 + ... + aT[23]*x^11.
  */
 static inline long double
 T_odd(long double x)
 {
 	long double acc = aT[23];
 	int k;

 	/* walk the odd coefficients from aT[21] down to aT[1] */
 	for (k = 21; k >= 1; k -= 2)
 		acc = aT[k] + x * acc;
 	return (acc);
 }
--- a/src/whetstone/include/mathimpl.h
+++ b/src/whetstone/include/mathimpl.h
@ -1,70 +0,0 @@
 /*
 * Copyright (c) 1988, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)mathimpl.h	8.1 (Berkeley) 6/4/93
 * $FreeBSD: src/lib/msun/bsdsrc/mathimpl.h,v 1.7 2005/11/18 05:03:12 bde Exp $
 */
 #ifndef _MATHIMPL_H_
 #define	_MATHIMPL_H_
 #include "cdefs-compat.h"
 #include "math_private.h"
 /*
 * TRUNC() is a macro that sets the trailing 27 bits in the mantissa of an
 * IEEE double variable to zero.  It must be expression-like for syntactic
 * reasons, and we implement this expression using an inline function
 * instead of a pure macro to avoid depending on the gcc feature of
 * statement-expressions.
 */
 #define	TRUNC(d)	(_b_trunc(&(d)))
 /*
  * Helper behind TRUNC(): rewrites the low 32-bit word of *_dp in place,
  * keeping only its top 5 bits (mask 0xf8000000) and zeroing the other 27.
  * An inline function rather than a macro body so that TRUNC() can stay
  * expression-like without statement-expressions.
  */
 static __inline void
 _b_trunc(volatile double *_dp)
 {
 	/* low word of *_dp, filled in by GET_LOW_WORD */
 	u_int32_t _lw;
 	GET_LOW_WORD(_lw, *_dp);
 	SET_LOW_WORD(*_dp, _lw & 0xf8000000);
 }
 /*
  * A pair of doubles; this is the return type of __log__D().
  * NOTE(review): a and b presumably hold the leading and trailing parts of
  * one extra-precision value -- confirm against the __log__D implementation.
  */
 struct Double {
 	double	a;
 	double	b;
 };
 /*
 * Functions internal to the math package, yet not static.
 */
 double	__exp__D(double, double);
 struct Double __log__D(double);
 #endif /* !_MATHIMPL_H_ */
--- a/src/whetstone/include/openlibm.h
+++ b/src/whetstone/include/openlibm.h
@ -1,8 +0,0 @@
 #ifndef OPENLIBM_H
 #define OPENLIBM_H
 #include <openlibm_complex.h>
 #include <openlibm_fenv.h>
 #include <openlibm_math.h>
 #endif /* !OPENLIBM_H */
--- a/src/whetstone/include/openlibm_complex.h
+++ b/src/whetstone/include/openlibm_complex.h
@ -1,179 +0,0 @@
 /*	$OpenBSD: complex.h,v 1.5 2014/03/16 18:38:30 guenther Exp $	*/
 /*
 * Copyright (c) 2008 Martynas Venckus <martynas@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
 #ifdef OPENLIBM_USE_HOST_COMPLEX_H
 #include <complex.h>
 #else /* !OPENLIBM_USE_HOST_COMPLEX_H */
 #ifndef OPENLIBM_COMPLEX_H
 #define	OPENLIBM_COMPLEX_H
 #define complex _Complex
 #define _Complex_I 1.0fi
 #define I _Complex_I
 /*
 * Macros that can be used to construct complex values.
 *
 * The C99 standard intends x+I*y to be used for this, but x+I*y is
 * currently unusable in general since gcc introduces many overflow,
 * underflow, sign and efficiency bugs by rewriting I*y as
 * (0.0+I)*(y+0.0*I) and laboriously computing the full complex product.
 * In particular, I*Inf is corrupted to NaN+I*Inf, and I*-0 is corrupted
 * to -0.0+I*0.0.
 *
 * In C11, a CMPLX(x,y) macro was added to circumvent this limitation,
 * and gcc 4.7 added a __builtin_complex feature to simplify implementation
 * of CMPLX in libc, so we can take advantage of these features if they
 * are available. Clang simply allows complex values to be constructed
 * using a compound literal.
 *
 * If __builtin_complex is not available, resort to using inline
 * functions instead. These can unfortunately not be used to construct
 * compile-time constants.
 *
 * C99 specifies that complex numbers have the same representation as
 * an array of two elements, where the first element is the real part
 * and the second element is the imaginary part.
 */
 #ifdef __clang__
 #  define CMPLXF(x, y) ((float complex){x, y})
 #  define CMPLX(x, y) ((double complex){x, y})
 #  define CMPLXL(x, y) ((long double complex){x, y})
 #elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) && !defined(__INTEL_COMPILER)
 #  define CMPLXF(x,y) __builtin_complex ((float) (x), (float) (y))
 #  define CMPLX(x,y) __builtin_complex ((double) (x), (double) (y))
 #  define CMPLXL(x,y) __builtin_complex ((long double) (x), (long double) (y))
 #else
 /*
  * Fallback CMPLXF: builds a float complex with real part x and imaginary
  * part y by storing them into a two-element array that overlays the
  * complex value (real part first, imaginary part second).
  */
 static inline float complex
 CMPLXF(float x, float y)
 {
 	union {
 		float parts[2];
 		float complex value;
 	} pun;

 	pun.parts[0] = x;
 	pun.parts[1] = y;
 	return (pun.value);
 }
 /*
  * Fallback CMPLX: builds a double complex with real part x and imaginary
  * part y by storing them into a two-element array that overlays the
  * complex value (real part first, imaginary part second).
  */
 static inline double complex
 CMPLX(double x, double y)
 {
 	union {
 		double parts[2];
 		double complex value;
 	} pun;

 	pun.parts[0] = x;
 	pun.parts[1] = y;
 	return (pun.value);
 }
 /*
  * Fallback CMPLXL: builds a long double complex with real part x and
  * imaginary part y by storing them into a two-element array that overlays
  * the complex value (real part first, imaginary part second).
  */
 static inline long double complex
 CMPLXL(long double x, long double y)
 {
 	union {
 		long double parts[2];
 		long double complex value;
 	} pun;

 	pun.parts[0] = x;
 	pun.parts[1] = y;
 	return (pun.value);
 }
 #endif
 /*
 * Double versions of C99 functions
 */
 double complex cacos(double complex);
 double complex casin(double complex);
 double complex catan(double complex);
 double complex ccos(double complex);
 double complex csin(double complex);
 double complex ctan(double complex);
 double complex cacosh(double complex);
 double complex casinh(double complex);
 double complex catanh(double complex);
 double complex ccosh(double complex);
 double complex csinh(double complex);
 double complex ctanh(double complex);
 double complex cexp(double complex);
 double complex clog(double complex);
 double cabs(double complex);
 double complex cpow(double complex, double complex);
 double complex csqrt(double complex);
 double carg(double complex);
 double cimag(double complex);
 double complex conj(double complex);
 double complex cproj(double complex);
 double creal(double complex);
 /*
 * Float versions of C99 functions
 */
 float complex cacosf(float complex);
 float complex casinf(float complex);
 float complex catanf(float complex);
 float complex ccosf(float complex);
 float complex csinf(float complex);
 float complex ctanf(float complex);
 float complex cacoshf(float complex);
 float complex casinhf(float complex);
 float complex catanhf(float complex);
 float complex ccoshf(float complex);
 float complex csinhf(float complex);
 float complex ctanhf(float complex);
 float complex cexpf(float complex);
 float complex clogf(float complex);
 float cabsf(float complex);
 float complex cpowf(float complex, float complex);
 float complex csqrtf(float complex);
 float cargf(float complex);
 float cimagf(float complex);
 float complex conjf(float complex);
 float complex cprojf(float complex);
 float crealf(float complex);
 /*
 * Long double versions of C99 functions
 */
 long double complex cacosl(long double complex);
 long double complex casinl(long double complex);
 long double complex catanl(long double complex);
 long double complex ccosl(long double complex);
 long double complex csinl(long double complex);
 long double complex ctanl(long double complex);
 long double complex cacoshl(long double complex);
 long double complex casinhl(long double complex);
 long double complex catanhl(long double complex);
 long double complex ccoshl(long double complex);
 long double complex csinhl(long double complex);
 long double complex ctanhl(long double complex);
 long double complex cexpl(long double complex);
 long double complex clogl(long double complex);
 long double cabsl(long double complex);
 long double complex cpowl(long double complex,
 	long double complex);
 long double complex csqrtl(long double complex);
 long double cargl(long double complex);
 long double cimagl(long double complex);
 long double complex conjl(long double complex);
 long double complex cprojl(long double complex);
 long double creall(long double complex);
 #endif /* !OPENLIBM_COMPLEX_H */
 #endif /* OPENLIBM_USE_HOST_COMPLEX_H */
--- a/src/whetstone/include/openlibm_defs.h
+++ b/src/whetstone/include/openlibm_defs.h
@ -1,14 +0,0 @@
 #ifndef OPENLIBM_DEFS_H_
 #define OPENLIBM_DEFS_H_
 #ifdef _WIN32
 # ifdef IMPORT_EXPORTS
 #  define OLM_DLLEXPORT __declspec(dllimport)
 # else
 #  define OLM_DLLEXPORT __declspec(dllexport)
 # endif
 #else
 #define OLM_DLLEXPORT __attribute__ ((visibility("default")))
 #endif
 #endif // OPENLIBM_DEFS_H_
--- a/src/whetstone/include/openlibm_fenv.h
+++ b/src/whetstone/include/openlibm_fenv.h
@ -1,25 +0,0 @@
 #ifdef OPENLIBM_USE_HOST_FENV_H
 #include <fenv.h>
 #else /* !OPENLIBM_USE_HOST_FENV_H */
 #if defined(__aarch64__) || defined(__arm__)
 #include <openlibm_fenv_arm.h>
 #elif defined(__x86_64__)
 #include <openlibm_fenv_amd64.h>
 #elif defined(__i386__)
 #include <openlibm_fenv_i387.h>
 #elif defined(__powerpc__) || defined(__ppc__)
 #include <openlibm_fenv_powerpc.h>
 #elif defined(__mips__)
 #include <openlibm_fenv_mips.h>
 #elif defined(__s390__)
 #include <openlibm_fenv_s390.h>
 #elif defined(__riscv)
 #include <openlibm_fenv_riscv.h>
 #elif defined(__loongarch64)
 #include <openlibm_fenv_loongarch64.h>
 #else
 #error "Unsupported platform"
 #endif
 #endif /* OPENLIBM_USE_HOST_FENV_H */
--- a/src/whetstone/include/openlibm_fenv_amd64.h
+++ b/src/whetstone/include/openlibm_fenv_amd64.h
@ -1,223 +0,0 @@
 /*-
 * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/lib/msun/amd64/fenv.h,v 1.8 2011/10/10 15:43:09 das Exp $
 */
 #ifndef	_FENV_H_
 #define	_FENV_H_
 #include <openlibm_defs.h>
 #include "cdefs-compat.h"
 #include "types-compat.h"
 #ifndef	__fenv_static
 #define	__fenv_static	static
 #endif
 /*
  * Saved floating-point environment: the x87 state followed by the SSE
  * control/status register.  The __x87 member is the target of the
  * fnstenv/fldenv instructions used by the functions in this header.
  */
 typedef struct {
 	struct {
 		uint32_t	__control;
 		uint32_t	__status;
 		uint32_t	__tag;
 		/* NOTE(review): presumably the rest of the fnstenv image -- confirm */
 		char		__other[16];
 	} __x87;
 	uint32_t		__mxcsr;
 } fenv_t;
 typedef	uint16_t	fexcept_t;
 /* Exception flags */
 #define	FE_INVALID	0x01
 #define	FE_DENORMAL	0x02
 #define	FE_DIVBYZERO	0x04
 #define	FE_OVERFLOW	0x08
 #define	FE_UNDERFLOW	0x10
 #define	FE_INEXACT	0x20
 #define	FE_ALL_EXCEPT	(FE_DIVBYZERO | FE_DENORMAL | FE_INEXACT | \
 			 FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW)
 /* Rounding modes */
 #define	FE_TONEAREST	0x0000
 #define	FE_DOWNWARD	0x0400
 #define	FE_UPWARD	0x0800
 #define	FE_TOWARDZERO	0x0c00
 #define	_ROUND_MASK	(FE_TONEAREST | FE_DOWNWARD | \
 			 FE_UPWARD | FE_TOWARDZERO)
 /*
 * As compared to the x87 control word, the SSE unit's control word
 * has the rounding control bits offset by 3 and the exception mask
 * bits offset by 7.
 */
 #define	_SSE_ROUND_SHIFT	3
 #define	_SSE_EMASK_SHIFT	7
 __BEGIN_DECLS
 /* Default floating-point environment */
 extern const fenv_t	__fe_dfl_env;
 #define	FE_DFL_ENV	(&__fe_dfl_env)
 #define	__fldcw(__cw)		__asm __volatile("fldcw %0" : : "m" (__cw))
 #define	__fldenv(__env)		__asm __volatile("fldenv %0" : : "m" (__env))
 #define	__fldenvx(__env)	__asm __volatile("fldenv %0" : : "m" (__env)  \
 				: "st", "st(1)", "st(2)", "st(3)", "st(4)",   \
 				"st(5)", "st(6)", "st(7)")
 #define	__fnclex()		__asm __volatile("fnclex")
 #define	__fnstenv(__env)	__asm __volatile("fnstenv %0" : "=m" (*(__env)))
 #define	__fnstcw(__cw)		__asm __volatile("fnstcw %0" : "=m" (*(__cw)))
 #define	__fnstsw(__sw)		__asm __volatile("fnstsw %0" : "=am" (*(__sw)))
 #define	__fwait()		__asm __volatile("fwait")
 #define	__ldmxcsr(__csr)	__asm __volatile("ldmxcsr %0" : : "m" (__csr))
 #define	__stmxcsr(__csr)	__asm __volatile("stmxcsr %0" : "=m" (*(__csr)))
 /*
  * Clears the exception flags selected by __excepts in both the x87 status
  * word and MXCSR.  Always returns 0.
  */
 __fenv_static __attribute__((always_inline)) inline int
 feclearexcept(int __excepts)
 {
 	fenv_t __env;
 	if (__excepts == FE_ALL_EXCEPT) {
 		/* clearing everything: a single fnclex does the x87 side */
 		__fnclex();
 	} else {
 		/* partial clear: store the x87 environment, mask, reload */
 		__fnstenv(&__env.__x87);
 		__env.__x87.__status &= ~__excepts;
 		__fldenv(__env.__x87);
 	}
 	/* the SSE side is always a read-modify-write of MXCSR */
 	__stmxcsr(&__env.__mxcsr);
 	__env.__mxcsr &= ~__excepts;
 	__ldmxcsr(__env.__mxcsr);
 	return (0);
 }
 /*
  * Stores in *__flagp the exception flags selected by __excepts, taken from
  * the OR of MXCSR and the x87 status word.  Always returns 0.
  */
 __fenv_static inline int
 fegetexceptflag(fexcept_t *__flagp, int __excepts)
 {
 	uint32_t __mxcsr;
 	uint16_t __status;
 	__stmxcsr(&__mxcsr);
 	__fnstsw(&__status);
 	*__flagp = (__mxcsr | __status) & __excepts;
 	return (0);
 }
 OLM_DLLEXPORT int fesetexceptflag(const fexcept_t *__flagp, int __excepts);
 OLM_DLLEXPORT int feraiseexcept(int __excepts);
 /*
  * Returns the subset of __excepts that is currently set in either the x87
  * status word or MXCSR.
  */
 __fenv_static __attribute__((always_inline)) inline int
 fetestexcept(int __excepts)
 {
 	uint32_t __mxcsr;
 	uint16_t __status;
 	__stmxcsr(&__mxcsr);
 	__fnstsw(&__status);
 	return ((__status | __mxcsr) & __excepts);
 }
 /*
  * Returns the current rounding mode (one of the FE_* rounding constants),
  * read from the rounding bits of the x87 control word.
  */
 __fenv_static inline int
 fegetround(void)
 {
 	uint16_t __control;
 	/*
 	 * We assume that the x87 and the SSE unit agree on the
 	 * rounding mode.  Reading the control word on the x87 turns
 	 * out to be about 5 times faster than reading it on the SSE
 	 * unit on an Opteron 244.
 	 */
 	__fnstcw(&__control);
 	return (__control & _ROUND_MASK);
 }
 /*
  * Sets the rounding mode to __round in both the x87 control word and
  * MXCSR.  Returns -1 without changing anything if __round has bits outside
  * _ROUND_MASK, otherwise 0.
  */
 __fenv_static inline int
 fesetround(int __round)
 {
 	uint32_t __mxcsr;
 	uint16_t __control;
 	if (__round & ~_ROUND_MASK)
 		return (-1);
 	/* x87: replace the rounding bits of the control word */
 	__fnstcw(&__control);
 	__control &= ~_ROUND_MASK;
 	__control |= __round;
 	__fldcw(__control);
 	/* SSE: same bits, shifted left by _SSE_ROUND_SHIFT within MXCSR */
 	__stmxcsr(&__mxcsr);
 	__mxcsr &= ~(_ROUND_MASK << _SSE_ROUND_SHIFT);
 	__mxcsr |= __round << _SSE_ROUND_SHIFT;
 	__ldmxcsr(__mxcsr);
 	return (0);
 }
 OLM_DLLEXPORT int fegetenv(fenv_t *__envp);
 OLM_DLLEXPORT int feholdexcept(fenv_t *__envp);
 /*
  * Installs the floating-point environment *__envp: loads the saved x87
  * state and then MXCSR.  Always returns 0.
  */
 __fenv_static inline int
 fesetenv(const fenv_t *__envp)
 {
 	/*
 	 * XXX Using fldenvx() instead of fldenv() tells the compiler that this
 	 * instruction clobbers the i387 register stack.  This happens because
 	 * we restore the tag word from the saved environment.  Normally, this
 	 * would happen anyway and we wouldn't care, because the ABI allows
 	 * function calls to clobber the i387 regs.  However, fesetenv() is
 	 * inlined, so we need to be more careful.
 	 */
 	__fldenvx(__envp->__x87);
 	__ldmxcsr(__envp->__mxcsr);
 	return (0);
 }
 OLM_DLLEXPORT int feupdateenv(const fenv_t *__envp);
 #if __BSD_VISIBLE
 OLM_DLLEXPORT int feenableexcept(int __mask);
 OLM_DLLEXPORT int fedisableexcept(int __mask);
 /* We currently provide no external definition of fegetexcept(). */
 /*
  * Returns the FE_* exception bits that are clear in the x87 control word,
  * i.e. the exceptions that are currently not masked.
  */
 static inline int
 fegetexcept(void)
 {
 	uint16_t __control;
 	/*
 	 * We assume that the masks for the x87 and the SSE unit are
 	 * the same.
 	 */
 	__fnstcw(&__control);
 	return (~__control & FE_ALL_EXCEPT);
 }
 #endif /* __BSD_VISIBLE */
 __END_DECLS
 #endif	/* !_FENV_H_ */
--- a/src/whetstone/include/openlibm_fenv_arm.h
+++ b/src/whetstone/include/openlibm_fenv_arm.h
@ -1,230 +0,0 @@
 /*-
 * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/lib/msun/arm/fenv.h,v 1.6 2011/10/10 15:43:09 das Exp $
 */
 #ifndef	_FENV_H_
 #define	_FENV_H_
 #include <stdint.h>
 #include "cdefs-compat.h"
 #ifndef	__fenv_static
 #define	__fenv_static	static
 #endif
 typedef	uint32_t	fenv_t;
 typedef	uint32_t	fexcept_t;
 /* Exception flags */
 #define	FE_INVALID	0x0001
 #define	FE_DIVBYZERO	0x0002
 #define	FE_OVERFLOW	0x0004
 #define	FE_UNDERFLOW	0x0008
 #define	FE_INEXACT	0x0010
 #define	FE_ALL_EXCEPT	(FE_DIVBYZERO | FE_INEXACT | \
 			 FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW)
 /* Rounding modes */
 #define	FE_TONEAREST	0x0000
 #define	FE_TOWARDZERO	0x0001
 #define	FE_UPWARD	0x0002
 #define	FE_DOWNWARD	0x0003
 #define	_ROUND_MASK	(FE_TONEAREST | FE_DOWNWARD | \
 			 FE_UPWARD | FE_TOWARDZERO)
 __BEGIN_DECLS
 /* Default floating-point environment */
 extern const fenv_t	__fe_dfl_env;
 #define	FE_DFL_ENV	(&__fe_dfl_env)
 /* We need to be able to map status flag positions to mask flag positions */
 #define _FPUSW_SHIFT	16
 #define	_ENABLE_MASK	(FE_ALL_EXCEPT << _FPUSW_SHIFT)
 /*
  * __rfs(p) stores the FP status register value through pointer p;
  * __wfs(v) writes value v back to it.  Three variants are selected at
  * compile time: AArch64 (fpsr), ARM with hardware FP (fpscr), and a
  * fallback where reads yield 0 and writes expand to nothing.
  */
 #if defined(__aarch64__)
 #define __rfs(__fpsr)   __asm __volatile("mrs %0,fpsr" : "=r" (*(__fpsr)))
 #define __wfs(__fpsr)   __asm __volatile("msr fpsr,%0" : : "r" (__fpsr))
 /* Test for hardware support for ARM floating point operations, explicitly
 checking for float and double support, see "ARM C Language Extensions", 6.5.1 */
 #elif defined(__ARM_FP) && (__ARM_FP & 0x0C) != 0
 #define __rfs(__fpsr)   __asm __volatile("vmrs %0,fpscr" : "=&r" (*(__fpsr)))
 #define __wfs(__fpsr)   __asm __volatile("vmsr fpscr,%0" : : "r" (__fpsr))
 #else
 /* No usable FP status register: reads report 0, writes are dropped. */
 #define __rfs(__fpsr)	(*(__fpsr) = 0)
 #define __wfs(__fpsr)
 #endif
 /*
  * Clear the bits selected by __excepts in the FP status register.
  * Always returns 0.
  */
 __fenv_static inline int
 feclearexcept(int __excepts)
 {
 	fexcept_t __status;
 	__rfs(&__status);
 	__status = __status & ~__excepts;
 	__wfs(__status);
 	return (0);
 }
 /*
  * Store in *__flagp the status-register bits selected by __excepts.
  * Always returns 0.
  */
 __fenv_static inline int
 fegetexceptflag(fexcept_t *__flagp, int __excepts)
 {
 	fexcept_t __status;
 	__rfs(&__status);
 	__status &= __excepts;
 	*__flagp = __status;
 	return (0);
 }
 /*
  * Replace the status-register bits selected by __excepts with the
  * corresponding bits of *__flagp; other bits are kept.  Always returns 0.
  */
 __fenv_static inline int
 fesetexceptflag(const fexcept_t *__flagp, int __excepts)
 {
 	fexcept_t __status;
 	__rfs(&__status);
 	__status = (__status & ~__excepts) | (*__flagp & __excepts);
 	__wfs(__status);
 	return (0);
 }
 /*
  * Raise the exceptions in __excepts.  Always returns 0.
  */
 __fenv_static inline int
 feraiseexcept(int __excepts)
 {
 	const fexcept_t __wanted = __excepts;
 	/* XXX the flags are merely set via fesetexceptflag(). */
 	(void)fesetexceptflag(&__wanted, __excepts);
 	return (0);
 }
 /*
  * Return the subset of __excepts currently set in the FP status register.
  */
 __fenv_static inline int
 fetestexcept(int __excepts)
 {
 	fexcept_t __status;
 	__rfs(&__status);
 	__status &= __excepts;
 	return (__status);
 }
 /*
  * Always reports -1; no register is consulted.
  */
 __fenv_static inline int
 fegetround(void)
 {
 	/*
 	 * Rationale kept from the original author: on ARM the rounding mode
 	 * is apparently encoded in the instruction format, which leaves the
 	 * dynamic rounding mode indeterminate.  Some FPUs may differ.
 	 */
 	const int __indeterminate = -1;
 	return (__indeterminate);
 }
 /*
  * Unsupported in this header: every request is rejected with -1 and
  * __round is ignored.
  */
 __fenv_static inline int
 fesetround(int __round)
 {
 	(void)__round;
 	return (-1);
 }
 /*
  * Save the FP status register into *__envp.  Always returns 0.
  */
 __fenv_static inline int
 fegetenv(fenv_t *__envp)
 {
 	fenv_t *const __dst = __envp;
 	__rfs(__dst);
 	return (0);
 }
 /*
  * Save the FP status register into *__envp, then write it back with the
  * FE_ALL_EXCEPT flag bits and the _ENABLE_MASK bits cleared.
  * Always returns 0.
  */
 __fenv_static inline int
 feholdexcept(fenv_t *__envp)
 {
 	fenv_t __saved;
 	__rfs(&__saved);
 	*__envp = __saved;
 	__saved = __saved & ~(FE_ALL_EXCEPT | _ENABLE_MASK);
 	__wfs(__saved);
 	return (0);
 }
 /*
  * Write the environment stored in *__envp to the FP status register.
  * Always returns 0.
  */
 __fenv_static inline int
 fesetenv(const fenv_t *__envp)
 {
 	__wfs(__envp[0]);
 	return (0);
 }
 /*
  * Install the environment in *__envp, then re-raise the exceptions that
  * were flagged before the switch.  Always returns 0.
  */
 __fenv_static inline int
 feupdateenv(const fenv_t *__envp)
 {
 	fexcept_t __pending;
 	/* Capture the flags first; the write below replaces them. */
 	__rfs(&__pending);
 	__pending &= FE_ALL_EXCEPT;
 	__wfs(*__envp);
 	(void)feraiseexcept(__pending);
 	return (0);
 }
 #if __BSD_VISIBLE
 /* We currently provide no external definitions of the functions below. */
 /*
  * Set the enable bits for the exceptions in __mask (the FE_* bits shifted
  * up by _FPUSW_SHIFT).  Returns the enable set that was in effect before,
  * expressed as FE_* bits.
  */
 static inline int
 feenableexcept(int __mask)
 {
 	fenv_t __before, __after;
 	__rfs(&__before);
 	__after = __before | ((__mask & FE_ALL_EXCEPT) << _FPUSW_SHIFT);
 	__wfs(__after);
 	return ((__before >> _FPUSW_SHIFT) & FE_ALL_EXCEPT);
 }
 /*
  * Clear the enable bits for the exceptions in __mask.  Returns the enable
  * set that was in effect before, expressed as FE_* bits.
  */
 static inline int
 fedisableexcept(int __mask)
 {
 	fenv_t __before, __after;
 	__rfs(&__before);
 	__after = __before & ~((__mask & FE_ALL_EXCEPT) << _FPUSW_SHIFT);
 	__wfs(__after);
 	return ((__before >> _FPUSW_SHIFT) & FE_ALL_EXCEPT);
 }
 /*
  * Return the current enable bits (_ENABLE_MASK) moved down to FE_* positions.
  */
 static inline int
 fegetexcept(void)
 {
 	fenv_t __status;
 	__rfs(&__status);
 	__status &= _ENABLE_MASK;
 	return (__status >> _FPUSW_SHIFT);
 }
 #endif /* __BSD_VISIBLE */
 __END_DECLS
 #endif	/* !_FENV_H_ */
--- a/src/whetstone/include/openlibm_fenv_i387.h
+++ b/src/whetstone/include/openlibm_fenv_i387.h
@ -1,260 +0,0 @@
 /*-
 * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/lib/msun/i387/fenv.h,v 1.8 2011/10/10 15:43:09 das Exp $
 */
 #ifndef	_FENV_H_
 #define	_FENV_H_
 #include "openlibm_defs.h"
 #include "cdefs-compat.h"
 #include "types-compat.h"
 #ifndef	__fenv_static
 #define	__fenv_static	static
 #endif
 /*                   
 * To preserve binary compatibility with FreeBSD 5.3, we pack the
 * mxcsr into some reserved fields, rather than changing sizeof(fenv_t).
 */
 /*
  * Saved floating-point environment.  The MXCSR value is split into two
  * 16-bit halves stored between the other fields; use __get_mxcsr() and
  * __set_mxcsr() rather than touching the halves directly.
  */
 typedef struct {
 	uint16_t	__control;	/* presumably the x87 control word -- confirm */
 	uint16_t      __mxcsr_hi;	/* upper 16 bits of the packed MXCSR */
 	uint16_t	__status;	/* exception flags are cleared here by feclearexcept() */
 	uint16_t      __mxcsr_lo;	/* lower 16 bits of the packed MXCSR */
 	uint32_t	__tag;		/* presumably the x87 tag word -- confirm */
 	char		__other[16];	/* remainder of the saved image; not interpreted in this header */
 } fenv_t;
 /*
  * Reassemble the 32-bit MXCSR image from the two 16-bit halves of env.
  * Both halves are widened to uint32_t before combining, so a high half
  * with bit 15 set is never shifted into the sign bit of a promoted int.
  */
 #define	__get_mxcsr(env)	(((uint32_t)(env).__mxcsr_hi << 16) |	\
 				 ((uint32_t)(env).__mxcsr_lo))
 /*
  * Split the 32-bit value x into the two 16-bit MXCSR halves of env.
  * Note that x is evaluated twice.
  */
 #define	__set_mxcsr(env, x)	do {				\
 	(env).__mxcsr_hi = (uint16_t)((uint32_t)(x) >> 16);	\
 	(env).__mxcsr_lo = (uint16_t)(x);			\
 } while (0)
 typedef	uint16_t	fexcept_t;
 /* Exception flags */
 #define	FE_INVALID	0x01
 #define	FE_DENORMAL	0x02
 #define	FE_DIVBYZERO	0x04
 #define	FE_OVERFLOW	0x08
 #define	FE_UNDERFLOW	0x10
 #define	FE_INEXACT	0x20
 #define	FE_ALL_EXCEPT	(FE_DIVBYZERO | FE_DENORMAL | FE_INEXACT | \
 			 FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW)
 /* Rounding modes */
 #define	FE_TONEAREST	0x0000
 #define	FE_DOWNWARD	0x0400
 #define	FE_UPWARD	0x0800
 #define	FE_TOWARDZERO	0x0c00
 #define	_ROUND_MASK	(FE_TONEAREST | FE_DOWNWARD | \
 			 FE_UPWARD | FE_TOWARDZERO)
 /*
 * As compared to the x87 control word, the SSE unit's control word
 * has the rounding control bits offset by 3 and the exception mask
 * bits offset by 7.
 */
 #define	_SSE_ROUND_SHIFT	3
 #define	_SSE_EMASK_SHIFT	7
 __BEGIN_DECLS
 /*
  * After testing for SSE support once, we cache the result in __has_sse.
  * __SSE_UNK means the test has not been run yet.
  */
 enum __sse_support { __SSE_YES, __SSE_NO, __SSE_UNK };
 OLM_DLLEXPORT extern enum __sse_support __has_sse;
 OLM_DLLEXPORT int __test_sse(void);
 /*
  * __HAS_SSE() is the constant 1 when the compiler defines __SSE__.
  * Otherwise it consults the cached answer and calls __test_sse() only
  * while the cache still says __SSE_UNK.
  */
 #ifdef __SSE__
 #define	__HAS_SSE()	1
 #else
 #define	__HAS_SSE()	(__has_sse == __SSE_YES ||			\
 			 (__has_sse == __SSE_UNK && __test_sse()))
 #endif
 /* Default floating-point environment */
 OLM_DLLEXPORT extern const fenv_t	__fe_dfl_env;
 #define	FE_DFL_ENV	(&__fe_dfl_env)
 #define	__fldcw(__cw)		__asm __volatile("fldcw %0" : : "m" (__cw))
 #define	__fldenv(__env)		__asm __volatile("fldenv %0" : : "m" (__env))
 #define	__fldenvx(__env)	__asm __volatile("fldenv %0" : : "m" (__env)  \
 				: "st", "st(1)", "st(2)", "st(3)", "st(4)",   \
 				"st(5)", "st(6)", "st(7)")
 #define	__fnclex()		__asm __volatile("fnclex")
 #define	__fnstenv(__env)	__asm __volatile("fnstenv %0" : "=m" (*(__env)))
 #define	__fnstcw(__cw)		__asm __volatile("fnstcw %0" : "=m" (*(__cw)))
 #define	__fnstsw(__sw)		__asm __volatile("fnstsw %0" : "=am" (*(__sw)))
 #define	__fwait()		__asm __volatile("fwait")
 #define	__ldmxcsr(__csr)	__asm __volatile("ldmxcsr %0" : : "m" (__csr))
 #define	__stmxcsr(__csr)	__asm __volatile("stmxcsr %0" : "=m" (*(__csr)))
 /*
  * Clear the exception flags in __excepts on the x87 unit and, when SSE is
  * available, in MXCSR as well.  Always returns 0.
  */
 __fenv_static inline int
 feclearexcept(int __excepts)
 {
 	fenv_t __x87_env;
 	uint32_t __sse_csr;
 	if (__excepts != FE_ALL_EXCEPT) {
 		/* Partial clear: edit the status word of the saved image. */
 		__fnstenv(&__x87_env);
 		__x87_env.__status = __x87_env.__status & ~__excepts;
 		__fldenv(__x87_env);
 	} else {
 		/* Clearing everything needs only fnclex. */
 		__fnclex();
 	}
 	if (__HAS_SSE()) {
 		__stmxcsr(&__sse_csr);
 		__sse_csr = __sse_csr & ~__excepts;
 		__ldmxcsr(__sse_csr);
 	}
 	return (0);
 }
 /*
  * Store in *__flagp the flags from __excepts that are set in either the
  * x87 status word or (when SSE is available) MXCSR.  Always returns 0.
  */
 __fenv_static inline int
 fegetexceptflag(fexcept_t *__flagp, int __excepts)
 {
 	uint16_t __x87_sw;
 	uint32_t __sse_csr = 0;
 	__fnstsw(&__x87_sw);
 	if (__HAS_SSE())
 		__stmxcsr(&__sse_csr);
 	*__flagp = (__sse_csr | __x87_sw) & __excepts;
 	return (0);
 }
 OLM_DLLEXPORT int fesetexceptflag(const fexcept_t *__flagp, int __excepts);
 OLM_DLLEXPORT int feraiseexcept(int __excepts);
 /*
  * Return the subset of __excepts that is set in either the x87 status word
  * or (when SSE is available) MXCSR.
  */
 __fenv_static inline int
 fetestexcept(int __excepts)
 {
 	uint16_t __x87_sw;
 	uint32_t __sse_csr = 0;
 	__fnstsw(&__x87_sw);
 	if (__HAS_SSE())
 		__stmxcsr(&__sse_csr);
 	return ((__x87_sw | __sse_csr) & __excepts);
 }
 /*
  * Report the current rounding mode, i.e. the _ROUND_MASK bits of the x87
  * control word.
  */
 __fenv_static inline int
 fegetround(void)
 {
 	uint16_t __x87_cw;
 	int __mode;
 	/*
 	 * Only the x87 unit is queried; the SSE unit is assumed to be set
 	 * to the same rounding mode.  Reading the x87 control word was
 	 * measured to be roughly 5 times faster than reading the SSE one
 	 * on an Opteron 244.
 	 */
 	__fnstcw(&__x87_cw);
 	__mode = __x87_cw & _ROUND_MASK;
 	return (__mode);
 }
 /*
  * Install __round as the rounding mode of the x87 unit and, when SSE is
  * available, of the SSE unit too.  Returns -1 (changing nothing) when
  * __round has bits outside _ROUND_MASK, 0 otherwise.
  */
 __fenv_static inline int
 fesetround(int __round)
 {
 	uint16_t __x87_cw;
 	uint32_t __sse_csr;
 	if ((__round & ~_ROUND_MASK) != 0)
 		return (-1);
 	/* x87: swap out the rounding field of the control word. */
 	__fnstcw(&__x87_cw);
 	__x87_cw = (__x87_cw & ~_ROUND_MASK) | __round;
 	__fldcw(__x87_cw);
 	if (!__HAS_SSE())
 		return (0);
 	/* SSE: same field, moved up by _SSE_ROUND_SHIFT bits. */
 	__stmxcsr(&__sse_csr);
 	__sse_csr = (__sse_csr & ~(_ROUND_MASK << _SSE_ROUND_SHIFT)) |
 	    (__round << _SSE_ROUND_SHIFT);
 	__ldmxcsr(__sse_csr);
 	return (0);
 }
 OLM_DLLEXPORT int fegetenv(fenv_t *__envp);
 OLM_DLLEXPORT int feholdexcept(fenv_t *__envp);
 /*
  * Load the environment saved in *__envp into the x87 unit and, when SSE is
  * available, load its packed MXCSR value into the SSE unit.
  * Always returns 0.
  */
 __fenv_static inline int
 fesetenv(const fenv_t *__envp)
 {
 	fenv_t __image = *__envp;
 	/* Pull the packed MXCSR out before the image is altered below. */
 	const uint32_t __sse_csr = __get_mxcsr(__image);
 	/* The x87 unit is handed an image whose MXCSR slots are all ones. */
 	__set_mxcsr(__image, 0xffffffff);
 	/*
 	 * XXX fldenvx() rather than fldenv(): restoring the tag word
 	 * clobbers the i387 register stack, and the compiler has to be told.
 	 * Across a real function call the ABI would already permit that,
 	 * but fesetenv() is inlined, so extra care is needed.
 	 */
 	__fldenvx(__image);
 	if (__HAS_SSE())
 		__ldmxcsr(__sse_csr);
 	return (0);
 }
 OLM_DLLEXPORT int feupdateenv(const fenv_t *__envp);
 #if __BSD_VISIBLE
 OLM_DLLEXPORT int feenableexcept(int __mask);
 OLM_DLLEXPORT int fedisableexcept(int __mask);
 /* We currently provide no external definition of fegetexcept(). */
 /*
  * Return the FE_* exceptions whose mask bit is clear in the x87 control word.
  */
 static inline int
 fegetexcept(void)
 {
 	uint16_t __x87_cw;
 	/*
 	 * Only the x87 control word is read; the SSE unit is assumed to
 	 * carry the same masks.
 	 */
 	__fnstcw(&__x87_cw);
 	return (FE_ALL_EXCEPT & ~__x87_cw);
 }
 #endif /* __BSD_VISIBLE */
 __END_DECLS
 #endif	/* !_FENV_H_ */
--- a/src/whetstone/include/openlibm_fenv_loongarch64.h
+++ b/src/whetstone/include/openlibm_fenv_loongarch64.h
@ -1,226 +0,0 @@
 /*-
 * Copyright (c) 2023 Yifan An <me@anyi.fan>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
 #ifndef	_FENV_H_
 #define	_FENV_H_
 #include <stdint.h>
 #include "cdefs-compat.h"
 #ifndef	__fenv_static
 #define	__fenv_static	static
 #endif
 typedef	uint32_t	fenv_t;
 typedef	uint32_t	fexcept_t;
 /* Exception flags */
 #define	FE_INVALID	0x100000
 #define	FE_DIVBYZERO	0x080000
 #define	FE_OVERFLOW	0x040000
 #define	FE_UNDERFLOW	0x020000
 #define	FE_INEXACT	0x010000
 #define	FE_ALL_EXCEPT	(FE_DIVBYZERO | FE_INEXACT | \
 			 FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW)
 /* Rounding modes */
 #define	FE_TONEAREST	0x0000
 #define	FE_TOWARDZERO	0x0100
 #define	FE_DOWNWARD	0x0200
 #define	FE_UPWARD	0x0300
 #define	_ROUND_MASK	(FE_TONEAREST | FE_DOWNWARD | \
 			 FE_UPWARD | FE_TOWARDZERO)
 __BEGIN_DECLS
 /* Default floating-point environment */
 extern const fenv_t	__fe_dfl_env;
 #define	FE_DFL_ENV	(&__fe_dfl_env)
 /*
  * Enable bits, one per exception.  Each equals the matching FE_* flag
  * shifted right by _FPUSW_SHIFT (e.g. _FPU_MASK_V << 16 == FE_INVALID).
  */
 #define	_FPU_MASK_V	0x10
 #define	_FPU_MASK_Z	0x08
 #define	_FPU_MASK_O	0x04
 #define	_FPU_MASK_U	0x02
 #define	_FPU_MASK_I	0x01
 /* Distance between an enable bit and its FE_* flag bit. */
 #define _FPUSW_SHIFT	16
 #define	_ENABLE_MASK	(_FPU_MASK_V | _FPU_MASK_Z | _FPU_MASK_O | _FPU_MASK_U | _FPU_MASK_I)
 /*
  * __rfs(x) reads the control/status register into lvalue x (not a pointer);
  * __wfs(x) writes value x back.
  */
 #define __rfs(__fpsr)   __asm __volatile("movfcsr2gr %0,$r0" : "=r"(__fpsr))
 #define __wfs(__fpsr)   __asm __volatile("movgr2fcsr $r0,%0" : : "r"(__fpsr))
 /*
  * Clear the bits selected by __excepts in the control/status register.
  * Always returns 0.
  */
 __fenv_static inline int
 feclearexcept(int __excepts)
 {
 	fexcept_t __fcsr;
 	__rfs(__fcsr);
 	__fcsr = __fcsr & ~__excepts;
 	__wfs(__fcsr);
 	return (0);
 }
 /*
  * Store in *__flagp the register bits selected by __excepts.
  * Always returns 0.
  */
 __fenv_static inline int
 fegetexceptflag(fexcept_t *__flagp, int __excepts)
 {
 	fexcept_t __fcsr;
 	__rfs(__fcsr);
 	__fcsr &= __excepts;
 	*__flagp = __fcsr;
 	return (0);
 }
 /*
  * Replace the register bits selected by __excepts with the corresponding
  * bits of *__flagp; other bits are kept.  Always returns 0.
  */
 __fenv_static inline int
 fesetexceptflag(const fexcept_t *__flagp, int __excepts)
 {
 	fexcept_t __fcsr;
 	__rfs(__fcsr);
 	__fcsr = (__fcsr & ~__excepts) | (*__flagp & __excepts);
 	__wfs(__fcsr);
 	return (0);
 }
 /*
  * Raise the exceptions in __excepts.  Always returns 0.
  */
 __fenv_static inline int
 feraiseexcept(int __excepts)
 {
 	const fexcept_t __wanted = __excepts;
 	/* XXX the flags are merely set via fesetexceptflag(). */
 	(void)fesetexceptflag(&__wanted, __excepts);
 	return (0);
 }
 /*
  * Return the subset of __excepts currently set in the register.
  */
 __fenv_static inline int
 fetestexcept(int __excepts)
 {
 	fexcept_t __fcsr;
 	__rfs(__fcsr);
 	__fcsr &= __excepts;
 	return (__fcsr);
 }
 /*
  * Return the _ROUND_MASK bits of the control/status register.
  */
 __fenv_static inline int
 fegetround(void)
 {
 	fexcept_t __fcsr;
 	__rfs(__fcsr);
 	__fcsr &= _ROUND_MASK;
 	return (__fcsr);
 }
 /*
  * Install __round in the _ROUND_MASK field of the register.  Returns 1
  * (changing nothing) when __round has bits outside _ROUND_MASK, 0 otherwise.
  */
 __fenv_static inline int
 fesetround(int __round)
 {
 	fexcept_t __fcsr;
 	if (__round & ~_ROUND_MASK)
 		return (1);
 	__rfs(__fcsr);
 	__fcsr = (__fcsr & ~_ROUND_MASK) | __round;
 	__wfs(__fcsr);
 	return (0);
 }
 /*
  * Save the control/status register into *__envp.  Always returns 0.
  */
 __fenv_static inline int
 fegetenv(fenv_t *__envp)
 {
 	__rfs(__envp[0]);
 	return (0);
 }
 /*
  * Save the register into *__envp, then write it back with every FE_* flag
  * and every _FPU_MASK_* enable bit cleared.  Always returns 0.
  */
 __fenv_static inline int
 feholdexcept(fenv_t *__envp)
 {
 	fenv_t __saved;
 	__rfs(__saved);
 	*__envp = __saved;
 	__saved = __saved & ~(FE_ALL_EXCEPT | _FPU_MASK_V | _FPU_MASK_Z |
 	    _FPU_MASK_O | _FPU_MASK_U | _FPU_MASK_I);
 	__wfs(__saved);
 	return (0);
 }
 /*
  * Write the environment stored in *__envp to the control/status register.
  * Always returns 0.
  */
 __fenv_static inline int
 fesetenv(const fenv_t *__envp)
 {
 	__wfs(__envp[0]);
 	return (0);
 }
 /*
  * Install the environment in *__envp, then re-raise the exceptions that
  * were flagged before the switch.  Always returns 0.
  */
 __fenv_static inline int
 feupdateenv(const fenv_t *__envp)
 {
 	fexcept_t __pending;
 	/* Capture the flags first; the write below replaces them. */
 	__rfs(__pending);
 	__pending &= FE_ALL_EXCEPT;
 	__wfs(*__envp);
 	(void)feraiseexcept(__pending);
 	return (0);
 }
 #if __BSD_VISIBLE
 /*
  * Set the enable bits matching the FE_* exceptions in __mask.  Returns the
  * enable set that was in effect before, expressed as FE_* bits.
  */
 static inline int
 feenableexcept(int __mask)
 {
 	fenv_t __fcsr, __previous;
 	__rfs(__fcsr);
 	/* Enable bits sit _FPUSW_SHIFT below their FE_* flags. */
 	__previous = (__fcsr & _ENABLE_MASK) << _FPUSW_SHIFT;
 	__fcsr = __fcsr | ((__mask & FE_ALL_EXCEPT) >> _FPUSW_SHIFT);
 	__wfs(__fcsr);
 	return (__previous);
 }
 /*
  * Clear the enable bits matching the FE_* exceptions in __mask.  Returns
  * the enable set that was in effect before, expressed as FE_* bits.
  */
 static inline int
 fedisableexcept(int __mask)
 {
 	fenv_t __fcsr, __previous;
 	__rfs(__fcsr);
 	/* Enable bits sit _FPUSW_SHIFT below their FE_* flags. */
 	__previous = (__fcsr & _ENABLE_MASK) << _FPUSW_SHIFT;
 	__fcsr = __fcsr & ~((__mask & FE_ALL_EXCEPT) >> _FPUSW_SHIFT);
 	__wfs(__fcsr);
 	return (__previous);
 }
 /*
  * Return the current enable bits (_ENABLE_MASK) moved up to FE_* positions.
  */
 static inline int
 fegetexcept(void)
 {
 	fenv_t __fcsr;
 	__rfs(__fcsr);
 	__fcsr &= _ENABLE_MASK;
 	return (__fcsr << _FPUSW_SHIFT);
 }
 #endif /* __BSD_VISIBLE */
 __END_DECLS
 #endif	/* !_FENV_H_ */
--- a/src/whetstone/include/openlibm_fenv_mips.h
+++ b/src/whetstone/include/openlibm_fenv_mips.h
@ -1,278 +0,0 @@
 /*-
 * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
 #ifndef	_FENV_H_
 #define	_FENV_H_
 #include <stdint.h>
 #include "cdefs-compat.h"
 #ifndef	__fenv_static
 #define	__fenv_static	static
 #endif
 typedef	uint32_t	fenv_t;
 typedef	uint32_t	fexcept_t;
 /* Exception flags */
 #ifdef __mips_soft_float
 #define	_FPUSW_SHIFT	16
 #define	FE_INVALID	0x0001
 #define	FE_DIVBYZERO	0x0002
 #define	FE_OVERFLOW	0x0004
 #define	FE_UNDERFLOW	0x0008
 #define	FE_INEXACT	0x0010
 #else
 #define	_FCSR_CAUSE_SHIFT	10
 #define	FE_INVALID	0x0040
 #define	FE_DIVBYZERO	0x0020
 #define	FE_OVERFLOW	0x0010
 #define	FE_UNDERFLOW	0x0008
 #define	FE_INEXACT	0x0004
 #endif
 #define	FE_ALL_EXCEPT	(FE_DIVBYZERO | FE_INEXACT | \
 			 FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW)
 /* Rounding modes */
 #define	FE_TONEAREST	0x0000
 #define	FE_TOWARDZERO	0x0001
 #define	FE_UPWARD	0x0002
 #define	FE_DOWNWARD	0x0003
 #define	_ROUND_MASK	(FE_TONEAREST | FE_DOWNWARD | \
 			 FE_UPWARD | FE_TOWARDZERO)
 __BEGIN_DECLS
 /* Default floating-point environment */
 extern const fenv_t	__fe_dfl_env;
 #define	FE_DFL_ENV	(&__fe_dfl_env)
 /* We need to be able to map status flag positions to mask flag positions */
 #define	_ENABLE_SHIFT	5
 #define	_ENABLE_MASK	(FE_ALL_EXCEPT << _ENABLE_SHIFT)
 /*
  * Hard-float builds only: __cfc1(x) reads FP control register $31 into
  * lvalue x, __ctc1(x) writes value x to it.  Both take the object itself,
  * not a pointer.
  */
 #ifndef __mips_soft_float
 #define	__cfc1(__fcsr)	__asm __volatile("cfc1 %0, $31" : "=r" (__fcsr))
 #define	__ctc1(__fcsr)	__asm __volatile("ctc1 %0, $31" :: "r" (__fcsr))
 #endif
 #ifdef __mips_soft_float
 int feclearexcept(int __excepts);
 int fegetexceptflag(fexcept_t *__flagp, int __excepts);
 int fesetexceptflag(const fexcept_t *__flagp, int __excepts);
 int feraiseexcept(int __excepts);
 int fetestexcept(int __excepts);
 int fegetround(void);
 int fesetround(int __round);
 int fegetenv(fenv_t *__envp);
 int feholdexcept(fenv_t *__envp);
 int fesetenv(const fenv_t *__envp);
 int feupdateenv(const fenv_t *__envp);
 #else
 /*
  * Clear the selected exceptions, both at their FE_* flag positions and at
  * the positions _FCSR_CAUSE_SHIFT bits higher.  Bits of __excepts outside
  * FE_ALL_EXCEPT are ignored.  Always returns 0.
  */
 __fenv_static inline int
 feclearexcept(int __excepts)
 {
 	const int __sel = __excepts & FE_ALL_EXCEPT;
 	fexcept_t __reg;
 	__cfc1(__reg);
 	__reg = __reg & ~(__sel | (__sel << _FCSR_CAUSE_SHIFT));
 	__ctc1(__reg);
 	return (0);
 }
 /*
  * Store in *__flagp the register bits selected by __excepts (limited to
  * FE_ALL_EXCEPT).  Always returns 0.
  */
 __fenv_static inline int
 fegetexceptflag(fexcept_t *__flagp, int __excepts)
 {
 	const int __sel = __excepts & FE_ALL_EXCEPT;
 	fexcept_t __reg;
 	__cfc1(__reg);
 	*__flagp = __reg & __sel;
 	return (0);
 }
 /*
  * Replace the flag bits selected by __excepts (limited to FE_ALL_EXCEPT)
  * with the corresponding bits of *__flagp.  Always returns 0.
  */
 __fenv_static inline int
 fesetexceptflag(const fexcept_t *__flagp, int __excepts)
 {
 	const int __sel = __excepts & FE_ALL_EXCEPT;
 	fexcept_t __reg;
 	__cfc1(__reg);
 	__reg = (__reg & ~__sel) | (*__flagp & __sel);
 	__ctc1(__reg);
 	return (0);
 }
 /*
  * Set the selected exceptions, both at their FE_* flag positions and at
  * the positions _FCSR_CAUSE_SHIFT bits higher.  Bits of __excepts outside
  * FE_ALL_EXCEPT are ignored.  Always returns 0.
  */
 __fenv_static inline int
 feraiseexcept(int __excepts)
 {
 	const int __sel = __excepts & FE_ALL_EXCEPT;
 	fexcept_t __reg;
 	__cfc1(__reg);
 	__reg = __reg | (__sel | (__sel << _FCSR_CAUSE_SHIFT));
 	__ctc1(__reg);
 	return (0);
 }
 /*
  * Return the subset of __excepts (limited to FE_ALL_EXCEPT) that is
  * currently set in the register.
  */
 __fenv_static inline int
 fetestexcept(int __excepts)
 {
 	const int __sel = __excepts & FE_ALL_EXCEPT;
 	fexcept_t __reg;
 	__cfc1(__reg);
 	return (__reg & __sel);
 }
 /*
  * Return the _ROUND_MASK bits of the register.
  */
 __fenv_static inline int
 fegetround(void)
 {
 	fexcept_t __reg;
 	__cfc1(__reg);
 	__reg &= _ROUND_MASK;
 	return (__reg);
 }
 /*
  * Install __round in the _ROUND_MASK field of the register.  Returns -1
  * (changing nothing) when __round has bits outside _ROUND_MASK, 0 otherwise.
  */
 __fenv_static inline int
 fesetround(int __round)
 {
 	fexcept_t __reg;
 	if ((__round & ~_ROUND_MASK) != 0)
 		return (-1);
 	__cfc1(__reg);
 	__reg = (__reg & ~_ROUND_MASK) | __round;
 	__ctc1(__reg);
 	return (0);
 }
 /*
  * Save the register into *__envp.  Always returns 0.
  */
 __fenv_static inline int
 fegetenv(fenv_t *__envp)
 {
 	__cfc1(__envp[0]);
 	return (0);
 }
 /*
  * Save the register into *__envp, then write it back with the
  * FE_ALL_EXCEPT flag bits and the _ENABLE_MASK bits cleared.
  * Always returns 0.
  */
 __fenv_static inline int
 feholdexcept(fenv_t *__envp)
 {
 	fexcept_t __reg;
 	__cfc1(__reg);
 	*__envp = __reg;
 	__reg = __reg & ~(FE_ALL_EXCEPT | _ENABLE_MASK);
 	__ctc1(__reg);
 	return (0);
 }
 /*
  * Write the environment stored in *__envp to the register.
  * Always returns 0.
  */
 __fenv_static inline int
 fesetenv(const fenv_t *__envp)
 {
 	__ctc1(__envp[0]);
 	return (0);
 }
 /*
  * Install the environment in *__envp, then re-raise whatever was flagged
  * before the switch (feraiseexcept() itself keeps only FE_ALL_EXCEPT bits).
  * Always returns 0.
  */
 __fenv_static inline int
 feupdateenv(const fenv_t *__envp)
 {
 	fexcept_t __pending;
 	/* Capture the old register first; fesetenv() replaces it. */
 	__cfc1(__pending);
 	(void)fesetenv(__envp);
 	(void)feraiseexcept(__pending);
 	return (0);
 }
 #endif /* !__mips_soft_float */
 #if __BSD_VISIBLE
 /* We currently provide no external definitions of the functions below. */
 #ifdef __mips_soft_float
 int feenableexcept(int __mask);
 int fedisableexcept(int __mask);
 int fegetexcept(void);
 #else
 /*
  * Set the enable bits for the exceptions in __mask (the FE_* bits shifted
  * up by _ENABLE_SHIFT).  Returns the enable set that was in effect before,
  * expressed as FE_* bits.
  */
 static inline int
 feenableexcept(int __mask)
 {
 	fenv_t __before, __after;
 	__cfc1(__before);
 	__after = __before | ((__mask & FE_ALL_EXCEPT) << _ENABLE_SHIFT);
 	__ctc1(__after);
 	return ((__before >> _ENABLE_SHIFT) & FE_ALL_EXCEPT);
 }
 /*
  * Clear the enable bits for the exceptions in __mask.  Returns the enable
  * set that was in effect before, expressed as FE_* bits.
  */
 static inline int
 fedisableexcept(int __mask)
 {
 	fenv_t __before, __after;
 	__cfc1(__before);
 	__after = __before & ~((__mask & FE_ALL_EXCEPT) << _ENABLE_SHIFT);
 	__ctc1(__after);
 	return ((__before >> _ENABLE_SHIFT) & FE_ALL_EXCEPT);
 }
 /*
  * Return the current enable bits (_ENABLE_MASK) moved down to FE_* positions.
  */
 static inline int
 fegetexcept(void)
 {
 	fexcept_t __reg;
 	__cfc1(__reg);
 	__reg &= _ENABLE_MASK;
 	return (__reg >> _ENABLE_SHIFT);
 }
 #endif /* !__mips_soft_float */
 #endif /* __BSD_VISIBLE */
 __END_DECLS
 #endif	/* !_FENV_H_ */
--- a/src/whetstone/include/openlibm_fenv_powerpc.h
+++ b/src/whetstone/include/openlibm_fenv_powerpc.h
@ -1,279 +0,0 @@
 /*-
 * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
 #ifndef	_FENV_H_
 #define	_FENV_H_
 #include <sys/types.h>
 #ifndef	__fenv_static
 #define	__fenv_static	static
 #endif
 typedef	__uint32_t	fenv_t;
 typedef	__uint32_t	fexcept_t;
 /* Exception flags */
 #define	FE_INEXACT	0x02000000
 #define	FE_DIVBYZERO	0x04000000
 #define	FE_UNDERFLOW	0x08000000
 #define	FE_OVERFLOW	0x10000000
 #define	FE_INVALID	0x20000000	/* all types of invalid FP ops */
 /*
 * The PowerPC architecture has extra invalid flags that indicate the
 * specific type of invalid operation occurred.  These flags may be
 * tested, set, and cleared---but not masked---separately.  All of
 * these bits are cleared when FE_INVALID is cleared, but only
 * FE_VXSOFT is set when FE_INVALID is explicitly set in software.
 */
 #define	FE_VXCVI	0x00000100	/* invalid integer convert */
 #define	FE_VXSQRT	0x00000200	/* square root of a negative */
 #define	FE_VXSOFT	0x00000400	/* software-requested exception */
 #define	FE_VXVC		0x00080000	/* ordered comparison involving NaN */
 #define	FE_VXIMZ	0x00100000	/* inf * 0 */
 #define	FE_VXZDZ	0x00200000	/* 0 / 0 */
 #define	FE_VXIDI	0x00400000	/* inf / inf */
 #define	FE_VXISI	0x00800000	/* inf - inf */
 #define	FE_VXSNAN	0x01000000	/* operation on a signalling NaN */
 #define	FE_ALL_INVALID	(FE_VXCVI | FE_VXSQRT | FE_VXSOFT | FE_VXVC | \
 			 FE_VXIMZ | FE_VXZDZ | FE_VXIDI | FE_VXISI | \
 			 FE_VXSNAN | FE_INVALID)
 #define	FE_ALL_EXCEPT	(FE_DIVBYZERO | FE_INEXACT | \
 			 FE_ALL_INVALID | FE_OVERFLOW | FE_UNDERFLOW)
 /* Rounding modes */
 #define	FE_TONEAREST	0x0000
 #define	FE_TOWARDZERO	0x0001
 #define	FE_UPWARD	0x0002
 #define	FE_DOWNWARD	0x0003
 #define	_ROUND_MASK	(FE_TONEAREST | FE_DOWNWARD | \
 			 FE_UPWARD | FE_TOWARDZERO)
 __BEGIN_DECLS
 /* Default floating-point environment */
 extern const fenv_t	__fe_dfl_env;
 #define	FE_DFL_ENV	(&__fe_dfl_env)
 /* We need to be able to map status flag positions to mask flag positions */
 #define	_FPUSW_SHIFT	22
 #define	_ENABLE_MASK	((FE_DIVBYZERO | FE_INEXACT | FE_INVALID | \
 			 FE_OVERFLOW | FE_UNDERFLOW) >> _FPUSW_SHIFT)
 /*
  * __mffs(p) stores the status/control register through pointer p using a
  * floating-point register operand; __mtfsf(v) writes value v back.  With
  * _SOFT_FLOAT both expand to nothing, so objects "read" with __mffs() stay
  * uninitialised in that configuration.
  */
 #ifndef _SOFT_FLOAT
 #define	__mffs(__env)	__asm __volatile("mffs %0" : "=f" (*(__env)))
 #define	__mtfsf(__env)	__asm __volatile("mtfsf 255,%0" : : "f" (__env))
 #else
 #define	__mffs(__env)
 #define	__mtfsf(__env)
 #endif
 /*
  * Overlay used to move the register through a double (__d, the form
  * __mffs()/__mtfsf() operate on) while editing it as a 32-bit integer
  * (__bits.__reg).  The member order depends on byte order; __junk is the
  * other half of the double and is never interpreted in this header.
  */
 union __fpscr {
 	double __d;
 	struct {
 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 		fenv_t __reg;
 		__uint32_t __junk;
 #else
 		__uint32_t __junk;
 		fenv_t __reg;
 #endif
 	} __bits;
 };
 __fenv_static inline int
 feclearexcept(int __excepts)
 {
 	union __fpscr __r;
 	if (__excepts & FE_INVALID)
 		__excepts |= FE_ALL_INVALID;
 	__mffs(&__r.__d);
 	__r.__bits.__reg &= ~__excepts;
 	__mtfsf(__r.__d);
 	return (0);
 }
 /*
  * Store in *__flagp the register bits selected by __excepts.
  * Always returns 0.
  */
 __fenv_static inline int
 fegetexceptflag(fexcept_t *__flagp, int __excepts)
 {
 	union __fpscr __u;
 	fexcept_t __flags;
 	__mffs(&__u.__d);
 	__flags = __u.__bits.__reg & __excepts;
 	*__flagp = __flags;
 	return (0);
 }
 __fenv_static inline int
 fesetexceptflag(const fexcept_t *__flagp, int __excepts)
 {
 	union __fpscr __r;
 	if (__excepts & FE_INVALID)
 		__excepts |= FE_ALL_EXCEPT;
 	__mffs(&__r.__d);
 	__r.__bits.__reg &= ~__excepts;
 	__r.__bits.__reg |= *__flagp & __excepts;
 	__mtfsf(__r.__d);
 	return (0);
 }
 __fenv_static inline int
 feraiseexcept(int __excepts)
 {
 	union __fpscr __r;
 	if (__excepts & FE_INVALID)
 		__excepts |= FE_VXSOFT;
 	__mffs(&__r.__d);
 	__r.__bits.__reg |= __excepts;
 	__mtfsf(__r.__d);
 	return (0);
 }
 __fenv_static inline int
 fetestexcept(int __excepts)
 {
 	union __fpscr __r;
 	__mffs(&__r.__d);
 	return (__r.__bits.__reg & __excepts);
 }
 __fenv_static inline int
 fegetround(void)
 {
 	union __fpscr __r;
 	__mffs(&__r.__d);
 	return (__r.__bits.__reg & _ROUND_MASK);
 }
 __fenv_static inline int
 fesetround(int __round)
 {
 	union __fpscr __r;
 	if (__round & ~_ROUND_MASK)
 		return (-1);
 	__mffs(&__r.__d);
 	__r.__bits.__reg &= ~_ROUND_MASK;
 	__r.__bits.__reg |= __round;
 	__mtfsf(__r.__d);
 	return (0);
 }
 /*
  * Save the integer view of the register into *__envp.  Always returns 0.
  */
 __fenv_static inline int
 fegetenv(fenv_t *__envp)
 {
 	union __fpscr __u;
 	__mffs(&__u.__d);
 	__envp[0] = __u.__bits.__reg;
 	return (0);
 }
 /*
  * Save the current environment into *__envp, then write the register back
  * with the FE_ALL_EXCEPT flag bits and the _ENABLE_MASK bits cleared.
  * Always returns 0.
  *
  * The saved value is the integer view of the register (__bits.__reg), the
  * same representation fegetenv() stores and fesetenv() expects.  Assigning
  * the double view (__d) instead would convert its numeric value to an
  * integer and lose the register bits.
  */
 __fenv_static inline int
 feholdexcept(fenv_t *__envp)
 {
 	union __fpscr __r;
 	__mffs(&__r.__d);
 	*__envp = __r.__bits.__reg;
 	__r.__bits.__reg &= ~(FE_ALL_EXCEPT | _ENABLE_MASK);
 	__mtfsf(__r.__d);
 	return (0);
 }
 __fenv_static inline int
 fesetenv(const fenv_t *__envp)
 {
 	union __fpscr __r;
 	__r.__bits.__reg = *__envp;
 	__mtfsf(__r.__d);
 	return (0);
 }
 __fenv_static inline int
 feupdateenv(const fenv_t *__envp)
 {
 	union __fpscr __r;
 	__mffs(&__r.__d);
 	__r.__bits.__reg &= FE_ALL_EXCEPT;
 	__r.__bits.__reg |= *__envp;
 	__mtfsf(__r.__d);
 	return (0);
 }
 #if __BSD_VISIBLE
 /* We currently provide no external definitions of the functions below. */
 static inline int
 feenableexcept(int __mask)
 {
 	union __fpscr __r;
 	fenv_t __oldmask;
 	__mffs(&__r.__d);
 	__oldmask = __r.__bits.__reg;
 	__r.__bits.__reg |= (__mask & FE_ALL_EXCEPT) >> _FPUSW_SHIFT;
 	__mtfsf(__r.__d);
 	return ((__oldmask & _ENABLE_MASK) << _FPUSW_SHIFT);
 }
 static inline int
 fedisableexcept(int __mask)
 {
 	union __fpscr __r;
 	fenv_t __oldmask;
 	__mffs(&__r.__d);
 	__oldmask = __r.__bits.__reg;
 	__r.__bits.__reg &= ~((__mask & FE_ALL_EXCEPT) >> _FPUSW_SHIFT);
 	__mtfsf(__r.__d);
 	return ((__oldmask & _ENABLE_MASK) << _FPUSW_SHIFT);
 }
 static inline int
 fegetexcept(void)
 {
 	union __fpscr __r;
 	__mffs(&__r.__d);
 	return ((__r.__bits.__reg & _ENABLE_MASK) << _FPUSW_SHIFT);
 }
 #endif /* __BSD_VISIBLE */
 __END_DECLS
 #endif	/* !_FENV_H_ */
--- a/src/whetstone/include/openlibm_fenv_riscv.h
+++ b/src/whetstone/include/openlibm_fenv_riscv.h
@ -1,261 +0,0 @@
 /*-
 * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
 * Copyright (c) 2015-2016 Ruslan Bukin <br@bsdpad.com>
 * All rights reserved.
 *
 * Portions of this software were developed by SRI International and the
 * University of Cambridge Computer Laboratory under DARPA/AFRL contract
 * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
 *
 * Portions of this software were developed by the University of Cambridge
 * Computer Laboratory as part of the CTSRD Project, with support from the
 * UK Higher Education Innovation Fund (HEIF).
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/lib/msun/riscv/fenv.h 332792 2018-04-19 20:36:15Z brooks $
 */
 #ifndef	_FENV_H_
 #define	_FENV_H_
 #include <stdint.h>
 #include "cdefs-compat.h"
 #ifndef	__fenv_static
 #define	__fenv_static	static
 #endif
 typedef	__uint64_t	fenv_t;
 typedef	__uint64_t	fexcept_t;
 /* Exception flags */
 #define	FE_INVALID	0x0010
 #define	FE_DIVBYZERO	0x0008
 #define	FE_OVERFLOW	0x0004
 #define	FE_UNDERFLOW	0x0002
 #define	FE_INEXACT	0x0001
 #define	FE_ALL_EXCEPT	(FE_DIVBYZERO | FE_INEXACT | \
 			 FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW)
 /*
 * RISC-V Rounding modes
 */
 #define	_ROUND_SHIFT	5
 #define	FE_TONEAREST	(0x00 << _ROUND_SHIFT)
 #define	FE_TOWARDZERO	(0x01 << _ROUND_SHIFT)
 #define	FE_DOWNWARD	(0x02 << _ROUND_SHIFT)
 #define	FE_UPWARD	(0x03 << _ROUND_SHIFT)
 #define	_ROUND_MASK	(FE_TONEAREST | FE_DOWNWARD | \
 			 FE_UPWARD | FE_TOWARDZERO)
 __BEGIN_DECLS
 /* Default floating-point environment */
 extern const fenv_t	__fe_dfl_env;
 #define	FE_DFL_ENV	(&__fe_dfl_env)
 #if !defined(__riscv_float_abi_soft) && !defined(__riscv_float_abi_double)
 #if defined(__riscv_float_abi_single)
 #error single precision floating point ABI not supported
 #else
 #error compiler did not set soft/hard float macros
 #endif
 #endif
 #ifndef __riscv_float_abi_soft
 #define	__rfs(__fcsr)	__asm __volatile("csrr %0, fcsr" : "=r" (__fcsr))
 #define	__wfs(__fcsr)	__asm __volatile("csrw fcsr, %0" :: "r" (__fcsr))
 #endif
 #ifdef __riscv_float_abi_soft
 int feclearexcept(int __excepts);
 int fegetexceptflag(fexcept_t *__flagp, int __excepts);
 int fesetexceptflag(const fexcept_t *__flagp, int __excepts);
 int feraiseexcept(int __excepts);
 int fetestexcept(int __excepts);
 int fegetround(void);
 int fesetround(int __round);
 int fegetenv(fenv_t *__envp);
 int feholdexcept(fenv_t *__envp);
 int fesetenv(const fenv_t *__envp);
 int feupdateenv(const fenv_t *__envp);
 #else
 /*
  * Clear the exception flags selected by __excepts.  The argument is passed
  * unmodified as the bit mask of a "csrc" (clear bits) on the fflags CSR.
  * Always returns 0.
  */
 __fenv_static inline int
 feclearexcept(int __excepts)
 {
 	__asm __volatile("csrc fflags, %0" :: "r"(__excepts));
 	return (0);
 }
 /*
  * Store in *__flagp the bits of fcsr that are also set in __excepts.
  * Always returns 0.
  */
 __fenv_static inline int
 fegetexceptflag(fexcept_t *__flagp, int __excepts)
 {
 	fexcept_t __cur;
 	/* __rfs() reads the whole fcsr; keep only the requested bits. */
 	__rfs(__cur);
 	*__flagp = __cur & __excepts;
 	return (0);
 }
 /*
  * Make the flags selected by __excepts match the state saved in *__flagp:
  * clear all selected flags, then set those that are present in *__flagp.
  * Always returns 0.
  */
 __fenv_static inline int
 fesetexceptflag(const fexcept_t *__flagp, int __excepts)
 {
 	/* Selected flags that must end up set. */
 	fexcept_t __wanted = *__flagp & __excepts;
 	__asm __volatile("csrc fflags, %0" :: "r"(__excepts));
 	__asm __volatile("csrs fflags, %0" :: "r"(__wanted));
 	return (0);
 }
 /*
  * Raise the exception flags selected by __excepts.  The argument is passed
  * unmodified as the bit mask of a "csrs" (set bits) on the fflags CSR.
  * Always returns 0.
  */
 __fenv_static inline int
 feraiseexcept(int __excepts)
 {
 	__asm __volatile("csrs fflags, %0" :: "r"(__excepts));
 	return (0);
 }
 /*
  * Return the bits of fcsr that are also set in __excepts.
  */
 __fenv_static inline int
 fetestexcept(int __excepts)
 {
 	fexcept_t __cur;
 	__rfs(__cur);
 	/* Non-zero when at least one requested bit is currently set. */
 	return (__cur & __excepts);
 }
 /*
  * Return the _ROUND_MASK bits of fcsr, i.e. one of the FE_* rounding-mode
  * values defined in this header.
  */
 __fenv_static inline int
 fegetround(void)
 {
 	fexcept_t __cur;
 	__rfs(__cur);
 	/* The FE_* rounding constants are already shifted by _ROUND_SHIFT. */
 	return (__cur & _ROUND_MASK);
 }
 /*
  * Install a new rounding mode.  Returns -1, without touching fcsr, when
  * __round has bits outside _ROUND_MASK; returns 0 otherwise.
  */
 __fenv_static inline int
 fesetround(int __round)
 {
 	fexcept_t __cur;
 	if ((__round & ~_ROUND_MASK) != 0)
 		return (-1);
 	/* Read-modify-write: only the _ROUND_MASK bits are replaced. */
 	__rfs(__cur);
 	__cur = (__cur & ~_ROUND_MASK) | __round;
 	__wfs(__cur);
 	return (0);
 }
 /*
  * Store the current fcsr value in *__envp.  Always returns 0.
  */
 __fenv_static inline int
 fegetenv(fenv_t *__envp)
 {
 	fenv_t __cur;
 	/* The whole fcsr is the environment on this target. */
 	__rfs(__cur);
 	*__envp = __cur;
 	return (0);
 }
 /*
  * Save the current environment (the whole fcsr) in *__envp and clear all
  * exception flags.  Always returns 0.
  *
  * The other functions in this header note that this target has no
  * exception traps, so there is no trap-enable state to change here.
  * Returning failure without writing *__envp would leave a caller that goes
  * on to call feupdateenv(__envp) installing an uninitialised environment.
  */
 __fenv_static inline int
 feholdexcept(fenv_t *__envp)
 {
 	/* Save fcsr before modifying it. */
 	__rfs(*__envp);
 	/* Start the held section with no flags raised. */
 	__asm __volatile("csrc fflags, %0" :: "r"(FE_ALL_EXCEPT));
 	return (0);
 }
 /*
  * Write the environment stored in *__envp to fcsr.  Always returns 0.
  */
 __fenv_static inline int
 fesetenv(const fenv_t *__envp)
 {
 	/* Load the saved value first, then hand it to the CSR write. */
 	const fenv_t __next = *__envp;
 	__wfs(__next);
 	return (0);
 }
 /*
  * Install the environment in *__envp, then re-raise the exception flags
  * that were set before the switch.  Always returns 0.
  */
 __fenv_static inline int
 feupdateenv(const fenv_t *__envp)
 {
 	fexcept_t __pending;
 	/* Capture the flags that are raised right now. */
 	__rfs(__pending);
 	__pending &= FE_ALL_EXCEPT;
 	__wfs(*__envp);
 	feraiseexcept(__pending);
 	return (0);
 }
 #endif /* !__riscv_float_abi_soft */
 #if __BSD_VISIBLE
 /* We currently provide no external definitions of the functions below. */
 #ifdef __riscv_float_abi_soft
 int feenableexcept(int __mask);
 int fedisableexcept(int __mask);
 int fegetexcept(void);
 #else
 /*
  * Exception traps are not available here, so nothing can be enabled:
  * the request is ignored and -1 is returned.
  */
 static inline int
 feenableexcept(int __mask)
 {
 	(void)__mask;	/* unused: no exception traps */
 	return -1;
 }
 /*
  * Exception traps are not available here, so there is nothing to disable:
  * the request is ignored and 0 is returned.
  */
 static inline int
 fedisableexcept(int __mask)
 {
 	(void)__mask;	/* unused: no exception traps */
 	return 0;
 }
 /*
  * Exception traps are not available here, so the enabled set is always
  * empty and 0 is returned.
  */
 static inline int
 fegetexcept(void)
 {
 	return 0;	/* no exception traps */
 }
 #endif /* !__riscv_float_abi_soft */
 #endif /* __BSD_VISIBLE */
 __END_DECLS
 #endif	/* !_FENV_H_ */
--- a/src/whetstone/include/openlibm_fenv_s390.h
+++ b/src/whetstone/include/openlibm_fenv_s390.h
@ -1,235 +0,0 @@
 /*-
 * Copyright (c) 2016 Dan Horák <dan[at]danny.cz>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
 #ifndef	_FENV_H_
 #define	_FENV_H_
 #include <sys/types.h>
 #ifndef	__fenv_static
 #define	__fenv_static	static
 #endif
 typedef	__uint32_t	fenv_t;
 typedef	__uint32_t	fexcept_t;
 /* Exception flags */
 #define	FE_INEXACT	0x080000
 #define	FE_UNDERFLOW	0x100000
 #define	FE_OVERFLOW	0x200000
 #define	FE_DIVBYZERO	0x400000
 #define	FE_INVALID	0x800000	/* all types of invalid FP ops */
 #define	FE_ALL_EXCEPT	(FE_INVALID | FE_DIVBYZERO | FE_INEXACT | FE_OVERFLOW | FE_UNDERFLOW)
 /* Rounding modes */
 #define	FE_TONEAREST	0x0000
 #define	FE_TOWARDZERO	0x0001
 #define	FE_UPWARD	0x0002
 #define	FE_DOWNWARD	0x0003
 #define	_ROUND_MASK	(FE_TONEAREST | FE_DOWNWARD | \
 			 FE_UPWARD | FE_TOWARDZERO)
 __BEGIN_DECLS
 /* Default floating-point environment */
 extern const fenv_t	__fe_dfl_env;
 #define	FE_DFL_ENV	(&__fe_dfl_env)
 /*
  * We need to be able to map status flag positions to mask flag positions:
  * each mask (enable) bit sits _FPC_EXC_MASK_SHIFT bits above its flag bit.
  * The flag set is converted to fenv_t before shifting because the shifted
  * value does not fit in a signed int.
  */
 #define	_FPC_EXC_MASK_SHIFT	8
 #define	_ENABLE_MASK	((fenv_t)(FE_DIVBYZERO | FE_INEXACT | FE_INVALID | \
 			 FE_OVERFLOW | FE_UNDERFLOW) << _FPC_EXC_MASK_SHIFT)
 /* Macros for accessing the hardware control word.  */
 #define _FPU_GETCW(cw)  __asm__ __volatile__ ("efpc %0,0" : "=d" (cw))
 #define _FPU_SETCW(cw)  __asm__ __volatile__ ("sfpc  %0,0" : : "d" (cw))
 __fenv_static inline int
 feclearexcept(int __excepts)
 {
 	fexcept_t __r;
 	if (__excepts & FE_INVALID)
 		__excepts |= FE_ALL_EXCEPT;
 	_FPU_GETCW(__r);
 	__r &= ~__excepts;
 	_FPU_SETCW(__r);
 	return (0);
 }
 /*
  * Store in *__flagp the control-word bits that are also set in __excepts.
  * Always returns 0.
  */
 __fenv_static inline int
 fegetexceptflag(fexcept_t *__flagp, int __excepts)
 {
 	fexcept_t __cw;
 	/* _FPU_GETCW() reads the whole word; keep only the requested bits. */
 	_FPU_GETCW(__cw);
 	*__flagp = __cw & __excepts;
 	return (0);
 }
 __fenv_static inline int
 fesetexceptflag(const fexcept_t *__flagp, int __excepts)
 {
 	fexcept_t __r;
 	if (__excepts & FE_INVALID)
 		__excepts |= FE_ALL_EXCEPT;
 	_FPU_GETCW(__r);
 	__r &= ~__excepts;
 	__r |= *__flagp & __excepts;
 	_FPU_SETCW(__r);
 	return (0);
 }
 /*
  * OR the bits of __excepts into the control word.  Always returns 0.
  */
 __fenv_static inline int
 feraiseexcept(int __excepts)
 {
 	fexcept_t __cw;
 	_FPU_GETCW(__cw);
 	/* Existing bits are kept; the requested ones are added. */
 	__cw = __cw | __excepts;
 	_FPU_SETCW(__cw);
 	return (0);
 }
 /*
  * Return the control-word bits that are also set in __excepts.
  */
 __fenv_static inline int
 fetestexcept(int __excepts)
 {
 	fexcept_t __cw;
 	_FPU_GETCW(__cw);
 	/* Non-zero when at least one requested bit is currently set. */
 	return (__cw & __excepts);
 }
 /*
  * Return the _ROUND_MASK bits of the control word, i.e. one of the FE_*
  * rounding-mode values defined in this header.
  */
 __fenv_static inline int
 fegetround(void)
 {
 	fexcept_t __cw;
 	_FPU_GETCW(__cw);
 	/* Everything outside _ROUND_MASK is dropped. */
 	return (__cw & _ROUND_MASK);
 }
 /*
  * Install a new rounding mode.  Returns -1, without touching the control
  * word, when __round has bits outside _ROUND_MASK; returns 0 otherwise.
  */
 __fenv_static inline int
 fesetround(int __round)
 {
 	fexcept_t __cw;
 	if ((__round & ~_ROUND_MASK) != 0)
 		return (-1);
 	/* Read-modify-write: only the _ROUND_MASK bits are replaced. */
 	_FPU_GETCW(__cw);
 	__cw = (__cw & ~_ROUND_MASK) | __round;
 	_FPU_SETCW(__cw);
 	return (0);
 }
 /*
  * Store the current control word in *__envp.  Always returns 0.
  */
 __fenv_static inline int
 fegetenv(fenv_t *__envp)
 {
 	fenv_t __cw;
 	/* The whole control word is the environment on this target. */
 	_FPU_GETCW(__cw);
 	*__envp = __cw;
 	return (0);
 }
 /*
  * Save the current control word in *__envp, then clear every exception
  * flag and every matching enable bit.  Always returns 0.
  */
 __fenv_static inline int
 feholdexcept(fenv_t *__envp)
 {
 	fexcept_t __cw;
 	_FPU_GETCW(__cw);
 	/* Hand the unmodified word back to the caller first. */
 	*__envp = __cw;
 	__cw = __cw & ~(FE_ALL_EXCEPT | _ENABLE_MASK);
 	_FPU_SETCW(__cw);
 	return (0);
 }
 /*
  * Write the environment stored in *__envp to the control word.  Always
  * returns 0.
  */
 __fenv_static inline int
 fesetenv(const fenv_t *__envp)
 {
 	/* Load the saved value first, then hand it to the register write. */
 	const fenv_t __next = *__envp;
 	_FPU_SETCW(__next);
 	return (0);
 }
 /*
  * Install the environment in *__envp while keeping the exception flags
  * that are currently raised.  Always returns 0.
  */
 __fenv_static inline int
 feupdateenv(const fenv_t *__envp)
 {
 	fexcept_t __cw;
 	_FPU_GETCW(__cw);
 	/* Keep only the raised flags, then merge in the saved environment. */
 	__cw = (__cw & FE_ALL_EXCEPT) | *__envp;
 	_FPU_SETCW(__cw);
 	return (0);
 }
 #if __BSD_VISIBLE
 /* We currently provide no external definitions of the functions below. */
 static inline int
 feenableexcept(int __mask)
 {
 	fenv_t __r;
 	fenv_t __oldmask;
 	_FPU_GETCW(__r);
 	__oldmask = __r;
 	__r |= (__mask & FE_ALL_EXCEPT) << _FPC_EXC_MASK_SHIFT;
 	_FPU_SETCW(__r);
 	return ((__oldmask & _ENABLE_MASK) >> _FPC_EXC_MASK_SHIFT);
 }
 static inline int
 fedisableexcept(int __mask)
 {
 	fenv_t __r;
 	fenv_t __oldmask;
 	_FPU_GETCW(__r);
 	__oldmask = __r;
 	__r &= ~((__mask & FE_ALL_EXCEPT) << _FPC_EXC_MASK_SHIFT);
 	_FPU_SETCW(__r);
 	return ((__oldmask & _ENABLE_MASK) >> _FPC_EXC_MASK_SHIFT);
 }
 static inline int
 fegetexcept(void)
 {
 	fexcept_t __r;
 	_FPU_GETCW(__r);
 	return (__r & (_ENABLE_MASK >> _FPC_EXC_MASK_SHIFT));
 }
 #endif /* __BSD_VISIBLE */
 __END_DECLS
 #endif	/* !_FENV_H_ */
--- a/src/whetstone/include/openlibm_math.h
+++ b/src/whetstone/include/openlibm_math.h
@ -1,491 +0,0 @@
 /*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */
 /*
 * from: @(#)fdlibm.h 5.1 93/09/24
 * $FreeBSD: src/lib/msun/src/openlibm.h,v 1.82 2011/11/12 19:55:48 theraven Exp $
 */
 #ifdef OPENLIBM_USE_HOST_MATH_H
 #include <math.h>
 #else /* !OPENLIBM_USE_HOST_MATH_H */
 #include <openlibm_defs.h>
 #define __BSD_VISIBLE 1
 #ifndef OPENLIBM_MATH_H
 #define	OPENLIBM_MATH_H
 #if (defined(_WIN32) || defined (_MSC_VER)) && !defined(__WIN32__)
    #define __WIN32__
 #endif
 #ifndef __pure2
 #define __pure2
 #endif
 /*
 * ANSI/POSIX
 */
 extern const union __infinity_un {
 	unsigned char	__uc[8];
 	double		__ud;
 } __infinity;
 extern const union __nan_un {
 	unsigned char	__uc[sizeof(float)];
 	float		__uf;
 } __nan;
 /* VBS
 #if __GNUC_PREREQ__(3, 3) || (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 800)
 #define	__MATH_BUILTIN_CONSTANTS
 #endif
 #if __GNUC_PREREQ__(3, 0) && !defined(__INTEL_COMPILER)
 #define	__MATH_BUILTIN_RELOPS
 #endif
 */
 //VBS begin
 #define __MATH_BUILTIN_CONSTANTS
 #define	__MATH_BUILTIN_RELOPS
 #ifndef __ISO_C_VISIBLE
 #define __ISO_C_VISIBLE 1999
 #endif
 //VBS end
 #ifdef __MATH_BUILTIN_CONSTANTS
 #define	HUGE_VAL	__builtin_huge_val()
 #else
 #define	HUGE_VAL	(__infinity.__ud)
 #endif
 #if __ISO_C_VISIBLE >= 1999
 #define	FP_ILOGB0	(-INT_MAX)
 #define	FP_ILOGBNAN	INT_MAX
 #ifdef __MATH_BUILTIN_CONSTANTS
 #define	HUGE_VALF	__builtin_huge_valf()
 #define	HUGE_VALL	__builtin_huge_vall()
 #define	INFINITY	__builtin_inff()
 #define	NAN		__builtin_nanf("")
 #else
 #define	HUGE_VALF	(float)HUGE_VAL
 #define	HUGE_VALL	(long double)HUGE_VAL
 #define	INFINITY	HUGE_VALF
 #define	NAN		(__nan.__uf)
 #endif /* __MATH_BUILTIN_CONSTANTS */
 #define	MATH_ERRNO	1
 #define	MATH_ERREXCEPT	2
 #define	math_errhandling	MATH_ERREXCEPT
 #define	FP_FAST_FMAF	1
 #ifdef __ia64__
 #define	FP_FAST_FMA	1
 #define	FP_FAST_FMAL	1
 #endif
 /* Symbolic constants to classify floating point numbers. */
 #define	FP_INFINITE	0x01
 #define	FP_NAN		0x02
 #define	FP_NORMAL	0x04
 #define	FP_SUBNORMAL	0x08
 #define	FP_ZERO		0x10
 #define	fpclassify(x) \
    ((sizeof (x) == sizeof (float)) ? __fpclassifyf(x) \
    : (sizeof (x) == sizeof (double)) ? __fpclassifyd(x) \
    : __fpclassifyl(x))
 #define	isfinite(x)					\
    ((sizeof (x) == sizeof (float)) ? __isfinitef(x)	\
    : (sizeof (x) == sizeof (double)) ? __isfinite(x)	\
    : __isfinitel(x))
 /*
  * isinf()/isnan() dispatch on the operand size.  In the double branch the
  * names isinf/isnan are not expanded again (a macro is never re-expanded
  * inside its own replacement), so those calls reach the real functions
  * declared further down in this header as (isinf)(double) and
  * (isnan)(double).
  */
 #define	isinf(x)					\
    ((sizeof (x) == sizeof (float)) ? __isinff(x)	\
    : (sizeof (x) == sizeof (double)) ? isinf(x)	\
    : __isinfl(x))
 #define	isnan(x)					\
    ((sizeof (x) == sizeof (float)) ? __isnanf(x)	\
    : (sizeof (x) == sizeof (double)) ? isnan(x)	\
    : __isnanl(x))
 #define	isnormal(x)					\
    ((sizeof (x) == sizeof (float)) ? __isnormalf(x)	\
    : (sizeof (x) == sizeof (double)) ? __isnormal(x)	\
    : __isnormall(x))
 #ifdef __MATH_BUILTIN_RELOPS
 #define	isgreater(x, y)		__builtin_isgreater((x), (y))
 #define	isgreaterequal(x, y)	__builtin_isgreaterequal((x), (y))
 #define	isless(x, y)		__builtin_isless((x), (y))
 #define	islessequal(x, y)	__builtin_islessequal((x), (y))
 #define	islessgreater(x, y)	__builtin_islessgreater((x), (y))
 #define	isunordered(x, y)	__builtin_isunordered((x), (y))
 #else
 #define	isgreater(x, y)		(!isunordered((x), (y)) && (x) > (y))
 #define	isgreaterequal(x, y)	(!isunordered((x), (y)) && (x) >= (y))
 #define	isless(x, y)		(!isunordered((x), (y)) && (x) < (y))
 #define	islessequal(x, y)	(!isunordered((x), (y)) && (x) <= (y))
 #define	islessgreater(x, y)	(!isunordered((x), (y)) && \
 					((x) > (y) || (y) > (x)))
 #define	isunordered(x, y)	(isnan(x) || isnan(y))
 #endif /* __MATH_BUILTIN_RELOPS */
 #define	signbit(x)					\
    ((sizeof (x) == sizeof (float)) ? __signbitf(x)	\
    : (sizeof (x) == sizeof (double)) ? __signbit(x)	\
    : __signbitl(x))
 //VBS
 //typedef	__double_t	double_t;
 //typedef	__float_t	float_t;
 #endif /* __ISO_C_VISIBLE >= 1999 */
 /*
 * XOPEN/SVID
 */
 #if __BSD_VISIBLE || __XSI_VISIBLE
 #define	M_E		2.7182818284590452354	/* e */
 #define	M_LOG2E		1.4426950408889634074	/* log 2e */
 #define	M_LOG10E	0.43429448190325182765	/* log 10e */
 #define	M_LN2		0.69314718055994530942	/* log e2 */
 #define	M_LN10		2.30258509299404568402	/* log e10 */
 #define	M_PI		3.14159265358979323846	/* pi */
 #define	M_PI_2		1.57079632679489661923	/* pi/2 */
 #define	M_PI_4		0.78539816339744830962	/* pi/4 */
 #define	M_1_PI		0.31830988618379067154	/* 1/pi */
 #define	M_2_PI		0.63661977236758134308	/* 2/pi */
 #define	M_2_SQRTPI	1.12837916709551257390	/* 2/sqrt(pi) */
 #define	M_SQRT2		1.41421356237309504880	/* sqrt(2) */
 #define	M_SQRT1_2	0.70710678118654752440	/* 1/sqrt(2) */
 #define	MAXFLOAT	((float)3.40282346638528860e+38)
 #ifndef OPENLIBM_ONLY_THREAD_SAFE
 OLM_DLLEXPORT extern int signgam;
 #endif
 #endif /* __BSD_VISIBLE || __XSI_VISIBLE */
 #if __BSD_VISIBLE
 #if 0
 /* Old value from 4.4BSD-Lite openlibm.h; this is probably better. */
 #define	HUGE		HUGE_VAL
 #else
 #define	HUGE		MAXFLOAT
 #endif
 #endif /* __BSD_VISIBLE */
 /*
 * Most of these functions depend on the rounding mode and have the side
 * effect of raising floating-point exceptions, so they are not declared
 * as __pure2.  In C99, FENV_ACCESS affects the purity of these functions.
 */
 #if defined(__cplusplus)
 extern "C" {
 #endif
 /* Symbol present when OpenLibm is used. */
 int isopenlibm(void);
 /*
 * ANSI/POSIX
 */
 OLM_DLLEXPORT int	__fpclassifyd(double) __pure2;
 OLM_DLLEXPORT int	__fpclassifyf(float) __pure2;
 OLM_DLLEXPORT int	__fpclassifyl(long double) __pure2;
 OLM_DLLEXPORT int	__isfinitef(float) __pure2;
 OLM_DLLEXPORT int	__isfinite(double) __pure2;
 OLM_DLLEXPORT int	__isfinitel(long double) __pure2;
 OLM_DLLEXPORT int	__isinff(float) __pure2;
 OLM_DLLEXPORT int	__isinfl(long double) __pure2;
 OLM_DLLEXPORT int	__isnanf(float) __pure2;
 OLM_DLLEXPORT int	__isnanl(long double) __pure2;
 OLM_DLLEXPORT int	__isnormalf(float) __pure2;
 OLM_DLLEXPORT int	__isnormal(double) __pure2;
 OLM_DLLEXPORT int	__isnormall(long double) __pure2;
 OLM_DLLEXPORT int	__signbit(double) __pure2;
 OLM_DLLEXPORT int	__signbitf(float) __pure2;
 OLM_DLLEXPORT int	__signbitl(long double) __pure2;
 OLM_DLLEXPORT double	acos(double);
 OLM_DLLEXPORT double	asin(double);
 OLM_DLLEXPORT double	atan(double);
 OLM_DLLEXPORT double	atan2(double, double);
 OLM_DLLEXPORT double	cos(double);
 OLM_DLLEXPORT double	sin(double);
 OLM_DLLEXPORT double	tan(double);
 OLM_DLLEXPORT double	cosh(double);
 OLM_DLLEXPORT double	sinh(double);
 OLM_DLLEXPORT double	tanh(double);
 OLM_DLLEXPORT double	exp(double);
 OLM_DLLEXPORT double	frexp(double, int *);	/* fundamentally !__pure2 */
 OLM_DLLEXPORT double	ldexp(double, int);
 OLM_DLLEXPORT double	log(double);
 OLM_DLLEXPORT double	log10(double);
 OLM_DLLEXPORT double	modf(double, double *);	/* fundamentally !__pure2 */
 OLM_DLLEXPORT double	pow(double, double);
 OLM_DLLEXPORT double	sqrt(double);
 OLM_DLLEXPORT double	ceil(double);
 OLM_DLLEXPORT double	fabs(double) __pure2;
 OLM_DLLEXPORT double	floor(double);
 OLM_DLLEXPORT double	fmod(double, double);
 /*
 * These functions are not in C90.
 */
 #if __BSD_VISIBLE || __ISO_C_VISIBLE >= 1999 || __XSI_VISIBLE
 OLM_DLLEXPORT double	acosh(double);
 OLM_DLLEXPORT double	asinh(double);
 OLM_DLLEXPORT double	atanh(double);
 OLM_DLLEXPORT double	cbrt(double);
 OLM_DLLEXPORT double	erf(double);
 OLM_DLLEXPORT double	erfc(double);
 OLM_DLLEXPORT double	exp2(double);
 OLM_DLLEXPORT double	expm1(double);
 OLM_DLLEXPORT double	fma(double, double, double);
 OLM_DLLEXPORT double	hypot(double, double);
 OLM_DLLEXPORT int	ilogb(double) __pure2;
 OLM_DLLEXPORT int	(isinf)(double) __pure2;
 OLM_DLLEXPORT int	(isnan)(double) __pure2;
 OLM_DLLEXPORT double	lgamma(double);
 OLM_DLLEXPORT long long llrint(double);
 OLM_DLLEXPORT long long llround(double);
 OLM_DLLEXPORT double	log1p(double);
 OLM_DLLEXPORT double	log2(double);
 OLM_DLLEXPORT double	logb(double);
 OLM_DLLEXPORT long	lrint(double);
 OLM_DLLEXPORT long	lround(double);
 OLM_DLLEXPORT double	nan(const char *) __pure2;
 OLM_DLLEXPORT double	nextafter(double, double);
 OLM_DLLEXPORT double	remainder(double, double);
 OLM_DLLEXPORT double	remquo(double, double, int *);
 OLM_DLLEXPORT double	rint(double);
 #endif /* __BSD_VISIBLE || __ISO_C_VISIBLE >= 1999 || __XSI_VISIBLE */
 #if __BSD_VISIBLE || __XSI_VISIBLE
 OLM_DLLEXPORT double	j0(double);
 OLM_DLLEXPORT double	j1(double);
 OLM_DLLEXPORT double	jn(int, double);
 OLM_DLLEXPORT double	y0(double);
 OLM_DLLEXPORT double	y1(double);
 OLM_DLLEXPORT double	yn(int, double);
 #endif /* __BSD_VISIBLE || __XSI_VISIBLE */
 #if __BSD_VISIBLE || __ISO_C_VISIBLE >= 1999
 OLM_DLLEXPORT double	copysign(double, double) __pure2;
 OLM_DLLEXPORT double	fdim(double, double);
 OLM_DLLEXPORT double	fmax(double, double) __pure2;
 OLM_DLLEXPORT double	fmin(double, double) __pure2;
 OLM_DLLEXPORT double	nearbyint(double);
 OLM_DLLEXPORT double	round(double);
 OLM_DLLEXPORT double	scalbln(double, long);
 OLM_DLLEXPORT double	scalbn(double, int);
 OLM_DLLEXPORT double	tgamma(double);
 OLM_DLLEXPORT double	trunc(double);
 #endif
 /*
 * BSD math library entry points
 */
 #if __BSD_VISIBLE
 OLM_DLLEXPORT int	isinff(float) __pure2;
 OLM_DLLEXPORT int	isnanf(float) __pure2;
 /*
 * Reentrant version of lgamma; passes signgam back by reference as the
 * second argument; user must allocate space for signgam.
 */
 OLM_DLLEXPORT double	lgamma_r(double, int *);
 /*
 * Single sine/cosine function.
 */
 OLM_DLLEXPORT void	sincos(double, double *, double *);
 #endif /* __BSD_VISIBLE */
 /* float versions of ANSI/POSIX functions */
 #if __ISO_C_VISIBLE >= 1999
 OLM_DLLEXPORT float	acosf(float);
 OLM_DLLEXPORT float	asinf(float);
 OLM_DLLEXPORT float	atanf(float);
 OLM_DLLEXPORT float	atan2f(float, float);
 OLM_DLLEXPORT float	cosf(float);
 OLM_DLLEXPORT float	sinf(float);
 OLM_DLLEXPORT float	tanf(float);
 OLM_DLLEXPORT float	coshf(float);
 OLM_DLLEXPORT float	sinhf(float);
 OLM_DLLEXPORT float	tanhf(float);
 OLM_DLLEXPORT float	exp2f(float);
 OLM_DLLEXPORT float	expf(float);
 OLM_DLLEXPORT float	expm1f(float);
 OLM_DLLEXPORT float	frexpf(float, int *);	/* fundamentally !__pure2 */
 OLM_DLLEXPORT int	ilogbf(float) __pure2;
 OLM_DLLEXPORT float	ldexpf(float, int);
 OLM_DLLEXPORT float	log10f(float);
 OLM_DLLEXPORT float	log1pf(float);
 OLM_DLLEXPORT float	log2f(float);
 OLM_DLLEXPORT float	logf(float);
 OLM_DLLEXPORT float	modff(float, float *);	/* fundamentally !__pure2 */
 OLM_DLLEXPORT float	powf(float, float);
 OLM_DLLEXPORT float	sqrtf(float);
 OLM_DLLEXPORT float	ceilf(float);
 OLM_DLLEXPORT float	fabsf(float) __pure2;
 OLM_DLLEXPORT float	floorf(float);
 OLM_DLLEXPORT float	fmodf(float, float);
 OLM_DLLEXPORT float	roundf(float);
 OLM_DLLEXPORT float	erff(float);
 OLM_DLLEXPORT float	erfcf(float);
 OLM_DLLEXPORT float	hypotf(float, float);
 OLM_DLLEXPORT float	lgammaf(float);
 OLM_DLLEXPORT float	tgammaf(float);
 OLM_DLLEXPORT float	acoshf(float);
 OLM_DLLEXPORT float	asinhf(float);
 OLM_DLLEXPORT float	atanhf(float);
 OLM_DLLEXPORT float	cbrtf(float);
 OLM_DLLEXPORT float	logbf(float);
 OLM_DLLEXPORT float	copysignf(float, float) __pure2;
 OLM_DLLEXPORT long long llrintf(float);
 OLM_DLLEXPORT long long llroundf(float);
 OLM_DLLEXPORT long	lrintf(float);
 OLM_DLLEXPORT long	lroundf(float);
 OLM_DLLEXPORT float	nanf(const char *) __pure2;
 OLM_DLLEXPORT float	nearbyintf(float);
 OLM_DLLEXPORT float	nextafterf(float, float);
 OLM_DLLEXPORT float	remainderf(float, float);
 OLM_DLLEXPORT float	remquof(float, float, int *);
 OLM_DLLEXPORT float	rintf(float);
 OLM_DLLEXPORT float	scalblnf(float, long);
 OLM_DLLEXPORT float	scalbnf(float, int);
 OLM_DLLEXPORT float	truncf(float);
 OLM_DLLEXPORT float	fdimf(float, float);
 OLM_DLLEXPORT float	fmaf(float, float, float);
 OLM_DLLEXPORT float	fmaxf(float, float) __pure2;
 OLM_DLLEXPORT float	fminf(float, float) __pure2;
 #endif
 /*
 * float versions of BSD math library entry points
 */
 #if __BSD_VISIBLE
 OLM_DLLEXPORT float	dremf(float, float);
 OLM_DLLEXPORT float	j0f(float);
 OLM_DLLEXPORT float	j1f(float);
 OLM_DLLEXPORT float	jnf(int, float);
 OLM_DLLEXPORT float	y0f(float);
 OLM_DLLEXPORT float	y1f(float);
 OLM_DLLEXPORT float	ynf(int, float);
 /*
 * Float versions of reentrant version of lgamma; passes signgam back by
 * reference as the second argument; user must allocate space for signgam.
 */
 OLM_DLLEXPORT float	lgammaf_r(float, int *);
 /*
 * Single sine/cosine function.
 */
 OLM_DLLEXPORT void	sincosf(float, float *, float *);
 #endif	/* __BSD_VISIBLE */
 /*
 * long double versions of ISO/POSIX math functions
 */
 #if __ISO_C_VISIBLE >= 1999
 OLM_DLLEXPORT long double	acoshl(long double);
 OLM_DLLEXPORT long double	acosl(long double);
 OLM_DLLEXPORT long double	asinhl(long double);
 OLM_DLLEXPORT long double	asinl(long double);
 OLM_DLLEXPORT long double	atan2l(long double, long double);
 OLM_DLLEXPORT long double	atanhl(long double);
 OLM_DLLEXPORT long double	atanl(long double);
 OLM_DLLEXPORT long double	cbrtl(long double);
 OLM_DLLEXPORT long double	ceill(long double);
 OLM_DLLEXPORT long double	copysignl(long double, long double) __pure2;
 OLM_DLLEXPORT long double	coshl(long double);
 OLM_DLLEXPORT long double	cosl(long double);
 OLM_DLLEXPORT long double	erfcl(long double);
 OLM_DLLEXPORT long double	erfl(long double);
 OLM_DLLEXPORT long double	exp2l(long double);
 OLM_DLLEXPORT long double	expl(long double);
 OLM_DLLEXPORT long double	expm1l(long double);
 OLM_DLLEXPORT long double	fabsl(long double) __pure2;
 OLM_DLLEXPORT long double	fdiml(long double, long double);
 OLM_DLLEXPORT long double	floorl(long double);
 OLM_DLLEXPORT long double	fmal(long double, long double, long double);
 OLM_DLLEXPORT long double	fmaxl(long double, long double) __pure2;
 OLM_DLLEXPORT long double	fminl(long double, long double) __pure2;
 OLM_DLLEXPORT long double	fmodl(long double, long double);
 OLM_DLLEXPORT long double	frexpl(long double value, int *); /* fundamentally !__pure2 */
 OLM_DLLEXPORT long double	hypotl(long double, long double);
 OLM_DLLEXPORT int		ilogbl(long double) __pure2;
 OLM_DLLEXPORT long double	ldexpl(long double, int);
 OLM_DLLEXPORT long double	lgammal(long double);
 OLM_DLLEXPORT long long	llrintl(long double);
 OLM_DLLEXPORT long long	llroundl(long double);
 OLM_DLLEXPORT long double	log10l(long double);
 OLM_DLLEXPORT long double	log1pl(long double);
 OLM_DLLEXPORT long double	log2l(long double);
 OLM_DLLEXPORT long double	logbl(long double);
 OLM_DLLEXPORT long double	logl(long double);
 OLM_DLLEXPORT long		lrintl(long double);
 OLM_DLLEXPORT long		lroundl(long double);
 OLM_DLLEXPORT long double	modfl(long double, long double *); /* fundamentally !__pure2 */
 OLM_DLLEXPORT long double	nanl(const char *) __pure2;
 OLM_DLLEXPORT long double	nearbyintl(long double);
 OLM_DLLEXPORT long double	nextafterl(long double, long double);
 OLM_DLLEXPORT double		nexttoward(double, long double);
 OLM_DLLEXPORT float		nexttowardf(float, long double);
 OLM_DLLEXPORT long double	nexttowardl(long double, long double);
 OLM_DLLEXPORT long double	powl(long double, long double);
 OLM_DLLEXPORT long double	remainderl(long double, long double);
 OLM_DLLEXPORT long double	remquol(long double, long double, int *);
 OLM_DLLEXPORT long double	rintl(long double);
 OLM_DLLEXPORT long double	roundl(long double);
 OLM_DLLEXPORT long double	scalblnl(long double, long);
 OLM_DLLEXPORT long double	scalbnl(long double, int);
 OLM_DLLEXPORT long double	sinhl(long double);
 OLM_DLLEXPORT long double	sinl(long double);
 OLM_DLLEXPORT long double	sqrtl(long double);
 OLM_DLLEXPORT long double	tanhl(long double);
 OLM_DLLEXPORT long double	tanl(long double);
 OLM_DLLEXPORT long double	tgammal(long double);
 OLM_DLLEXPORT long double	truncl(long double);
 #endif /* __ISO_C_VISIBLE >= 1999 */
 /* Reentrant version of lgammal. */
 #if __BSD_VISIBLE
 OLM_DLLEXPORT long double	lgammal_r(long double, int *);
 /*
 * Single sine/cosine function.
 */
 OLM_DLLEXPORT void	sincosl(long double, long double *, long double *);
 #endif	/* __BSD_VISIBLE */
 #if defined(__cplusplus)
 }
 #endif
 #endif /* !OPENLIBM_MATH_H */
 #endif /* !OPENLIBM_USE_HOST_MATH_H */
--- a/src/whetstone/include/whestone.h
+++ b/src/whetstone/include/whestone.h
@ -0,0 +1,25 @@
 /*
  * Shared declarations for the Whetstone benchmark: FORTRAN-to-C name
  * mappings, helper prototypes and the benchmark configuration type.
  */
 #ifndef WHESTONE_H
 #define WHESTONE_H
 /* the following is optional depending on the timing function used */
 /* map the FORTRAN math functions, etc. to the C versions */
 #define DSIN sin
 #define DCOS cos
 #define DATAN atan
 #define DLOG log
 #define DEXP exp
 #define DSQRT sqrt
 #define IF if
 /* function prototypes */
 void POUT(long N, long J, long K, double X1, double X2, double X3, double X4);
 void PA(double E[]);
 void P0(void);
 void P3(double X, double Y, double *Z);
 #define USAGE "usage: whetdc [-c] [loops]\n"
 /*
        COMMON T,T1,T2,E1(4),J,K,L
 */
 /*
  * Benchmark configuration.  The guard around this header keeps the
  * typedef from being declared twice when the header is included more
  * than once.
  */
 typedef struct {
  long loopstart; /* initial loop count; main() in whetstone.c reads it */
 } bench_whestone_config;
 #endif /* !WHESTONE_H */
--- a/src/whetstone/whetstone.c
+++ b/src/whetstone/whetstone.c
@ -54,43 +54,22 @@ C**********************************************************************
 /* standard C library headers required */
 #include <am.h>
 #include <bench.h>
 #include <klib.h>
 #include <klib-macros.h>
 #include <float.h>
 #include <klib-macros.h>
 #include <klib.h>
 #include <openlibm.h>
 #include <stdint.h>
 #include <whestone.h>
-/* the following is optional depending on the timing function used */
+double T, T1, T2, E1[5];
 int J, K, L;
-/* map the FORTRAN math functions, etc. to the C versions */
+extern bench_whestone_config config;
-#define DSIN	sin
+int main(int argc, char *argv[]) {
 #define DCOS	cos
 #define DATAN	atan
 #define DLOG	log
 #define DEXP	exp
 #define DSQRT	sqrt
 #define IF		if
 /* function prototypes */
 void POUT(long N, long J, long K, double X1, double X2, double X3, double X4);
 void PA(double E[]);
 void P0(void);
 void P3(double X, double Y, double *Z);
 #define USAGE	"usage: whetdc [-c] [loops]\n"
 /*
 	COMMON T,T1,T2,E1(4),J,K,L
 */
 double T,T1,T2,E1[5];
 int J,K,L;
 int
 main(int argc, char *argv[])
 {
  /* used in the FORTRAN version */
  long I1;
  long N1, N2, N3, N4, N6, N7, N8, N9, N10, N11;
-	double X1,X2,X3,X4,X,Y,Z;
+  double X1, X2, X3, X4, X, Y, Z;
  long LOOP;
  int II, JJ;
@ -100,34 +79,34 @@ main(int argc, char *argv[])
  float KIPS;
  int continuous;
-	//loopstart = 1000;		/* see the note about LOOP below */
+  // loopstart = 1000;		/* see the note about LOOP below */
-  loopstart = 200;
+  loopstart = config.loopstart;
  continuous = 0;
 LCONT:
-/*
+  /*
-C
+  C
-C	Start benchmark timing at this point.
+  C	Start benchmark timing at this point.
-C
+  C
-*/
+  */
  startsec = uptime();
-/*
+  /*
-C
+  C
-C	The actual benchmark starts here.
+  C	The actual benchmark starts here.
-C
+  C
-*/
+  */
  T = .499975;
  T1 = 0.50025;
  T2 = 2.0;
-/*
+  /*
-C
+  C
-C	With loopcount LOOP=10, one million Whetstone instructions
+  C	With loopcount LOOP=10, one million Whetstone instructions
-C	will be executed in EACH MAJOR LOOP..A MAJOR LOOP IS EXECUTED
+  C	will be executed in EACH MAJOR LOOP..A MAJOR LOOP IS EXECUTED
-C	'II' TIMES TO INCREASE WALL-CLOCK TIMING ACCURACY.
+  C	'II' TIMES TO INCREASE WALL-CLOCK TIMING ACCURACY.
-C
+  C
          LOOP = 1000;
-*/
+  */
  LOOP = loopstart;
  II = 1;
@ -144,11 +123,11 @@ IILOOP:
  N9 = 616 * LOOP;
  N10 = 0;
  N11 = 93 * LOOP;
-/*
+  /*
-C
+  C
-C	Module 1: Simple identifiers
+  C	Module 1: Simple identifiers
-C
+  C
-*/
+  */
  X1 = 1.0;
  X2 = -1.0;
  X3 = -1.0;
@ -158,50 +137,50 @@ C
    X1 = (X1 + X2 + X3 - X4) * T;
    X2 = (X1 + X2 - X3 + X4) * T;
    X3 = (X1 - X2 + X3 + X4) * T;
-	    X4 = (-X1+ X2 + X3 + X4) * T;
+    X4 = (-X1 + X2 + X3 + X4) * T;
  }
 #ifdef PRINTOUT
-	IF (JJ==II)POUT(N1,N1,N1,X1,X2,X3,X4);
+  IF(JJ == II) POUT(N1, N1, N1, X1, X2, X3, X4);
 #endif
-/*
+  /*
-C
+  C
-C	Module 2: Array elements
+  C	Module 2: Array elements
-C
+  C
-*/
+  */
  E1[1] = 1.0;
  E1[2] = -1.0;
  E1[3] = -1.0;
  E1[4] = -1.0;
  for (I1 = 1; I1 <= N2; I1++) {
-	    E1[1] = ( E1[1] + E1[2] + E1[3] - E1[4]) * T;
+    E1[1] = (E1[1] + E1[2] + E1[3] - E1[4]) * T;
-	    E1[2] = ( E1[1] + E1[2] - E1[3] + E1[4]) * T;
+    E1[2] = (E1[1] + E1[2] - E1[3] + E1[4]) * T;
-	    E1[3] = ( E1[1] - E1[2] + E1[3] + E1[4]) * T;
+    E1[3] = (E1[1] - E1[2] + E1[3] + E1[4]) * T;
    E1[4] = (-E1[1] + E1[2] + E1[3] + E1[4]) * T;
  }
 #ifdef PRINTOUT
-	IF (JJ==II)POUT(N2,N3,N2,E1[1],E1[2],E1[3],E1[4]);
+  IF(JJ == II) POUT(N2, N3, N2, E1[1], E1[2], E1[3], E1[4]);
 #endif
-/*
+  /*
-C
+  C
-C	Module 3: Array as parameter
+  C	Module 3: Array as parameter
-C
+  C
-*/
+  */
  for (I1 = 1; I1 <= N3; I1++)
    PA(E1);
 #ifdef PRINTOUT
-	IF (JJ==II)POUT(N3,N2,N2,E1[1],E1[2],E1[3],E1[4]);
+  IF(JJ == II) POUT(N3, N2, N2, E1[1], E1[2], E1[3], E1[4]);
 #endif
-/*
+  /*
-C
+  C
-C	Module 4: Conditional jumps
+  C	Module 4: Conditional jumps
-C
+  C
-*/
+  */
  J = 1;
  for (I1 = 1; I1 <= N4; I1++) {
    if (J == 1)
@ -221,70 +200,70 @@ C
  }
 #ifdef PRINTOUT
-	IF (JJ==II)POUT(N4,J,J,X1,X2,X3,X4);
+  IF(JJ == II) POUT(N4, J, J, X1, X2, X3, X4);
 #endif
-/*
+  /*
-C
+  C
-C	Module 5: Omitted
+  C	Module 5: Omitted
-C 	Module 6: Integer arithmetic
+  C 	Module 6: Integer arithmetic
-C
+  C
-*/
+  */
  J = 1;
  K = 2;
  L = 3;
  for (I1 = 1; I1 <= N6; I1++) {
-	    J = J * (K-J) * (L-K);
+    J = J * (K - J) * (L - K);
-	    K = L * K - (L-J) * K;
+    K = L * K - (L - J) * K;
-	    L = (L-K) * (K+J);
+    L = (L - K) * (K + J);
-	    E1[L-1] = J + K + L;
+    E1[L - 1] = J + K + L;
-	    E1[K-1] = J * K * L;
+    E1[K - 1] = J * K * L;
  }
 #ifdef PRINTOUT
-	IF (JJ==II)POUT(N6,J,K,E1[1],E1[2],E1[3],E1[4]);
+  IF(JJ == II) POUT(N6, J, K, E1[1], E1[2], E1[3], E1[4]);
 #endif
-/*
+  /*
-C
+  C
-C	Module 7: Trigonometric functions
+  C	Module 7: Trigonometric functions
-C
+  C
-*/
+  */
  X = 0.5;
  Y = 0.5;
  for (I1 = 1; I1 <= N7; I1++) {
-		X = T * DATAN(T2*DSIN(X)*DCOS(X)/(DCOS(X+Y)+DCOS(X-Y)-1.0));
+    X = T * DATAN(T2 * DSIN(X) * DCOS(X) / (DCOS(X + Y) + DCOS(X - Y) - 1.0));
-		Y = T * DATAN(T2*DSIN(Y)*DCOS(Y)/(DCOS(X+Y)+DCOS(X-Y)-1.0));
+    Y = T * DATAN(T2 * DSIN(Y) * DCOS(Y) / (DCOS(X + Y) + DCOS(X - Y) - 1.0));
  }
 #ifdef PRINTOUT
-	IF (JJ==II)POUT(N7,J,K,X,X,Y,Y);
+  IF(JJ == II) POUT(N7, J, K, X, X, Y, Y);
 #endif
-/*
+  /*
-C
+  C
-C	Module 8: Procedure calls
+  C	Module 8: Procedure calls
-C
+  C
-*/
+  */
  X = 1.0;
  Y = 1.0;
  Z = 1.0;
  for (I1 = 1; I1 <= N8; I1++)
-		P3(X,Y,&Z);
+    P3(X, Y, &Z);
 #ifdef PRINTOUT
-	IF (JJ==II)POUT(N8,J,K,X,Y,Z,Z);
+  IF(JJ == II) POUT(N8, J, K, X, Y, Z, Z);
 #endif
-/*
+  /*
-C
+  C
-C	Module 9: Array references
+  C	Module 9: Array references
-C
+  C
-*/
+  */
  J = 1;
  K = 2;
  L = 3;
@ -296,14 +275,14 @@ C
    P0();
 #ifdef PRINTOUT
-	IF (JJ==II)POUT(N9,J,K,E1[1],E1[2],E1[3],E1[4]);
+  IF(JJ == II) POUT(N9, J, K, E1[1], E1[2], E1[3], E1[4]);
 #endif
-/*
+  /*
-C
+  C
-C	Module 10: Integer arithmetic
+  C	Module 10: Integer arithmetic
-C
+  C
-*/
+  */
  J = 2;
  K = 3;
@ -315,59 +294,60 @@ C
  }
 #ifdef PRINTOUT
-	IF (JJ==II)POUT(N10,J,K,X1,X2,X3,X4);
+  IF(JJ == II) POUT(N10, J, K, X1, X2, X3, X4);
 #endif
-/*
+  /*
-C
+  C
-C	Module 11: Standard functions
+  C	Module 11: Standard functions
-C
+  C
-*/
+  */
  X = 0.75;
  for (I1 = 1; I1 <= N11; I1++)
-		X = DSQRT(DEXP(DLOG(X)/T1));
+    X = DSQRT(DEXP(DLOG(X) / T1));
 #ifdef PRINTOUT
-	IF (JJ==II)POUT(N11,J,K,X,X,X,X);
+  IF(JJ == II) POUT(N11, J, K, X, X, X, X);
 #endif
-/*
+  /*
-C
+  C
-C      THIS IS THE END OF THE MAJOR LOOP.
+  C      THIS IS THE END OF THE MAJOR LOOP.
-C
+  C
-*/
+  */
  if (++JJ <= II)
    goto IILOOP;
-/*
+  /*
-C
+  C
-C      Stop benchmark timing at this point.
+  C      Stop benchmark timing at this point.
-C
+  C
-*/
+  */
  finisec = uptime();
-/*
+  /*
-C----------------------------------------------------------------
+  C----------------------------------------------------------------
-C      Performance in Whetstone KIP's per second is given by
+  C      Performance in Whetstone KIP's per second is given by
-C
+  C
-C	(100*LOOP*II)/TIME
+  C	(100*LOOP*II)/TIME
-C
+  C
-C      where TIME is in seconds.
+  C      where TIME is in seconds.
-C--------------------------------------------------------------------
+  C--------------------------------------------------------------------
-*/
+  */
  printf("\n");
-	if (finisec-startsec <= 0) {
+  if (finisec - startsec <= 0) {
    printf("Insufficient duration- Increase the LOOP count\n");
-		return(1);
+    return (1);
  }
-	printf("Loops: %ld, Iterations: %d, Duration: %ld sec.\n",
+  printf("Loops: %ld, Iterations: %d, Duration: %ld sec.\n", LOOP, II,
-			LOOP, II, finisec-startsec);
+         finisec - startsec);
-	KIPS = (100.0*LOOP*II)/(float)(finisec-startsec);
+  KIPS = (100.0 * LOOP * II) / (float)(finisec - startsec);
  if (KIPS >= 1000.0)
-		printf("C Converted Double Precision Whetstones: %.1f MWIPS\n", KIPS/1000.0);
+    printf("C Converted Double Precision Whetstones: %.1f MWIPS\n",
           KIPS / 1000.0);
  else
    printf("C Converted Double Precision Whetstones: %.1f KWIPS\n", KIPS);
@ -378,15 +358,13 @@ C--------------------------------------------------------------------
  return 0;
 }
-void
+void PA(double E[]) {
 PA(double E[])
 {
  J = 0;
 L10:
-	E[1] = ( E[1] + E[2] + E[3] - E[4]) * T;
+  E[1] = (E[1] + E[2] + E[3] - E[4]) * T;
-	E[2] = ( E[1] + E[2] - E[3] + E[4]) * T;
+  E[2] = (E[1] + E[2] - E[3] + E[4]) * T;
-	E[3] = ( E[1] - E[2] + E[3] + E[4]) * T;
+  E[3] = (E[1] - E[2] + E[3] + E[4]) * T;
  E[4] = (-E[1] + E[2] + E[3] + E[4]) / T2;
  J += 1;
@ -394,17 +372,13 @@ L10:
    goto L10;
 }
-void
+void P0(void) {
 P0(void)
 {
  E1[J] = E1[K];
  E1[K] = E1[L];
  E1[L] = E1[J];
 }
-void
+void P3(double X, double Y, double *Z) {
 P3(double X, double Y, double *Z)
 {
  double X1, Y1;
  X1 = X;
@ -415,10 +389,8 @@ P3(double X, double Y, double *Z)
 }
 #ifdef PRINTOUT
-void
+void POUT(long N, long J, long K, double X1, double X2, double X3, double X4) {
-POUT(long N, long J, long K, double X1, double X2, double X3, double X4)
+  printf("%7ld %7ld %7ld %12.4e %12.4e %12.4e %12.4e\n", N, J, K, X1, X2, X3,
-{
+         X4);
 	printf("%7ld %7ld %7ld %12.4e %12.4e %12.4e %12.4e\n",
 						N, J, K, X1, X2, X3, X4);
 }
 #endif
`@ -1,4 +1,4 @@`
	`#include "gemm.h"`	`#include <gemm.h>`


	`#define A(i,j) a[(j)*lda+(i)]`	`#define A(i,j) a[(j)*lda+(i)]`
		`@ -0,0 +1,4 @@`
							`#include <linpack.h>`


							`bench_linpack_config config = { 270 };`
		`@ -0,0 +1,4 @@`
							`#include <linpack.h>`


							`bench_linpack_config config = { 100 };`
		`@ -0,0 +1,4 @@`
							`#include <linpack.h>`


							`bench_linpack_config config = { 80 };`
		`@ -0,0 +1,3 @@`
							`#include <stream.h>`

							`bench_stream_config config = {200000};`
		`@ -0,0 +1,4 @@`
							`#include <stream.h>`


							`bench_stream_config config = {100000};`
		`@ -0,0 +1,4 @@`
							`#include <stream.h>`


							`bench_stream_config config = {10000};`
		`@ -0,0 +1,5 @@`
							`#include <whestone.h>`


							`bench_whestone_config config = {200};`
		`@ -0,0 +1,3 @@`
							`#include <whestone.h>`

							`bench_whestone_config config = {30};`
		`@ -0,0 +1,4 @@`
							`#include <whestone.h>`


							`bench_whestone_config config = {10};`