feat: add debug macros and config source files

This commit is contained in:
dgyyy 2024-09-24 16:43:46 +08:00
parent df550439a1
commit 03869b9959
62 changed files with 1398 additions and 4450 deletions

View file

@ -1,5 +1,6 @@
#include <am.h> #include <am.h>
#include <klib.h> #include <klib.h>
#include <bench_debug.h>
#include <klib-macros.h> #include <klib-macros.h>
uint64_t uptime() uint64_t uptime()
@ -8,6 +9,41 @@ uint64_t uptime()
} }
/*
 * Format a duration given in microseconds as human-readable text, for
 * example "1 h 2 min 3 s 4.005 ms".
 *
 * Leading units whose value is zero are omitted. The millisecond field always
 * carries three fractional digits that hold the leftover microseconds.
 *
 * Returns a pointer to a static buffer: the text is overwritten by the next
 * call, so the function is not reentrant and the result must not be freed.
 */
char *format_time(uint64_t us) {
  static char buf[128];

  /* Split the total into hours / minutes / seconds / ms / leftover us. */
  uint64_t ms = us / 1000;
  uint64_t s = ms / 1000;
  uint64_t min = s / 60;
  uint64_t h = min / 60;
  us %= 1000;
  ms %= 1000;
  s %= 60;
  min %= 60;

  /*
   * The format strings use "%ld", which consumes a `long`. Passing uint64_t
   * directly mismatches the conversion wherever long is narrower than 64
   * bits, so convert explicitly. min, s and ms are all below 1000 here;
   * only an absurdly large hour count could be truncated.
   */
  long h_arg = (long)h;
  long min_arg = (long)min;
  long s_arg = (long)s;
  long ms_arg = (long)ms;

  /* Assumes bench_sprintf returns the number of characters written. */
  int len = 0;
  if (h > 0) {
    len = bench_sprintf(buf, "%ld h %ld min %ld s %ld.000 ms", h_arg, min_arg, s_arg, ms_arg);
  } else if (min > 0) {
    len = bench_sprintf(buf, "%ld min %ld s, %ld.000 ms", min_arg, s_arg, ms_arg);
  } else if (s > 0) {
    len = bench_sprintf(buf, "%ld s, %ld.000 ms", s_arg, ms_arg);
  } else {
    len = bench_sprintf(buf, "%ld.000 ms", ms_arg);
  }

  /* Every format ends in ".000 ms"; bail out if nothing usable was written. */
  if (len < 4) {
    return buf;
  }

  /*
   * buf[len - 4] is the last '0' of the ".000" placeholder. Overwrite the
   * placeholder right-to-left with the decimal digits of the leftover
   * microseconds; untouched positions keep their '0' padding.
   */
  char *p = &buf[len - 4];
  while (us > 0) {
    *(p--) = (char)('0' + us % 10);
    us /= 10;
  }
  return buf;
}
/* char *format_time(uint64_t us) {
static char buf[32]; static char buf[32];
uint64_t ms = us / 1000; uint64_t ms = us / 1000;
us -= ms * 1000; us -= ms * 1000;
@ -19,7 +55,7 @@ char *format_time(uint64_t us) {
us /= 10; us /= 10;
} }
return buf; return buf;
} } */
// FNV hash // FNV hash
uint32_t checksum(void *start, void *end) { uint32_t checksum(void *start, void *end) {

View file

@ -295,7 +295,6 @@ int bench_vsprintf(char *buf, const char *fmt, va_list args)
case '0': flags |= ZEROPAD; goto repeat; case '0': flags |= ZEROPAD; goto repeat;
} }
field_width = -1; field_width = -1;
if ('0' <= *fmt && *fmt <= '9') if ('0' <= *fmt && *fmt <= '9')
field_width = skip_atoi(&fmt); field_width = skip_atoi(&fmt);

View file

@ -50,8 +50,8 @@ static void *malloc_base() {
return b; return b;
} }
//We need this function because we use variable `heap` //We need this function because the variable `heap` is used
//and it is initialized in run time. //and initialized at runtime.
void bench_malloc_init() { void bench_malloc_init() {
program_break = (intptr_t)heap.start; program_break = (intptr_t)heap.start;
} }

View file

@ -0,0 +1,35 @@
#ifndef __BENCH_DEBUG_H
#define __BENCH_DEBUG_H

#include <stdarg.h>

/* ANSI escape sequences that colour the text of each log level. */
#define ANSI_ERROR "\33[1;31m"
#define ANSI_WARN "\33[1;93m"
#define ANSI_INFO "\33[1;34m"
#define ANSI_DEBUG "\33[1;32m"
#define ANSI_TRACE "\33[1;90m"
/* Resets the terminal attributes after a coloured message. */
#define ANSI_NONE "\33[0m"

/* Numeric severities; a larger number means a more verbose level. */
#define LOG_LEVEL_ERROR 0
#define LOG_LEVEL_WARN 1
#define LOG_LEVEL_INFO 2
#define LOG_LEVEL_DEBUG 3
#define LOG_LEVEL_TRACE 4

/* Verbosity threshold; define LOG_LEVEL before including to override it. */
#ifndef LOG_LEVEL
#define LOG_LEVEL LOG_LEVEL_INFO
#endif

/* Wrap the string literal `str` between the colour prefix `fmt` and a reset. */
#define ANSI_FMT(str, fmt) fmt str ANSI_NONE

/* Formatted-output primitives provided by the bench library. */
int bench_vsprintf(char *buf, const char *fmt, va_list args);
int bench_sprintf(char *s, const char *fmt, ...);
int bench_printf(const char *fmt, ...);

/*
 * BENCH_LOG(level, str, ...) prints a coloured message through bench_printf
 * when `level` is enabled by LOG_LEVEL.
 *
 *   level - bare suffix ERROR, WARN, INFO, DEBUG or TRACE; it is pasted onto
 *           LOG_LEVEL_ and ANSI_ to select the severity and the colour.
 *   str   - string literal format (it is concatenated with the colour codes,
 *           so it cannot be a runtime pointer).
 *   ...   - arguments for the conversions in `str`.
 *
 * No newline is appended. The do/while(0) wrapper makes the macro behave as a
 * single statement; it deliberately ends without a line continuation so that
 * the text following the macro is not absorbed into it.
 */
#define BENCH_LOG(level, str, ...)                                  \
  do {                                                              \
    if (LOG_LEVEL >= LOG_LEVEL_##level) {                           \
      bench_printf(ANSI_FMT(str, ANSI_##level), ##__VA_ARGS__);     \
    }                                                               \
  } while (0)

#endif

View file

@ -1,9 +0,0 @@
#ifndef __BENCH_DEBUG_H
#define __BENCH_DEBUG_H
#include <stdarg.h>
int bench_vsprintf(char *buf, const char *fmt, va_list args);
int bench_sprintf( char *s, const char *fmt, ... );
int bench_printf(const char *fmt, ... );
#endif

View file

@ -1,57 +1,58 @@
#include <bench_debug.h>
#include <gemm.h> #include <gemm.h>
#define A(i,j) a[(j)*lda + (i)] #define A(i, j) a[(j) * lda + (i)]
void serial_init(int m, int n, double * a, int lda){ void serial_init(int m, int n, double *a, int lda) {
int count = 1; int count = 1;
for(int j=0;j<n;j++){ for (int j = 0; j < n; j++) {
for(int i=0;i<m;i++) for (int i = 0; i < m; i++)
A(i,j) = count++; A(i, j) = count++;
} }
} }
void random_init(int m, int n, double * a, int lda){ void random_init(int m, int n, double *a, int lda) {
for(int j=0;j<n;j++){ for (int j = 0; j < n; j++) {
for(int i=0;i<m;i++) for (int i = 0; i < m; i++)
A(i,j) = 2.0 * rand() - 1.0; A(i, j) = 2.0 * rand() - 1.0;
} }
} }
extern bench_gemm_config config; extern bench_gemm_config config;
int main(){ int main() {
bench_malloc_init(); bench_malloc_init();
int m = config.m; int m = config.m;
int n = config.n; int n = config.n;
int k = config.k; int k = config.k;
//TODO: calculate the memory size. // TODO: calculate the memory size.
double * A = (double*)bench_malloc(m*k*sizeof(double)); double *A = (double *)bench_malloc(m * k * sizeof(double));
double * B = (double*)bench_malloc(k*n*sizeof(double)); double *B = (double *)bench_malloc(k * n * sizeof(double));
double * C = (double*)bench_malloc(m*n*sizeof(double)); double *C = (double *)bench_malloc(m * n * sizeof(double));
assert(A); assert(A);
assert(B); assert(B);
assert(C); assert(C);
memset(A,0,m*k*sizeof(double)); memset(A, 0, m * k * sizeof(double));
memset(B,0,k*n*sizeof(double)); memset(B, 0, k * n * sizeof(double));
memset(C,0,m*n*sizeof(double)); memset(C, 0, m * n * sizeof(double));
uint64_t start_time, end_time; uint64_t start_time, end_time;
srand(1556); srand(1556);
random_init(m,k,A,m); random_init(m, k, A, m);
random_init(k,n,B,k); random_init(k, n, B, k);
start_time = uptime(); start_time = uptime();
matmul(m,n,k,A,m,B,k,C,m); matmul(m, n, k, A, m, B, k, C, m);
end_time = uptime(); end_time = uptime();
bench_free(A); bench_free(A);
bench_free(B); bench_free(B);
bench_free(C); bench_free(C);
printf("time: %s ms \n", format_time(end_time - start_time)); bench_printf("time: %s \n", format_time(end_time - start_time));
return 0; return 0;
} }

View file

@ -6,6 +6,7 @@
#include <stdint.h> #include <stdint.h>
#define TEST
typedef struct { typedef struct {
uint32_t m; uint32_t m;
uint32_t n; uint32_t n;

View file

@ -1,4 +1,4 @@
#include "gemm.h" #include <gemm.h>
#define A(i,j) a[(j)*lda+(i)] #define A(i,j) a[(j)*lda+(i)]

View file

@ -1,5 +1,6 @@
NAME = linpack NAME = linpack
mainargs ?= ref
BENCH_LIBS = bench openlibm soft-fp BENCH_LIBS = bench openlibm soft-fp

View file

@ -0,0 +1,4 @@
#include <linpack.h>
/* Benchmark parameters for this configuration: arsize (the only field) = 270. */
bench_linpack_config config = { 270 };

View file

@ -0,0 +1,4 @@
#include <linpack.h>
/* Benchmark parameters for this configuration: arsize (the only field) = 100. */
bench_linpack_config config = { 100 };

View file

@ -0,0 +1,4 @@
#include <linpack.h>
/* Benchmark parameters for this configuration: arsize (the only field) = 80. */
bench_linpack_config config = { 80 };

View file

@ -0,0 +1,43 @@
/* Shared configuration for the linpack benchmark: precision selection,
 * the REAL scalar type, and the run-time configuration record. */
#ifndef __LINPACK_H__
#define __LINPACK_H__
/* Decimal digits of precision reported for each precision mode.
 * NOTE(review): these names match the <float.h> macros and may clash if
 * that header is also included; confirm. */
#define FLT_DIG 6
#define DBL_DIG 15
/* Single precision is selected unconditionally here. */
#define SP
/* Fallback: pick double precision when neither SP nor DP is defined.
 * With SP defined just above, this branch is never taken. */
#ifndef SP
#ifndef DP
#define DP
#endif
#endif
/* Single-precision settings: REAL is float. */
#ifdef SP
#define ZERO 0.0
#define ONE 1.0
#define PREC "Single"
#define BASE10DIG FLT_DIG
typedef float REAL;
#endif
/* Double-precision settings: REAL is double. */
#ifdef DP
#define ZERO 0.0e0
#define ONE 1.0e0
#define PREC "Double"
#define BASE10DIG DBL_DIG
typedef double REAL;
#endif
/* 2022-07-26: Macro defined for memreq variable to resolve warnings
* during malloc check
*/
#define MEM_T long
/* Run-time parameters supplied by a per-configuration source file. */
typedef struct {
/* Array size parameter; presumably the matrix dimension, confirm against the benchmark's main. */
int arsize;
} bench_linpack_config ;
#endif

View file

@ -31,42 +31,12 @@
#include <am.h> #include <am.h>
#include <bench.h> #include <bench.h>
#include <bench_malloc.h> #include <bench_malloc.h>
#include <bench_debug.h>
#include <klib-macros.h> #include <klib-macros.h>
#include <klib.h> #include <klib.h>
#include <linpack.h>
#define FLT_DIG 6 extern bench_linpack_config config;
#define DBL_DIG 15
#define SP
#ifndef SP
#ifndef DP
#define DP
#endif
#endif
#ifdef SP
#define ZERO 0.0
#define ONE 1.0
#define PREC "Single"
#define BASE10DIG FLT_DIG
typedef float REAL;
#endif
#ifdef DP
#define ZERO 0.0e0
#define ONE 1.0e0
#define PREC "Double"
#define BASE10DIG DBL_DIG
typedef double REAL;
#endif
/* 2022-07-26: Macro defined for memreq variable to resolve warnings
* during malloc check
*/
#define MEM_T long
static REAL linpack (long nreps, int arsize); static REAL linpack (long nreps, int arsize);
static void matgen (REAL *a, int lda, int n, REAL *b, REAL *norma); static void matgen (REAL *a, int lda, int n, REAL *b, REAL *norma);
@ -81,110 +51,69 @@ static REAL ddot_ur (int n, REAL *dx, int incx, REAL *dy, int incy);
static void dscal_ur (int n, REAL da, REAL *dx, int incx); static void dscal_ur (int n, REAL da, REAL *dx, int incx);
static int idamax (int n, REAL *dx, int incx); static int idamax (int n, REAL *dx, int incx);
static REAL second (void); static REAL second (void);
static double static inline double fabs(double x) { return x < 0 ? -x : x; }
fabs (double x)
{
return x < 0 ? -x : x;
}
static void *mempool = NULL; static void *mempool = NULL;
int int main(int argc, char **argv)
main (int argc, char **argv)
{ {
ioe_init (); ioe_init();
bench_malloc_init (); bench_malloc_init();
int arsize; int arsize;
long arsize2d, nreps; long arsize2d, nreps;
volatile size_t malloc_arg; volatile size_t malloc_arg;
volatile MEM_T memreq; volatile MEM_T memreq;
arsize = 270; arsize = config.arsize;
arsize2d = (long)arsize * (long)arsize; arsize2d = (long)arsize * (long)arsize;
memreq = arsize2d * sizeof (REAL) + (long)arsize * sizeof (REAL) memreq = arsize2d * sizeof(REAL) + (long)arsize * sizeof(REAL) +
+ (long)arsize * sizeof (int); (long)arsize * sizeof(int);
malloc_arg = (size_t)memreq; malloc_arg = (size_t)memreq;
uint64_t start_time, end_time; uint64_t start_time, end_time;
if ((MEM_T)malloc_arg != memreq if ((MEM_T)malloc_arg != memreq ||
|| (mempool = bench_malloc (malloc_arg)) == NULL) (mempool = bench_malloc(malloc_arg)) == NULL) {
{ BENCH_LOG(ERROR, "Not enough memory available for given array size.\n");
// printf("Not enough memory available for given array size.\n");
return 1; return 1;
} }
// printf("LINPACK benchmark, %s precision.\n", PREC);
// printf("Machine precision: %d digits.\n", BASE10DIG);
// printf("Array size %d X %d.\n", arsize, arsize);
// printf("Memory required: %ldK.\n", (memreq + 512L) >> 10);
// printf("Average rolled and unrolled performance:\n\n");
// printf(" Reps Time(s) DGEFA DGESL OVERHEAD KFLOPS\n");
// printf("----------------------------------------------------\n");
nreps = 1; nreps = 1;
start_time = uptime (); start_time = uptime();
while (linpack (nreps, arsize) < 10.) while (linpack(nreps, arsize) < 10.) {
{
nreps *= 2; nreps *= 2;
} }
end_time = uptime (); end_time = uptime();
bench_free (mempool); bench_free(mempool);
printf ("time: %s ms\n", format_time (end_time - start_time)); BENCH_LOG(INFO, "time: %s", format_time(end_time - start_time));
return 0; return 0;
} }
REAL REAL linpack(long nreps, int arsize)
linpack (long nreps, int arsize)
{ {
REAL *a, *b; REAL *a, *b;
REAL norma, t1, kflops, tdgesl, tdgefa, totalt, toverhead, ops; REAL norma, totalt;
int *ipvt, n, info, lda; int *ipvt, n, info, lda;
long i, arsize2d; long i, arsize2d;
lda = arsize; lda = arsize;
n = arsize / 2; n = arsize / 2;
arsize2d = (long)arsize * (long)arsize; arsize2d = (long)arsize * (long)arsize;
ops = ((2.0 * n * n * n) / 3.0 + 2.0 * n * n);
a = (REAL *)mempool; a = (REAL *)mempool;
b = a + arsize2d; b = a + arsize2d;
ipvt = (int *)&b[arsize]; ipvt = (int *)&b[arsize];
tdgesl = 0; totalt = second();
tdgefa = 0; for (i = 0; i < nreps; i++) {
totalt = second (); matgen(a, lda, n, b, &norma);
for (i = 0; i < nreps; i++) dgefa(a, lda, n, ipvt, &info, 1);
{ dgesl(a, lda, n, ipvt, b, 0, 1);
matgen (a, lda, n, b, &norma);
t1 = second ();
dgefa (a, lda, n, ipvt, &info, 1);
tdgefa += second () - t1;
t1 = second ();
dgesl (a, lda, n, ipvt, b, 0, 1);
tdgesl += second () - t1;
} }
for (i = 0; i < nreps; i++) for (i = 0; i < nreps; i++) {
{ matgen(a, lda, n, b, &norma);
matgen (a, lda, n, b, &norma); dgefa(a, lda, n, ipvt, &info, 0);
t1 = second (); dgesl(a, lda, n, ipvt, b, 0, 0);
dgefa (a, lda, n, ipvt, &info, 0);
tdgefa += second () - t1;
t1 = second ();
dgesl (a, lda, n, ipvt, b, 0, 0);
tdgesl += second () - t1;
} }
totalt = second () - totalt; totalt = second() - totalt;
if (totalt < 0.5 || tdgefa + tdgesl < 0.2)
return (0.);
kflops = 2. * nreps * ops / (1000. * (tdgefa + tdgesl));
toverhead = totalt - tdgefa - tdgesl;
if (tdgefa < 0.)
tdgefa = 0.;
if (tdgesl < 0.)
tdgesl = 0.;
if (toverhead < 0.)
toverhead = 0.;
// printf("%8ld %6.2f %6.2f%% %6.2f%% %6.2f%% %9.3f\n", nreps, totalt,
// 100. * tdgefa / totalt, 100. * tdgesl / totalt,
// 100. * toverhead / totalt, kflops);
return (totalt); return (totalt);
} }
@ -193,8 +122,7 @@ linpack (long nreps, int arsize)
** We would like to declare a[][lda], but c does not allow it. In this ** We would like to declare a[][lda], but c does not allow it. In this
** function, references to a[i][j] are written a[lda*i+j]. ** function, references to a[i][j] are written a[lda*i+j].
*/ */
static void static void matgen(REAL *a, int lda, int n, REAL *b, REAL *norma)
matgen (REAL *a, int lda, int n, REAL *b, REAL *norma)
{ {
int init, i, j; int init, i, j;
@ -202,8 +130,7 @@ matgen (REAL *a, int lda, int n, REAL *b, REAL *norma)
init = 1325; init = 1325;
*norma = 0.0; *norma = 0.0;
for (j = 0; j < n; j++) for (j = 0; j < n; j++)
for (i = 0; i < n; i++) for (i = 0; i < n; i++) {
{
init = (int)((long)3125 * (long)init % 65536L); init = (int)((long)3125 * (long)init % 65536L);
a[lda * j + i] = (init - 32768.0) / 16384.0; a[lda * j + i] = (init - 32768.0) / 16384.0;
*norma = (a[lda * j + i] > *norma) ? a[lda * j + i] : *norma; *norma = (a[lda * j + i] > *norma) ? a[lda * j + i] : *norma;
@ -266,8 +193,7 @@ matgen (REAL *a, int lda, int n, REAL *b, REAL *norma)
** blas daxpy,dscal,idamax ** blas daxpy,dscal,idamax
** **
*/ */
static void static void dgefa(REAL *a, int lda, int n, int *ipvt, int *info, int roll)
dgefa (REAL *a, int lda, int n, int *ipvt, int *info, int roll)
{ {
REAL t; REAL t;
@ -275,30 +201,26 @@ dgefa (REAL *a, int lda, int n, int *ipvt, int *info, int roll)
/* gaussian elimination with partial pivoting */ /* gaussian elimination with partial pivoting */
if (roll) if (roll) {
{
*info = 0; *info = 0;
nm1 = n - 1; nm1 = n - 1;
if (nm1 >= 0) if (nm1 >= 0)
for (k = 0; k < nm1; k++) for (k = 0; k < nm1; k++) {
{
kp1 = k + 1; kp1 = k + 1;
/* find l = pivot index */ /* find l = pivot index */
l = idamax (n - k, &a[lda * k + k], 1) + k; l = idamax(n - k, &a[lda * k + k], 1) + k;
ipvt[k] = l; ipvt[k] = l;
/* zero pivot implies this column already /* zero pivot implies this column already
triangularized */ triangularized */
if (a[lda * k + l] != ZERO) if (a[lda * k + l] != ZERO) {
{
/* interchange if necessary */ /* interchange if necessary */
if (l != k) if (l != k) {
{
t = a[lda * k + l]; t = a[lda * k + l];
a[lda * k + l] = a[lda * k + k]; a[lda * k + l] = a[lda * k + k];
a[lda * k + k] = t; a[lda * k + k] = t;
@ -307,53 +229,45 @@ dgefa (REAL *a, int lda, int n, int *ipvt, int *info, int roll)
/* compute multipliers */ /* compute multipliers */
t = -ONE / a[lda * k + k]; t = -ONE / a[lda * k + k];
dscal_r (n - (k + 1), t, &a[lda * k + k + 1], 1); dscal_r(n - (k + 1), t, &a[lda * k + k + 1], 1);
/* row elimination with column indexing */ /* row elimination with column indexing */
for (j = kp1; j < n; j++) for (j = kp1; j < n; j++) {
{
t = a[lda * j + l]; t = a[lda * j + l];
if (l != k) if (l != k) {
{
a[lda * j + l] = a[lda * j + k]; a[lda * j + l] = a[lda * j + k];
a[lda * j + k] = t; a[lda * j + k] = t;
} }
daxpy_r (n - (k + 1), t, &a[lda * k + k + 1], 1, daxpy_r(n - (k + 1), t, &a[lda * k + k + 1], 1, &a[lda * j + k + 1],
&a[lda * j + k + 1], 1); 1);
} }
} } else
else
(*info) = k; (*info) = k;
} }
ipvt[n - 1] = n - 1; ipvt[n - 1] = n - 1;
if (a[lda * (n - 1) + (n - 1)] == ZERO) if (a[lda * (n - 1) + (n - 1)] == ZERO)
(*info) = n - 1; (*info) = n - 1;
} } else {
else
{
*info = 0; *info = 0;
nm1 = n - 1; nm1 = n - 1;
if (nm1 >= 0) if (nm1 >= 0)
for (k = 0; k < nm1; k++) for (k = 0; k < nm1; k++) {
{
kp1 = k + 1; kp1 = k + 1;
/* find l = pivot index */ /* find l = pivot index */
l = idamax (n - k, &a[lda * k + k], 1) + k; l = idamax(n - k, &a[lda * k + k], 1) + k;
ipvt[k] = l; ipvt[k] = l;
/* zero pivot implies this column already /* zero pivot implies this column already
triangularized */ triangularized */
if (a[lda * k + l] != ZERO) if (a[lda * k + l] != ZERO) {
{
/* interchange if necessary */ /* interchange if necessary */
if (l != k) if (l != k) {
{
t = a[lda * k + l]; t = a[lda * k + l];
a[lda * k + l] = a[lda * k + k]; a[lda * k + l] = a[lda * k + k];
a[lda * k + k] = t; a[lda * k + k] = t;
@ -362,23 +276,20 @@ dgefa (REAL *a, int lda, int n, int *ipvt, int *info, int roll)
/* compute multipliers */ /* compute multipliers */
t = -ONE / a[lda * k + k]; t = -ONE / a[lda * k + k];
dscal_ur (n - (k + 1), t, &a[lda * k + k + 1], 1); dscal_ur(n - (k + 1), t, &a[lda * k + k + 1], 1);
/* row elimination with column indexing */ /* row elimination with column indexing */
for (j = kp1; j < n; j++) for (j = kp1; j < n; j++) {
{
t = a[lda * j + l]; t = a[lda * j + l];
if (l != k) if (l != k) {
{
a[lda * j + l] = a[lda * j + k]; a[lda * j + l] = a[lda * j + k];
a[lda * j + k] = t; a[lda * j + k] = t;
} }
daxpy_ur (n - (k + 1), t, &a[lda * k + k + 1], 1, daxpy_ur(n - (k + 1), t, &a[lda * k + k + 1], 1,
&a[lda * j + k + 1], 1); &a[lda * j + k + 1], 1);
} }
} } else
else
(*info) = k; (*info) = k;
} }
ipvt[n - 1] = n - 1; ipvt[n - 1] = n - 1;
@ -448,133 +359,109 @@ dgefa (REAL *a, int lda, int n, int *ipvt, int *info, int roll)
** **
** blas daxpy,ddot ** blas daxpy,ddot
*/ */
static void static void dgesl(REAL *a, int lda, int n, int *ipvt, REAL *b, int job,
dgesl (REAL *a, int lda, int n, int *ipvt, REAL *b, int job, int roll) int roll)
{ {
REAL t; REAL t;
int k, kb, l, nm1; int k, kb, l, nm1;
if (roll) if (roll) {
{
nm1 = n - 1; nm1 = n - 1;
if (job == 0) if (job == 0) {
{
/* job = 0 , solve a * x = b */ /* job = 0 , solve a * x = b */
/* first solve l*y = b */ /* first solve l*y = b */
if (nm1 >= 1) if (nm1 >= 1)
for (k = 0; k < nm1; k++) for (k = 0; k < nm1; k++) {
{
l = ipvt[k]; l = ipvt[k];
t = b[l]; t = b[l];
if (l != k) if (l != k) {
{
b[l] = b[k]; b[l] = b[k];
b[k] = t; b[k] = t;
} }
daxpy_r (n - (k + 1), t, &a[lda * k + k + 1], 1, &b[k + 1], 1); daxpy_r(n - (k + 1), t, &a[lda * k + k + 1], 1, &b[k + 1], 1);
} }
/* now solve u*x = y */ /* now solve u*x = y */
for (kb = 0; kb < n; kb++) for (kb = 0; kb < n; kb++) {
{
k = n - (kb + 1); k = n - (kb + 1);
b[k] = b[k] / a[lda * k + k]; b[k] = b[k] / a[lda * k + k];
t = -b[k]; t = -b[k];
daxpy_r (k, t, &a[lda * k + 0], 1, &b[0], 1); daxpy_r(k, t, &a[lda * k + 0], 1, &b[0], 1);
} }
} } else {
else
{
/* job = nonzero, solve trans(a) * x = b */ /* job = nonzero, solve trans(a) * x = b */
/* first solve trans(u)*y = b */ /* first solve trans(u)*y = b */
for (k = 0; k < n; k++) for (k = 0; k < n; k++) {
{ t = ddot_r(k, &a[lda * k + 0], 1, &b[0], 1);
t = ddot_r (k, &a[lda * k + 0], 1, &b[0], 1);
b[k] = (b[k] - t) / a[lda * k + k]; b[k] = (b[k] - t) / a[lda * k + k];
} }
/* now solve trans(l)*x = y */ /* now solve trans(l)*x = y */
if (nm1 >= 1) if (nm1 >= 1)
for (kb = 1; kb < nm1; kb++) for (kb = 1; kb < nm1; kb++) {
{
k = n - (kb + 1); k = n - (kb + 1);
b[k] = b[k] b[k] =
+ ddot_r (n - (k + 1), &a[lda * k + k + 1], 1, b[k] + ddot_r(n - (k + 1), &a[lda * k + k + 1], 1, &b[k + 1], 1);
&b[k + 1], 1);
l = ipvt[k]; l = ipvt[k];
if (l != k) if (l != k) {
{
t = b[l]; t = b[l];
b[l] = b[k]; b[l] = b[k];
b[k] = t; b[k] = t;
} }
} }
} }
} } else {
else
{
nm1 = n - 1; nm1 = n - 1;
if (job == 0) if (job == 0) {
{
/* job = 0 , solve a * x = b */ /* job = 0 , solve a * x = b */
/* first solve l*y = b */ /* first solve l*y = b */
if (nm1 >= 1) if (nm1 >= 1)
for (k = 0; k < nm1; k++) for (k = 0; k < nm1; k++) {
{
l = ipvt[k]; l = ipvt[k];
t = b[l]; t = b[l];
if (l != k) if (l != k) {
{
b[l] = b[k]; b[l] = b[k];
b[k] = t; b[k] = t;
} }
daxpy_ur (n - (k + 1), t, &a[lda * k + k + 1], 1, &b[k + 1], daxpy_ur(n - (k + 1), t, &a[lda * k + k + 1], 1, &b[k + 1], 1);
1);
} }
/* now solve u*x = y */ /* now solve u*x = y */
for (kb = 0; kb < n; kb++) for (kb = 0; kb < n; kb++) {
{
k = n - (kb + 1); k = n - (kb + 1);
b[k] = b[k] / a[lda * k + k]; b[k] = b[k] / a[lda * k + k];
t = -b[k]; t = -b[k];
daxpy_ur (k, t, &a[lda * k + 0], 1, &b[0], 1); daxpy_ur(k, t, &a[lda * k + 0], 1, &b[0], 1);
} }
} } else {
else
{
/* job = nonzero, solve trans(a) * x = b */ /* job = nonzero, solve trans(a) * x = b */
/* first solve trans(u)*y = b */ /* first solve trans(u)*y = b */
for (k = 0; k < n; k++) for (k = 0; k < n; k++) {
{ t = ddot_ur(k, &a[lda * k + 0], 1, &b[0], 1);
t = ddot_ur (k, &a[lda * k + 0], 1, &b[0], 1);
b[k] = (b[k] - t) / a[lda * k + k]; b[k] = (b[k] - t) / a[lda * k + k];
} }
/* now solve trans(l)*x = y */ /* now solve trans(l)*x = y */
if (nm1 >= 1) if (nm1 >= 1)
for (kb = 1; kb < nm1; kb++) for (kb = 1; kb < nm1; kb++) {
{
k = n - (kb + 1); k = n - (kb + 1);
b[k] = b[k] b[k] =
+ ddot_ur (n - (k + 1), &a[lda * k + k + 1], 1, b[k] + ddot_ur(n - (k + 1), &a[lda * k + k + 1], 1, &b[k + 1], 1);
&b[k + 1], 1);
l = ipvt[k]; l = ipvt[k];
if (l != k) if (l != k) {
{
t = b[l]; t = b[l];
b[l] = b[k]; b[l] = b[k];
b[k] = t; b[k] = t;
@ -589,8 +476,7 @@ dgesl (REAL *a, int lda, int n, int *ipvt, REAL *b, int job, int roll)
** Jack Dongarra, linpack, 3/11/78. ** Jack Dongarra, linpack, 3/11/78.
** ROLLED version ** ROLLED version
*/ */
static void static void daxpy_r(int n, REAL da, REAL *dx, int incx, REAL *dy, int incy)
daxpy_r (int n, REAL da, REAL *dx, int incx, REAL *dy, int incy)
{ {
int i, ix, iy; int i, ix, iy;
@ -600,8 +486,7 @@ daxpy_r (int n, REAL da, REAL *dx, int incx, REAL *dy, int incy)
if (da == ZERO) if (da == ZERO)
return; return;
if (incx != 1 || incy != 1) if (incx != 1 || incy != 1) {
{
/* code for unequal increments or equal increments != 1 */ /* code for unequal increments or equal increments != 1 */
@ -611,8 +496,7 @@ daxpy_r (int n, REAL da, REAL *dx, int incx, REAL *dy, int incy)
ix = (-n + 1) * incx + 1; ix = (-n + 1) * incx + 1;
if (incy < 0) if (incy < 0)
iy = (-n + 1) * incy + 1; iy = (-n + 1) * incy + 1;
for (i = 0; i < n; i++) for (i = 0; i < n; i++) {
{
dy[iy] = dy[iy] + da * dx[ix]; dy[iy] = dy[iy] + da * dx[ix];
ix = ix + incx; ix = ix + incx;
iy = iy + incy; iy = iy + incy;
@ -631,8 +515,7 @@ daxpy_r (int n, REAL da, REAL *dx, int incx, REAL *dy, int incy)
** Jack Dongarra, linpack, 3/11/78. ** Jack Dongarra, linpack, 3/11/78.
** ROLLED version ** ROLLED version
*/ */
static REAL static REAL ddot_r(int n, REAL *dx, int incx, REAL *dy, int incy)
ddot_r (int n, REAL *dx, int incx, REAL *dy, int incy)
{ {
REAL dtemp; REAL dtemp;
@ -643,8 +526,7 @@ ddot_r (int n, REAL *dx, int incx, REAL *dy, int incy)
if (n <= 0) if (n <= 0)
return (ZERO); return (ZERO);
if (incx != 1 || incy != 1) if (incx != 1 || incy != 1) {
{
/* code for unequal increments or equal increments != 1 */ /* code for unequal increments or equal increments != 1 */
@ -654,8 +536,7 @@ ddot_r (int n, REAL *dx, int incx, REAL *dy, int incy)
ix = (-n + 1) * incx; ix = (-n + 1) * incx;
if (incy < 0) if (incy < 0)
iy = (-n + 1) * incy; iy = (-n + 1) * incy;
for (i = 0; i < n; i++) for (i = 0; i < n; i++) {
{
dtemp = dtemp + dx[ix] * dy[iy]; dtemp = dtemp + dx[ix] * dy[iy];
ix = ix + incx; ix = ix + incx;
iy = iy + incy; iy = iy + incy;
@ -675,16 +556,14 @@ ddot_r (int n, REAL *dx, int incx, REAL *dy, int incy)
** Jack Dongarra, linpack, 3/11/78. ** Jack Dongarra, linpack, 3/11/78.
** ROLLED version ** ROLLED version
*/ */
static void static void dscal_r(int n, REAL da, REAL *dx, int incx)
dscal_r (int n, REAL da, REAL *dx, int incx)
{ {
int i, nincx; int i, nincx;
if (n <= 0) if (n <= 0)
return; return;
if (incx != 1) if (incx != 1) {
{
/* code for increment not equal to 1 */ /* code for increment not equal to 1 */
@ -705,8 +584,7 @@ dscal_r (int n, REAL da, REAL *dx, int incx)
** Jack Dongarra, linpack, 3/11/78. ** Jack Dongarra, linpack, 3/11/78.
** UNROLLED version ** UNROLLED version
*/ */
static void static void daxpy_ur(int n, REAL da, REAL *dx, int incx, REAL *dy, int incy)
daxpy_ur (int n, REAL da, REAL *dx, int incx, REAL *dy, int incy)
{ {
int i, ix, iy, m; int i, ix, iy, m;
@ -716,8 +594,7 @@ daxpy_ur (int n, REAL da, REAL *dx, int incx, REAL *dy, int incy)
if (da == ZERO) if (da == ZERO)
return; return;
if (incx != 1 || incy != 1) if (incx != 1 || incy != 1) {
{
/* code for unequal increments or equal increments != 1 */ /* code for unequal increments or equal increments != 1 */
@ -727,8 +604,7 @@ daxpy_ur (int n, REAL da, REAL *dx, int incx, REAL *dy, int incy)
ix = (-n + 1) * incx + 1; ix = (-n + 1) * incx + 1;
if (incy < 0) if (incy < 0)
iy = (-n + 1) * incy + 1; iy = (-n + 1) * incy + 1;
for (i = 0; i < n; i++) for (i = 0; i < n; i++) {
{
dy[iy] = dy[iy] + da * dx[ix]; dy[iy] = dy[iy] + da * dx[ix];
ix = ix + incx; ix = ix + incx;
iy = iy + incy; iy = iy + incy;
@ -739,15 +615,13 @@ daxpy_ur (int n, REAL da, REAL *dx, int incx, REAL *dy, int incy)
/* code for both increments equal to 1 */ /* code for both increments equal to 1 */
m = n % 4; m = n % 4;
if (m != 0) if (m != 0) {
{
for (i = 0; i < m; i++) for (i = 0; i < m; i++)
dy[i] = dy[i] + da * dx[i]; dy[i] = dy[i] + da * dx[i];
if (n < 4) if (n < 4)
return; return;
} }
for (i = m; i < n; i = i + 4) for (i = m; i < n; i = i + 4) {
{
dy[i] = dy[i] + da * dx[i]; dy[i] = dy[i] + da * dx[i];
dy[i + 1] = dy[i + 1] + da * dx[i + 1]; dy[i + 1] = dy[i + 1] + da * dx[i + 1];
dy[i + 2] = dy[i + 2] + da * dx[i + 2]; dy[i + 2] = dy[i + 2] + da * dx[i + 2];
@ -760,8 +634,7 @@ daxpy_ur (int n, REAL da, REAL *dx, int incx, REAL *dy, int incy)
** Jack Dongarra, linpack, 3/11/78. ** Jack Dongarra, linpack, 3/11/78.
** UNROLLED version ** UNROLLED version
*/ */
static REAL static REAL ddot_ur(int n, REAL *dx, int incx, REAL *dy, int incy)
ddot_ur (int n, REAL *dx, int incx, REAL *dy, int incy)
{ {
REAL dtemp; REAL dtemp;
@ -772,8 +645,7 @@ ddot_ur (int n, REAL *dx, int incx, REAL *dy, int incy)
if (n <= 0) if (n <= 0)
return (ZERO); return (ZERO);
if (incx != 1 || incy != 1) if (incx != 1 || incy != 1) {
{
/* code for unequal increments or equal increments != 1 */ /* code for unequal increments or equal increments != 1 */
@ -783,8 +655,7 @@ ddot_ur (int n, REAL *dx, int incx, REAL *dy, int incy)
ix = (-n + 1) * incx; ix = (-n + 1) * incx;
if (incy < 0) if (incy < 0)
iy = (-n + 1) * incy; iy = (-n + 1) * incy;
for (i = 0; i < n; i++) for (i = 0; i < n; i++) {
{
dtemp = dtemp + dx[ix] * dy[iy]; dtemp = dtemp + dx[ix] * dy[iy];
ix = ix + incx; ix = ix + incx;
iy = iy + incy; iy = iy + incy;
@ -795,18 +666,16 @@ ddot_ur (int n, REAL *dx, int incx, REAL *dy, int incy)
/* code for both increments equal to 1 */ /* code for both increments equal to 1 */
m = n % 5; m = n % 5;
if (m != 0) if (m != 0) {
{
for (i = 0; i < m; i++) for (i = 0; i < m; i++)
dtemp = dtemp + dx[i] * dy[i]; dtemp = dtemp + dx[i] * dy[i];
if (n < 5) if (n < 5)
return (dtemp); return (dtemp);
} }
for (i = m; i < n; i = i + 5) for (i = m; i < n; i = i + 5) {
{ dtemp = dtemp + dx[i] * dy[i] + dx[i + 1] * dy[i + 1] +
dtemp = dtemp + dx[i] * dy[i] + dx[i + 1] * dy[i + 1] dx[i + 2] * dy[i + 2] + dx[i + 3] * dy[i + 3] +
+ dx[i + 2] * dy[i + 2] + dx[i + 3] * dy[i + 3] dx[i + 4] * dy[i + 4];
+ dx[i + 4] * dy[i + 4];
} }
return (dtemp); return (dtemp);
} }
@ -816,16 +685,14 @@ ddot_ur (int n, REAL *dx, int incx, REAL *dy, int incy)
** Jack Dongarra, linpack, 3/11/78. ** Jack Dongarra, linpack, 3/11/78.
** UNROLLED version ** UNROLLED version
*/ */
static void static void dscal_ur(int n, REAL da, REAL *dx, int incx)
dscal_ur (int n, REAL da, REAL *dx, int incx)
{ {
int i, m, nincx; int i, m, nincx;
if (n <= 0) if (n <= 0)
return; return;
if (incx != 1) if (incx != 1) {
{
/* code for increment not equal to 1 */ /* code for increment not equal to 1 */
@ -838,15 +705,13 @@ dscal_ur (int n, REAL da, REAL *dx, int incx)
/* code for increment equal to 1 */ /* code for increment equal to 1 */
m = n % 5; m = n % 5;
if (m != 0) if (m != 0) {
{
for (i = 0; i < m; i++) for (i = 0; i < m; i++)
dx[i] = da * dx[i]; dx[i] = da * dx[i];
if (n < 5) if (n < 5)
return; return;
} }
for (i = m; i < n; i = i + 5) for (i = m; i < n; i = i + 5) {
{
dx[i] = da * dx[i]; dx[i] = da * dx[i];
dx[i + 1] = da * dx[i + 1]; dx[i + 1] = da * dx[i + 1];
dx[i + 2] = da * dx[i + 2]; dx[i + 2] = da * dx[i + 2];
@ -859,8 +724,7 @@ dscal_ur (int n, REAL da, REAL *dx, int incx)
** Finds the index of element having max. absolute value. ** Finds the index of element having max. absolute value.
** Jack Dongarra, linpack, 3/11/78. ** Jack Dongarra, linpack, 3/11/78.
*/ */
static int static int idamax(int n, REAL *dx, int incx)
idamax (int n, REAL *dx, int incx)
{ {
REAL dmax; REAL dmax;
@ -870,44 +734,37 @@ idamax (int n, REAL *dx, int incx)
return (-1); return (-1);
if (n == 1) if (n == 1)
return (0); return (0);
if (incx != 1) if (incx != 1) {
{
/* code for increment not equal to 1 */ /* code for increment not equal to 1 */
ix = 1; ix = 1;
dmax = fabs ((double)dx[0]); dmax = fabs((double)dx[0]);
ix = ix + incx; ix = ix + incx;
for (i = 1; i < n; i++) for (i = 1; i < n; i++) {
{ if (fabs((double)dx[ix]) > dmax) {
if (fabs ((double)dx[ix]) > dmax)
{
itemp = i; itemp = i;
dmax = fabs ((double)dx[ix]); dmax = fabs((double)dx[ix]);
} }
ix = ix + incx; ix = ix + incx;
} }
} } else {
else
{
/* code for increment equal to 1 */ /* code for increment equal to 1 */
itemp = 0; itemp = 0;
dmax = fabs ((double)dx[0]); dmax = fabs((double)dx[0]);
for (i = 1; i < n; i++) for (i = 1; i < n; i++)
if (fabs ((double)dx[i]) > dmax) if (fabs((double)dx[i]) > dmax) {
{
itemp = i; itemp = i;
dmax = fabs ((double)dx[i]); dmax = fabs((double)dx[i]);
} }
} }
return (itemp); return (itemp);
} }
static REAL static REAL second(void)
second (void)
{ {
return ((REAL)(uptime () / 1000)); return ((REAL)(uptime() / 1000));
} }

View file

@ -1,9 +1,10 @@
NAME = mcf NAME = mcf
mainargs ?= ref
BENCH_LIBS = bench openlibm soft-fp BENCH_LIBS = bench openlibm soft-fp
SRCS = main.c mcf.c pqueue.c $(shell realpath ./test-gen/test.c) SRCS = main.c mcf.c pqueue.c ./configs/$(mainargs)-config.c
INC_PATH += ../common/openlibm/include \ INC_PATH += ../common/openlibm/include \
../common/openlibm/src \ ../common/openlibm/src \
@ -12,7 +13,6 @@ INC_PATH += ../common/openlibm/include \
include $(AM_HOME)/Makefile include $(AM_HOME)/Makefile
BENCH_LINKAGE = $(addsuffix -$(ARCH).a, $(join \ BENCH_LINKAGE = $(addsuffix -$(ARCH).a, $(join \
$(addsuffix /build/, $(addprefix $(WORK_DIR)/../common/, $(BENCH_LIBS))), \ $(addsuffix /build/, $(addprefix $(WORK_DIR)/../common/, $(BENCH_LIBS))), \
$(BENCH_LIBS) )) $(BENCH_LIBS) ))

View file

@ -0,0 +1,93 @@
#include "input.h"
/* Input data set for the mcf benchmark. The three counts below match the
 * number of initializers in the corresponding tables. */
const int nodes_num = 14;
const int edges_num = 25;
const int demands_num = 40;
/* Node table, one row per node; the first field is the row index.
 * NOTE(review): the remaining fields follow node_t, declared elsewhere. */
node_t node_buf[]={
{ 0, 0, 0, 6},
{ 1, 0, 0, 1},
{ 2, 0, 0, 1},
{ 3, 0, 0, 1},
{ 4, 0, 0, 6},
{ 5, 0, 0, 1},
{ 6, 0, 0, 1},
{ 7, 0, 0, 1},
{ 8, 0, 0, 2},
{ 9, 0, 0, 1},
{ 10, 0, 0, 1},
{ 11, 0, 0, 2},
{ 12, 0, 0, 1},
{ 13, 0, 0, 1},
};
/* Edge table, one row per edge; the first field is the row index.
 * NOTE(review): the second and third fields look like the endpoint node
 * indices (all below nodes_num); confirm against edge_t. */
edge_t edge_buf[]={
{ 0, 0, 1, 101, 122},
{ 1, 1, 2, 179, 377},
{ 2, 2, 3, 124, 202},
{ 3, 3, 4, 125, 261},
{ 4, 4, 5, 182, 423},
{ 5, 5, 6, 184, 405},
{ 6, 6, 7, 140, 259},
{ 7, 7, 8, 118, 398},
{ 8, 8, 9, 128, 228},
{ 9, 9, 10, 186, 238},
{ 10, 10, 11, 172, 236},
{ 11, 11, 12, 187, 350},
{ 12, 12, 13, 163, 217},
{ 13, 0, 4, 180, 181},
{ 14, 0, 6, 5, 249},
{ 15, 0, 7, 108, 427},
{ 16, 0, 12, 155, 139},
{ 17, 0, 8, 3, 322},
{ 18, 4, 7, 106, 182},
{ 19, 4, 9, 81, 345},
{ 20, 4, 5, 212, 289},
{ 21, 4, 6, 166, 419},
{ 22, 4, 10, 198, 30},
{ 23, 8, 12, 221, 308},
{ 24, 11, 12, 179, 235},
};
/* Demand table, one row per demand; the first field is the row index.
 * NOTE(review): the second and third fields look like source and destination
 * node indices and the fourth an amount; confirm against demands_t. */
demands_t demands_buf[]={
{ 0, 0, 13, 10},
{ 1, 0, 1, 96},
{ 2, 1, 6, 78},
{ 3, 1, 9, 95},
{ 4, 3, 5, 35},
{ 5, 3, 10, 77},
{ 6, 3, 13, 38},
{ 7, 3, 9, 98},
{ 8, 3, 11, 92},
{ 9, 3, 6, 29},
{ 10, 3, 4, 38},
{ 11, 4, 10, 73},
{ 12, 4, 5, 6},
{ 13, 4, 12, 28},
{ 14, 4, 13, 4},
{ 15, 4, 8, 56},
{ 16, 4, 9, 22},
{ 17, 4, 7, 48},
{ 18, 4, 6, 29},
{ 19, 5, 9, 35},
{ 20, 5, 13, 39},
{ 21, 5, 12, 77},
{ 22, 5, 8, 42},
{ 23, 5, 10, 63},
{ 24, 6, 12, 7},
{ 25, 6, 10, 25},
{ 26, 6, 11, 18},
{ 27, 6, 8, 29},
{ 28, 6, 13, 36},
{ 29, 6, 9, 45},
{ 30, 7, 11, 36},
{ 31, 7, 13, 95},
{ 32, 7, 12, 68},
{ 33, 7, 8, 33},
{ 34, 7, 10, 11},
{ 35, 8, 13, 82},
{ 36, 8, 10, 7},
{ 37, 9, 10, 25},
{ 38, 9, 11, 84},
{ 39, 10, 13, 78},
};

View file

@ -0,0 +1,31 @@
#include "input.h"
/* Entry counts for the three tables defined below in this file; each value
 * equals the number of initializers in the corresponding table. */
const int nodes_num = 6;
const int edges_num = 5;
const int demands_num = 6;
/* Node table for this input set (nodes_num entries).
 * The first field of every entry equals its array index. The meaning of the
 * remaining three fields is fixed by node_t (declared via input.h, not
 * visible here) -- TODO confirm before editing values by hand. */
node_t node_buf[]={
{ 0, 0, 0, 1},
{ 1, 0, 0, 1},
{ 2, 0, 0, 1},
{ 3, 0, 0, 1},
{ 4, 0, 0, 1},
{ 5, 0, 0, 1},
};
/* Edge table for this input set (edges_num entries): entry i joins node i to
 * node i+1, so the five edges form a single chain 0-1-2-3-4-5.
 * Fields 4 and 5 are per-edge numeric attributes whose meaning is defined by
 * edge_t and is not visible here. */
edge_t edge_buf[]={
{ 0, 0, 1, 164, 484},
{ 1, 1, 2, 193, 186},
{ 2, 2, 3, 167, 274},
{ 3, 3, 4, 180, 133},
{ 4, 4, 5, 129, 348},
};
/* Demand table for this input set (demands_num entries).
 * Field 1 equals the array index. Fields 2 and 3 are valid node ids with
 * field 2 < field 3 in every entry -- presumably source and destination;
 * field 4 is presumably the requested amount. Confirm against demands_t,
 * which is not visible here. */
demands_t demands_buf[]={
{ 0, 0, 5, 10},
{ 1, 0, 2, 52},
{ 2, 0, 4, 13},
{ 3, 1, 5, 20},
{ 4, 1, 2, 72},
{ 5, 1, 3, 44},
};

View file

@ -1,5 +1,5 @@
#ifndef __test_h__ #ifndef __TEST_H__
#define __test_h__ #define __TEST_H__
extern const int nodes_num; extern const int nodes_num;
extern const int edges_num; extern const int edges_num;

View file

@ -1,6 +1,6 @@
#include <am.h> #include <am.h>
#include <klib-macros.h> #include <klib-macros.h>
#include <bench_printf.h> #include <bench_debug.h>
#include <mcf.h> #include <mcf.h>
#include <input.h> #include <input.h>
@ -18,7 +18,7 @@ int main(char *args)
uint64_t start_time, end_time; uint64_t start_time, end_time;
start_time = uptime(); start_time = uptime();
bench_printf("\nRandomized rounded paths: size: %d\n", sizeof(size_t)); BENCH_LOG(DEBUG, "\nRandomized rounded paths: size: %d", sizeof(size_t));
for(demands_select = 0; demands_select < demands_num; demands_select++) for(demands_select = 0; demands_select < demands_num; demands_select++)
{ {
// (1) run MCF solver; // (1) run MCF solver;
@ -52,7 +52,7 @@ int main(char *args)
free_topology(&mcf); free_topology(&mcf);
} }
end_time = uptime(); end_time = uptime();
bench_printf("time: %s ms \n", format_time(end_time - start_time)); BENCH_LOG(INFO, "time: %s", format_time(end_time - start_time));
return 0; return 0;
} }

View file

@ -1,5 +1,5 @@
#include <klib.h> #include <klib.h>
#include <bench_printf.h> #include <bench_debug.h>
#include <stdio.h> #include <stdio.h>
#include <pqueue.h> #include <pqueue.h>
#include <mcf.h> #include <mcf.h>
@ -90,13 +90,13 @@ bool parse_options(MCF *mcf, char *arg, float epsilon)
else if (strcmp(arg, "MCMCF") == 0) { else if (strcmp(arg, "MCMCF") == 0) {
mcf->_problem_type = MCMCF_TYPE; mcf->_problem_type = MCMCF_TYPE;
} else { } else {
bench_printf("Error: -problem_type must be MCF or MCMCF.\n"); BENCH_LOG(ERROR, "Error: -problem_type must be MCF or MCMCF.\n");
assert(0); assert(0);
} }
mcf->_epsilon1 = epsilon; mcf->_epsilon1 = epsilon;
if (mcf->_epsilon1 <= 0 || mcf->_epsilon1 >= 1) { if (mcf->_epsilon1 <= 0 || mcf->_epsilon1 >= 1) {
bench_printf("Error: -epsilon option requires a float in (0,1).\n"); BENCH_LOG(ERROR, "Error: -epsilon option requires a float in (0,1).\n");
assert(0); assert(0);
} }
@ -1035,7 +1035,7 @@ void print_routing_paths(MCF *mcf_v)
for ( int i = 0; i < mcf_v->no_commodity; i++) { for ( int i = 0; i < mcf_v->no_commodity; i++) {
// printf("Commodity %d: %d -> %d: ", i, // printf("Commodity %d: %d -> %d: ", i,
// mcf_v->_commodities[i].src, mcf_v->_commodities[i].dest); // mcf_v->_commodities[i].src, mcf_v->_commodities[i].dest);
bench_printf("Commodity %d: %d -> %d: ", demands_select, BENCH_LOG(DEBUG, "Commodity %d: %d -> %d: ", demands_select,
mcf_v->_commodities[i].src, mcf_v->_commodities[i].dest); mcf_v->_commodities[i].src, mcf_v->_commodities[i].dest);

View file

@ -3,7 +3,7 @@
// #include <float.h> // #include <float.h>
#include <klib.h> #include <klib.h>
#include <pqueue.h> #include <pqueue.h>
#include <bench_printf.h> #include <bench_debug.h>
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// //

View file

@ -1,7 +1,9 @@
NAME = stream NAME = stream
mainargs ?= ref
BENCH_LIBS = bench openlibm soft-fp BENCH_LIBS = bench openlibm soft-fp
SRCS = stream.c SRCS = stream.c ./configs/$(mainargs)-config.c
INC_PATH += ../common/openlibm/include \ INC_PATH += ../common/openlibm/include \
../common/openlibm/src \ ../common/openlibm/src \

View file

@ -0,0 +1,3 @@
#include <stream.h>
/* STREAM parameters for this input set: 200000 elements per working array.
 * Defined const so the definition has the same type as the
 * `extern const bench_stream_config config;` declaration in stream.c. */
const bench_stream_config config = {200000};

View file

@ -0,0 +1,4 @@
#include <stream.h>
/* STREAM parameters for this input set: 100000 elements per working array.
 * Defined const so the definition has the same type as the
 * `extern const bench_stream_config config;` declaration in stream.c. */
const bench_stream_config config = {100000};

View file

@ -0,0 +1,4 @@
#include <stream.h>
/* STREAM parameters for this input set: 10000 elements per working array.
 * Defined const so the definition has the same type as the
 * `extern const bench_stream_config config;` declaration in stream.c. */
const bench_stream_config config = {10000};

View file

@ -0,0 +1,4 @@
#ifndef BENCH_STREAM_H
#define BENCH_STREAM_H

/*
 * Per-input-set parameters for the STREAM benchmark.
 *
 * The include guard is required: the typedef names an anonymous struct, so a
 * second inclusion in the same translation unit would declare a different
 * type under the same name and fail to compile.
 */
typedef struct {
  /* Number of elements in each of the three working arrays; stream.c
   * allocates this many elements (plus OFFSET) per array. */
  unsigned int stream_array_size;
} bench_stream_config;

#endif /* BENCH_STREAM_H */

View file

@ -42,9 +42,11 @@
/*----------------------------------------------------------------------------*/ /*----------------------------------------------------------------------------*/
#include <am.h> #include <am.h>
#include <bench.h> #include <bench.h>
#include <klib.h> #include <bench_malloc.h>
#include <klib-macros.h> #include <stream.h>
#include <float.h> #include <float.h>
#include <klib-macros.h>
#include <klib.h>
/*----------------------------------------------------------------------- /*-----------------------------------------------------------------------
* INSTRUCTIONS: * INSTRUCTIONS:
* *
@ -70,7 +72,8 @@
* Example: most versions of Windows have a 10 millisecond timer * Example: most versions of Windows have a 10 millisecond timer
* granularity. 20 "ticks" at 10 ms/tic is 200 milliseconds. * granularity. 20 "ticks" at 10 ms/tic is 200 milliseconds.
* If the chip is capable of 10 GB/s, it moves 2 GB in 200 msec. * If the chip is capable of 10 GB/s, it moves 2 GB in 200 msec.
* This means the each array must be at least 1 GB, or 128M elements. * This means the each array must be at least 1 GB, or 128M
*elements.
* *
* Version 5.10 increases the default array size from 2 million * Version 5.10 increases the default array size from 2 million
* elements to 10 million elements in response to the increasing * elements to 10 million elements in response to the increasing
@ -85,12 +88,12 @@
* code for the (many) compilers that support preprocessor definitions * code for the (many) compilers that support preprocessor definitions
* on the compile line. E.g., * on the compile line. E.g.,
* gcc -O -DSTREAM_ARRAY_SIZE=100000000 stream.c -o stream.100M * gcc -O -DSTREAM_ARRAY_SIZE=100000000 stream.c -o stream.100M
* will override the default size of 10M with a new size of 100M elements * will override the default size of 10M with a new size of 100M
* per array. *elements per array.
*/ */
#if (STREAM_ARRAY_SIZE+0) > 0 #if (STREAM_ARRAY_SIZE + 0) > 0
#else #else
# define STREAM_ARRAY_SIZE 200000 #define STREAM_ARRAY_SIZE 200000
#endif #endif
/* 2) STREAM runs each kernel "NTIMES" times and reports the *best* result /* 2) STREAM runs each kernel "NTIMES" times and reports the *best* result
* for any iteration after the first, therefore the minimum value * for any iteration after the first, therefore the minimum value
@ -102,24 +105,24 @@
* code using, for example, "-DNTIMES=7". * code using, for example, "-DNTIMES=7".
*/ */
#ifdef NTIMES #ifdef NTIMES
#if NTIMES<=1 #if NTIMES <= 1
# define NTIMES 10 #define NTIMES 10
#endif #endif
#endif #endif
#ifndef NTIMES #ifndef NTIMES
# define NTIMES 2 #define NTIMES 2
#endif #endif
/* Users are allowed to modify the "OFFSET" variable, which *may* change the /* Users are allowed to modify the "OFFSET" variable, which *may* change the
* relative alignment of the arrays (though compilers may change the * relative alignment of the arrays (though compilers may change the
* effective offset by making the arrays non-contiguous on some systems). * effective offset by making the arrays non-contiguous on some
* Use of non-zero values for OFFSET can be especially helpful if the * systems). Use of non-zero values for OFFSET can be especially helpful if the
* STREAM_ARRAY_SIZE is set to a value close to a large power of 2. * STREAM_ARRAY_SIZE is set to a value close to a large power of 2.
* OFFSET can also be set on the compile line without changing the source * OFFSET can also be set on the compile line without changing the source
* code using, for example, "-DOFFSET=56". * code using, for example, "-DOFFSET=56".
*/ */
#ifndef OFFSET #ifndef OFFSET
# define OFFSET 0 #define OFFSET 0
#endif #endif
/* /*
@ -133,84 +136,88 @@
* This is known to work on many, many systems.... * This is known to work on many, many systems....
* *
* To use multiple cores, you need to tell the compiler to obey the OpenMP * To use multiple cores, you need to tell the compiler to obey the OpenMP
* directives in the code. This varies by compiler, but a common example is * directives in the code. This varies by compiler, but a common example
* gcc -O -fopenmp stream.c -o stream_omp *is gcc -O -fopenmp stream.c -o stream_omp The environment variable
* The environment variable OMP_NUM_THREADS allows runtime control of the *OMP_NUM_THREADS allows runtime control of the number of threads/cores used
* number of threads/cores used when the resulting "stream_omp" program *when the resulting "stream_omp" program is executed.
* is executed.
* *
* To run with single-precision variables and arithmetic, simply add * To run with single-precision variables and arithmetic, simply add
* -DSTREAM_TYPE=float * -DSTREAM_TYPE=float
* to the compile line. * to the compile line.
* Note that this changes the minimum array sizes required --- see (1) above. * Note that this changes the minimum array sizes required --- see (1)
*above.
* *
* The preprocessor directive "TUNED" does not do much -- it simply causes the * The preprocessor directive "TUNED" does not do much -- it simply causes
* code to call separate functions to execute each kernel. Trivial versions *the code to call separate functions to execute each kernel. Trivial versions
* of these functions are provided, but they are *not* tuned -- they just * of these functions are provided, but they are *not* tuned -- they just
* provide predefined interfaces to be replaced with tuned code. * provide predefined interfaces to be replaced with tuned code.
* *
* *
* 4) Optional: Mail the results to mccalpin@cs.virginia.edu * 4) Optional: Mail the results to mccalpin@cs.virginia.edu
* Be sure to include info that will help me understand: * Be sure to include info that will help me understand:
* a) the computer hardware configuration (e.g., processor model, memory type) * a) the computer hardware configuration (e.g., processor model,
* b) the compiler name/version and compilation flags *memory type) b) the compiler name/version and compilation flags c) any
* c) any run-time information (such as OMP_NUM_THREADS) *run-time information (such as OMP_NUM_THREADS) d) all of the output from the
* d) all of the output from the test case. *test case.
* *
* Thanks! * Thanks!
* *
*-----------------------------------------------------------------------*/ *-----------------------------------------------------------------------*/
# define HLINE "-------------------------------------------------------------\n" #define HLINE "-------------------------------------------------------------\n"
#define DIS_OPENMP #define DIS_OPENMP
# ifndef MIN #ifndef MIN
# define MIN(x,y) ((x)<(y)?(x):(y)) #define MIN(x, y) ((x) < (y) ? (x) : (y))
# endif #endif
# ifndef MAX #ifndef MAX
# define MAX(x,y) ((x)>(y)?(x):(y)) #define MAX(x, y) ((x) > (y) ? (x) : (y))
# endif #endif
#ifndef STREAM_TYPE #ifndef STREAM_TYPE
#define STREAM_TYPE double #define STREAM_TYPE double
#endif #endif
static STREAM_TYPE a[STREAM_ARRAY_SIZE+OFFSET], // static STREAM_TYPE a[STREAM_ARRAY_SIZE + OFFSET], b[STREAM_ARRAY_SIZE + OFFSET],
b[STREAM_ARRAY_SIZE+OFFSET], // c[STREAM_ARRAY_SIZE + OFFSET];
c[STREAM_ARRAY_SIZE+OFFSET];
static double avgtime[4] = {0}, maxtime[4] = {0}, static double avgtime[4] = {0}, maxtime[4] = {0},
mintime[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; mintime[4] = {FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX};
static char *label[4] = {"Copy: ", "Scale: ", static char *label[4] = {
"Add: ", "Triad: "}; "Copy: ", "Scale: ", "Add: ", "Triad: "};
static double bytes[4] = { // static double bytes[4] = {2 * sizeof(STREAM_TYPE) * STREAM_ARRAY_SIZE,
2 * sizeof(STREAM_TYPE) * STREAM_ARRAY_SIZE, // 2 * sizeof(STREAM_TYPE) * STREAM_ARRAY_SIZE,
2 * sizeof(STREAM_TYPE) * STREAM_ARRAY_SIZE, // 3 * sizeof(STREAM_TYPE) * STREAM_ARRAY_SIZE,
3 * sizeof(STREAM_TYPE) * STREAM_ARRAY_SIZE, // 3 * sizeof(STREAM_TYPE) * STREAM_ARRAY_SIZE};
3 * sizeof(STREAM_TYPE) * STREAM_ARRAY_SIZE
};
extern double mysecond(); extern double mysecond();
extern void checkSTREAMresults(); extern void checkSTREAMresults();
#ifdef TUNED
extern void tuned_STREAM_Copy();
extern void tuned_STREAM_Scale(STREAM_TYPE scalar);
extern void tuned_STREAM_Add();
extern void tuned_STREAM_Triad(STREAM_TYPE scalar);
#endif
#ifndef DIS_OPENMP #ifndef DIS_OPENMP
#ifdef _OPENMP #ifdef _OPENMP
extern int omp_get_num_threads(); extern int omp_get_num_threads();
#endif #endif
#endif #endif
int
main() extern const bench_stream_config config;
{
int main() {
int asize = config.stream_array_size;
bench_malloc_init();
STREAM_TYPE *a = bench_malloc(sizeof(STREAM_TYPE) * (asize + OFFSET));
STREAM_TYPE *b = bench_malloc(sizeof(STREAM_TYPE) * (asize + OFFSET));
STREAM_TYPE *c = bench_malloc(sizeof(STREAM_TYPE) * (asize + OFFSET));
double bytes[4] = {2 * sizeof(STREAM_TYPE) * asize,
2 * sizeof(STREAM_TYPE) * asize,
3 * sizeof(STREAM_TYPE) * asize,
3 * sizeof(STREAM_TYPE) * asize};
STREAM_TYPE *vptr[] = {a, b, c};
int quantum, checktick(); int quantum, checktick();
int BytesPerWord; // int BytesPerWord;
int k; int k;
size_t j; size_t j;
STREAM_TYPE scalar; STREAM_TYPE scalar;
@ -218,32 +225,36 @@ main()
/* --- SETUP --- determine precision and check timing --- */ /* --- SETUP --- determine precision and check timing --- */
printf(HLINE); // printf(HLINE);
printf("STREAM version $Revision: 5.10 $\n"); // printf("STREAM version $Revision: 5.10 $\n");
printf(HLINE); // printf(HLINE);
BytesPerWord = sizeof(STREAM_TYPE); // BytesPerWord = sizeof(STREAM_TYPE);
printf("This system uses %d bytes per array element.\n", // printf("This system uses %d bytes per array element.\n", BytesPerWord);
BytesPerWord);
printf(HLINE); // printf(HLINE);
#ifdef N #ifdef N
printf("***** WARNING: ******\n"); /* printf("***** WARNING: ******\n");
printf(" It appears that you set the preprocessor variable N when compiling this code.\n"); printf(" It appears that you set the preprocessor variable N when "
printf(" This version of the code uses the preprocesor variable STREAM_ARRAY_SIZE to control the array size\n"); "compiling this code.\n");
printf(" Reverting to default value of STREAM_ARRAY_SIZE=%.0f\n",(double) STREAM_ARRAY_SIZE); printf(" This version of the code uses the preprocesor variable "
printf("***** WARNING: ******\n"); "STREAM_ARRAY_SIZE to control the array size\n");
printf(" Reverting to default value of STREAM_ARRAY_SIZE=%.0f\n",
(double)STREAM_ARRAY_SIZE);
printf("***** WARNING: ******\n"); */
#endif #endif
/*
printf("Array size = %.0f (elements), Offset = %d (elements)\n" , (double) STREAM_ARRAY_SIZE, OFFSET); printf("Array size = %.0f (elements), Offset = %d (elements)\n",
(double)STREAM_ARRAY_SIZE, OFFSET);
printf("Memory per array = %.1f MiB (= %.1f GiB).\n", printf("Memory per array = %.1f MiB (= %.1f GiB).\n",
BytesPerWord * ( (double) STREAM_ARRAY_SIZE / 1024.0/1024.0), BytesPerWord * ((double)STREAM_ARRAY_SIZE / 1024.0 / 1024.0),
BytesPerWord * ( (double) STREAM_ARRAY_SIZE / 1024.0/1024.0/1024.0)); BytesPerWord * ((double)STREAM_ARRAY_SIZE / 1024.0 / 1024.0 / 1024.0));
printf("Total memory required = %.1f MiB (= %.1f GiB).\n", printf("Total memory required = %.1f MiB (= %.1f GiB).\n",
(3.0 * BytesPerWord) * ( (double) STREAM_ARRAY_SIZE / 1024.0/1024.), (3.0 * BytesPerWord) * ((double)STREAM_ARRAY_SIZE / 1024.0 / 1024.),
(3.0 * BytesPerWord) * ( (double) STREAM_ARRAY_SIZE / 1024.0/1024./1024.)); (3.0 * BytesPerWord) *
((double)STREAM_ARRAY_SIZE / 1024.0 / 1024. / 1024.));
printf("Each kernel will be executed %d times.\n", NTIMES); printf("Each kernel will be executed %d times.\n", NTIMES);
printf(" The *best* time for each kernel (excluding the first iteration)\n"); printf(" The *best* time for each kernel (excluding the first iteration)\n");
printf(" will be used to compute the reported bandwidth.\n"); printf(" will be used to compute the reported bandwidth.\n"); */
#ifndef DIS_OPENMP #ifndef DIS_OPENMP
#ifdef _OPENMP #ifdef _OPENMP
@ -253,7 +264,7 @@ main()
#pragma omp master #pragma omp master
{ {
k = omp_get_num_threads(); k = omp_get_num_threads();
printf ("Number of Threads requested = %i\n",k); printf("Number of Threads requested = %i\n", k);
} }
} }
#endif #endif
@ -265,7 +276,7 @@ main()
#pragma omp parallel #pragma omp parallel
#pragma omp atomic #pragma omp atomic
k++; k++;
printf ("Number of Threads counted = %i\n",k); printf("Number of Threads counted = %i\n", k);
#endif #endif
#endif #endif
@ -273,7 +284,7 @@ main()
#ifndef DIS_OPENMP #ifndef DIS_OPENMP
#pragma omp parallel for #pragma omp parallel for
#endif #endif
for (j=0; j<STREAM_ARRAY_SIZE; j++) { for (j = 0; j < asize; j++) {
a[j] = 1.0; a[j] = 1.0;
b[j] = 2.0; b[j] = 2.0;
c[j] = 0.0; c[j] = 0.0;
@ -281,9 +292,10 @@ main()
printf(HLINE); printf(HLINE);
if ( (quantum = checktick()) >= 1) if ((quantum = checktick()) >= 1)
printf("Your clock granularity/precision appears to be " printf("Your clock granularity/precision appears to be "
"%d microseconds.\n", quantum); "%d microseconds.\n",
quantum);
else { else {
printf("Your clock granularity appears to be " printf("Your clock granularity appears to be "
"less than one microsecond.\n"); "less than one microsecond.\n");
@ -294,13 +306,14 @@ main()
#ifndef DIS_OPENMP #ifndef DIS_OPENMP
#pragma omp parallel for #pragma omp parallel for
#endif #endif
for (j = 0; j < STREAM_ARRAY_SIZE; j++) for (j = 0; j < asize; j++)
a[j] = 2.0E0 * a[j]; a[j] = 2.0E0 * a[j];
t = 1.0E6 * (mysecond() - t); t = 1.0E6 * (mysecond() - t);
printf("Each test below will take on the order" printf("Each test below will take on the order"
" of %d microseconds.\n", (int) t ); " of %d microseconds.\n",
printf(" (= %d clock ticks)\n", (int) (t/quantum) ); (int)t);
printf(" (= %d clock ticks)\n", (int)(t / quantum));
printf("Increase the size of the arrays if this shows that\n"); printf("Increase the size of the arrays if this shows that\n");
printf("you are not getting at least 20 clock ticks per test.\n"); printf("you are not getting at least 20 clock ticks per test.\n");
@ -314,63 +327,45 @@ main()
/* --- MAIN LOOP --- repeat test cases NTIMES times --- */ /* --- MAIN LOOP --- repeat test cases NTIMES times --- */
scalar = 3.0; scalar = 3.0;
for (k=0; k<NTIMES; k++) for (k = 0; k < NTIMES; k++) {
{
times[0][k] = mysecond(); times[0][k] = mysecond();
#ifdef TUNED
tuned_STREAM_Copy();
#else
#ifndef DIS_OPENMP #ifndef DIS_OPENMP
#pragma omp parallel for #pragma omp parallel for
#endif #endif
for (j=0; j<STREAM_ARRAY_SIZE; j++) for (j = 0; j < asize; j++)
c[j] = a[j]; c[j] = a[j];
#endif
times[0][k] = mysecond() - times[0][k]; times[0][k] = mysecond() - times[0][k];
times[1][k] = mysecond(); times[1][k] = mysecond();
#ifdef TUNED
tuned_STREAM_Scale(scalar);
#else
#ifndef DIS_OPENMP #ifndef DIS_OPENMP
#pragma omp parallel for #pragma omp parallel for
#endif #endif
for (j=0; j<STREAM_ARRAY_SIZE; j++) for (j = 0; j < asize; j++)
b[j] = scalar*c[j]; b[j] = scalar * c[j];
#endif
times[1][k] = mysecond() - times[1][k]; times[1][k] = mysecond() - times[1][k];
times[2][k] = mysecond(); times[2][k] = mysecond();
#ifdef TUNED
tuned_STREAM_Add();
#else
#ifndef DIS_OPENMP #ifndef DIS_OPENMP
#pragma omp parallel for #pragma omp parallel for
#endif #endif
for (j=0; j<STREAM_ARRAY_SIZE; j++) for (j = 0; j < asize; j++)
c[j] = a[j]+b[j]; c[j] = a[j] + b[j];
#endif
times[2][k] = mysecond() - times[2][k]; times[2][k] = mysecond() - times[2][k];
times[3][k] = mysecond(); times[3][k] = mysecond();
#ifdef TUNED
tuned_STREAM_Triad(scalar);
#else
#ifndef DIS_OPENMP #ifndef DIS_OPENMP
#pragma omp parallel for #pragma omp parallel for
#endif #endif
for (j=0; j<STREAM_ARRAY_SIZE; j++) for (j = 0; j < asize; j++)
a[j] = b[j]+scalar*c[j]; a[j] = b[j] + scalar * c[j];
#endif
times[3][k] = mysecond() - times[3][k]; times[3][k] = mysecond() - times[3][k];
} }
/* --- SUMMARY --- */ /* --- SUMMARY --- */
for (k=1; k<NTIMES; k++) /* note -- skip first iteration */ for (k = 1; k < NTIMES; k++) /* note -- skip first iteration */
{
for (j=0; j<4; j++)
{ {
for (j = 0; j < 4; j++) {
avgtime[j] = avgtime[j] + times[j][k]; avgtime[j] = avgtime[j] + times[j][k];
mintime[j] = MIN(mintime[j], times[j][k]); mintime[j] = MIN(mintime[j], times[j][k]);
maxtime[j] = MAX(maxtime[j], times[j][k]); maxtime[j] = MAX(maxtime[j], times[j][k]);
@ -378,49 +373,41 @@ main()
} }
printf("Function Best Rate MB/s Avg time Min time Max time\n"); printf("Function Best Rate MB/s Avg time Min time Max time\n");
for (j=0; j<4; j++) { for (j = 0; j < 4; j++) {
avgtime[j] = avgtime[j]/(double)(NTIMES-1); avgtime[j] = avgtime[j] / (double)(NTIMES - 1);
printf("%s%12.1f %11.6f %11.6f %11.6f\n", label[j], printf("%s%12.1f %11.6f %11.6f %11.6f\n", label[j],
1.0E-06 * bytes[j]/mintime[j], 1.0E-06 * bytes[j] / mintime[j], avgtime[j], mintime[j], maxtime[j]);
avgtime[j],
mintime[j],
maxtime[j]);
} }
printf(HLINE); printf(HLINE);
/* --- Check Results --- */ /* --- Check Results --- */
checkSTREAMresults(); checkSTREAMresults(asize, vptr); /* vptr decays to STREAM_TYPE **, the callee's parameter type; &vptr would be a pointer-to-array */
printf(HLINE); printf(HLINE);
double total_time = avgtime[0]\ double total_time = avgtime[0] + avgtime[1] + avgtime[2] + avgtime[3];
+ avgtime[1]\
+ avgtime[2]\
+ avgtime[3];
printf("time: %s ms\n", format_time((uint64_t)(total_time * 1000))); printf("time: %s ms\n", format_time((uint64_t)(total_time * 1000)));
return 0; return 0;
} }
# define M 20 #define M 20
int int checktick() {
checktick()
{
int i, minDelta, Delta; int i, minDelta, Delta;
double t1, t2, timesfound[M]; double t1, t2, timesfound[M];
/* Collect a sequence of M unique time values from the system. */ /* Collect a sequence of M unique time values from the system. */
for (i = 0; i < M; i++) { for (i = 0; i < M; i++) {
t1 = mysecond(); t1 = mysecond();
while( ((t2=mysecond()) - t1) < 1.0E-6 ) while (((t2 = mysecond()) - t1) < 1.0E-6)
; ;
timesfound[i] = t1 = t2; timesfound[i] = t1 = t2;
} }
/* /*
* Determine the minimum difference between these M values. * Determine the minimum difference between these M values.
* This result will be our estimate (in microseconds) for the * This result will be our estimate (in microseconds) for the
* clock granularity. * clock granularity.
@ -428,14 +415,12 @@ checktick()
minDelta = 1000000; minDelta = 1000000;
for (i = 1; i < M; i++) { for (i = 1; i < M; i++) {
Delta = (int)( 1.0E6 * (timesfound[i]-timesfound[i-1])); Delta = (int)(1.0E6 * (timesfound[i] - timesfound[i - 1]));
minDelta = MIN(minDelta, MAX(Delta,0)); minDelta = MIN(minDelta, MAX(Delta, 0));
} }
return(minDelta); return (minDelta);
} }
/* A gettimeofday routine to give access to the wall /* A gettimeofday routine to give access to the wall
clock timer on most UNIX-like systems. */ clock timer on most UNIX-like systems. */
@ -443,23 +428,19 @@ checktick()
/* This function has been modified from the original version to ensure /* This function has been modified from the original version to ensure
* ANSI compliance, due to the deprecation of the "timezone" struct. */ * ANSI compliance, due to the deprecation of the "timezone" struct. */
double mysecond() { return ((double)uptime() / 1000); }
double mysecond()
{
return ((double)uptime() / 1000);
}
#ifndef abs #ifndef abs
#define abs(a) ((a) >= 0 ? (a) : -(a)) #define abs(a) ((a) >= 0 ? (a) : -(a))
#endif #endif
void checkSTREAMresults () void checkSTREAMresults(int asize, STREAM_TYPE **vptr) {
{ assert(vptr);
STREAM_TYPE aj,bj,cj,scalar; STREAM_TYPE aj, bj, cj, scalar;
STREAM_TYPE aSumErr,bSumErr,cSumErr; STREAM_TYPE aSumErr, bSumErr, cSumErr;
STREAM_TYPE aAvgErr,bAvgErr,cAvgErr; STREAM_TYPE aAvgErr, bAvgErr, cAvgErr;
double epsilon; double epsilon;
size_t j; size_t j;
int k,ierr,err; int k, ierr, err;
/* reproduce initialization */ /* reproduce initialization */
aj = 1.0; aj = 1.0;
@ -469,147 +450,112 @@ void checkSTREAMresults ()
aj = 2.0E0 * aj; aj = 2.0E0 * aj;
/* now execute timing loop */ /* now execute timing loop */
scalar = 3.0; scalar = 3.0;
for (k=0; k<NTIMES; k++) for (k = 0; k < NTIMES; k++) {
{
cj = aj; cj = aj;
bj = scalar*cj; bj = scalar * cj;
cj = aj+bj; cj = aj + bj;
aj = bj+scalar*cj; aj = bj + scalar * cj;
} }
/* accumulate deltas between observed and expected results */ /* accumulate deltas between observed and expected results */
aSumErr = 0.0; aSumErr = 0.0;
bSumErr = 0.0; bSumErr = 0.0;
cSumErr = 0.0; cSumErr = 0.0;
for (j=0; j<STREAM_ARRAY_SIZE; j++) { for (j = 0; j < asize; j++) {
aSumErr += abs(a[j] - aj); aSumErr += abs(vptr[0][j] - aj);
bSumErr += abs(b[j] - bj); bSumErr += abs(vptr[1][j] - bj);
cSumErr += abs(c[j] - cj); cSumErr += abs(vptr[2][j] - cj);
/* if (j == 417) printf("Index 417: c[j]: %f, cj: %f\n",c[j],cj); */ /* MCCALPIN */ /* if (j == 417) printf("Index 417: c[j]: %f, cj: %f\n",c[j],cj); */ /* MCCALPIN
*/
} }
aAvgErr = aSumErr / (STREAM_TYPE) STREAM_ARRAY_SIZE; aAvgErr = aSumErr / (STREAM_TYPE)asize;
bAvgErr = bSumErr / (STREAM_TYPE) STREAM_ARRAY_SIZE; bAvgErr = bSumErr / (STREAM_TYPE)asize;
cAvgErr = cSumErr / (STREAM_TYPE) STREAM_ARRAY_SIZE; cAvgErr = cSumErr / (STREAM_TYPE)asize;
if (sizeof(STREAM_TYPE) == 4) { if (sizeof(STREAM_TYPE) == 4) {
epsilon = 1.e-6; epsilon = 1.e-6;
} } else if (sizeof(STREAM_TYPE) == 8) {
else if (sizeof(STREAM_TYPE) == 8) {
epsilon = 1.e-13; epsilon = 1.e-13;
} } else {
else { printf("WEIRD: sizeof(STREAM_TYPE) = %lu\n", sizeof(STREAM_TYPE));
printf("WEIRD: sizeof(STREAM_TYPE) = %lu\n",sizeof(STREAM_TYPE));
epsilon = 1.e-6; epsilon = 1.e-6;
} }
err = 0; err = 0;
if (abs(aAvgErr/aj) > epsilon) { if (abs(aAvgErr / aj) > epsilon) {
err++; err++;
printf ("Failed Validation on array a[], AvgRelAbsErr > epsilon (%e)\n",epsilon); printf("Failed Validation on array a[], AvgRelAbsErr > epsilon (%e)\n",
printf (" Expected Value: %e, AvgAbsErr: %e, AvgRelAbsErr: %e\n",aj,aAvgErr,abs(aAvgErr)/aj); epsilon);
printf(" Expected Value: %e, AvgAbsErr: %e, AvgRelAbsErr: %e\n", aj,
aAvgErr, abs(aAvgErr) / aj);
ierr = 0; ierr = 0;
for (j=0; j<STREAM_ARRAY_SIZE; j++) { for (j = 0; j < asize; j++) {
if (abs(a[j]/aj-1.0) > epsilon) { if (abs(vptr[0][j] / aj - 1.0) > epsilon) {
ierr++; ierr++;
#ifdef VERBOSE #ifdef VERBOSE
if (ierr < 10) { if (ierr < 10) {
printf(" array a: index: %ld, expected: %e, observed: %e, relative error: %e\n", printf(" array a: index: %ld, expected: %e, observed: %e, "
j,aj,a[j],abs((aj-a[j])/aAvgErr)); "relative error: %e\n",
j, aj, vptr[0][j], abs((aj - vptr[0][j]) / aAvgErr));
} }
#endif #endif
} }
} }
printf(" For array a[], %d errors were found.\n",ierr); printf(" For array a[], %d errors were found.\n", ierr);
} }
if (abs(bAvgErr/bj) > epsilon) { if (abs(bAvgErr / bj) > epsilon) {
err++; err++;
printf ("Failed Validation on array b[], AvgRelAbsErr > epsilon (%e)\n",epsilon); printf("Failed Validation on array b[], AvgRelAbsErr > epsilon (%e)\n",
printf (" Expected Value: %e, AvgAbsErr: %e, AvgRelAbsErr: %e\n",bj,bAvgErr,abs(bAvgErr)/bj); epsilon);
printf (" AvgRelAbsErr > Epsilon (%e)\n",epsilon); printf(" Expected Value: %e, AvgAbsErr: %e, AvgRelAbsErr: %e\n", bj,
bAvgErr, abs(bAvgErr) / bj);
printf(" AvgRelAbsErr > Epsilon (%e)\n", epsilon);
ierr = 0; ierr = 0;
for (j=0; j<STREAM_ARRAY_SIZE; j++) { for (j = 0; j < asize; j++) {
if (abs(b[j]/bj-1.0) > epsilon) { if (abs(vptr[1][j] / bj - 1.0) > epsilon) {
ierr++; ierr++;
#ifdef VERBOSE #ifdef VERBOSE
if (ierr < 10) { if (ierr < 10) {
printf(" array b: index: %ld, expected: %e, observed: %e, relative error: %e\n", printf(" array b: index: %ld, expected: %e, observed: %e, "
j,bj,b[j],abs((bj-b[j])/bAvgErr)); "relative error: %e\n",
j, bj, vptr[1][j], abs((bj - vptr[1][j]) / bAvgErr));
} }
#endif #endif
} }
} }
printf(" For array b[], %d errors were found.\n",ierr); printf(" For array b[], %d errors were found.\n", ierr);
} }
if (abs(cAvgErr/cj) > epsilon) { if (abs(cAvgErr / cj) > epsilon) {
err++; err++;
printf ("Failed Validation on array c[], AvgRelAbsErr > epsilon (%e)\n",epsilon); printf("Failed Validation on array c[], AvgRelAbsErr > epsilon (%e)\n",
printf (" Expected Value: %e, AvgAbsErr: %e, AvgRelAbsErr: %e\n",cj,cAvgErr,abs(cAvgErr)/cj); epsilon);
printf (" AvgRelAbsErr > Epsilon (%e)\n",epsilon); printf(" Expected Value: %e, AvgAbsErr: %e, AvgRelAbsErr: %e\n", cj,
cAvgErr, abs(cAvgErr) / cj);
printf(" AvgRelAbsErr > Epsilon (%e)\n", epsilon);
ierr = 0; ierr = 0;
for (j=0; j<STREAM_ARRAY_SIZE; j++) { for (j = 0; j < asize; j++) {
if (abs(c[j]/cj-1.0) > epsilon) { if (abs(vptr[2][j] / cj - 1.0) > epsilon) {
ierr++; ierr++;
#ifdef VERBOSE #ifdef VERBOSE
if (ierr < 10) { if (ierr < 10) {
printf(" array c: index: %ld, expected: %e, observed: %e, relative error: %e\n", printf(" array c: index: %ld, expected: %e, observed: %e, "
j,cj,c[j],abs((cj-c[j])/cAvgErr)); "relative error: %e\n",
j, cj, c[j], abs((cj - c[j]) / cAvgErr));
} }
#endif #endif
} }
} }
printf(" For array c[], %d errors were found.\n",ierr); printf(" For array c[], %d errors were found.\n", ierr);
} }
if (err == 0) { if (err == 0) {
printf ("Solution Validates: avg error less than %e on all three arrays\n",epsilon); printf("Solution Validates: avg error less than %e on all three arrays\n",
epsilon);
} }
#ifdef VERBOSE #ifdef VERBOSE
printf ("Results Validation Verbose Results: \n"); printf("Results Validation Verbose Results: \n");
printf (" Expected a(1), b(1), c(1): %f %f %f \n",aj,bj,cj); printf(" Expected a(1), b(1), c(1): %f %f %f \n", aj, bj, cj);
printf (" Observed a(1), b(1), c(1): %f %f %f \n",a[1],b[1],c[1]); printf(" Observed a(1), b(1), c(1): %f %f %f \n", vptr[0][1], vptr[1][1], vptr[2][1]);
printf (" Rel Errors on a, b, c: %e %e %e \n",abs(aAvgErr/aj),abs(bAvgErr/bj),abs(cAvgErr/cj)); printf(" Rel Errors on a, b, c: %e %e %e \n", abs(aAvgErr / aj),
abs(bAvgErr / bj), abs(cAvgErr / cj));
#endif #endif
} }
#ifdef TUNED
/* stubs for "tuned" versions of the kernels */
void tuned_STREAM_Copy()
{
size_t j;
#ifndef DIS_OPENMP
#pragma omp parallel for
#endif
for (j=0; j<STREAM_ARRAY_SIZE; j++)
c[j] = a[j];
}
void tuned_STREAM_Scale(STREAM_TYPE scalar)
{
size_t j;
#ifndef DIS_OPENMP
#pragma omp parallel for
#endif
for (j=0; j<STREAM_ARRAY_SIZE; j++)
b[j] = scalar*c[j];
}
void tuned_STREAM_Add()
{
size_t j;
#ifndef DIS_OPENMP
#pragma omp parallel for
#endif
for (j=0; j<STREAM_ARRAY_SIZE; j++)
c[j] = a[j]+b[j];
}
void tuned_STREAM_Triad(STREAM_TYPE scalar)
{
size_t j;
#ifndef DIS_OPENMP
#pragma omp parallel for
#endif
for (j=0; j<STREAM_ARRAY_SIZE; j++)
a[j] = b[j]+scalar*c[j];
}
/* end of stubs for the "tuned" versions of the kernels */
#endif

View file

@ -1,27 +1,16 @@
NAME = riscv-tcc NAME = riscv-tcc
mainargs ?= ref
BENCH_LIBS = bench openlibm soft-fp BENCH_LIBS = bench openlibm soft-fp
SRCS := tcc.c resources.S my_qsort.c fs.c SRCS := tcc.c my_qsort.c fs.c ./configs/$(mainargs)-config.c ./resources/resources-$(mainargs).S
INC_PATH += ../common/openlibm/include \ INC_PATH += ../common/openlibm/include \
../common/openlibm/src \ ../common/openlibm/src \
./include \ ./include \
../common/bench/include ../common/bench/include
ifeq ($(mainargs), test)
CFLAGS += -D__BENCH_TEST__
ASFLAGS += -D__BENCH_TEST__
else ifeq ($(mainargs), train)
CFLAGS += -D__BENCH_TRAIN__
ASFLAGS += -D__BENCH_TRAIN__
else ifeq ($(mainargs), huge)
CFLAGS += -D__BENCH_HUGE__
ASFLAGS += -D__BENCH_HUGE__
else
CFLAGS += -D__BENCH_REF__
ASFLAGS += -D__BENCH_REF__
endif
include $(AM_HOME)/Makefile include $(AM_HOME)/Makefile

View file

@ -15,3 +15,7 @@
#define ONE_SOURCE 1 #define ONE_SOURCE 1
// #define SDE_RISCV32_DEV 1 // #define SDE_RISCV32_DEV 1
// #define __SIZEOF_POINTER__ 4 // #define __SIZEOF_POINTER__ 4
//
/* Per-input-set parameters for the tcc benchmark; one instance named `config`
 * is defined by each configs/<set>-config.c file. */
typedef struct { int file_count; /* set to the number of file_table entries */ } bench_tcc_config;

View file

View file

@ -0,0 +1,25 @@
#include <fs.h>
#include "../config.h"
/* Files available to the benchmark for this input set.
 * Each entry is {path, N1, N2, NULL, NULL}. N2 of every entry equals the sum
 * of N1 over the preceding entries (0, 336, 336+752), so N1 is presumably the
 * file size in bytes and N2 its byte offset inside the ramdisk image --
 * confirm against Finfo in fs.h. The entry order therefore has to match the
 * order in which the files are packed into the ramdisk. */
Finfo file_table[] = {
{"/share/test.c", 336, 0, NULL, NULL},
{"/share/test", 752, 336, NULL, NULL},
{"/share/trm.c", 273, 1088, NULL, NULL},
};
/* Command line handed to the embedded tcc for this input set: compile
 * /share/trm.c and /share/test.c into the static, freestanding executable
 * /share/test. */
char *tcc_argv1[]={
"./tcc",
"/share/trm.c",
"/share/test.c",
"-ffreestanding",
"-nostdlib",
"-o",
"/share/test",
"-Wl,-Ttext=0x80000000",
"-O2",
"-static"
};
/* Argument count, derived from the array so it cannot go stale when an
 * argument is added or removed. */
int tcc_argc1 = (int)(sizeof(tcc_argv1) / sizeof(tcc_argv1[0]));
bench_tcc_config config = {3};

View file

@ -0,0 +1,24 @@
#include <fs.h>
#include "../config.h"
/*
 * Files visible to tcc for the workload that builds /share/train.
 *
 * In each entry the third initializer equals the sum of the second
 * initializers of all preceding entries (0, 273, 379, 1900).
 * NOTE(review): these are presumably {name, size in bytes, byte offset
 * inside the ramdisk image, ...} -- confirm against the Finfo
 * declaration in <fs.h>.
 */
Finfo file_table[] = {
    {"/share/trm.c", 273, 0, NULL, NULL},
    {"/share/trap.h", 106, 273, NULL, NULL},
    {"/share/train.c", 1521, 379, NULL, NULL},
    {"/share/train", 106, 1900, NULL, NULL},
};

/* Command line handed to tcc's main() in place of the real argv. */
char *tcc_argv1[] = {
    "./tcc",
    "/share/trm.c",
    "/share/train.c",
    "-ffreestanding",
    "-nostdlib",
    "-o",
    "/share/train",
    "-Wl,-Ttext=0x80000000",
    "-O2",
    "-static"
};

/*
 * Both counts are derived from the arrays above so that adding or
 * removing an entry can never leave a stale hand-written number behind.
 */
int tcc_argc1 = (int)(sizeof tcc_argv1 / sizeof tcc_argv1[0]);

bench_tcc_config config = {
    (int)(sizeof file_table / sizeof file_table[0])
};

View file

@ -18,8 +18,6 @@ int fs_init(Finfo *list, size_t count) {
return 0; return 0;
} }
size_t ramdisk_read(void *buf, size_t offset, size_t len) size_t ramdisk_read(void *buf, size_t offset, size_t len)
{ {
assert(offset + len <= RAMDISK_SIZE); assert(offset + len <= RAMDISK_SIZE);
@ -40,7 +38,7 @@ int fs_open(const char *pathname, int flags, int mode)
//printf("ex1 addr is %x\n", (uint32_t)ramdisk_start + 336); //printf("ex1 addr is %x\n", (uint32_t)ramdisk_start + 336);
for(int fs_num = 0; fs_num < file_count; fs_num ++) for(int fs_num = 0; fs_num < file_count; fs_num ++)
{ {
if(strcmp(pathname, file_table[fs_num].name) == 0) // 匹配成功 if(strcmp(pathname, file_table[fs_num].name) == 0)
{ {
file_table[fs_num].open_offset = 0; file_table[fs_num].open_offset = 0;
return fs_num; return fs_num;

View file

View file

@ -0,0 +1,7 @@
/*
 * Ramdisk image: the input files are embedded back to back in .data,
 * bracketed by the exported symbols ramdisk_start and ramdisk_end.
 */
.section .data
.global ramdisk_start, ramdisk_end
ramdisk_start:
/*
 * NOTE(review): the matching file_table lists test.c, test, trm.c with
 * third-field values 0, 336 and 1088, which look like byte offsets from
 * ramdisk_start; if so, this .incbin order and the file sizes must stay
 * in step with that table -- confirm.
 */
.incbin "input/test.c"
.incbin "input/test"
.incbin "input/trm.c"
ramdisk_end:

View file

@ -0,0 +1,8 @@
/*
 * Ramdisk image: the input files are embedded back to back in .data,
 * bracketed by the exported symbols ramdisk_start and ramdisk_end.
 */
.section .data
.global ramdisk_start, ramdisk_end
ramdisk_start:
/*
 * NOTE(review): the matching file_table lists trm.c, trap.h, train.c,
 * train with third-field values 0, 273, 379 and 1900, which look like
 * byte offsets from ramdisk_start; if so, this .incbin order and the
 * file sizes must stay in step with that table -- confirm.
 */
.incbin "input/trm.c"
.incbin "input/trap.h"
.incbin "input/train.c"
.incbin "input/train"
ramdisk_end:

View file

@ -26,49 +26,6 @@
#include "tcctools.c" #include "tcctools.c"
#if defined (__BENCH_TEST__)
static Finfo file_table[] = {
{"/share/test.c", 336, 0, NULL, NULL},
{"/share/test", 752, 336, NULL, NULL},
{"/share/trm.c", 273, 1088, NULL, NULL},
};
int tcc_argc1 = 10;
char *tcc_argv1[]={
"./tcc",
"/share/trm.c",
"/share/test.c",
"-ffreestanding",
"-nostdlib",
"-o",
"/share/test",
"-Wl,-Ttext=0x80000000",
"-O2",
"-static"
};
#elif defined (__BENCH_TRAIN__)
static Finfo file_table[] = {
{"/share/trm.c", 273, 0, NULL, NULL},
{"/share/trap.h", 106, 273, NULL, NULL},
{"/share/train.c", 1521, 379, NULL, NULL},
{"/share/train", 106, 1900, NULL, NULL},
};
int tcc_argc1 = 10;
char *tcc_argv1[]={
"./tcc",
"/share/trm.c",
"/share/train.c",
"-ffreestanding",
"-nostdlib",
"-o",
"/share/train",
"-Wl,-Ttext=0x80000000",
"-O2",
"-static"
};
#elif defined (__BENCH_HUGE__)
#else //default to ref
#endif
static void set_environment(TCCState *s) static void set_environment(TCCState *s)
{ {
char * path; char * path;
@ -115,11 +72,13 @@ static char *default_outputfile(TCCState *s, const char *first_file)
return tcc_strdup(buf); return tcc_strdup(buf);
} }
extern bench_tcc_config config;
int main(int argc0, char **argv0) int main(int argc0, char **argv0)
{ {
fs_init(file_table, 4); extern Finfo file_table[];
fs_init(file_table, config.file_count);
bench_malloc_init(); bench_malloc_init();
TCCState *s, *s1; TCCState *s, *s1;
int ret, opt, n = 0, t = 0, done; int ret, opt, n = 0, t = 0, done;
@ -128,10 +87,10 @@ int main(int argc0, char **argv0)
int argc; char **argv; int argc; char **argv;
int ppfp = FD_STDOUT; int ppfp = FD_STDOUT;
extern int tcc_argc1;
extern char *tcc_argv1[];
start_time = uptime(); start_time = uptime();
redo: redo:
// argc = argc0, argv = argv0;
argc = tcc_argc1, argv = tcc_argv1; argc = tcc_argc1, argv = tcc_argv1;
s = s1 = tcc_new(); s = s1 = tcc_new();
#ifdef CONFIG_TCC_SWITCHES /* predefined options */ #ifdef CONFIG_TCC_SWITCHES /* predefined options */

View file

@ -1885,4 +1885,5 @@ PUB_FUNC void tcc_exit_state(TCCState *s1);
#else #else
# define TCC_STATE_VAR(sym) s1->sym # define TCC_STATE_VAR(sym) s1->sym
# define TCC_SET_STATE(fn) (tcc_enter_state(s1),fn) # define TCC_SET_STATE(fn) (tcc_enter_state(s1),fn)
#endif #endif

View file

@ -1,6 +1,8 @@
NAME = whetstone NAME = whetstone
SRCS = whetstone.c mainargs ?= ref
SRCS = whetstone.c ./configs/$(mainargs)-config.c
BENCH_LIBS = bench openlibm soft-fp BENCH_LIBS = bench openlibm soft-fp

View file

@ -0,0 +1,5 @@
#include <whestone.h>
bench_whestone_config config = {200};

View file

@ -0,0 +1,3 @@
#include <whestone.h>
bench_whestone_config config = {30};

View file

@ -0,0 +1,4 @@
#include <whestone.h>
bench_whestone_config config = {10};

View file

@ -1,105 +0,0 @@
#ifndef _CDEFS_COMPAT_H_
#define _CDEFS_COMPAT_H_
#if !defined(__BEGIN_DECLS)
#if defined(__cplusplus)
#define __BEGIN_DECLS extern "C" {
#define __END_DECLS }
#else
#define __BEGIN_DECLS
#define __END_DECLS
#endif
#endif /* !defined(__BEGIN_DECLS) */
#ifdef __GNUC__
#if defined(__strong_alias) && defined(__NetBSD__)
#define openlibm_strong_reference(sym,alias) __strong_alias(alias,sym)
#elif defined(__strong_reference)
#define openlibm_strong_reference(sym,alias) __strong_reference(sym,alias)
#else
#ifdef __APPLE__
#define openlibm_strong_reference(sym,aliassym) openlibm_weak_reference(sym,aliassym)
#else
#define openlibm_strong_reference(sym,aliassym) \
OLM_DLLEXPORT extern __typeof (aliassym) aliassym __attribute__ ((__alias__ (#sym)));
#endif /* __APPLE__ */
#endif /* __strong_reference */
#ifdef __wasm__
#define openlibm_weak_reference(sym,alias) openlibm_strong_reference(sym,alias)
#elif defined(__weak_alias) && defined(__NetBSD__)
#define openlibm_weak_reference(sym,alias) __weak_alias(alias,sym)
#elif defined(__weak_reference)
#define openlibm_weak_reference(sym,alias) __weak_reference(sym,alias)
#else
#ifdef __ELF__
#ifdef __STDC__
#define openlibm_weak_reference(sym,alias) \
__asm__(".weak " #alias); \
__asm__(".equ " #alias ", " #sym)
#ifdef __warn_references
#define openlibm_warn_references(sym,msg) __warn_references(sym,msg)
#else
#define openlibm_warn_references(sym,msg) \
__asm__(".section .gnu.warning." #sym); \
__asm__(".asciz \"" msg "\""); \
__asm__(".previous")
#endif /* __warn_references */
#else
#define openlibm_weak_reference(sym,alias) \
__asm__(".weak alias"); \
__asm__(".equ alias, sym")
#ifdef __warn_references
#define openlibm_warn_references(sym,msg) __warn_references(sym,msg)
#else
#define openlibm_warn_references(sym,msg) \
__asm__(".section .gnu.warning.sym"); \
__asm__(".asciz \"msg\""); \
__asm__(".previous")
#endif /* __warn_references */
#endif /* __STDC__ */
#elif defined(__clang__) /* CLANG */
#if defined(_WIN32) && defined(_X86_)
#define openlibm_asm_symbol_prefix "_"
#else
#define openlibm_asm_symbol_prefix ""
#endif
#ifdef __STDC__
#define openlibm_weak_reference(sym,alias) \
__asm__(".weak_reference " openlibm_asm_symbol_prefix #alias); \
__asm__(".set " openlibm_asm_symbol_prefix #alias ", " openlibm_asm_symbol_prefix #sym)
#else
#define openlibm_weak_reference(sym,alias) \
__asm__(".weak_reference openlibm_asm_symbol_prefix/**/alias");\
__asm__(".set openlibm_asm_symbol_prefix/**/alias, openlibm_asm_symbol_prefix/**/sym")
#endif
#else /* !__ELF__ */
#ifdef __STDC__
#define openlibm_weak_reference(sym,alias) \
__asm__(".stabs \"_" #alias "\",11,0,0,0"); \
__asm__(".stabs \"_" #sym "\",1,0,0,0")
#ifdef __warn_references
#define openlibm_warn_references(sym,msg) __warn_references(sym,msg)
#else
#define openlibm_warn_references(sym,msg) \
__asm__(".stabs \"" msg "\",30,0,0,0"); \
__asm__(".stabs \"_" #sym "\",1,0,0,0")
#endif /* __warn_references */
#else
#define openlibm_weak_reference(sym,alias) \
__asm__(".stabs \"_/**/alias\",11,0,0,0"); \
__asm__(".stabs \"_/**/sym\",1,0,0,0")
#ifdef __warn_references
#define openlibm_warn_references(sym,msg) __warn_references(sym,msg)
#else
#define openlibm_warn_references(sym,msg) \
__asm__(".stabs msg,30,0,0,0"); \
__asm__(".stabs \"_/**/sym\",1,0,0,0")
#endif /* __warn_references */
#endif /* __STDC__ */
#endif /* __ELF__ */
#endif /* __weak_reference */
#endif /* __GNUC__ */
#endif /* _CDEFS_COMPAT_H_ */

View file

@ -1,144 +0,0 @@
/* From: @(#)e_rem_pio2.c 1.4 95/01/18 */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2008 Steven G. Kargl, David Schultz, Bruce D. Evans.
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*
* Optimized by Bruce D. Evans.
*/
#include "cdefs-compat.h"
//__FBSDID("$FreeBSD: src/lib/msun/ld128/e_rem_pio2l.h,v 1.2 2011/05/30 19:41:28 kargl Exp $");
/* ld128 version of __ieee754_rem_pio2l(x,y)
*
* return the remainder of x rem pi/2 in y[0]+y[1]
* use __kernel_rem_pio2()
*/
#include <float.h>
#include <openlibm_math.h>
#include "math_private.h"
#include "fpmath.h"
#define BIAS (LDBL_MAX_EXP - 1)
/*
* XXX need to verify that nonzero integer multiples of pi/2 within the
* range get no closer to a long double than 2**-140, or that
* ilogb(x) + ilogb(min_delta) < 45 - -140.
*/
/*
* invpio2: 113 bits of 2/pi
* pio2_1: first 68 bits of pi/2
* pio2_1t: pi/2 - pio2_1
* pio2_2: second 68 bits of pi/2
* pio2_2t: pi/2 - (pio2_1+pio2_2)
* pio2_3: third 68 bits of pi/2
* pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3)
*/
static const double
zero = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
two24 = 1.67772160000000000000e+07; /* 0x41700000, 0x00000000 */
static const long double
invpio2 = 6.3661977236758134307553505349005747e-01L, /* 0x145f306dc9c882a53f84eafa3ea6a.0p-113 */
pio2_1 = 1.5707963267948966192292994253909555e+00L, /* 0x1921fb54442d18469800000000000.0p-112 */
pio2_1t = 2.0222662487959507323996846200947577e-21L, /* 0x13198a2e03707344a4093822299f3.0p-181 */
pio2_2 = 2.0222662487959507323994779168837751e-21L, /* 0x13198a2e03707344a400000000000.0p-181 */
pio2_2t = 2.0670321098263988236496903051604844e-43L, /* 0x127044533e63a0105df531d89cd91.0p-254 */
pio2_3 = 2.0670321098263988236499468110329591e-43L, /* 0x127044533e63a0105e00000000000.0p-254 */
pio2_3t = -2.5650587247459238361625433492959285e-65L; /* -0x159c4ec64ddaeb5f78671cbfb2210.0p-327 */
//VBS
//static inline __always_inline int
//__ieee754_rem_pio2l(long double x, long double *y)
/*
 * Argument reduction modulo pi/2 for long double.
 *
 * x: value to reduce.
 * y: output; the remainder is stored as the pair y[0] + y[1].
 *
 * Returns the value n computed during the reduction (converted to int by
 * the return type).  For infinite or NaN input both y[0] and y[1] are set
 * to x-x and 0 is returned.
 */
static inline int
__ieee754_rem_pio2l(long double x, long double *y)
{
union IEEEl2bits u,u1;
long double z,w,t,r,fn;
double tx[5],ty[3];
int64_t n;
int e0,ex,i,j,nx;
int16_t expsign;
/* Pull the combined sign/exponent field out of x; ex is that field with the top bit masked off. */
u.e = x;
expsign = u.xbits.expsign;
ex = expsign & 0x7fff;
if (ex < BIAS + 45 || ex == BIAS + 45 &&
u.bits.manh < 0x921fb54442d1LL) {
/* |x| ~< 2^45*(pi/2), medium size */
/* Use a specialized rint() to get fn. Assume round-to-nearest. */
fn = x*invpio2+0x1.8p112;
fn = fn-0x1.8p112;
#ifdef HAVE_EFFICIENT_I64RINT
n = i64rint(fn);
#else
n = fn;
#endif
r = x-fn*pio2_1;
w = fn*pio2_1t; /* 1st round good to 180 bit */
{
union IEEEl2bits u2;
int ex1;
/*
 * i is the difference between the exponent field of x (j) and that of
 * the candidate head y[0] (ex1).  When it exceeds the thresholds below,
 * the next, finer piece of pi/2 is subtracted as well.
 */
j = ex;
y[0] = r-w;
u2.e = y[0];
ex1 = u2.xbits.expsign & 0x7fff;
i = j-ex1;
if(i>51) { /* 2nd iteration needed, good to 248 */
t = r;
w = fn*pio2_2;
r = t-w;
w = fn*pio2_2t-((t-r)-w);
y[0] = r-w;
u2.e = y[0];
ex1 = u2.xbits.expsign & 0x7fff;
i = j-ex1;
if(i>119) { /* 3rd iteration needed, 316 bits acc */
t = r; /* will cover all possible cases */
w = fn*pio2_3;
r = t-w;
w = fn*pio2_3t-((t-r)-w);
y[0] = r-w;
}
}
}
/* Tail: what is left of r-w after the head y[0] has been taken out. */
y[1] = (r-y[0])-w;
return n;
}
/*
* all other (large) arguments
*/
if(ex==0x7fff) { /* x is inf or NaN */
y[0]=y[1]=x-x; return 0;
}
/* set z = scalbn(|x|,ilogb(x)-23) */
u1.e = x;
e0 = ex - BIAS - 23; /* e0 = ilogb(|x|)-23; */
/* ex - e0 has the top bit clear, so z is built from |x|, not x. */
u1.xbits.expsign = ex - e0;
z = u1.e;
/* Peel z into integer-valued doubles, scaling the remainder by two24 (2^24) each step. */
for(i=0;i<4;i++) {
tx[i] = (double)((int32_t)(z));
z = (z-tx[i])*two24;
}
tx[4] = z;
nx = 5;
while(tx[nx-1]==zero) nx--; /* skip zero term */
n = __kernel_rem_pio2(tx,ty,e0,nx,3);
/* Fold the three doubles in ty[] into a head r and a tail w. */
t = (long double)ty[2] + ty[1];
r = t + ty[0];
w = ty[0] - (r - t);
/* The reduction above ran on |x|; flip the signs back for negative x. */
if(expsign<0) {y[0] = -r; y[1] = -w; return -n;}
y[0] = r; y[1] = w; return n;
}

View file

@ -1,113 +0,0 @@
/*-
* Copyright (c) 2008 David Schultz <das@FreeBSD.ORG>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD: src/lib/msun/ld128/invtrig.h,v 1.1 2008/07/31 22:41:26 das Exp $
*/
#include <float.h>
#include "fpmath.h"
#define BIAS (LDBL_MAX_EXP - 1)
#define MANH_SIZE (LDBL_MANH_SIZE + 1)
/* Approximation thresholds. */
#define ASIN_LINEAR (BIAS - 56) /* 2**-56 */
#define ACOS_CONST (BIAS - 113) /* 2**-113 */
#define ATAN_CONST (BIAS + 113) /* 2**113 */
#define ATAN_LINEAR (BIAS - 56) /* 2**-56 */
/* 0.95 */
#define THRESH ((0xe666666666666666ULL>>(64-(MANH_SIZE-1)))|LDBL_NBIT)
/* Constants shared by the long double inverse trig functions. */
#define pS0 _ItL_pS0
#define pS1 _ItL_pS1
#define pS2 _ItL_pS2
#define pS3 _ItL_pS3
#define pS4 _ItL_pS4
#define pS5 _ItL_pS5
#define pS6 _ItL_pS6
#define pS7 _ItL_pS7
#define pS8 _ItL_pS8
#define pS9 _ItL_pS9
#define qS1 _ItL_qS1
#define qS2 _ItL_qS2
#define qS3 _ItL_qS3
#define qS4 _ItL_qS4
#define qS5 _ItL_qS5
#define qS6 _ItL_qS6
#define qS7 _ItL_qS7
#define qS8 _ItL_qS8
#define qS9 _ItL_qS9
#define atanhi _ItL_atanhi
#define atanlo _ItL_atanlo
#define aT _ItL_aT
#define pi_lo _ItL_pi_lo
#define pio2_hi atanhi[3]
#define pio2_lo atanlo[3]
#define pio4_hi atanhi[1]
/* Constants shared by the long double inverse trig functions. */
extern const long double pS0, pS1, pS2, pS3, pS4, pS5, pS6, pS7, pS8, pS9;
extern const long double qS1, qS2, qS3, qS4, qS5, qS6, qS7, qS8, qS9;
extern const long double atanhi[], atanlo[], aT[];
extern const long double pi_lo;
/*
 * Returns x * (pS0 + pS1*x + pS2*x^2 + ... + pS9*x^9).
 *
 * The polynomial is evaluated by Horner's rule, innermost term first,
 * one accumulation step per coefficient.
 */
static inline long double
P(long double x)
{
	long double acc = pS9;

	acc = pS8 + x * acc;
	acc = pS7 + x * acc;
	acc = pS6 + x * acc;
	acc = pS5 + x * acc;
	acc = pS4 + x * acc;
	acc = pS3 + x * acc;
	acc = pS2 + x * acc;
	acc = pS1 + x * acc;
	acc = pS0 + x * acc;

	return (x * acc);
}
/*
 * Returns 1 + qS1*x + qS2*x^2 + ... + qS9*x^9.
 *
 * The polynomial is evaluated by Horner's rule, innermost term first,
 * one accumulation step per coefficient.
 */
static inline long double
Q(long double x)
{
	long double acc = qS9;

	acc = qS8 + x * acc;
	acc = qS7 + x * acc;
	acc = qS6 + x * acc;
	acc = qS5 + x * acc;
	acc = qS4 + x * acc;
	acc = qS3 + x * acc;
	acc = qS2 + x * acc;
	acc = qS1 + x * acc;

	return (1.0 + x * acc);
}
/*
 * Returns the polynomial in x whose coefficients are the even-indexed
 * entries of aT[]: aT[0] + aT[2]*x + aT[4]*x^2 + ... + aT[22]*x^11.
 *
 * Horner's rule, walking the coefficients from aT[22] down to aT[0].
 */
static inline long double
T_even(long double x)
{
	long double acc = aT[22];
	int k;

	for (k = 20; k >= 0; k -= 2)
		acc = aT[k] + x * acc;

	return (acc);
}
/*
 * Returns the polynomial in x whose coefficients are the odd-indexed
 * entries of aT[]: aT[1] + aT[3]*x + aT[5]*x^2 + ... + aT[23]*x^11.
 *
 * Horner's rule, walking the coefficients from aT[23] down to aT[1].
 */
static inline long double
T_odd(long double x)
{
	long double acc = aT[23];
	int k;

	for (k = 21; k >= 1; k -= 2)
		acc = aT[k] + x * acc;

	return (acc);
}

View file

@ -1,70 +0,0 @@
/*
* Copyright (c) 1988, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)mathimpl.h 8.1 (Berkeley) 6/4/93
* $FreeBSD: src/lib/msun/bsdsrc/mathimpl.h,v 1.7 2005/11/18 05:03:12 bde Exp $
*/
#ifndef _MATHIMPL_H_
#define _MATHIMPL_H_
#include "cdefs-compat.h"
#include "math_private.h"
/*
* TRUNC() is a macro that sets the trailing 27 bits in the mantissa of an
* IEEE double variable to zero. It must be expression-like for syntactic
* reasons, and we implement this expression using an inline function
* instead of a pure macro to avoid depending on the gcc feature of
* statement-expressions.
*/
#define TRUNC(d) (_b_trunc(&(d)))
/*
 * Helper behind TRUNC(): rewrites *_dp in place with the bottom 27 bits
 * of its low 32-bit word cleared (mask 0xf8000000).
 */
static __inline void
_b_trunc(volatile double *_dp)
{
	u_int32_t low_word;

	GET_LOW_WORD(low_word, *_dp);
	low_word &= 0xf8000000;
	SET_LOW_WORD(*_dp, low_word);
}
/*
 * Pair of doubles; __log__D() returns one by value.
 * NOTE(review): presumably a head/tail split whose sum a + b is the
 * result -- confirm against the __log__D implementation.
 */
struct Double {
double a;
double b;
};
/*
* Functions internal to the math package, yet not static.
*/
double __exp__D(double, double);
struct Double __log__D(double);
#endif /* !_MATHIMPL_H_ */

View file

@ -1,8 +0,0 @@
#ifndef OPENLIBM_H
#define OPENLIBM_H
#include <openlibm_complex.h>
#include <openlibm_fenv.h>
#include <openlibm_math.h>
#endif /* !OPENLIBM_H */

View file

@ -1,179 +0,0 @@
/* $OpenBSD: complex.h,v 1.5 2014/03/16 18:38:30 guenther Exp $ */
/*
* Copyright (c) 2008 Martynas Venckus <martynas@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifdef OPENLIBM_USE_HOST_COMPLEX_H
#include <complex.h>
#else /* !OPENLIBM_USE_HOST_COMPLEX_H */
#ifndef OPENLIBM_COMPLEX_H
#define OPENLIBM_COMPLEX_H
#define complex _Complex
#define _Complex_I 1.0fi
#define I _Complex_I
/*
* Macros that can be used to construct complex values.
*
* The C99 standard intends x+I*y to be used for this, but x+I*y is
* currently unusable in general since gcc introduces many overflow,
* underflow, sign and efficiency bugs by rewriting I*y as
* (0.0+I)*(y+0.0*I) and laboriously computing the full complex product.
* In particular, I*Inf is corrupted to NaN+I*Inf, and I*-0 is corrupted
* to -0.0+I*0.0.
*
* In C11, a CMPLX(x,y) macro was added to circumvent this limitation,
* and gcc 4.7 added a __builtin_complex feature to simplify implementation
* of CMPLX in libc, so we can take advantage of these features if they
* are available. Clang simply allows complex values to be constructed
* using a compound literal.
*
* If __builtin_complex is not available, resort to using inline
* functions instead. These can unfortunately not be used to construct
* compile-time constants.
*
* C99 specifies that complex numbers have the same representation as
* an array of two elements, where the first element is the real part
* and the second element is the imaginary part.
*/
#ifdef __clang__
# define CMPLXF(x, y) ((float complex){x, y})
# define CMPLX(x, y) ((double complex){x, y})
# define CMPLXL(x, y) ((long double complex){x, y})
#elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) && !defined(__INTEL_COMPILER)
# define CMPLXF(x,y) __builtin_complex ((float) (x), (float) (y))
# define CMPLX(x,y) __builtin_complex ((double) (x), (double) (y))
# define CMPLXL(x,y) __builtin_complex ((long double) (x), (long double) (y))
#else
/*
 * Fallback CMPLXF for compilers that take neither of the macro branches
 * above.  Builds a float complex from a real part x and an imaginary
 * part y by writing both through a union, relying on a complex value
 * having the same representation as a two-element array {real, imag}.
 */
static inline float complex
CMPLXF(float x, float y)
{
	union {
		float part[2];
		float complex value;
	} pun;

	pun.part[0] = x;	/* real */
	pun.part[1] = y;	/* imaginary */
	return (pun.value);
}
/*
 * Fallback CMPLX for compilers that take neither of the macro branches
 * above.  Builds a double complex from a real part x and an imaginary
 * part y by writing both through a union, relying on a complex value
 * having the same representation as a two-element array {real, imag}.
 */
static inline double complex
CMPLX(double x, double y)
{
	union {
		double part[2];
		double complex value;
	} pun;

	pun.part[0] = x;	/* real */
	pun.part[1] = y;	/* imaginary */
	return (pun.value);
}
/*
 * Fallback CMPLXL for compilers that take neither of the macro branches
 * above.  Builds a long double complex from a real part x and an
 * imaginary part y by writing both through a union, relying on a complex
 * value having the same representation as a two-element array
 * {real, imag}.
 */
static inline long double complex
CMPLXL(long double x, long double y)
{
	union {
		long double part[2];
		long double complex value;
	} pun;

	pun.part[0] = x;	/* real */
	pun.part[1] = y;	/* imaginary */
	return (pun.value);
}
#endif
/*
* Double versions of C99 functions
*/
double complex cacos(double complex);
double complex casin(double complex);
double complex catan(double complex);
double complex ccos(double complex);
double complex csin(double complex);
double complex ctan(double complex);
double complex cacosh(double complex);
double complex casinh(double complex);
double complex catanh(double complex);
double complex ccosh(double complex);
double complex csinh(double complex);
double complex ctanh(double complex);
double complex cexp(double complex);
double complex clog(double complex);
double cabs(double complex);
double complex cpow(double complex, double complex);
double complex csqrt(double complex);
double carg(double complex);
double cimag(double complex);
double complex conj(double complex);
double complex cproj(double complex);
double creal(double complex);
/*
* Float versions of C99 functions
*/
float complex cacosf(float complex);
float complex casinf(float complex);
float complex catanf(float complex);
float complex ccosf(float complex);
float complex csinf(float complex);
float complex ctanf(float complex);
float complex cacoshf(float complex);
float complex casinhf(float complex);
float complex catanhf(float complex);
float complex ccoshf(float complex);
float complex csinhf(float complex);
float complex ctanhf(float complex);
float complex cexpf(float complex);
float complex clogf(float complex);
float cabsf(float complex);
float complex cpowf(float complex, float complex);
float complex csqrtf(float complex);
float cargf(float complex);
float cimagf(float complex);
float complex conjf(float complex);
float complex cprojf(float complex);
float crealf(float complex);
/*
* Long double versions of C99 functions
*/
long double complex cacosl(long double complex);
long double complex casinl(long double complex);
long double complex catanl(long double complex);
long double complex ccosl(long double complex);
long double complex csinl(long double complex);
long double complex ctanl(long double complex);
long double complex cacoshl(long double complex);
long double complex casinhl(long double complex);
long double complex catanhl(long double complex);
long double complex ccoshl(long double complex);
long double complex csinhl(long double complex);
long double complex ctanhl(long double complex);
long double complex cexpl(long double complex);
long double complex clogl(long double complex);
long double cabsl(long double complex);
long double complex cpowl(long double complex,
long double complex);
long double complex csqrtl(long double complex);
long double cargl(long double complex);
long double cimagl(long double complex);
long double complex conjl(long double complex);
long double complex cprojl(long double complex);
long double creall(long double complex);
#endif /* !OPENLIBM_COMPLEX_H */
#endif /* OPENLIBM_USE_HOST_COMPLEX_H */

View file

@ -1,14 +0,0 @@
#ifndef OPENLIBM_DEFS_H_
#define OPENLIBM_DEFS_H_
#ifdef _WIN32
# ifdef IMPORT_EXPORTS
# define OLM_DLLEXPORT __declspec(dllimport)
# else
# define OLM_DLLEXPORT __declspec(dllexport)
# endif
#else
#define OLM_DLLEXPORT __attribute__ ((visibility("default")))
#endif
#endif // OPENLIBM_DEFS_H_

View file

@ -1,25 +0,0 @@
#ifdef OPENLIBM_USE_HOST_FENV_H
#include <fenv.h>
#else /* !OPENLIBM_USE_HOST_FENV_H */
#if defined(__aarch64__) || defined(__arm__)
#include <openlibm_fenv_arm.h>
#elif defined(__x86_64__)
#include <openlibm_fenv_amd64.h>
#elif defined(__i386__)
#include <openlibm_fenv_i387.h>
#elif defined(__powerpc__) || defined(__ppc__)
#include <openlibm_fenv_powerpc.h>
#elif defined(__mips__)
#include <openlibm_fenv_mips.h>
#elif defined(__s390__)
#include <openlibm_fenv_s390.h>
#elif defined(__riscv)
#include <openlibm_fenv_riscv.h>
#elif defined(__loongarch64)
#include <openlibm_fenv_loongarch64.h>
#else
#error "Unsupported platform"
#endif
#endif /* OPENLIBM_USE_HOST_FENV_H */

View file

@ -1,223 +0,0 @@
/*-
* Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD: src/lib/msun/amd64/fenv.h,v 1.8 2011/10/10 15:43:09 das Exp $
*/
#ifndef _FENV_H_
#define _FENV_H_
#include <openlibm_defs.h>
#include "cdefs-compat.h"
#include "types-compat.h"
#ifndef __fenv_static
#define __fenv_static static
#endif
typedef struct {
struct {
uint32_t __control;
uint32_t __status;
uint32_t __tag;
char __other[16];
} __x87;
uint32_t __mxcsr;
} fenv_t;
typedef uint16_t fexcept_t;
/* Exception flags */
#define FE_INVALID 0x01
#define FE_DENORMAL 0x02
#define FE_DIVBYZERO 0x04
#define FE_OVERFLOW 0x08
#define FE_UNDERFLOW 0x10
#define FE_INEXACT 0x20
#define FE_ALL_EXCEPT (FE_DIVBYZERO | FE_DENORMAL | FE_INEXACT | \
FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW)
/* Rounding modes */
#define FE_TONEAREST 0x0000
#define FE_DOWNWARD 0x0400
#define FE_UPWARD 0x0800
#define FE_TOWARDZERO 0x0c00
#define _ROUND_MASK (FE_TONEAREST | FE_DOWNWARD | \
FE_UPWARD | FE_TOWARDZERO)
/*
* As compared to the x87 control word, the SSE unit's control word
* has the rounding control bits offset by 3 and the exception mask
* bits offset by 7.
*/
#define _SSE_ROUND_SHIFT 3
#define _SSE_EMASK_SHIFT 7
__BEGIN_DECLS
/* Default floating-point environment */
extern const fenv_t __fe_dfl_env;
#define FE_DFL_ENV (&__fe_dfl_env)
#define __fldcw(__cw) __asm __volatile("fldcw %0" : : "m" (__cw))
#define __fldenv(__env) __asm __volatile("fldenv %0" : : "m" (__env))
#define __fldenvx(__env) __asm __volatile("fldenv %0" : : "m" (__env) \
: "st", "st(1)", "st(2)", "st(3)", "st(4)", \
"st(5)", "st(6)", "st(7)")
#define __fnclex() __asm __volatile("fnclex")
#define __fnstenv(__env) __asm __volatile("fnstenv %0" : "=m" (*(__env)))
#define __fnstcw(__cw) __asm __volatile("fnstcw %0" : "=m" (*(__cw)))
#define __fnstsw(__sw) __asm __volatile("fnstsw %0" : "=am" (*(__sw)))
#define __fwait() __asm __volatile("fwait")
#define __ldmxcsr(__csr) __asm __volatile("ldmxcsr %0" : : "m" (__csr))
#define __stmxcsr(__csr) __asm __volatile("stmxcsr %0" : "=m" (*(__csr)))
/*
 * Clears the exception flags selected by __excepts in both the x87
 * status word and the SSE MXCSR register.  Always returns 0.
 */
__fenv_static __attribute__((always_inline)) inline int
feclearexcept(int __excepts)
{
fenv_t __env;
if (__excepts == FE_ALL_EXCEPT) {
/* Every flag requested: a single fnclex avoids the save/modify/reload below. */
__fnclex();
} else {
/* Subset requested: store the x87 environment, mask the status word, load it back. */
__fnstenv(&__env.__x87);
__env.__x87.__status &= ~__excepts;
__fldenv(__env.__x87);
}
/* Same bits are cleared in MXCSR, on both paths. */
__stmxcsr(&__env.__mxcsr);
__env.__mxcsr &= ~__excepts;
__ldmxcsr(__env.__mxcsr);
return (0);
}
/*
 * Stores into *__flagp the bits of __excepts that are set in either the
 * SSE MXCSR register or the x87 status word.  Always returns 0.
 */
__fenv_static inline int
fegetexceptflag(fexcept_t *__flagp, int __excepts)
{
uint32_t __mxcsr;
uint16_t __status;
__stmxcsr(&__mxcsr);
__fnstsw(&__status);
/* Combine both units; the result is narrowed to fexcept_t on store. */
*__flagp = (__mxcsr | __status) & __excepts;
return (0);
}
OLM_DLLEXPORT int fesetexceptflag(const fexcept_t *__flagp, int __excepts);
OLM_DLLEXPORT int feraiseexcept(int __excepts);
/*
 * Returns the bits of __excepts that are set in either the x87 status
 * word or the SSE MXCSR register; nothing is modified.
 */
__fenv_static __attribute__((always_inline)) inline int
fetestexcept(int __excepts)
{
uint32_t __mxcsr;
uint16_t __status;
__stmxcsr(&__mxcsr);
__fnstsw(&__status);
return ((__status | __mxcsr) & __excepts);
}
/*
 * Returns the current rounding mode as one of the FE_* rounding
 * constants, read from the _ROUND_MASK bits of the x87 control word.
 */
__fenv_static inline int
fegetround(void)
{
uint16_t __control;
/*
* We assume that the x87 and the SSE unit agree on the
* rounding mode. Reading the control word on the x87 turns
* out to be about 5 times faster than reading it on the SSE
* unit on an Opteron 244.
*/
__fnstcw(&__control);
return (__control & _ROUND_MASK);
}
/*
 * Sets the rounding mode of both the x87 unit and the SSE unit.
 *
 * __round: one of the FE_* rounding constants.
 * Returns 0 on success, or -1 without touching any register when
 * __round has a bit set outside _ROUND_MASK.
 */
__fenv_static inline int
fesetround(int __round)
{
uint32_t __mxcsr;
uint16_t __control;
if (__round & ~_ROUND_MASK)
return (-1);
/* x87: replace the rounding bits of the control word. */
__fnstcw(&__control);
__control &= ~_ROUND_MASK;
__control |= __round;
__fldcw(__control);
/* SSE: same field, located _SSE_ROUND_SHIFT bits higher in MXCSR. */
__stmxcsr(&__mxcsr);
__mxcsr &= ~(_ROUND_MASK << _SSE_ROUND_SHIFT);
__mxcsr |= __round << _SSE_ROUND_SHIFT;
__ldmxcsr(__mxcsr);
return (0);
}
OLM_DLLEXPORT int fegetenv(fenv_t *__envp);
OLM_DLLEXPORT int feholdexcept(fenv_t *__envp);
/*
 * Installs the floating-point environment stored in *__envp: first the
 * x87 part, then the SSE MXCSR value.  Always returns 0.
 */
__fenv_static inline int
fesetenv(const fenv_t *__envp)
{
/*
* XXX Using fldenvx() instead of fldenv() tells the compiler that this
* instruction clobbers the i387 register stack. This happens because
* we restore the tag word from the saved environment. Normally, this
* would happen anyway and we wouldn't care, because the ABI allows
* function calls to clobber the i387 regs. However, fesetenv() is
* inlined, so we need to be more careful.
*/
__fldenvx(__envp->__x87);
__ldmxcsr(__envp->__mxcsr);
return (0);
}
OLM_DLLEXPORT int feupdateenv(const fenv_t *__envp);
#if __BSD_VISIBLE
OLM_DLLEXPORT int feenableexcept(int __mask);
OLM_DLLEXPORT int fedisableexcept(int __mask);
/* We currently provide no external definition of fegetexcept(). */
/*
 * Returns the FE_* exception bits that are clear in the x87 control
 * word.
 * NOTE(review): a clear bit there presumably means the exception is
 * unmasked (traps enabled) -- confirm against the x87 documentation.
 */
static inline int
fegetexcept(void)
{
uint16_t __control;
/*
* We assume that the masks for the x87 and the SSE unit are
* the same.
*/
__fnstcw(&__control);
return (~__control & FE_ALL_EXCEPT);
}
#endif /* __BSD_VISIBLE */
__END_DECLS
#endif /* !_FENV_H_ */

View file

@ -1,230 +0,0 @@
/*-
* Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD: src/lib/msun/arm/fenv.h,v 1.6 2011/10/10 15:43:09 das Exp $
*/
#ifndef _FENV_H_
#define _FENV_H_
#include <stdint.h>
#include "cdefs-compat.h"
#ifndef __fenv_static
#define __fenv_static static
#endif
typedef uint32_t fenv_t;
typedef uint32_t fexcept_t;
/* Exception flags */
#define FE_INVALID 0x0001
#define FE_DIVBYZERO 0x0002
#define FE_OVERFLOW 0x0004
#define FE_UNDERFLOW 0x0008
#define FE_INEXACT 0x0010
#define FE_ALL_EXCEPT (FE_DIVBYZERO | FE_INEXACT | \
FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW)
/* Rounding modes */
#define FE_TONEAREST 0x0000
#define FE_TOWARDZERO 0x0001
#define FE_UPWARD 0x0002
#define FE_DOWNWARD 0x0003
#define _ROUND_MASK (FE_TONEAREST | FE_DOWNWARD | \
FE_UPWARD | FE_TOWARDZERO)
__BEGIN_DECLS
/* Default floating-point environment */
extern const fenv_t __fe_dfl_env;
#define FE_DFL_ENV (&__fe_dfl_env)
/* We need to be able to map status flag positions to mask flag positions */
#define _FPUSW_SHIFT 16
#define _ENABLE_MASK (FE_ALL_EXCEPT << _FPUSW_SHIFT)
#if defined(__aarch64__)
#define __rfs(__fpsr) __asm __volatile("mrs %0,fpsr" : "=r" (*(__fpsr)))
#define __wfs(__fpsr) __asm __volatile("msr fpsr,%0" : : "r" (__fpsr))
/* Test for hardware support for ARM floating point operations, explicitly
checking for float and double support, see "ARM C Language Extensions", 6.5.1 */
#elif defined(__ARM_FP) && (__ARM_FP & 0x0C) != 0
#define __rfs(__fpsr) __asm __volatile("vmrs %0,fpscr" : "=&r" (*(__fpsr)))
#define __wfs(__fpsr) __asm __volatile("vmsr fpscr,%0" : : "r" (__fpsr))
#else
#define __rfs(__fpsr) (*(__fpsr) = 0)
#define __wfs(__fpsr)
#endif
/*
 * Clear the exception flags selected by __excepts.
 *
 * The status word is fetched with __rfs(), the requested bits are masked
 * out, and the result is stored back with __wfs().  Always returns 0.
 */
__fenv_static inline int
feclearexcept(int __excepts)
{
    fexcept_t __status;

    __rfs(&__status);
    __status = __status & ~__excepts;
    __wfs(__status);
    return (0);
}
/*
 * Save the state of the exception flags selected by __excepts.
 *
 * __flagp   receives the status-word bits that are both set and named
 *           in __excepts.
 * Always returns 0.
 */
__fenv_static inline int
fegetexceptflag(fexcept_t *__flagp, int __excepts)
{
    fexcept_t __status;

    __rfs(&__status);
    __status &= __excepts;
    *__flagp = __status;
    return (0);
}
/*
 * Restore the flags selected by __excepts from the image in *__flagp.
 *
 * Bits outside __excepts keep their current value; bits inside it are
 * replaced by the corresponding bits of *__flagp.  Always returns 0.
 */
__fenv_static inline int
fesetexceptflag(const fexcept_t *__flagp, int __excepts)
{
    fexcept_t __status;
    fexcept_t __wanted = *__flagp & __excepts;

    __rfs(&__status);
    __status = (__status & ~__excepts) | __wanted;
    __wfs(__status);
    return (0);
}
/*
 * Raise the exceptions named in __excepts.
 *
 * Implemented by handing a flag word equal to __excepts to
 * fesetexceptflag(), i.e. the status bits are simply set.
 * NOTE(review): the original marks this call "XXX"; presumably because
 * setting the bits does not trigger any enabled trap -- confirm.
 * Always returns 0.
 */
__fenv_static inline int
feraiseexcept(int __excepts)
{
    fexcept_t __flags = __excepts;

    (void)fesetexceptflag(&__flags, __excepts); /* XXX */
    return (0);
}
/*
 * Return the subset of __excepts whose flags are currently set in the
 * status word read by __rfs().
 */
__fenv_static inline int
fetestexcept(int __excepts)
{
    fexcept_t __status;

    __rfs(&__status);
    __status &= __excepts;
    return (__status);
}
/*
 * Query the current rounding mode.
 *
 * This implementation never reads hardware state and always answers -1:
 * on ARM the rounding mode is apparently encoded in each instruction,
 * which leaves the dynamic mode indeterminate (some FPUs may differ).
 */
__fenv_static inline int
fegetround(void)
{
    return (-1);
}
/*
 * Request a rounding mode.
 *
 * The request is ignored and -1 (failure) is returned unconditionally;
 * no hardware state is modified.
 */
__fenv_static inline int
fesetround(int __round)
{
    (void)__round;
    return (-1);
}
/*
 * Capture the floating-point environment.
 *
 * __envp   receives the word produced by __rfs().
 * Always returns 0.
 */
__fenv_static inline int
fegetenv(fenv_t *__envp)
{
    fenv_t __current;

    __rfs(&__current);
    *__envp = __current;
    return (0);
}
/*
 * Save the environment in *__envp, then clear every exception flag
 * (FE_ALL_EXCEPT) and every enable bit (_ENABLE_MASK) in the word that
 * is written back with __wfs().  Always returns 0.
 */
__fenv_static inline int
feholdexcept(fenv_t *__envp)
{
    fenv_t __word;

    __rfs(&__word);
    *__envp = __word;               /* snapshot before any change */
    __word = __word & ~(FE_ALL_EXCEPT | _ENABLE_MASK);
    __wfs(__word);
    return (0);
}
/*
 * Install the environment stored in *__envp by passing it to __wfs().
 * Always returns 0.
 */
__fenv_static inline int
fesetenv(const fenv_t *__envp)
{
    __wfs(*__envp);

    return (0);
}
/*
 * Install *__envp while keeping the exceptions that are pending now.
 *
 * The current status word is read first, the new environment is
 * written, and the previously set FE_ALL_EXCEPT bits are then re-raised
 * through feraiseexcept().  Always returns 0.
 */
__fenv_static inline int
feupdateenv(const fenv_t *__envp)
{
    fexcept_t __pending;

    __rfs(&__pending);
    __wfs(*__envp);
    (void)feraiseexcept(__pending & FE_ALL_EXCEPT);
    return (0);
}
#if __BSD_VISIBLE
/* We currently provide no external definitions of the functions below. */
/*
 * Enable the exceptions named in __mask.
 *
 * The requested FE_* bits are shifted up by _FPUSW_SHIFT into the
 * enable field and OR-ed into the word written back with __wfs().
 * Returns the set of exceptions that were enabled before the call,
 * expressed as FE_* bits.
 */
static inline int
feenableexcept(int __mask)
{
    fenv_t __before, __after;

    __rfs(&__before);
    __after = __before | ((__mask & FE_ALL_EXCEPT) << _FPUSW_SHIFT);
    __wfs(__after);
    return ((__before >> _FPUSW_SHIFT) & FE_ALL_EXCEPT);
}
/*
 * Disable the exceptions named in __mask.
 *
 * The requested FE_* bits are shifted up by _FPUSW_SHIFT into the
 * enable field and cleared from the word written back with __wfs().
 * Returns the set of exceptions that were enabled before the call,
 * expressed as FE_* bits.
 */
static inline int
fedisableexcept(int __mask)
{
    fenv_t __before, __after;

    __rfs(&__before);
    __after = __before & ~((__mask & FE_ALL_EXCEPT) << _FPUSW_SHIFT);
    __wfs(__after);
    return ((__before >> _FPUSW_SHIFT) & FE_ALL_EXCEPT);
}
/*
 * Return the currently enabled exceptions as FE_* bits: the enable
 * field (_ENABLE_MASK) of the word read by __rfs(), shifted down by
 * _FPUSW_SHIFT.
 */
static inline int
fegetexcept(void)
{
    fenv_t __word;

    __rfs(&__word);
    __word &= _ENABLE_MASK;
    return (__word >> _FPUSW_SHIFT);
}
#endif /* __BSD_VISIBLE */
__END_DECLS
#endif /* !_FENV_H_ */

View file

@ -1,260 +0,0 @@
/*-
* Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD: src/lib/msun/i387/fenv.h,v 1.8 2011/10/10 15:43:09 das Exp $
*/
#ifndef _FENV_H_
#define _FENV_H_
#include "openlibm_defs.h"
#include "cdefs-compat.h"
#include "types-compat.h"
#ifndef __fenv_static
#define __fenv_static static
#endif
/*
 * To preserve binary compatibility with FreeBSD 5.3, we pack the
 * mxcsr into some reserved fields, rather than changing sizeof(fenv_t).
 *
 * __mxcsr_hi and __mxcsr_lo hold the upper and lower 16 bits of the
 * saved MXCSR value; use __get_mxcsr()/__set_mxcsr() to access them.
 */
typedef struct {
    uint16_t __control;
    uint16_t __mxcsr_hi;
    uint16_t __status;
    uint16_t __mxcsr_lo;
    uint32_t __tag;
    char __other[16];
} fenv_t;
/*
 * Reassemble the 32-bit MXCSR image stored in `env`.  The halves are
 * widened to uint32_t before shifting: a uint16_t operand would be
 * promoted to (signed) int, and shifting a value with bit 15 set left
 * by 16 would overflow it.
 */
#define __get_mxcsr(env) (((uint32_t)(env).__mxcsr_hi << 16) | \
    (uint32_t)(env).__mxcsr_lo)
/* Split the 32-bit value `x` into the two 16-bit MXCSR fields of `env`. */
#define __set_mxcsr(env, x) do { \
    (env).__mxcsr_hi = (uint16_t)((uint32_t)(x) >> 16); \
    (env).__mxcsr_lo = (uint16_t)(x); \
} while (0)
typedef uint16_t fexcept_t;
/* Exception flags */
#define FE_INVALID 0x01
#define FE_DENORMAL 0x02
#define FE_DIVBYZERO 0x04
#define FE_OVERFLOW 0x08
#define FE_UNDERFLOW 0x10
#define FE_INEXACT 0x20
#define FE_ALL_EXCEPT (FE_DIVBYZERO | FE_DENORMAL | FE_INEXACT | \
FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW)
/* Rounding modes */
#define FE_TONEAREST 0x0000
#define FE_DOWNWARD 0x0400
#define FE_UPWARD 0x0800
#define FE_TOWARDZERO 0x0c00
#define _ROUND_MASK (FE_TONEAREST | FE_DOWNWARD | \
FE_UPWARD | FE_TOWARDZERO)
/*
* As compared to the x87 control word, the SSE unit's control word
* has the rounding control bits offset by 3 and the exception mask
* bits offset by 7.
*/
#define _SSE_ROUND_SHIFT 3
#define _SSE_EMASK_SHIFT 7
__BEGIN_DECLS
/* After testing for SSE support once, we cache the result in __has_sse. */
enum __sse_support { __SSE_YES, __SSE_NO, __SSE_UNK };
OLM_DLLEXPORT extern enum __sse_support __has_sse;
OLM_DLLEXPORT int __test_sse(void);
#ifdef __SSE__
#define __HAS_SSE() 1
#else
#define __HAS_SSE() (__has_sse == __SSE_YES || \
(__has_sse == __SSE_UNK && __test_sse()))
#endif
/* Default floating-point environment */
OLM_DLLEXPORT extern const fenv_t __fe_dfl_env;
#define FE_DFL_ENV (&__fe_dfl_env)
#define __fldcw(__cw) __asm __volatile("fldcw %0" : : "m" (__cw))
#define __fldenv(__env) __asm __volatile("fldenv %0" : : "m" (__env))
#define __fldenvx(__env) __asm __volatile("fldenv %0" : : "m" (__env) \
: "st", "st(1)", "st(2)", "st(3)", "st(4)", \
"st(5)", "st(6)", "st(7)")
#define __fnclex() __asm __volatile("fnclex")
#define __fnstenv(__env) __asm __volatile("fnstenv %0" : "=m" (*(__env)))
#define __fnstcw(__cw) __asm __volatile("fnstcw %0" : "=m" (*(__cw)))
#define __fnstsw(__sw) __asm __volatile("fnstsw %0" : "=am" (*(__sw)))
#define __fwait() __asm __volatile("fwait")
#define __ldmxcsr(__csr) __asm __volatile("ldmxcsr %0" : : "m" (__csr))
#define __stmxcsr(__csr) __asm __volatile("stmxcsr %0" : "=m" (*(__csr)))
/*
 * Clear the exception flags selected by __excepts in the x87 unit and,
 * when SSE is available, in MXCSR as well.
 *
 * Clearing every flag at once needs only __fnclex(); any other request
 * stores the x87 environment, edits its status word and loads it back.
 * Always returns 0.
 */
__fenv_static inline int
feclearexcept(int __excepts)
{
    if (__excepts != FE_ALL_EXCEPT) {
        fenv_t __x87_env;

        __fnstenv(&__x87_env);
        __x87_env.__status &= ~__excepts;
        __fldenv(__x87_env);
    } else {
        __fnclex();
    }

    if (__HAS_SSE()) {
        uint32_t __sse_csr;

        __stmxcsr(&__sse_csr);
        __sse_csr &= ~__excepts;
        __ldmxcsr(__sse_csr);
    }
    return (0);
}
/*
 * Save the state of the exception flags selected by __excepts.
 *
 * The x87 status word and, if SSE is present, MXCSR are OR-ed together
 * so a flag raised by either unit is reported.
 *
 * __flagp   receives the combined flags restricted to __excepts.
 * Always returns 0.
 */
__fenv_static inline int
fegetexceptflag(fexcept_t *__flagp, int __excepts)
{
    uint16_t __x87_sw;
    uint32_t __sse_csr = 0;     /* contributes nothing without SSE */

    __fnstsw(&__x87_sw);
    if (__HAS_SSE())
        __stmxcsr(&__sse_csr);
    *__flagp = (__sse_csr | __x87_sw) & __excepts;
    return (0);
}
OLM_DLLEXPORT int fesetexceptflag(const fexcept_t *__flagp, int __excepts);
OLM_DLLEXPORT int feraiseexcept(int __excepts);
/*
 * Return the subset of __excepts that is currently flagged in the x87
 * status word or, when SSE is available, in MXCSR.
 */
__fenv_static inline int
fetestexcept(int __excepts)
{
    uint16_t __x87_sw;
    uint32_t __sse_csr = 0;     /* contributes nothing without SSE */

    __fnstsw(&__x87_sw);
    if (__HAS_SSE())
        __stmxcsr(&__sse_csr);
    return ((__x87_sw | __sse_csr) & __excepts);
}
/*
 * Return the current rounding mode (one of the FE_* rounding values).
 *
 * Only the x87 control word is read; the SSE unit is assumed to agree.
 * Reading the x87 word was measured to be roughly five times faster
 * than reading MXCSR on an Opteron 244.
 */
__fenv_static inline int
fegetround(void)
{
    uint16_t __x87_cw;

    __fnstcw(&__x87_cw);
    __x87_cw &= _ROUND_MASK;
    return (__x87_cw);
}
/*
 * Select the rounding mode __round for both the x87 and (if present)
 * the SSE unit.
 *
 * Returns -1 without touching any state when __round has bits outside
 * _ROUND_MASK, 0 otherwise.  In MXCSR the rounding field sits
 * _SSE_ROUND_SHIFT bits higher than in the x87 control word.
 */
__fenv_static inline int
fesetround(int __round)
{
    uint16_t __x87_cw;

    if ((__round & ~_ROUND_MASK) != 0)
        return (-1);

    __fnstcw(&__x87_cw);
    __x87_cw = (__x87_cw & ~_ROUND_MASK) | __round;
    __fldcw(__x87_cw);

    if (__HAS_SSE()) {
        uint32_t __sse_csr;

        __stmxcsr(&__sse_csr);
        __sse_csr &= ~(_ROUND_MASK << _SSE_ROUND_SHIFT);
        __sse_csr |= __round << _SSE_ROUND_SHIFT;
        __ldmxcsr(__sse_csr);
    }
    return (0);
}
OLM_DLLEXPORT int fegetenv(fenv_t *__envp);
OLM_DLLEXPORT int feholdexcept(fenv_t *__envp);
/*
 * Install the floating-point environment stored in *__envp.
 *
 * The MXCSR value packed into the environment is extracted first and
 * the fields that carried it are overwritten with all ones before the
 * image is given to the x87 unit.
 * NOTE(review): presumably all ones is the pattern the x87 expects in
 * those otherwise reserved fields -- confirm.
 * Always returns 0.
 */
__fenv_static inline int
fesetenv(const fenv_t *__envp)
{
    fenv_t __x87_env = *__envp;
    uint32_t __sse_csr = __get_mxcsr(__x87_env);

    __set_mxcsr(__x87_env, 0xffffffff);
    /*
     * XXX __fldenvx() rather than __fldenv(): its clobber list tells the
     * compiler that the i387 register stack is destroyed, which happens
     * because the saved tag word is restored.  A real function call would
     * be allowed to clobber those registers by the ABI, but fesetenv()
     * is inlined, so the compiler has to be told explicitly.
     */
    __fldenvx(__x87_env);
    if (__HAS_SSE())
        __ldmxcsr(__sse_csr);
    return (0);
}
OLM_DLLEXPORT int feupdateenv(const fenv_t *__envp);
#if __BSD_VISIBLE
OLM_DLLEXPORT int feenableexcept(int __mask);
OLM_DLLEXPORT int fedisableexcept(int __mask);
/* We currently provide no external definition of fegetexcept(). */
/*
 * Report which floating-point exceptions are currently unmasked.
 *
 * An exception counts as enabled when its bit in the x87 control word
 * is clear, hence the inversion before restricting to FE_ALL_EXCEPT.
 * The SSE unit is assumed to carry the same masks and is not read.
 */
static inline int
fegetexcept(void)
{
    uint16_t __x87_cw;
    int __unmasked;

    __fnstcw(&__x87_cw);
    __unmasked = ~__x87_cw & FE_ALL_EXCEPT;
    return (__unmasked);
}
#endif /* __BSD_VISIBLE */
__END_DECLS
#endif /* !_FENV_H_ */

View file

@ -1,226 +0,0 @@
/*-
* Copyright (c) 2023 Yifan An <me@anyi.fan>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef _FENV_H_
#define _FENV_H_
#include <stdint.h>
#include "cdefs-compat.h"
#ifndef __fenv_static
#define __fenv_static static
#endif
typedef uint32_t fenv_t;
typedef uint32_t fexcept_t;
/* Exception flags */
#define FE_INVALID 0x100000
#define FE_DIVBYZERO 0x080000
#define FE_OVERFLOW 0x040000
#define FE_UNDERFLOW 0x020000
#define FE_INEXACT 0x010000
#define FE_ALL_EXCEPT (FE_DIVBYZERO | FE_INEXACT | \
FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW)
/* Rounding modes */
#define FE_TONEAREST 0x0000
#define FE_TOWARDZERO 0x0100
#define FE_DOWNWARD 0x0200
#define FE_UPWARD 0x0300
#define _ROUND_MASK (FE_TONEAREST | FE_DOWNWARD | \
FE_UPWARD | FE_TOWARDZERO)
__BEGIN_DECLS
/* Default floating-point environment */
extern const fenv_t __fe_dfl_env;
#define FE_DFL_ENV (&__fe_dfl_env)
#define _FPU_MASK_V 0x10
#define _FPU_MASK_Z 0x08
#define _FPU_MASK_O 0x04
#define _FPU_MASK_U 0x02
#define _FPU_MASK_I 0x01
#define _FPUSW_SHIFT 16
#define _ENABLE_MASK (_FPU_MASK_V | _FPU_MASK_Z | _FPU_MASK_O | _FPU_MASK_U | _FPU_MASK_I)
#define __rfs(__fpsr) __asm __volatile("movfcsr2gr %0,$r0" : "=r"(__fpsr))
#define __wfs(__fpsr) __asm __volatile("movgr2fcsr $r0,%0" : : "r"(__fpsr))
/*
 * Clear the exception flags selected by __excepts.
 *
 * The control/status word is read with __rfs(), the requested bits are
 * removed and the result is written back with __wfs().
 * Always returns 0.
 */
__fenv_static inline int
feclearexcept(int __excepts)
{
    fexcept_t __fcsr;

    __rfs(__fcsr);
    __fcsr = __fcsr & ~__excepts;
    __wfs(__fcsr);
    return (0);
}
/*
 * Save the state of the exception flags selected by __excepts.
 *
 * __flagp   receives the bits of the word read by __rfs() that are
 *           named in __excepts.
 * Always returns 0.
 */
__fenv_static inline int
fegetexceptflag(fexcept_t *__flagp, int __excepts)
{
    fexcept_t __fcsr;

    __rfs(__fcsr);
    __fcsr &= __excepts;
    *__flagp = __fcsr;
    return (0);
}
/*
 * Restore the flags selected by __excepts from the image in *__flagp.
 *
 * Bits outside __excepts keep their current value; bits inside it are
 * replaced by the corresponding bits of *__flagp.  Always returns 0.
 */
__fenv_static inline int
fesetexceptflag(const fexcept_t *__flagp, int __excepts)
{
    fexcept_t __fcsr;
    fexcept_t __wanted = *__flagp & __excepts;

    __rfs(__fcsr);
    __fcsr = (__fcsr & ~__excepts) | __wanted;
    __wfs(__fcsr);
    return (0);
}
/*
 * Raise the exceptions named in __excepts.
 *
 * Implemented by handing a flag word equal to __excepts to
 * fesetexceptflag(), i.e. the flag bits are simply set.
 * NOTE(review): the original marks this call "XXX"; presumably because
 * setting the bits does not trigger any enabled trap -- confirm.
 * Always returns 0.
 */
__fenv_static inline int
feraiseexcept(int __excepts)
{
    fexcept_t __flags = __excepts;

    (void)fesetexceptflag(&__flags, __excepts); /* XXX */
    return (0);
}
/*
 * Return the subset of __excepts whose flags are currently set in the
 * word read by __rfs().
 */
__fenv_static inline int
fetestexcept(int __excepts)
{
    fexcept_t __fcsr;

    __rfs(__fcsr);
    __fcsr &= __excepts;
    return (__fcsr);
}
/*
 * Return the current rounding mode: the _ROUND_MASK field of the word
 * read by __rfs(), which is one of the FE_* rounding values.
 */
__fenv_static inline int
fegetround(void)
{
    fexcept_t __fcsr;

    __rfs(__fcsr);
    __fcsr &= _ROUND_MASK;
    return (__fcsr);
}
/*
 * Select the rounding mode __round.
 *
 * Returns 1 without touching any state when __round has bits outside
 * _ROUND_MASK; otherwise replaces the rounding field of the word
 * accessed through __rfs()/__wfs() and returns 0.
 */
__fenv_static inline int
fesetround(int __round)
{
    fexcept_t __fcsr;

    if (__round & ~_ROUND_MASK)
        return (1);

    __rfs(__fcsr);
    __fcsr = (__fcsr & ~_ROUND_MASK) | __round;
    __wfs(__fcsr);
    return (0);
}
/*
 * Capture the floating-point environment.
 *
 * __envp   receives the word produced by __rfs().
 * Always returns 0.
 */
__fenv_static inline int
fegetenv(fenv_t *__envp)
{
    fenv_t __current;

    __rfs(__current);
    *__envp = __current;
    return (0);
}
/*
 * Save the environment in *__envp, then clear every exception flag
 * (FE_ALL_EXCEPT) and each of the five _FPU_MASK_* enable bits in the
 * word that is written back with __wfs().  Always returns 0.
 */
__fenv_static inline int
feholdexcept(fenv_t *__envp)
{
    fenv_t __fcsr;

    __rfs(__fcsr);
    *__envp = __fcsr;               /* snapshot before any change */
    __fcsr = __fcsr & ~(FE_ALL_EXCEPT |
        _FPU_MASK_V | _FPU_MASK_Z | _FPU_MASK_O |
        _FPU_MASK_U | _FPU_MASK_I);
    __wfs(__fcsr);
    return (0);
}
/*
 * Install the environment stored in *__envp by passing it to __wfs().
 * Always returns 0.
 */
__fenv_static inline int
fesetenv(const fenv_t *__envp)
{
    fenv_t __wanted = *__envp;

    __wfs(__wanted);
    return (0);
}
/*
 * Install *__envp while keeping the exceptions that are pending now.
 *
 * The current word is read first, the new environment is written, and
 * the previously set FE_ALL_EXCEPT bits are then re-raised through
 * feraiseexcept().  Always returns 0.
 */
__fenv_static inline int
feupdateenv(const fenv_t *__envp)
{
    fexcept_t __pending;

    __rfs(__pending);
    __wfs(*__envp);
    (void)feraiseexcept(__pending & FE_ALL_EXCEPT);
    return (0);
}
#if __BSD_VISIBLE
/*
 * Enable the exceptions named in __mask.
 *
 * FE_* flag bits are shifted down by _FPUSW_SHIFT to reach the matching
 * enable bits, which are OR-ed into the word written with __wfs().
 * Returns the previously enabled set, shifted up again so that it is
 * expressed as FE_* bits.
 */
static inline int
feenableexcept(int __mask)
{
    fenv_t __fcsr, __previous;

    __rfs(__fcsr);
    __previous = (__fcsr & _ENABLE_MASK) << _FPUSW_SHIFT;
    __fcsr = __fcsr | ((__mask & FE_ALL_EXCEPT) >> _FPUSW_SHIFT);
    __wfs(__fcsr);
    return (__previous);
}
/*
 * Disable the exceptions named in __mask.
 *
 * FE_* flag bits are shifted down by _FPUSW_SHIFT to reach the matching
 * enable bits, which are cleared in the word written with __wfs().
 * Returns the previously enabled set, shifted up again so that it is
 * expressed as FE_* bits.
 */
static inline int
fedisableexcept(int __mask)
{
    fenv_t __fcsr, __previous;

    __rfs(__fcsr);
    __previous = (__fcsr & _ENABLE_MASK) << _FPUSW_SHIFT;
    __fcsr = __fcsr & ~((__mask & FE_ALL_EXCEPT) >> _FPUSW_SHIFT);
    __wfs(__fcsr);
    return (__previous);
}
/*
 * Return the currently enabled exceptions as FE_* bits: the
 * _ENABLE_MASK field of the word read by __rfs(), shifted up by
 * _FPUSW_SHIFT.
 */
static inline int
fegetexcept(void)
{
    fenv_t __fcsr;

    __rfs(__fcsr);
    __fcsr &= _ENABLE_MASK;
    return (__fcsr << _FPUSW_SHIFT);
}
#endif /* __BSD_VISIBLE */
__END_DECLS
#endif /* !_FENV_H_ */

View file

@ -1,278 +0,0 @@
/*-
* Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _FENV_H_
#define _FENV_H_
#include <stdint.h>
#include "cdefs-compat.h"
#ifndef __fenv_static
#define __fenv_static static
#endif
typedef uint32_t fenv_t;
typedef uint32_t fexcept_t;
/* Exception flags */
#ifdef __mips_soft_float
#define _FPUSW_SHIFT 16
#define FE_INVALID 0x0001
#define FE_DIVBYZERO 0x0002
#define FE_OVERFLOW 0x0004
#define FE_UNDERFLOW 0x0008
#define FE_INEXACT 0x0010
#else
#define _FCSR_CAUSE_SHIFT 10
#define FE_INVALID 0x0040
#define FE_DIVBYZERO 0x0020
#define FE_OVERFLOW 0x0010
#define FE_UNDERFLOW 0x0008
#define FE_INEXACT 0x0004
#endif
#define FE_ALL_EXCEPT (FE_DIVBYZERO | FE_INEXACT | \
FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW)
/* Rounding modes */
#define FE_TONEAREST 0x0000
#define FE_TOWARDZERO 0x0001
#define FE_UPWARD 0x0002
#define FE_DOWNWARD 0x0003
#define _ROUND_MASK (FE_TONEAREST | FE_DOWNWARD | \
FE_UPWARD | FE_TOWARDZERO)
__BEGIN_DECLS
/* Default floating-point environment */
extern const fenv_t __fe_dfl_env;
#define FE_DFL_ENV (&__fe_dfl_env)
/* We need to be able to map status flag positions to mask flag positions */
#define _ENABLE_SHIFT 5
#define _ENABLE_MASK (FE_ALL_EXCEPT << _ENABLE_SHIFT)
#ifndef __mips_soft_float
#define __cfc1(__fcsr) __asm __volatile("cfc1 %0, $31" : "=r" (__fcsr))
#define __ctc1(__fcsr) __asm __volatile("ctc1 %0, $31" :: "r" (__fcsr))
#endif
#ifdef __mips_soft_float
int feclearexcept(int __excepts);
int fegetexceptflag(fexcept_t *__flagp, int __excepts);
int fesetexceptflag(const fexcept_t *__flagp, int __excepts);
int feraiseexcept(int __excepts);
int fetestexcept(int __excepts);
int fegetround(void);
int fesetround(int __round);
int fegetenv(fenv_t *__envp);
int feholdexcept(fenv_t *__envp);
int fesetenv(const fenv_t *__envp);
int feupdateenv(const fenv_t *__envp);
#else
/*
 * Clear the exception flags selected by __excepts.
 *
 * Bits outside FE_ALL_EXCEPT are ignored.  Each selected flag is
 * cleared together with the bit _FCSR_CAUSE_SHIFT positions above it
 * in the word accessed through __cfc1()/__ctc1().  Always returns 0.
 */
__fenv_static inline int
feclearexcept(int __excepts)
{
    fexcept_t __csr;
    int __flags = __excepts & FE_ALL_EXCEPT;

    __cfc1(__csr);
    __csr &= ~(__flags | (__flags << _FCSR_CAUSE_SHIFT));
    __ctc1(__csr);
    return (0);
}
/*
 * Save the state of the exception flags selected by __excepts.
 *
 * __flagp   receives the bits of the word read by __cfc1() that are
 *           named in __excepts (restricted to FE_ALL_EXCEPT).
 * Always returns 0.
 */
__fenv_static inline int
fegetexceptflag(fexcept_t *__flagp, int __excepts)
{
    fexcept_t __csr;
    int __flags = __excepts & FE_ALL_EXCEPT;

    __cfc1(__csr);
    *__flagp = __csr & __flags;
    return (0);
}
/*
 * Restore the flags selected by __excepts from the image in *__flagp.
 *
 * Bits outside FE_ALL_EXCEPT are ignored.  Selected flags are replaced
 * by the corresponding bits of *__flagp; everything else in the word
 * is left as read.  Always returns 0.
 */
__fenv_static inline int
fesetexceptflag(const fexcept_t *__flagp, int __excepts)
{
    fexcept_t __csr;
    int __flags = __excepts & FE_ALL_EXCEPT;

    __cfc1(__csr);
    __csr = (__csr & ~__flags) | (*__flagp & __flags);
    __ctc1(__csr);
    return (0);
}
/*
 * Raise the exceptions named in __excepts.
 *
 * Bits outside FE_ALL_EXCEPT are ignored.  Each selected flag is set
 * together with the bit _FCSR_CAUSE_SHIFT positions above it in the
 * word written with __ctc1().  Always returns 0.
 */
__fenv_static inline int
feraiseexcept(int __excepts)
{
    fexcept_t __csr;
    int __flags = __excepts & FE_ALL_EXCEPT;

    __cfc1(__csr);
    __csr = __csr | __flags | (__flags << _FCSR_CAUSE_SHIFT);
    __ctc1(__csr);
    return (0);
}
/*
 * Return the subset of __excepts (restricted to FE_ALL_EXCEPT) whose
 * flags are currently set in the word read by __cfc1().
 */
__fenv_static inline int
fetestexcept(int __excepts)
{
    fexcept_t __csr;
    int __flags = __excepts & FE_ALL_EXCEPT;

    __cfc1(__csr);
    return (__csr & __flags);
}
/*
 * Return the current rounding mode: the _ROUND_MASK field of the word
 * read by __cfc1(), which is one of the FE_* rounding values.
 */
__fenv_static inline int
fegetround(void)
{
    fexcept_t __csr;

    __cfc1(__csr);
    __csr &= _ROUND_MASK;
    return (__csr);
}
/*
 * Select the rounding mode __round.
 *
 * Returns -1 without touching any state when __round has bits outside
 * _ROUND_MASK; otherwise replaces the rounding field of the word
 * accessed through __cfc1()/__ctc1() and returns 0.
 */
__fenv_static inline int
fesetround(int __round)
{
    fexcept_t __csr;

    if ((__round & ~_ROUND_MASK) != 0)
        return (-1);

    __cfc1(__csr);
    __csr = (__csr & ~_ROUND_MASK) | __round;
    __ctc1(__csr);
    return (0);
}
/*
 * Capture the floating-point environment.
 *
 * __envp   receives the word produced by __cfc1().
 * Always returns 0.
 */
__fenv_static inline int
fegetenv(fenv_t *__envp)
{
    fenv_t __current;

    __cfc1(__current);
    *__envp = __current;
    return (0);
}
/*
 * Save the environment in *__envp, then clear every exception flag
 * (FE_ALL_EXCEPT) and every enable bit (_ENABLE_MASK) in the word that
 * is written back with __ctc1().  Always returns 0.
 */
__fenv_static inline int
feholdexcept(fenv_t *__envp)
{
    fexcept_t __csr;

    __cfc1(__csr);
    *__envp = __csr;                /* snapshot before any change */
    __csr = __csr & ~(FE_ALL_EXCEPT | _ENABLE_MASK);
    __ctc1(__csr);
    return (0);
}
/*
 * Install the environment stored in *__envp by passing it to __ctc1().
 * Always returns 0.
 */
__fenv_static inline int
fesetenv(const fenv_t *__envp)
{
    fenv_t __wanted = *__envp;

    __ctc1(__wanted);
    return (0);
}
/*
 * Install *__envp while keeping the exceptions that are pending now.
 *
 * The current word is read first, the new environment is installed via
 * fesetenv(), and the old word is handed to feraiseexcept(), which
 * itself discards everything outside FE_ALL_EXCEPT.  Always returns 0.
 */
__fenv_static inline int
feupdateenv(const fenv_t *__envp)
{
    fexcept_t __pending;

    __cfc1(__pending);
    (void)fesetenv(__envp);
    (void)feraiseexcept(__pending);
    return (0);
}
#endif /* !__mips_soft_float */
#if __BSD_VISIBLE
/* We currently provide no external definitions of the functions below. */
#ifdef __mips_soft_float
int feenableexcept(int __mask);
int fedisableexcept(int __mask);
int fegetexcept(void);
#else
/*
 * Enable the exceptions named in __mask.
 *
 * The requested FE_* bits are shifted up by _ENABLE_SHIFT into the
 * enable field and OR-ed into the word written with __ctc1().
 * Returns the set of exceptions that were enabled before the call,
 * expressed as FE_* bits.
 */
static inline int
feenableexcept(int __mask)
{
    fenv_t __before, __after;

    __cfc1(__before);
    __after = __before | ((__mask & FE_ALL_EXCEPT) << _ENABLE_SHIFT);
    __ctc1(__after);
    return ((__before >> _ENABLE_SHIFT) & FE_ALL_EXCEPT);
}
/*
 * Disable the exceptions named in __mask.
 *
 * The requested FE_* bits are shifted up by _ENABLE_SHIFT into the
 * enable field and cleared from the word written with __ctc1().
 * Returns the set of exceptions that were enabled before the call,
 * expressed as FE_* bits.
 */
static inline int
fedisableexcept(int __mask)
{
    fenv_t __before, __after;

    __cfc1(__before);
    __after = __before & ~((__mask & FE_ALL_EXCEPT) << _ENABLE_SHIFT);
    __ctc1(__after);
    return ((__before >> _ENABLE_SHIFT) & FE_ALL_EXCEPT);
}
/*
 * Return the currently enabled exceptions as FE_* bits: the
 * _ENABLE_MASK field of the word read by __cfc1(), shifted down by
 * _ENABLE_SHIFT.
 */
static inline int
fegetexcept(void)
{
    fexcept_t __csr;

    __cfc1(__csr);
    __csr &= _ENABLE_MASK;
    return (__csr >> _ENABLE_SHIFT);
}
#endif /* !__mips_soft_float */
#endif /* __BSD_VISIBLE */
__END_DECLS
#endif /* !_FENV_H_ */

View file

@ -1,279 +0,0 @@
/*-
* Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _FENV_H_
#define _FENV_H_
#include <sys/types.h>
#ifndef __fenv_static
#define __fenv_static static
#endif
typedef __uint32_t fenv_t;
typedef __uint32_t fexcept_t;
/* Exception flags */
#define FE_INEXACT 0x02000000
#define FE_DIVBYZERO 0x04000000
#define FE_UNDERFLOW 0x08000000
#define FE_OVERFLOW 0x10000000
#define FE_INVALID 0x20000000 /* all types of invalid FP ops */
/*
* The PowerPC architecture has extra invalid flags that indicate the
* specific type of invalid operation occurred. These flags may be
* tested, set, and cleared---but not masked---separately. All of
* these bits are cleared when FE_INVALID is cleared, but only
* FE_VXSOFT is set when FE_INVALID is explicitly set in software.
*/
#define FE_VXCVI 0x00000100 /* invalid integer convert */
#define FE_VXSQRT 0x00000200 /* square root of a negative */
#define FE_VXSOFT 0x00000400 /* software-requested exception */
#define FE_VXVC 0x00080000 /* ordered comparison involving NaN */
#define FE_VXIMZ 0x00100000 /* inf * 0 */
#define FE_VXZDZ 0x00200000 /* 0 / 0 */
#define FE_VXIDI 0x00400000 /* inf / inf */
#define FE_VXISI 0x00800000 /* inf - inf */
#define FE_VXSNAN 0x01000000 /* operation on a signalling NaN */
#define FE_ALL_INVALID (FE_VXCVI | FE_VXSQRT | FE_VXSOFT | FE_VXVC | \
FE_VXIMZ | FE_VXZDZ | FE_VXIDI | FE_VXISI | \
FE_VXSNAN | FE_INVALID)
#define FE_ALL_EXCEPT (FE_DIVBYZERO | FE_INEXACT | \
FE_ALL_INVALID | FE_OVERFLOW | FE_UNDERFLOW)
/* Rounding modes */
#define FE_TONEAREST 0x0000
#define FE_TOWARDZERO 0x0001
#define FE_UPWARD 0x0002
#define FE_DOWNWARD 0x0003
#define _ROUND_MASK (FE_TONEAREST | FE_DOWNWARD | \
FE_UPWARD | FE_TOWARDZERO)
__BEGIN_DECLS
/* Default floating-point environment */
extern const fenv_t __fe_dfl_env;
#define FE_DFL_ENV (&__fe_dfl_env)
/* We need to be able to map status flag positions to mask flag positions */
#define _FPUSW_SHIFT 22
#define _ENABLE_MASK ((FE_DIVBYZERO | FE_INEXACT | FE_INVALID | \
FE_OVERFLOW | FE_UNDERFLOW) >> _FPUSW_SHIFT)
#ifndef _SOFT_FLOAT
#define __mffs(__env) __asm __volatile("mffs %0" : "=f" (*(__env)))
#define __mtfsf(__env) __asm __volatile("mtfsf 255,%0" : : "f" (__env))
#else
#define __mffs(__env)
#define __mtfsf(__env)
#endif
/*
 * Overlay used to exchange the status/control word with __mffs() and
 * __mtfsf(), which operate on a double (__d), while the functions in
 * this header inspect and modify it as a 32-bit integer (__bits.__reg).
 */
union __fpscr {
double __d;
struct {
/*
 * __reg is declared first on little-endian targets and second on
 * big-endian ones, so it overlays the low-order 32 bits of __d in both
 * layouts; __junk covers the other half and is never read here.
 */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
fenv_t __reg;
__uint32_t __junk;
#else
__uint32_t __junk;
fenv_t __reg;
#endif
} __bits;
};
__fenv_static inline int
feclearexcept(int __excepts)
{
union __fpscr __r;
if (__excepts & FE_INVALID)
__excepts |= FE_ALL_INVALID;
__mffs(&__r.__d);
__r.__bits.__reg &= ~__excepts;
__mtfsf(__r.__d);
return (0);
}
/*
 * Save the state of the exception flags selected by __excepts.
 *
 * __flagp   receives the bits of the word read by __mffs() that are
 *           named in __excepts.
 * Always returns 0.
 */
__fenv_static inline int
fegetexceptflag(fexcept_t *__flagp, int __excepts)
{
    union __fpscr __u;

    __mffs(&__u.__d);
    __u.__bits.__reg &= __excepts;
    *__flagp = __u.__bits.__reg;
    return (0);
}
__fenv_static inline int
fesetexceptflag(const fexcept_t *__flagp, int __excepts)
{
union __fpscr __r;
if (__excepts & FE_INVALID)
__excepts |= FE_ALL_EXCEPT;
__mffs(&__r.__d);
__r.__bits.__reg &= ~__excepts;
__r.__bits.__reg |= *__flagp & __excepts;
__mtfsf(__r.__d);
return (0);
}
__fenv_static inline int
feraiseexcept(int __excepts)
{
union __fpscr __r;
if (__excepts & FE_INVALID)
__excepts |= FE_VXSOFT;
__mffs(&__r.__d);
__r.__bits.__reg |= __excepts;
__mtfsf(__r.__d);
return (0);
}
__fenv_static inline int
fetestexcept(int __excepts)
{
union __fpscr __r;
__mffs(&__r.__d);
return (__r.__bits.__reg & __excepts);
}
__fenv_static inline int
fegetround(void)
{
union __fpscr __r;
__mffs(&__r.__d);
return (__r.__bits.__reg & _ROUND_MASK);
}
__fenv_static inline int
fesetround(int __round)
{
union __fpscr __r;
if (__round & ~_ROUND_MASK)
return (-1);
__mffs(&__r.__d);
__r.__bits.__reg &= ~_ROUND_MASK;
__r.__bits.__reg |= __round;
__mtfsf(__r.__d);
return (0);
}
/*
 * Capture the floating-point environment.
 *
 * __envp   receives the integer half (__bits.__reg) of the value read
 *          by __mffs().
 * Always returns 0.
 */
__fenv_static inline int
fegetenv(fenv_t *__envp)
{
    union __fpscr __u;
    fenv_t __current;

    __mffs(&__u.__d);
    __current = __u.__bits.__reg;
    *__envp = __current;
    return (0);
}
/*
 * Save the environment in *__envp, then clear every exception flag
 * (FE_ALL_EXCEPT) and every enable bit (_ENABLE_MASK) in the word that
 * is written back with __mtfsf().
 *
 * __envp   receives the environment as it was before the call.
 * Always returns 0.
 */
__fenv_static inline int
feholdexcept(fenv_t *__envp)
{
    union __fpscr __r;

    __mffs(&__r.__d);
    /*
     * Save the integer image, exactly as fegetenv() does.  Assigning
     * __r.__d here would convert the bit pattern as a floating-point
     * *value* to an integer and store garbage.
     */
    *__envp = __r.__bits.__reg;
    __r.__bits.__reg &= ~(FE_ALL_EXCEPT | _ENABLE_MASK);
    __mtfsf(__r.__d);
    return (0);
}
__fenv_static inline int
fesetenv(const fenv_t *__envp)
{
union __fpscr __r;
__r.__bits.__reg = *__envp;
__mtfsf(__r.__d);
return (0);
}
__fenv_static inline int
feupdateenv(const fenv_t *__envp)
{
union __fpscr __r;
__mffs(&__r.__d);
__r.__bits.__reg &= FE_ALL_EXCEPT;
__r.__bits.__reg |= *__envp;
__mtfsf(__r.__d);
return (0);
}
#if __BSD_VISIBLE
/* We currently provide no external definitions of the functions below. */
static inline int
feenableexcept(int __mask)
{
union __fpscr __r;
fenv_t __oldmask;
__mffs(&__r.__d);
__oldmask = __r.__bits.__reg;
__r.__bits.__reg |= (__mask & FE_ALL_EXCEPT) >> _FPUSW_SHIFT;
__mtfsf(__r.__d);
return ((__oldmask & _ENABLE_MASK) << _FPUSW_SHIFT);
}
static inline int
fedisableexcept(int __mask)
{
union __fpscr __r;
fenv_t __oldmask;
__mffs(&__r.__d);
__oldmask = __r.__bits.__reg;
__r.__bits.__reg &= ~((__mask & FE_ALL_EXCEPT) >> _FPUSW_SHIFT);
__mtfsf(__r.__d);
return ((__oldmask & _ENABLE_MASK) << _FPUSW_SHIFT);
}
static inline int
fegetexcept(void)
{
union __fpscr __r;
__mffs(&__r.__d);
return ((__r.__bits.__reg & _ENABLE_MASK) << _FPUSW_SHIFT);
}
#endif /* __BSD_VISIBLE */
__END_DECLS
#endif /* !_FENV_H_ */

View file

@ -1,261 +0,0 @@
/*-
* Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
* Copyright (c) 2015-2016 Ruslan Bukin <br@bsdpad.com>
* All rights reserved.
*
* Portions of this software were developed by SRI International and the
* University of Cambridge Computer Laboratory under DARPA/AFRL contract
* FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
*
* Portions of this software were developed by the University of Cambridge
* Computer Laboratory as part of the CTSRD Project, with support from the
* UK Higher Education Innovation Fund (HEIF).
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD: head/lib/msun/riscv/fenv.h 332792 2018-04-19 20:36:15Z brooks $
*/
#ifndef _FENV_H_
#define _FENV_H_
#include <stdint.h>
#include "cdefs-compat.h"
#ifndef __fenv_static
#define __fenv_static static
#endif
/*
 * Environment and exception-flag images are both 64-bit unsigned
 * integers.  The standard <stdint.h> name is used instead of the
 * implementation-internal __uint64_t, which not every C library
 * provides.
 */
typedef uint64_t fenv_t;
typedef uint64_t fexcept_t;
/* Exception flags */
#define FE_INVALID 0x0010
#define FE_DIVBYZERO 0x0008
#define FE_OVERFLOW 0x0004
#define FE_UNDERFLOW 0x0002
#define FE_INEXACT 0x0001
#define FE_ALL_EXCEPT (FE_DIVBYZERO | FE_INEXACT | \
FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW)
/*
* RISC-V Rounding modes
*/
#define _ROUND_SHIFT 5
#define FE_TONEAREST (0x00 << _ROUND_SHIFT)
#define FE_TOWARDZERO (0x01 << _ROUND_SHIFT)
#define FE_DOWNWARD (0x02 << _ROUND_SHIFT)
#define FE_UPWARD (0x03 << _ROUND_SHIFT)
#define _ROUND_MASK (FE_TONEAREST | FE_DOWNWARD | \
FE_UPWARD | FE_TOWARDZERO)
__BEGIN_DECLS
/* Default floating-point environment */
extern const fenv_t __fe_dfl_env;
#define FE_DFL_ENV (&__fe_dfl_env)
#if !defined(__riscv_float_abi_soft) && !defined(__riscv_float_abi_double)
#if defined(__riscv_float_abi_single)
#error single precision floating point ABI not supported
#else
#error compiler did not set soft/hard float macros
#endif
#endif
#ifndef __riscv_float_abi_soft
#define __rfs(__fcsr) __asm __volatile("csrr %0, fcsr" : "=r" (__fcsr))
#define __wfs(__fcsr) __asm __volatile("csrw fcsr, %0" :: "r" (__fcsr))
#endif
#ifdef __riscv_float_abi_soft
int feclearexcept(int __excepts);
int fegetexceptflag(fexcept_t *__flagp, int __excepts);
int fesetexceptflag(const fexcept_t *__flagp, int __excepts);
int feraiseexcept(int __excepts);
int fetestexcept(int __excepts);
int fegetround(void);
int fesetround(int __round);
int fegetenv(fenv_t *__envp);
int feholdexcept(fenv_t *__envp);
int fesetenv(const fenv_t *__envp);
int feupdateenv(const fenv_t *__envp);
#else
/*
 * Clear the exception flags selected by __excepts by issuing a CSR
 * bit-clear (csrc) on fflags.  Always returns 0.
 */
__fenv_static inline int
feclearexcept(int __excepts)
{
__asm __volatile("csrc fflags, %0" :: "r"(__excepts));
return (0);
}
/*
 * Store in *__flagp the bits of fcsr that are selected by __excepts.
 * Always returns 0.
 */
__fenv_static inline int
fegetexceptflag(fexcept_t *__flagp, int __excepts)
{
	fexcept_t __csr;

	__rfs(__csr);
	__csr &= __excepts;	/* keep only the requested flags */
	*__flagp = __csr;
	return (0);
}
/*
 * Make the flags named by __excepts match their saved state in *__flagp:
 * every selected flag is first cleared in fflags, then the ones present in
 * the saved value are set again.  Always returns 0.
 */
__fenv_static inline int
fesetexceptflag(const fexcept_t *__flagp, int __excepts)
{
	fexcept_t __wanted;

	/* Saved flags, restricted to the ones the caller asked about. */
	__wanted = *__flagp & __excepts;
	__asm __volatile("csrc fflags, %0" :: "r"(__excepts));
	__asm __volatile("csrs fflags, %0" :: "r"(__wanted));
	return (0);
}
/*
 * Set the exception flags selected by __excepts by issuing a CSR bit-set
 * (csrs) on fflags.  Always returns 0.
 */
__fenv_static inline int
feraiseexcept(int __excepts)
{
__asm __volatile("csrs fflags, %0" :: "r"(__excepts));
return (0);
}
/*
 * Return the bits of fcsr that are selected by __excepts; the result is
 * zero when none of the requested flags is currently set.
 */
__fenv_static inline int
fetestexcept(int __excepts)
{
	fexcept_t __csr;

	__rfs(__csr);
	__csr &= __excepts;
	return (__csr);
}
/*
 * Return the current rounding mode: the _ROUND_MASK bits of fcsr, which
 * compare equal to one of the FE_* rounding constants.
 */
__fenv_static inline int
fegetround(void)
{
	fexcept_t __csr;

	__rfs(__csr);
	__csr &= _ROUND_MASK;
	return (__csr);
}
/*
 * Install __round as the rounding mode.
 *
 * Returns -1 (leaving fcsr untouched) when __round has any bit outside
 * _ROUND_MASK; otherwise replaces the rounding bits of fcsr and returns 0.
 */
__fenv_static inline int
fesetround(int __round)
{
	fexcept_t __csr;

	if ((__round & ~_ROUND_MASK) != 0)
		return (-1);

	__rfs(__csr);
	/* Drop the old mode and merge in the new one in a single step. */
	__csr = (__csr & ~_ROUND_MASK) | __round;
	__wfs(__csr);
	return (0);
}
/*
 * Save the current floating-point environment: reads fcsr straight into
 * *__envp.  Always returns 0.
 */
__fenv_static inline int
fegetenv(fenv_t *__envp)
{
__rfs(*__envp);
return (0);
}
/*
 * Save the current floating-point environment (the whole fcsr) in *__envp
 * and clear all exception flags.
 *
 * There are no exception traps here, so there is nothing to disable:
 * execution already continues past floating-point exceptions.  The call
 * therefore always succeeds and returns 0.
 *
 * *__envp must be written even though no trap state changes, because
 * callers pair this function with feupdateenv()/fesetenv(), which write
 * *__envp back to fcsr; leaving it unset would load an indeterminate value.
 */
__fenv_static inline int
feholdexcept(fenv_t *__envp)
{
	__rfs(*__envp);
	__asm __volatile("csrc fflags, %0" :: "r"(FE_ALL_EXCEPT));
	return (0);
}
/*
 * Install a previously saved floating-point environment: writes *__envp
 * straight to fcsr.  Always returns 0.
 */
__fenv_static inline int
fesetenv(const fenv_t *__envp)
{
__wfs(*__envp);
return (0);
}
/*
 * Install the environment in *__envp while keeping the exceptions that are
 * currently raised: the present flags are captured first, fcsr is then
 * overwritten with *__envp, and finally the captured flags are raised again
 * on top of it.  Always returns 0.
 */
__fenv_static inline int
feupdateenv(const fenv_t *__envp)
{
	fexcept_t __pending;

	__rfs(__pending);
	__pending &= FE_ALL_EXCEPT;	/* only the flag bits are carried over */
	__wfs(*__envp);
	feraiseexcept(__pending);
	return (0);
}
#endif /* !__riscv_float_abi_soft */
#if __BSD_VISIBLE
/* We currently provide no external definitions of the functions below. */
#ifdef __riscv_float_abi_soft
int feenableexcept(int __mask);
int fedisableexcept(int __mask);
int fegetexcept(void);
#else
/*
 * Request trapping for the exceptions in __mask.  This implementation has
 * no exception traps, so the argument is ignored and the call always
 * reports failure by returning -1.
 */
static inline int
feenableexcept(int __mask)
{
/* No exception traps. */
return (-1);
}
/*
 * Request that trapping be disabled for the exceptions in __mask.  With no
 * exception traps there is nothing to disable; the argument is ignored and
 * 0 is returned.
 */
static inline int
fedisableexcept(int __mask)
{
/* No exception traps. */
return (0);
}
/*
 * Report which exceptions currently trap.  With no exception traps the
 * answer is always the empty set, 0.
 */
static inline int
fegetexcept(void)
{
/* No exception traps. */
return (0);
}
#endif /* !__riscv_float_abi_soft */
#endif /* __BSD_VISIBLE */
__END_DECLS
#endif /* !_FENV_H_ */

View file

@ -1,235 +0,0 @@
/*-
* Copyright (c) 2016 Dan Horák <dan[at]danny.cz>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _FENV_H_
#define _FENV_H_
#include <sys/types.h>
#ifndef __fenv_static
#define __fenv_static static
#endif
/*
 * The saved environment and the saved exception flags are both plain 32-bit
 * integers holding (parts of) the hardware control word accessed through
 * _FPU_GETCW/_FPU_SETCW.
 */
typedef __uint32_t fenv_t;
typedef __uint32_t fexcept_t;
/* Exception flags: one bit each, occupying bits 19-23 of the control word. */
#define FE_INEXACT 0x080000
#define FE_UNDERFLOW 0x100000
#define FE_OVERFLOW 0x200000
#define FE_DIVBYZERO 0x400000
#define FE_INVALID 0x800000 /* all types of invalid FP ops */
/* Union of every exception flag defined above. */
#define FE_ALL_EXCEPT (FE_INVALID | FE_DIVBYZERO | FE_INEXACT | FE_OVERFLOW | FE_UNDERFLOW)
/* Rounding modes: a two-bit code in the lowest bits of the control word. */
#define FE_TONEAREST 0x0000
#define FE_TOWARDZERO 0x0001
#define FE_UPWARD 0x0002
#define FE_DOWNWARD 0x0003
/* Bits that can be set by any of the four rounding modes above (0x3). */
#define _ROUND_MASK (FE_TONEAREST | FE_DOWNWARD | \
FE_UPWARD | FE_TOWARDZERO)
__BEGIN_DECLS
/* Default floating-point environment */
extern const fenv_t __fe_dfl_env;
#define FE_DFL_ENV (&__fe_dfl_env)
/*
 * We need to be able to map status flag positions to mask flag positions:
 * each mask (enable) bit sits _FPC_EXC_MASK_SHIFT bits above the status
 * flag for the same exception.
 */
#define _FPC_EXC_MASK_SHIFT 8
/*
 * All mask (enable) bits, i.e. FE_ALL_EXCEPT moved up to the mask field.
 * NOTE(review): the operands are plain ints, so shifting FE_INVALID
 * (0x800000) left by 8 moves a bit into the sign position of a 32-bit int;
 * confirm the compilers in use tolerate this or switch to an unsigned
 * operand.
 */
#define _ENABLE_MASK ((FE_DIVBYZERO | FE_INEXACT | FE_INVALID | \
FE_OVERFLOW | FE_UNDERFLOW) << _FPC_EXC_MASK_SHIFT)
/*
 * Macros for accessing the hardware control word.
 * _FPU_GETCW(cw) reads it into the lvalue cw (efpc instruction);
 * _FPU_SETCW(cw) writes the value cw to it (sfpc instruction).
 */
#define _FPU_GETCW(cw) __asm__ __volatile__ ("efpc %0,0" : "=d" (cw))
#define _FPU_SETCW(cw) __asm__ __volatile__ ("sfpc %0,0" : : "d" (cw))
/*
 * Clear the exception flags selected by __excepts in the control word.
 * Always returns 0.
 *
 * NOTE(review): when FE_INVALID is among the requested flags the mask is
 * widened to FE_ALL_EXCEPT, so every flag is cleared -- confirm that this
 * is intentional.
 */
__fenv_static inline int
feclearexcept(int __excepts)
{
	fexcept_t __cw;
	int __clear = __excepts;

	if ((__clear & FE_INVALID) != 0)
		__clear |= FE_ALL_EXCEPT;

	_FPU_GETCW(__cw);
	__cw &= ~__clear;
	_FPU_SETCW(__cw);
	return (0);
}
/*
 * Store in *__flagp the bits of the control word that are selected by
 * __excepts.  Always returns 0.
 */
__fenv_static inline int
fegetexceptflag(fexcept_t *__flagp, int __excepts)
{
	fexcept_t __cw;

	_FPU_GETCW(__cw);
	__cw &= __excepts;	/* keep only the requested flags */
	*__flagp = __cw;
	return (0);
}
/*
 * Make the flags named by __excepts match their saved state in *__flagp,
 * using a read-modify-write of the control word.  Always returns 0.
 *
 * NOTE(review): as in feclearexcept(), a request that includes FE_INVALID
 * is widened to FE_ALL_EXCEPT before it is applied -- confirm intent.
 */
__fenv_static inline int
fesetexceptflag(const fexcept_t *__flagp, int __excepts)
{
	fexcept_t __cw;
	int __sel = __excepts;

	if ((__sel & FE_INVALID) != 0)
		__sel |= FE_ALL_EXCEPT;

	_FPU_GETCW(__cw);
	/* Drop the selected flags, then merge their saved values back in. */
	__cw = (__cw & ~__sel) | (*__flagp & __sel);
	_FPU_SETCW(__cw);
	return (0);
}
/*
 * Set the exception flags selected by __excepts: reads the control word,
 * ORs the requested bits in and writes it back.  Always returns 0.
 */
__fenv_static inline int
feraiseexcept(int __excepts)
{
fexcept_t __r;
_FPU_GETCW(__r);
__r |= __excepts;
_FPU_SETCW(__r);
return (0);
}
/*
 * Return the bits of the control word that are selected by __excepts; the
 * result is zero when none of the requested flags is currently set.
 */
__fenv_static inline int
fetestexcept(int __excepts)
{
	fexcept_t __cw;

	_FPU_GETCW(__cw);
	__cw &= __excepts;
	return (__cw);
}
/*
 * Return the current rounding mode: the _ROUND_MASK bits of the control
 * word, which compare equal to one of the FE_* rounding constants.
 */
__fenv_static inline int
fegetround(void)
{
	fexcept_t __cw;

	_FPU_GETCW(__cw);
	__cw &= _ROUND_MASK;
	return (__cw);
}
/*
 * Install __round as the rounding mode.
 *
 * Returns -1 (leaving the control word untouched) when __round has any bit
 * outside _ROUND_MASK; otherwise replaces the rounding bits and returns 0.
 */
__fenv_static inline int
fesetround(int __round)
{
	fexcept_t __cw;

	if ((__round & ~_ROUND_MASK) != 0)
		return (-1);

	_FPU_GETCW(__cw);
	/* Drop the old mode and merge in the new one in a single step. */
	__cw = (__cw & ~_ROUND_MASK) | __round;
	_FPU_SETCW(__cw);
	return (0);
}
/*
 * Save the current floating-point environment: reads the control word
 * straight into *__envp.  Always returns 0.
 */
__fenv_static inline int
fegetenv(fenv_t *__envp)
{
_FPU_GETCW(*__envp);
return (0);
}
/*
 * Save the current control word in *__envp, then write it back with both
 * the exception flags (FE_ALL_EXCEPT) and the mask bits (_ENABLE_MASK)
 * cleared.  Always returns 0.
 */
__fenv_static inline int
feholdexcept(fenv_t *__envp)
{
	fexcept_t __cw;

	_FPU_GETCW(__cw);
	*__envp = __cw;		/* caller's copy keeps the original state */

	__cw &= ~(FE_ALL_EXCEPT | _ENABLE_MASK);
	_FPU_SETCW(__cw);
	return (0);
}
/*
 * Install a previously saved floating-point environment: writes *__envp
 * straight to the control word.  Always returns 0.
 */
__fenv_static inline int
fesetenv(const fenv_t *__envp)
{
_FPU_SETCW(*__envp);
return (0);
}
/*
 * Install the environment in *__envp while keeping the exceptions that are
 * currently raised: the present flag bits are ORed into the saved
 * environment and the result is written to the control word.
 * Always returns 0.
 */
__fenv_static inline int
feupdateenv(const fenv_t *__envp)
{
	fexcept_t __cw;

	_FPU_GETCW(__cw);
	/* Carry over only the flag bits, everything else comes from *__envp. */
	__cw = (__cw & FE_ALL_EXCEPT) | *__envp;
	_FPU_SETCW(__cw);
	return (0);
}
#if __BSD_VISIBLE
/* We currently provide no external definitions of the functions below. */
static inline int
feenableexcept(int __mask)
{
fenv_t __r;
fenv_t __oldmask;
_FPU_GETCW(__r);
__oldmask = __r;
__r |= (__mask & FE_ALL_EXCEPT) << _FPC_EXC_MASK_SHIFT;
_FPU_SETCW(__r);
return ((__oldmask & _ENABLE_MASK) >> _FPC_EXC_MASK_SHIFT);
}
static inline int
fedisableexcept(int __mask)
{
fenv_t __r;
fenv_t __oldmask;
_FPU_GETCW(__r);
__oldmask = __r;
__r &= ~((__mask & FE_ALL_EXCEPT) << _FPC_EXC_MASK_SHIFT);
_FPU_SETCW(__r);
return ((__oldmask & _ENABLE_MASK) >> _FPC_EXC_MASK_SHIFT);
}
static inline int
fegetexcept(void)
{
fexcept_t __r;
_FPU_GETCW(__r);
return (__r & (_ENABLE_MASK >> _FPC_EXC_MASK_SHIFT));
}
#endif /* __BSD_VISIBLE */
__END_DECLS
#endif /* !_FENV_H_ */

View file

@ -1,491 +0,0 @@
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/*
* from: @(#)fdlibm.h 5.1 93/09/24
* $FreeBSD: src/lib/msun/src/openlibm.h,v 1.82 2011/11/12 19:55:48 theraven Exp $
*/
#ifdef OPENLIBM_USE_HOST_MATH_H
#include <math.h>
#else /* !OPENLIBM_USE_HOST_MATH_H */
#include <openlibm_defs.h>
#define __BSD_VISIBLE 1
#ifndef OPENLIBM_MATH_H
#define OPENLIBM_MATH_H
#if (defined(_WIN32) || defined (_MSC_VER)) && !defined(__WIN32__)
#define __WIN32__
#endif
#ifndef __pure2
#define __pure2
#endif
/*
* ANSI/POSIX
*/
extern const union __infinity_un {
unsigned char __uc[8];
double __ud;
} __infinity;
extern const union __nan_un {
unsigned char __uc[sizeof(float)];
float __uf;
} __nan;
/* VBS
#if __GNUC_PREREQ__(3, 3) || (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 800)
#define __MATH_BUILTIN_CONSTANTS
#endif
#if __GNUC_PREREQ__(3, 0) && !defined(__INTEL_COMPILER)
#define __MATH_BUILTIN_RELOPS
#endif
*/
//VBS begin
#define __MATH_BUILTIN_CONSTANTS
#define __MATH_BUILTIN_RELOPS
#ifndef __ISO_C_VISIBLE
#define __ISO_C_VISIBLE 1999
#endif
//VBS end
#ifdef __MATH_BUILTIN_CONSTANTS
#define HUGE_VAL __builtin_huge_val()
#else
#define HUGE_VAL (__infinity.__ud)
#endif
#if __ISO_C_VISIBLE >= 1999
#define FP_ILOGB0 (-INT_MAX)
#define FP_ILOGBNAN INT_MAX
#ifdef __MATH_BUILTIN_CONSTANTS
#define HUGE_VALF __builtin_huge_valf()
#define HUGE_VALL __builtin_huge_vall()
#define INFINITY __builtin_inff()
#define NAN __builtin_nanf("")
#else
#define HUGE_VALF (float)HUGE_VAL
#define HUGE_VALL (long double)HUGE_VAL
#define INFINITY HUGE_VALF
#define NAN (__nan.__uf)
#endif /* __MATH_BUILTIN_CONSTANTS */
#define MATH_ERRNO 1
#define MATH_ERREXCEPT 2
#define math_errhandling MATH_ERREXCEPT
#define FP_FAST_FMAF 1
#ifdef __ia64__
#define FP_FAST_FMA 1
#define FP_FAST_FMAL 1
#endif
/*
 * Symbolic constants to classify floating point numbers.  Each class has
 * its own bit, so the values are mutually distinct.
 */
#define FP_INFINITE 0x01
#define FP_NAN 0x02
#define FP_NORMAL 0x04
#define FP_SUBNORMAL 0x08
#define FP_ZERO 0x10
/*
 * Type-generic classification macros.  Each one selects the float, double
 * or long double helper by comparing sizeof(x) with sizeof(float) and
 * sizeof(double); anything else falls through to the long double helper.
 * The argument appears in a sizeof operand and in exactly one selected
 * call.
 */
#define fpclassify(x) \
((sizeof (x) == sizeof (float)) ? __fpclassifyf(x) \
: (sizeof (x) == sizeof (double)) ? __fpclassifyd(x) \
: __fpclassifyl(x))
#define isfinite(x) \
((sizeof (x) == sizeof (float)) ? __isfinitef(x) \
: (sizeof (x) == sizeof (double)) ? __isfinite(x) \
: __isfinitel(x))
/*
 * In isinf and isnan the double case names the macro itself.  A macro is
 * not re-expanded inside its own expansion, so that spelling calls the
 * ordinary function of the same name.
 */
#define isinf(x) \
((sizeof (x) == sizeof (float)) ? __isinff(x) \
: (sizeof (x) == sizeof (double)) ? isinf(x) \
: __isinfl(x))
#define isnan(x) \
((sizeof (x) == sizeof (float)) ? __isnanf(x) \
: (sizeof (x) == sizeof (double)) ? isnan(x) \
: __isnanl(x))
#define isnormal(x) \
((sizeof (x) == sizeof (float)) ? __isnormalf(x) \
: (sizeof (x) == sizeof (double)) ? __isnormal(x) \
: __isnormall(x))
/*
 * Comparison macros.  With __MATH_BUILTIN_RELOPS they map one-to-one onto
 * the compiler's __builtin_is*() forms; otherwise they are spelled with
 * ordinary operators guarded by isunordered().
 */
#ifdef __MATH_BUILTIN_RELOPS
#define isgreater(x, y) __builtin_isgreater((x), (y))
#define isgreaterequal(x, y) __builtin_isgreaterequal((x), (y))
#define isless(x, y) __builtin_isless((x), (y))
#define islessequal(x, y) __builtin_islessequal((x), (y))
#define islessgreater(x, y) __builtin_islessgreater((x), (y))
#define isunordered(x, y) __builtin_isunordered((x), (y))
#else
/* Fallback forms: note that x and y are expanded more than once here. */
#define isgreater(x, y) (!isunordered((x), (y)) && (x) > (y))
#define isgreaterequal(x, y) (!isunordered((x), (y)) && (x) >= (y))
#define isless(x, y) (!isunordered((x), (y)) && (x) < (y))
#define islessequal(x, y) (!isunordered((x), (y)) && (x) <= (y))
#define islessgreater(x, y) (!isunordered((x), (y)) && \
((x) > (y) || (y) > (x)))
#define isunordered(x, y) (isnan(x) || isnan(y))
#endif /* __MATH_BUILTIN_RELOPS */
/* Type-generic sign test, dispatched on sizeof(x) like fpclassify(). */
#define signbit(x) \
((sizeof (x) == sizeof (float)) ? __signbitf(x) \
: (sizeof (x) == sizeof (double)) ? __signbit(x) \
: __signbitl(x))
//VBS
//typedef __double_t double_t;
//typedef __float_t float_t;
#endif /* __ISO_C_VISIBLE >= 1999 */
/*
* XOPEN/SVID
*/
#if __BSD_VISIBLE || __XSI_VISIBLE
#define M_E 2.7182818284590452354 /* e */
#define M_LOG2E 1.4426950408889634074 /* log 2e */
#define M_LOG10E 0.43429448190325182765 /* log 10e */
#define M_LN2 0.69314718055994530942 /* log e2 */
#define M_LN10 2.30258509299404568402 /* log e10 */
#define M_PI 3.14159265358979323846 /* pi */
#define M_PI_2 1.57079632679489661923 /* pi/2 */
#define M_PI_4 0.78539816339744830962 /* pi/4 */
#define M_1_PI 0.31830988618379067154 /* 1/pi */
#define M_2_PI 0.63661977236758134308 /* 2/pi */
#define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(pi) */
#define M_SQRT2 1.41421356237309504880 /* sqrt(2) */
#define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */
#define MAXFLOAT ((float)3.40282346638528860e+38)
#ifndef OPENLIBM_ONLY_THREAD_SAFE
OLM_DLLEXPORT extern int signgam;
#endif
#endif /* __BSD_VISIBLE || __XSI_VISIBLE */
#if __BSD_VISIBLE
#if 0
/* Old value from 4.4BSD-Lite openlibm.h; this is probably better. */
#define HUGE HUGE_VAL
#else
#define HUGE MAXFLOAT
#endif
#endif /* __BSD_VISIBLE */
/*
* Most of these functions depend on the rounding mode and have the side
* effect of raising floating-point exceptions, so they are not declared
* as __pure2. In C99, FENV_ACCESS affects the purity of these functions.
*/
#if defined(__cplusplus)
extern "C" {
#endif
/* Symbol present when OpenLibm is used. */
int isopenlibm(void);
/*
* ANSI/POSIX
*/
OLM_DLLEXPORT int __fpclassifyd(double) __pure2;
OLM_DLLEXPORT int __fpclassifyf(float) __pure2;
OLM_DLLEXPORT int __fpclassifyl(long double) __pure2;
OLM_DLLEXPORT int __isfinitef(float) __pure2;
OLM_DLLEXPORT int __isfinite(double) __pure2;
OLM_DLLEXPORT int __isfinitel(long double) __pure2;
OLM_DLLEXPORT int __isinff(float) __pure2;
OLM_DLLEXPORT int __isinfl(long double) __pure2;
OLM_DLLEXPORT int __isnanf(float) __pure2;
OLM_DLLEXPORT int __isnanl(long double) __pure2;
OLM_DLLEXPORT int __isnormalf(float) __pure2;
OLM_DLLEXPORT int __isnormal(double) __pure2;
OLM_DLLEXPORT int __isnormall(long double) __pure2;
OLM_DLLEXPORT int __signbit(double) __pure2;
OLM_DLLEXPORT int __signbitf(float) __pure2;
OLM_DLLEXPORT int __signbitl(long double) __pure2;
OLM_DLLEXPORT double acos(double);
OLM_DLLEXPORT double asin(double);
OLM_DLLEXPORT double atan(double);
OLM_DLLEXPORT double atan2(double, double);
OLM_DLLEXPORT double cos(double);
OLM_DLLEXPORT double sin(double);
OLM_DLLEXPORT double tan(double);
OLM_DLLEXPORT double cosh(double);
OLM_DLLEXPORT double sinh(double);
OLM_DLLEXPORT double tanh(double);
OLM_DLLEXPORT double exp(double);
OLM_DLLEXPORT double frexp(double, int *); /* fundamentally !__pure2 */
OLM_DLLEXPORT double ldexp(double, int);
OLM_DLLEXPORT double log(double);
OLM_DLLEXPORT double log10(double);
OLM_DLLEXPORT double modf(double, double *); /* fundamentally !__pure2 */
OLM_DLLEXPORT double pow(double, double);
OLM_DLLEXPORT double sqrt(double);
OLM_DLLEXPORT double ceil(double);
OLM_DLLEXPORT double fabs(double) __pure2;
OLM_DLLEXPORT double floor(double);
OLM_DLLEXPORT double fmod(double, double);
/*
* These functions are not in C90.
*/
#if __BSD_VISIBLE || __ISO_C_VISIBLE >= 1999 || __XSI_VISIBLE
OLM_DLLEXPORT double acosh(double);
OLM_DLLEXPORT double asinh(double);
OLM_DLLEXPORT double atanh(double);
OLM_DLLEXPORT double cbrt(double);
OLM_DLLEXPORT double erf(double);
OLM_DLLEXPORT double erfc(double);
OLM_DLLEXPORT double exp2(double);
OLM_DLLEXPORT double expm1(double);
OLM_DLLEXPORT double fma(double, double, double);
OLM_DLLEXPORT double hypot(double, double);
OLM_DLLEXPORT int ilogb(double) __pure2;
OLM_DLLEXPORT int (isinf)(double) __pure2;
OLM_DLLEXPORT int (isnan)(double) __pure2;
OLM_DLLEXPORT double lgamma(double);
OLM_DLLEXPORT long long llrint(double);
OLM_DLLEXPORT long long llround(double);
OLM_DLLEXPORT double log1p(double);
OLM_DLLEXPORT double log2(double);
OLM_DLLEXPORT double logb(double);
OLM_DLLEXPORT long lrint(double);
OLM_DLLEXPORT long lround(double);
OLM_DLLEXPORT double nan(const char *) __pure2;
OLM_DLLEXPORT double nextafter(double, double);
OLM_DLLEXPORT double remainder(double, double);
OLM_DLLEXPORT double remquo(double, double, int *);
OLM_DLLEXPORT double rint(double);
#endif /* __BSD_VISIBLE || __ISO_C_VISIBLE >= 1999 || __XSI_VISIBLE */
#if __BSD_VISIBLE || __XSI_VISIBLE
OLM_DLLEXPORT double j0(double);
OLM_DLLEXPORT double j1(double);
OLM_DLLEXPORT double jn(int, double);
OLM_DLLEXPORT double y0(double);
OLM_DLLEXPORT double y1(double);
OLM_DLLEXPORT double yn(int, double);
#endif /* __BSD_VISIBLE || __XSI_VISIBLE */
#if __BSD_VISIBLE || __ISO_C_VISIBLE >= 1999
OLM_DLLEXPORT double copysign(double, double) __pure2;
OLM_DLLEXPORT double fdim(double, double);
OLM_DLLEXPORT double fmax(double, double) __pure2;
OLM_DLLEXPORT double fmin(double, double) __pure2;
OLM_DLLEXPORT double nearbyint(double);
OLM_DLLEXPORT double round(double);
OLM_DLLEXPORT double scalbln(double, long);
OLM_DLLEXPORT double scalbn(double, int);
OLM_DLLEXPORT double tgamma(double);
OLM_DLLEXPORT double trunc(double);
#endif
/*
* BSD math library entry points
*/
#if __BSD_VISIBLE
OLM_DLLEXPORT int isinff(float) __pure2;
OLM_DLLEXPORT int isnanf(float) __pure2;
/*
* Reentrant version of lgamma; passes signgam back by reference as the
* second argument; user must allocate space for signgam.
*/
OLM_DLLEXPORT double lgamma_r(double, int *);
/*
* Single sine/cosine function.
*/
OLM_DLLEXPORT void sincos(double, double *, double *);
#endif /* __BSD_VISIBLE */
/* float versions of ANSI/POSIX functions */
#if __ISO_C_VISIBLE >= 1999
OLM_DLLEXPORT float acosf(float);
OLM_DLLEXPORT float asinf(float);
OLM_DLLEXPORT float atanf(float);
OLM_DLLEXPORT float atan2f(float, float);
OLM_DLLEXPORT float cosf(float);
OLM_DLLEXPORT float sinf(float);
OLM_DLLEXPORT float tanf(float);
OLM_DLLEXPORT float coshf(float);
OLM_DLLEXPORT float sinhf(float);
OLM_DLLEXPORT float tanhf(float);
OLM_DLLEXPORT float exp2f(float);
OLM_DLLEXPORT float expf(float);
OLM_DLLEXPORT float expm1f(float);
OLM_DLLEXPORT float frexpf(float, int *); /* fundamentally !__pure2 */
OLM_DLLEXPORT int ilogbf(float) __pure2;
OLM_DLLEXPORT float ldexpf(float, int);
OLM_DLLEXPORT float log10f(float);
OLM_DLLEXPORT float log1pf(float);
OLM_DLLEXPORT float log2f(float);
OLM_DLLEXPORT float logf(float);
OLM_DLLEXPORT float modff(float, float *); /* fundamentally !__pure2 */
OLM_DLLEXPORT float powf(float, float);
OLM_DLLEXPORT float sqrtf(float);
OLM_DLLEXPORT float ceilf(float);
OLM_DLLEXPORT float fabsf(float) __pure2;
OLM_DLLEXPORT float floorf(float);
OLM_DLLEXPORT float fmodf(float, float);
OLM_DLLEXPORT float roundf(float);
OLM_DLLEXPORT float erff(float);
OLM_DLLEXPORT float erfcf(float);
OLM_DLLEXPORT float hypotf(float, float);
OLM_DLLEXPORT float lgammaf(float);
OLM_DLLEXPORT float tgammaf(float);
OLM_DLLEXPORT float acoshf(float);
OLM_DLLEXPORT float asinhf(float);
OLM_DLLEXPORT float atanhf(float);
OLM_DLLEXPORT float cbrtf(float);
OLM_DLLEXPORT float logbf(float);
OLM_DLLEXPORT float copysignf(float, float) __pure2;
OLM_DLLEXPORT long long llrintf(float);
OLM_DLLEXPORT long long llroundf(float);
OLM_DLLEXPORT long lrintf(float);
OLM_DLLEXPORT long lroundf(float);
OLM_DLLEXPORT float nanf(const char *) __pure2;
OLM_DLLEXPORT float nearbyintf(float);
OLM_DLLEXPORT float nextafterf(float, float);
OLM_DLLEXPORT float remainderf(float, float);
OLM_DLLEXPORT float remquof(float, float, int *);
OLM_DLLEXPORT float rintf(float);
OLM_DLLEXPORT float scalblnf(float, long);
OLM_DLLEXPORT float scalbnf(float, int);
OLM_DLLEXPORT float truncf(float);
OLM_DLLEXPORT float fdimf(float, float);
OLM_DLLEXPORT float fmaf(float, float, float);
OLM_DLLEXPORT float fmaxf(float, float) __pure2;
OLM_DLLEXPORT float fminf(float, float) __pure2;
#endif
/*
* float versions of BSD math library entry points
*/
#if __BSD_VISIBLE
OLM_DLLEXPORT float dremf(float, float);
OLM_DLLEXPORT float j0f(float);
OLM_DLLEXPORT float j1f(float);
OLM_DLLEXPORT float jnf(int, float);
OLM_DLLEXPORT float y0f(float);
OLM_DLLEXPORT float y1f(float);
OLM_DLLEXPORT float ynf(int, float);
/*
* Float versions of reentrant version of lgamma; passes signgam back by
* reference as the second argument; user must allocate space for signgam.
*/
OLM_DLLEXPORT float lgammaf_r(float, int *);
/*
* Single sine/cosine function.
*/
OLM_DLLEXPORT void sincosf(float, float *, float *);
#endif /* __BSD_VISIBLE */
/*
* long double versions of ISO/POSIX math functions
*/
#if __ISO_C_VISIBLE >= 1999
OLM_DLLEXPORT long double acoshl(long double);
OLM_DLLEXPORT long double acosl(long double);
OLM_DLLEXPORT long double asinhl(long double);
OLM_DLLEXPORT long double asinl(long double);
OLM_DLLEXPORT long double atan2l(long double, long double);
OLM_DLLEXPORT long double atanhl(long double);
OLM_DLLEXPORT long double atanl(long double);
OLM_DLLEXPORT long double cbrtl(long double);
OLM_DLLEXPORT long double ceill(long double);
OLM_DLLEXPORT long double copysignl(long double, long double) __pure2;
OLM_DLLEXPORT long double coshl(long double);
OLM_DLLEXPORT long double cosl(long double);
OLM_DLLEXPORT long double erfcl(long double);
OLM_DLLEXPORT long double erfl(long double);
OLM_DLLEXPORT long double exp2l(long double);
OLM_DLLEXPORT long double expl(long double);
OLM_DLLEXPORT long double expm1l(long double);
OLM_DLLEXPORT long double fabsl(long double) __pure2;
OLM_DLLEXPORT long double fdiml(long double, long double);
OLM_DLLEXPORT long double floorl(long double);
OLM_DLLEXPORT long double fmal(long double, long double, long double);
OLM_DLLEXPORT long double fmaxl(long double, long double) __pure2;
OLM_DLLEXPORT long double fminl(long double, long double) __pure2;
OLM_DLLEXPORT long double fmodl(long double, long double);
OLM_DLLEXPORT long double frexpl(long double value, int *); /* fundamentally !__pure2 */
OLM_DLLEXPORT long double hypotl(long double, long double);
OLM_DLLEXPORT int ilogbl(long double) __pure2;
OLM_DLLEXPORT long double ldexpl(long double, int);
OLM_DLLEXPORT long double lgammal(long double);
OLM_DLLEXPORT long long llrintl(long double);
OLM_DLLEXPORT long long llroundl(long double);
OLM_DLLEXPORT long double log10l(long double);
OLM_DLLEXPORT long double log1pl(long double);
OLM_DLLEXPORT long double log2l(long double);
OLM_DLLEXPORT long double logbl(long double);
OLM_DLLEXPORT long double logl(long double);
OLM_DLLEXPORT long lrintl(long double);
OLM_DLLEXPORT long lroundl(long double);
OLM_DLLEXPORT long double modfl(long double, long double *); /* fundamentally !__pure2 */
OLM_DLLEXPORT long double nanl(const char *) __pure2;
OLM_DLLEXPORT long double nearbyintl(long double);
OLM_DLLEXPORT long double nextafterl(long double, long double);
OLM_DLLEXPORT double nexttoward(double, long double);
OLM_DLLEXPORT float nexttowardf(float, long double);
OLM_DLLEXPORT long double nexttowardl(long double, long double);
OLM_DLLEXPORT long double powl(long double, long double);
OLM_DLLEXPORT long double remainderl(long double, long double);
OLM_DLLEXPORT long double remquol(long double, long double, int *);
OLM_DLLEXPORT long double rintl(long double);
OLM_DLLEXPORT long double roundl(long double);
OLM_DLLEXPORT long double scalblnl(long double, long);
OLM_DLLEXPORT long double scalbnl(long double, int);
OLM_DLLEXPORT long double sinhl(long double);
OLM_DLLEXPORT long double sinl(long double);
OLM_DLLEXPORT long double sqrtl(long double);
OLM_DLLEXPORT long double tanhl(long double);
OLM_DLLEXPORT long double tanl(long double);
OLM_DLLEXPORT long double tgammal(long double);
OLM_DLLEXPORT long double truncl(long double);
#endif /* __ISO_C_VISIBLE >= 1999 */
/* Reentrant version of lgammal. */
#if __BSD_VISIBLE
OLM_DLLEXPORT long double lgammal_r(long double, int *);
/*
* Single sine/cosine function.
*/
OLM_DLLEXPORT void sincosl(long double, long double *, long double *);
#endif /* __BSD_VISIBLE */
#if defined(__cplusplus)
}
#endif
#endif /* !OPENLIBM_MATH_H */
#endif /* OPENLIBM_USE_HOST_MATH_H */

View file

@ -0,0 +1,25 @@
/*
 * Shared declarations for the Whetstone benchmark: FORTRAN-to-C name
 * mappings, prototypes of the benchmark procedures and the run-time
 * configuration record.
 *
 * The include guard makes the header safe to include more than once;
 * without it a second inclusion would redefine the anonymous-struct
 * typedef below.
 */
#ifndef WHESTONE_H
#define WHESTONE_H

/* map the FORTRAN math functions, etc. to the C versions */
#define DSIN sin
#define DCOS cos
#define DATAN atan
#define DLOG log
#define DEXP exp
#define DSQRT sqrt
#define IF if

/* function prototypes */
void POUT(long N, long J, long K, double X1, double X2, double X3, double X4);
void PA(double E[]);
void P0(void);
void P3(double X, double Y, double *Z);

#define USAGE "usage: whetdc [-c] [loops]\n"

/*
	COMMON T,T1,T2,E1(4),J,K,L
*/

/* Benchmark configuration: loopstart is the initial loop count. */
typedef struct {
	long loopstart;
} bench_whestone_config;

#endif /* !WHESTONE_H */

View file

@ -54,43 +54,22 @@ C**********************************************************************
/* standard C library headers required */ /* standard C library headers required */
#include <am.h> #include <am.h>
#include <bench.h> #include <bench.h>
#include <klib.h>
#include <klib-macros.h>
#include <float.h> #include <float.h>
#include <klib-macros.h>
#include <klib.h>
#include <openlibm.h> #include <openlibm.h>
#include <stdint.h> #include <stdint.h>
#include <whestone.h>
/* the following is optional depending on the timing function used */ double T, T1, T2, E1[5];
int J, K, L;
/* map the FORTRAN math functions, etc. to the C versions */ extern bench_whestone_config config;
#define DSIN sin int main(int argc, char *argv[]) {
#define DCOS cos
#define DATAN atan
#define DLOG log
#define DEXP exp
#define DSQRT sqrt
#define IF if
/* function prototypes */
void POUT(long N, long J, long K, double X1, double X2, double X3, double X4);
void PA(double E[]);
void P0(void);
void P3(double X, double Y, double *Z);
#define USAGE "usage: whetdc [-c] [loops]\n"
/*
COMMON T,T1,T2,E1(4),J,K,L
*/
double T,T1,T2,E1[5];
int J,K,L;
int
main(int argc, char *argv[])
{
/* used in the FORTRAN version */ /* used in the FORTRAN version */
long I1; long I1;
long N1, N2, N3, N4, N6, N7, N8, N9, N10, N11; long N1, N2, N3, N4, N6, N7, N8, N9, N10, N11;
double X1,X2,X3,X4,X,Y,Z; double X1, X2, X3, X4, X, Y, Z;
long LOOP; long LOOP;
int II, JJ; int II, JJ;
@ -100,34 +79,34 @@ main(int argc, char *argv[])
float KIPS; float KIPS;
int continuous; int continuous;
//loopstart = 1000; /* see the note about LOOP below */ // loopstart = 1000; /* see the note about LOOP below */
loopstart = 200; loopstart = config.loopstart;
continuous = 0; continuous = 0;
LCONT: LCONT:
/* /*
C C
C Start benchmark timing at this point. C Start benchmark timing at this point.
C C
*/ */
startsec = uptime(); startsec = uptime();
/* /*
C C
C The actual benchmark starts here. C The actual benchmark starts here.
C C
*/ */
T = .499975; T = .499975;
T1 = 0.50025; T1 = 0.50025;
T2 = 2.0; T2 = 2.0;
/* /*
C C
C With loopcount LOOP=10, one million Whetstone instructions C With loopcount LOOP=10, one million Whetstone instructions
C will be executed in EACH MAJOR LOOP..A MAJOR LOOP IS EXECUTED C will be executed in EACH MAJOR LOOP..A MAJOR LOOP IS EXECUTED
C 'II' TIMES TO INCREASE WALL-CLOCK TIMING ACCURACY. C 'II' TIMES TO INCREASE WALL-CLOCK TIMING ACCURACY.
C C
LOOP = 1000; LOOP = 1000;
*/ */
LOOP = loopstart; LOOP = loopstart;
II = 1; II = 1;
@ -144,11 +123,11 @@ IILOOP:
N9 = 616 * LOOP; N9 = 616 * LOOP;
N10 = 0; N10 = 0;
N11 = 93 * LOOP; N11 = 93 * LOOP;
/* /*
C C
C Module 1: Simple identifiers C Module 1: Simple identifiers
C C
*/ */
X1 = 1.0; X1 = 1.0;
X2 = -1.0; X2 = -1.0;
X3 = -1.0; X3 = -1.0;
@ -158,50 +137,50 @@ C
X1 = (X1 + X2 + X3 - X4) * T; X1 = (X1 + X2 + X3 - X4) * T;
X2 = (X1 + X2 - X3 + X4) * T; X2 = (X1 + X2 - X3 + X4) * T;
X3 = (X1 - X2 + X3 + X4) * T; X3 = (X1 - X2 + X3 + X4) * T;
X4 = (-X1+ X2 + X3 + X4) * T; X4 = (-X1 + X2 + X3 + X4) * T;
} }
#ifdef PRINTOUT #ifdef PRINTOUT
IF (JJ==II)POUT(N1,N1,N1,X1,X2,X3,X4); IF(JJ == II) POUT(N1, N1, N1, X1, X2, X3, X4);
#endif #endif
/* /*
C C
C Module 2: Array elements C Module 2: Array elements
C C
*/ */
E1[1] = 1.0; E1[1] = 1.0;
E1[2] = -1.0; E1[2] = -1.0;
E1[3] = -1.0; E1[3] = -1.0;
E1[4] = -1.0; E1[4] = -1.0;
for (I1 = 1; I1 <= N2; I1++) { for (I1 = 1; I1 <= N2; I1++) {
E1[1] = ( E1[1] + E1[2] + E1[3] - E1[4]) * T; E1[1] = (E1[1] + E1[2] + E1[3] - E1[4]) * T;
E1[2] = ( E1[1] + E1[2] - E1[3] + E1[4]) * T; E1[2] = (E1[1] + E1[2] - E1[3] + E1[4]) * T;
E1[3] = ( E1[1] - E1[2] + E1[3] + E1[4]) * T; E1[3] = (E1[1] - E1[2] + E1[3] + E1[4]) * T;
E1[4] = (-E1[1] + E1[2] + E1[3] + E1[4]) * T; E1[4] = (-E1[1] + E1[2] + E1[3] + E1[4]) * T;
} }
#ifdef PRINTOUT #ifdef PRINTOUT
IF (JJ==II)POUT(N2,N3,N2,E1[1],E1[2],E1[3],E1[4]); IF(JJ == II) POUT(N2, N3, N2, E1[1], E1[2], E1[3], E1[4]);
#endif #endif
/* /*
C C
C Module 3: Array as parameter C Module 3: Array as parameter
C C
*/ */
for (I1 = 1; I1 <= N3; I1++) for (I1 = 1; I1 <= N3; I1++)
PA(E1); PA(E1);
#ifdef PRINTOUT #ifdef PRINTOUT
IF (JJ==II)POUT(N3,N2,N2,E1[1],E1[2],E1[3],E1[4]); IF(JJ == II) POUT(N3, N2, N2, E1[1], E1[2], E1[3], E1[4]);
#endif #endif
/* /*
C C
C Module 4: Conditional jumps C Module 4: Conditional jumps
C C
*/ */
J = 1; J = 1;
for (I1 = 1; I1 <= N4; I1++) { for (I1 = 1; I1 <= N4; I1++) {
if (J == 1) if (J == 1)
@ -221,70 +200,70 @@ C
} }
#ifdef PRINTOUT #ifdef PRINTOUT
IF (JJ==II)POUT(N4,J,J,X1,X2,X3,X4); IF(JJ == II) POUT(N4, J, J, X1, X2, X3, X4);
#endif #endif
/* /*
C C
C Module 5: Omitted C Module 5: Omitted
C Module 6: Integer arithmetic C Module 6: Integer arithmetic
C C
*/ */
J = 1; J = 1;
K = 2; K = 2;
L = 3; L = 3;
for (I1 = 1; I1 <= N6; I1++) { for (I1 = 1; I1 <= N6; I1++) {
J = J * (K-J) * (L-K); J = J * (K - J) * (L - K);
K = L * K - (L-J) * K; K = L * K - (L - J) * K;
L = (L-K) * (K+J); L = (L - K) * (K + J);
E1[L-1] = J + K + L; E1[L - 1] = J + K + L;
E1[K-1] = J * K * L; E1[K - 1] = J * K * L;
} }
#ifdef PRINTOUT #ifdef PRINTOUT
IF (JJ==II)POUT(N6,J,K,E1[1],E1[2],E1[3],E1[4]); IF(JJ == II) POUT(N6, J, K, E1[1], E1[2], E1[3], E1[4]);
#endif #endif
/* /*
C C
C Module 7: Trigonometric functions C Module 7: Trigonometric functions
C C
*/ */
X = 0.5; X = 0.5;
Y = 0.5; Y = 0.5;
for (I1 = 1; I1 <= N7; I1++) { for (I1 = 1; I1 <= N7; I1++) {
X = T * DATAN(T2*DSIN(X)*DCOS(X)/(DCOS(X+Y)+DCOS(X-Y)-1.0)); X = T * DATAN(T2 * DSIN(X) * DCOS(X) / (DCOS(X + Y) + DCOS(X - Y) - 1.0));
Y = T * DATAN(T2*DSIN(Y)*DCOS(Y)/(DCOS(X+Y)+DCOS(X-Y)-1.0)); Y = T * DATAN(T2 * DSIN(Y) * DCOS(Y) / (DCOS(X + Y) + DCOS(X - Y) - 1.0));
} }
#ifdef PRINTOUT #ifdef PRINTOUT
IF (JJ==II)POUT(N7,J,K,X,X,Y,Y); IF(JJ == II) POUT(N7, J, K, X, X, Y, Y);
#endif #endif
/* /*
C C
C Module 8: Procedure calls C Module 8: Procedure calls
C C
*/ */
X = 1.0; X = 1.0;
Y = 1.0; Y = 1.0;
Z = 1.0; Z = 1.0;
for (I1 = 1; I1 <= N8; I1++) for (I1 = 1; I1 <= N8; I1++)
P3(X,Y,&Z); P3(X, Y, &Z);
#ifdef PRINTOUT #ifdef PRINTOUT
IF (JJ==II)POUT(N8,J,K,X,Y,Z,Z); IF(JJ == II) POUT(N8, J, K, X, Y, Z, Z);
#endif #endif
/* /*
C C
C Module 9: Array references C Module 9: Array references
C C
*/ */
J = 1; J = 1;
K = 2; K = 2;
L = 3; L = 3;
@ -296,14 +275,14 @@ C
P0(); P0();
#ifdef PRINTOUT #ifdef PRINTOUT
IF (JJ==II)POUT(N9,J,K,E1[1],E1[2],E1[3],E1[4]); IF(JJ == II) POUT(N9, J, K, E1[1], E1[2], E1[3], E1[4]);
#endif #endif
/* /*
C C
C Module 10: Integer arithmetic C Module 10: Integer arithmetic
C C
*/ */
J = 2; J = 2;
K = 3; K = 3;
@ -315,59 +294,60 @@ C
} }
#ifdef PRINTOUT #ifdef PRINTOUT
IF (JJ==II)POUT(N10,J,K,X1,X2,X3,X4); IF(JJ == II) POUT(N10, J, K, X1, X2, X3, X4);
#endif #endif
/* /*
C C
C Module 11: Standard functions C Module 11: Standard functions
C C
*/ */
X = 0.75; X = 0.75;
for (I1 = 1; I1 <= N11; I1++) for (I1 = 1; I1 <= N11; I1++)
X = DSQRT(DEXP(DLOG(X)/T1)); X = DSQRT(DEXP(DLOG(X) / T1));
#ifdef PRINTOUT #ifdef PRINTOUT
IF (JJ==II)POUT(N11,J,K,X,X,X,X); IF(JJ == II) POUT(N11, J, K, X, X, X, X);
#endif #endif
/* /*
C C
C THIS IS THE END OF THE MAJOR LOOP. C THIS IS THE END OF THE MAJOR LOOP.
C C
*/ */
if (++JJ <= II) if (++JJ <= II)
goto IILOOP; goto IILOOP;
/* /*
C C
C Stop benchmark timing at this point. C Stop benchmark timing at this point.
C C
*/ */
finisec = uptime(); finisec = uptime();
/* /*
C---------------------------------------------------------------- C----------------------------------------------------------------
C Performance in Whetstone KIP's per second is given by C Performance in Whetstone KIP's per second is given by
C C
C (100*LOOP*II)/TIME C (100*LOOP*II)/TIME
C C
C where TIME is in seconds. C where TIME is in seconds.
C-------------------------------------------------------------------- C--------------------------------------------------------------------
*/ */
printf("\n"); printf("\n");
if (finisec-startsec <= 0) { if (finisec - startsec <= 0) {
printf("Insufficient duration- Increase the LOOP count\n"); printf("Insufficient duration- Increase the LOOP count\n");
return(1); return (1);
} }
printf("Loops: %ld, Iterations: %d, Duration: %ld sec.\n", printf("Loops: %ld, Iterations: %d, Duration: %ld sec.\n", LOOP, II,
LOOP, II, finisec-startsec); finisec - startsec);
KIPS = (100.0*LOOP*II)/(float)(finisec-startsec); KIPS = (100.0 * LOOP * II) / (float)(finisec - startsec);
if (KIPS >= 1000.0) if (KIPS >= 1000.0)
printf("C Converted Double Precision Whetstones: %.1f MWIPS\n", KIPS/1000.0); printf("C Converted Double Precision Whetstones: %.1f MWIPS\n",
KIPS / 1000.0);
else else
printf("C Converted Double Precision Whetstones: %.1f KWIPS\n", KIPS); printf("C Converted Double Precision Whetstones: %.1f KWIPS\n", KIPS);
@ -378,15 +358,13 @@ C--------------------------------------------------------------------
return 0; return 0;
} }
void void PA(double E[]) {
PA(double E[])
{
J = 0; J = 0;
L10: L10:
E[1] = ( E[1] + E[2] + E[3] - E[4]) * T; E[1] = (E[1] + E[2] + E[3] - E[4]) * T;
E[2] = ( E[1] + E[2] - E[3] + E[4]) * T; E[2] = (E[1] + E[2] - E[3] + E[4]) * T;
E[3] = ( E[1] - E[2] + E[3] + E[4]) * T; E[3] = (E[1] - E[2] + E[3] + E[4]) * T;
E[4] = (-E[1] + E[2] + E[3] + E[4]) / T2; E[4] = (-E[1] + E[2] + E[3] + E[4]) / T2;
J += 1; J += 1;
@ -394,17 +372,13 @@ L10:
goto L10; goto L10;
} }
void void P0(void) {
P0(void)
{
E1[J] = E1[K]; E1[J] = E1[K];
E1[K] = E1[L]; E1[K] = E1[L];
E1[L] = E1[J]; E1[L] = E1[J];
} }
void void P3(double X, double Y, double *Z) {
P3(double X, double Y, double *Z)
{
double X1, Y1; double X1, Y1;
X1 = X; X1 = X;
@ -415,10 +389,8 @@ P3(double X, double Y, double *Z)
} }
#ifdef PRINTOUT #ifdef PRINTOUT
void void POUT(long N, long J, long K, double X1, double X2, double X3, double X4) {
POUT(long N, long J, long K, double X1, double X2, double X3, double X4) printf("%7ld %7ld %7ld %12.4e %12.4e %12.4e %12.4e\n", N, J, K, X1, X2, X3,
{ X4);
printf("%7ld %7ld %7ld %12.4e %12.4e %12.4e %12.4e\n",
N, J, K, X1, X2, X3, X4);
} }
#endif #endif