Feat: gemm, linpack, stream, whetstone out-of-box in am-kernel.
This commit is contained in:
parent
5cd6edc02e
commit
cbee9324c1
399 changed files with 55469 additions and 4 deletions
3
src/gemm/Makefile
Normal file
3
src/gemm/Makefile
Normal file
|
@ -0,0 +1,3 @@
|
|||
# Build description for the gemm benchmark.
# NAME is set to "gemm"; presumably the included AM Makefile uses it as the
# output image name -- confirm in $(AM_HOME)/Makefile.
NAME = gemm
|
||||
# Sources: every *.c file found under soft-fp/ plus gemm.c and matmul.c.
SRCS = $(shell find soft-fp/ -name "*.c") gemm.c matmul.c
|
||||
# Pull in the Makefile located at $(AM_HOME).
include $(AM_HOME)/Makefile
|
|
@ -34,9 +34,9 @@ void display(double * matrix, int m, int n){
|
|||
|
||||
int main(){
|
||||
|
||||
int m = 200;
|
||||
int n = 200;
|
||||
int k = 200;
|
||||
int m = 100;
|
||||
int n = 100;
|
||||
int k = 100;
|
||||
|
||||
double * A = (double*)malloc(m*k*sizeof(double));
|
||||
double * B = (double*)malloc(k*n*sizeof(double));
|
||||
|
@ -72,4 +72,4 @@ int main(){
|
|||
|
||||
printf("Dot product took %f seconds GFLOPS : %f\n",duration,gflops/duration);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
|
10
src/gemm/include/gemm.h
Normal file
10
src/gemm/include/gemm.h
Normal file
|
@ -0,0 +1,10 @@
|
|||
#ifndef GEMM_H
#define GEMM_H

/*
 * Declarations shared by the gemm benchmark sources.
 *
 * The include guard makes it safe to include this header more than once in
 * the same translation unit.
 */

#include <am.h>
#include <klib.h>
#include <klib-macros.h>

/*
 * Helper routines used by the matrix-multiply implementation.
 * NOTE(review): the prototypes carry no parameter names; see the definitions
 * (presumably in matmul.c) for the meaning of each argument.
 */
void AddDot4x4( int, double *, int, double *, int, double *, int );
void PackMatrixA( int, double *, int, double * );
void PackMatrixB( int, double *, int, double * );
void InnerKernel( int, int, int, double *, int, double *, int, double *, int, int );

/*
 * Matrix-multiply entry point.
 * Takes the sizes m, n, k, the matrices a, b, c and one int (lda, ldb, ldc)
 * per matrix.
 * NOTE(review): lda/ldb/ldc are presumably the leading dimensions of a, b
 * and c -- confirm against the definition.
 */
void matmul( int m, int n, int k, double *a, int lda, double *b, int ldb,double *c, int ldc );

#endif /* GEMM_H */
|
76
src/gemm/soft-fp/aa-README.txt
Normal file
76
src/gemm/soft-fp/aa-README.txt
Normal file
|
@ -0,0 +1,76 @@
|
|||
https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html
|
||||
|
||||
1.Arithmetic functions
|
||||
|
||||
Runtime Function: float __addsf3 (float a, float b)
|
||||
Runtime Function: double __adddf3 (double a, double b)
|
||||
These functions return the sum of a and b.
|
||||
|
||||
Runtime Function: float __subsf3 (float a, float b)
|
||||
Runtime Function: double __subdf3 (double a, double b)
|
||||
These functions return the difference of a and b; that is, a - b.
|
||||
|
||||
Runtime Function: float __mulsf3 (float a, float b)
|
||||
Runtime Function: double __muldf3 (double a, double b)
|
||||
These functions return the product of a and b.
|
||||
|
||||
Runtime Function: float __divsf3 (float a, float b)
|
||||
Runtime Function: double __divdf3 (double a, double b)
|
||||
These functions return the quotient of a and b; that is, a / b.
|
||||
|
||||
Runtime Function: float __negsf2 (float a)
|
||||
Runtime Function: double __negdf2 (double a)
|
||||
These functions return the negation of a. They simply flip the sign bit, so they can produce negative zero and negative NaN.
|
||||
|
||||
2.Conversion functions
|
||||
|
||||
Runtime Function: double __extendsfdf2 (float a)
|
||||
This function extends a to the wider mode of its return type.
|
||||
|
||||
Runtime Function: float __truncdfsf2 (double a)
|
||||
This function truncates a to the narrower mode of its return type, rounding toward zero.
|
||||
|
||||
Runtime Function: int __fixsfsi (float a)
|
||||
Runtime Function: int __fixdfsi (double a)
|
||||
These functions convert a to a signed integer, rounding toward zero.
|
||||
|
||||
Runtime Function: long __fixsfdi (float a)
|
||||
Runtime Function: long __fixdfdi (double a)
|
||||
These functions convert a to a signed long, rounding toward zero.
|
||||
|
||||
Runtime Function: long long __fixsfti (float a)
|
||||
Runtime Function: long long __fixdfti (double a)
|
||||
These functions convert a to a signed long long, rounding toward zero.
|
||||
|
||||
|
||||
Runtime Function: unsigned int __fixunssfsi (float a)
|
||||
Runtime Function: unsigned int __fixunsdfsi (double a)
|
||||
These functions convert a to an unsigned integer, rounding toward zero. Negative values all become zero.
|
||||
|
||||
Runtime Function: unsigned long __fixunssfdi (float a)
|
||||
Runtime Function: unsigned long __fixunsdfdi (double a)
|
||||
These functions convert a to an unsigned long, rounding toward zero. Negative values all become zero.
|
||||
|
||||
Runtime Function: unsigned long long __fixunssfti (float a)
|
||||
Runtime Function: unsigned long long __fixunsdfti (double a)
|
||||
These functions convert a to an unsigned long long, rounding toward zero. Negative values all become zero.
|
||||
|
||||
|
||||
Runtime Function: float __floatsisf (int i)
|
||||
Runtime Function: double __floatsidf (int i)
|
||||
These functions convert i, a signed integer, to floating point.
|
||||
|
||||
Runtime Function: float __floatdisf (long i)
|
||||
Runtime Function: double __floatdidf (long i)
|
||||
These functions convert i, a signed long, to floating point.
|
||||
|
||||
|
||||
Runtime Function: float __floatunsisf (unsigned int i)
|
||||
Runtime Function: double __floatunsidf (unsigned int i)
|
||||
These functions convert i, an unsigned integer, to floating point.
|
||||
|
||||
Runtime Function: float __floatundisf (unsigned long i)
|
||||
Runtime Function: double __floatundidf (unsigned long i)
|
||||
These functions convert i, an unsigned long, to floating point.
|
||||
|
||||
3.Comparison functions
|
21
src/gemm/soft-fp/adddf3.c
Normal file
21
src/gemm/soft-fp/adddf3.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
/* Soft-float double-precision addition: returns the sum of a and b.
   FP_DECL_EX, FP_INIT_ROUNDMODE and FP_HANDLE_EXCEPTIONS come from
   soft-fp.h (not visible here); presumably they declare, initialise and
   report the exception/rounding state -- confirm there.  */
DFtype
|
||||
__adddf3 (DFtype a, DFtype b)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
/* Unpacked working copies of the two operands and of the result.  */
FP_DECL_D (A);
|
||||
FP_DECL_D (B);
|
||||
FP_DECL_D (R);
|
||||
DFtype r;
|
||||
|
||||
FP_INIT_ROUNDMODE;
|
||||
/* Unpack a and b into semi-raw form, add, then repack into r.  */
FP_UNPACK_SEMIRAW_D (A, a);
|
||||
FP_UNPACK_SEMIRAW_D (B, b);
|
||||
FP_ADD_D (R, A, B);
|
||||
FP_PACK_SEMIRAW_D (r, R);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
23
src/gemm/soft-fp/addsf3.c
Normal file
23
src/gemm/soft-fp/addsf3.c
Normal file
|
@ -0,0 +1,23 @@
|
|||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
/* Soft-float single-precision addition: returns the sum of a and b.
   FP_DECL_EX, FP_INIT_ROUNDMODE and FP_HANDLE_EXCEPTIONS come from
   soft-fp.h (not visible here); presumably they declare, initialise and
   report the exception/rounding state -- confirm there.  */
SFtype
|
||||
__addsf3 (SFtype a, SFtype b)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
/* Unpacked working copies of the two operands and of the result.  */
FP_DECL_S (A);
|
||||
FP_DECL_S (B);
|
||||
FP_DECL_S (R);
|
||||
SFtype r;
|
||||
|
||||
FP_INIT_ROUNDMODE;
|
||||
/* Unpack a and b into semi-raw form, add, then repack into r.  */
FP_UNPACK_SEMIRAW_S (A, a);
|
||||
FP_UNPACK_SEMIRAW_S (B, b);
|
||||
FP_ADD_S (R, A, B);
|
||||
FP_PACK_SEMIRAW_S (r, R);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
|
21
src/gemm/soft-fp/divdf3.c
Normal file
21
src/gemm/soft-fp/divdf3.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
/* Soft-float double-precision division: returns the quotient a / b.
   FP_DECL_EX, FP_INIT_ROUNDMODE and FP_HANDLE_EXCEPTIONS come from
   soft-fp.h (not visible here); presumably they declare, initialise and
   report the exception/rounding state -- confirm there.  */
DFtype
|
||||
__divdf3 (DFtype a, DFtype b)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
/* Unpacked working copies of the two operands and of the result.  */
FP_DECL_D (A);
|
||||
FP_DECL_D (B);
|
||||
FP_DECL_D (R);
|
||||
DFtype r;
|
||||
|
||||
FP_INIT_ROUNDMODE;
|
||||
/* Unpack a and b into canonical form, divide, then repack into r.  */
FP_UNPACK_D (A, a);
|
||||
FP_UNPACK_D (B, b);
|
||||
FP_DIV_D (R, A, B);
|
||||
FP_PACK_D (r, R);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
21
src/gemm/soft-fp/divsf3.c
Normal file
21
src/gemm/soft-fp/divsf3.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
/* Soft-float single-precision division: returns the quotient a / b.
   FP_DECL_EX, FP_INIT_ROUNDMODE and FP_HANDLE_EXCEPTIONS come from
   soft-fp.h (not visible here); presumably they declare, initialise and
   report the exception/rounding state -- confirm there.  */
SFtype
|
||||
__divsf3 (SFtype a, SFtype b)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
/* Unpacked working copies of the two operands and of the result.  */
FP_DECL_S (A);
|
||||
FP_DECL_S (B);
|
||||
FP_DECL_S (R);
|
||||
SFtype r;
|
||||
|
||||
FP_INIT_ROUNDMODE;
|
||||
/* Unpack a and b into canonical form, divide, then repack into r.  */
FP_UNPACK_S (A, a);
|
||||
FP_UNPACK_S (B, b);
|
||||
FP_DIV_S (R, A, B);
|
||||
FP_PACK_S (r, R);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
323
src/gemm/soft-fp/double.h
Normal file
323
src/gemm/soft-fp/double.h
Normal file
|
@ -0,0 +1,323 @@
|
|||
/* Software floating-point emulation.
|
||||
Definitions for IEEE Double Precision
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com),
|
||||
Jakub Jelinek (jj@ultra.linux.cz),
|
||||
David S. Miller (davem@redhat.com) and
|
||||
Peter Maydell (pmaydell@chiark.greenend.org.uk).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef SOFT_FP_DOUBLE_H
|
||||
#define SOFT_FP_DOUBLE_H 1
|
||||
|
||||
#if _FP_W_TYPE_SIZE < 32
|
||||
# error "Here's a nickel kid. Go buy yourself a real computer."
|
||||
#endif
|
||||
|
||||
#if _FP_W_TYPE_SIZE < 64
|
||||
# define _FP_FRACTBITS_D (2 * _FP_W_TYPE_SIZE)
|
||||
# define _FP_FRACTBITS_DW_D (4 * _FP_W_TYPE_SIZE)
|
||||
#else
|
||||
# define _FP_FRACTBITS_D _FP_W_TYPE_SIZE
|
||||
# define _FP_FRACTBITS_DW_D (2 * _FP_W_TYPE_SIZE)
|
||||
#endif
|
||||
|
||||
#define _FP_FRACBITS_D 53
|
||||
#define _FP_FRACXBITS_D (_FP_FRACTBITS_D - _FP_FRACBITS_D)
|
||||
#define _FP_WFRACBITS_D (_FP_WORKBITS + _FP_FRACBITS_D)
|
||||
#define _FP_WFRACXBITS_D (_FP_FRACTBITS_D - _FP_WFRACBITS_D)
|
||||
#define _FP_EXPBITS_D 11
|
||||
#define _FP_EXPBIAS_D 1023
|
||||
#define _FP_EXPMAX_D 2047
|
||||
|
||||
#define _FP_QNANBIT_D \
|
||||
((_FP_W_TYPE) 1 << (_FP_FRACBITS_D-2) % _FP_W_TYPE_SIZE)
|
||||
#define _FP_QNANBIT_SH_D \
|
||||
((_FP_W_TYPE) 1 << (_FP_FRACBITS_D-2+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
|
||||
#define _FP_IMPLBIT_D \
|
||||
((_FP_W_TYPE) 1 << (_FP_FRACBITS_D-1) % _FP_W_TYPE_SIZE)
|
||||
#define _FP_IMPLBIT_SH_D \
|
||||
((_FP_W_TYPE) 1 << (_FP_FRACBITS_D-1+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
|
||||
#define _FP_OVERFLOW_D \
|
||||
((_FP_W_TYPE) 1 << _FP_WFRACBITS_D % _FP_W_TYPE_SIZE)
|
||||
|
||||
#define _FP_WFRACBITS_DW_D (2 * _FP_WFRACBITS_D)
|
||||
#define _FP_WFRACXBITS_DW_D (_FP_FRACTBITS_DW_D - _FP_WFRACBITS_DW_D)
|
||||
#define _FP_HIGHBIT_DW_D \
|
||||
((_FP_W_TYPE) 1 << (_FP_WFRACBITS_DW_D - 1) % _FP_W_TYPE_SIZE)
|
||||
|
||||
typedef float DFtype __attribute__ ((mode (DF)));
|
||||
|
||||
#if _FP_W_TYPE_SIZE < 64
|
||||
|
||||
/* Overlay of a DFtype value with its bit fields, used when
   _FP_W_TYPE_SIZE < 64: the fraction is split into two fields, frac0
   (_FP_W_TYPE_SIZE bits) and frac1 (the remaining fraction bits).  */
union _FP_UNION_D
|
||||
{
|
||||
DFtype flt;
|
||||
struct _FP_STRUCT_LAYOUT
|
||||
{
|
||||
/* The fields are declared in opposite order for the two byte orders.  */
# if __BYTE_ORDER == __BIG_ENDIAN
|
||||
unsigned sign : 1;
|
||||
unsigned exp : _FP_EXPBITS_D;
|
||||
/* (_FP_IMPLBIT_D != 0) subtracts one bit when an implicit bit is defined.  */
unsigned frac1 : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0) - _FP_W_TYPE_SIZE;
|
||||
unsigned frac0 : _FP_W_TYPE_SIZE;
|
||||
# else
|
||||
unsigned frac0 : _FP_W_TYPE_SIZE;
|
||||
unsigned frac1 : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0) - _FP_W_TYPE_SIZE;
|
||||
unsigned exp : _FP_EXPBITS_D;
|
||||
unsigned sign : 1;
|
||||
# endif
|
||||
} bits;
|
||||
};
|
||||
|
||||
# define FP_DECL_D(X) _FP_DECL (2, X)
|
||||
# define FP_UNPACK_RAW_D(X, val) _FP_UNPACK_RAW_2 (D, X, (val))
|
||||
# define FP_UNPACK_RAW_DP(X, val) _FP_UNPACK_RAW_2_P (D, X, (val))
|
||||
# define FP_PACK_RAW_D(val, X) _FP_PACK_RAW_2 (D, (val), X)
|
||||
# define FP_PACK_RAW_DP(val, X) \
|
||||
do \
|
||||
{ \
|
||||
if (!FP_INHIBIT_RESULTS) \
|
||||
_FP_PACK_RAW_2_P (D, (val), X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_UNPACK_D(X, val) \
|
||||
do \
|
||||
{ \
|
||||
_FP_UNPACK_RAW_2 (D, X, (val)); \
|
||||
_FP_UNPACK_CANONICAL (D, 2, X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_UNPACK_DP(X, val) \
|
||||
do \
|
||||
{ \
|
||||
_FP_UNPACK_RAW_2_P (D, X, (val)); \
|
||||
_FP_UNPACK_CANONICAL (D, 2, X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_UNPACK_SEMIRAW_D(X, val) \
|
||||
do \
|
||||
{ \
|
||||
_FP_UNPACK_RAW_2 (D, X, (val)); \
|
||||
_FP_UNPACK_SEMIRAW (D, 2, X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_UNPACK_SEMIRAW_DP(X, val) \
|
||||
do \
|
||||
{ \
|
||||
_FP_UNPACK_RAW_2_P (D, X, (val)); \
|
||||
_FP_UNPACK_SEMIRAW (D, 2, X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_PACK_D(val, X) \
|
||||
do \
|
||||
{ \
|
||||
_FP_PACK_CANONICAL (D, 2, X); \
|
||||
_FP_PACK_RAW_2 (D, (val), X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_PACK_DP(val, X) \
|
||||
do \
|
||||
{ \
|
||||
_FP_PACK_CANONICAL (D, 2, X); \
|
||||
if (!FP_INHIBIT_RESULTS) \
|
||||
_FP_PACK_RAW_2_P (D, (val), X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_PACK_SEMIRAW_D(val, X) \
|
||||
do \
|
||||
{ \
|
||||
_FP_PACK_SEMIRAW (D, 2, X); \
|
||||
_FP_PACK_RAW_2 (D, (val), X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_PACK_SEMIRAW_DP(val, X) \
|
||||
do \
|
||||
{ \
|
||||
_FP_PACK_SEMIRAW (D, 2, X); \
|
||||
if (!FP_INHIBIT_RESULTS) \
|
||||
_FP_PACK_RAW_2_P (D, (val), X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_ISSIGNAN_D(X) _FP_ISSIGNAN (D, 2, X)
|
||||
# define FP_NEG_D(R, X) _FP_NEG (D, 2, R, X)
|
||||
# define FP_ADD_D(R, X, Y) _FP_ADD (D, 2, R, X, Y)
|
||||
# define FP_SUB_D(R, X, Y) _FP_SUB (D, 2, R, X, Y)
|
||||
# define FP_MUL_D(R, X, Y) _FP_MUL (D, 2, R, X, Y)
|
||||
# define FP_DIV_D(R, X, Y) _FP_DIV (D, 2, R, X, Y)
|
||||
# define FP_SQRT_D(R, X) _FP_SQRT (D, 2, R, X)
|
||||
# define _FP_SQRT_MEAT_D(R, S, T, X, Q) _FP_SQRT_MEAT_2 (R, S, T, X, (Q))
|
||||
# define FP_FMA_D(R, X, Y, Z) _FP_FMA (D, 2, 4, R, X, Y, Z)
|
||||
|
||||
# define FP_CMP_D(r, X, Y, un, ex) _FP_CMP (D, 2, (r), X, Y, (un), (ex))
|
||||
# define FP_CMP_EQ_D(r, X, Y, ex) _FP_CMP_EQ (D, 2, (r), X, Y, (ex))
|
||||
# define FP_CMP_UNORD_D(r, X, Y, ex) _FP_CMP_UNORD (D, 2, (r), X, Y, (ex))
|
||||
|
||||
# define FP_TO_INT_D(r, X, rsz, rsg) _FP_TO_INT (D, 2, (r), X, (rsz), (rsg))
|
||||
# define FP_TO_INT_ROUND_D(r, X, rsz, rsg) \
|
||||
_FP_TO_INT_ROUND (D, 2, (r), X, (rsz), (rsg))
|
||||
# define FP_FROM_INT_D(X, r, rs, rt) _FP_FROM_INT (D, 2, X, (r), (rs), rt)
|
||||
|
||||
# define _FP_FRAC_HIGH_D(X) _FP_FRAC_HIGH_2 (X)
|
||||
# define _FP_FRAC_HIGH_RAW_D(X) _FP_FRAC_HIGH_2 (X)
|
||||
|
||||
# define _FP_FRAC_HIGH_DW_D(X) _FP_FRAC_HIGH_4 (X)
|
||||
|
||||
#else
|
||||
|
||||
/* Overlay of a DFtype value with its bit fields, used when
   _FP_W_TYPE_SIZE is at least 64: the whole fraction fits in a single
   _FP_W_TYPE bit field named frac.  */
union _FP_UNION_D
|
||||
{
|
||||
DFtype flt;
|
||||
struct _FP_STRUCT_LAYOUT
|
||||
{
|
||||
/* The fields are declared in opposite order for the two byte orders.  */
# if __BYTE_ORDER == __BIG_ENDIAN
|
||||
unsigned sign : 1;
|
||||
unsigned exp : _FP_EXPBITS_D;
|
||||
/* (_FP_IMPLBIT_D != 0) subtracts one bit when an implicit bit is defined.  */
_FP_W_TYPE frac : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0);
|
||||
# else
|
||||
_FP_W_TYPE frac : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0);
|
||||
unsigned exp : _FP_EXPBITS_D;
|
||||
unsigned sign : 1;
|
||||
# endif
|
||||
} bits;
|
||||
};
|
||||
|
||||
# define FP_DECL_D(X) _FP_DECL (1, X)
|
||||
# define FP_UNPACK_RAW_D(X, val) _FP_UNPACK_RAW_1 (D, X, (val))
|
||||
# define FP_UNPACK_RAW_DP(X, val) _FP_UNPACK_RAW_1_P (D, X, (val))
|
||||
# define FP_PACK_RAW_D(val, X) _FP_PACK_RAW_1 (D, (val), X)
|
||||
# define FP_PACK_RAW_DP(val, X) \
|
||||
do \
|
||||
{ \
|
||||
if (!FP_INHIBIT_RESULTS) \
|
||||
_FP_PACK_RAW_1_P (D, (val), X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_UNPACK_D(X, val) \
|
||||
do \
|
||||
{ \
|
||||
_FP_UNPACK_RAW_1 (D, X, (val)); \
|
||||
_FP_UNPACK_CANONICAL (D, 1, X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_UNPACK_DP(X, val) \
|
||||
do \
|
||||
{ \
|
||||
_FP_UNPACK_RAW_1_P (D, X, (val)); \
|
||||
_FP_UNPACK_CANONICAL (D, 1, X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_UNPACK_SEMIRAW_D(X, val) \
|
||||
do \
|
||||
{ \
|
||||
_FP_UNPACK_RAW_1 (D, X, (val)); \
|
||||
_FP_UNPACK_SEMIRAW (D, 1, X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_UNPACK_SEMIRAW_DP(X, val) \
|
||||
do \
|
||||
{ \
|
||||
_FP_UNPACK_RAW_1_P (D, X, (val)); \
|
||||
_FP_UNPACK_SEMIRAW (D, 1, X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_PACK_D(val, X) \
|
||||
do \
|
||||
{ \
|
||||
_FP_PACK_CANONICAL (D, 1, X); \
|
||||
_FP_PACK_RAW_1 (D, (val), X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_PACK_DP(val, X) \
|
||||
do \
|
||||
{ \
|
||||
_FP_PACK_CANONICAL (D, 1, X); \
|
||||
if (!FP_INHIBIT_RESULTS) \
|
||||
_FP_PACK_RAW_1_P (D, (val), X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_PACK_SEMIRAW_D(val, X) \
|
||||
do \
|
||||
{ \
|
||||
_FP_PACK_SEMIRAW (D, 1, X); \
|
||||
_FP_PACK_RAW_1 (D, (val), X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_PACK_SEMIRAW_DP(val, X) \
|
||||
do \
|
||||
{ \
|
||||
_FP_PACK_SEMIRAW (D, 1, X); \
|
||||
if (!FP_INHIBIT_RESULTS) \
|
||||
_FP_PACK_RAW_1_P (D, (val), X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_ISSIGNAN_D(X) _FP_ISSIGNAN (D, 1, X)
|
||||
# define FP_NEG_D(R, X) _FP_NEG (D, 1, R, X)
|
||||
# define FP_ADD_D(R, X, Y) _FP_ADD (D, 1, R, X, Y)
|
||||
# define FP_SUB_D(R, X, Y) _FP_SUB (D, 1, R, X, Y)
|
||||
# define FP_MUL_D(R, X, Y) _FP_MUL (D, 1, R, X, Y)
|
||||
# define FP_DIV_D(R, X, Y) _FP_DIV (D, 1, R, X, Y)
|
||||
# define FP_SQRT_D(R, X) _FP_SQRT (D, 1, R, X)
|
||||
# define _FP_SQRT_MEAT_D(R, S, T, X, Q) _FP_SQRT_MEAT_1 (R, S, T, X, (Q))
|
||||
# define FP_FMA_D(R, X, Y, Z) _FP_FMA (D, 1, 2, R, X, Y, Z)
|
||||
|
||||
/* The implementation of _FP_MUL_D and _FP_DIV_D should be chosen by
|
||||
the target machine. */
|
||||
|
||||
# define FP_CMP_D(r, X, Y, un, ex) _FP_CMP (D, 1, (r), X, Y, (un), (ex))
|
||||
# define FP_CMP_EQ_D(r, X, Y, ex) _FP_CMP_EQ (D, 1, (r), X, Y, (ex))
|
||||
# define FP_CMP_UNORD_D(r, X, Y, ex) _FP_CMP_UNORD (D, 1, (r), X, Y, (ex))
|
||||
|
||||
# define FP_TO_INT_D(r, X, rsz, rsg) _FP_TO_INT (D, 1, (r), X, (rsz), (rsg))
|
||||
# define FP_TO_INT_ROUND_D(r, X, rsz, rsg) \
|
||||
_FP_TO_INT_ROUND (D, 1, (r), X, (rsz), (rsg))
|
||||
# define FP_FROM_INT_D(X, r, rs, rt) _FP_FROM_INT (D, 1, X, (r), (rs), rt)
|
||||
|
||||
# define _FP_FRAC_HIGH_D(X) _FP_FRAC_HIGH_1 (X)
|
||||
# define _FP_FRAC_HIGH_RAW_D(X) _FP_FRAC_HIGH_1 (X)
|
||||
|
||||
# define _FP_FRAC_HIGH_DW_D(X) _FP_FRAC_HIGH_2 (X)
|
||||
|
||||
#endif /* W_TYPE_SIZE < 64 */
|
||||
|
||||
#endif /* !SOFT_FP_DOUBLE_H */
|
21
src/gemm/soft-fp/eqdf2.c
Normal file
21
src/gemm/soft-fp/eqdf2.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
/* Soft-float double-precision equality comparison of a and b.
   The result r is produced by FP_CMP_EQ_D on the raw-unpacked operands.
   NOTE(review): the GCC soft-float documentation describes __eqdf2 as
   returning zero when neither argument is NaN and a == b -- confirm
   against _FP_CMP_EQ in soft-fp.h (not visible here).  */
CMPtype
|
||||
__eqdf2 (DFtype a, DFtype b)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_D (A);
|
||||
FP_DECL_D (B);
|
||||
CMPtype r;
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
/* Only a raw unpack is done before the comparison.  */
FP_UNPACK_RAW_D (A, a);
|
||||
FP_UNPACK_RAW_D (B, b);
|
||||
/* The final argument is passed through as the macro's "ex" parameter.  */
FP_CMP_EQ_D (r, A, B, 1);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/* presumably makes __nedf2 an alias of __eqdf2; strong_alias is defined
   elsewhere -- confirm.  */
strong_alias (__eqdf2, __nedf2);
|
21
src/gemm/soft-fp/eqsf2.c
Normal file
21
src/gemm/soft-fp/eqsf2.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
/* Soft-float single-precision equality comparison of a and b.
   The result r is produced by FP_CMP_EQ_S on the raw-unpacked operands.
   NOTE(review): the GCC soft-float documentation describes __eqsf2 as
   returning zero when neither argument is NaN and a == b -- confirm
   against the macro definitions in soft-fp.h (not visible here).  */
CMPtype
|
||||
__eqsf2 (SFtype a, SFtype b)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_S (A);
|
||||
FP_DECL_S (B);
|
||||
CMPtype r;
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
/* Only a raw unpack is done before the comparison.  */
FP_UNPACK_RAW_S (A, a);
|
||||
FP_UNPACK_RAW_S (B, b);
|
||||
FP_CMP_EQ_S (r, A, B, 1);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/* presumably makes __nesf2 an alias of __eqsf2; strong_alias is defined
   elsewhere -- confirm.  */
strong_alias (__eqsf2, __nesf2);
|
26
src/gemm/soft-fp/extendsfdf2.c
Normal file
26
src/gemm/soft-fp/extendsfdf2.c
Normal file
|
@ -0,0 +1,26 @@
|
|||
#define FP_NO_EXACT_UNDERFLOW
|
||||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
#include "double.h"
|
||||
|
||||
/* Soft-float conversion: extends the single-precision value a to double
   precision and returns it.  */
DFtype
|
||||
__extendsfdf2 (SFtype a)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
/* A holds the unpacked single input, R the unpacked double result.  */
FP_DECL_S (A);
|
||||
FP_DECL_D (R);
|
||||
DFtype r;
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
FP_UNPACK_RAW_S (A, a);
|
||||
/* The third FP_EXTEND argument is 2 when one _FP_W_TYPE word is narrower
   than the double fraction (_FP_FRACBITS_D bits), otherwise 1; presumably
   it is the word count of the double representation -- confirm in
   soft-fp.h.  */
#if _FP_W_TYPE_SIZE < _FP_FRACBITS_D
|
||||
FP_EXTEND (D, S, 2, 1, R, A);
|
||||
#else
|
||||
FP_EXTEND (D, S, 1, 1, R, A);
|
||||
#endif
|
||||
FP_PACK_RAW_D (r, R);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
47
src/gemm/soft-fp/fixdfdi.c
Normal file
47
src/gemm/soft-fp/fixdfdi.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert a to 64bit signed integer
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
/* Soft-float conversion of the double a to a 64-bit signed integer.
   The value is built in an unsigned UDItype and converted to DItype by
   the return statement.  */
DItype
|
||||
__fixdfdi (DFtype a)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_D (A);
|
||||
UDItype r;
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
FP_UNPACK_RAW_D (A, a);
|
||||
/* DI_BITS is the macro's "rsz" argument and 1 its "rsg" argument;
   presumably result size and signedness -- confirm in soft-fp.h.  */
FP_TO_INT_D (r, A, DI_BITS, 1);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
47
src/gemm/soft-fp/fixdfsi.c
Normal file
47
src/gemm/soft-fp/fixdfsi.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert a to 32bit signed integer
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
/* Soft-float conversion of the double a to a 32-bit signed integer.
   The value is built in an unsigned USItype and converted to SItype by
   the return statement.  */
SItype
|
||||
__fixdfsi (DFtype a)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_D (A);
|
||||
USItype r;
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
FP_UNPACK_RAW_D (A, a);
|
||||
/* SI_BITS is the macro's "rsz" argument and 1 its "rsg" argument;
   presumably result size and signedness -- confirm in soft-fp.h.  */
FP_TO_INT_D (r, A, SI_BITS, 1);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
46
src/gemm/soft-fp/fixdfti.c
Normal file
46
src/gemm/soft-fp/fixdfti.c
Normal file
|
@ -0,0 +1,46 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert IEEE double to 128bit signed integer
|
||||
Copyright (C) 2007-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Uros Bizjak (ubizjak@gmail.com).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
// #include "soft-fp.h"
|
||||
// #include "double.h"
|
||||
|
||||
// TItype
|
||||
// __fixdfti (DFtype a)
|
||||
// {
|
||||
// FP_DECL_EX;
|
||||
// FP_DECL_D (A);
|
||||
// UTItype r;
|
||||
|
||||
// FP_INIT_EXCEPTIONS;
|
||||
// FP_UNPACK_RAW_D (A, a);
|
||||
// FP_TO_INT_D (r, A, TI_BITS, 1);
|
||||
// FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
// return r;
|
||||
// }
|
47
src/gemm/soft-fp/fixsfdi.c
Normal file
47
src/gemm/soft-fp/fixsfdi.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert a to 64bit signed integer
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
/* Soft-float conversion of the single-precision a to a 64-bit signed
   integer.  The value is built in an unsigned UDItype and converted to
   DItype by the return statement.  */
DItype
|
||||
__fixsfdi (SFtype a)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_S (A);
|
||||
UDItype r;
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
FP_UNPACK_RAW_S (A, a);
|
||||
/* presumably DI_BITS is the result width and the trailing 1 requests a
   signed result -- confirm against FP_TO_INT_S in single.h.  */
FP_TO_INT_S (r, A, DI_BITS, 1);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
47
src/gemm/soft-fp/fixsfsi.c
Normal file
47
src/gemm/soft-fp/fixsfsi.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert a to 32bit signed integer
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
/* Soft-float conversion of the single-precision a to a 32-bit signed
   integer.  The value is built in an unsigned USItype and converted to
   SItype by the return statement.  */
SItype
|
||||
__fixsfsi (SFtype a)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_S (A);
|
||||
USItype r;
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
FP_UNPACK_RAW_S (A, a);
|
||||
/* presumably SI_BITS is the result width and the trailing 1 requests a
   signed result -- confirm against FP_TO_INT_S in single.h.  */
FP_TO_INT_S (r, A, SI_BITS, 1);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
46
src/gemm/soft-fp/fixsfti.c
Normal file
46
src/gemm/soft-fp/fixsfti.c
Normal file
|
@ -0,0 +1,46 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert IEEE single to 128bit signed integer
|
||||
Copyright (C) 2007-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Uros Bizjak (ubizjak@gmail.com).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
// #include "soft-fp.h"
|
||||
// #include "single.h"
|
||||
|
||||
// TItype
|
||||
// __fixsfti (SFtype a)
|
||||
// {
|
||||
// FP_DECL_EX;
|
||||
// FP_DECL_S (A);
|
||||
// UTItype r;
|
||||
|
||||
// FP_INIT_EXCEPTIONS;
|
||||
// FP_UNPACK_RAW_S (A, a);
|
||||
// FP_TO_INT_S (r, A, TI_BITS, 1);
|
||||
// FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
// return r;
|
||||
// }
|
47
src/gemm/soft-fp/fixunsdfdi.c
Normal file
47
src/gemm/soft-fp/fixunsdfdi.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert a to 64bit unsigned integer
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
/* Convert the double-precision value a to a 64-bit unsigned integer
   (UDItype).  The conversion is performed by the FP_TO_INT_D macro.
   NOTE(review): handling of negative, out-of-range and NaN inputs is decided
   inside FP_TO_INT_D, which is not visible here -- confirm before relying
   on it.  */
UDItype
|
||||
__fixunsdfdi (DFtype a)
|
||||
{
|
||||
FP_DECL_EX;  /* presumably the exception state consumed by FP_HANDLE_EXCEPTIONS */
|
||||
FP_DECL_D (A);  /* A: unpacked working copy of the argument a */
|
||||
UDItype r;  /* conversion result */
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
FP_UNPACK_RAW_D (A, a);  /* load a into A */
|
||||
FP_TO_INT_D (r, A, DI_BITS, 0);  /* DI_BITS-wide result; last argument 0 = unsigned (signed routines pass 1) */
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
47
src/gemm/soft-fp/fixunsdfsi.c
Normal file
47
src/gemm/soft-fp/fixunsdfsi.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert a to 32bit unsigned integer
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
/* Convert the double-precision value a to a 32-bit unsigned integer
   (USItype).  The conversion is performed by the FP_TO_INT_D macro.
   NOTE(review): handling of negative, out-of-range and NaN inputs is decided
   inside FP_TO_INT_D, which is not visible here -- confirm before relying
   on it.  */
USItype
|
||||
__fixunsdfsi (DFtype a)
|
||||
{
|
||||
FP_DECL_EX;  /* presumably the exception state consumed by FP_HANDLE_EXCEPTIONS */
|
||||
FP_DECL_D (A);  /* A: unpacked working copy of the argument a */
|
||||
USItype r;  /* conversion result */
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
FP_UNPACK_RAW_D (A, a);  /* load a into A */
|
||||
FP_TO_INT_D (r, A, SI_BITS, 0);  /* SI_BITS-wide result; last argument 0 = unsigned (signed routines pass 1) */
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
46
src/gemm/soft-fp/fixunsdfti.c
Normal file
46
src/gemm/soft-fp/fixunsdfti.c
Normal file
|
@ -0,0 +1,46 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert IEEE double to 128bit unsigned integer
|
||||
Copyright (C) 2007-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Uros Bizjak (ubizjak@gmail.com).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
// #include "soft-fp.h"
|
||||
// #include "double.h"
|
||||
|
||||
// UTItype
|
||||
// __fixunsdfti (DFtype a)
|
||||
// {
|
||||
// FP_DECL_EX;
|
||||
// FP_DECL_D (A);
|
||||
// UTItype r;
|
||||
|
||||
// FP_INIT_EXCEPTIONS;
|
||||
// FP_UNPACK_RAW_D (A, a);
|
||||
// FP_TO_INT_D (r, A, TI_BITS, 0);
|
||||
// FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
// return r;
|
||||
// }
|
47
src/gemm/soft-fp/fixunssfdi.c
Normal file
47
src/gemm/soft-fp/fixunssfdi.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert a to 64bit unsigned integer
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
/* Convert the single-precision value a to a 64-bit unsigned integer
   (UDItype).  The conversion is performed by the FP_TO_INT_S macro.
   NOTE(review): handling of negative, out-of-range and NaN inputs is decided
   inside FP_TO_INT_S, which is not visible here -- confirm before relying
   on it.  */
UDItype
|
||||
__fixunssfdi (SFtype a)
|
||||
{
|
||||
FP_DECL_EX;  /* presumably the exception state consumed by FP_HANDLE_EXCEPTIONS */
|
||||
FP_DECL_S (A);  /* A: unpacked working copy of the argument a */
|
||||
UDItype r;  /* conversion result */
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
FP_UNPACK_RAW_S (A, a);  /* load a into A */
|
||||
FP_TO_INT_S (r, A, DI_BITS, 0);  /* DI_BITS-wide result; last argument 0 = unsigned (signed routines pass 1) */
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
47
src/gemm/soft-fp/fixunssfsi.c
Normal file
47
src/gemm/soft-fp/fixunssfsi.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert a to 32bit unsigned integer
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
/* Convert the single-precision value a to a 32-bit unsigned integer
   (USItype).  The conversion is performed by the FP_TO_INT_S macro.
   NOTE(review): handling of negative, out-of-range and NaN inputs is decided
   inside FP_TO_INT_S, which is not visible here -- confirm before relying
   on it.  */
USItype
|
||||
__fixunssfsi (SFtype a)
|
||||
{
|
||||
FP_DECL_EX;  /* presumably the exception state consumed by FP_HANDLE_EXCEPTIONS */
|
||||
FP_DECL_S (A);  /* A: unpacked working copy of the argument a */
|
||||
USItype r;  /* conversion result */
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
FP_UNPACK_RAW_S (A, a);  /* load a into A */
|
||||
FP_TO_INT_S (r, A, SI_BITS, 0);  /* SI_BITS-wide result; last argument 0 = unsigned (signed routines pass 1) */
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
46
src/gemm/soft-fp/fixunssfti.c
Normal file
46
src/gemm/soft-fp/fixunssfti.c
Normal file
|
@ -0,0 +1,46 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert IEEE single to 128bit unsigned integer
|
||||
Copyright (C) 2007-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Uros Bizjak (ubizjak@gmail.com).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
// #include "soft-fp.h"
|
||||
// #include "single.h"
|
||||
|
||||
// UTItype
|
||||
// __fixunssfti (SFtype a)
|
||||
// {
|
||||
// FP_DECL_EX;
|
||||
// FP_DECL_S (A);
|
||||
// UTItype r;
|
||||
|
||||
// FP_INIT_EXCEPTIONS;
|
||||
// FP_UNPACK_RAW_S (A, a);
|
||||
// FP_TO_INT_S (r, A, TI_BITS, 0);
|
||||
// FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
// return r;
|
||||
// }
|
47
src/gemm/soft-fp/floatdidf.c
Normal file
47
src/gemm/soft-fp/floatdidf.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert a 64bit signed integer to IEEE double
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
/* Convert the 64-bit signed integer i to IEEE double precision and return
   it as DFtype.  The conversion is performed by the FP_FROM_INT_D macro.  */
DFtype
|
||||
__floatdidf (DItype i)
|
||||
{
|
||||
FP_DECL_EX;  /* presumably the exception state consumed by FP_HANDLE_EXCEPTIONS */
|
||||
FP_DECL_D (A);  /* A: unpacked double that receives the converted value */
|
||||
DFtype a;  /* packed result */
|
||||
|
||||
FP_INIT_ROUNDMODE;  /* presumably selects the rounding mode used by the conversion -- confirm in soft-fp.h */
|
||||
FP_FROM_INT_D (A, i, DI_BITS, UDItype);  /* i is DI_BITS wide; UDItype is the unsigned type of that width handed to the macro */
|
||||
FP_PACK_RAW_D (a, A);  /* store A into a */
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return a;
|
||||
}
|
47
src/gemm/soft-fp/floatdisf.c
Normal file
47
src/gemm/soft-fp/floatdisf.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert a 64bit signed integer to IEEE single
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
/* Convert the 64-bit signed integer i to IEEE single precision and return
   it as SFtype.  The conversion is performed by the FP_FROM_INT_S macro.  */
SFtype
|
||||
__floatdisf (DItype i)
|
||||
{
|
||||
FP_DECL_EX;  /* presumably the exception state consumed by FP_HANDLE_EXCEPTIONS */
|
||||
FP_DECL_S (A);  /* A: unpacked single that receives the converted value */
|
||||
SFtype a;  /* packed result */
|
||||
|
||||
FP_INIT_ROUNDMODE;  /* presumably selects the rounding mode used by the conversion -- confirm in soft-fp.h */
|
||||
FP_FROM_INT_S (A, i, DI_BITS, UDItype);  /* i is DI_BITS wide; UDItype is the unsigned type of that width handed to the macro */
|
||||
FP_PACK_RAW_S (a, A);  /* store A into a */
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return a;
|
||||
}
|
49
src/gemm/soft-fp/floatsidf.c
Normal file
49
src/gemm/soft-fp/floatsidf.c
Normal file
|
@ -0,0 +1,49 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert a 32bit signed integer to IEEE double
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#define FP_NO_EXCEPTIONS
|
||||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
/* Convert the 32-bit signed integer i to IEEE double precision and return
   it as DFtype.  This file defines FP_NO_EXCEPTIONS before including
   soft-fp.h.
   NOTE(review): with FP_NO_EXCEPTIONS defined, the exception/rounding macros
   below are presumably reduced to no-ops or fixed values -- confirm in
   soft-fp.h.  */
DFtype
|
||||
__floatsidf (SItype i)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_D (A);  /* A: unpacked double that receives the converted value */
|
||||
DFtype a;  /* packed result */
|
||||
|
||||
FP_INIT_ROUNDMODE;
|
||||
FP_FROM_INT_D (A, i, SI_BITS, USItype);  /* i is SI_BITS wide; USItype is the unsigned type of that width handed to the macro */
|
||||
FP_PACK_RAW_D (a, A);  /* store A into a */
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return a;
|
||||
}
|
||||
|
47
src/gemm/soft-fp/floatsisf.c
Normal file
47
src/gemm/soft-fp/floatsisf.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert a 32bit signed integer to IEEE single
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
/* Convert the 32-bit signed integer i to IEEE single precision and return
   it as SFtype.  The conversion is performed by the FP_FROM_INT_S macro.  */
SFtype
|
||||
__floatsisf (SItype i)
|
||||
{
|
||||
FP_DECL_EX;  /* presumably the exception state consumed by FP_HANDLE_EXCEPTIONS */
|
||||
FP_DECL_S (A);  /* A: unpacked single that receives the converted value */
|
||||
SFtype a;  /* packed result */
|
||||
|
||||
FP_INIT_ROUNDMODE;  /* presumably selects the rounding mode used by the conversion -- confirm in soft-fp.h */
|
||||
FP_FROM_INT_S (A, i, SI_BITS, USItype);  /* i is SI_BITS wide; USItype is the unsigned type of that width handed to the macro */
|
||||
FP_PACK_RAW_S (a, A);  /* store A into a */
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return a;
|
||||
}
|
47
src/gemm/soft-fp/floatundidf.c
Normal file
47
src/gemm/soft-fp/floatundidf.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert a 64bit unsigned integer to IEEE double
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
/* Convert the 64-bit unsigned integer i to IEEE double precision and return
   it as DFtype.  The macro call is the same as in __floatdidf; only the
   declared type of i (UDItype instead of DItype) differs.
   NOTE(review): FP_FROM_INT_D presumably derives signedness from the type
   of i -- confirm against its definition.  */
DFtype
|
||||
__floatundidf (UDItype i)
|
||||
{
|
||||
FP_DECL_EX;  /* presumably the exception state consumed by FP_HANDLE_EXCEPTIONS */
|
||||
FP_DECL_D (A);  /* A: unpacked double that receives the converted value */
|
||||
DFtype a;  /* packed result */
|
||||
|
||||
FP_INIT_ROUNDMODE;  /* presumably selects the rounding mode used by the conversion -- confirm in soft-fp.h */
|
||||
FP_FROM_INT_D (A, i, DI_BITS, UDItype);  /* i is DI_BITS wide */
|
||||
FP_PACK_RAW_D (a, A);  /* store A into a */
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return a;
|
||||
}
|
47
src/gemm/soft-fp/floatundisf.c
Normal file
47
src/gemm/soft-fp/floatundisf.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert a 64bit unsigned integer to IEEE single
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
/* Convert the 64-bit unsigned integer i to IEEE single precision and return
   it as SFtype.  The macro call is the same as in __floatdisf; only the
   declared type of i (UDItype instead of DItype) differs.
   NOTE(review): FP_FROM_INT_S presumably derives signedness from the type
   of i -- confirm against its definition.  */
SFtype
|
||||
__floatundisf (UDItype i)
|
||||
{
|
||||
FP_DECL_EX;  /* presumably the exception state consumed by FP_HANDLE_EXCEPTIONS */
|
||||
FP_DECL_S (A);  /* A: unpacked single that receives the converted value */
|
||||
SFtype a;  /* packed result */
|
||||
|
||||
FP_INIT_ROUNDMODE;  /* presumably selects the rounding mode used by the conversion -- confirm in soft-fp.h */
|
||||
FP_FROM_INT_S (A, i, DI_BITS, UDItype);  /* i is DI_BITS wide */
|
||||
FP_PACK_RAW_S (a, A);  /* store A into a */
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return a;
|
||||
}
|
47
src/gemm/soft-fp/floatunsidf.c
Normal file
47
src/gemm/soft-fp/floatunsidf.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert a 32bit unsigned integer to IEEE double
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#define FP_NO_EXCEPTIONS
|
||||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
/* Convert the 32-bit unsigned integer i to IEEE double precision and return
   it as DFtype.  This file defines FP_NO_EXCEPTIONS before including
   soft-fp.h.
   NOTE(review): unlike __floatsidf, this body has no FP_HANDLE_EXCEPTIONS
   call after the pack; presumably harmless under FP_NO_EXCEPTIONS, but the
   two routines are inconsistent -- confirm which form is intended.  */
DFtype
|
||||
__floatunsidf (USItype i)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_D (A);  /* A: unpacked double that receives the converted value */
|
||||
DFtype a;  /* packed result */
|
||||
|
||||
FP_INIT_ROUNDMODE;
|
||||
FP_FROM_INT_D (A, i, SI_BITS, USItype);  /* i is SI_BITS wide */
|
||||
FP_PACK_RAW_D (a, A);  /* store A into a */
|
||||
|
||||
return a;
|
||||
}
|
47
src/gemm/soft-fp/floatunsisf.c
Normal file
47
src/gemm/soft-fp/floatunsisf.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert a 32bit unsigned integer to IEEE single
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
/* Convert the 32-bit unsigned integer i to IEEE single precision and return
   it as SFtype.  The macro call is the same as in __floatsisf; only the
   declared type of i (USItype instead of SItype) differs.
   NOTE(review): FP_FROM_INT_S presumably derives signedness from the type
   of i -- confirm against its definition.  */
SFtype
|
||||
__floatunsisf (USItype i)
|
||||
{
|
||||
FP_DECL_EX;  /* presumably the exception state consumed by FP_HANDLE_EXCEPTIONS */
|
||||
FP_DECL_S (A);  /* A: unpacked single that receives the converted value */
|
||||
SFtype a;  /* packed result */
|
||||
|
||||
FP_INIT_ROUNDMODE;  /* presumably selects the rounding mode used by the conversion -- confirm in soft-fp.h */
|
||||
FP_FROM_INT_S (A, i, SI_BITS, USItype);  /* i is SI_BITS wide */
|
||||
FP_PACK_RAW_S (a, A);  /* store A into a */
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return a;
|
||||
}
|
21
src/gemm/soft-fp/gedf2.c
Normal file
21
src/gemm/soft-fp/gedf2.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
/* Compare the doubles a and b and return the value produced by FP_CMP_D
   as a CMPtype.
   NOTE(review): GCC's soft-float documentation says __gedf2 returns a value
   >= 0 when neither argument is NaN and a >= b.  The -2 passed to FP_CMP_D
   looks like the result used for unordered (NaN) operands so that such a
   ">= 0" test fails; the meaning of the final argument (2) is defined by
   FP_CMP_D, which is not visible here -- confirm both.  */
CMPtype
|
||||
__gedf2 (DFtype a, DFtype b)
|
||||
{
|
||||
FP_DECL_EX;  /* presumably the exception state consumed by FP_HANDLE_EXCEPTIONS */
|
||||
FP_DECL_D (A);  /* A: unpacked copy of a */
|
||||
FP_DECL_D (B);  /* B: unpacked copy of b */
|
||||
CMPtype r;  /* comparison result */
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
FP_UNPACK_RAW_D (A, a);
|
||||
FP_UNPACK_RAW_D (B, b);
|
||||
FP_CMP_D (r, A, B, -2, 2);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/* Presumably exports __gtdf2 as a second name for __gedf2 -- confirm the
   strong_alias definition.  */
strong_alias (__gedf2, __gtdf2);
|
21
src/gemm/soft-fp/gesf2.c
Normal file
21
src/gemm/soft-fp/gesf2.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
/* Compare the single-precision values a and b and return the value produced
   by FP_CMP_S as a CMPtype.
   NOTE(review): GCC's soft-float documentation says __gesf2 returns a value
   >= 0 when neither argument is NaN and a >= b.  The -2 passed to FP_CMP_S
   looks like the result used for unordered (NaN) operands so that such a
   ">= 0" test fails; the meaning of the final argument (2) is defined by
   FP_CMP_S, which is not visible here -- confirm both.  */
CMPtype
|
||||
__gesf2 (SFtype a, SFtype b)
|
||||
{
|
||||
FP_DECL_EX;  /* presumably the exception state consumed by FP_HANDLE_EXCEPTIONS */
|
||||
FP_DECL_S (A);  /* A: unpacked copy of a */
|
||||
FP_DECL_S (B);  /* B: unpacked copy of b */
|
||||
CMPtype r;  /* comparison result */
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
FP_UNPACK_RAW_S (A, a);
|
||||
FP_UNPACK_RAW_S (B, b);
|
||||
FP_CMP_S (r, A, B, -2, 2);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/* Presumably exports __gtsf2 as a second name for __gesf2 -- confirm the
   strong_alias definition.  */
strong_alias (__gesf2, __gtsf2);
|
21
src/gemm/soft-fp/ledf2.c
Normal file
21
src/gemm/soft-fp/ledf2.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
/* Compare the doubles a and b and return the value produced by FP_CMP_D
   as a CMPtype.
   NOTE(review): GCC's soft-float documentation says __ledf2 returns a value
   <= 0 when neither argument is NaN and a <= b.  The first 2 passed to
   FP_CMP_D looks like the result used for unordered (NaN) operands so that
   such a "<= 0" test fails; the meaning of the final argument (2) is
   defined by FP_CMP_D, which is not visible here -- confirm both.  */
CMPtype
|
||||
__ledf2 (DFtype a, DFtype b)
|
||||
{
|
||||
FP_DECL_EX;  /* presumably the exception state consumed by FP_HANDLE_EXCEPTIONS */
|
||||
FP_DECL_D (A);  /* A: unpacked copy of a */
|
||||
FP_DECL_D (B);  /* B: unpacked copy of b */
|
||||
CMPtype r;  /* comparison result */
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
FP_UNPACK_RAW_D (A, a);
|
||||
FP_UNPACK_RAW_D (B, b);
|
||||
FP_CMP_D (r, A, B, 2, 2);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/* Presumably exports __ltdf2 as a second name for __ledf2 -- confirm the
   strong_alias definition.  */
strong_alias (__ledf2, __ltdf2);
|
22
src/gemm/soft-fp/lesf2.c
Normal file
22
src/gemm/soft-fp/lesf2.c
Normal file
|
@ -0,0 +1,22 @@
|
|||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
/* Compare the single-precision values a and b and return the value produced
   by FP_CMP_S as a CMPtype.
   NOTE(review): GCC's soft-float documentation says __lesf2 returns a value
   <= 0 when neither argument is NaN and a <= b.  The first 2 passed to
   FP_CMP_S looks like the result used for unordered (NaN) operands so that
   such a "<= 0" test fails; the meaning of the final argument (2) is
   defined by FP_CMP_S, which is not visible here -- confirm both.  */
CMPtype
|
||||
__lesf2 (SFtype a, SFtype b)
|
||||
{
|
||||
FP_DECL_EX;  /* presumably the exception state consumed by FP_HANDLE_EXCEPTIONS */
|
||||
FP_DECL_S (A);  /* A: unpacked copy of a */
|
||||
FP_DECL_S (B);  /* B: unpacked copy of b */
|
||||
CMPtype r;  /* comparison result */
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
FP_UNPACK_RAW_S (A, a);
|
||||
FP_UNPACK_RAW_S (B, b);
|
||||
FP_CMP_S (r, A, B, 2, 2);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/* Presumably exports __ltsf2 as a second name for __lesf2 -- confirm the
   strong_alias definition.  */
strong_alias (__lesf2, __ltsf2);
|
||||
|
1774
src/gemm/soft-fp/longlong.h
Normal file
1774
src/gemm/soft-fp/longlong.h
Normal file
File diff suppressed because it is too large
Load diff
21
src/gemm/soft-fp/muldf3.c
Normal file
21
src/gemm/soft-fp/muldf3.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
DFtype
|
||||
__muldf3 (DFtype a, DFtype b)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_D (A);
|
||||
FP_DECL_D (B);
|
||||
FP_DECL_D (R);
|
||||
DFtype r;
|
||||
|
||||
FP_INIT_ROUNDMODE;
|
||||
FP_UNPACK_D (A, a);
|
||||
FP_UNPACK_D (B, b);
|
||||
FP_MUL_D (R, A, B);
|
||||
FP_PACK_D (r, R);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
22
src/gemm/soft-fp/mulsf3.c
Normal file
22
src/gemm/soft-fp/mulsf3.c
Normal file
|
@ -0,0 +1,22 @@
|
|||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
SFtype
|
||||
__mulsf3 (SFtype a, SFtype b)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_S (A);
|
||||
FP_DECL_S (B);
|
||||
FP_DECL_S (R);
|
||||
SFtype r;
|
||||
|
||||
FP_INIT_ROUNDMODE;
|
||||
FP_UNPACK_S (A, a);
|
||||
FP_UNPACK_S (B, b);
|
||||
FP_MUL_S (R, A, B);
|
||||
FP_PACK_S (r, R);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
16
src/gemm/soft-fp/negdf2.c
Normal file
16
src/gemm/soft-fp/negdf2.c
Normal file
|
@ -0,0 +1,16 @@
|
|||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
DFtype
|
||||
__negdf2 (DFtype a)
|
||||
{
|
||||
FP_DECL_D (A);
|
||||
FP_DECL_D (R);
|
||||
DFtype r;
|
||||
|
||||
FP_UNPACK_RAW_D (A, a);
|
||||
FP_NEG_D (R, A);
|
||||
FP_PACK_RAW_D (r, R);
|
||||
|
||||
return r;
|
||||
}
|
16
src/gemm/soft-fp/negsf2.c
Normal file
16
src/gemm/soft-fp/negsf2.c
Normal file
|
@ -0,0 +1,16 @@
|
|||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
SFtype
|
||||
__negsf2 (SFtype a)
|
||||
{
|
||||
FP_DECL_S (A);
|
||||
FP_DECL_S (R);
|
||||
SFtype r;
|
||||
|
||||
FP_UNPACK_RAW_S (A, a);
|
||||
FP_NEG_S (R, A);
|
||||
FP_PACK_RAW_S (r, R);
|
||||
|
||||
return r;
|
||||
}
|
369
src/gemm/soft-fp/op-1.h
Normal file
369
src/gemm/soft-fp/op-1.h
Normal file
|
@ -0,0 +1,369 @@
|
|||
/* Software floating-point emulation.
|
||||
Basic one-word fraction declaration and manipulation.
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com),
|
||||
Jakub Jelinek (jj@ultra.linux.cz),
|
||||
David S. Miller (davem@redhat.com) and
|
||||
Peter Maydell (pmaydell@chiark.greenend.org.uk).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef SOFT_FP_OP_1_H
|
||||
#define SOFT_FP_OP_1_H 1
|
||||
|
||||
#define _FP_FRAC_DECL_1(X) _FP_W_TYPE X##_f _FP_ZERO_INIT
|
||||
#define _FP_FRAC_COPY_1(D, S) (D##_f = S##_f)
|
||||
#define _FP_FRAC_SET_1(X, I) (X##_f = I)
|
||||
#define _FP_FRAC_HIGH_1(X) (X##_f)
|
||||
#define _FP_FRAC_LOW_1(X) (X##_f)
|
||||
#define _FP_FRAC_WORD_1(X, w) (X##_f)
|
||||
|
||||
#define _FP_FRAC_ADDI_1(X, I) (X##_f += I)
|
||||
/* Shift the one-word fraction X##_f left by N bits, in place.
   When N is the compile-time constant 1 the shift is written as an
   addition of the word to itself; otherwise a plain `<<=' is used.
   N is evaluated more than once, so it must be free of side effects.  */
#define _FP_FRAC_SLL_1(X, N)                    \
  do                                            \
    {                                           \
      if (__builtin_constant_p (N) && (N) == 1) \
        X##_f += X##_f;                         \
      else                                      \
        X##_f <<= (N);                          \
    }                                           \
  while (0)
|
||||
/* Shift the one-word fraction X##_f right by N bits, in place.  The
   shifted-out bits are simply discarded (no sticky bit is kept).  N is
   parenthesized in the expansion, matching _FP_FRAC_SLL_1.  */
#define _FP_FRAC_SRL_1(X, N)    (X##_f >>= (N))
|
||||
|
||||
/* Right shift with sticky-lsb.

   _FP_FRAC_SRST_1 shifts X##_f right by N bits and stores in S whether
   any of the shifted-out bits was set.  _FP_FRAC_SRS_1 shifts X##_f
   right by N bits and ORs that same "any bit lost" flag into the least
   significant bit of the result.

   The SZ argument is accepted but not used by the one-word bodies
   below.  For a non-constant N the lost bits are isolated with
   X << (_FP_W_TYPE_SIZE - N), so N == 0 would shift by the full word
   width; callers are expected to pass N >= 1.  N is evaluated more
   than once and must be free of side effects.  */
#define _FP_FRAC_SRST_1(X, S, N, sz)    __FP_FRAC_SRST_1 (X##_f, S, (N), (sz))
#define _FP_FRAC_SRS_1(X, N, sz)        __FP_FRAC_SRS_1 (X##_f, (N), (sz))

#define __FP_FRAC_SRST_1(X, S, N, sz)                   \
  do                                                    \
    {                                                   \
      S = (__builtin_constant_p (N) && (N) == 1         \
           ? X & 1                                      \
           : (X << (_FP_W_TYPE_SIZE - (N))) != 0);      \
      X = X >> (N);                                     \
    }                                                   \
  while (0)

#define __FP_FRAC_SRS_1(X, N, sz)                               \
  (X = (X >> (N) | (__builtin_constant_p (N) && (N) == 1        \
                    ? X & 1                                     \
                    : (X << (_FP_W_TYPE_SIZE - (N))) != 0)))
|
||||
|
||||
#define _FP_FRAC_ADD_1(R, X, Y) (R##_f = X##_f + Y##_f)
|
||||
#define _FP_FRAC_SUB_1(R, X, Y) (R##_f = X##_f - Y##_f)
|
||||
#define _FP_FRAC_DEC_1(X, Y) (X##_f -= Y##_f)
|
||||
#define _FP_FRAC_CLZ_1(z, X) __FP_CLZ ((z), X##_f)
|
||||
|
||||
/* Predicates. */
|
||||
#define _FP_FRAC_NEGP_1(X) ((_FP_WS_TYPE) X##_f < 0)
|
||||
#define _FP_FRAC_ZEROP_1(X) (X##_f == 0)
|
||||
#define _FP_FRAC_OVERP_1(fs, X) (X##_f & _FP_OVERFLOW_##fs)
|
||||
#define _FP_FRAC_CLEAR_OVERP_1(fs, X) (X##_f &= ~_FP_OVERFLOW_##fs)
|
||||
#define _FP_FRAC_HIGHBIT_DW_1(fs, X) (X##_f & _FP_HIGHBIT_DW_##fs)
|
||||
#define _FP_FRAC_EQ_1(X, Y) (X##_f == Y##_f)
|
||||
#define _FP_FRAC_GE_1(X, Y) (X##_f >= Y##_f)
|
||||
#define _FP_FRAC_GT_1(X, Y) (X##_f > Y##_f)
|
||||
|
||||
#define _FP_ZEROFRAC_1 0
|
||||
#define _FP_MINFRAC_1 1
|
||||
#define _FP_MAXFRAC_1 (~(_FP_WS_TYPE) 0)
|
||||
|
||||
/* Unpack the raw bits of a native fp value.  Do not classify or
   normalize the data.

   FS selects the format: the macros use `union _FP_UNION_<fs>', which
   must provide a `flt' member holding the native value and a `bits'
   member with `frac', `exp' and `sign' fields.  The fields are copied
   into X##_f, X##_e and X##_s.  _FP_UNPACK_RAW_1 takes the value
   itself; _FP_UNPACK_RAW_1_P takes a pointer to it and reads the
   fields through that pointer.  */

#define _FP_UNPACK_RAW_1(fs, X, val)                    \
  do                                                    \
    {                                                   \
      union _FP_UNION_##fs _FP_UNPACK_RAW_1_flo;        \
      _FP_UNPACK_RAW_1_flo.flt = (val);                 \
                                                        \
      X##_f = _FP_UNPACK_RAW_1_flo.bits.frac;           \
      X##_e = _FP_UNPACK_RAW_1_flo.bits.exp;            \
      X##_s = _FP_UNPACK_RAW_1_flo.bits.sign;           \
    }                                                   \
  while (0)

#define _FP_UNPACK_RAW_1_P(fs, X, val)                  \
  do                                                    \
    {                                                   \
      union _FP_UNION_##fs *_FP_UNPACK_RAW_1_P_flo      \
        = (union _FP_UNION_##fs *) (val);               \
                                                        \
      X##_f = _FP_UNPACK_RAW_1_P_flo->bits.frac;        \
      X##_e = _FP_UNPACK_RAW_1_P_flo->bits.exp;         \
      X##_s = _FP_UNPACK_RAW_1_P_flo->bits.sign;        \
    }                                                   \
  while (0)

/* Repack the raw bits of a native fp value.

   The inverse of the macros above: X##_f, X##_e and X##_s are stored
   into the `frac', `exp' and `sign' fields.  _FP_PACK_RAW_1 assigns the
   resulting native value to VAL; _FP_PACK_RAW_1_P writes the fields
   directly into the object VAL points to.  */

#define _FP_PACK_RAW_1(fs, val, X)                      \
  do                                                    \
    {                                                   \
      union _FP_UNION_##fs _FP_PACK_RAW_1_flo;          \
                                                        \
      _FP_PACK_RAW_1_flo.bits.frac = X##_f;             \
      _FP_PACK_RAW_1_flo.bits.exp = X##_e;              \
      _FP_PACK_RAW_1_flo.bits.sign = X##_s;             \
                                                        \
      (val) = _FP_PACK_RAW_1_flo.flt;                   \
    }                                                   \
  while (0)

#define _FP_PACK_RAW_1_P(fs, val, X)                    \
  do                                                    \
    {                                                   \
      union _FP_UNION_##fs *_FP_PACK_RAW_1_P_flo        \
        = (union _FP_UNION_##fs *) (val);               \
                                                        \
      _FP_PACK_RAW_1_P_flo->bits.frac = X##_f;          \
      _FP_PACK_RAW_1_P_flo->bits.exp = X##_e;           \
      _FP_PACK_RAW_1_P_flo->bits.sign = X##_s;          \
    }                                                   \
  while (0)
|
||||
|
||||
|
||||
/* Multiplication algorithms: */
|
||||
|
||||
/* Basic. Assuming the host word size is >= 2*FRACBITS, we can do the
|
||||
multiplication immediately. */
|
||||
|
||||
#define _FP_MUL_MEAT_DW_1_imm(wfracbits, R, X, Y) \
|
||||
do \
|
||||
{ \
|
||||
R##_f = X##_f * Y##_f; \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
#define _FP_MUL_MEAT_1_imm(wfracbits, R, X, Y) \
|
||||
do \
|
||||
{ \
|
||||
_FP_MUL_MEAT_DW_1_imm ((wfracbits), R, X, Y); \
|
||||
/* Normalize since we know where the msb of the multiplicands \
|
||||
were (bit B), we know that the msb of the product is \
|
||||
at either 2B or 2B-1. */ \
|
||||
_FP_FRAC_SRS_1 (R, (wfracbits)-1, 2*(wfracbits)); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
|
||||
|
||||
#define _FP_MUL_MEAT_DW_1_wide(wfracbits, R, X, Y, doit) \
|
||||
do \
|
||||
{ \
|
||||
doit (R##_f1, R##_f0, X##_f, Y##_f); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
#define _FP_MUL_MEAT_1_wide(wfracbits, R, X, Y, doit) \
|
||||
do \
|
||||
{ \
|
||||
_FP_FRAC_DECL_2 (_FP_MUL_MEAT_1_wide_Z); \
|
||||
_FP_MUL_MEAT_DW_1_wide ((wfracbits), _FP_MUL_MEAT_1_wide_Z, \
|
||||
X, Y, doit); \
|
||||
/* Normalize since we know where the msb of the multiplicands \
|
||||
were (bit B), we know that the msb of the product is \
|
||||
at either 2B or 2B-1. */ \
|
||||
_FP_FRAC_SRS_2 (_FP_MUL_MEAT_1_wide_Z, (wfracbits)-1, \
|
||||
2*(wfracbits)); \
|
||||
R##_f = _FP_MUL_MEAT_1_wide_Z_f0; \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
/* Finally, a simple widening multiply algorithm. What fun! */
|
||||
|
||||
#define _FP_MUL_MEAT_DW_1_hard(wfracbits, R, X, Y) \
|
||||
do \
|
||||
{ \
|
||||
_FP_W_TYPE _FP_MUL_MEAT_DW_1_hard_xh, _FP_MUL_MEAT_DW_1_hard_xl; \
|
||||
_FP_W_TYPE _FP_MUL_MEAT_DW_1_hard_yh, _FP_MUL_MEAT_DW_1_hard_yl; \
|
||||
_FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_1_hard_a); \
|
||||
\
|
||||
/* Split the words in half. */ \
|
||||
_FP_MUL_MEAT_DW_1_hard_xh = X##_f >> (_FP_W_TYPE_SIZE/2); \
|
||||
_FP_MUL_MEAT_DW_1_hard_xl \
|
||||
= X##_f & (((_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE/2)) - 1); \
|
||||
_FP_MUL_MEAT_DW_1_hard_yh = Y##_f >> (_FP_W_TYPE_SIZE/2); \
|
||||
_FP_MUL_MEAT_DW_1_hard_yl \
|
||||
= Y##_f & (((_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE/2)) - 1); \
|
||||
\
|
||||
/* Multiply the pieces. */ \
|
||||
R##_f0 = _FP_MUL_MEAT_DW_1_hard_xl * _FP_MUL_MEAT_DW_1_hard_yl; \
|
||||
_FP_MUL_MEAT_DW_1_hard_a_f0 \
|
||||
= _FP_MUL_MEAT_DW_1_hard_xh * _FP_MUL_MEAT_DW_1_hard_yl; \
|
||||
_FP_MUL_MEAT_DW_1_hard_a_f1 \
|
||||
= _FP_MUL_MEAT_DW_1_hard_xl * _FP_MUL_MEAT_DW_1_hard_yh; \
|
||||
R##_f1 = _FP_MUL_MEAT_DW_1_hard_xh * _FP_MUL_MEAT_DW_1_hard_yh; \
|
||||
\
|
||||
/* Reassemble into two full words. */ \
|
||||
if ((_FP_MUL_MEAT_DW_1_hard_a_f0 += _FP_MUL_MEAT_DW_1_hard_a_f1) \
|
||||
< _FP_MUL_MEAT_DW_1_hard_a_f1) \
|
||||
R##_f1 += (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE/2); \
|
||||
_FP_MUL_MEAT_DW_1_hard_a_f1 \
|
||||
= _FP_MUL_MEAT_DW_1_hard_a_f0 >> (_FP_W_TYPE_SIZE/2); \
|
||||
_FP_MUL_MEAT_DW_1_hard_a_f0 \
|
||||
= _FP_MUL_MEAT_DW_1_hard_a_f0 << (_FP_W_TYPE_SIZE/2); \
|
||||
_FP_FRAC_ADD_2 (R, R, _FP_MUL_MEAT_DW_1_hard_a); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
#define _FP_MUL_MEAT_1_hard(wfracbits, R, X, Y) \
|
||||
do \
|
||||
{ \
|
||||
_FP_FRAC_DECL_2 (_FP_MUL_MEAT_1_hard_z); \
|
||||
_FP_MUL_MEAT_DW_1_hard ((wfracbits), \
|
||||
_FP_MUL_MEAT_1_hard_z, X, Y); \
|
||||
\
|
||||
/* Normalize. */ \
|
||||
_FP_FRAC_SRS_2 (_FP_MUL_MEAT_1_hard_z, \
|
||||
(wfracbits) - 1, 2*(wfracbits)); \
|
||||
R##_f = _FP_MUL_MEAT_1_hard_z_f0; \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
|
||||
/* Division algorithms: */
|
||||
|
||||
/* Basic. Assuming the host word size is >= 2*FRACBITS, we can do the
|
||||
division immediately. Give this macro either _FP_DIV_HELP_imm for
|
||||
C primitives or _FP_DIV_HELP_ldiv for the ISO function. Which you
|
||||
choose will depend on what the compiler does with divrem4. */
|
||||
|
||||
#define _FP_DIV_MEAT_1_imm(fs, R, X, Y, doit) \
|
||||
do \
|
||||
{ \
|
||||
_FP_W_TYPE _FP_DIV_MEAT_1_imm_q, _FP_DIV_MEAT_1_imm_r; \
|
||||
X##_f <<= (X##_f < Y##_f \
|
||||
? R##_e--, _FP_WFRACBITS_##fs \
|
||||
: _FP_WFRACBITS_##fs - 1); \
|
||||
doit (_FP_DIV_MEAT_1_imm_q, _FP_DIV_MEAT_1_imm_r, X##_f, Y##_f); \
|
||||
R##_f = _FP_DIV_MEAT_1_imm_q | (_FP_DIV_MEAT_1_imm_r != 0); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
/* GCC's longlong.h defines a 2W / 1W => (1W,1W) primitive udiv_qrnnd
|
||||
that may be useful in this situation. This first is for a primitive
|
||||
that requires normalization, the second for one that does not. Look
|
||||
for UDIV_NEEDS_NORMALIZATION to tell which your machine needs. */
|
||||
|
||||
#define _FP_DIV_MEAT_1_udiv_norm(fs, R, X, Y) \
|
||||
do \
|
||||
{ \
|
||||
_FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_nh; \
|
||||
_FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_nl; \
|
||||
_FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_q; \
|
||||
_FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_r; \
|
||||
_FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_y; \
|
||||
\
|
||||
/* Normalize Y -- i.e. make the most significant bit set. */ \
|
||||
_FP_DIV_MEAT_1_udiv_norm_y = Y##_f << _FP_WFRACXBITS_##fs; \
|
||||
\
|
||||
/* Shift X op correspondingly high, that is, up one full word. */ \
|
||||
if (X##_f < Y##_f) \
|
||||
{ \
|
||||
R##_e--; \
|
||||
_FP_DIV_MEAT_1_udiv_norm_nl = 0; \
|
||||
_FP_DIV_MEAT_1_udiv_norm_nh = X##_f; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
_FP_DIV_MEAT_1_udiv_norm_nl = X##_f << (_FP_W_TYPE_SIZE - 1); \
|
||||
_FP_DIV_MEAT_1_udiv_norm_nh = X##_f >> 1; \
|
||||
} \
|
||||
\
|
||||
udiv_qrnnd (_FP_DIV_MEAT_1_udiv_norm_q, \
|
||||
_FP_DIV_MEAT_1_udiv_norm_r, \
|
||||
_FP_DIV_MEAT_1_udiv_norm_nh, \
|
||||
_FP_DIV_MEAT_1_udiv_norm_nl, \
|
||||
_FP_DIV_MEAT_1_udiv_norm_y); \
|
||||
R##_f = (_FP_DIV_MEAT_1_udiv_norm_q \
|
||||
| (_FP_DIV_MEAT_1_udiv_norm_r != 0)); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
#define _FP_DIV_MEAT_1_udiv(fs, R, X, Y) \
|
||||
do \
|
||||
{ \
|
||||
_FP_W_TYPE _FP_DIV_MEAT_1_udiv_nh, _FP_DIV_MEAT_1_udiv_nl; \
|
||||
_FP_W_TYPE _FP_DIV_MEAT_1_udiv_q, _FP_DIV_MEAT_1_udiv_r; \
|
||||
if (X##_f < Y##_f) \
|
||||
{ \
|
||||
R##_e--; \
|
||||
_FP_DIV_MEAT_1_udiv_nl = X##_f << _FP_WFRACBITS_##fs; \
|
||||
_FP_DIV_MEAT_1_udiv_nh = X##_f >> _FP_WFRACXBITS_##fs; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
_FP_DIV_MEAT_1_udiv_nl = X##_f << (_FP_WFRACBITS_##fs - 1); \
|
||||
_FP_DIV_MEAT_1_udiv_nh = X##_f >> (_FP_WFRACXBITS_##fs + 1); \
|
||||
} \
|
||||
udiv_qrnnd (_FP_DIV_MEAT_1_udiv_q, _FP_DIV_MEAT_1_udiv_r, \
|
||||
_FP_DIV_MEAT_1_udiv_nh, _FP_DIV_MEAT_1_udiv_nl, \
|
||||
Y##_f); \
|
||||
R##_f = _FP_DIV_MEAT_1_udiv_q | (_FP_DIV_MEAT_1_udiv_r != 0); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
|
||||
/* Square root algorithms:
|
||||
We have just one right now, maybe Newton approximation
|
||||
should be added for those machines where division is fast. */
|
||||
|
||||
#define _FP_SQRT_MEAT_1(R, S, T, X, q) \
|
||||
do \
|
||||
{ \
|
||||
while ((q) != _FP_WORK_ROUND) \
|
||||
{ \
|
||||
T##_f = S##_f + (q); \
|
||||
if (T##_f <= X##_f) \
|
||||
{ \
|
||||
S##_f = T##_f + (q); \
|
||||
X##_f -= T##_f; \
|
||||
R##_f += (q); \
|
||||
} \
|
||||
_FP_FRAC_SLL_1 (X, 1); \
|
||||
(q) >>= 1; \
|
||||
} \
|
||||
if (X##_f) \
|
||||
{ \
|
||||
if (S##_f < X##_f) \
|
||||
R##_f |= _FP_WORK_ROUND; \
|
||||
R##_f |= _FP_WORK_STICKY; \
|
||||
} \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
/* Assembly/disassembly for converting to/from integral types.
|
||||
No shifting or overflow handled here. */
|
||||
|
||||
#define _FP_FRAC_ASSEMBLE_1(r, X, rsize) ((r) = X##_f)
|
||||
#define _FP_FRAC_DISASSEMBLE_1(X, r, rsize) (X##_f = (r))
|
||||
|
||||
|
||||
/* Convert FP values between word sizes. */
|
||||
|
||||
#define _FP_FRAC_COPY_1_1(D, S) (D##_f = S##_f)
|
||||
|
||||
#endif /* !SOFT_FP_OP_1_H */
|
705
src/gemm/soft-fp/op-2.h
Normal file
705
src/gemm/soft-fp/op-2.h
Normal file
|
@ -0,0 +1,705 @@
|
|||
/* Software floating-point emulation.
|
||||
Basic two-word fraction declaration and manipulation.
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com),
|
||||
Jakub Jelinek (jj@ultra.linux.cz),
|
||||
David S. Miller (davem@redhat.com) and
|
||||
Peter Maydell (pmaydell@chiark.greenend.org.uk).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef SOFT_FP_OP_2_H
|
||||
#define SOFT_FP_OP_2_H 1
|
||||
|
||||
#define _FP_FRAC_DECL_2(X) \
|
||||
_FP_W_TYPE X##_f0 _FP_ZERO_INIT, X##_f1 _FP_ZERO_INIT
|
||||
#define _FP_FRAC_COPY_2(D, S) (D##_f0 = S##_f0, D##_f1 = S##_f1)
|
||||
#define _FP_FRAC_SET_2(X, I) __FP_FRAC_SET_2 (X, I)
|
||||
#define _FP_FRAC_HIGH_2(X) (X##_f1)
|
||||
#define _FP_FRAC_LOW_2(X) (X##_f0)
|
||||
#define _FP_FRAC_WORD_2(X, w) (X##_f##w)
|
||||
|
||||
/* Shift the two-word fraction (X##_f1:X##_f0, f1 being the high word)
   left by N bits, in place.

   For N < _FP_W_TYPE_SIZE the bits leaving the low word are carried
   into the high word; the compile-time constant N == 1 is special-cased
   as a doubling with the low word's top bit added in as the carry.
   For N >= _FP_W_TYPE_SIZE the low word moves into the high word and
   the low word becomes zero.  With a non-constant N the carry uses
   X##_f0 >> (_FP_W_TYPE_SIZE - N), so N == 0 would shift by the full
   word width.  N is evaluated more than once.  */
#define _FP_FRAC_SLL_2(X, N)                                                \
  (void) (((N) < _FP_W_TYPE_SIZE)                                           \
          ? ({                                                              \
              if (__builtin_constant_p (N) && (N) == 1)                     \
                {                                                           \
                  X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE) (X##_f0)) < 0); \
                  X##_f0 += X##_f0;                                         \
                }                                                           \
              else                                                          \
                {                                                           \
                  X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
                  X##_f0 <<= (N);                                           \
                }                                                           \
              0;                                                            \
            })                                                              \
          : ({                                                              \
              X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);                   \
              X##_f0 = 0;                                                   \
            }))
|
||||
|
||||
|
||||
/* Shift the two-word fraction (X##_f1:X##_f0, f1 being the high word)
   right by N bits, in place.  Shifted-out bits are discarded.

   For N < _FP_W_TYPE_SIZE the bits leaving the high word are moved
   into the top of the low word; for N >= _FP_W_TYPE_SIZE the high word
   moves into the low word and the high word becomes zero.  The first
   branch computes X##_f1 << (_FP_W_TYPE_SIZE - N), so N == 0 would
   shift by the full word width.  N is evaluated more than once.  */
#define _FP_FRAC_SRL_2(X, N)                                            \
  (void) (((N) < _FP_W_TYPE_SIZE)                                       \
          ? ({                                                          \
              X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
              X##_f1 >>= (N);                                           \
            })                                                          \
          : ({                                                          \
              X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);               \
              X##_f1 = 0;                                               \
            }))
|
||||
|
||||
/* Right shift with sticky-lsb.

   Shift the two-word fraction (X##_f1:X##_f0) right by N bits in place
   and set S to nonzero iff any shifted-out bit was set.  The SZ
   argument is not used by this two-word body.

   For N < _FP_W_TYPE_SIZE only bits of the low word are lost.  For
   N >= _FP_W_TYPE_SIZE the whole low word is lost together with the
   low (N - _FP_W_TYPE_SIZE) bits of the high word; N equal to the word
   size is special-cased so the high word is not shifted by a full word
   width.  In the first branch N == 0 would shift by the full word
   width.  N is evaluated more than once.  */
#define _FP_FRAC_SRST_2(X, S, N, sz)                                      \
  (void) (((N) < _FP_W_TYPE_SIZE)                                         \
          ? ({                                                            \
              S = (__builtin_constant_p (N) && (N) == 1                   \
                   ? X##_f0 & 1                                           \
                   : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0);           \
              X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
              X##_f1 >>= (N);                                             \
            })                                                            \
          : ({                                                            \
              S = ((((N) == _FP_W_TYPE_SIZE                               \
                     ? 0                                                  \
                     : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))             \
                    | X##_f0) != 0);                                      \
              X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE));               \
              X##_f1 = 0;                                                 \
            }))
|
||||
|
||||
/* Shift the two-word fraction (X##_f1:X##_f0) right by N bits in place,
   ORing a "some bit was shifted out" flag into the least significant
   bit of the result (sticky shift).  The SZ argument is not used by
   this two-word body.

   For N < _FP_W_TYPE_SIZE only bits of the low word are lost.  For
   N >= _FP_W_TYPE_SIZE the whole low word is lost together with the
   low (N - _FP_W_TYPE_SIZE) bits of the high word; N equal to the word
   size is special-cased so the high word is not shifted by a full word
   width.  In the first branch N == 0 would shift by the full word
   width.  N is evaluated more than once.  */
#define _FP_FRAC_SRS_2(X, N, sz)                                        \
  (void) (((N) < _FP_W_TYPE_SIZE)                                       \
          ? ({                                                          \
              X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) \
                        | (__builtin_constant_p (N) && (N) == 1         \
                           ? X##_f0 & 1                                 \
                           : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
              X##_f1 >>= (N);                                           \
            })                                                          \
          : ({                                                          \
              X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)               \
                        | ((((N) == _FP_W_TYPE_SIZE                     \
                             ? 0                                        \
                             : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))   \
                            | X##_f0) != 0));                           \
              X##_f1 = 0;                                               \
            }))
|
||||
|
||||
#define _FP_FRAC_ADDI_2(X, I) \
|
||||
__FP_FRAC_ADDI_2 (X##_f1, X##_f0, I)
|
||||
|
||||
#define _FP_FRAC_ADD_2(R, X, Y) \
|
||||
__FP_FRAC_ADD_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
|
||||
|
||||
#define _FP_FRAC_SUB_2(R, X, Y) \
|
||||
__FP_FRAC_SUB_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
|
||||
|
||||
#define _FP_FRAC_DEC_2(X, Y) \
|
||||
__FP_FRAC_DEC_2 (X##_f1, X##_f0, Y##_f1, Y##_f0)
|
||||
|
||||
#define _FP_FRAC_CLZ_2(R, X) \
|
||||
do \
|
||||
{ \
|
||||
if (X##_f1) \
|
||||
__FP_CLZ ((R), X##_f1); \
|
||||
else \
|
||||
{ \
|
||||
__FP_CLZ ((R), X##_f0); \
|
||||
(R) += _FP_W_TYPE_SIZE; \
|
||||
} \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
/* Predicates.

   All of these operate on a two-word fraction X##_f1:X##_f0 with f1 as
   the more significant word:
     NEGP         - the high word, viewed as _FP_WS_TYPE, is negative.
     ZEROP        - both words are zero.
     OVERP        - the format's _FP_OVERFLOW_<fs> bit is set in the
                    high word; CLEAR_OVERP clears it.
     HIGHBIT_DW   - tests _FP_HIGHBIT_DW_<fs> in the high word of a
                    double-width value.
     EQ / GT / GE - word-wise comparison, high word first.  */
#define _FP_FRAC_NEGP_2(X)      ((_FP_WS_TYPE) X##_f1 < 0)
#define _FP_FRAC_ZEROP_2(X)     ((X##_f1 | X##_f0) == 0)
#define _FP_FRAC_OVERP_2(fs, X) (_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
#define _FP_FRAC_CLEAR_OVERP_2(fs, X)   (_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
#define _FP_FRAC_HIGHBIT_DW_2(fs, X)    \
  (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
#define _FP_FRAC_EQ_2(X, Y)     (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
#define _FP_FRAC_GT_2(X, Y)     \
  (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
#define _FP_FRAC_GE_2(X, Y)     \
  (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))

/* Constant two-word fractions, written as "high, low" initializer
   pairs (so they expand to two comma-separated arguments).  */
#define _FP_ZEROFRAC_2  0, 0
#define _FP_MINFRAC_2   0, 1
#define _FP_MAXFRAC_2   (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)
|
||||
|
||||
/* Internals. */
|
||||
|
||||
#define __FP_FRAC_SET_2(X, I1, I0) (X##_f0 = I0, X##_f1 = I1)
|
||||
|
||||
#define __FP_CLZ_2(R, xh, xl) \
|
||||
do \
|
||||
{ \
|
||||
if (xh) \
|
||||
__FP_CLZ ((R), xh); \
|
||||
else \
|
||||
{ \
|
||||
__FP_CLZ ((R), xl); \
|
||||
(R) += _FP_W_TYPE_SIZE; \
|
||||
} \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
#if 0
|
||||
|
||||
# ifndef __FP_FRAC_ADDI_2
|
||||
# define __FP_FRAC_ADDI_2(xh, xl, i) \
|
||||
(xh += ((xl += i) < i))
|
||||
# endif
|
||||
# ifndef __FP_FRAC_ADD_2
|
||||
# define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
|
||||
(rh = xh + yh + ((rl = xl + yl) < xl))
|
||||
# endif
|
||||
# ifndef __FP_FRAC_SUB_2
|
||||
# define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
|
||||
(rh = xh - yh - ((rl = xl - yl) > xl))
|
||||
# endif
|
||||
# ifndef __FP_FRAC_DEC_2
|
||||
# define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
|
||||
do \
|
||||
{ \
|
||||
UWtype __FP_FRAC_DEC_2_t = xl; \
|
||||
xh -= yh + ((xl -= yl) > __FP_FRAC_DEC_2_t); \
|
||||
} \
|
||||
while (0)
|
||||
# endif
|
||||
|
||||
#else
|
||||
|
||||
# undef __FP_FRAC_ADDI_2
|
||||
# define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa (xh, xl, xh, xl, 0, i)
|
||||
# undef __FP_FRAC_ADD_2
|
||||
# define __FP_FRAC_ADD_2 add_ssaaaa
|
||||
# undef __FP_FRAC_SUB_2
|
||||
# define __FP_FRAC_SUB_2 sub_ddmmss
|
||||
# undef __FP_FRAC_DEC_2
|
||||
# define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
|
||||
sub_ddmmss (xh, xl, xh, xl, yh, yl)
|
||||
|
||||
#endif
|
||||
|
||||
/* Unpack the raw bits of a native fp value. Do not classify or
|
||||
normalize the data. */
|
||||
|
||||
#define _FP_UNPACK_RAW_2(fs, X, val) \
|
||||
do \
|
||||
{ \
|
||||
union _FP_UNION_##fs _FP_UNPACK_RAW_2_flo; \
|
||||
_FP_UNPACK_RAW_2_flo.flt = (val); \
|
||||
\
|
||||
X##_f0 = _FP_UNPACK_RAW_2_flo.bits.frac0; \
|
||||
X##_f1 = _FP_UNPACK_RAW_2_flo.bits.frac1; \
|
||||
X##_e = _FP_UNPACK_RAW_2_flo.bits.exp; \
|
||||
X##_s = _FP_UNPACK_RAW_2_flo.bits.sign; \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
#define _FP_UNPACK_RAW_2_P(fs, X, val) \
|
||||
do \
|
||||
{ \
|
||||
union _FP_UNION_##fs *_FP_UNPACK_RAW_2_P_flo \
|
||||
= (union _FP_UNION_##fs *) (val); \
|
||||
\
|
||||
X##_f0 = _FP_UNPACK_RAW_2_P_flo->bits.frac0; \
|
||||
X##_f1 = _FP_UNPACK_RAW_2_P_flo->bits.frac1; \
|
||||
X##_e = _FP_UNPACK_RAW_2_P_flo->bits.exp; \
|
||||
X##_s = _FP_UNPACK_RAW_2_P_flo->bits.sign; \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
|
||||
/* Repack the raw bits of a native fp value. */
|
||||
|
||||
#define _FP_PACK_RAW_2(fs, val, X) \
|
||||
do \
|
||||
{ \
|
||||
union _FP_UNION_##fs _FP_PACK_RAW_2_flo; \
|
||||
\
|
||||
_FP_PACK_RAW_2_flo.bits.frac0 = X##_f0; \
|
||||
_FP_PACK_RAW_2_flo.bits.frac1 = X##_f1; \
|
||||
_FP_PACK_RAW_2_flo.bits.exp = X##_e; \
|
||||
_FP_PACK_RAW_2_flo.bits.sign = X##_s; \
|
||||
\
|
||||
(val) = _FP_PACK_RAW_2_flo.flt; \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
#define _FP_PACK_RAW_2_P(fs, val, X) \
|
||||
do \
|
||||
{ \
|
||||
union _FP_UNION_##fs *_FP_PACK_RAW_2_P_flo \
|
||||
= (union _FP_UNION_##fs *) (val); \
|
||||
\
|
||||
_FP_PACK_RAW_2_P_flo->bits.frac0 = X##_f0; \
|
||||
_FP_PACK_RAW_2_P_flo->bits.frac1 = X##_f1; \
|
||||
_FP_PACK_RAW_2_P_flo->bits.exp = X##_e; \
|
||||
_FP_PACK_RAW_2_P_flo->bits.sign = X##_s; \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
|
||||
/* Multiplication algorithms: */
|
||||
|
||||
/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
|
||||
|
||||
#define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit) \
|
||||
do \
|
||||
{ \
|
||||
_FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_b); \
|
||||
_FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_c); \
|
||||
\
|
||||
doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0), \
|
||||
X##_f0, Y##_f0); \
|
||||
doit (_FP_MUL_MEAT_DW_2_wide_b_f1, _FP_MUL_MEAT_DW_2_wide_b_f0, \
|
||||
X##_f0, Y##_f1); \
|
||||
doit (_FP_MUL_MEAT_DW_2_wide_c_f1, _FP_MUL_MEAT_DW_2_wide_c_f0, \
|
||||
X##_f1, Y##_f0); \
|
||||
doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
|
||||
X##_f1, Y##_f1); \
|
||||
\
|
||||
__FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
|
||||
_FP_FRAC_WORD_4 (R, 1), 0, \
|
||||
_FP_MUL_MEAT_DW_2_wide_b_f1, \
|
||||
_FP_MUL_MEAT_DW_2_wide_b_f0, \
|
||||
_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
|
||||
_FP_FRAC_WORD_4 (R, 1)); \
|
||||
__FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
|
||||
_FP_FRAC_WORD_4 (R, 1), 0, \
|
||||
_FP_MUL_MEAT_DW_2_wide_c_f1, \
|
||||
_FP_MUL_MEAT_DW_2_wide_c_f0, \
|
||||
_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
|
||||
_FP_FRAC_WORD_4 (R, 1)); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \
|
||||
do \
|
||||
{ \
|
||||
_FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_z); \
|
||||
\
|
||||
_FP_MUL_MEAT_DW_2_wide ((wfracbits), _FP_MUL_MEAT_2_wide_z, \
|
||||
X, Y, doit); \
|
||||
\
|
||||
/* Normalize since we know where the msb of the multiplicands \
|
||||
were (bit B), we know that the msb of the product is \
|
||||
at either 2B or 2B-1. */ \
|
||||
_FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_z, (wfracbits)-1, \
|
||||
2*(wfracbits)); \
|
||||
R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 0); \
|
||||
R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 1); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
/* Given a 1W * 1W => 2W primitive, do the extended multiplication.
|
||||
Do only 3 multiplications instead of four. This one is for machines
|
||||
where multiplication is much more expensive than subtraction. */
|
||||
|
||||
#define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit) \
|
||||
do \
|
||||
{ \
|
||||
_FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_b); \
|
||||
_FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_c); \
|
||||
_FP_W_TYPE _FP_MUL_MEAT_DW_2_wide_3mul_d; \
|
||||
int _FP_MUL_MEAT_DW_2_wide_3mul_c1; \
|
||||
int _FP_MUL_MEAT_DW_2_wide_3mul_c2; \
|
||||
\
|
||||
_FP_MUL_MEAT_DW_2_wide_3mul_b_f0 = X##_f0 + X##_f1; \
|
||||
_FP_MUL_MEAT_DW_2_wide_3mul_c1 \
|
||||
= _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 < X##_f0; \
|
||||
_FP_MUL_MEAT_DW_2_wide_3mul_b_f1 = Y##_f0 + Y##_f1; \
|
||||
_FP_MUL_MEAT_DW_2_wide_3mul_c2 \
|
||||
= _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 < Y##_f0; \
|
||||
doit (_FP_MUL_MEAT_DW_2_wide_3mul_d, _FP_FRAC_WORD_4 (R, 0), \
|
||||
X##_f0, Y##_f0); \
|
||||
doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1), \
|
||||
_FP_MUL_MEAT_DW_2_wide_3mul_b_f0, \
|
||||
_FP_MUL_MEAT_DW_2_wide_3mul_b_f1); \
|
||||
doit (_FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
|
||||
_FP_MUL_MEAT_DW_2_wide_3mul_c_f0, X##_f1, Y##_f1); \
|
||||
\
|
||||
_FP_MUL_MEAT_DW_2_wide_3mul_b_f0 \
|
||||
&= -_FP_MUL_MEAT_DW_2_wide_3mul_c2; \
|
||||
_FP_MUL_MEAT_DW_2_wide_3mul_b_f1 \
|
||||
&= -_FP_MUL_MEAT_DW_2_wide_3mul_c1; \
|
||||
__FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
|
||||
_FP_FRAC_WORD_4 (R, 1), \
|
||||
(_FP_MUL_MEAT_DW_2_wide_3mul_c1 \
|
||||
& _FP_MUL_MEAT_DW_2_wide_3mul_c2), 0, \
|
||||
_FP_MUL_MEAT_DW_2_wide_3mul_d, \
|
||||
0, _FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1)); \
|
||||
__FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
|
||||
_FP_MUL_MEAT_DW_2_wide_3mul_b_f0); \
|
||||
__FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
|
||||
_FP_MUL_MEAT_DW_2_wide_3mul_b_f1); \
|
||||
__FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
|
||||
_FP_FRAC_WORD_4 (R, 1), \
|
||||
0, _FP_MUL_MEAT_DW_2_wide_3mul_d, \
|
||||
_FP_FRAC_WORD_4 (R, 0)); \
|
||||
__FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
|
||||
_FP_FRAC_WORD_4 (R, 1), 0, \
|
||||
_FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
|
||||
_FP_MUL_MEAT_DW_2_wide_3mul_c_f0); \
|
||||
__FP_FRAC_ADD_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
|
||||
_FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
|
||||
_FP_MUL_MEAT_DW_2_wide_3mul_c_f0, \
|
||||
_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2)); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \
|
||||
do \
|
||||
{ \
|
||||
_FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_3mul_z); \
|
||||
\
|
||||
_FP_MUL_MEAT_DW_2_wide_3mul ((wfracbits), \
|
||||
_FP_MUL_MEAT_2_wide_3mul_z, \
|
||||
X, Y, doit); \
|
||||
\
|
||||
/* Normalize since we know where the msb of the multiplicands \
|
||||
were (bit B), we know that the msb of the product is \
|
||||
at either 2B or 2B-1. */ \
|
||||
_FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_3mul_z, \
|
||||
(wfracbits)-1, 2*(wfracbits)); \
|
||||
R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 0); \
|
||||
R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 1); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
#define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y) \
|
||||
do \
|
||||
{ \
|
||||
_FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_x[2]; \
|
||||
_FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_y[2]; \
|
||||
_FP_MUL_MEAT_DW_2_gmp_x[0] = X##_f0; \
|
||||
_FP_MUL_MEAT_DW_2_gmp_x[1] = X##_f1; \
|
||||
_FP_MUL_MEAT_DW_2_gmp_y[0] = Y##_f0; \
|
||||
_FP_MUL_MEAT_DW_2_gmp_y[1] = Y##_f1; \
|
||||
\
|
||||
mpn_mul_n (R##_f, _FP_MUL_MEAT_DW_2_gmp_x, \
|
||||
_FP_MUL_MEAT_DW_2_gmp_y, 2); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \
|
||||
do \
|
||||
{ \
|
||||
_FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_gmp_z); \
|
||||
\
|
||||
_FP_MUL_MEAT_DW_2_gmp ((wfracbits), _FP_MUL_MEAT_2_gmp_z, X, Y); \
|
||||
\
|
||||
/* Normalize since we know where the msb of the multiplicands \
|
||||
were (bit B), we know that the msb of the product is \
|
||||
at either 2B or 2B-1. */ \
|
||||
_FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_gmp_z, (wfracbits)-1, \
|
||||
2*(wfracbits)); \
|
||||
R##_f0 = _FP_MUL_MEAT_2_gmp_z_f[0]; \
|
||||
R##_f1 = _FP_MUL_MEAT_2_gmp_z_f[1]; \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
/* Do at most 120x120=240 bits multiplication using double floating
|
||||
point multiplication. This is useful if floating point
|
||||
multiplication has much bigger throughput than integer multiply.
|
||||
It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
|
||||
between 106 and 120 only.
|
||||
Caller guarantees that X and Y have (1LL << (wfracbits - 1)) set.
|
||||
SETFETZ is a macro which will disable all FPU exceptions and set rounding
|
||||
towards zero, RESETFE should optionally reset it back. */
|
||||
|
||||
/* Parameters: WFRACBITS is a compile-time constant in [106, 120]
   (checked with _FP_STATIC_ASSERT); R, X, Y are two-word fraction
   name prefixes (X##_f0/X##_f1 etc.); SETFETZ and RESETFE are
   statements expanded verbatim before and after the computation.

   X and Y are each split into five limbs (24 bits for the four low
   limbs, the rest in the top limb), converted to double and scaled by
   powers of 2^-24.  The nine column sums of the schoolbook product are
   accumulated in doubles; the low four columns only contribute a
   sticky bit (_y240), the remaining ones are reassembled into
   R##_f1/R##_f0 from the low 24 mantissa bits of biased doubles.  */
#define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
  do \
    { \
      static const double _const[] = \
        { \
          /* 2^-24 */ 5.9604644775390625e-08, \
          /* 2^-48 */ 3.5527136788005009e-15, \
          /* 2^-72 */ 2.1175823681357508e-22, \
          /* 2^-96 */ 1.2621774483536189e-29, \
          /* 2^28 */ 2.68435456e+08, \
          /* 2^4 */ 1.600000e+01, \
          /* 2^-20 */ 9.5367431640625e-07, \
          /* 2^-44 */ 5.6843418860808015e-14, \
          /* 2^-68 */ 3.3881317890172014e-21, \
          /* 2^-92 */ 2.0194839173657902e-28, \
          /* 2^-116 */ 1.2037062152420224e-35 \
        }; \
      double _a240, _b240, _c240, _d240, _e240, _f240, \
             _g240, _h240, _i240, _j240, _k240; \
      union { double d; UDItype i; } _l240, _m240, _n240, _o240, \
                                     _p240, _q240, _r240, _s240; \
      UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \
      \
      _FP_STATIC_ASSERT ((wfracbits) >= 106 && (wfracbits) <= 120, \
                         "wfracbits out of range"); \
      \
      setfetz; \
      \
      /* Split both operands into limbs; a/f are the most significant.  */ \
      _e240 = (double) (long) (X##_f0 & 0xffffff); \
      _j240 = (double) (long) (Y##_f0 & 0xffffff); \
      _d240 = (double) (long) ((X##_f0 >> 24) & 0xffffff); \
      _i240 = (double) (long) ((Y##_f0 >> 24) & 0xffffff); \
      _c240 = (double) (long) (((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
      _h240 = (double) (long) (((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
      _b240 = (double) (long) ((X##_f1 >> 8) & 0xffffff); \
      _g240 = (double) (long) ((Y##_f1 >> 8) & 0xffffff); \
      _a240 = (double) (long) (X##_f1 >> 32); \
      _f240 = (double) (long) (Y##_f1 >> 32); \
      /* Scale each limb by its weight relative to the top limb.  */ \
      _e240 *= _const[3]; \
      _j240 *= _const[3]; \
      _d240 *= _const[2]; \
      _i240 *= _const[2]; \
      _c240 *= _const[1]; \
      _h240 *= _const[1]; \
      _b240 *= _const[0]; \
      _g240 *= _const[0]; \
      /* Column sums of the partial products, least significant first.  */ \
      _s240.d = _e240*_j240; \
      _r240.d = _d240*_j240 + _e240*_i240; \
      _q240.d = _c240*_j240 + _d240*_i240 + _e240*_h240; \
      _p240.d = _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240; \
      _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240; \
      _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240; \
      _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240; \
      _l240.d = _a240*_g240 + _b240*_f240; \
      _k240 = _a240*_f240; \
      /* Running totals: each column absorbs everything below it.  */ \
      _r240.d += _s240.d; \
      _q240.d += _r240.d; \
      _p240.d += _q240.d; \
      _o240.d += _p240.d; \
      _n240.d += _o240.d; \
      _m240.d += _n240.d; \
      _l240.d += _m240.d; \
      _k240 += _l240.d; \
      /* Keep only what the low columns hold below their own weight.  */ \
      _s240.d -= ((_const[10]+_s240.d)-_const[10]); \
      _r240.d -= ((_const[9]+_r240.d)-_const[9]); \
      _q240.d -= ((_const[8]+_q240.d)-_const[8]); \
      _p240.d -= ((_const[7]+_p240.d)-_const[7]); \
      /* Bias the upper columns so their bits can be read as integers.  */ \
      _o240.d += _const[7]; \
      _n240.d += _const[6]; \
      _m240.d += _const[5]; \
      _l240.d += _const[4]; \
      /* Any non-zero residue in the low columns becomes the sticky bit.  */ \
      if (_s240.d != 0.0) \
        _y240 = 1; \
      if (_r240.d != 0.0) \
        _y240 = 1; \
      if (_q240.d != 0.0) \
        _y240 = 1; \
      if (_p240.d != 0.0) \
        _y240 = 1; \
      _t240 = (DItype) _k240; \
      _u240 = _l240.i; \
      _v240 = _m240.i; \
      _w240 = _n240.i; \
      _x240 = _o240.i; \
      R##_f1 = ((_t240 << (128 - ((wfracbits) - 1))) \
                | ((_u240 & 0xffffff) >> (((wfracbits) - 1) - 104))); \
      R##_f0 = (((_u240 & 0xffffff) << (168 - ((wfracbits) - 1))) \
                | ((_v240 & 0xffffff) << (144 - ((wfracbits) - 1))) \
                | ((_w240 & 0xffffff) << (120 - ((wfracbits) - 1))) \
                | ((_x240 & 0xffffff) >> (((wfracbits) - 1) - 96)) \
                | _y240); \
      resetfe; \
    } \
  while (0)
|
||||
|
||||
/* Division algorithms: */
|
||||
|
||||
/* Two-word fraction division R = X / Y built from udiv_qrnnd and
   umul_ppmm.  FS selects _FP_WFRACXBITS_<fs>, the left shift applied
   to Y before dividing.  When X < Y the exponent R##_e is decremented
   instead of pre-shifting the numerator.  The quotient is produced one
   word at a time (R##_f1, then R##_f0); each estimate is corrected
   downwards at most twice.  A non-zero final remainder sets
   _FP_WORK_STICKY in R##_f0.  Y is modified.  */
#define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \
  do \
    { \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f2; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f1; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f0; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f1; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f0; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f1; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f0; \
      if (_FP_FRAC_GE_2 (X, Y)) \
        { \
          _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1 >> 1; \
          _FP_DIV_MEAT_2_udiv_n_f1 \
            = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \
          _FP_DIV_MEAT_2_udiv_n_f0 \
            = X##_f0 << (_FP_W_TYPE_SIZE - 1); \
        } \
      else \
        { \
          R##_e--; \
          _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1; \
          _FP_DIV_MEAT_2_udiv_n_f1 = X##_f0; \
          _FP_DIV_MEAT_2_udiv_n_f0 = 0; \
        } \
      \
      /* Normalize, i.e. make the most significant bit of the \
         denominator set.  */ \
      _FP_FRAC_SLL_2 (Y, _FP_WFRACXBITS_##fs); \
      \
      udiv_qrnnd (R##_f1, _FP_DIV_MEAT_2_udiv_r_f1, \
                  _FP_DIV_MEAT_2_udiv_n_f2, _FP_DIV_MEAT_2_udiv_n_f1, \
                  Y##_f1); \
      umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, _FP_DIV_MEAT_2_udiv_m_f0, \
                 R##_f1, Y##_f0); \
      _FP_DIV_MEAT_2_udiv_r_f0 = _FP_DIV_MEAT_2_udiv_n_f0; \
      if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, _FP_DIV_MEAT_2_udiv_r)) \
        { \
          R##_f1--; \
          _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
                          _FP_DIV_MEAT_2_udiv_r); \
          if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y) \
              && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
                                _FP_DIV_MEAT_2_udiv_r)) \
            { \
              R##_f1--; \
              _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
                              _FP_DIV_MEAT_2_udiv_r); \
            } \
        } \
      _FP_FRAC_DEC_2 (_FP_DIV_MEAT_2_udiv_r, _FP_DIV_MEAT_2_udiv_m); \
      \
      if (_FP_DIV_MEAT_2_udiv_r_f1 == Y##_f1) \
        { \
          /* This is a special case, not an optimization \
             (_FP_DIV_MEAT_2_udiv_r/Y##_f1 would not fit into UWtype). \
             As _FP_DIV_MEAT_2_udiv_r is guaranteed to be < Y, \
             R##_f0 can be either (UWtype)-1 or (UWtype)-2.  But as we \
             know what kind of bits it is (sticky, guard, round), \
             we don't care.  We also don't care what the remainder is, \
             because the guard bit will be set anyway.  -jj */ \
          R##_f0 = -1; \
        } \
      else \
        { \
          udiv_qrnnd (R##_f0, _FP_DIV_MEAT_2_udiv_r_f1, \
                      _FP_DIV_MEAT_2_udiv_r_f1, \
                      _FP_DIV_MEAT_2_udiv_r_f0, Y##_f1); \
          umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, \
                     _FP_DIV_MEAT_2_udiv_m_f0, R##_f0, Y##_f0); \
          _FP_DIV_MEAT_2_udiv_r_f0 = 0; \
          if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
                             _FP_DIV_MEAT_2_udiv_r)) \
            { \
              R##_f0--; \
              _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
                              _FP_DIV_MEAT_2_udiv_r); \
              if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y) \
                  && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
                                    _FP_DIV_MEAT_2_udiv_r)) \
                { \
                  R##_f0--; \
                  _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
                                  _FP_DIV_MEAT_2_udiv_r); \
                } \
            } \
          if (!_FP_FRAC_EQ_2 (_FP_DIV_MEAT_2_udiv_r, \
                              _FP_DIV_MEAT_2_udiv_m)) \
            R##_f0 |= _FP_WORK_STICKY; \
        } \
    } \
  while (0)
|
||||
|
||||
|
||||
/* Square root algorithms:
|
||||
We have just one right now, maybe Newton approximation
|
||||
should be added for those machines where division is fast. */
|
||||
|
||||
/* Bit-by-bit square root core for two-word fractions.  R accumulates
   the result, S and T are scratch fractions, X is the operand (consumed:
   it is shifted left once per iteration) and Q is the bit mask to start
   from in the high word.  The first loop resolves the bits of R##_f1,
   the second the bits of R##_f0 down to (but excluding) _FP_WORK_ROUND.
   If X is non-zero afterwards the result is inexact: _FP_WORK_STICKY is
   set, and _FP_WORK_ROUND too when S < X.  */
#define _FP_SQRT_MEAT_2(R, S, T, X, q) \
  do \
    { \
      while (q) \
        { \
          T##_f1 = S##_f1 + (q); \
          if (T##_f1 <= X##_f1) \
            { \
              S##_f1 = T##_f1 + (q); \
              X##_f1 -= T##_f1; \
              R##_f1 += (q); \
            } \
          _FP_FRAC_SLL_2 (X, 1); \
          (q) >>= 1; \
        } \
      (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
      while ((q) != _FP_WORK_ROUND) \
        { \
          T##_f0 = S##_f0 + (q); \
          T##_f1 = S##_f1; \
          if (T##_f1 < X##_f1 \
              || (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \
            { \
              S##_f0 = T##_f0 + (q); \
              /* Carry out of the low word of S.  */ \
              S##_f1 += (T##_f0 > S##_f0); \
              _FP_FRAC_DEC_2 (X, T); \
              R##_f0 += (q); \
            } \
          _FP_FRAC_SLL_2 (X, 1); \
          (q) >>= 1; \
        } \
      if (X##_f0 | X##_f1) \
        { \
          if (S##_f1 < X##_f1 \
              || (S##_f1 == X##_f1 && S##_f0 < X##_f0)) \
            R##_f0 |= _FP_WORK_ROUND; \
          R##_f0 |= _FP_WORK_STICKY; \
        } \
    } \
  while (0)
|
||||
|
||||
|
||||
/* Assembly/disassembly for converting to/from integral types.
|
||||
No shifting or overflow handled here. */
|
||||
|
||||
/* Store the two-word fraction X into the integer lvalue R, which is
   RSIZE bits wide.  When RSIZE fits in one word only X##_f0 is copied;
   otherwise R becomes (X##_f1 << _FP_W_TYPE_SIZE) + X##_f0.  Uses GNU
   statement expressions so each arm can hold several statements.  */
#define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \
  (void) (((rsize) <= _FP_W_TYPE_SIZE) \
          ? ({ (r) = X##_f0; }) \
          : ({ \
              (r) = X##_f1; \
              (r) <<= _FP_W_TYPE_SIZE; \
              (r) += X##_f0; \
            }))
|
||||
|
||||
/* Split the RSIZE-bit integer R into the two-word fraction X:
   X##_f0 receives R (truncated to the word type by the assignment),
   X##_f1 receives R >> _FP_W_TYPE_SIZE, or 0 when RSIZE fits in a
   single word.  */
#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \
  do \
    { \
      X##_f0 = (r); \
      X##_f1 = ((rsize) <= _FP_W_TYPE_SIZE \
                ? 0 \
                : (r) >> _FP_W_TYPE_SIZE); \
    } \
  while (0)
|
||||
|
||||
/* Convert FP values between word sizes. */
|
||||
|
||||
/* Narrowing copy: a one-word destination takes the low word of S.  */
#define _FP_FRAC_COPY_1_2(D, S)         (D##_f = S##_f0)

/* Widening copy: the single word of S becomes the low word of D and
   the high word is cleared.  */
#define _FP_FRAC_COPY_2_1(D, S)         ((D##_f0 = S##_f), (D##_f1 = 0))

/* Same-width copy; forwards to _FP_FRAC_COPY_2.  */
#define _FP_FRAC_COPY_2_2(D, S)         _FP_FRAC_COPY_2 (D, S)
|
||||
|
||||
#endif /* !SOFT_FP_OP_2_H */
|
882
src/gemm/soft-fp/op-4.h
Normal file
882
src/gemm/soft-fp/op-4.h
Normal file
|
@ -0,0 +1,882 @@
|
|||
/* Software floating-point emulation.
|
||||
Basic four-word fraction declaration and manipulation.
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com),
|
||||
Jakub Jelinek (jj@ultra.linux.cz),
|
||||
David S. Miller (davem@redhat.com) and
|
||||
Peter Maydell (pmaydell@chiark.greenend.org.uk).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef SOFT_FP_OP_4_H
|
||||
#define SOFT_FP_OP_4_H 1
|
||||
|
||||
/* A four-word fraction named X is the array X##_f[4]; index 0 is the
   least significant word and index 3 the most significant.  */
#define _FP_FRAC_DECL_4(X)      _FP_W_TYPE X##_f[4]
/* Word-by-word copy of S into D.  */
#define _FP_FRAC_COPY_4(D, S) \
  (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1], \
   D##_f[2] = S##_f[2], D##_f[3] = S##_f[3])
/* I is a comma-separated list of four words (e.g. _FP_ZEROFRAC_4);
   the extra expansion step lets it spread into __FP_FRAC_SET_4.  */
#define _FP_FRAC_SET_4(X, I)    __FP_FRAC_SET_4 (X, I)
#define _FP_FRAC_HIGH_4(X)      (X##_f[3])
#define _FP_FRAC_LOW_4(X)       (X##_f[0])
#define _FP_FRAC_WORD_4(X, w)   (X##_f[w])
|
||||
|
||||
/* Logical left shift of the four-word fraction X by N bits, in place.
   N is split into whole words (skip) and a remaining bit count (up).
   A pure word shift is handled separately so that no word is ever
   shifted by _FP_W_TYPE_SIZE bits.  Vacated low words are zeroed.  */
#define _FP_FRAC_SLL_4(X, N) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_SLL_4_up, _FP_FRAC_SLL_4_down; \
      _FP_I_TYPE _FP_FRAC_SLL_4_skip, _FP_FRAC_SLL_4_i; \
      _FP_FRAC_SLL_4_skip = (N) / _FP_W_TYPE_SIZE; \
      _FP_FRAC_SLL_4_up = (N) % _FP_W_TYPE_SIZE; \
      _FP_FRAC_SLL_4_down = _FP_W_TYPE_SIZE - _FP_FRAC_SLL_4_up; \
      if (!_FP_FRAC_SLL_4_up) \
        for (_FP_FRAC_SLL_4_i = 3; \
             _FP_FRAC_SLL_4_i >= _FP_FRAC_SLL_4_skip; \
             --_FP_FRAC_SLL_4_i) \
          X##_f[_FP_FRAC_SLL_4_i] \
            = X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip]; \
      else \
        { \
          for (_FP_FRAC_SLL_4_i = 3; \
               _FP_FRAC_SLL_4_i > _FP_FRAC_SLL_4_skip; \
               --_FP_FRAC_SLL_4_i) \
            X##_f[_FP_FRAC_SLL_4_i] \
              = ((X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip] \
                  << _FP_FRAC_SLL_4_up) \
                 | (X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip-1] \
                    >> _FP_FRAC_SLL_4_down)); \
          /* Lowest surviving word has nothing below it to pull in.  */ \
          X##_f[_FP_FRAC_SLL_4_i--] = X##_f[0] << _FP_FRAC_SLL_4_up; \
        } \
      for (; _FP_FRAC_SLL_4_i >= 0; --_FP_FRAC_SLL_4_i) \
        X##_f[_FP_FRAC_SLL_4_i] = 0; \
    } \
  while (0)
|
||||
|
||||
/* This one was broken too. */
|
||||
/* Logical right shift of the four-word fraction X by N bits, in place.
   N is split into whole words (skip) and a remaining bit count (down).
   A pure word shift is handled separately so that no word is ever
   shifted by _FP_W_TYPE_SIZE bits.  Vacated high words are zeroed and
   the bits shifted out are discarded.  */
#define _FP_FRAC_SRL_4(X, N) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_SRL_4_up, _FP_FRAC_SRL_4_down; \
      _FP_I_TYPE _FP_FRAC_SRL_4_skip, _FP_FRAC_SRL_4_i; \
      _FP_FRAC_SRL_4_skip = (N) / _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRL_4_down = (N) % _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRL_4_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRL_4_down; \
      if (!_FP_FRAC_SRL_4_down) \
        for (_FP_FRAC_SRL_4_i = 0; \
             _FP_FRAC_SRL_4_i <= 3-_FP_FRAC_SRL_4_skip; \
             ++_FP_FRAC_SRL_4_i) \
          X##_f[_FP_FRAC_SRL_4_i] \
            = X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip]; \
      else \
        { \
          for (_FP_FRAC_SRL_4_i = 0; \
               _FP_FRAC_SRL_4_i < 3-_FP_FRAC_SRL_4_skip; \
               ++_FP_FRAC_SRL_4_i) \
            X##_f[_FP_FRAC_SRL_4_i] \
              = ((X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip] \
                  >> _FP_FRAC_SRL_4_down) \
                 | (X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip+1] \
                    << _FP_FRAC_SRL_4_up)); \
          /* Highest surviving word has nothing above it to pull in.  */ \
          X##_f[_FP_FRAC_SRL_4_i++] = X##_f[3] >> _FP_FRAC_SRL_4_down; \
        } \
      for (; _FP_FRAC_SRL_4_i < 4; ++_FP_FRAC_SRL_4_i) \
        X##_f[_FP_FRAC_SRL_4_i] = 0; \
    } \
  while (0)
|
||||
|
||||
|
||||
/* Right shift with sticky-lsb.
|
||||
What this actually means is that we do a standard right-shift,
|
||||
but that if any of the bits that fall off the right hand side
|
||||
were one then we always set the LSbit. */
|
||||
/* Right shift X by N bits in place and report lost bits separately:
   S is set to 1 if any bit shifted out was non-zero, 0 otherwise.
   X itself is shifted exactly like _FP_FRAC_SRL_4 (the LSB is NOT
   modified here).  SIZE is accepted for interface symmetry and is not
   used by this expansion.  */
#define _FP_FRAC_SRST_4(X, S, N, size) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_SRST_4_up, _FP_FRAC_SRST_4_down; \
      _FP_I_TYPE _FP_FRAC_SRST_4_skip, _FP_FRAC_SRST_4_i; \
      _FP_W_TYPE _FP_FRAC_SRST_4_s; \
      _FP_FRAC_SRST_4_skip = (N) / _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRST_4_down = (N) % _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRST_4_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRST_4_down; \
      /* Whole words that fall off the low end.  */ \
      for (_FP_FRAC_SRST_4_s = _FP_FRAC_SRST_4_i = 0; \
           _FP_FRAC_SRST_4_i < _FP_FRAC_SRST_4_skip; \
           ++_FP_FRAC_SRST_4_i) \
        _FP_FRAC_SRST_4_s |= X##_f[_FP_FRAC_SRST_4_i]; \
      if (!_FP_FRAC_SRST_4_down) \
        for (_FP_FRAC_SRST_4_i = 0; \
             _FP_FRAC_SRST_4_i <= 3-_FP_FRAC_SRST_4_skip; \
             ++_FP_FRAC_SRST_4_i) \
          X##_f[_FP_FRAC_SRST_4_i] \
            = X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip]; \
      else \
        { \
          /* Low bits of the first surviving word are lost as well; \
             the index still equals skip here.  */ \
          _FP_FRAC_SRST_4_s \
            |= X##_f[_FP_FRAC_SRST_4_i] << _FP_FRAC_SRST_4_up; \
          for (_FP_FRAC_SRST_4_i = 0; \
               _FP_FRAC_SRST_4_i < 3-_FP_FRAC_SRST_4_skip; \
               ++_FP_FRAC_SRST_4_i) \
            X##_f[_FP_FRAC_SRST_4_i] \
              = ((X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip] \
                  >> _FP_FRAC_SRST_4_down) \
                 | (X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip+1] \
                    << _FP_FRAC_SRST_4_up)); \
          X##_f[_FP_FRAC_SRST_4_i++] \
            = X##_f[3] >> _FP_FRAC_SRST_4_down; \
        } \
      for (; _FP_FRAC_SRST_4_i < 4; ++_FP_FRAC_SRST_4_i) \
        X##_f[_FP_FRAC_SRST_4_i] = 0; \
      S = (_FP_FRAC_SRST_4_s != 0); \
    } \
  while (0)
|
||||
|
||||
/* Right shift X by N bits with a sticky LSB: performs
   _FP_FRAC_SRST_4 and ORs the reported sticky flag into X##_f[0].  */
#define _FP_FRAC_SRS_4(X, N, size) \
  do \
    { \
      int _FP_FRAC_SRS_4_sticky; \
      _FP_FRAC_SRST_4 (X, _FP_FRAC_SRS_4_sticky, (N), (size)); \
      X##_f[0] |= _FP_FRAC_SRS_4_sticky; \
    } \
  while (0)
|
||||
|
||||
/* Name-based front ends for the word-list primitives __FP_FRAC_*_4.
   Each fraction is expanded to its four words, most significant
   (index 3) first.  */

/* R = X + Y.  */
#define _FP_FRAC_ADD_4(R, X, Y) \
  __FP_FRAC_ADD_4 (R##_f[3], R##_f[2], R##_f[1], R##_f[0], \
                   X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
                   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])

/* R = X - Y.  */
#define _FP_FRAC_SUB_4(R, X, Y) \
  __FP_FRAC_SUB_4 (R##_f[3], R##_f[2], R##_f[1], R##_f[0], \
                   X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
                   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])

/* Forwards the words of X and Y to __FP_FRAC_DEC_4.  */
#define _FP_FRAC_DEC_4(X, Y) \
  __FP_FRAC_DEC_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
                   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])

/* Forwards the words of X and the scalar I to __FP_FRAC_ADDI_4.  */
#define _FP_FRAC_ADDI_4(X, I) \
  __FP_FRAC_ADDI_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
|
||||
|
||||
/* Four-word constants as comma-separated word lists.  */
#define _FP_ZEROFRAC_4  0, 0, 0, 0
#define _FP_MINFRAC_4   0, 0, 0, 1
#define _FP_MAXFRAC_4   (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)

/* True when every word of X is zero.  */
#define _FP_FRAC_ZEROP_4(X)     ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)
/* True when the top word of X is negative when viewed as _FP_WS_TYPE.  */
#define _FP_FRAC_NEGP_4(X)      ((_FP_WS_TYPE) X##_f[3] < 0)
/* Tests the format's overflow bit(s) in the high word of X.  */
#define _FP_FRAC_OVERP_4(fs, X) (_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
#define _FP_FRAC_HIGHBIT_DW_4(fs, X) \
  (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
/* Clears the format's overflow bit(s) in the high word of X.  */
#define _FP_FRAC_CLEAR_OVERP_4(fs, X)   (_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
|
||||
|
||||
/* Four-word comparisons.  Words are compared from the most significant
   (index 3) down to the least significant (index 0).  */

/* X == Y.  */
#define _FP_FRAC_EQ_4(X, Y) \
  (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1] \
   && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3])

/* X > Y.  */
#define _FP_FRAC_GT_4(X, Y) \
  (X##_f[3] > Y##_f[3] \
   || (X##_f[3] == Y##_f[3] \
       && (X##_f[2] > Y##_f[2] \
           || (X##_f[2] == Y##_f[2] \
               && (X##_f[1] > Y##_f[1] \
                   || (X##_f[1] == Y##_f[1] \
                       && X##_f[0] > Y##_f[0]))))))

/* X >= Y.  */
#define _FP_FRAC_GE_4(X, Y) \
  (X##_f[3] > Y##_f[3] \
   || (X##_f[3] == Y##_f[3] \
       && (X##_f[2] > Y##_f[2] \
           || (X##_f[2] == Y##_f[2] \
               && (X##_f[1] > Y##_f[1] \
                   || (X##_f[1] == Y##_f[1] \
                       && X##_f[0] >= Y##_f[0]))))))
|
||||
|
||||
|
||||
/* Count leading zero bits of the four-word fraction X into R.
   The highest non-zero word is handed to __FP_CLZ and one
   _FP_W_TYPE_SIZE is added for every all-zero word above it.
   When words 3..1 are all zero, word 0 is passed to __FP_CLZ
   without a zero check.  */
#define _FP_FRAC_CLZ_4(R, X) \
  do \
    { \
      if (X##_f[3]) \
        __FP_CLZ ((R), X##_f[3]); \
      else if (X##_f[2]) \
        { \
          __FP_CLZ ((R), X##_f[2]); \
          (R) += _FP_W_TYPE_SIZE; \
        } \
      else if (X##_f[1]) \
        { \
          __FP_CLZ ((R), X##_f[1]); \
          (R) += _FP_W_TYPE_SIZE*2; \
        } \
      else \
        { \
          __FP_CLZ ((R), X##_f[0]); \
          (R) += _FP_W_TYPE_SIZE*3; \
        } \
    } \
  while (0)
|
||||
|
||||
|
||||
/* Split the floating-point value VAL of format FS into its raw fields:
   the four fraction words X##_f[0..3], the exponent X##_e and the sign
   X##_s.  The value is copied into a local union _FP_UNION_<fs> and
   read back through its `bits' view; no normalization is done.  */
#define _FP_UNPACK_RAW_4(fs, X, val) \
  do \
    { \
      union _FP_UNION_##fs _FP_UNPACK_RAW_4_flo; \
      _FP_UNPACK_RAW_4_flo.flt = (val); \
      X##_f[0] = _FP_UNPACK_RAW_4_flo.bits.frac0; \
      X##_f[1] = _FP_UNPACK_RAW_4_flo.bits.frac1; \
      X##_f[2] = _FP_UNPACK_RAW_4_flo.bits.frac2; \
      X##_f[3] = _FP_UNPACK_RAW_4_flo.bits.frac3; \
      X##_e = _FP_UNPACK_RAW_4_flo.bits.exp; \
      X##_s = _FP_UNPACK_RAW_4_flo.bits.sign; \
    } \
  while (0)
|
||||
|
||||
/* Pointer variant of _FP_UNPACK_RAW_4: VAL is the address of the
   floating-point object.  It is reinterpreted as a pointer to
   union _FP_UNION_<fs> and the raw fraction words, exponent and sign
   are read in place, without copying the value.  */
#define _FP_UNPACK_RAW_4_P(fs, X, val) \
  do \
    { \
      union _FP_UNION_##fs *_FP_UNPACK_RAW_4_P_flo \
        = (union _FP_UNION_##fs *) (val); \
      \
      X##_f[0] = _FP_UNPACK_RAW_4_P_flo->bits.frac0; \
      X##_f[1] = _FP_UNPACK_RAW_4_P_flo->bits.frac1; \
      X##_f[2] = _FP_UNPACK_RAW_4_P_flo->bits.frac2; \
      X##_f[3] = _FP_UNPACK_RAW_4_P_flo->bits.frac3; \
      X##_e = _FP_UNPACK_RAW_4_P_flo->bits.exp; \
      X##_s = _FP_UNPACK_RAW_4_P_flo->bits.sign; \
    } \
  while (0)
|
||||
|
||||
/* Assemble a floating-point value of format FS from raw fields and
   assign it to the lvalue VAL.  X##_f[0..3], X##_e and X##_s are
   written into the `bits' view of a local union _FP_UNION_<fs>, whose
   `flt' view is then stored to VAL.  No rounding or range checking.  */
#define _FP_PACK_RAW_4(fs, val, X) \
  do \
    { \
      union _FP_UNION_##fs _FP_PACK_RAW_4_flo; \
      _FP_PACK_RAW_4_flo.bits.frac0 = X##_f[0]; \
      _FP_PACK_RAW_4_flo.bits.frac1 = X##_f[1]; \
      _FP_PACK_RAW_4_flo.bits.frac2 = X##_f[2]; \
      _FP_PACK_RAW_4_flo.bits.frac3 = X##_f[3]; \
      _FP_PACK_RAW_4_flo.bits.exp = X##_e; \
      _FP_PACK_RAW_4_flo.bits.sign = X##_s; \
      (val) = _FP_PACK_RAW_4_flo.flt; \
    } \
  while (0)
|
||||
|
||||
/* Pointer variant of _FP_PACK_RAW_4: VAL is the address of the
   destination object.  It is reinterpreted as a pointer to
   union _FP_UNION_<fs> and the raw fraction words, exponent and sign
   of X are stored through its `bits' view in place.  */
#define _FP_PACK_RAW_4_P(fs, val, X) \
  do \
    { \
      union _FP_UNION_##fs *_FP_PACK_RAW_4_P_flo \
        = (union _FP_UNION_##fs *) (val); \
      \
      _FP_PACK_RAW_4_P_flo->bits.frac0 = X##_f[0]; \
      _FP_PACK_RAW_4_P_flo->bits.frac1 = X##_f[1]; \
      _FP_PACK_RAW_4_P_flo->bits.frac2 = X##_f[2]; \
      _FP_PACK_RAW_4_P_flo->bits.frac3 = X##_f[3]; \
      _FP_PACK_RAW_4_P_flo->bits.exp = X##_e; \
      _FP_PACK_RAW_4_P_flo->bits.sign = X##_s; \
    } \
  while (0)
|
||||
|
||||
/* Multiplication algorithms: */
|
||||
|
||||
/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
|
||||
|
||||
/* Full 4-word x 4-word => 8-word multiplication.  DOIT (hi, lo, a, b)
   is the single-word multiply primitive producing a two-word product.
   R is an eight-word fraction accessed through _FP_FRAC_WORD_8; X and
   Y are four-word fractions.  WFRACBITS is not used by this expansion.

   The sixteen partial products are generated in batches into the
   two-word temporaries b..f and folded into a sliding three-word
   window of R, working from the least significant column upwards.  */
#define _FP_MUL_MEAT_DW_4_wide(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_b); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_c); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_d); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_e); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_f); \
      \
      doit (_FP_FRAC_WORD_8 (R, 1), _FP_FRAC_WORD_8 (R, 0), \
            X##_f[0], Y##_f[0]); \
      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0, \
            X##_f[0], Y##_f[1]); \
      doit (_FP_MUL_MEAT_DW_4_wide_c_f1, _FP_MUL_MEAT_DW_4_wide_c_f0, \
            X##_f[1], Y##_f[0]); \
      doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0, \
            X##_f[1], Y##_f[1]); \
      doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0, \
            X##_f[0], Y##_f[2]); \
      doit (_FP_MUL_MEAT_DW_4_wide_f_f1, _FP_MUL_MEAT_DW_4_wide_f_f0, \
            X##_f[2], Y##_f[0]); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \
                       _FP_FRAC_WORD_8 (R, 1), 0, \
                       _FP_MUL_MEAT_DW_4_wide_b_f1, \
                       _FP_MUL_MEAT_DW_4_wide_b_f0, \
                       0, 0, _FP_FRAC_WORD_8 (R, 1)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \
                       _FP_FRAC_WORD_8 (R, 1), 0, \
                       _FP_MUL_MEAT_DW_4_wide_c_f1, \
                       _FP_MUL_MEAT_DW_4_wide_c_f0, \
                       _FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \
                       _FP_FRAC_WORD_8 (R, 1)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2), 0, \
                       _FP_MUL_MEAT_DW_4_wide_d_f1, \
                       _FP_MUL_MEAT_DW_4_wide_d_f0, \
                       0, _FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2), 0, \
                       _FP_MUL_MEAT_DW_4_wide_e_f1, \
                       _FP_MUL_MEAT_DW_4_wide_e_f0, \
                       _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2), 0, \
                       _FP_MUL_MEAT_DW_4_wide_f_f1, \
                       _FP_MUL_MEAT_DW_4_wide_f_f0, \
                       _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2)); \
      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, \
            _FP_MUL_MEAT_DW_4_wide_b_f0, X##_f[0], Y##_f[3]); \
      doit (_FP_MUL_MEAT_DW_4_wide_c_f1, \
            _FP_MUL_MEAT_DW_4_wide_c_f0, X##_f[3], Y##_f[0]); \
      doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0, \
            X##_f[1], Y##_f[2]); \
      doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0, \
            X##_f[2], Y##_f[1]); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3), 0, \
                       _FP_MUL_MEAT_DW_4_wide_b_f1, \
                       _FP_MUL_MEAT_DW_4_wide_b_f0, \
                       0, _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3), 0, \
                       _FP_MUL_MEAT_DW_4_wide_c_f1, \
                       _FP_MUL_MEAT_DW_4_wide_c_f0, \
                       _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3), 0, \
                       _FP_MUL_MEAT_DW_4_wide_d_f1, \
                       _FP_MUL_MEAT_DW_4_wide_d_f0, \
                       _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3), 0, \
                       _FP_MUL_MEAT_DW_4_wide_e_f1, \
                       _FP_MUL_MEAT_DW_4_wide_e_f0, \
                       _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3)); \
      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0, \
            X##_f[2], Y##_f[2]); \
      doit (_FP_MUL_MEAT_DW_4_wide_c_f1, _FP_MUL_MEAT_DW_4_wide_c_f0, \
            X##_f[1], Y##_f[3]); \
      doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0, \
            X##_f[3], Y##_f[1]); \
      doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0, \
            X##_f[2], Y##_f[3]); \
      doit (_FP_MUL_MEAT_DW_4_wide_f_f1, _FP_MUL_MEAT_DW_4_wide_f_f0, \
            X##_f[3], Y##_f[2]); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4), 0, \
                       _FP_MUL_MEAT_DW_4_wide_b_f1, \
                       _FP_MUL_MEAT_DW_4_wide_b_f0, \
                       0, _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4), 0, \
                       _FP_MUL_MEAT_DW_4_wide_c_f1, \
                       _FP_MUL_MEAT_DW_4_wide_c_f0, \
                       _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4), 0, \
                       _FP_MUL_MEAT_DW_4_wide_d_f1, \
                       _FP_MUL_MEAT_DW_4_wide_d_f0, \
                       _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
                       _FP_FRAC_WORD_8 (R, 5), 0, \
                       _FP_MUL_MEAT_DW_4_wide_e_f1, \
                       _FP_MUL_MEAT_DW_4_wide_e_f0, \
                       0, _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
                       _FP_FRAC_WORD_8 (R, 5), 0, \
                       _FP_MUL_MEAT_DW_4_wide_f_f1, \
                       _FP_MUL_MEAT_DW_4_wide_f_f0, \
                       _FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
                       _FP_FRAC_WORD_8 (R, 5)); \
      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0, \
            X##_f[3], Y##_f[3]); \
      __FP_FRAC_ADD_2 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
                       _FP_MUL_MEAT_DW_4_wide_b_f1, \
                       _FP_MUL_MEAT_DW_4_wide_b_f0, \
                       _FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6)); \
    } \
  while (0)
|
||||
|
||||
/* Four-word fraction multiply R = X * Y using the single-word multiply
   primitive DOIT.  The eight-word product is formed with
   _FP_MUL_MEAT_DW_4_wide, shifted right by WFRACBITS-1 with a sticky
   low bit, and its four low words become R.  */
#define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_8 (_FP_MUL_MEAT_4_wide_z); \
      \
      _FP_MUL_MEAT_DW_4_wide ((wfracbits), _FP_MUL_MEAT_4_wide_z, \
                              X, Y, doit); \
      \
      /* Normalize since we know where the msb of the multiplicands \
         were (bit B), we know that the msb of the product is \
         at either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_8 (_FP_MUL_MEAT_4_wide_z, (wfracbits)-1, \
                      2*(wfracbits)); \
      __FP_FRAC_SET_4 (R, _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 3), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 2), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 1), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 0)); \
    } \
  while (0)
|
||||
|
||||
/* Full 4-word x 4-word => 8-word multiplication via GMP's mpn_mul_n.
   R must name an eight-word fraction (R##_f), X and Y four-word
   fractions (X##_f, Y##_f); the arrays are passed straight to
   mpn_mul_n with a limb count of 4.  WFRACBITS is not used here.

   The operands are taken from the X and Y macro arguments; the macro
   previously referenced the fixed names _x_f/_y_f and therefore only
   worked when the caller's operands happened to be called _x and _y.  */
#define _FP_MUL_MEAT_DW_4_gmp(wfracbits, R, X, Y) \
  do \
    { \
      mpn_mul_n (R##_f, X##_f, Y##_f, 4); \
    } \
  while (0)
|
||||
|
||||
/* Four-word fraction multiply R = X * Y on top of
   _FP_MUL_MEAT_DW_4_gmp.  The eight-word product is shifted right by
   WFRACBITS-1 with a sticky low bit and its four low words become R.  */
#define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y) \
  do \
    { \
      _FP_FRAC_DECL_8 (_FP_MUL_MEAT_4_gmp_z); \
      \
      _FP_MUL_MEAT_DW_4_gmp ((wfracbits), _FP_MUL_MEAT_4_gmp_z, X, Y); \
      \
      /* Normalize since we know where the msb of the multiplicands \
         were (bit B), we know that the msb of the product is \
         at either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_8 (_FP_MUL_MEAT_4_gmp_z, (wfracbits)-1, \
                      2*(wfracbits)); \
      __FP_FRAC_SET_4 (R, _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 3), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 2), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 1), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 0)); \
    } \
  while (0)
|
||||
|
||||
/* Helper utility for _FP_DIV_MEAT_4_udiv:
|
||||
* pppp = m * nnn. */
|
||||
/* (p3,p2,p1,p0) = m * (n2,n1,n0): one word times three words giving a
   four-word product.  Each partial product comes from umul_ppmm; its
   low word is added into the running result with carry propagation
   via __FP_FRAC_ADDI_2.  All four outputs are written, none are read
   before being set.  */
#define umul_ppppmnnn(p3, p2, p1, p0, m, n2, n1, n0) \
  do \
    { \
      UWtype umul_ppppmnnn_t; \
      umul_ppmm (p1, p0, m, n0); \
      umul_ppmm (p2, umul_ppppmnnn_t, m, n1); \
      __FP_FRAC_ADDI_2 (p2, p1, umul_ppppmnnn_t); \
      umul_ppmm (p3, umul_ppppmnnn_t, m, n2); \
      __FP_FRAC_ADDI_2 (p3, p2, umul_ppppmnnn_t); \
    } \
  while (0)
|
||||
|
||||
/* Division algorithms: */
|
||||
|
||||
/* Four-word fraction division R = X / Y built from udiv_qrnnd and
   umul_ppppmnnn.  FS selects _FP_WFRACXBITS_<fs>, the left shift
   applied to Y before dividing.  When X >= Y, X is first shifted right
   by one bit (the bit shifted out is kept in the scratch numerator n);
   otherwise the exponent R##_e is decremented.  Quotient words are
   produced from R##_f[3] down to R##_f[0]; each estimate is corrected
   downwards at most twice.  A non-zero final remainder sets
   _FP_WORK_STICKY in R##_f[0].  X and Y are both modified.  */
#define _FP_DIV_MEAT_4_udiv(fs, R, X, Y) \
  do \
    { \
      int _FP_DIV_MEAT_4_udiv_i; \
      _FP_FRAC_DECL_4 (_FP_DIV_MEAT_4_udiv_n); \
      _FP_FRAC_DECL_4 (_FP_DIV_MEAT_4_udiv_m); \
      _FP_FRAC_SET_4 (_FP_DIV_MEAT_4_udiv_n, _FP_ZEROFRAC_4); \
      if (_FP_FRAC_GE_4 (X, Y)) \
        { \
          _FP_DIV_MEAT_4_udiv_n_f[3] \
            = X##_f[0] << (_FP_W_TYPE_SIZE - 1); \
          _FP_FRAC_SRL_4 (X, 1); \
        } \
      else \
        R##_e--; \
      \
      /* Normalize, i.e. make the most significant bit of the \
         denominator set.  */ \
      _FP_FRAC_SLL_4 (Y, _FP_WFRACXBITS_##fs); \
      \
      for (_FP_DIV_MEAT_4_udiv_i = 3; ; _FP_DIV_MEAT_4_udiv_i--) \
        { \
          if (X##_f[3] == Y##_f[3]) \
            { \
              /* This is a special case, not an optimization \
                 (X##_f[3]/Y##_f[3] would not fit into UWtype). \
                 As X## is guaranteed to be < Y, \
                 R##_f[_FP_DIV_MEAT_4_udiv_i] can be either \
                 (UWtype)-1 or (UWtype)-2.  */ \
              R##_f[_FP_DIV_MEAT_4_udiv_i] = -1; \
              if (!_FP_DIV_MEAT_4_udiv_i) \
                break; \
              __FP_FRAC_SUB_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
                               Y##_f[2], Y##_f[1], Y##_f[0], 0, \
                               X##_f[2], X##_f[1], X##_f[0], \
                               _FP_DIV_MEAT_4_udiv_n_f[_FP_DIV_MEAT_4_udiv_i]); \
              _FP_FRAC_SUB_4 (X, Y, X); \
              if (X##_f[3] > Y##_f[3]) \
                { \
                  R##_f[_FP_DIV_MEAT_4_udiv_i] = -2; \
                  _FP_FRAC_ADD_4 (X, Y, X); \
                } \
            } \
          else \
            { \
              udiv_qrnnd (R##_f[_FP_DIV_MEAT_4_udiv_i], \
                          X##_f[3], X##_f[3], X##_f[2], Y##_f[3]); \
              umul_ppppmnnn (_FP_DIV_MEAT_4_udiv_m_f[3], \
                             _FP_DIV_MEAT_4_udiv_m_f[2], \
                             _FP_DIV_MEAT_4_udiv_m_f[1], \
                             _FP_DIV_MEAT_4_udiv_m_f[0], \
                             R##_f[_FP_DIV_MEAT_4_udiv_i], \
                             Y##_f[2], Y##_f[1], Y##_f[0]); \
              /* Slide the remainder up one word and bring in the \
                 next numerator word.  */ \
              X##_f[2] = X##_f[1]; \
              X##_f[1] = X##_f[0]; \
              X##_f[0] \
                = _FP_DIV_MEAT_4_udiv_n_f[_FP_DIV_MEAT_4_udiv_i]; \
              if (_FP_FRAC_GT_4 (_FP_DIV_MEAT_4_udiv_m, X)) \
                { \
                  R##_f[_FP_DIV_MEAT_4_udiv_i]--; \
                  _FP_FRAC_ADD_4 (X, Y, X); \
                  if (_FP_FRAC_GE_4 (X, Y) \
                      && _FP_FRAC_GT_4 (_FP_DIV_MEAT_4_udiv_m, X)) \
                    { \
                      R##_f[_FP_DIV_MEAT_4_udiv_i]--; \
                      _FP_FRAC_ADD_4 (X, Y, X); \
                    } \
                } \
              _FP_FRAC_DEC_4 (X, _FP_DIV_MEAT_4_udiv_m); \
              if (!_FP_DIV_MEAT_4_udiv_i) \
                { \
                  if (!_FP_FRAC_EQ_4 (X, _FP_DIV_MEAT_4_udiv_m)) \
                    R##_f[0] |= _FP_WORK_STICKY; \
                  break; \
                } \
            } \
        } \
    } \
  while (0)
|
||||
|
||||
|
||||
/* Square root algorithms:
|
||||
We have just one right now, maybe Newton approximation
|
||||
should be added for those machines where division is fast. */
|
||||
|
||||
/* Bit-by-bit square root over a four-word fraction.

   R receives the result bits, X is the remainder and is consumed, T is
   scratch for the trial value, and S is the running accumulator that
   each accepted bit advances by 2*q (T = S + q, then S = T + q).
   q is the single-bit mask of the result bit being decided; the caller
   supplies its starting position for word 3.  Words are handled from
   index 3 down to index 0, and the comparison widens by one word per
   pass.  After every trial X is shifted left by one and q right by one.
   The last pass stops when q reaches _FP_WORK_ROUND; a non-zero
   remainder then sets the sticky bit, and also the round bit when the
   remainder exceeds S.  */
#define _FP_SQRT_MEAT_4(R, S, T, X, q) \
  do \
    { \
      /* Word 3: compare the top word only.  */ \
      while (q) \
        { \
          T##_f[3] = S##_f[3] + (q); \
          if (T##_f[3] <= X##_f[3]) \
            { \
              S##_f[3] = T##_f[3] + (q); \
              X##_f[3] -= T##_f[3]; \
              R##_f[3] += (q); \
            } \
          _FP_FRAC_SLL_4 (X, 1); \
          (q) >>= 1; \
        } \
      /* Word 2: restart q at the top bit, compare words 3..2.  */ \
      (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
      while (q) \
        { \
          T##_f[2] = S##_f[2] + (q); \
          T##_f[3] = S##_f[3]; \
          if (T##_f[3] < X##_f[3] \
              || (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2])) \
            { \
              S##_f[2] = T##_f[2] + (q); \
              /* Carry into word 3 when word 2 wrapped.  */ \
              S##_f[3] += (T##_f[2] > S##_f[2]); \
              __FP_FRAC_DEC_2 (X##_f[3], X##_f[2], \
                               T##_f[3], T##_f[2]); \
              R##_f[2] += (q); \
            } \
          _FP_FRAC_SLL_4 (X, 1); \
          (q) >>= 1; \
        } \
      /* Word 1: compare words 3..1.  */ \
      (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
      while (q) \
        { \
          T##_f[1] = S##_f[1] + (q); \
          T##_f[2] = S##_f[2]; \
          T##_f[3] = S##_f[3]; \
          if (T##_f[3] < X##_f[3] \
              || (T##_f[3] == X##_f[3] \
                  && (T##_f[2] < X##_f[2] \
                      || (T##_f[2] == X##_f[2] \
                          && T##_f[1] <= X##_f[1])))) \
            { \
              S##_f[1] = T##_f[1] + (q); \
              S##_f[2] += (T##_f[1] > S##_f[1]); \
              S##_f[3] += (T##_f[2] > S##_f[2]); \
              __FP_FRAC_DEC_3 (X##_f[3], X##_f[2], X##_f[1], \
                               T##_f[3], T##_f[2], T##_f[1]); \
              R##_f[1] += (q); \
            } \
          _FP_FRAC_SLL_4 (X, 1); \
          (q) >>= 1; \
        } \
      /* Word 0: full four-word compare, stopping at the round bit.  */ \
      (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
      while ((q) != _FP_WORK_ROUND) \
        { \
          T##_f[0] = S##_f[0] + (q); \
          T##_f[1] = S##_f[1]; \
          T##_f[2] = S##_f[2]; \
          T##_f[3] = S##_f[3]; \
          if (_FP_FRAC_GE_4 (X, T)) \
            { \
              S##_f[0] = T##_f[0] + (q); \
              S##_f[1] += (T##_f[0] > S##_f[0]); \
              S##_f[2] += (T##_f[1] > S##_f[1]); \
              S##_f[3] += (T##_f[2] > S##_f[2]); \
              _FP_FRAC_DEC_4 (X, T); \
              R##_f[0] += (q); \
            } \
          _FP_FRAC_SLL_4 (X, 1); \
          (q) >>= 1; \
        } \
      /* Inexact result: record round/sticky information.  */ \
      if (!_FP_FRAC_ZEROP_4 (X)) \
        { \
          if (_FP_FRAC_GT_4 (X, S)) \
            R##_f[0] |= _FP_WORK_ROUND; \
          R##_f[0] |= _FP_WORK_STICKY; \
        } \
    } \
  while (0)
|
||||
|
||||
|
||||
/* Internals. */
|
||||
|
||||
/* Store four words into X's fraction; I3 is the most significant word
   (index 3) and I0 the least significant (index 0).  */
#define __FP_FRAC_SET_4(X, I3, I2, I1, I0) \
  (X##_f[3] = (I3), X##_f[2] = (I2), X##_f[1] = (I1), X##_f[0] = (I0))
|
||||
|
||||
#ifndef __FP_FRAC_ADD_3
/* (r2,r1,r0) = (x2,x1,x0) + (y2,y1,y0): three-word addition with carry
   propagation; the carry out of the top word is dropped.  Each carry is
   detected by comparing the partial sum against the x operand, so the
   result may share storage with y but must not share storage with x.
   The #ifndef lets an earlier definition take precedence.  */
# define __FP_FRAC_ADD_3(r2, r1, r0, x2, x1, x0, y2, y1, y0) \
  do \
    { \
      _FP_W_TYPE __FP_FRAC_ADD_3_c1, __FP_FRAC_ADD_3_c2; \
      (r0) = (x0) + (y0); \
      __FP_FRAC_ADD_3_c1 = (r0) < (x0); \
      (r1) = (x1) + (y1); \
      __FP_FRAC_ADD_3_c2 = (r1) < (x1); \
      (r1) += __FP_FRAC_ADD_3_c1; \
      /* Adding the incoming carry may itself wrap word 1.  */ \
      __FP_FRAC_ADD_3_c2 |= (r1) < __FP_FRAC_ADD_3_c1; \
      (r2) = (x2) + (y2) + __FP_FRAC_ADD_3_c2; \
    } \
  while (0)
#endif
|
||||
|
||||
#ifndef __FP_FRAC_ADD_4
/* (r3..r0) = (x3..x0) + (y3..y0): four-word addition with carry
   propagation; the carry out of the top word is dropped.  Carries are
   detected against the x operand, so the result may share storage with
   y but must not share storage with x.  The #ifndef lets an earlier
   definition take precedence.  */
# define __FP_FRAC_ADD_4(r3, r2, r1, r0, x3, x2, x1, x0, y3, y2, y1, y0) \
  do \
    { \
      _FP_W_TYPE __FP_FRAC_ADD_4_c1, __FP_FRAC_ADD_4_c2; \
      _FP_W_TYPE __FP_FRAC_ADD_4_c3; \
      (r0) = (x0) + (y0); \
      __FP_FRAC_ADD_4_c1 = (r0) < (x0); \
      (r1) = (x1) + (y1); \
      __FP_FRAC_ADD_4_c2 = (r1) < (x1); \
      (r1) += __FP_FRAC_ADD_4_c1; \
      __FP_FRAC_ADD_4_c2 |= (r1) < __FP_FRAC_ADD_4_c1; \
      (r2) = (x2) + (y2); \
      __FP_FRAC_ADD_4_c3 = (r2) < (x2); \
      (r2) += __FP_FRAC_ADD_4_c2; \
      __FP_FRAC_ADD_4_c3 |= (r2) < __FP_FRAC_ADD_4_c2; \
      (r3) = (x3) + (y3) + __FP_FRAC_ADD_4_c3; \
    } \
  while (0)
#endif
|
||||
|
||||
#ifndef __FP_FRAC_SUB_3
/* (r2,r1,r0) = (x2,x1,x0) - (y2,y1,y0): three-word subtraction with
   borrow propagation; the borrow out of the top word is dropped.  The
   low words are built in temporaries and stored last, so the result may
   share storage with either operand.  The #ifndef lets an earlier
   definition take precedence.  */
# define __FP_FRAC_SUB_3(r2, r1, r0, x2, x1, x0, y2, y1, y0) \
  do \
    { \
      _FP_W_TYPE __FP_FRAC_SUB_3_tmp[2]; \
      _FP_W_TYPE __FP_FRAC_SUB_3_c1, __FP_FRAC_SUB_3_c2; \
      __FP_FRAC_SUB_3_tmp[0] = (x0) - (y0); \
      __FP_FRAC_SUB_3_c1 = __FP_FRAC_SUB_3_tmp[0] > (x0); \
      __FP_FRAC_SUB_3_tmp[1] = (x1) - (y1); \
      __FP_FRAC_SUB_3_c2 = __FP_FRAC_SUB_3_tmp[1] > (x1); \
      __FP_FRAC_SUB_3_tmp[1] -= __FP_FRAC_SUB_3_c1; \
      /* x1 == y1 with an incoming borrow wraps word 1 as well.  */ \
      __FP_FRAC_SUB_3_c2 |= __FP_FRAC_SUB_3_c1 && ((y1) == (x1)); \
      (r2) = (x2) - (y2) - __FP_FRAC_SUB_3_c2; \
      (r1) = __FP_FRAC_SUB_3_tmp[1]; \
      (r0) = __FP_FRAC_SUB_3_tmp[0]; \
    } \
  while (0)
#endif
|
||||
|
||||
#ifndef __FP_FRAC_SUB_4
/* (r3..r0) = (x3..x0) - (y3..y0): four-word subtraction with borrow
   propagation; the borrow out of the top word is dropped.  The three
   low words are built in temporaries and stored last, so the result may
   share storage with either operand.  The #ifndef lets an earlier
   definition take precedence.  */
# define __FP_FRAC_SUB_4(r3, r2, r1, r0, x3, x2, x1, x0, y3, y2, y1, y0) \
  do \
    { \
      _FP_W_TYPE __FP_FRAC_SUB_4_tmp[3]; \
      _FP_W_TYPE __FP_FRAC_SUB_4_c1, __FP_FRAC_SUB_4_c2; \
      _FP_W_TYPE __FP_FRAC_SUB_4_c3; \
      __FP_FRAC_SUB_4_tmp[0] = (x0) - (y0); \
      __FP_FRAC_SUB_4_c1 = __FP_FRAC_SUB_4_tmp[0] > (x0); \
      __FP_FRAC_SUB_4_tmp[1] = (x1) - (y1); \
      __FP_FRAC_SUB_4_c2 = __FP_FRAC_SUB_4_tmp[1] > (x1); \
      __FP_FRAC_SUB_4_tmp[1] -= __FP_FRAC_SUB_4_c1; \
      __FP_FRAC_SUB_4_c2 |= __FP_FRAC_SUB_4_c1 && ((y1) == (x1)); \
      __FP_FRAC_SUB_4_tmp[2] = (x2) - (y2); \
      __FP_FRAC_SUB_4_c3 = __FP_FRAC_SUB_4_tmp[2] > (x2); \
      __FP_FRAC_SUB_4_tmp[2] -= __FP_FRAC_SUB_4_c2; \
      __FP_FRAC_SUB_4_c3 |= __FP_FRAC_SUB_4_c2 && ((y2) == (x2)); \
      (r3) = (x3) - (y3) - __FP_FRAC_SUB_4_c3; \
      (r2) = __FP_FRAC_SUB_4_tmp[2]; \
      (r1) = __FP_FRAC_SUB_4_tmp[1]; \
      (r0) = __FP_FRAC_SUB_4_tmp[0]; \
    } \
  while (0)
#endif
|
||||
|
||||
#ifndef __FP_FRAC_DEC_3
/* (x2,x1,x0) -= (y2,y1,y0).  The current x words are copied into
   temporaries first and the subtraction is delegated to
   __FP_FRAC_SUB_3 with x as the destination.  The #ifndef lets an
   earlier definition take precedence.  */
# define __FP_FRAC_DEC_3(x2, x1, x0, y2, y1, y0) \
  do \
    { \
      UWtype __FP_FRAC_DEC_3_t0, __FP_FRAC_DEC_3_t1; \
      UWtype __FP_FRAC_DEC_3_t2; \
      __FP_FRAC_DEC_3_t0 = x0; \
      __FP_FRAC_DEC_3_t1 = x1; \
      __FP_FRAC_DEC_3_t2 = x2; \
      __FP_FRAC_SUB_3 (x2, x1, x0, __FP_FRAC_DEC_3_t2, \
                       __FP_FRAC_DEC_3_t1, __FP_FRAC_DEC_3_t0, \
                       y2, y1, y0); \
    } \
  while (0)
#endif
|
||||
|
||||
#ifndef __FP_FRAC_DEC_4
/* (x3..x0) -= (y3..y0).  The current x words are copied into
   temporaries first and the subtraction is delegated to
   __FP_FRAC_SUB_4 with x as the destination.  The #ifndef lets an
   earlier definition take precedence.  */
# define __FP_FRAC_DEC_4(x3, x2, x1, x0, y3, y2, y1, y0) \
  do \
    { \
      UWtype __FP_FRAC_DEC_4_t0, __FP_FRAC_DEC_4_t1; \
      UWtype __FP_FRAC_DEC_4_t2, __FP_FRAC_DEC_4_t3; \
      __FP_FRAC_DEC_4_t0 = x0; \
      __FP_FRAC_DEC_4_t1 = x1; \
      __FP_FRAC_DEC_4_t2 = x2; \
      __FP_FRAC_DEC_4_t3 = x3; \
      __FP_FRAC_SUB_4 (x3, x2, x1, x0, __FP_FRAC_DEC_4_t3, \
                       __FP_FRAC_DEC_4_t2, __FP_FRAC_DEC_4_t1, \
                       __FP_FRAC_DEC_4_t0, y3, y2, y1, y0); \
    } \
  while (0)
#endif
|
||||
|
||||
#ifndef __FP_FRAC_ADDI_4
/* (x3..x0) += i, where i is a single-word addend; the carry ripples
   upwards one word at a time and the carry out of x3 is dropped.
   The addend is captured in a local first so that the argument is
   evaluated exactly once.  The #ifndef lets an earlier definition take
   precedence.  */
# define __FP_FRAC_ADDI_4(x3, x2, x1, x0, i) \
  do \
    { \
      UWtype __FP_FRAC_ADDI_4_i = (i); \
      UWtype __FP_FRAC_ADDI_4_t; \
      /* A sum smaller than the addend means word 0 wrapped.  */ \
      __FP_FRAC_ADDI_4_t = (((x0) += __FP_FRAC_ADDI_4_i) \
                            < __FP_FRAC_ADDI_4_i); \
      (x1) += __FP_FRAC_ADDI_4_t; \
      __FP_FRAC_ADDI_4_t = ((x1) < __FP_FRAC_ADDI_4_t); \
      (x2) += __FP_FRAC_ADDI_4_t; \
      __FP_FRAC_ADDI_4_t = ((x2) < __FP_FRAC_ADDI_4_t); \
      (x3) += __FP_FRAC_ADDI_4_t; \
    } \
  while (0)
#endif
|
||||
|
||||
/* Convert FP values between word sizes. This appears to be more
|
||||
complicated than I'd have expected it to be, so these might be
|
||||
wrong... These macros are in any case somewhat bogus because they
|
||||
use information about what various FRAC_n variables look like
|
||||
internally [eg, that 2 word vars are X_f0 and x_f1]. But so do
|
||||
the ones in op-2.h and op-1.h. */
|
||||
/* Four-word to one-word copy: keeps only word 0 of S.  */
#define _FP_FRAC_COPY_1_4(D, S) (D##_f = S##_f[0])
|
||||
|
||||
/* Four-word to two-word copy: words 0 and 1 of S land in D_f0 and
   D_f1; words 2 and 3 are not copied.  */
#define _FP_FRAC_COPY_2_4(D, S) \
  do \
    { \
      D##_f0 = S##_f[0]; \
      D##_f1 = S##_f[1]; \
    } \
  while (0)
|
||||
|
||||
/* Assembly/disassembly for converting to/from integral types.
|
||||
No shifting or overflow handled here. */
|
||||
/* Put the FP value X into r, which is an integer of size rsize. */
|
||||
/* Build the integer r (rsize bits wide) from the words of X.
   One word is used when rsize fits in a word, two words when it fits
   in two, and all four words otherwise.  Every shift is wrapped in a
   conditional on rsize so that the shift expression is only selected
   when r is wider than one word.  */
#define _FP_FRAC_ASSEMBLE_4(r, X, rsize) \
  do \
    { \
      if ((rsize) <= _FP_W_TYPE_SIZE) \
        (r) = X##_f[0]; \
      else if ((rsize) <= 2*_FP_W_TYPE_SIZE) \
        { \
          (r) = X##_f[1]; \
          (r) = ((rsize) <= _FP_W_TYPE_SIZE \
                 ? 0 \
                 : (r) << _FP_W_TYPE_SIZE); \
          (r) += X##_f[0]; \
        } \
      else \
        { \
          /* I'm feeling lazy so we deal with int == 3words \
             (implausible) and int == 4words as a single case.  */ \
          (r) = X##_f[3]; \
          (r) = ((rsize) <= _FP_W_TYPE_SIZE \
                 ? 0 \
                 : (r) << _FP_W_TYPE_SIZE); \
          (r) += X##_f[2]; \
          (r) = ((rsize) <= _FP_W_TYPE_SIZE \
                 ? 0 \
                 : (r) << _FP_W_TYPE_SIZE); \
          (r) += X##_f[1]; \
          (r) = ((rsize) <= _FP_W_TYPE_SIZE \
                 ? 0 \
                 : (r) << _FP_W_TYPE_SIZE); \
          (r) += X##_f[0]; \
        } \
    } \
  while (0)
|
||||
|
||||
/* "No disassemble Number Five!" */
|
||||
/* Move an integer of size rsize into X's fractional part. We rely on
|
||||
the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
|
||||
having to mask the values we store into it. */
|
||||
/* Split the integer r (rsize bits wide) into the four words of X.
   Word k receives r shifted right by k words, or 0 when rsize does not
   reach that word.  Nothing is masked: the store into the word-sized
   array element does the truncation.  */
#define _FP_FRAC_DISASSEMBLE_4(X, r, rsize) \
  do \
    { \
      X##_f[0] = (r); \
      X##_f[1] = ((rsize) <= _FP_W_TYPE_SIZE \
                  ? 0 \
                  : (r) >> _FP_W_TYPE_SIZE); \
      X##_f[2] = ((rsize) <= 2*_FP_W_TYPE_SIZE \
                  ? 0 \
                  : (r) >> 2*_FP_W_TYPE_SIZE); \
      X##_f[3] = ((rsize) <= 3*_FP_W_TYPE_SIZE \
                  ? 0 \
                  : (r) >> 3*_FP_W_TYPE_SIZE); \
    } \
  while (0)
|
||||
|
||||
/* One-word to four-word copy: S lands in word 0 of D and words 1..3
   are cleared.  */
#define _FP_FRAC_COPY_4_1(D, S) \
  do \
    { \
      D##_f[0] = S##_f; \
      D##_f[1] = D##_f[2] = D##_f[3] = 0; \
    } \
  while (0)
|
||||
|
||||
/* Two-word to four-word copy: S_f0 and S_f1 land in words 0 and 1 of D
   and words 2..3 are cleared.  */
#define _FP_FRAC_COPY_4_2(D, S) \
  do \
    { \
      D##_f[0] = S##_f0; \
      D##_f[1] = S##_f1; \
      D##_f[2] = D##_f[3] = 0; \
    } \
  while (0)
|
||||
|
||||
/* Same-width copy: forwards to _FP_FRAC_COPY_4.  */
#define _FP_FRAC_COPY_4_4(D, S) _FP_FRAC_COPY_4 (D, S)
|
||||
|
||||
#endif /* !SOFT_FP_OP_4_H */
|
238
src/gemm/soft-fp/op-8.h
Normal file
238
src/gemm/soft-fp/op-8.h
Normal file
|
@ -0,0 +1,238 @@
|
|||
/* Software floating-point emulation.
|
||||
Basic eight-word fraction declaration and manipulation.
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com),
|
||||
Jakub Jelinek (jj@ultra.linux.cz) and
|
||||
Peter Maydell (pmaydell@chiark.greenend.org.uk).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef SOFT_FP_OP_8_H
|
||||
#define SOFT_FP_OP_8_H 1
|
||||
|
||||
/* We need just a few things from here for op-4, if we ever need some
|
||||
other macros, they can be added. */
|
||||
/* Eight-word fraction: storage and word accessors.  Word 0 is the
   least significant word and word 7 the most significant.
   _FP_FRAC_SET_8 takes I as a single argument that expands to the
   eight initialisers expected by __FP_FRAC_SET_8.  */
#define _FP_FRAC_DECL_8(X) _FP_W_TYPE X##_f[8]
#define _FP_FRAC_SET_8(X, I) __FP_FRAC_SET_8 (X, I)
#define _FP_FRAC_HIGH_8(X) (X##_f[7])
#define _FP_FRAC_LOW_8(X) (X##_f[0])
#define _FP_FRAC_WORD_8(X, w) (X##_f[w])
|
||||
|
||||
/* Shift the eight-word fraction X left by N bits.
   N is split into whole words (skip) and a residual bit count (up).
   Words are rewritten from the top down so each source word is read
   before it is overwritten; vacated low words are zeroed.  The
   whole-word case is handled separately so that no shift by the full
   word width is ever performed.  */
#define _FP_FRAC_SLL_8(X, N) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_SLL_8_up, _FP_FRAC_SLL_8_down; \
      _FP_I_TYPE _FP_FRAC_SLL_8_skip, _FP_FRAC_SLL_8_i; \
      _FP_FRAC_SLL_8_skip = (N) / _FP_W_TYPE_SIZE; \
      _FP_FRAC_SLL_8_up = (N) % _FP_W_TYPE_SIZE; \
      _FP_FRAC_SLL_8_down = _FP_W_TYPE_SIZE - _FP_FRAC_SLL_8_up; \
      if (!_FP_FRAC_SLL_8_up) \
        for (_FP_FRAC_SLL_8_i = 7; \
             _FP_FRAC_SLL_8_i >= _FP_FRAC_SLL_8_skip; \
             --_FP_FRAC_SLL_8_i) \
          X##_f[_FP_FRAC_SLL_8_i] \
            = X##_f[_FP_FRAC_SLL_8_i-_FP_FRAC_SLL_8_skip]; \
      else \
        { \
          for (_FP_FRAC_SLL_8_i = 7; \
               _FP_FRAC_SLL_8_i > _FP_FRAC_SLL_8_skip; \
               --_FP_FRAC_SLL_8_i) \
            X##_f[_FP_FRAC_SLL_8_i] \
              = ((X##_f[_FP_FRAC_SLL_8_i-_FP_FRAC_SLL_8_skip] \
                  << _FP_FRAC_SLL_8_up) \
                 | (X##_f[_FP_FRAC_SLL_8_i-_FP_FRAC_SLL_8_skip-1] \
                    >> _FP_FRAC_SLL_8_down)); \
          /* Lowest surviving word has no neighbour below it.  */ \
          X##_f[_FP_FRAC_SLL_8_i--] = X##_f[0] << _FP_FRAC_SLL_8_up; \
        } \
      for (; _FP_FRAC_SLL_8_i >= 0; --_FP_FRAC_SLL_8_i) \
        X##_f[_FP_FRAC_SLL_8_i] = 0; \
    } \
  while (0)
|
||||
|
||||
/* Shift the eight-word fraction X right by N bits (logical shift).
   N is split into whole words (skip) and a residual bit count (down).
   Words are rewritten from the bottom up so each source word is read
   before it is overwritten; vacated high words are zeroed.  The
   whole-word case is handled separately so that no shift by the full
   word width is ever performed.  */
#define _FP_FRAC_SRL_8(X, N) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_SRL_8_up, _FP_FRAC_SRL_8_down; \
      _FP_I_TYPE _FP_FRAC_SRL_8_skip, _FP_FRAC_SRL_8_i; \
      _FP_FRAC_SRL_8_skip = (N) / _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRL_8_down = (N) % _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRL_8_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRL_8_down; \
      if (!_FP_FRAC_SRL_8_down) \
        for (_FP_FRAC_SRL_8_i = 0; \
             _FP_FRAC_SRL_8_i <= 7-_FP_FRAC_SRL_8_skip; \
             ++_FP_FRAC_SRL_8_i) \
          X##_f[_FP_FRAC_SRL_8_i] \
            = X##_f[_FP_FRAC_SRL_8_i+_FP_FRAC_SRL_8_skip]; \
      else \
        { \
          for (_FP_FRAC_SRL_8_i = 0; \
               _FP_FRAC_SRL_8_i < 7-_FP_FRAC_SRL_8_skip; \
               ++_FP_FRAC_SRL_8_i) \
            X##_f[_FP_FRAC_SRL_8_i] \
              = ((X##_f[_FP_FRAC_SRL_8_i+_FP_FRAC_SRL_8_skip] \
                  >> _FP_FRAC_SRL_8_down) \
                 | (X##_f[_FP_FRAC_SRL_8_i+_FP_FRAC_SRL_8_skip+1] \
                    << _FP_FRAC_SRL_8_up)); \
          /* Highest surviving word has no neighbour above it.  */ \
          X##_f[_FP_FRAC_SRL_8_i++] = X##_f[7] >> _FP_FRAC_SRL_8_down; \
        } \
      for (; _FP_FRAC_SRL_8_i < 8; ++_FP_FRAC_SRL_8_i) \
        X##_f[_FP_FRAC_SRL_8_i] = 0; \
    } \
  while (0)
|
||||
|
||||
|
||||
/* Right shift with sticky-lsb.
|
||||
What this actually means is that we do a standard right-shift,
|
||||
but that if any of the bits that fall off the right hand side
|
||||
were one then we always set the LSbit. */
|
||||
/* Shift the eight-word fraction X right by N bits, then OR a 1 into
   bit 0 of word 0 if any bit shifted out was set.
   The shifted-out bits are gathered first: every whole word that is
   skipped, plus (when N is not a multiple of the word size) the low
   bits of the first surviving word.  The shift itself matches
   _FP_FRAC_SRL_8.  The size argument is not used by this macro.  */
#define _FP_FRAC_SRS_8(X, N, size) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_SRS_8_up, _FP_FRAC_SRS_8_down; \
      _FP_I_TYPE _FP_FRAC_SRS_8_skip, _FP_FRAC_SRS_8_i; \
      _FP_W_TYPE _FP_FRAC_SRS_8_s; \
      _FP_FRAC_SRS_8_skip = (N) / _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRS_8_down = (N) % _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRS_8_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRS_8_down; \
      for (_FP_FRAC_SRS_8_s = _FP_FRAC_SRS_8_i = 0; \
           _FP_FRAC_SRS_8_i < _FP_FRAC_SRS_8_skip; \
           ++_FP_FRAC_SRS_8_i) \
        _FP_FRAC_SRS_8_s |= X##_f[_FP_FRAC_SRS_8_i]; \
      if (!_FP_FRAC_SRS_8_down) \
        for (_FP_FRAC_SRS_8_i = 0; \
             _FP_FRAC_SRS_8_i <= 7-_FP_FRAC_SRS_8_skip; \
             ++_FP_FRAC_SRS_8_i) \
          X##_f[_FP_FRAC_SRS_8_i] \
            = X##_f[_FP_FRAC_SRS_8_i+_FP_FRAC_SRS_8_skip]; \
      else \
        { \
          /* Here the index equals skip: keep the bits of that word \
             that are about to fall off.  */ \
          _FP_FRAC_SRS_8_s \
            |= X##_f[_FP_FRAC_SRS_8_i] << _FP_FRAC_SRS_8_up; \
          for (_FP_FRAC_SRS_8_i = 0; \
               _FP_FRAC_SRS_8_i < 7-_FP_FRAC_SRS_8_skip; \
               ++_FP_FRAC_SRS_8_i) \
            X##_f[_FP_FRAC_SRS_8_i] \
              = ((X##_f[_FP_FRAC_SRS_8_i+_FP_FRAC_SRS_8_skip] \
                  >> _FP_FRAC_SRS_8_down) \
                 | (X##_f[_FP_FRAC_SRS_8_i+_FP_FRAC_SRS_8_skip+1] \
                    << _FP_FRAC_SRS_8_up)); \
          X##_f[_FP_FRAC_SRS_8_i++] = X##_f[7] >> _FP_FRAC_SRS_8_down; \
        } \
      for (; _FP_FRAC_SRS_8_i < 8; ++_FP_FRAC_SRS_8_i) \
        X##_f[_FP_FRAC_SRS_8_i] = 0; \
      /* Don't fix the LSB until the very end when we're sure f[0] is \
         stable.  */ \
      X##_f[0] |= (_FP_FRAC_SRS_8_s != 0); \
    } \
  while (0)
|
||||
|
||||
/* R = X + Y over eight words, low word first; the carry out of word 7
   is dropped.  The carry for each word is detected by comparing the
   sum against the X word: with an incoming carry a sum equal to X also
   means the word wrapped, hence <= instead of <.  Because the
   comparison reads X after R has been stored, R must not share storage
   with X.  */
#define _FP_FRAC_ADD_8(R, X, Y) \
  do \
    { \
      _FP_W_TYPE _FP_FRAC_ADD_8_c = 0; \
      _FP_I_TYPE _FP_FRAC_ADD_8_i; \
      for (_FP_FRAC_ADD_8_i = 0; _FP_FRAC_ADD_8_i < 8; ++_FP_FRAC_ADD_8_i) \
        { \
          R##_f[_FP_FRAC_ADD_8_i] \
            = (X##_f[_FP_FRAC_ADD_8_i] + Y##_f[_FP_FRAC_ADD_8_i] \
               + _FP_FRAC_ADD_8_c); \
          _FP_FRAC_ADD_8_c \
            = (_FP_FRAC_ADD_8_c \
               ? R##_f[_FP_FRAC_ADD_8_i] <= X##_f[_FP_FRAC_ADD_8_i] \
               : R##_f[_FP_FRAC_ADD_8_i] < X##_f[_FP_FRAC_ADD_8_i]); \
        } \
    } \
  while (0)
|
||||
|
||||
/* R = X - Y over eight words, low word first; the borrow out of word 7
   is dropped.  The borrow for each word is detected by comparing the
   difference against the X word: with an incoming borrow a difference
   equal to X also means the word wrapped, hence >= instead of >.
   The differences are built in a temporary array and copied to R at
   the end, so R may share storage with X or Y.  */
#define _FP_FRAC_SUB_8(R, X, Y) \
  do \
    { \
      _FP_W_TYPE _FP_FRAC_SUB_8_tmp[8]; \
      _FP_W_TYPE _FP_FRAC_SUB_8_c = 0; \
      _FP_I_TYPE _FP_FRAC_SUB_8_i; \
      for (_FP_FRAC_SUB_8_i = 0; _FP_FRAC_SUB_8_i < 8; ++_FP_FRAC_SUB_8_i) \
        { \
          _FP_FRAC_SUB_8_tmp[_FP_FRAC_SUB_8_i] \
            = (X##_f[_FP_FRAC_SUB_8_i] - Y##_f[_FP_FRAC_SUB_8_i] \
               - _FP_FRAC_SUB_8_c); \
          _FP_FRAC_SUB_8_c \
            = (_FP_FRAC_SUB_8_c \
               ? (_FP_FRAC_SUB_8_tmp[_FP_FRAC_SUB_8_i] \
                  >= X##_f[_FP_FRAC_SUB_8_i]) \
               : (_FP_FRAC_SUB_8_tmp[_FP_FRAC_SUB_8_i] \
                  > X##_f[_FP_FRAC_SUB_8_i])); \
        } \
      for (_FP_FRAC_SUB_8_i = 0; _FP_FRAC_SUB_8_i < 8; ++_FP_FRAC_SUB_8_i) \
        R##_f[_FP_FRAC_SUB_8_i] = _FP_FRAC_SUB_8_tmp[_FP_FRAC_SUB_8_i]; \
    } \
  while (0)
|
||||
|
||||
/* Count leading zero bits of the eight-word fraction X into R.
   The scan runs from word 7 down to word 1 and stops at the first
   non-zero word; if none is found the index ends at 0, so __FP_CLZ is
   applied to word 0 whatever its value.  The count from __FP_CLZ is
   then increased by one word width for every word above the selected
   one.  */
#define _FP_FRAC_CLZ_8(R, X) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_CLZ_8_i; \
      for (_FP_FRAC_CLZ_8_i = 7; _FP_FRAC_CLZ_8_i > 0; _FP_FRAC_CLZ_8_i--) \
        if (X##_f[_FP_FRAC_CLZ_8_i]) \
          break; \
      __FP_CLZ ((R), X##_f[_FP_FRAC_CLZ_8_i]); \
      (R) += _FP_W_TYPE_SIZE * (7 - _FP_FRAC_CLZ_8_i); \
    } \
  while (0)
|
||||
|
||||
/* Initialiser list in __FP_FRAC_SET_8 order (word 7 first): sets only
   word 0, to 1.  */
#define _FP_MINFRAC_8 0, 0, 0, 0, 0, 0, 0, 1

/* Non-zero when the top word, read as a signed word, is negative.  */
#define _FP_FRAC_NEGP_8(X) ((_FP_WS_TYPE) X##_f[7] < 0)
/* Non-zero when all eight words are zero.  */
#define _FP_FRAC_ZEROP_8(X) \
  ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3] \
    | X##_f[4] | X##_f[5] | X##_f[6] | X##_f[7]) == 0)
/* Mask the format's double-width high word with its double-width high
   bit; both are selected by the format suffix fs.  */
#define _FP_FRAC_HIGHBIT_DW_8(fs, X) \
  (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
|
||||
|
||||
/* Eight-word to four-word copy: words 0..3 of S are copied, words 4..7
   are not.  */
#define _FP_FRAC_COPY_4_8(D, S) \
  do \
    { \
      D##_f[0] = S##_f[0]; \
      D##_f[1] = S##_f[1]; \
      D##_f[2] = S##_f[2]; \
      D##_f[3] = S##_f[3]; \
    } \
  while (0)
|
||||
|
||||
/* Four-word to eight-word copy: S fills words 0..3 of D and words 4..7
   are cleared.  */
#define _FP_FRAC_COPY_8_4(D, S) \
  do \
    { \
      D##_f[0] = S##_f[0]; \
      D##_f[1] = S##_f[1]; \
      D##_f[2] = S##_f[2]; \
      D##_f[3] = S##_f[3]; \
      D##_f[4] = D##_f[5] = D##_f[6] = D##_f[7] = 0; \
    } \
  while (0)
|
||||
|
||||
/* Store eight words into X's fraction; I7 is the most significant word
   (index 7) and I0 the least significant (index 0).  */
#define __FP_FRAC_SET_8(X, I7, I6, I5, I4, I3, I2, I1, I0) \
  (X##_f[7] = (I7), X##_f[6] = (I6), X##_f[5] = (I5), X##_f[4] = (I4), \
   X##_f[3] = (I3), X##_f[2] = (I2), X##_f[1] = (I1), X##_f[0] = (I0))
|
||||
|
||||
#endif /* !SOFT_FP_OP_8_H */
|
2155
src/gemm/soft-fp/op-common.h
Normal file
2155
src/gemm/soft-fp/op-common.h
Normal file
File diff suppressed because it is too large
Load diff
117
src/gemm/soft-fp/sfp-machine.h
Normal file
117
src/gemm/soft-fp/sfp-machine.h
Normal file
|
@ -0,0 +1,117 @@
|
|||
|
||||
/* Word-size dependent configuration.  __riscv_xlen == 32 selects the
   32-bit word setup; any other value, including an undefined
   __riscv_xlen, selects the 64-bit setup.  For each of the S, D and Q
   formats this picks the word type, the multiply and divide kernels,
   and the default NaN fraction (one initialiser per fraction word).  */
#if __riscv_xlen == 32

#define _FP_W_TYPE_SIZE 32
#define _FP_W_TYPE unsigned long
#define _FP_WS_TYPE signed long
#define _FP_I_TYPE long

#define _FP_MUL_MEAT_S(R,X,Y) \
  _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm)
#define _FP_MUL_MEAT_D(R,X,Y) \
  _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
#define _FP_MUL_MEAT_Q(R,X,Y) \
  _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)

#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_udiv_norm(S,R,X,Y)
#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y)
#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y)

#define _FP_NANFRAC_S _FP_QNANBIT_S
#define _FP_NANFRAC_D _FP_QNANBIT_D, 0
#define _FP_NANFRAC_Q _FP_QNANBIT_Q, 0, 0, 0

#else

#define _FP_W_TYPE_SIZE 64
#define _FP_W_TYPE unsigned long long
#define _FP_WS_TYPE signed long long
#define _FP_I_TYPE long long

#define _FP_MUL_MEAT_S(R,X,Y) \
  _FP_MUL_MEAT_1_imm(_FP_WFRACBITS_S,R,X,Y)
#define _FP_MUL_MEAT_D(R,X,Y) \
  _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
#define _FP_MUL_MEAT_Q(R,X,Y) \
  _FP_MUL_MEAT_2_wide_3mul(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)

#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_imm(S,R,X,Y,_FP_DIV_HELP_imm)
#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_1_udiv_norm(D,R,X,Y)
#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_2_udiv(Q,R,X,Y)

#define _FP_NANFRAC_S _FP_QNANBIT_S
#define _FP_NANFRAC_D _FP_QNANBIT_D
#define _FP_NANFRAC_Q _FP_QNANBIT_Q, 0

#endif
|
||||
|
||||
/* 128-bit integer types (machine mode TI) and their width in bits;
   only provided when __riscv_xlen == 64.  */
#if __riscv_xlen == 64
typedef int TItype __attribute__ ((mode (TI)));
typedef unsigned int UTItype __attribute__ ((mode (TI)));
#define TI_BITS (__CHAR_BIT__ * (int)sizeof(TItype))
#endif

/* The type of the result of a floating point comparison.  This must
   match __libgcc_cmp_return__ in GCC for the target.  */
typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__)));
#define CMPtype __gcc_CMPtype
|
||||
|
||||
/* Sign bit given to a generated NaN, per format.  */
#define _FP_NANSIGN_S 0
#define _FP_NANSIGN_D 0
#define _FP_NANSIGN_Q 0

#define _FP_KEEPNANFRACP 0
#define _FP_QNANNEGATEDP 0

/* NaN selection for a two-operand operation: the result is always the
   format's default NaN (sign _FP_NANSIGN_fs, fraction _FP_NANFRAC_fs,
   class FP_CLS_NAN).  The operands X and Y and the operation OP are
   ignored.  */
#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \
  do { \
    R##_s = _FP_NANSIGN_##fs; \
    _FP_FRAC_SET_##wc(R,_FP_NANFRAC_##fs); \
    R##_c = FP_CLS_NAN; \
  } while (0)
|
||||
|
||||
/* Per-operation state: a local _frm holds the rounding mode.  The
   trailing semicolon is part of the macro.  */
#define _FP_DECL_EX int _frm __attribute__ ((unused));
#define FP_ROUNDMODE _frm

/* Rounding-mode and exception-flag values.  When __riscv_flen is
   defined, _frm is read straight from the frm CSR and _fex is OR-ed
   straight into fflags (see below), so these constants have to equal
   the hardware encodings.  */
#define FP_RND_NEAREST 0x0
#define FP_RND_ZERO 0x1
#define FP_RND_PINF 0x3
#define FP_RND_MINF 0x2

#define FP_EX_INVALID 0x10
#define FP_EX_OVERFLOW 0x04
#define FP_EX_UNDERFLOW 0x02
#define FP_EX_DIVZERO 0x08
#define FP_EX_INEXACT 0x01

#define _FP_TININESS_AFTER_ROUNDING 1

#ifdef __riscv_flen
/* Hardware FP state is available: fetch the dynamic rounding mode and
   publish any raised exception flags.  */
#define FP_INIT_ROUNDMODE \
  do { \
    __asm__ volatile ("frrm %0" : "=r" (_frm)); \
  } while (0)

#define FP_HANDLE_EXCEPTIONS \
  do { \
    if (__builtin_expect (_fex, 0)) \
      __asm__ volatile ("csrs fflags, %0" : : "rK" (_fex)); \
  } while (0)
#else
/* No FP CSRs: always round to nearest.  FP_HANDLE_EXCEPTIONS is not
   defined on this path.  */
#define FP_INIT_ROUNDMODE _frm = FP_RND_NEAREST
#endif
|
||||
|
||||
/* Byte-order constants, with __BYTE_ORDER derived from the compiler's
   __BYTE_ORDER__; little-endian is assumed when the compiler does not
   report big-endian.  */
#define __LITTLE_ENDIAN 1234
#define __BIG_ENDIAN 4321

#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define __BYTE_ORDER __BIG_ENDIAN
#else
#define __BYTE_ORDER __LITTLE_ENDIAN
#endif


/* Define ALIASNAME as a strong alias for NAME.  The extra level of
   expansion lets macro arguments be expanded before NAME is
   stringified; the trailing semicolon is part of the macro.  */
# define strong_alias(name, aliasname) _strong_alias(name, aliasname)
# define _strong_alias(name, aliasname) \
  extern __typeof (name) aliasname __attribute__ ((alias (#name)));
|
199
src/gemm/soft-fp/single.h
Normal file
199
src/gemm/soft-fp/single.h
Normal file
|
@ -0,0 +1,199 @@
|
|||
/* Software floating-point emulation.
|
||||
Definitions for IEEE Single Precision.
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com),
|
||||
Jakub Jelinek (jj@ultra.linux.cz),
|
||||
David S. Miller (davem@redhat.com) and
|
||||
Peter Maydell (pmaydell@chiark.greenend.org.uk).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef SOFT_FP_SINGLE_H
|
||||
#define SOFT_FP_SINGLE_H 1
|
||||
|
||||
/* Single precision needs a fraction word of at least 32 bits.  */
#if _FP_W_TYPE_SIZE < 32
# error "Here's a nickel kid. Go buy yourself a real computer."
#endif
|
||||
|
||||
/* Bits of storage available for a single-precision fraction: one word,
   and for the double-width form two words unless one 64-bit word is
   already wide enough.  */
#define _FP_FRACTBITS_S _FP_W_TYPE_SIZE

#if _FP_W_TYPE_SIZE < 64
# define _FP_FRACTBITS_DW_S (2 * _FP_W_TYPE_SIZE)
#else
# define _FP_FRACTBITS_DW_S _FP_W_TYPE_SIZE
#endif

/* Format parameters.  The W-prefixed quantities include the
   _FP_WORKBITS extra low-order working bits; the X-suffixed ones are
   the storage bits left over above the fraction.  */
#define _FP_FRACBITS_S 24
#define _FP_FRACXBITS_S (_FP_FRACTBITS_S - _FP_FRACBITS_S)
#define _FP_WFRACBITS_S (_FP_WORKBITS + _FP_FRACBITS_S)
#define _FP_WFRACXBITS_S (_FP_FRACTBITS_S - _FP_WFRACBITS_S)
#define _FP_EXPBITS_S 8
#define _FP_EXPBIAS_S 127
#define _FP_EXPMAX_S 255
/* Single-bit masks; the _SH_ forms are the same bit moved up by
   _FP_WORKBITS.  */
#define _FP_QNANBIT_S ((_FP_W_TYPE) 1 << (_FP_FRACBITS_S-2))
#define _FP_QNANBIT_SH_S ((_FP_W_TYPE) 1 << (_FP_FRACBITS_S-2+_FP_WORKBITS))
#define _FP_IMPLBIT_S ((_FP_W_TYPE) 1 << (_FP_FRACBITS_S-1))
#define _FP_IMPLBIT_SH_S ((_FP_W_TYPE) 1 << (_FP_FRACBITS_S-1+_FP_WORKBITS))
#define _FP_OVERFLOW_S ((_FP_W_TYPE) 1 << (_FP_WFRACBITS_S))

/* Double-width working fraction; the high-bit mask is positioned
   within a single word (bit index taken modulo the word size).  */
#define _FP_WFRACBITS_DW_S (2 * _FP_WFRACBITS_S)
#define _FP_WFRACXBITS_DW_S (_FP_FRACTBITS_DW_S - _FP_WFRACBITS_DW_S)
#define _FP_HIGHBIT_DW_S \
  ((_FP_W_TYPE) 1 << (_FP_WFRACBITS_DW_S - 1) % _FP_W_TYPE_SIZE)
|
||||
|
||||
/* The implementation of _FP_MUL_MEAT_S and _FP_DIV_MEAT_S should be
|
||||
chosen by the target machine. */
|
||||
|
||||
typedef float SFtype __attribute__ ((mode (SF)));
|
||||
|
||||
union _FP_UNION_S
|
||||
{
|
||||
SFtype flt;
|
||||
struct _FP_STRUCT_LAYOUT
|
||||
{
|
||||
#if __BYTE_ORDER == __BIG_ENDIAN
|
||||
unsigned sign : 1;
|
||||
unsigned exp : _FP_EXPBITS_S;
|
||||
unsigned frac : _FP_FRACBITS_S - (_FP_IMPLBIT_S != 0);
|
||||
#else
|
||||
unsigned frac : _FP_FRACBITS_S - (_FP_IMPLBIT_S != 0);
|
||||
unsigned exp : _FP_EXPBITS_S;
|
||||
unsigned sign : 1;
|
||||
#endif
|
||||
} bits;
|
||||
};
|
||||
|
||||
/* Single-precision front ends: each macro binds the format letter S
   and the word count 1 and forwards to the generic helper.  The
   RAW forms only move bits between the value and the unpacked
   variable; the plain and SEMIRAW forms add the matching
   _FP_UNPACK_xxx / _FP_PACK_xxx step.  The P-suffixed forms use the
   _P raw helpers (presumably taking VAL by pointer; confirm against
   the one-word helper header), and on the pack side they store nothing
   when FP_INHIBIT_RESULTS is set.  */
#define FP_DECL_S(X) _FP_DECL (1, X)
#define FP_UNPACK_RAW_S(X, val) _FP_UNPACK_RAW_1 (S, X, (val))
#define FP_UNPACK_RAW_SP(X, val) _FP_UNPACK_RAW_1_P (S, X, (val))
#define FP_PACK_RAW_S(val, X) _FP_PACK_RAW_1 (S, (val), X)
#define FP_PACK_RAW_SP(val, X) \
  do \
    { \
      if (!FP_INHIBIT_RESULTS) \
        _FP_PACK_RAW_1_P (S, (val), X); \
    } \
  while (0)

#define FP_UNPACK_S(X, val) \
  do \
    { \
      _FP_UNPACK_RAW_1 (S, X, (val)); \
      _FP_UNPACK_CANONICAL (S, 1, X); \
    } \
  while (0)

#define FP_UNPACK_SP(X, val) \
  do \
    { \
      _FP_UNPACK_RAW_1_P (S, X, (val)); \
      _FP_UNPACK_CANONICAL (S, 1, X); \
    } \
  while (0)

#define FP_UNPACK_SEMIRAW_S(X, val) \
  do \
    { \
      _FP_UNPACK_RAW_1 (S, X, (val)); \
      _FP_UNPACK_SEMIRAW (S, 1, X); \
    } \
  while (0)

#define FP_UNPACK_SEMIRAW_SP(X, val) \
  do \
    { \
      _FP_UNPACK_RAW_1_P (S, X, (val)); \
      _FP_UNPACK_SEMIRAW (S, 1, X); \
    } \
  while (0)

#define FP_PACK_S(val, X) \
  do \
    { \
      _FP_PACK_CANONICAL (S, 1, X); \
      _FP_PACK_RAW_1 (S, (val), X); \
    } \
  while (0)

#define FP_PACK_SP(val, X) \
  do \
    { \
      _FP_PACK_CANONICAL (S, 1, X); \
      if (!FP_INHIBIT_RESULTS) \
        _FP_PACK_RAW_1_P (S, (val), X); \
    } \
  while (0)

#define FP_PACK_SEMIRAW_S(val, X) \
  do \
    { \
      _FP_PACK_SEMIRAW (S, 1, X); \
      _FP_PACK_RAW_1 (S, (val), X); \
    } \
  while (0)

#define FP_PACK_SEMIRAW_SP(val, X) \
  do \
    { \
      _FP_PACK_SEMIRAW (S, 1, X); \
      if (!FP_INHIBIT_RESULTS) \
        _FP_PACK_RAW_1_P (S, (val), X); \
    } \
  while (0)
|
||||
|
||||
/* Single-precision operations: each macro binds the format letter S
   and the word count 1 and forwards to the generic implementation.  */
#define FP_ISSIGNAN_S(X) _FP_ISSIGNAN (S, 1, X)
#define FP_NEG_S(R, X) _FP_NEG (S, 1, R, X)
#define FP_ADD_S(R, X, Y) _FP_ADD (S, 1, R, X, Y)
#define FP_SUB_S(R, X, Y) _FP_SUB (S, 1, R, X, Y)
#define FP_MUL_S(R, X, Y) _FP_MUL (S, 1, R, X, Y)
#define FP_DIV_S(R, X, Y) _FP_DIV (S, 1, R, X, Y)
#define FP_SQRT_S(R, X) _FP_SQRT (S, 1, R, X)
#define _FP_SQRT_MEAT_S(R, S, T, X, Q) _FP_SQRT_MEAT_1 (R, S, T, X, (Q))

/* The fused multiply-add passes a second word count: 2 when the word
   is narrower than 64 bits, otherwise 1.  */
#if _FP_W_TYPE_SIZE < 64
# define FP_FMA_S(R, X, Y, Z) _FP_FMA (S, 1, 2, R, X, Y, Z)
#else
# define FP_FMA_S(R, X, Y, Z) _FP_FMA (S, 1, 1, R, X, Y, Z)
#endif

#define FP_CMP_S(r, X, Y, un, ex) _FP_CMP (S, 1, (r), X, Y, (un), (ex))
#define FP_CMP_EQ_S(r, X, Y, ex) _FP_CMP_EQ (S, 1, (r), X, Y, (ex))
#define FP_CMP_UNORD_S(r, X, Y, ex) _FP_CMP_UNORD (S, 1, (r), X, Y, (ex))

#define FP_TO_INT_S(r, X, rsz, rsg) _FP_TO_INT (S, 1, (r), X, (rsz), (rsg))
#define FP_TO_INT_ROUND_S(r, X, rsz, rsg) \
  _FP_TO_INT_ROUND (S, 1, (r), X, (rsz), (rsg))
#define FP_FROM_INT_S(X, r, rs, rt) _FP_FROM_INT (S, 1, X, (r), (rs), rt)

#define _FP_FRAC_HIGH_S(X) _FP_FRAC_HIGH_1 (X)
#define _FP_FRAC_HIGH_RAW_S(X) _FP_FRAC_HIGH_1 (X)

/* High word of the double-width fraction: the two-word accessor when
   the word is narrower than 64 bits, otherwise the one-word one.  */
#if _FP_W_TYPE_SIZE < 64
# define _FP_FRAC_HIGH_DW_S(X) _FP_FRAC_HIGH_2 (X)
#else
# define _FP_FRAC_HIGH_DW_S(X) _FP_FRAC_HIGH_1 (X)
#endif
|
||||
|
||||
#endif /* !SOFT_FP_SINGLE_H */
|
230
src/gemm/soft-fp/soft-fp.h
Normal file
230
src/gemm/soft-fp/soft-fp.h
Normal file
|
@ -0,0 +1,230 @@
|
|||
#ifndef __SOFT_FP_H__
|
||||
#define __SOFT_FP_H__
|
||||
|
||||
#include "sfp-machine.h"
|
||||
|
||||
/* abort () expands to nothing in this build.  Because this is a
   zero-parameter function-like macro, any later use spelled with an
   argument list, such as the prototype `void abort (void)', will not
   preprocess.  */
#define abort()
/* For unreachable default cases in switch statements over bitwise OR
   of FP_CLS_* values.  */
#if (defined __GNUC__ \
     && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
# define _FP_UNREACHABLE __builtin_unreachable ()
#else
# define _FP_UNREACHABLE abort ()
#endif
|
||||
// 63
|
||||
/* Compile-time assertion.  _Static_assert is used with GCC 4.6 or
   later, or when the compiler reports C11; otherwise a declaration is
   emitted whose bit-field width becomes -1, and so fails to compile,
   when EXPR is false.  MSG is only used by the _Static_assert form.  */
#if ((defined __GNUC__ \
      && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))) \
     || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 201112L))
# define _FP_STATIC_ASSERT(expr, msg) \
  _Static_assert ((expr), msg)
#else
# define _FP_STATIC_ASSERT(expr, msg) \
  extern int (*__Static_assert_function (void)) \
    [!!sizeof (struct { int __error_if_negative: (expr) ? 2 : -1; })]
#endif
|
||||
|
||||
|
||||
/* Initialiser appended to declarations; expands to `= 0'.  */
#define _FP_ZERO_INIT = 0
/* Number of extra low-order working bits, and a mask for each: sticky
   is bit 0, guard bit 1, round bit 2, and bit 3 is the first bit above
   them.  */
#define _FP_WORKBITS 3
#define _FP_WORK_LSB ((_FP_W_TYPE) 1 << 3)
#define _FP_WORK_ROUND ((_FP_W_TYPE) 1 << 2)
#define _FP_WORK_GUARD ((_FP_W_TYPE) 1 << 1)
#define _FP_WORK_STICKY ((_FP_W_TYPE) 1 << 0)
|
||||
|
||||
#ifndef FP_RND_NEAREST
|
||||
# define FP_RND_NEAREST 0
|
||||
# define FP_RND_ZERO 1
|
||||
# define FP_RND_PINF 2
|
||||
# define FP_RND_MINF 3
|
||||
#endif
|
||||
#ifndef FP_ROUNDMODE
|
||||
# define FP_ROUNDMODE FP_RND_NEAREST
|
||||
#endif
|
||||
|
||||
/* By default don't care about exceptions. */ // 101
|
||||
#ifndef FP_EX_INVALID
|
||||
# define FP_EX_INVALID 0
|
||||
#endif
|
||||
#ifndef FP_EX_OVERFLOW
|
||||
# define FP_EX_OVERFLOW 0
|
||||
#endif
|
||||
#ifndef FP_EX_UNDERFLOW
|
||||
# define FP_EX_UNDERFLOW 0
|
||||
#endif
|
||||
#ifndef FP_EX_DIVZERO
|
||||
# define FP_EX_DIVZERO 0
|
||||
#endif
|
||||
#ifndef FP_EX_INEXACT
|
||||
# define FP_EX_INEXACT 0
|
||||
#endif
|
||||
#ifndef FP_EX_DENORM
|
||||
# define FP_EX_DENORM 0
|
||||
#endif
|
||||
|
||||
/* Sub-exceptions of "invalid". */ // 121
|
||||
/* Signaling NaN operand. */
|
||||
#ifndef FP_EX_INVALID_SNAN
|
||||
# define FP_EX_INVALID_SNAN 0
|
||||
#endif
|
||||
/* Inf * 0. */ // 126
|
||||
#ifndef FP_EX_INVALID_IMZ
|
||||
# define FP_EX_INVALID_IMZ 0
|
||||
#endif
|
||||
|
||||
/* Inf - Inf. */ // 134
|
||||
#ifndef FP_EX_INVALID_ISI
|
||||
# define FP_EX_INVALID_ISI 0
|
||||
#endif
|
||||
/* 0 / 0. */
|
||||
#ifndef FP_EX_INVALID_ZDZ
|
||||
# define FP_EX_INVALID_ZDZ 0
|
||||
#endif
|
||||
/* Inf / Inf. */
|
||||
#ifndef FP_EX_INVALID_IDI
|
||||
# define FP_EX_INVALID_IDI 0
|
||||
#endif
|
||||
|
||||
/* Invalid conversion to integer. */
|
||||
#ifndef FP_EX_INVALID_CVI
|
||||
# define FP_EX_INVALID_CVI 0
|
||||
#endif
|
||||
/* Invalid comparison. */ // 154
|
||||
#ifndef FP_EX_INVALID_VC
|
||||
# define FP_EX_INVALID_VC 0
|
||||
#endif
|
||||
|
||||
/* _FP_STRUCT_LAYOUT may be defined as an attribute to determine the
|
||||
struct layout variant used for structures where bit-fields are used
|
||||
to access specific parts of binary floating-point numbers. This is
|
||||
required for systems where the default ABI uses struct layout with
|
||||
differences in how consecutive bit-fields are laid out from the
|
||||
default expected by soft-fp. */
|
||||
#ifndef _FP_STRUCT_LAYOUT
|
||||
# define _FP_STRUCT_LAYOUT
|
||||
#endif
|
||||
// 169
|
||||
#ifdef _FP_DECL_EX
|
||||
# define FP_DECL_EX \
|
||||
int _fex = 0; \
|
||||
_FP_DECL_EX
|
||||
#else
|
||||
# define FP_DECL_EX int _fex = 0
|
||||
#endif
|
||||
|
||||
/* Initialize any machine-specific state used in FP_ROUNDMODE,
|
||||
FP_TRAPPING_EXCEPTIONS or FP_HANDLE_EXCEPTIONS. */
|
||||
#ifndef FP_INIT_ROUNDMODE
|
||||
# define FP_INIT_ROUNDMODE do {} while (0)
|
||||
#endif
|
||||
|
||||
/* Initialize any machine-specific state used in
|
||||
FP_TRAPPING_EXCEPTIONS or FP_HANDLE_EXCEPTIONS. */
|
||||
# define FP_INIT_TRAPPING_EXCEPTIONS FP_INIT_ROUNDMODE // 186
|
||||
|
||||
/* Initialize any machine-specific state used in
|
||||
FP_HANDLE_EXCEPTIONS. */
|
||||
#define FP_INIT_EXCEPTIONS FP_INIT_TRAPPING_EXCEPTIONS // 192
|
||||
|
||||
#define FP_HANDLE_EXCEPTIONS do {} while (0) // 196
|
||||
|
||||
#define FP_DENORM_ZERO 0 // 201
|
||||
#define FP_SET_EXCEPTION(ex) _fex |= (ex) // 212
|
||||
#define FP_CUR_EXCEPTIONS (_fex) // 215
|
||||
#define FP_TRAPPING_EXCEPTIONS 0 // 219
|
||||
|
||||
|
||||
// 259
|
||||
/* Round-to-nearest step for the wc-word fraction of X.
   The low four bits of the fraction (the LSB plus the three work bits)
   are examined; unless they are exactly _FP_WORK_ROUND, _FP_WORK_ROUND
   is added to the fraction.  */
#define _FP_ROUND_NEAREST(wc, X)                                        \
  do                                                                    \
    {                                                                   \
      if ((_FP_FRAC_LOW_##wc (X)                                        \
           & (_FP_WORK_LSB | _FP_WORK_ROUND                             \
              | _FP_WORK_GUARD | _FP_WORK_STICKY))                      \
          != _FP_WORK_ROUND)                                            \
        {                                                               \
          _FP_FRAC_ADDI_##wc (X, _FP_WORK_ROUND);                       \
        }                                                               \
    }                                                                   \
  while (0)
|
||||
|
||||
#define _FP_ROUND_ZERO(wc, X) (void) 0
|
||||
|
||||
/* Round-toward-plus-infinity step for the wc-word fraction of X.
   When the sign field X_s is clear and any of the three work bits is set,
   _FP_WORK_LSB is added to the fraction.  */
#define _FP_ROUND_PINF(wc, X)                                           \
  do                                                                    \
    {                                                                   \
      if (!X##_s                                                        \
          && (_FP_FRAC_LOW_##wc (X)                                     \
              & (_FP_WORK_ROUND | _FP_WORK_GUARD | _FP_WORK_STICKY)))   \
        {                                                               \
          _FP_FRAC_ADDI_##wc (X, _FP_WORK_LSB);                         \
        }                                                               \
    }                                                                   \
  while (0)
|
||||
|
||||
/* Round-toward-minus-infinity step for the wc-word fraction of X.
   When the sign field X_s is set and any of the three work bits is set,
   _FP_WORK_LSB is added to the fraction.  */
#define _FP_ROUND_MINF(wc, X)                                           \
  do                                                                    \
    {                                                                   \
      if (X##_s                                                         \
          && (_FP_FRAC_LOW_##wc (X)                                     \
              & (_FP_WORK_ROUND | _FP_WORK_GUARD | _FP_WORK_STICKY)))   \
        {                                                               \
          _FP_FRAC_ADDI_##wc (X, _FP_WORK_LSB);                         \
        }                                                               \
    }                                                                   \
  while (0)
|
||||
|
||||
/* Round the wc-word fraction of X according to FP_ROUNDMODE.
   Nothing happens when all three work bits are clear.  Otherwise
   FP_EX_INEXACT is recorded via FP_SET_EXCEPTION and the helper matching
   the current rounding mode is applied.  FP_ROUNDMODE is evaluated once,
   as the switch controlling expression.  */
#define _FP_ROUND(wc, X)                                                \
  do                                                                    \
    {                                                                   \
      if (_FP_FRAC_LOW_##wc (X)                                         \
          & (_FP_WORK_ROUND | _FP_WORK_GUARD | _FP_WORK_STICKY))        \
        {                                                               \
          FP_SET_EXCEPTION (FP_EX_INEXACT);                             \
          switch (FP_ROUNDMODE)                                         \
            {                                                           \
            case FP_RND_NEAREST:                                        \
              _FP_ROUND_NEAREST (wc, X);                                \
              break;                                                    \
            case FP_RND_ZERO:                                           \
              _FP_ROUND_ZERO (wc, X);                                   \
              break;                                                    \
            case FP_RND_PINF:                                           \
              _FP_ROUND_PINF (wc, X);                                   \
              break;                                                    \
            case FP_RND_MINF:                                           \
              _FP_ROUND_MINF (wc, X);                                   \
              break;                                                    \
            }                                                           \
        }                                                               \
    }                                                                   \
  while (0)
|
||||
|
||||
#define FP_CLS_NORMAL 0 // 310
|
||||
#define FP_CLS_ZERO 1
|
||||
#define FP_CLS_INF 2
|
||||
#define FP_CLS_NAN 3
|
||||
|
||||
#define _FP_CLS_COMBINE(x, y) (((x) << 2) | (y)) // 315
|
||||
|
||||
#include "op-1.h"
|
||||
#include "op-2.h"
|
||||
#include "op-4.h"
|
||||
#include "op-8.h"
|
||||
#include "op-common.h"
|
||||
|
||||
/* Sigh. Silly things longlong.h needs. */
|
||||
#define UWtype _FP_W_TYPE
|
||||
#define W_TYPE_SIZE _FP_W_TYPE_SIZE
|
||||
|
||||
typedef int QItype __attribute__ ((mode (QI)));
|
||||
typedef int SItype __attribute__ ((mode (SI)));
|
||||
typedef int DItype __attribute__ ((mode (DI)));
|
||||
typedef unsigned int UQItype __attribute__ ((mode (QI)));
|
||||
typedef unsigned int USItype __attribute__ ((mode (SI)));
|
||||
typedef unsigned int UDItype __attribute__ ((mode (DI)));
|
||||
#if _FP_W_TYPE_SIZE == 32
|
||||
typedef unsigned int UHWtype __attribute__ ((mode (HI)));
|
||||
#elif _FP_W_TYPE_SIZE == 64
|
||||
typedef USItype UHWtype;
|
||||
#endif
|
||||
|
||||
#ifndef CMPtype
|
||||
# define CMPtype int
|
||||
#endif
|
||||
|
||||
#define SI_BITS (__CHAR_BIT__ * (int) sizeof (SItype))
|
||||
#define DI_BITS (__CHAR_BIT__ * (int) sizeof (DItype))
|
||||
|
||||
#include "longlong.h"
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
21
src/gemm/soft-fp/subdf3.c
Normal file
21
src/gemm/soft-fp/subdf3.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
DFtype
|
||||
__subdf3 (DFtype a, DFtype b)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_D (A);
|
||||
FP_DECL_D (B);
|
||||
FP_DECL_D (R);
|
||||
DFtype r;
|
||||
|
||||
FP_INIT_ROUNDMODE;
|
||||
FP_UNPACK_SEMIRAW_D (A, a);
|
||||
FP_UNPACK_SEMIRAW_D (B, b);
|
||||
FP_SUB_D (R, A, B);
|
||||
FP_PACK_SEMIRAW_D (r, R);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
22
src/gemm/soft-fp/subsf3.c
Normal file
22
src/gemm/soft-fp/subsf3.c
Normal file
|
@ -0,0 +1,22 @@
|
|||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
SFtype
|
||||
__subsf3 (SFtype a, SFtype b)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_S (A);
|
||||
FP_DECL_S (B);
|
||||
FP_DECL_S (R);
|
||||
SFtype r;
|
||||
|
||||
FP_INIT_ROUNDMODE;
|
||||
FP_UNPACK_SEMIRAW_S (A, a);
|
||||
FP_UNPACK_SEMIRAW_S (B, b);
|
||||
FP_SUB_S (R, A, B);
|
||||
FP_PACK_SEMIRAW_S (r, R);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
24
src/gemm/soft-fp/truncdfsf2.c
Normal file
24
src/gemm/soft-fp/truncdfsf2.c
Normal file
|
@ -0,0 +1,24 @@
|
|||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
#include "double.h"
|
||||
|
||||
SFtype
|
||||
__truncdfsf2 (DFtype a)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_D (A);
|
||||
FP_DECL_S (R);
|
||||
SFtype r;
|
||||
|
||||
FP_INIT_ROUNDMODE;
|
||||
FP_UNPACK_SEMIRAW_D (A, a);
|
||||
#if _FP_W_TYPE_SIZE < _FP_FRACBITS_D
|
||||
FP_TRUNC (S, D, 1, 2, R, A);
|
||||
#else
|
||||
FP_TRUNC (S, D, 1, 1, R, A);
|
||||
#endif
|
||||
FP_PACK_SEMIRAW_S (r, R);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
3
src/linpack/Makefile
Normal file
3
src/linpack/Makefile
Normal file
|
@ -0,0 +1,3 @@
|
|||
NAME = linpack
|
||||
SRCS = $(shell find soft-fp/ -name "*.c") linpack.c
|
||||
include $(AM_HOME)/Makefile
|
76
src/linpack/soft-fp/aa-README.txt
Normal file
76
src/linpack/soft-fp/aa-README.txt
Normal file
|
@ -0,0 +1,76 @@
|
|||
https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html
|
||||
|
||||
1. Arithmetic functions
|
||||
|
||||
Runtime Function: float __addsf3 (float a, float b)
|
||||
Runtime Function: double __adddf3 (double a, double b)
|
||||
These functions return the sum of a and b.
|
||||
|
||||
Runtime Function: float __subsf3 (float a, float b)
|
||||
Runtime Function: double __subdf3 (double a, double b)
|
||||
These functions return the result of subtracting b from a; that is, a - b.
|
||||
|
||||
Runtime Function: float __mulsf3 (float a, float b)
|
||||
Runtime Function: double __muldf3 (double a, double b)
|
||||
These functions return the product of a and b.
|
||||
|
||||
Runtime Function: float __divsf3 (float a, float b)
|
||||
Runtime Function: double __divdf3 (double a, double b)
|
||||
These functions return the quotient of a and b; that is, a / b.
|
||||
|
||||
Runtime Function: float __negsf2 (float a)
|
||||
Runtime Function: double __negdf2 (double a)
|
||||
These functions return the negation of a. They simply flip the sign bit, so they can produce negative zero and negative NaN.
|
||||
|
||||
2. Conversion functions
|
||||
|
||||
Runtime Function: double __extendsfdf2 (float a)
|
||||
These functions extend a to the wider mode of their return type.
|
||||
|
||||
Runtime Function: float __truncdfsf2 (double a)
|
||||
These functions truncate a to the narrower mode of their return type, rounding toward zero.
|
||||
|
||||
Runtime Function: int __fixsfsi (float a)
|
||||
Runtime Function: int __fixdfsi (double a)
|
||||
These functions convert a to a signed integer, rounding toward zero.
|
||||
|
||||
Runtime Function: long __fixsfdi (float a)
|
||||
Runtime Function: long __fixdfdi (double a)
|
||||
These functions convert a to a signed long, rounding toward zero.
|
||||
|
||||
Runtime Function: long long __fixsfti (float a)
|
||||
Runtime Function: long long __fixdfti (double a)
|
||||
These functions convert a to a signed long long, rounding toward zero.
|
||||
|
||||
|
||||
Runtime Function: unsigned int __fixunssfsi (float a)
|
||||
Runtime Function: unsigned int __fixunsdfsi (double a)
|
||||
These functions convert a to an unsigned integer, rounding toward zero. Negative values all become zero.
|
||||
|
||||
Runtime Function: unsigned long __fixunssfdi (float a)
|
||||
Runtime Function: unsigned long __fixunsdfdi (double a)
|
||||
These functions convert a to an unsigned long, rounding toward zero. Negative values all become zero.
|
||||
|
||||
Runtime Function: unsigned long long __fixunssfti (float a)
|
||||
Runtime Function: unsigned long long __fixunsdfti (double a)
|
||||
These functions convert a to an unsigned long long, rounding toward zero. Negative values all become zero.
|
||||
|
||||
|
||||
Runtime Function: float __floatsisf (int i)
|
||||
Runtime Function: double __floatsidf (int i)
|
||||
These functions convert i, a signed integer, to floating point.
|
||||
|
||||
Runtime Function: float __floatdisf (long i)
|
||||
Runtime Function: double __floatdidf (long i)
|
||||
These functions convert i, a signed long, to floating point.
|
||||
|
||||
|
||||
Runtime Function: float __floatunsisf (unsigned int i)
|
||||
Runtime Function: double __floatunsidf (unsigned int i)
|
||||
These functions convert i, an unsigned integer, to floating point.
|
||||
|
||||
Runtime Function: float __floatundisf (unsigned long i)
|
||||
Runtime Function: double __floatundidf (unsigned long i)
|
||||
These functions convert i, an unsigned long, to floating point.
|
||||
|
||||
3. Comparison functions
|
21
src/linpack/soft-fp/adddf3.c
Normal file
21
src/linpack/soft-fp/adddf3.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
DFtype
|
||||
__adddf3 (DFtype a, DFtype b)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_D (A);
|
||||
FP_DECL_D (B);
|
||||
FP_DECL_D (R);
|
||||
DFtype r;
|
||||
|
||||
FP_INIT_ROUNDMODE;
|
||||
FP_UNPACK_SEMIRAW_D (A, a);
|
||||
FP_UNPACK_SEMIRAW_D (B, b);
|
||||
FP_ADD_D (R, A, B);
|
||||
FP_PACK_SEMIRAW_D (r, R);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
23
src/linpack/soft-fp/addsf3.c
Normal file
23
src/linpack/soft-fp/addsf3.c
Normal file
|
@ -0,0 +1,23 @@
|
|||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
SFtype
|
||||
__addsf3 (SFtype a, SFtype b)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_S (A);
|
||||
FP_DECL_S (B);
|
||||
FP_DECL_S (R);
|
||||
SFtype r;
|
||||
|
||||
FP_INIT_ROUNDMODE;
|
||||
FP_UNPACK_SEMIRAW_S (A, a);
|
||||
FP_UNPACK_SEMIRAW_S (B, b);
|
||||
FP_ADD_S (R, A, B);
|
||||
FP_PACK_SEMIRAW_S (r, R);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
|
21
src/linpack/soft-fp/divdf3.c
Normal file
21
src/linpack/soft-fp/divdf3.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
DFtype
|
||||
__divdf3 (DFtype a, DFtype b)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_D (A);
|
||||
FP_DECL_D (B);
|
||||
FP_DECL_D (R);
|
||||
DFtype r;
|
||||
|
||||
FP_INIT_ROUNDMODE;
|
||||
FP_UNPACK_D (A, a);
|
||||
FP_UNPACK_D (B, b);
|
||||
FP_DIV_D (R, A, B);
|
||||
FP_PACK_D (r, R);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
21
src/linpack/soft-fp/divsf3.c
Normal file
21
src/linpack/soft-fp/divsf3.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
SFtype
|
||||
__divsf3 (SFtype a, SFtype b)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_S (A);
|
||||
FP_DECL_S (B);
|
||||
FP_DECL_S (R);
|
||||
SFtype r;
|
||||
|
||||
FP_INIT_ROUNDMODE;
|
||||
FP_UNPACK_S (A, a);
|
||||
FP_UNPACK_S (B, b);
|
||||
FP_DIV_S (R, A, B);
|
||||
FP_PACK_S (r, R);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
323
src/linpack/soft-fp/double.h
Normal file
323
src/linpack/soft-fp/double.h
Normal file
|
@ -0,0 +1,323 @@
|
|||
/* Software floating-point emulation.
|
||||
Definitions for IEEE Double Precision
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com),
|
||||
Jakub Jelinek (jj@ultra.linux.cz),
|
||||
David S. Miller (davem@redhat.com) and
|
||||
Peter Maydell (pmaydell@chiark.greenend.org.uk).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef SOFT_FP_DOUBLE_H
|
||||
#define SOFT_FP_DOUBLE_H 1
|
||||
|
||||
#if _FP_W_TYPE_SIZE < 32
|
||||
# error "Here's a nickel kid. Go buy yourself a real computer."
|
||||
#endif
|
||||
|
||||
#if _FP_W_TYPE_SIZE < 64
|
||||
# define _FP_FRACTBITS_D (2 * _FP_W_TYPE_SIZE)
|
||||
# define _FP_FRACTBITS_DW_D (4 * _FP_W_TYPE_SIZE)
|
||||
#else
|
||||
# define _FP_FRACTBITS_D _FP_W_TYPE_SIZE
|
||||
# define _FP_FRACTBITS_DW_D (2 * _FP_W_TYPE_SIZE)
|
||||
#endif
|
||||
|
||||
#define _FP_FRACBITS_D 53
|
||||
#define _FP_FRACXBITS_D (_FP_FRACTBITS_D - _FP_FRACBITS_D)
|
||||
#define _FP_WFRACBITS_D (_FP_WORKBITS + _FP_FRACBITS_D)
|
||||
#define _FP_WFRACXBITS_D (_FP_FRACTBITS_D - _FP_WFRACBITS_D)
|
||||
#define _FP_EXPBITS_D 11
|
||||
#define _FP_EXPBIAS_D 1023
|
||||
#define _FP_EXPMAX_D 2047
|
||||
|
||||
#define _FP_QNANBIT_D \
|
||||
((_FP_W_TYPE) 1 << (_FP_FRACBITS_D-2) % _FP_W_TYPE_SIZE)
|
||||
#define _FP_QNANBIT_SH_D \
|
||||
((_FP_W_TYPE) 1 << (_FP_FRACBITS_D-2+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
|
||||
#define _FP_IMPLBIT_D \
|
||||
((_FP_W_TYPE) 1 << (_FP_FRACBITS_D-1) % _FP_W_TYPE_SIZE)
|
||||
#define _FP_IMPLBIT_SH_D \
|
||||
((_FP_W_TYPE) 1 << (_FP_FRACBITS_D-1+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
|
||||
#define _FP_OVERFLOW_D \
|
||||
((_FP_W_TYPE) 1 << _FP_WFRACBITS_D % _FP_W_TYPE_SIZE)
|
||||
|
||||
#define _FP_WFRACBITS_DW_D (2 * _FP_WFRACBITS_D)
|
||||
#define _FP_WFRACXBITS_DW_D (_FP_FRACTBITS_DW_D - _FP_WFRACBITS_DW_D)
|
||||
#define _FP_HIGHBIT_DW_D \
|
||||
((_FP_W_TYPE) 1 << (_FP_WFRACBITS_DW_D - 1) % _FP_W_TYPE_SIZE)
|
||||
|
||||
typedef float DFtype __attribute__ ((mode (DF)));
|
||||
|
||||
#if _FP_W_TYPE_SIZE < 64
|
||||
|
||||
union _FP_UNION_D
|
||||
{
|
||||
DFtype flt;
|
||||
struct _FP_STRUCT_LAYOUT
|
||||
{
|
||||
# if __BYTE_ORDER == __BIG_ENDIAN
|
||||
unsigned sign : 1;
|
||||
unsigned exp : _FP_EXPBITS_D;
|
||||
unsigned frac1 : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0) - _FP_W_TYPE_SIZE;
|
||||
unsigned frac0 : _FP_W_TYPE_SIZE;
|
||||
# else
|
||||
unsigned frac0 : _FP_W_TYPE_SIZE;
|
||||
unsigned frac1 : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0) - _FP_W_TYPE_SIZE;
|
||||
unsigned exp : _FP_EXPBITS_D;
|
||||
unsigned sign : 1;
|
||||
# endif
|
||||
} bits;
|
||||
};
|
||||
|
||||
# define FP_DECL_D(X) _FP_DECL (2, X)
|
||||
# define FP_UNPACK_RAW_D(X, val) _FP_UNPACK_RAW_2 (D, X, (val))
|
||||
# define FP_UNPACK_RAW_DP(X, val) _FP_UNPACK_RAW_2_P (D, X, (val))
|
||||
# define FP_PACK_RAW_D(val, X) _FP_PACK_RAW_2 (D, (val), X)
|
||||
# define FP_PACK_RAW_DP(val, X) \
|
||||
do \
|
||||
{ \
|
||||
if (!FP_INHIBIT_RESULTS) \
|
||||
_FP_PACK_RAW_2_P (D, (val), X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_UNPACK_D(X, val) \
|
||||
do \
|
||||
{ \
|
||||
_FP_UNPACK_RAW_2 (D, X, (val)); \
|
||||
_FP_UNPACK_CANONICAL (D, 2, X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_UNPACK_DP(X, val) \
|
||||
do \
|
||||
{ \
|
||||
_FP_UNPACK_RAW_2_P (D, X, (val)); \
|
||||
_FP_UNPACK_CANONICAL (D, 2, X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_UNPACK_SEMIRAW_D(X, val) \
|
||||
do \
|
||||
{ \
|
||||
_FP_UNPACK_RAW_2 (D, X, (val)); \
|
||||
_FP_UNPACK_SEMIRAW (D, 2, X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_UNPACK_SEMIRAW_DP(X, val) \
|
||||
do \
|
||||
{ \
|
||||
_FP_UNPACK_RAW_2_P (D, X, (val)); \
|
||||
_FP_UNPACK_SEMIRAW (D, 2, X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_PACK_D(val, X) \
|
||||
do \
|
||||
{ \
|
||||
_FP_PACK_CANONICAL (D, 2, X); \
|
||||
_FP_PACK_RAW_2 (D, (val), X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_PACK_DP(val, X) \
|
||||
do \
|
||||
{ \
|
||||
_FP_PACK_CANONICAL (D, 2, X); \
|
||||
if (!FP_INHIBIT_RESULTS) \
|
||||
_FP_PACK_RAW_2_P (D, (val), X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_PACK_SEMIRAW_D(val, X) \
|
||||
do \
|
||||
{ \
|
||||
_FP_PACK_SEMIRAW (D, 2, X); \
|
||||
_FP_PACK_RAW_2 (D, (val), X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_PACK_SEMIRAW_DP(val, X) \
|
||||
do \
|
||||
{ \
|
||||
_FP_PACK_SEMIRAW (D, 2, X); \
|
||||
if (!FP_INHIBIT_RESULTS) \
|
||||
_FP_PACK_RAW_2_P (D, (val), X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_ISSIGNAN_D(X) _FP_ISSIGNAN (D, 2, X)
|
||||
# define FP_NEG_D(R, X) _FP_NEG (D, 2, R, X)
|
||||
# define FP_ADD_D(R, X, Y) _FP_ADD (D, 2, R, X, Y)
|
||||
# define FP_SUB_D(R, X, Y) _FP_SUB (D, 2, R, X, Y)
|
||||
# define FP_MUL_D(R, X, Y) _FP_MUL (D, 2, R, X, Y)
|
||||
# define FP_DIV_D(R, X, Y) _FP_DIV (D, 2, R, X, Y)
|
||||
# define FP_SQRT_D(R, X) _FP_SQRT (D, 2, R, X)
|
||||
# define _FP_SQRT_MEAT_D(R, S, T, X, Q) _FP_SQRT_MEAT_2 (R, S, T, X, (Q))
|
||||
# define FP_FMA_D(R, X, Y, Z) _FP_FMA (D, 2, 4, R, X, Y, Z)
|
||||
|
||||
# define FP_CMP_D(r, X, Y, un, ex) _FP_CMP (D, 2, (r), X, Y, (un), (ex))
|
||||
# define FP_CMP_EQ_D(r, X, Y, ex) _FP_CMP_EQ (D, 2, (r), X, Y, (ex))
|
||||
# define FP_CMP_UNORD_D(r, X, Y, ex) _FP_CMP_UNORD (D, 2, (r), X, Y, (ex))
|
||||
|
||||
# define FP_TO_INT_D(r, X, rsz, rsg) _FP_TO_INT (D, 2, (r), X, (rsz), (rsg))
|
||||
# define FP_TO_INT_ROUND_D(r, X, rsz, rsg) \
|
||||
_FP_TO_INT_ROUND (D, 2, (r), X, (rsz), (rsg))
|
||||
# define FP_FROM_INT_D(X, r, rs, rt) _FP_FROM_INT (D, 2, X, (r), (rs), rt)
|
||||
|
||||
# define _FP_FRAC_HIGH_D(X) _FP_FRAC_HIGH_2 (X)
|
||||
# define _FP_FRAC_HIGH_RAW_D(X) _FP_FRAC_HIGH_2 (X)
|
||||
|
||||
# define _FP_FRAC_HIGH_DW_D(X) _FP_FRAC_HIGH_4 (X)
|
||||
|
||||
#else
|
||||
|
||||
union _FP_UNION_D
|
||||
{
|
||||
DFtype flt;
|
||||
struct _FP_STRUCT_LAYOUT
|
||||
{
|
||||
# if __BYTE_ORDER == __BIG_ENDIAN
|
||||
unsigned sign : 1;
|
||||
unsigned exp : _FP_EXPBITS_D;
|
||||
_FP_W_TYPE frac : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0);
|
||||
# else
|
||||
_FP_W_TYPE frac : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0);
|
||||
unsigned exp : _FP_EXPBITS_D;
|
||||
unsigned sign : 1;
|
||||
# endif
|
||||
} bits;
|
||||
};
|
||||
|
||||
# define FP_DECL_D(X) _FP_DECL (1, X)
|
||||
# define FP_UNPACK_RAW_D(X, val) _FP_UNPACK_RAW_1 (D, X, (val))
|
||||
# define FP_UNPACK_RAW_DP(X, val) _FP_UNPACK_RAW_1_P (D, X, (val))
|
||||
# define FP_PACK_RAW_D(val, X) _FP_PACK_RAW_1 (D, (val), X)
|
||||
# define FP_PACK_RAW_DP(val, X) \
|
||||
do \
|
||||
{ \
|
||||
if (!FP_INHIBIT_RESULTS) \
|
||||
_FP_PACK_RAW_1_P (D, (val), X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_UNPACK_D(X, val) \
|
||||
do \
|
||||
{ \
|
||||
_FP_UNPACK_RAW_1 (D, X, (val)); \
|
||||
_FP_UNPACK_CANONICAL (D, 1, X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_UNPACK_DP(X, val) \
|
||||
do \
|
||||
{ \
|
||||
_FP_UNPACK_RAW_1_P (D, X, (val)); \
|
||||
_FP_UNPACK_CANONICAL (D, 1, X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_UNPACK_SEMIRAW_D(X, val) \
|
||||
do \
|
||||
{ \
|
||||
_FP_UNPACK_RAW_1 (D, X, (val)); \
|
||||
_FP_UNPACK_SEMIRAW (D, 1, X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_UNPACK_SEMIRAW_DP(X, val) \
|
||||
do \
|
||||
{ \
|
||||
_FP_UNPACK_RAW_1_P (D, X, (val)); \
|
||||
_FP_UNPACK_SEMIRAW (D, 1, X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_PACK_D(val, X) \
|
||||
do \
|
||||
{ \
|
||||
_FP_PACK_CANONICAL (D, 1, X); \
|
||||
_FP_PACK_RAW_1 (D, (val), X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_PACK_DP(val, X) \
|
||||
do \
|
||||
{ \
|
||||
_FP_PACK_CANONICAL (D, 1, X); \
|
||||
if (!FP_INHIBIT_RESULTS) \
|
||||
_FP_PACK_RAW_1_P (D, (val), X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_PACK_SEMIRAW_D(val, X) \
|
||||
do \
|
||||
{ \
|
||||
_FP_PACK_SEMIRAW (D, 1, X); \
|
||||
_FP_PACK_RAW_1 (D, (val), X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_PACK_SEMIRAW_DP(val, X) \
|
||||
do \
|
||||
{ \
|
||||
_FP_PACK_SEMIRAW (D, 1, X); \
|
||||
if (!FP_INHIBIT_RESULTS) \
|
||||
_FP_PACK_RAW_1_P (D, (val), X); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
# define FP_ISSIGNAN_D(X) _FP_ISSIGNAN (D, 1, X)
|
||||
# define FP_NEG_D(R, X) _FP_NEG (D, 1, R, X)
|
||||
# define FP_ADD_D(R, X, Y) _FP_ADD (D, 1, R, X, Y)
|
||||
# define FP_SUB_D(R, X, Y) _FP_SUB (D, 1, R, X, Y)
|
||||
# define FP_MUL_D(R, X, Y) _FP_MUL (D, 1, R, X, Y)
|
||||
# define FP_DIV_D(R, X, Y) _FP_DIV (D, 1, R, X, Y)
|
||||
# define FP_SQRT_D(R, X) _FP_SQRT (D, 1, R, X)
|
||||
# define _FP_SQRT_MEAT_D(R, S, T, X, Q) _FP_SQRT_MEAT_1 (R, S, T, X, (Q))
|
||||
# define FP_FMA_D(R, X, Y, Z) _FP_FMA (D, 1, 2, R, X, Y, Z)
|
||||
|
||||
/* The implementation of _FP_MUL_D and _FP_DIV_D should be chosen by
|
||||
the target machine. */
|
||||
|
||||
# define FP_CMP_D(r, X, Y, un, ex) _FP_CMP (D, 1, (r), X, Y, (un), (ex))
|
||||
# define FP_CMP_EQ_D(r, X, Y, ex) _FP_CMP_EQ (D, 1, (r), X, Y, (ex))
|
||||
# define FP_CMP_UNORD_D(r, X, Y, ex) _FP_CMP_UNORD (D, 1, (r), X, Y, (ex))
|
||||
|
||||
# define FP_TO_INT_D(r, X, rsz, rsg) _FP_TO_INT (D, 1, (r), X, (rsz), (rsg))
|
||||
# define FP_TO_INT_ROUND_D(r, X, rsz, rsg) \
|
||||
_FP_TO_INT_ROUND (D, 1, (r), X, (rsz), (rsg))
|
||||
# define FP_FROM_INT_D(X, r, rs, rt) _FP_FROM_INT (D, 1, X, (r), (rs), rt)
|
||||
|
||||
# define _FP_FRAC_HIGH_D(X) _FP_FRAC_HIGH_1 (X)
|
||||
# define _FP_FRAC_HIGH_RAW_D(X) _FP_FRAC_HIGH_1 (X)
|
||||
|
||||
# define _FP_FRAC_HIGH_DW_D(X) _FP_FRAC_HIGH_2 (X)
|
||||
|
||||
#endif /* W_TYPE_SIZE < 64 */
|
||||
|
||||
#endif /* !SOFT_FP_DOUBLE_H */
|
21
src/linpack/soft-fp/eqdf2.c
Normal file
21
src/linpack/soft-fp/eqdf2.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
CMPtype
|
||||
__eqdf2 (DFtype a, DFtype b)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_D (A);
|
||||
FP_DECL_D (B);
|
||||
CMPtype r;
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
FP_UNPACK_RAW_D (A, a);
|
||||
FP_UNPACK_RAW_D (B, b);
|
||||
FP_CMP_EQ_D (r, A, B, 1);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
strong_alias (__eqdf2, __nedf2);
|
21
src/linpack/soft-fp/eqsf2.c
Normal file
21
src/linpack/soft-fp/eqsf2.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
CMPtype
|
||||
__eqsf2 (SFtype a, SFtype b)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_S (A);
|
||||
FP_DECL_S (B);
|
||||
CMPtype r;
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
FP_UNPACK_RAW_S (A, a);
|
||||
FP_UNPACK_RAW_S (B, b);
|
||||
FP_CMP_EQ_S (r, A, B, 1);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
strong_alias (__eqsf2, __nesf2);
|
26
src/linpack/soft-fp/extendsfdf2.c
Normal file
26
src/linpack/soft-fp/extendsfdf2.c
Normal file
|
@ -0,0 +1,26 @@
|
|||
#define FP_NO_EXACT_UNDERFLOW
|
||||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
#include "double.h"
|
||||
|
||||
DFtype
|
||||
__extendsfdf2 (SFtype a)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_S (A);
|
||||
FP_DECL_D (R);
|
||||
DFtype r;
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
FP_UNPACK_RAW_S (A, a);
|
||||
#if _FP_W_TYPE_SIZE < _FP_FRACBITS_D
|
||||
FP_EXTEND (D, S, 2, 1, R, A);
|
||||
#else
|
||||
FP_EXTEND (D, S, 1, 1, R, A);
|
||||
#endif
|
||||
FP_PACK_RAW_D (r, R);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
47
src/linpack/soft-fp/fixdfdi.c
Normal file
47
src/linpack/soft-fp/fixdfdi.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert a to 64bit signed integer
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
DItype
|
||||
__fixdfdi (DFtype a)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_D (A);
|
||||
UDItype r;
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
FP_UNPACK_RAW_D (A, a);
|
||||
FP_TO_INT_D (r, A, DI_BITS, 1);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
47
src/linpack/soft-fp/fixdfsi.c
Normal file
47
src/linpack/soft-fp/fixdfsi.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert a to 32bit signed integer
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
SItype
|
||||
__fixdfsi (DFtype a)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_D (A);
|
||||
USItype r;
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
FP_UNPACK_RAW_D (A, a);
|
||||
FP_TO_INT_D (r, A, SI_BITS, 1);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
46
src/linpack/soft-fp/fixdfti.c
Normal file
46
src/linpack/soft-fp/fixdfti.c
Normal file
|
@ -0,0 +1,46 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert IEEE double to 128bit signed integer
|
||||
Copyright (C) 2007-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Uros Bizjak (ubizjak@gmail.com).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
// #include "soft-fp.h"
|
||||
// #include "double.h"
|
||||
|
||||
// TItype
|
||||
// __fixdfti (DFtype a)
|
||||
// {
|
||||
// FP_DECL_EX;
|
||||
// FP_DECL_D (A);
|
||||
// UTItype r;
|
||||
|
||||
// FP_INIT_EXCEPTIONS;
|
||||
// FP_UNPACK_RAW_D (A, a);
|
||||
// FP_TO_INT_D (r, A, TI_BITS, 1);
|
||||
// FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
// return r;
|
||||
// }
|
47
src/linpack/soft-fp/fixsfdi.c
Normal file
47
src/linpack/soft-fp/fixsfdi.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert IEEE single `a` to 64bit signed integer
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
/* Soft-float conversion routine: takes the single-precision value `a`
   (SFtype) and returns it converted to a 64bit signed integer (DItype).
   All of the work is delegated to soft-fp macros whose definitions live
   in the included headers, not in this file.  */
DItype
|
||||
__fixsfdi (SFtype a)
|
||||
{
|
||||
FP_DECL_EX; /* NOTE(review): presumably declares the exception state consumed by FP_HANDLE_EXCEPTIONS -- confirm in soft-fp.h */
|
||||
FP_DECL_S (A); /* working variable(s) named A for the single-precision macros below */
|
||||
UDItype r; /* result is built as unsigned and returned through the signed return type */
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
FP_UNPACK_RAW_S (A, a); /* load the argument `a` into A */
|
||||
FP_TO_INT_S (r, A, DI_BITS, 1); /* final argument is 1 in the signed fix* routines and 0 in the fixuns* ones */
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
47
src/linpack/soft-fp/fixsfsi.c
Normal file
47
src/linpack/soft-fp/fixsfsi.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert IEEE single `a` to 32bit signed integer
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
/* Soft-float conversion routine: takes the single-precision value `a`
   (SFtype) and returns it converted to a 32bit signed integer (SItype).
   All of the work is delegated to soft-fp macros whose definitions live
   in the included headers, not in this file.  */
SItype
|
||||
__fixsfsi (SFtype a)
|
||||
{
|
||||
FP_DECL_EX; /* NOTE(review): presumably declares the exception state consumed by FP_HANDLE_EXCEPTIONS -- confirm in soft-fp.h */
|
||||
FP_DECL_S (A); /* working variable(s) named A for the single-precision macros below */
|
||||
USItype r; /* result is built as unsigned and returned through the signed return type */
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
FP_UNPACK_RAW_S (A, a); /* load the argument `a` into A */
|
||||
FP_TO_INT_S (r, A, SI_BITS, 1); /* final argument is 1 in the signed fix* routines and 0 in the fixuns* ones */
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
46
src/linpack/soft-fp/fixsfti.c
Normal file
46
src/linpack/soft-fp/fixsfti.c
Normal file
|
@ -0,0 +1,46 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert IEEE single to 128bit signed integer
|
||||
Copyright (C) 2007-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Uros Bizjak (ubizjak@gmail.com).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
// #include "soft-fp.h"
|
||||
// #include "single.h"
|
||||
|
||||
// TItype
|
||||
// __fixsfti (SFtype a)
|
||||
// {
|
||||
// FP_DECL_EX;
|
||||
// FP_DECL_S (A);
|
||||
// UTItype r;
|
||||
|
||||
// FP_INIT_EXCEPTIONS;
|
||||
// FP_UNPACK_RAW_S (A, a);
|
||||
// FP_TO_INT_S (r, A, TI_BITS, 1);
|
||||
// FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
// return r;
|
||||
// }
|
47
src/linpack/soft-fp/fixunsdfdi.c
Normal file
47
src/linpack/soft-fp/fixunsdfdi.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert IEEE double `a` to 64bit unsigned integer
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
/* Soft-float conversion routine: takes the double-precision value `a`
   (DFtype) and returns it converted to a 64bit unsigned integer
   (UDItype).  All of the work is delegated to soft-fp macros whose
   definitions live in the included headers, not in this file.  */
UDItype
|
||||
__fixunsdfdi (DFtype a)
|
||||
{
|
||||
FP_DECL_EX; /* NOTE(review): presumably declares the exception state consumed by FP_HANDLE_EXCEPTIONS -- confirm in soft-fp.h */
|
||||
FP_DECL_D (A); /* working variable(s) named A for the double-precision macros below */
|
||||
UDItype r; /* conversion result, same type as the return type */
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
FP_UNPACK_RAW_D (A, a); /* load the argument `a` into A */
|
||||
FP_TO_INT_D (r, A, DI_BITS, 0); /* final argument is 0 in the fixuns* routines and 1 in the signed fix* ones */
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
47
src/linpack/soft-fp/fixunsdfsi.c
Normal file
47
src/linpack/soft-fp/fixunsdfsi.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert IEEE double `a` to 32bit unsigned integer
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
/* Soft-float conversion routine: takes the double-precision value `a`
   (DFtype) and returns it converted to a 32bit unsigned integer
   (USItype).  All of the work is delegated to soft-fp macros whose
   definitions live in the included headers, not in this file.  */
USItype
|
||||
__fixunsdfsi (DFtype a)
|
||||
{
|
||||
FP_DECL_EX; /* NOTE(review): presumably declares the exception state consumed by FP_HANDLE_EXCEPTIONS -- confirm in soft-fp.h */
|
||||
FP_DECL_D (A); /* working variable(s) named A for the double-precision macros below */
|
||||
USItype r; /* conversion result, same type as the return type */
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
FP_UNPACK_RAW_D (A, a); /* load the argument `a` into A */
|
||||
FP_TO_INT_D (r, A, SI_BITS, 0); /* final argument is 0 in the fixuns* routines and 1 in the signed fix* ones */
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
46
src/linpack/soft-fp/fixunsdfti.c
Normal file
46
src/linpack/soft-fp/fixunsdfti.c
Normal file
|
@ -0,0 +1,46 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert IEEE double to 128bit unsigned integer
|
||||
Copyright (C) 2007-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Uros Bizjak (ubizjak@gmail.com).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
// #include "soft-fp.h"
|
||||
// #include "double.h"
|
||||
|
||||
// UTItype
|
||||
// __fixunsdfti (DFtype a)
|
||||
// {
|
||||
// FP_DECL_EX;
|
||||
// FP_DECL_D (A);
|
||||
// UTItype r;
|
||||
|
||||
// FP_INIT_EXCEPTIONS;
|
||||
// FP_UNPACK_RAW_D (A, a);
|
||||
// FP_TO_INT_D (r, A, TI_BITS, 0);
|
||||
// FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
// return r;
|
||||
// }
|
47
src/linpack/soft-fp/fixunssfdi.c
Normal file
47
src/linpack/soft-fp/fixunssfdi.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert IEEE single `a` to 64bit unsigned integer
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
/* Soft-float conversion routine: takes the single-precision value `a`
   (SFtype) and returns it converted to a 64bit unsigned integer
   (UDItype).  All of the work is delegated to soft-fp macros whose
   definitions live in the included headers, not in this file.  */
UDItype
|
||||
__fixunssfdi (SFtype a)
|
||||
{
|
||||
FP_DECL_EX; /* NOTE(review): presumably declares the exception state consumed by FP_HANDLE_EXCEPTIONS -- confirm in soft-fp.h */
|
||||
FP_DECL_S (A); /* working variable(s) named A for the single-precision macros below */
|
||||
UDItype r; /* conversion result, same type as the return type */
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
FP_UNPACK_RAW_S (A, a); /* load the argument `a` into A */
|
||||
FP_TO_INT_S (r, A, DI_BITS, 0); /* final argument is 0 in the fixuns* routines and 1 in the signed fix* ones */
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
47
src/linpack/soft-fp/fixunssfsi.c
Normal file
47
src/linpack/soft-fp/fixunssfsi.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert IEEE single `a` to 32bit unsigned integer
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
/* Soft-float conversion routine: takes the single-precision value `a`
   (SFtype) and returns it converted to a 32bit unsigned integer
   (USItype).  All of the work is delegated to soft-fp macros whose
   definitions live in the included headers, not in this file.  */
USItype
|
||||
__fixunssfsi (SFtype a)
|
||||
{
|
||||
FP_DECL_EX; /* NOTE(review): presumably declares the exception state consumed by FP_HANDLE_EXCEPTIONS -- confirm in soft-fp.h */
|
||||
FP_DECL_S (A); /* working variable(s) named A for the single-precision macros below */
|
||||
USItype r; /* conversion result, same type as the return type */
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
FP_UNPACK_RAW_S (A, a); /* load the argument `a` into A */
|
||||
FP_TO_INT_S (r, A, SI_BITS, 0); /* final argument is 0 in the fixuns* routines and 1 in the signed fix* ones */
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
46
src/linpack/soft-fp/fixunssfti.c
Normal file
46
src/linpack/soft-fp/fixunssfti.c
Normal file
|
@ -0,0 +1,46 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert IEEE single to 128bit unsigned integer
|
||||
Copyright (C) 2007-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Uros Bizjak (ubizjak@gmail.com).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
// #include "soft-fp.h"
|
||||
// #include "single.h"
|
||||
|
||||
// UTItype
|
||||
// __fixunssfti (SFtype a)
|
||||
// {
|
||||
// FP_DECL_EX;
|
||||
// FP_DECL_S (A);
|
||||
// UTItype r;
|
||||
|
||||
// FP_INIT_EXCEPTIONS;
|
||||
// FP_UNPACK_RAW_S (A, a);
|
||||
// FP_TO_INT_S (r, A, TI_BITS, 0);
|
||||
// FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
// return r;
|
||||
// }
|
47
src/linpack/soft-fp/floatdidf.c
Normal file
47
src/linpack/soft-fp/floatdidf.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert a 64bit signed integer to IEEE double
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
/* Soft-float conversion routine: takes the 64bit signed integer `i`
   (DItype) and returns it converted to an IEEE double (DFtype).  All of
   the work is delegated to soft-fp macros whose definitions live in the
   included headers, not in this file.  */
DFtype
|
||||
__floatdidf (DItype i)
|
||||
{
|
||||
FP_DECL_EX; /* NOTE(review): presumably declares the exception/rounding state used by the macros below -- confirm in soft-fp.h */
|
||||
FP_DECL_D (A); /* working variable(s) named A for the double-precision macros below */
|
||||
DFtype a; /* packed result that is returned */
|
||||
|
||||
FP_INIT_ROUNDMODE;
|
||||
FP_FROM_INT_D (A, i, DI_BITS, UDItype); /* build A from `i`; width DI_BITS, UDItype named as the matching unsigned type */
|
||||
FP_PACK_RAW_D (a, A); /* store A into the return value `a` */
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return a;
|
||||
}
|
47
src/linpack/soft-fp/floatdisf.c
Normal file
47
src/linpack/soft-fp/floatdisf.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert a 64bit signed integer to IEEE single
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
/* Soft-float conversion routine: takes the 64bit signed integer `i`
   (DItype) and returns it converted to an IEEE single (SFtype).  All of
   the work is delegated to soft-fp macros whose definitions live in the
   included headers, not in this file.  */
SFtype
|
||||
__floatdisf (DItype i)
|
||||
{
|
||||
FP_DECL_EX; /* NOTE(review): presumably declares the exception/rounding state used by the macros below -- confirm in soft-fp.h */
|
||||
FP_DECL_S (A); /* working variable(s) named A for the single-precision macros below */
|
||||
SFtype a; /* packed result that is returned */
|
||||
|
||||
FP_INIT_ROUNDMODE;
|
||||
FP_FROM_INT_S (A, i, DI_BITS, UDItype); /* build A from `i`; width DI_BITS, UDItype named as the matching unsigned type */
|
||||
FP_PACK_RAW_S (a, A); /* store A into the return value `a` */
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return a;
|
||||
}
|
49
src/linpack/soft-fp/floatsidf.c
Normal file
49
src/linpack/soft-fp/floatsidf.c
Normal file
|
@ -0,0 +1,49 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert a 32bit signed integer to IEEE double
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#define FP_NO_EXCEPTIONS
|
||||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
/* Soft-float conversion routine: takes the 32bit signed integer `i`
   (SItype) and returns it converted to an IEEE double (DFtype).  This
   file defines FP_NO_EXCEPTIONS before including soft-fp.h; the macros
   used here come from the included headers, not from this file.  */
DFtype
|
||||
__floatsidf (SItype i)
|
||||
{
|
||||
FP_DECL_EX; /* NOTE(review): presumably declares the exception/rounding state used by the macros below -- confirm in soft-fp.h */
|
||||
FP_DECL_D (A); /* working variable(s) named A for the double-precision macros below */
|
||||
DFtype a; /* packed result that is returned */
|
||||
|
||||
FP_INIT_ROUNDMODE;
|
||||
FP_FROM_INT_D (A, i, SI_BITS, USItype); /* build A from `i`; width SI_BITS, USItype named as the matching unsigned type */
|
||||
FP_PACK_RAW_D (a, A); /* store A into the return value `a` */
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return a;
|
||||
}
|
||||
|
47
src/linpack/soft-fp/floatsisf.c
Normal file
47
src/linpack/soft-fp/floatsisf.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert a 32bit signed integer to IEEE single
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
/* Soft-float conversion routine: takes the 32bit signed integer `i`
   (SItype) and returns it converted to an IEEE single (SFtype).  All of
   the work is delegated to soft-fp macros whose definitions live in the
   included headers, not in this file.  */
SFtype
|
||||
__floatsisf (SItype i)
|
||||
{
|
||||
FP_DECL_EX; /* NOTE(review): presumably declares the exception/rounding state used by the macros below -- confirm in soft-fp.h */
|
||||
FP_DECL_S (A); /* working variable(s) named A for the single-precision macros below */
|
||||
SFtype a; /* packed result that is returned */
|
||||
|
||||
FP_INIT_ROUNDMODE;
|
||||
FP_FROM_INT_S (A, i, SI_BITS, USItype); /* build A from `i`; width SI_BITS, USItype named as the matching unsigned type */
|
||||
FP_PACK_RAW_S (a, A); /* store A into the return value `a` */
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return a;
|
||||
}
|
47
src/linpack/soft-fp/floatundidf.c
Normal file
47
src/linpack/soft-fp/floatundidf.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert a 64bit unsigned integer to IEEE double
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
/* Soft-float conversion routine: takes the 64bit unsigned integer `i`
   (UDItype) and returns it converted to an IEEE double (DFtype).  All of
   the work is delegated to soft-fp macros whose definitions live in the
   included headers, not in this file.  */
DFtype
|
||||
__floatundidf (UDItype i)
|
||||
{
|
||||
FP_DECL_EX; /* NOTE(review): presumably declares the exception/rounding state used by the macros below -- confirm in soft-fp.h */
|
||||
FP_DECL_D (A); /* working variable(s) named A for the double-precision macros below */
|
||||
DFtype a; /* packed result that is returned */
|
||||
|
||||
FP_INIT_ROUNDMODE;
|
||||
FP_FROM_INT_D (A, i, DI_BITS, UDItype); /* build A from `i`; width DI_BITS, UDItype is also the argument's own type */
|
||||
FP_PACK_RAW_D (a, A); /* store A into the return value `a` */
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return a;
|
||||
}
|
47
src/linpack/soft-fp/floatundisf.c
Normal file
47
src/linpack/soft-fp/floatundisf.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert a 64bit unsigned integer to IEEE single
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
/* Soft-float conversion routine: takes the 64bit unsigned integer `i`
   (UDItype) and returns it converted to an IEEE single (SFtype).  All of
   the work is delegated to soft-fp macros whose definitions live in the
   included headers, not in this file.  */
SFtype
|
||||
__floatundisf (UDItype i)
|
||||
{
|
||||
FP_DECL_EX; /* NOTE(review): presumably declares the exception/rounding state used by the macros below -- confirm in soft-fp.h */
|
||||
FP_DECL_S (A); /* working variable(s) named A for the single-precision macros below */
|
||||
SFtype a; /* packed result that is returned */
|
||||
|
||||
FP_INIT_ROUNDMODE;
|
||||
FP_FROM_INT_S (A, i, DI_BITS, UDItype); /* build A from `i`; width DI_BITS, UDItype is also the argument's own type */
|
||||
FP_PACK_RAW_S (a, A); /* store A into the return value `a` */
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return a;
|
||||
}
|
47
src/linpack/soft-fp/floatunsidf.c
Normal file
47
src/linpack/soft-fp/floatunsidf.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert a 32bit unsigned integer to IEEE double
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#define FP_NO_EXCEPTIONS
|
||||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
DFtype
|
||||
__floatunsidf (USItype i)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_D (A);
|
||||
DFtype a;
|
||||
|
||||
FP_INIT_ROUNDMODE;
|
||||
FP_FROM_INT_D (A, i, SI_BITS, USItype);
|
||||
FP_PACK_RAW_D (a, A);
|
||||
|
||||
return a;
|
||||
}
|
47
src/linpack/soft-fp/floatunsisf.c
Normal file
47
src/linpack/soft-fp/floatunsisf.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Software floating-point emulation.
|
||||
Convert a 32bit unsigned integer to IEEE single
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com) and
|
||||
Jakub Jelinek (jj@ultra.linux.cz).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
SFtype
|
||||
__floatunsisf (USItype i)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_S (A);
|
||||
SFtype a;
|
||||
|
||||
FP_INIT_ROUNDMODE;
|
||||
FP_FROM_INT_S (A, i, SI_BITS, USItype);
|
||||
FP_PACK_RAW_S (a, A);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return a;
|
||||
}
|
21
src/linpack/soft-fp/gedf2.c
Normal file
21
src/linpack/soft-fp/gedf2.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
CMPtype
|
||||
__gedf2 (DFtype a, DFtype b)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_D (A);
|
||||
FP_DECL_D (B);
|
||||
CMPtype r;
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
FP_UNPACK_RAW_D (A, a);
|
||||
FP_UNPACK_RAW_D (B, b);
|
||||
FP_CMP_D (r, A, B, -2, 2);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
strong_alias (__gedf2, __gtdf2);
|
21
src/linpack/soft-fp/gesf2.c
Normal file
21
src/linpack/soft-fp/gesf2.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
CMPtype
|
||||
__gesf2 (SFtype a, SFtype b)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_S (A);
|
||||
FP_DECL_S (B);
|
||||
CMPtype r;
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
FP_UNPACK_RAW_S (A, a);
|
||||
FP_UNPACK_RAW_S (B, b);
|
||||
FP_CMP_S (r, A, B, -2, 2);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
strong_alias (__gesf2, __gtsf2);
|
21
src/linpack/soft-fp/ledf2.c
Normal file
21
src/linpack/soft-fp/ledf2.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
CMPtype
|
||||
__ledf2 (DFtype a, DFtype b)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_D (A);
|
||||
FP_DECL_D (B);
|
||||
CMPtype r;
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
FP_UNPACK_RAW_D (A, a);
|
||||
FP_UNPACK_RAW_D (B, b);
|
||||
FP_CMP_D (r, A, B, 2, 2);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
strong_alias (__ledf2, __ltdf2);
|
22
src/linpack/soft-fp/lesf2.c
Normal file
22
src/linpack/soft-fp/lesf2.c
Normal file
|
@ -0,0 +1,22 @@
|
|||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
CMPtype
|
||||
__lesf2 (SFtype a, SFtype b)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_S (A);
|
||||
FP_DECL_S (B);
|
||||
CMPtype r;
|
||||
|
||||
FP_INIT_EXCEPTIONS;
|
||||
FP_UNPACK_RAW_S (A, a);
|
||||
FP_UNPACK_RAW_S (B, b);
|
||||
FP_CMP_S (r, A, B, 2, 2);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
strong_alias (__lesf2, __ltsf2);
|
||||
|
1774
src/linpack/soft-fp/longlong.h
Normal file
1774
src/linpack/soft-fp/longlong.h
Normal file
File diff suppressed because it is too large
Load diff
21
src/linpack/soft-fp/muldf3.c
Normal file
21
src/linpack/soft-fp/muldf3.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
DFtype
|
||||
__muldf3 (DFtype a, DFtype b)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_D (A);
|
||||
FP_DECL_D (B);
|
||||
FP_DECL_D (R);
|
||||
DFtype r;
|
||||
|
||||
FP_INIT_ROUNDMODE;
|
||||
FP_UNPACK_D (A, a);
|
||||
FP_UNPACK_D (B, b);
|
||||
FP_MUL_D (R, A, B);
|
||||
FP_PACK_D (r, R);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
22
src/linpack/soft-fp/mulsf3.c
Normal file
22
src/linpack/soft-fp/mulsf3.c
Normal file
|
@ -0,0 +1,22 @@
|
|||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
SFtype
|
||||
__mulsf3 (SFtype a, SFtype b)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_S (A);
|
||||
FP_DECL_S (B);
|
||||
FP_DECL_S (R);
|
||||
SFtype r;
|
||||
|
||||
FP_INIT_ROUNDMODE;
|
||||
FP_UNPACK_S (A, a);
|
||||
FP_UNPACK_S (B, b);
|
||||
FP_MUL_S (R, A, B);
|
||||
FP_PACK_S (r, R);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
16
src/linpack/soft-fp/negdf2.c
Normal file
16
src/linpack/soft-fp/negdf2.c
Normal file
|
@ -0,0 +1,16 @@
|
|||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
DFtype
|
||||
__negdf2 (DFtype a)
|
||||
{
|
||||
FP_DECL_D (A);
|
||||
FP_DECL_D (R);
|
||||
DFtype r;
|
||||
|
||||
FP_UNPACK_RAW_D (A, a);
|
||||
FP_NEG_D (R, A);
|
||||
FP_PACK_RAW_D (r, R);
|
||||
|
||||
return r;
|
||||
}
|
16
src/linpack/soft-fp/negsf2.c
Normal file
16
src/linpack/soft-fp/negsf2.c
Normal file
|
@ -0,0 +1,16 @@
|
|||
#include "soft-fp.h"
|
||||
#include "single.h"
|
||||
|
||||
SFtype
|
||||
__negsf2 (SFtype a)
|
||||
{
|
||||
FP_DECL_S (A);
|
||||
FP_DECL_S (R);
|
||||
SFtype r;
|
||||
|
||||
FP_UNPACK_RAW_S (A, a);
|
||||
FP_NEG_S (R, A);
|
||||
FP_PACK_RAW_S (r, R);
|
||||
|
||||
return r;
|
||||
}
|
369
src/linpack/soft-fp/op-1.h
Normal file
369
src/linpack/soft-fp/op-1.h
Normal file
|
@ -0,0 +1,369 @@
|
|||
/* Software floating-point emulation.
|
||||
Basic one-word fraction declaration and manipulation.
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com),
|
||||
Jakub Jelinek (jj@ultra.linux.cz),
|
||||
David S. Miller (davem@redhat.com) and
|
||||
Peter Maydell (pmaydell@chiark.greenend.org.uk).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef SOFT_FP_OP_1_H
|
||||
#define SOFT_FP_OP_1_H 1
|
||||
|
||||
/* One-word fractions: the entire fraction is held in the single word
   X##_f, so the high, low and indexed accessors all name that word.  */
#define _FP_FRAC_DECL_1(X)	_FP_W_TYPE X##_f _FP_ZERO_INIT
#define _FP_FRAC_COPY_1(D, S)	(D##_f = S##_f)
/* I is parenthesized so that any expression can be passed safely.  */
#define _FP_FRAC_SET_1(X, I)	(X##_f = (I))
#define _FP_FRAC_HIGH_1(X)	(X##_f)
#define _FP_FRAC_LOW_1(X)	(X##_f)
#define _FP_FRAC_WORD_1(X, w)	(X##_f)

/* Add the plain integer I to the fraction word.  */
#define _FP_FRAC_ADDI_1(X, I)	(X##_f += (I))
|
||||
/* Shift the fraction word left by N bits.  A compile-time-constant shift
   of exactly one is written as a self-addition; every other count uses
   the shift operator.  */
#define _FP_FRAC_SLL_1(X, N) \
  do \
    { \
      if (!(__builtin_constant_p (N) && (N) == 1)) \
	X##_f <<= (N); \
      else \
	X##_f += X##_f; \
    } \
  while (0)
/* Shift the fraction word right by N bits; the bits shifted out are
   discarded.  N is parenthesized to match _FP_FRAC_SLL_1.  */
#define _FP_FRAC_SRL_1(X, N)	(X##_f >>= (N))
|
||||
|
||||
/* Right shifts that remember whether any set bit was shifted out.
   _FP_FRAC_SRST_1 stores that "sticky" flag in S; _FP_FRAC_SRS_1 ORs it
   into the least significant bit of the shifted word.  The SZ argument is
   accepted but not used by the one-word versions.  */
#define _FP_FRAC_SRST_1(X, S, N, sz)	__FP_FRAC_SRST_1 (X##_f, S, (N), (sz))
#define _FP_FRAC_SRS_1(X, N, sz)	__FP_FRAC_SRS_1 (X##_f, (N), (sz))

/* Worker for _FP_FRAC_SRST_1; X is the fraction word itself.  A constant
   shift of one only needs to look at bit 0; otherwise the low N bits are
   isolated by shifting them to the top of the word.  */
#define __FP_FRAC_SRST_1(X, S, N, sz) \
  do \
    { \
      if (__builtin_constant_p (N) && (N) == 1) \
	S = X & 1; \
      else \
	S = (X << (_FP_W_TYPE_SIZE - (N))) != 0; \
      X >>= (N); \
    } \
  while (0)

/* Worker for _FP_FRAC_SRS_1: same sticky computation, merged directly
   into the shifted value.  */
#define __FP_FRAC_SRS_1(X, N, sz) \
  (X = ((__builtin_constant_p (N) && (N) == 1 \
	 ? X & 1 \
	 : (X << (_FP_W_TYPE_SIZE - (N))) != 0) \
	| X >> (N)))
|
||||
|
||||
/* One-word fraction arithmetic.  */
#define _FP_FRAC_ADD_1(R, X, Y)	(R##_f = Y##_f + X##_f)
#define _FP_FRAC_SUB_1(R, X, Y)	(R##_f = X##_f - Y##_f)
#define _FP_FRAC_DEC_1(X, Y)	(X##_f -= Y##_f)
/* Count leading zeros of the fraction word into z via __FP_CLZ.  */
#define _FP_FRAC_CLZ_1(z, X)	__FP_CLZ ((z), X##_f)

/* Tests on a one-word fraction.  NEGP views the word as signed; OVERP,
   CLEAR_OVERP and HIGHBIT_DW use the per-format masks selected by fs.  */
#define _FP_FRAC_NEGP_1(X)	((_FP_WS_TYPE) X##_f < 0)
#define _FP_FRAC_ZEROP_1(X)	(!X##_f)
#define _FP_FRAC_OVERP_1(fs, X)	(X##_f & _FP_OVERFLOW_##fs)
#define _FP_FRAC_CLEAR_OVERP_1(fs, X)	(X##_f &= ~_FP_OVERFLOW_##fs)
#define _FP_FRAC_HIGHBIT_DW_1(fs, X)	(X##_f & _FP_HIGHBIT_DW_##fs)
#define _FP_FRAC_EQ_1(X, Y)	(X##_f == Y##_f)
#define _FP_FRAC_GE_1(X, Y)	(Y##_f <= X##_f)
#define _FP_FRAC_GT_1(X, Y)	(Y##_f < X##_f)

/* Constant fractions: all bits clear, only the lowest bit set, all bits
   set.  */
#define _FP_ZEROFRAC_1	0
#define _FP_MINFRAC_1	1
#define _FP_MAXFRAC_1	(~(_FP_WS_TYPE) 0)
|
||||
|
||||
/* Unpack the raw bits of a native fp value. Do not classify or
|
||||
normalize the data. */
|
||||
|
||||
/* Copy the sign, exponent and fraction bit-fields of the native value VAL
   into X##_s, X##_e and X##_f.  VAL is first stored in a local
   union _FP_UNION_##fs and the fields are read back through its "bits"
   member.  */
#define _FP_UNPACK_RAW_1(fs, X, val) \
  do \
    { \
      union _FP_UNION_##fs _FP_UNPACK_RAW_1_flo; \
      _FP_UNPACK_RAW_1_flo.flt = (val); \
 \
      X##_s = _FP_UNPACK_RAW_1_flo.bits.sign; \
      X##_e = _FP_UNPACK_RAW_1_flo.bits.exp; \
      X##_f = _FP_UNPACK_RAW_1_flo.bits.frac; \
    } \
  while (0)

/* Pointer flavour: VAL is an address, which is reinterpreted as a
   pointer to union _FP_UNION_##fs and read in place.  */
#define _FP_UNPACK_RAW_1_P(fs, X, val) \
  do \
    { \
      union _FP_UNION_##fs *_FP_UNPACK_RAW_1_P_flo \
	= (union _FP_UNION_##fs *) (val); \
 \
      X##_s = _FP_UNPACK_RAW_1_P_flo->bits.sign; \
      X##_e = _FP_UNPACK_RAW_1_P_flo->bits.exp; \
      X##_f = _FP_UNPACK_RAW_1_P_flo->bits.frac; \
    } \
  while (0)
|
||||
|
||||
/* Repack the raw bits of a native fp value. */
|
||||
|
||||
/* Build a native value from X##_s, X##_e and X##_f: the three bit-fields
   of a local union _FP_UNION_##fs are filled in and its "flt" member is
   assigned to VAL.  */
#define _FP_PACK_RAW_1(fs, val, X) \
  do \
    { \
      union _FP_UNION_##fs _FP_PACK_RAW_1_flo; \
 \
      _FP_PACK_RAW_1_flo.bits.sign = X##_s; \
      _FP_PACK_RAW_1_flo.bits.exp = X##_e; \
      _FP_PACK_RAW_1_flo.bits.frac = X##_f; \
 \
      (val) = _FP_PACK_RAW_1_flo.flt; \
    } \
  while (0)

/* Pointer flavour: VAL is an address, which is reinterpreted as a
   pointer to union _FP_UNION_##fs and written in place.  */
#define _FP_PACK_RAW_1_P(fs, val, X) \
  do \
    { \
      union _FP_UNION_##fs *_FP_PACK_RAW_1_P_flo \
	= (union _FP_UNION_##fs *) (val); \
 \
      _FP_PACK_RAW_1_P_flo->bits.sign = X##_s; \
      _FP_PACK_RAW_1_P_flo->bits.exp = X##_e; \
      _FP_PACK_RAW_1_P_flo->bits.frac = X##_f; \
    } \
  while (0)
|
||||
|
||||
|
||||
/* Multiplication algorithms: */
|
||||
|
||||
/* Basic. Assuming the host word size is >= 2*FRACBITS, we can do the
|
||||
multiplication immediately. */
|
||||
|
||||
/* Store the plain one-word product of the fractions of X and Y in R.
   Nothing is normalized; WFRACBITS is accepted but not used.  */
#define _FP_MUL_MEAT_DW_1_imm(wfracbits, R, X, Y) \
  do \
    { \
      R##_f = Y##_f * X##_f; \
    } \
  while (0)

/* One-word multiply followed by normalization.  */
#define _FP_MUL_MEAT_1_imm(wfracbits, R, X, Y) \
  do \
    { \
      R##_f = X##_f * Y##_f; \
      /* The multiplicands had their msb at bit B, so the msb of the \
	 product is at 2B or 2B-1.  Shift it back down, keeping any \
	 lost bits as a sticky lsb.  */ \
      _FP_FRAC_SRS_1 (R, (wfracbits) - 1, 2 * (wfracbits)); \
    } \
  while (0)
|
||||
|
||||
/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
|
||||
|
||||
/* Hand the two fraction words to DOIT, which receives the high and low
   result words of the two-word fraction R as its first two arguments.  */
#define _FP_MUL_MEAT_DW_1_wide(wfracbits, R, X, Y, doit) \
  do \
    { \
      doit (R##_f1, R##_f0, X##_f, Y##_f); \
    } \
  while (0)

/* Widening multiply followed by normalization back to one word.  */
#define _FP_MUL_MEAT_1_wide(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_1_wide_prod); \
      doit (_FP_MUL_MEAT_1_wide_prod_f1, _FP_MUL_MEAT_1_wide_prod_f0, \
	    X##_f, Y##_f); \
      /* The multiplicands had their msb at bit B, so the msb of the \
	 product is at 2B or 2B-1.  Shift it back down with a sticky \
	 lsb; the result is then taken from the low word.  */ \
      _FP_FRAC_SRS_2 (_FP_MUL_MEAT_1_wide_prod, (wfracbits) - 1, \
		      2 * (wfracbits)); \
      R##_f = _FP_MUL_MEAT_1_wide_prod_f0; \
    } \
  while (0)
|
||||
|
||||
/* Finally, a simple widening multiply algorithm. What fun! */
|
||||
|
||||
/* Schoolbook widening multiply built from half-word pieces: each operand
   is split into a high and a low half, the four half-by-half products are
   formed, and the two cross products are folded into the two-word result
   R (R##_f1 high, R##_f0 low).  */
#define _FP_MUL_MEAT_DW_1_hard(wfracbits, R, X, Y) \
  do \
    { \
      const _FP_W_TYPE _FP_MUL_MEAT_DW_1_hard_lomask \
	= ((_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE / 2)) - 1; \
      const _FP_W_TYPE _FP_MUL_MEAT_DW_1_hard_xhi \
	= X##_f >> (_FP_W_TYPE_SIZE / 2); \
      const _FP_W_TYPE _FP_MUL_MEAT_DW_1_hard_xlo \
	= X##_f & _FP_MUL_MEAT_DW_1_hard_lomask; \
      const _FP_W_TYPE _FP_MUL_MEAT_DW_1_hard_yhi \
	= Y##_f >> (_FP_W_TYPE_SIZE / 2); \
      const _FP_W_TYPE _FP_MUL_MEAT_DW_1_hard_ylo \
	= Y##_f & _FP_MUL_MEAT_DW_1_hard_lomask; \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_1_hard_mid); \
 \
      /* Low and high products go straight into the result; the two \
	 cross products are collected in the temporary.  */ \
      R##_f0 = _FP_MUL_MEAT_DW_1_hard_xlo * _FP_MUL_MEAT_DW_1_hard_ylo; \
      _FP_MUL_MEAT_DW_1_hard_mid_f0 \
	= _FP_MUL_MEAT_DW_1_hard_xhi * _FP_MUL_MEAT_DW_1_hard_ylo; \
      _FP_MUL_MEAT_DW_1_hard_mid_f1 \
	= _FP_MUL_MEAT_DW_1_hard_xlo * _FP_MUL_MEAT_DW_1_hard_yhi; \
      R##_f1 = _FP_MUL_MEAT_DW_1_hard_xhi * _FP_MUL_MEAT_DW_1_hard_yhi; \
 \
      /* Sum the cross products; a wrap-around is a carry into the \
	 upper half of the high result word.  */ \
      _FP_MUL_MEAT_DW_1_hard_mid_f0 += _FP_MUL_MEAT_DW_1_hard_mid_f1; \
      if (_FP_MUL_MEAT_DW_1_hard_mid_f0 < _FP_MUL_MEAT_DW_1_hard_mid_f1) \
	R##_f1 += (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE / 2); \
 \
      /* Align the sum half a word up and add it to the result.  */ \
      _FP_MUL_MEAT_DW_1_hard_mid_f1 \
	= _FP_MUL_MEAT_DW_1_hard_mid_f0 >> (_FP_W_TYPE_SIZE / 2); \
      _FP_MUL_MEAT_DW_1_hard_mid_f0 <<= (_FP_W_TYPE_SIZE / 2); \
      _FP_FRAC_ADD_2 (R, R, _FP_MUL_MEAT_DW_1_hard_mid); \
    } \
  while (0)

/* Half-word multiply followed by normalization back to one word.  */
#define _FP_MUL_MEAT_1_hard(wfracbits, R, X, Y) \
  do \
    { \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_1_hard_prod); \
      _FP_MUL_MEAT_DW_1_hard ((wfracbits), \
			      _FP_MUL_MEAT_1_hard_prod, X, Y); \
 \
      /* Shift the product back down with a sticky lsb and keep the \
	 low word.  */ \
      _FP_FRAC_SRS_2 (_FP_MUL_MEAT_1_hard_prod, \
		      (wfracbits) - 1, 2 * (wfracbits)); \
      R##_f = _FP_MUL_MEAT_1_hard_prod_f0; \
    } \
  while (0)
|
||||
|
||||
|
||||
/* Division algorithms: */
|
||||
|
||||
/* Basic. Assuming the host word size is >= 2*FRACBITS, we can do the
|
||||
division immediately. Give this macro either _FP_DIV_HELP_imm for
|
||||
C primitives or _FP_DIV_HELP_ldiv for the ISO function. Which you
|
||||
choose will depend on what the compiler does with divrem4. */
|
||||
|
||||
/* One-word division.  X's fraction is pre-shifted left (one bit further,
   with R's exponent decremented, when it is smaller than Y's fraction),
   DOIT then produces quotient and remainder, and a non-zero remainder is
   recorded as a sticky lsb of the quotient.  X##_f is modified.  */
#define _FP_DIV_MEAT_1_imm(fs, R, X, Y, doit) \
  do \
    { \
      _FP_W_TYPE _FP_DIV_MEAT_1_imm_q, _FP_DIV_MEAT_1_imm_r; \
      if (X##_f < Y##_f) \
	{ \
	  R##_e--; \
	  X##_f <<= _FP_WFRACBITS_##fs; \
	} \
      else \
	X##_f <<= (_FP_WFRACBITS_##fs - 1); \
      doit (_FP_DIV_MEAT_1_imm_q, _FP_DIV_MEAT_1_imm_r, X##_f, Y##_f); \
      R##_f = _FP_DIV_MEAT_1_imm_q | (_FP_DIV_MEAT_1_imm_r != 0); \
    } \
  while (0)
|
||||
|
||||
/* GCC's longlong.h defines a 2W / 1W => (1W,1W) primitive udiv_qrnnd
|
||||
that may be useful in this situation. This first is for a primitive
|
||||
that requires normalization, the second for one that does not. Look
|
||||
for UDIV_NEEDS_NORMALIZATION to tell which your machine needs. */
|
||||
|
||||
/* Division through udiv_qrnnd for the case where the divisor must be
   normalized.  Y's fraction is shifted left by _FP_WFRACXBITS_##fs to
   form the divisor; the dividend is the two-word value (nh, nl) built
   from X's fraction.  A non-zero remainder becomes a sticky lsb.  */
#define _FP_DIV_MEAT_1_udiv_norm(fs, R, X, Y) \
  do \
    { \
      _FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_nh, _FP_DIV_MEAT_1_udiv_norm_nl; \
      _FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_q, _FP_DIV_MEAT_1_udiv_norm_r; \
      /* Normalized divisor.  */ \
      _FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_y \
	= Y##_f << _FP_WFRACXBITS_##fs; \
 \
      if (X##_f >= Y##_f) \
	{ \
	  /* Dividend is X placed one bit below a full word up.  */ \
	  _FP_DIV_MEAT_1_udiv_norm_nh = X##_f >> 1; \
	  _FP_DIV_MEAT_1_udiv_norm_nl = X##_f << (_FP_W_TYPE_SIZE - 1); \
	} \
      else \
	{ \
	  /* Dividend is X moved up one full word; the exponent of \
	     the result is lowered to compensate.  */ \
	  R##_e--; \
	  _FP_DIV_MEAT_1_udiv_norm_nh = X##_f; \
	  _FP_DIV_MEAT_1_udiv_norm_nl = 0; \
	} \
 \
      udiv_qrnnd (_FP_DIV_MEAT_1_udiv_norm_q, \
		  _FP_DIV_MEAT_1_udiv_norm_r, \
		  _FP_DIV_MEAT_1_udiv_norm_nh, \
		  _FP_DIV_MEAT_1_udiv_norm_nl, \
		  _FP_DIV_MEAT_1_udiv_norm_y); \
      R##_f = (_FP_DIV_MEAT_1_udiv_norm_q \
	       | (_FP_DIV_MEAT_1_udiv_norm_r != 0)); \
    } \
  while (0)
|
||||
|
||||
/* Division through udiv_qrnnd with the divisor used as it is.  The
   two-word dividend (nh, nl) is X's fraction shifted left by
   _FP_WFRACBITS_##fs, or by one bit less when X's fraction is not
   smaller than Y's.  A non-zero remainder becomes a sticky lsb.  */
#define _FP_DIV_MEAT_1_udiv(fs, R, X, Y) \
  do \
    { \
      _FP_W_TYPE _FP_DIV_MEAT_1_udiv_nh, _FP_DIV_MEAT_1_udiv_nl; \
      _FP_W_TYPE _FP_DIV_MEAT_1_udiv_q, _FP_DIV_MEAT_1_udiv_r; \
      if (X##_f >= Y##_f) \
	{ \
	  _FP_DIV_MEAT_1_udiv_nh = X##_f >> (_FP_WFRACXBITS_##fs + 1); \
	  _FP_DIV_MEAT_1_udiv_nl = X##_f << (_FP_WFRACBITS_##fs - 1); \
	} \
      else \
	{ \
	  /* Use the larger shift and lower the result exponent.  */ \
	  R##_e--; \
	  _FP_DIV_MEAT_1_udiv_nh = X##_f >> _FP_WFRACXBITS_##fs; \
	  _FP_DIV_MEAT_1_udiv_nl = X##_f << _FP_WFRACBITS_##fs; \
	} \
      udiv_qrnnd (_FP_DIV_MEAT_1_udiv_q, _FP_DIV_MEAT_1_udiv_r, \
		  _FP_DIV_MEAT_1_udiv_nh, _FP_DIV_MEAT_1_udiv_nl, \
		  Y##_f); \
      R##_f = _FP_DIV_MEAT_1_udiv_q | (_FP_DIV_MEAT_1_udiv_r != 0); \
    } \
  while (0)
|
||||
|
||||
|
||||
/* Square root algorithms:
|
||||
We have just one right now, maybe Newton approximation
|
||||
should be added for those machines where division is fast. */
|
||||
|
||||
/* Bit-by-bit square root on one-word fractions.  Q is the bit under test
   and is halved each round until it equals _FP_WORK_ROUND.  In each round
   T = S + Q is tried against X; when it fits, it is subtracted from X and
   Q is added to both S and R.  X is doubled after every round.  If X is
   still non-zero at the end, the sticky bit is set in R, plus the round
   bit when S is smaller than X.  R, S, T, X and Q are all modified.  */
#define _FP_SQRT_MEAT_1(R, S, T, X, q) \
  do \
    { \
      for (; (q) != _FP_WORK_ROUND; (q) >>= 1) \
	{ \
	  T##_f = S##_f + (q); \
	  if (T##_f <= X##_f) \
	    { \
	      S##_f = T##_f + (q); \
	      X##_f -= T##_f; \
	      R##_f += (q); \
	    } \
	  /* Left shift by one, written as a self-addition.  */ \
	  X##_f += X##_f; \
	} \
      if (X##_f != 0) \
	{ \
	  if (S##_f < X##_f) \
	    R##_f |= _FP_WORK_ROUND; \
	  R##_f |= _FP_WORK_STICKY; \
	} \
    } \
  while (0)
|
||||
|
||||
/* Assembly/disassembly for converting to/from integral types.
|
||||
No shifting or overflow handled here. */
|
||||
|
||||
/* Copy the fraction word into the integer R, or load it from R.  RSIZE
   is accepted but not used; as noted above, no shifting or overflow
   handling happens here.  */
#define _FP_FRAC_ASSEMBLE_1(r, X, rsize)	((r) = X##_f)
#define _FP_FRAC_DISASSEMBLE_1(X, r, rsize)	(X##_f = (r))


/* Conversion between word sizes: one word to one word is a plain copy
   of the fraction word.  */

#define _FP_FRAC_COPY_1_1(D, S)	(D##_f = S##_f)
|
||||
|
||||
#endif /* !SOFT_FP_OP_1_H */
|
705
src/linpack/soft-fp/op-2.h
Normal file
705
src/linpack/soft-fp/op-2.h
Normal file
|
@ -0,0 +1,705 @@
|
|||
/* Software floating-point emulation.
|
||||
Basic two-word fraction declaration and manipulation.
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com),
|
||||
Jakub Jelinek (jj@ultra.linux.cz),
|
||||
David S. Miller (davem@redhat.com) and
|
||||
Peter Maydell (pmaydell@chiark.greenend.org.uk).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef SOFT_FP_OP_2_H
|
||||
#define SOFT_FP_OP_2_H 1
|
||||
|
||||
/* Two-word fractions: X##_f0 is the low word and X##_f1 the high word
   (see _FP_FRAC_LOW_2 / _FP_FRAC_HIGH_2).  _FP_FRAC_SET_2 forwards to
   __FP_FRAC_SET_2, so I is expected to expand to two comma-separated
   values, high word first.  */
#define _FP_FRAC_DECL_2(X) \
  _FP_W_TYPE X##_f0 _FP_ZERO_INIT, X##_f1 _FP_ZERO_INIT
#define _FP_FRAC_COPY_2(D, S)	(D##_f0 = S##_f0, D##_f1 = S##_f1)
#define _FP_FRAC_SET_2(X, I)	__FP_FRAC_SET_2 (X, I)
#define _FP_FRAC_HIGH_2(X)	(X##_f1)
#define _FP_FRAC_LOW_2(X)	(X##_f0)
/* Word W of the fraction; W must be the literal 0 or 1 because it is
   pasted onto the variable name.  */
#define _FP_FRAC_WORD_2(X, w)	(X##_f##w)
|
||||
|
||||
/* Shift the two-word fraction left by N bits.  The macro is a single
   void expression that picks one of two statement-expressions: shifts of
   a whole word or more move the low word into the high word, shorter
   shifts carry bits across the word boundary.  A compile-time-constant
   shift of one is done by self-addition, carrying the top bit of the low
   word into the high word.  */
#define _FP_FRAC_SLL_2(X, N) \
  (void) (((N) >= _FP_W_TYPE_SIZE) \
	  ? ({ \
	      X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \
	      X##_f0 = 0; \
	    }) \
	  : ({ \
	      if (!(__builtin_constant_p (N) && (N) == 1)) \
		{ \
		  X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
		  X##_f0 <<= (N); \
		} \
	      else \
		{ \
		  X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE) (X##_f0)) < 0); \
		  X##_f0 += X##_f0; \
		} \
	      0; \
	    }))
|
||||
|
||||
|
||||
/* Shift the two-word fraction right by N bits, discarding the bits that
   fall off.  Shifts of a whole word or more move the high word into the
   low word; shorter shifts carry bits across the word boundary.  */
#define _FP_FRAC_SRL_2(X, N) \
  (void) (((N) >= _FP_W_TYPE_SIZE) \
	  ? ({ \
	      X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \
	      X##_f1 = 0; \
	    }) \
	  : ({ \
	      X##_f0 = X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N); \
	      X##_f1 >>= (N); \
	    }))
|
||||
|
||||
/* Right shift with sticky-lsb. */
|
||||
/* Shift the two-word fraction right by N bits and set S to 1 if any set
   bit was shifted out, otherwise 0.  For shifts of a whole word or more
   the entire low word and the low (N - word size) bits of the high word
   are the discarded part; a shift of exactly one word discards only the
   low word.  SZ is accepted but not used.  */
#define _FP_FRAC_SRST_2(X, S, N, sz) \
  (void) (((N) >= _FP_W_TYPE_SIZE) \
	  ? ({ \
	      S = ((((N) == _FP_W_TYPE_SIZE \
		     ? 0 \
		     : (X##_f1 << (2 * _FP_W_TYPE_SIZE - (N)))) \
		    | X##_f0) != 0); \
	      X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)); \
	      X##_f1 = 0; \
	    }) \
	  : ({ \
	      if (__builtin_constant_p (N) && (N) == 1) \
		S = X##_f0 & 1; \
	      else \
		S = (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0; \
	      X##_f0 = (X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N))); \
	      X##_f1 >>= (N); \
	    }))
|
||||
|
||||
/* Shift the two-word fraction right by N bits; if any set bit is shifted
   out, the least significant bit of the result is set (sticky lsb).  The
   discarded part is determined exactly as in _FP_FRAC_SRST_2.  SZ is
   accepted but not used.  */
#define _FP_FRAC_SRS_2(X, N, sz) \
  (void) (((N) >= _FP_W_TYPE_SIZE) \
	  ? ({ \
	      X##_f0 = (((((N) == _FP_W_TYPE_SIZE \
			   ? 0 \
			   : (X##_f1 << (2 * _FP_W_TYPE_SIZE - (N)))) \
			  | X##_f0) != 0) \
			| X##_f1 >> ((N) - _FP_W_TYPE_SIZE)); \
	      X##_f1 = 0; \
	    }) \
	  : ({ \
	      X##_f0 = ((__builtin_constant_p (N) && (N) == 1 \
			 ? X##_f0 & 1 \
			 : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0) \
			| X##_f1 << (_FP_W_TYPE_SIZE - (N)) \
			| X##_f0 >> (N)); \
	      X##_f1 >>= (N); \
	    }))
|
||||
|
||||
/* Two-word add/subtract front ends.  Each one expands the fraction names
   into their (high, low) word pairs and forwards them to the matching
   __FP_FRAC_* primitive.  */

/* X += I, where I is a plain integer.  */
#define _FP_FRAC_ADDI_2(X, I) \
  __FP_FRAC_ADDI_2 (X##_f1, X##_f0, I)

/* R = X + Y.  */
#define _FP_FRAC_ADD_2(R, X, Y) \
  __FP_FRAC_ADD_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)

/* R = X - Y.  */
#define _FP_FRAC_SUB_2(R, X, Y) \
  __FP_FRAC_SUB_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)

/* X -= Y.  */
#define _FP_FRAC_DEC_2(X, Y) \
  __FP_FRAC_DEC_2 (X##_f1, X##_f0, Y##_f1, Y##_f0)
|
||||
|
||||
/* Count the leading zeros of the two-word fraction X into R.  When the
   high word is zero the count is taken from the low word and a full word
   width is added; otherwise the high word alone decides.  */
#define _FP_FRAC_CLZ_2(R, X) \
  do \
    { \
      if (!X##_f1) \
	{ \
	  __FP_CLZ ((R), X##_f0); \
	  (R) += _FP_W_TYPE_SIZE; \
	} \
      else \
	__FP_CLZ ((R), X##_f1); \
    } \
  while (0)
|
||||
|
||||
/* Predicates. */
|
||||
/* NEGP views the high word as signed; ZEROP requires both words to be
   zero.  OVERP, CLEAR_OVERP and HIGHBIT_DW apply the per-format masks
   selected by fs to the word chosen by _FP_FRAC_HIGH_##fs or
   _FP_FRAC_HIGH_DW_##fs.  */
#define _FP_FRAC_NEGP_2(X)	((_FP_WS_TYPE) X##_f1 < 0)
#define _FP_FRAC_ZEROP_2(X)	(X##_f1 == 0 && X##_f0 == 0)
#define _FP_FRAC_OVERP_2(fs, X)	(_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
#define _FP_FRAC_CLEAR_OVERP_2(fs, X)	(_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
#define _FP_FRAC_HIGHBIT_DW_2(fs, X) \
  (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
/* Comparisons: the high words decide unless they are equal, in which
   case the low words do.  */
#define _FP_FRAC_EQ_2(X, Y)	(X##_f0 == Y##_f0 && X##_f1 == Y##_f1)
#define _FP_FRAC_GT_2(X, Y) \
  (Y##_f1 < X##_f1 || (Y##_f1 == X##_f1 && Y##_f0 < X##_f0))
#define _FP_FRAC_GE_2(X, Y) \
  (Y##_f1 < X##_f1 || (Y##_f1 == X##_f1 && Y##_f0 <= X##_f0))

/* Constant fractions, written as "high, low" pairs: all bits clear,
   only the lowest bit set, all bits set.  */
#define _FP_ZEROFRAC_2	0, 0
#define _FP_MINFRAC_2	0, 1
#define _FP_MAXFRAC_2	(~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)
|
||||
|
||||
/* Internals. */
|
||||
|
||||
/* Load the two-word fraction X: I1 goes to the high word, I0 to the low
   word.  Both values are parenthesized so any expression can be passed.  */
#define __FP_FRAC_SET_2(X, I1, I0)	(X##_f0 = (I0), X##_f1 = (I1))
|
||||
|
||||
/* Count the leading zeros of the two-word value (xh, xl) into R.  When
   the high word is zero the count is taken from the low word and a full
   word width is added; otherwise the high word alone decides.  */
#define __FP_CLZ_2(R, xh, xl) \
  do \
    { \
      if (!(xh)) \
	{ \
	  __FP_CLZ ((R), xl); \
	  (R) += _FP_W_TYPE_SIZE; \
	} \
      else \
	__FP_CLZ ((R), xh); \
    } \
  while (0)
|
||||
|
||||
/* Two-word add/subtract primitives.  The "#if 0" arm holds plain-C
   versions, each guarded by #ifndef so an earlier definition would win;
   that arm is disabled, so the "#else" arm is what is compiled and every
   primitive is routed to add_ssaaaa / sub_ddmmss (not defined in this
   header -- presumably supplied by longlong.h; confirm).  */
#if 0

/* (xh, xl) += i; a wrap of the low word carries into the high word.  */
# ifndef __FP_FRAC_ADDI_2
#  define __FP_FRAC_ADDI_2(xh, xl, i) \
  (xh += ((xl += i) < i))
# endif
/* (rh, rl) = (xh, xl) + (yh, yl).  */
# ifndef __FP_FRAC_ADD_2
#  define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
  (rh = xh + yh + ((rl = xl + yl) < xl))
# endif
/* (rh, rl) = (xh, xl) - (yh, yl).  */
# ifndef __FP_FRAC_SUB_2
#  define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
  (rh = xh - yh - ((rl = xl - yl) > xl))
# endif
/* (xh, xl) -= (yh, yl); the old low word is saved first so the borrow
   test still sees it after xl has been overwritten.  */
# ifndef __FP_FRAC_DEC_2
#  define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
  do \
    { \
      UWtype __FP_FRAC_DEC_2_t = xl; \
      xh -= yh + ((xl -= yl) > __FP_FRAC_DEC_2_t); \
    } \
  while (0)
# endif

#else

/* Active definitions: any previous definition is dropped first.  */
# undef __FP_FRAC_ADDI_2
# define __FP_FRAC_ADDI_2(xh, xl, i)	add_ssaaaa (xh, xl, xh, xl, 0, i)
# undef __FP_FRAC_ADD_2
# define __FP_FRAC_ADD_2	add_ssaaaa
# undef __FP_FRAC_SUB_2
# define __FP_FRAC_SUB_2	sub_ddmmss
# undef __FP_FRAC_DEC_2
# define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
  sub_ddmmss (xh, xl, xh, xl, yh, yl)

#endif
|
||||
|
||||
/* Unpack the raw bits of a native fp value. Do not classify or
|
||||
normalize the data. */
|
||||
|
||||
/* Copy the sign, exponent and the two fraction bit-fields of the native
   value VAL into X##_s, X##_e, X##_f1 and X##_f0.  VAL is first stored in
   a local union _FP_UNION_##fs and the fields are read back through its
   "bits" member.  */
#define _FP_UNPACK_RAW_2(fs, X, val) \
  do \
    { \
      union _FP_UNION_##fs _FP_UNPACK_RAW_2_flo; \
      _FP_UNPACK_RAW_2_flo.flt = (val); \
 \
      X##_s = _FP_UNPACK_RAW_2_flo.bits.sign; \
      X##_e = _FP_UNPACK_RAW_2_flo.bits.exp; \
      X##_f1 = _FP_UNPACK_RAW_2_flo.bits.frac1; \
      X##_f0 = _FP_UNPACK_RAW_2_flo.bits.frac0; \
    } \
  while (0)

/* Pointer flavour: VAL is an address, which is reinterpreted as a
   pointer to union _FP_UNION_##fs and read in place.  */
#define _FP_UNPACK_RAW_2_P(fs, X, val) \
  do \
    { \
      union _FP_UNION_##fs *_FP_UNPACK_RAW_2_P_flo \
	= (union _FP_UNION_##fs *) (val); \
 \
      X##_s = _FP_UNPACK_RAW_2_P_flo->bits.sign; \
      X##_e = _FP_UNPACK_RAW_2_P_flo->bits.exp; \
      X##_f1 = _FP_UNPACK_RAW_2_P_flo->bits.frac1; \
      X##_f0 = _FP_UNPACK_RAW_2_P_flo->bits.frac0; \
    } \
  while (0)
|
||||
|
||||
|
||||
/* Repack the raw bits of a native fp value. */
|
||||
|
||||
/* Build a native value from X##_s, X##_e, X##_f1 and X##_f0: the four
   bit-fields of a local union _FP_UNION_##fs are filled in and its "flt"
   member is assigned to VAL.  */
#define _FP_PACK_RAW_2(fs, val, X) \
  do \
    { \
      union _FP_UNION_##fs _FP_PACK_RAW_2_flo; \
 \
      _FP_PACK_RAW_2_flo.bits.sign = X##_s; \
      _FP_PACK_RAW_2_flo.bits.exp = X##_e; \
      _FP_PACK_RAW_2_flo.bits.frac1 = X##_f1; \
      _FP_PACK_RAW_2_flo.bits.frac0 = X##_f0; \
 \
      (val) = _FP_PACK_RAW_2_flo.flt; \
    } \
  while (0)

/* Pointer flavour: VAL is an address, which is reinterpreted as a
   pointer to union _FP_UNION_##fs and written in place.  */
#define _FP_PACK_RAW_2_P(fs, val, X) \
  do \
    { \
      union _FP_UNION_##fs *_FP_PACK_RAW_2_P_flo \
	= (union _FP_UNION_##fs *) (val); \
 \
      _FP_PACK_RAW_2_P_flo->bits.sign = X##_s; \
      _FP_PACK_RAW_2_P_flo->bits.exp = X##_e; \
      _FP_PACK_RAW_2_P_flo->bits.frac1 = X##_f1; \
      _FP_PACK_RAW_2_P_flo->bits.frac0 = X##_f0; \
    } \
  while (0)
|
||||
|
||||
|
||||
/* Multiplication algorithms: */
|
||||
|
||||
/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
|
||||
|
||||
/* Two-word by two-word multiply into the four-word fraction R, built
   from four one-word widening multiplies performed by DOIT (high result
   word first, then low result word, then the two factors).  The low and
   high products are written straight into words 1:0 and 3:2 of R; the
   two cross products are kept in temporaries and then each is added,
   zero-extended on top, into words 3:1.  */
#define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_x0y1); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_x1y0); \
 \
      /* Partial products.  */ \
      doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0), \
	    X##_f0, Y##_f0); \
      doit (_FP_MUL_MEAT_DW_2_wide_x0y1_f1, \
	    _FP_MUL_MEAT_DW_2_wide_x0y1_f0, \
	    X##_f0, Y##_f1); \
      doit (_FP_MUL_MEAT_DW_2_wide_x1y0_f1, \
	    _FP_MUL_MEAT_DW_2_wide_x1y0_f0, \
	    X##_f1, Y##_f0); \
      doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
	    X##_f1, Y##_f1); \
 \
      /* Words 3:1 of R += X0 * Y1.  */ \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
		       _FP_FRAC_WORD_4 (R, 1), 0, \
		       _FP_MUL_MEAT_DW_2_wide_x0y1_f1, \
		       _FP_MUL_MEAT_DW_2_wide_x0y1_f0, \
		       _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
		       _FP_FRAC_WORD_4 (R, 1)); \
      /* Words 3:1 of R += X1 * Y0.  */ \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
		       _FP_FRAC_WORD_4 (R, 1), 0, \
		       _FP_MUL_MEAT_DW_2_wide_x1y0_f1, \
		       _FP_MUL_MEAT_DW_2_wide_x1y0_f0, \
		       _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
		       _FP_FRAC_WORD_4 (R, 1)); \
    } \
  while (0)
|
||||
|
||||
/* Multiply two two-word fractions X and Y into the two-word fraction R.
   The double-width product is built in a four-word temporary by
   _FP_MUL_MEAT_DW_2_wide (using the 1W * 1W => 2W primitive DOIT),
   shifted right by WFRACBITS - 1 with sticky-bit collection
   (_FP_FRAC_SRS_4), and its two low words become R##_f0 / R##_f1.  */
#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_z); \
 \
      _FP_MUL_MEAT_DW_2_wide ((wfracbits), _FP_MUL_MEAT_2_wide_z, \
                              X, Y, doit); \
 \
      /* Normalize: since we know where the msb of the multiplicands \
         were (bit B), we know that the msb of the product is at \
         either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_z, (wfracbits)-1, \
                      2*(wfracbits)); \
      R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 0); \
      R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 1); \
    } \
  while (0)
|
||||
|
||||
/* Given a 1W * 1W => 2W primitive, do the extended multiplication.
|
||||
Do only 3 multiplications instead of four. This one is for machines
|
||||
where multiplication is much more expensive than subtraction. */
|
||||
|
||||
/* Double-width product of two two-word fractions using three calls to
   the 1W * 1W => 2W primitive DOIT instead of four.
   It computes X0*Y0, X1*Y1 and (X0+X1)*(Y0+Y1), where the two sums are
   taken modulo the word size and their carries are kept in c1 / c2.
   The middle term is then recovered by subtracting the two outer
   products, with the carries compensated for explicitly.  The result
   is left in the four words of R (via _FP_FRAC_WORD_4).  WFRACBITS is
   accepted but not used by this macro.  */
#define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_b); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_c); \
      _FP_W_TYPE _FP_MUL_MEAT_DW_2_wide_3mul_d; \
      int _FP_MUL_MEAT_DW_2_wide_3mul_c1; \
      int _FP_MUL_MEAT_DW_2_wide_3mul_c2; \
 \
      /* b_f0 = X0 + X1, b_f1 = Y0 + Y1; c1 / c2 are their carries.  */ \
      _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 = X##_f0 + X##_f1; \
      _FP_MUL_MEAT_DW_2_wide_3mul_c1 \
        = _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 < X##_f0; \
      _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 = Y##_f0 + Y##_f1; \
      _FP_MUL_MEAT_DW_2_wide_3mul_c2 \
        = _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 < Y##_f0; \
      doit (_FP_MUL_MEAT_DW_2_wide_3mul_d, _FP_FRAC_WORD_4 (R, 0), \
            X##_f0, Y##_f0); \
      doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1), \
            _FP_MUL_MEAT_DW_2_wide_3mul_b_f0, \
            _FP_MUL_MEAT_DW_2_wide_3mul_b_f1); \
      doit (_FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
            _FP_MUL_MEAT_DW_2_wide_3mul_c_f0, X##_f1, Y##_f1); \
 \
      /* Keep each truncated sum only if the other sum carried.  */ \
      _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 \
        &= -_FP_MUL_MEAT_DW_2_wide_3mul_c2; \
      _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 \
        &= -_FP_MUL_MEAT_DW_2_wide_3mul_c1; \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
                       _FP_FRAC_WORD_4 (R, 1), \
                       (_FP_MUL_MEAT_DW_2_wide_3mul_c1 \
                        & _FP_MUL_MEAT_DW_2_wide_3mul_c2), 0, \
                       _FP_MUL_MEAT_DW_2_wide_3mul_d, \
                       0, _FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1)); \
      __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
                        _FP_MUL_MEAT_DW_2_wide_3mul_b_f0); \
      __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
                        _FP_MUL_MEAT_DW_2_wide_3mul_b_f1); \
      /* Subtract X0*Y0 and X1*Y1 from the middle term.  */ \
      __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
                       _FP_FRAC_WORD_4 (R, 1), \
                       0, _FP_MUL_MEAT_DW_2_wide_3mul_d, \
                       _FP_FRAC_WORD_4 (R, 0)); \
      __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
                       _FP_FRAC_WORD_4 (R, 1), 0, \
                       _FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
                       _FP_MUL_MEAT_DW_2_wide_3mul_c_f0); \
      /* Finally add X1*Y1 at word 2.  */ \
      __FP_FRAC_ADD_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
                       _FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
                       _FP_MUL_MEAT_DW_2_wide_3mul_c_f0, \
                       _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2)); \
    } \
  while (0)
|
||||
|
||||
/* Multiply two two-word fractions X and Y into the two-word fraction R
   using the three-multiplication variant.  The double-width product is
   built in a four-word temporary by _FP_MUL_MEAT_DW_2_wide_3mul,
   shifted right by WFRACBITS - 1 with sticky-bit collection
   (_FP_FRAC_SRS_4), and its two low words become R##_f0 / R##_f1.  */
#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_3mul_z); \
 \
      _FP_MUL_MEAT_DW_2_wide_3mul ((wfracbits), \
                                   _FP_MUL_MEAT_2_wide_3mul_z, \
                                   X, Y, doit); \
 \
      /* Normalize: since we know where the msb of the multiplicands \
         were (bit B), we know that the msb of the product is at \
         either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_3mul_z, \
                      (wfracbits)-1, 2*(wfracbits)); \
      R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 0); \
      R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 1); \
    } \
  while (0)
|
||||
|
||||
/* Double-width product of two two-word fractions via mpn_mul_n.
   X and Y are copied into two-element word arrays (index 0 = _f0,
   index 1 = _f1) and mpn_mul_n is called with length 2, writing into
   the word array R##_f.  WFRACBITS is accepted but not used here.  */
#define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y) \
  do \
    { \
      _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_x[2]; \
      _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_y[2]; \
      _FP_MUL_MEAT_DW_2_gmp_x[0] = X##_f0; \
      _FP_MUL_MEAT_DW_2_gmp_x[1] = X##_f1; \
      _FP_MUL_MEAT_DW_2_gmp_y[0] = Y##_f0; \
      _FP_MUL_MEAT_DW_2_gmp_y[1] = Y##_f1; \
 \
      mpn_mul_n (R##_f, _FP_MUL_MEAT_DW_2_gmp_x, \
                 _FP_MUL_MEAT_DW_2_gmp_y, 2); \
    } \
  while (0)
|
||||
|
||||
/* Multiply two two-word fractions X and Y into the two-word fraction R
   using _FP_MUL_MEAT_DW_2_gmp for the double-width product.  The
   four-word temporary is shifted right by WFRACBITS - 1 with
   sticky-bit collection (_FP_FRAC_SRS_4) and its two low words become
   R##_f0 / R##_f1.  */
#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \
  do \
    { \
      _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_gmp_z); \
 \
      _FP_MUL_MEAT_DW_2_gmp ((wfracbits), _FP_MUL_MEAT_2_gmp_z, X, Y); \
 \
      /* Normalize: since we know where the msb of the multiplicands \
         were (bit B), we know that the msb of the product is at \
         either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_gmp_z, (wfracbits)-1, \
                      2*(wfracbits)); \
      R##_f0 = _FP_MUL_MEAT_2_gmp_z_f[0]; \
      R##_f1 = _FP_MUL_MEAT_2_gmp_z_f[1]; \
    } \
  while (0)
|
||||
|
||||
/* Do at most 120x120=240 bits multiplication using double floating
   point multiplication.  This is useful if floating point
   multiplication has much bigger throughput than integer multiply.
   It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
   between 106 and 120 only.
   Caller guarantees that X and Y have (1LL << (wfracbits - 1)) set.
   SETFETZ is a macro which will disable all FPU exceptions and set rounding
   towards zero, RESETFE should optionally reset it back.  */
|
||||
|
||||
/* Multiply two two-word fractions using double-precision arithmetic.
   X and Y are split into five 24-bit limbs each (a..e and f..j, most
   significant first), every limb is converted to double and scaled by
   a power of 2^-24, and the column sums of the limb products are
   accumulated in doubles.  The four lowest columns only contribute a
   sticky bit (_y240); the upper columns are biased by constants so
   that their 24-bit integer value can be read from the low mantissa
   bits of the double through the union's integer view.  The pieces are
   then reassembled into R##_f1 / R##_f0, shifted according to
   WFRACBITS.  A static assertion restricts WFRACBITS to 106..120.
   SETFETZ and RESETFE are expanded as statements before and after the
   computation.  WFRACBITS is parenthesised wherever it is used so that
   an expression argument cannot change operator grouping.  */
#define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
  do \
    { \
      static const double _const[] = \
        { \
          /* 2^-24 */ 5.9604644775390625e-08, \
          /* 2^-48 */ 3.5527136788005009e-15, \
          /* 2^-72 */ 2.1175823681357508e-22, \
          /* 2^-96 */ 1.2621774483536189e-29, \
          /* 2^28 */ 2.68435456e+08, \
          /* 2^4 */ 1.600000e+01, \
          /* 2^-20 */ 9.5367431640625e-07, \
          /* 2^-44 */ 5.6843418860808015e-14, \
          /* 2^-68 */ 3.3881317890172014e-21, \
          /* 2^-92 */ 2.0194839173657902e-28, \
          /* 2^-116 */ 1.2037062152420224e-35 \
        }; \
      double _a240, _b240, _c240, _d240, _e240, _f240, \
             _g240, _h240, _i240, _j240, _k240; \
      union { double d; UDItype i; } _l240, _m240, _n240, _o240, \
                                     _p240, _q240, _r240, _s240; \
      UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \
 \
      _FP_STATIC_ASSERT ((wfracbits) >= 106 && (wfracbits) <= 120, \
                         "wfracbits out of range"); \
 \
      setfetz; \
 \
      /* Split X (a..e) and Y (f..j) into 24-bit limbs.  */ \
      _e240 = (double) (long) (X##_f0 & 0xffffff); \
      _j240 = (double) (long) (Y##_f0 & 0xffffff); \
      _d240 = (double) (long) ((X##_f0 >> 24) & 0xffffff); \
      _i240 = (double) (long) ((Y##_f0 >> 24) & 0xffffff); \
      _c240 = (double) (long) (((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
      _h240 = (double) (long) (((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
      _b240 = (double) (long) ((X##_f1 >> 8) & 0xffffff); \
      _g240 = (double) (long) ((Y##_f1 >> 8) & 0xffffff); \
      _a240 = (double) (long) (X##_f1 >> 32); \
      _f240 = (double) (long) (Y##_f1 >> 32); \
      /* Scale each limb to its weight relative to the top limb.  */ \
      _e240 *= _const[3]; \
      _j240 *= _const[3]; \
      _d240 *= _const[2]; \
      _i240 *= _const[2]; \
      _c240 *= _const[1]; \
      _h240 *= _const[1]; \
      _b240 *= _const[0]; \
      _g240 *= _const[0]; \
      /* Column sums of the limb products, lowest (s) to highest (k).  */ \
      _s240.d = _e240*_j240; \
      _r240.d = _d240*_j240 + _e240*_i240; \
      _q240.d = _c240*_j240 + _d240*_i240 + _e240*_h240; \
      _p240.d = _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240; \
      _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240; \
      _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240; \
      _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240; \
      _l240.d = _a240*_g240 + _b240*_f240; \
      _k240 = _a240*_f240; \
      _r240.d += _s240.d; \
      _q240.d += _r240.d; \
      _p240.d += _q240.d; \
      _o240.d += _p240.d; \
      _n240.d += _o240.d; \
      _m240.d += _n240.d; \
      _l240.d += _m240.d; \
      _k240 += _l240.d; \
      _s240.d -= ((_const[10]+_s240.d)-_const[10]); \
      _r240.d -= ((_const[9]+_r240.d)-_const[9]); \
      _q240.d -= ((_const[8]+_q240.d)-_const[8]); \
      _p240.d -= ((_const[7]+_p240.d)-_const[7]); \
      _o240.d += _const[7]; \
      _n240.d += _const[6]; \
      _m240.d += _const[5]; \
      _l240.d += _const[4]; \
      /* Any residue left in the low columns becomes the sticky bit.  */ \
      if (_s240.d != 0.0) \
        _y240 = 1; \
      if (_r240.d != 0.0) \
        _y240 = 1; \
      if (_q240.d != 0.0) \
        _y240 = 1; \
      if (_p240.d != 0.0) \
        _y240 = 1; \
      _t240 = (DItype) _k240; \
      _u240 = _l240.i; \
      _v240 = _m240.i; \
      _w240 = _n240.i; \
      _x240 = _o240.i; \
      R##_f1 = ((_t240 << (128 - ((wfracbits) - 1))) \
                | ((_u240 & 0xffffff) >> (((wfracbits) - 1) - 104))); \
      R##_f0 = (((_u240 & 0xffffff) << (168 - ((wfracbits) - 1))) \
                | ((_v240 & 0xffffff) << (144 - ((wfracbits) - 1))) \
                | ((_w240 & 0xffffff) << (120 - ((wfracbits) - 1))) \
                | ((_x240 & 0xffffff) >> (((wfracbits) - 1) - 96)) \
                | _y240); \
      resetfe; \
    } \
  while (0)
|
||||
|
||||
/* Division algorithms: */
|
||||
|
||||
/* Divide the two-word fraction X by Y, leaving the two-word quotient
   in R, using the udiv_qrnnd (2W / 1W) and umul_ppmm (1W * 1W => 2W)
   primitives.
   The numerator is widened to three words: if X >= Y it is shifted
   right by one bit, otherwise R##_e is decremented instead.  Y is
   shifted left by _FP_WFRACXBITS_<fs> (note: Y is modified).  Each
   quotient word is estimated by dividing by Y##_f1 and then corrected
   downwards at most twice after comparing against the Y##_f0 partial
   product.  If the final remainder differs from that partial product,
   _FP_WORK_STICKY is OR-ed into R##_f0.  */
#define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \
  do \
    { \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f2; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f1; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f0; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f1; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f0; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f1; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f0; \
      if (_FP_FRAC_GE_2 (X, Y)) \
        { \
          _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1 >> 1; \
          _FP_DIV_MEAT_2_udiv_n_f1 \
            = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \
          _FP_DIV_MEAT_2_udiv_n_f0 \
            = X##_f0 << (_FP_W_TYPE_SIZE - 1); \
        } \
      else \
        { \
          R##_e--; \
          _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1; \
          _FP_DIV_MEAT_2_udiv_n_f1 = X##_f0; \
          _FP_DIV_MEAT_2_udiv_n_f0 = 0; \
        } \
 \
      /* Normalize, i.e. make the most significant bit of the \
         denominator set.  */ \
      _FP_FRAC_SLL_2 (Y, _FP_WFRACXBITS_##fs); \
 \
      udiv_qrnnd (R##_f1, _FP_DIV_MEAT_2_udiv_r_f1, \
                  _FP_DIV_MEAT_2_udiv_n_f2, _FP_DIV_MEAT_2_udiv_n_f1, \
                  Y##_f1); \
      umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, _FP_DIV_MEAT_2_udiv_m_f0, \
                 R##_f1, Y##_f0); \
      _FP_DIV_MEAT_2_udiv_r_f0 = _FP_DIV_MEAT_2_udiv_n_f0; \
      if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, _FP_DIV_MEAT_2_udiv_r)) \
        { \
          R##_f1--; \
          _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
                          _FP_DIV_MEAT_2_udiv_r); \
          if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y) \
              && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
                                _FP_DIV_MEAT_2_udiv_r)) \
            { \
              R##_f1--; \
              _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
                              _FP_DIV_MEAT_2_udiv_r); \
            } \
        } \
      _FP_FRAC_DEC_2 (_FP_DIV_MEAT_2_udiv_r, _FP_DIV_MEAT_2_udiv_m); \
 \
      if (_FP_DIV_MEAT_2_udiv_r_f1 == Y##_f1) \
        { \
          /* This is a special case, not an optimization \
             (_FP_DIV_MEAT_2_udiv_r/Y##_f1 would not fit into UWtype). \
             As _FP_DIV_MEAT_2_udiv_r is guaranteed to be < Y, \
             R##_f0 can be either (UWtype)-1 or (UWtype)-2.  But as we \
             know what kind of bits it is (sticky, guard, round), \
             we don't care.  We also don't care what the remainder is, \
             because the guard bit will be set anyway.  -jj */ \
          R##_f0 = -1; \
        } \
      else \
        { \
          udiv_qrnnd (R##_f0, _FP_DIV_MEAT_2_udiv_r_f1, \
                      _FP_DIV_MEAT_2_udiv_r_f1, \
                      _FP_DIV_MEAT_2_udiv_r_f0, Y##_f1); \
          umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, \
                     _FP_DIV_MEAT_2_udiv_m_f0, R##_f0, Y##_f0); \
          _FP_DIV_MEAT_2_udiv_r_f0 = 0; \
          if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
                             _FP_DIV_MEAT_2_udiv_r)) \
            { \
              R##_f0--; \
              _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
                              _FP_DIV_MEAT_2_udiv_r); \
              if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y) \
                  && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
                                    _FP_DIV_MEAT_2_udiv_r)) \
                { \
                  R##_f0--; \
                  _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
                                  _FP_DIV_MEAT_2_udiv_r); \
                } \
            } \
          if (!_FP_FRAC_EQ_2 (_FP_DIV_MEAT_2_udiv_r, \
                              _FP_DIV_MEAT_2_udiv_m)) \
            R##_f0 |= _FP_WORK_STICKY; \
        } \
    } \
  while (0)
|
||||
|
||||
|
||||
/* Square root algorithms:
|
||||
We have just one right now, maybe Newton approximation
|
||||
should be added for those machines where division is fast. */
|
||||
|
||||
/* Core of the two-word square root.  R accumulates the root, S and T
   are two-word scratch fractions, X is the radicand (consumed), and Q
   is a modifiable word holding the bit currently being tried.
   First loop: for each bit Q from its initial value down to zero, try
   it against the high word only, then shift X left by one.
   Second loop: restart Q at the top bit of a word and do the same on
   the low word with carry into the high word, stopping when Q reaches
   _FP_WORK_ROUND.
   Finally, if X is non-zero, _FP_WORK_STICKY is OR-ed into R##_f0, and
   _FP_WORK_ROUND as well when S < X.  */
#define _FP_SQRT_MEAT_2(R, S, T, X, q) \
  do \
    { \
      while (q) \
        { \
          T##_f1 = S##_f1 + (q); \
          if (T##_f1 <= X##_f1) \
            { \
              S##_f1 = T##_f1 + (q); \
              X##_f1 -= T##_f1; \
              R##_f1 += (q); \
            } \
          _FP_FRAC_SLL_2 (X, 1); \
          (q) >>= 1; \
        } \
      (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
      while ((q) != _FP_WORK_ROUND) \
        { \
          T##_f0 = S##_f0 + (q); \
          T##_f1 = S##_f1; \
          if (T##_f1 < X##_f1 \
              || (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \
            { \
              S##_f0 = T##_f0 + (q); \
              /* Carry out of the low word of S.  */ \
              S##_f1 += (T##_f0 > S##_f0); \
              _FP_FRAC_DEC_2 (X, T); \
              R##_f0 += (q); \
            } \
          _FP_FRAC_SLL_2 (X, 1); \
          (q) >>= 1; \
        } \
      if (X##_f0 | X##_f1) \
        { \
          if (S##_f1 < X##_f1 \
              || (S##_f1 == X##_f1 && S##_f0 < X##_f0)) \
            R##_f0 |= _FP_WORK_ROUND; \
          R##_f0 |= _FP_WORK_STICKY; \
        } \
    } \
  while (0)
|
||||
|
||||
|
||||
/* Assembly/disassembly for converting to/from integral types.
|
||||
No shifting or overflow handled here. */
|
||||
|
||||
/* Assemble the two-word fraction X into the integer lvalue R of RSIZE
   bits.  When RSIZE fits in one word only X##_f0 is stored; otherwise
   R becomes (X##_f1 << _FP_W_TYPE_SIZE) + X##_f0.  No shifting for
   alignment and no overflow handling is done.  Uses GNU statement
   expressions.  */
#define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \
  (void) (((rsize) <= _FP_W_TYPE_SIZE) \
          ? ({ (r) = X##_f0; }) \
          : ({ \
              (r) = X##_f1; \
              (r) <<= _FP_W_TYPE_SIZE; \
              (r) += X##_f0; \
            }))
|
||||
|
||||
/* Split the integer R of RSIZE bits into the two-word fraction X:
   X##_f0 receives R (truncated to a word by the assignment) and
   X##_f1 receives R >> _FP_W_TYPE_SIZE, or 0 when RSIZE fits in one
   word.  */
#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \
  do \
    { \
      X##_f0 = (r); \
      X##_f1 = ((rsize) <= _FP_W_TYPE_SIZE \
                ? 0 \
                : (r) >> _FP_W_TYPE_SIZE); \
    } \
  while (0)
|
||||
|
||||
/* Convert FP values between word sizes. */
|
||||
|
||||
/* Two-word to one-word: keep only the low word of S.  */
#define _FP_FRAC_COPY_1_2(D, S) (D##_f = S##_f0)

/* One-word to two-word: S becomes the low word, the high word is 0.  */
#define _FP_FRAC_COPY_2_1(D, S) ((D##_f0 = S##_f), (D##_f1 = 0))

/* Two-word to two-word: plain copy via _FP_FRAC_COPY_2.  */
#define _FP_FRAC_COPY_2_2(D, S) _FP_FRAC_COPY_2 (D, S)
|
||||
|
||||
#endif /* !SOFT_FP_OP_2_H */
|
882
src/linpack/soft-fp/op-4.h
Normal file
882
src/linpack/soft-fp/op-4.h
Normal file
|
@ -0,0 +1,882 @@
|
|||
/* Software floating-point emulation.
|
||||
Basic four-word fraction declaration and manipulation.
|
||||
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson (rth@cygnus.com),
|
||||
Jakub Jelinek (jj@ultra.linux.cz),
|
||||
David S. Miller (davem@redhat.com) and
|
||||
Peter Maydell (pmaydell@chiark.greenend.org.uk).
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file into
|
||||
combinations with other programs, and to distribute those
|
||||
combinations without any restriction coming from the use of this
|
||||
file. (The Lesser General Public License restrictions do apply in
|
||||
other respects; for example, they cover modification of the file,
|
||||
and distribution when not linked into a combine executable.)
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef SOFT_FP_OP_4_H
|
||||
#define SOFT_FP_OP_4_H 1
|
||||
|
||||
/* A four-word fraction X is stored as the array X##_f[4]; index 0 is
   the word returned by _FP_FRAC_LOW_4 and index 3 the word returned by
   _FP_FRAC_HIGH_4.  */
#define _FP_FRAC_DECL_4(X) _FP_W_TYPE X##_f[4]
/* Copy all four words of S into D.  */
#define _FP_FRAC_COPY_4(D, S) \
  (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1], \
   D##_f[2] = S##_f[2], D##_f[3] = S##_f[3])
/* I is forwarded unchanged to __FP_FRAC_SET_4; presumably it expands
   to four comma-separated words such as _FP_ZEROFRAC_4 -- confirm
   against __FP_FRAC_SET_4.  */
#define _FP_FRAC_SET_4(X, I) __FP_FRAC_SET_4 (X, I)
#define _FP_FRAC_HIGH_4(X) (X##_f[3])
#define _FP_FRAC_LOW_4(X) (X##_f[0])
#define _FP_FRAC_WORD_4(X, w) (X##_f[w])
|
||||
|
||||
/* Shift the four-word fraction X left by N bits, filling with zeros.
   N is split into a whole-word count (skip) and a bit count (up).
   Whole-word shifts are handled by a plain word move so that no word
   is ever shifted by _FP_W_TYPE_SIZE bits; otherwise each result word
   combines two neighbouring source words.  The vacated low words are
   cleared by the final loop.  */
#define _FP_FRAC_SLL_4(X, N) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_SLL_4_up, _FP_FRAC_SLL_4_down; \
      _FP_I_TYPE _FP_FRAC_SLL_4_skip, _FP_FRAC_SLL_4_i; \
      _FP_FRAC_SLL_4_skip = (N) / _FP_W_TYPE_SIZE; \
      _FP_FRAC_SLL_4_up = (N) % _FP_W_TYPE_SIZE; \
      _FP_FRAC_SLL_4_down = _FP_W_TYPE_SIZE - _FP_FRAC_SLL_4_up; \
      if (!_FP_FRAC_SLL_4_up) \
        for (_FP_FRAC_SLL_4_i = 3; \
             _FP_FRAC_SLL_4_i >= _FP_FRAC_SLL_4_skip; \
             --_FP_FRAC_SLL_4_i) \
          X##_f[_FP_FRAC_SLL_4_i] \
            = X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip]; \
      else \
        { \
          for (_FP_FRAC_SLL_4_i = 3; \
               _FP_FRAC_SLL_4_i > _FP_FRAC_SLL_4_skip; \
               --_FP_FRAC_SLL_4_i) \
            X##_f[_FP_FRAC_SLL_4_i] \
              = ((X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip] \
                  << _FP_FRAC_SLL_4_up) \
                 | (X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip-1] \
                    >> _FP_FRAC_SLL_4_down)); \
          /* Lowest surviving word has no lower neighbour.  */ \
          X##_f[_FP_FRAC_SLL_4_i--] = X##_f[0] << _FP_FRAC_SLL_4_up; \
        } \
      for (; _FP_FRAC_SLL_4_i >= 0; --_FP_FRAC_SLL_4_i) \
        X##_f[_FP_FRAC_SLL_4_i] = 0; \
    } \
  while (0)
|
||||
|
||||
/* This one was broken too. */
|
||||
/* Shift the four-word fraction X right by N bits, filling with zeros;
   bits shifted out are discarded.  N is split into a whole-word count
   (skip) and a bit count (down).  Whole-word shifts are handled by a
   plain word move so that no word is ever shifted by _FP_W_TYPE_SIZE
   bits; otherwise each result word combines two neighbouring source
   words.  The vacated high words are cleared by the final loop.  */
#define _FP_FRAC_SRL_4(X, N) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_SRL_4_up, _FP_FRAC_SRL_4_down; \
      _FP_I_TYPE _FP_FRAC_SRL_4_skip, _FP_FRAC_SRL_4_i; \
      _FP_FRAC_SRL_4_skip = (N) / _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRL_4_down = (N) % _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRL_4_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRL_4_down; \
      if (!_FP_FRAC_SRL_4_down) \
        for (_FP_FRAC_SRL_4_i = 0; \
             _FP_FRAC_SRL_4_i <= 3-_FP_FRAC_SRL_4_skip; \
             ++_FP_FRAC_SRL_4_i) \
          X##_f[_FP_FRAC_SRL_4_i] \
            = X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip]; \
      else \
        { \
          for (_FP_FRAC_SRL_4_i = 0; \
               _FP_FRAC_SRL_4_i < 3-_FP_FRAC_SRL_4_skip; \
               ++_FP_FRAC_SRL_4_i) \
            X##_f[_FP_FRAC_SRL_4_i] \
              = ((X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip] \
                  >> _FP_FRAC_SRL_4_down) \
                 | (X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip+1] \
                    << _FP_FRAC_SRL_4_up)); \
          /* Highest surviving word has no upper neighbour.  */ \
          X##_f[_FP_FRAC_SRL_4_i++] = X##_f[3] >> _FP_FRAC_SRL_4_down; \
        } \
      for (; _FP_FRAC_SRL_4_i < 4; ++_FP_FRAC_SRL_4_i) \
        X##_f[_FP_FRAC_SRL_4_i] = 0; \
    } \
  while (0)
|
||||
|
||||
|
||||
/* Right shift with sticky-lsb.
|
||||
What this actually means is that we do a standard right-shift,
|
||||
but that if any of the bits that fall off the right hand side
|
||||
were one then we always set the LSbit. */
|
||||
/* Shift the four-word fraction X right by N bits and report in S
   (set to 0 or 1) whether any non-zero bit was shifted out.
   Whole words that fall off are OR-ed into the sticky accumulator
   first; for a non-word-aligned shift the bits dropped from the lowest
   surviving word are added as well.  The shift itself is the same as
   _FP_FRAC_SRL_4.  X##_f[0] is NOT modified with the sticky bit here
   (see _FP_FRAC_SRS_4 for that).  SIZE is accepted but not used by
   this macro.  */
#define _FP_FRAC_SRST_4(X, S, N, size) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_SRST_4_up, _FP_FRAC_SRST_4_down; \
      _FP_I_TYPE _FP_FRAC_SRST_4_skip, _FP_FRAC_SRST_4_i; \
      _FP_W_TYPE _FP_FRAC_SRST_4_s; \
      _FP_FRAC_SRST_4_skip = (N) / _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRST_4_down = (N) % _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRST_4_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRST_4_down; \
      for (_FP_FRAC_SRST_4_s = _FP_FRAC_SRST_4_i = 0; \
           _FP_FRAC_SRST_4_i < _FP_FRAC_SRST_4_skip; \
           ++_FP_FRAC_SRST_4_i) \
        _FP_FRAC_SRST_4_s |= X##_f[_FP_FRAC_SRST_4_i]; \
      if (!_FP_FRAC_SRST_4_down) \
        for (_FP_FRAC_SRST_4_i = 0; \
             _FP_FRAC_SRST_4_i <= 3-_FP_FRAC_SRST_4_skip; \
             ++_FP_FRAC_SRST_4_i) \
          X##_f[_FP_FRAC_SRST_4_i] \
            = X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip]; \
      else \
        { \
          /* Here the index equals skip: collect the bits about to \
             drop out of the lowest surviving word.  */ \
          _FP_FRAC_SRST_4_s \
            |= X##_f[_FP_FRAC_SRST_4_i] << _FP_FRAC_SRST_4_up; \
          for (_FP_FRAC_SRST_4_i = 0; \
               _FP_FRAC_SRST_4_i < 3-_FP_FRAC_SRST_4_skip; \
               ++_FP_FRAC_SRST_4_i) \
            X##_f[_FP_FRAC_SRST_4_i] \
              = ((X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip] \
                  >> _FP_FRAC_SRST_4_down) \
                 | (X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip+1] \
                    << _FP_FRAC_SRST_4_up)); \
          X##_f[_FP_FRAC_SRST_4_i++] \
            = X##_f[3] >> _FP_FRAC_SRST_4_down; \
        } \
      for (; _FP_FRAC_SRST_4_i < 4; ++_FP_FRAC_SRST_4_i) \
        X##_f[_FP_FRAC_SRST_4_i] = 0; \
      S = (_FP_FRAC_SRST_4_s != 0); \
    } \
  while (0)
|
||||
|
||||
/* Right shift with sticky lsb: shift the four-word fraction X right by
   N bits via _FP_FRAC_SRST_4 and OR the resulting sticky flag into
   X##_f[0], so lost non-zero bits leave the least significant bit
   set.  */
#define _FP_FRAC_SRS_4(X, N, size) \
  do \
    { \
      int _FP_FRAC_SRS_4_sticky; \
      _FP_FRAC_SRST_4 (X, _FP_FRAC_SRS_4_sticky, (N), (size)); \
      X##_f[0] |= _FP_FRAC_SRS_4_sticky; \
    } \
  while (0)
|
||||
|
||||
/* Four-word arithmetic wrappers.  Each one expands the fraction names
   into their individual words, most significant word (index 3) first,
   and forwards them to the corresponding __FP_FRAC_*_4 primitive.  */

/* R = X + Y.  */
#define _FP_FRAC_ADD_4(R, X, Y) \
  __FP_FRAC_ADD_4 (R##_f[3], R##_f[2], R##_f[1], R##_f[0], \
                   X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
                   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])

/* R = X - Y.  */
#define _FP_FRAC_SUB_4(R, X, Y) \
  __FP_FRAC_SUB_4 (R##_f[3], R##_f[2], R##_f[1], R##_f[0], \
                   X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
                   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])

/* X -= Y.  */
#define _FP_FRAC_DEC_4(X, Y) \
  __FP_FRAC_DEC_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
                   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])

/* X += I, where I is a single value.  */
#define _FP_FRAC_ADDI_4(X, I) \
  __FP_FRAC_ADDI_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
|
||||
|
||||
/* Four-word constants, written as comma-separated word lists.  */
#define _FP_ZEROFRAC_4 0, 0, 0, 0
#define _FP_MINFRAC_4 0, 0, 0, 1
#define _FP_MAXFRAC_4 (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)

/* True when every word of X is zero.  */
#define _FP_FRAC_ZEROP_4(X) ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)
/* True when the top word of X, viewed as _FP_WS_TYPE, is negative.  */
#define _FP_FRAC_NEGP_4(X) ((_FP_WS_TYPE) X##_f[3] < 0)
/* Non-zero when the _FP_OVERFLOW_<fs> bit is set in the high word.  */
#define _FP_FRAC_OVERP_4(fs, X) (_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
/* Non-zero when the _FP_HIGHBIT_DW_<fs> bit is set in the high word of
   a double-width fraction.  */
#define _FP_FRAC_HIGHBIT_DW_4(fs, X) \
  (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
/* Clear the _FP_OVERFLOW_<fs> bit in the high word of X.  */
#define _FP_FRAC_CLEAR_OVERP_4(fs, X) (_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
|
||||
|
||||
/* Four-word comparisons.  GT and GE compare word by word starting at
   the most significant word (index 3) and only look at a lower word
   when all higher words are equal.  Arguments are evaluated more than
   once.  */

/* X == Y.  */
#define _FP_FRAC_EQ_4(X, Y) \
  (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1] \
   && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3])

/* X > Y.  */
#define _FP_FRAC_GT_4(X, Y) \
  (X##_f[3] > Y##_f[3] \
   || (X##_f[3] == Y##_f[3] \
       && (X##_f[2] > Y##_f[2] \
           || (X##_f[2] == Y##_f[2] \
               && (X##_f[1] > Y##_f[1] \
                   || (X##_f[1] == Y##_f[1] \
                       && X##_f[0] > Y##_f[0]))))))

/* X >= Y.  */
#define _FP_FRAC_GE_4(X, Y) \
  (X##_f[3] > Y##_f[3] \
   || (X##_f[3] == Y##_f[3] \
       && (X##_f[2] > Y##_f[2] \
           || (X##_f[2] == Y##_f[2] \
               && (X##_f[1] > Y##_f[1] \
                   || (X##_f[1] == Y##_f[1] \
                       && X##_f[0] >= Y##_f[0]))))))
|
||||
|
||||
|
||||
/* Count the leading zero bits of the four-word fraction X into R.
   The highest non-zero word is passed to __FP_CLZ and one word size is
   added for every all-zero word above it.  If the three upper words
   are zero, X##_f[0] is passed to __FP_CLZ unconditionally, so the
   result for an all-zero X depends on __FP_CLZ's behaviour for 0.  */
#define _FP_FRAC_CLZ_4(R, X) \
  do \
    { \
      if (X##_f[3]) \
        __FP_CLZ ((R), X##_f[3]); \
      else if (X##_f[2]) \
        { \
          __FP_CLZ ((R), X##_f[2]); \
          (R) += _FP_W_TYPE_SIZE; \
        } \
      else if (X##_f[1]) \
        { \
          __FP_CLZ ((R), X##_f[1]); \
          (R) += _FP_W_TYPE_SIZE*2; \
        } \
      else \
        { \
          __FP_CLZ ((R), X##_f[0]); \
          (R) += _FP_W_TYPE_SIZE*3; \
        } \
    } \
  while (0)
|
||||
|
||||
|
||||
/* Unpack the value VAL into raw parts of X.  VAL is stored in the flt
   member of a local union _FP_UNION_<fs>; the bits.frac0..frac3
   members are then copied to X##_f[0..3], bits.exp to X##_e and
   bits.sign to X##_s.  No bias removal or normalisation is done.  */
#define _FP_UNPACK_RAW_4(fs, X, val) \
  do \
    { \
      union _FP_UNION_##fs _FP_UNPACK_RAW_4_flo; \
      _FP_UNPACK_RAW_4_flo.flt = (val); \
      X##_f[0] = _FP_UNPACK_RAW_4_flo.bits.frac0; \
      X##_f[1] = _FP_UNPACK_RAW_4_flo.bits.frac1; \
      X##_f[2] = _FP_UNPACK_RAW_4_flo.bits.frac2; \
      X##_f[3] = _FP_UNPACK_RAW_4_flo.bits.frac3; \
      X##_e = _FP_UNPACK_RAW_4_flo.bits.exp; \
      X##_s = _FP_UNPACK_RAW_4_flo.bits.sign; \
    } \
  while (0)
|
||||
|
||||
/* Pointer variant of _FP_UNPACK_RAW_4: VAL is converted to a pointer
   to union _FP_UNION_<fs> and the bits.frac0..frac3, bits.exp and
   bits.sign members are read through it into X##_f[0..3], X##_e and
   X##_s.  */
#define _FP_UNPACK_RAW_4_P(fs, X, val) \
  do \
    { \
      union _FP_UNION_##fs *_FP_UNPACK_RAW_4_P_flo \
        = (union _FP_UNION_##fs *) (val); \
 \
      X##_f[0] = _FP_UNPACK_RAW_4_P_flo->bits.frac0; \
      X##_f[1] = _FP_UNPACK_RAW_4_P_flo->bits.frac1; \
      X##_f[2] = _FP_UNPACK_RAW_4_P_flo->bits.frac2; \
      X##_f[3] = _FP_UNPACK_RAW_4_P_flo->bits.frac3; \
      X##_e = _FP_UNPACK_RAW_4_P_flo->bits.exp; \
      X##_s = _FP_UNPACK_RAW_4_P_flo->bits.sign; \
    } \
  while (0)
|
||||
|
||||
/* Pack the raw parts of X into the lvalue VAL.  A local
   union _FP_UNION_<fs> has its bits.frac0..frac3, bits.exp and
   bits.sign members set from X##_f[0..3], X##_e and X##_s, and its flt
   member is then assigned to VAL.  No rounding or range checking is
   done.  */
#define _FP_PACK_RAW_4(fs, val, X) \
  do \
    { \
      union _FP_UNION_##fs _FP_PACK_RAW_4_flo; \
      _FP_PACK_RAW_4_flo.bits.frac0 = X##_f[0]; \
      _FP_PACK_RAW_4_flo.bits.frac1 = X##_f[1]; \
      _FP_PACK_RAW_4_flo.bits.frac2 = X##_f[2]; \
      _FP_PACK_RAW_4_flo.bits.frac3 = X##_f[3]; \
      _FP_PACK_RAW_4_flo.bits.exp = X##_e; \
      _FP_PACK_RAW_4_flo.bits.sign = X##_s; \
      (val) = _FP_PACK_RAW_4_flo.flt; \
    } \
  while (0)
|
||||
|
||||
/* Pointer variant of _FP_PACK_RAW_4: VAL is converted to a pointer to
   union _FP_UNION_<fs> and its bits.frac0..frac3, bits.exp and
   bits.sign members are written in place from X##_f[0..3], X##_e and
   X##_s.  */
#define _FP_PACK_RAW_4_P(fs, val, X) \
  do \
    { \
      union _FP_UNION_##fs *_FP_PACK_RAW_4_P_flo \
        = (union _FP_UNION_##fs *) (val); \
 \
      _FP_PACK_RAW_4_P_flo->bits.frac0 = X##_f[0]; \
      _FP_PACK_RAW_4_P_flo->bits.frac1 = X##_f[1]; \
      _FP_PACK_RAW_4_P_flo->bits.frac2 = X##_f[2]; \
      _FP_PACK_RAW_4_P_flo->bits.frac3 = X##_f[3]; \
      _FP_PACK_RAW_4_P_flo->bits.exp = X##_e; \
      _FP_PACK_RAW_4_P_flo->bits.sign = X##_s; \
    } \
  while (0)
|
||||
|
||||
/* Multiplication algorithms: */
|
||||
|
||||
/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
|
||||
|
||||
/* Double-width product of two four-word fractions.
   DOIT (hi, lo, a, b) is a caller-supplied 1W * 1W => 2W multiply.
   All sixteen partial products X[i] * Y[j] are formed and accumulated
   column by column (i + j = 0 .. 6) into the eight words of R
   (accessed through _FP_FRAC_WORD_8).  Each column is summed into a
   three-word window so carries reach the next two words; the first
   addition into a new window passes 0 for the not-yet-written top
   word.  Five two-word temporaries (b..f) are reused between
   columns.  WFRACBITS is accepted but not used by this macro.  */
#define _FP_MUL_MEAT_DW_4_wide(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_b); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_c); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_d); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_e); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_f); \
 \
      /* Columns 0, 1 and 2.  */ \
      doit (_FP_FRAC_WORD_8 (R, 1), _FP_FRAC_WORD_8 (R, 0), \
            X##_f[0], Y##_f[0]); \
      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0, \
            X##_f[0], Y##_f[1]); \
      doit (_FP_MUL_MEAT_DW_4_wide_c_f1, _FP_MUL_MEAT_DW_4_wide_c_f0, \
            X##_f[1], Y##_f[0]); \
      doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0, \
            X##_f[1], Y##_f[1]); \
      doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0, \
            X##_f[0], Y##_f[2]); \
      doit (_FP_MUL_MEAT_DW_4_wide_f_f1, _FP_MUL_MEAT_DW_4_wide_f_f0, \
            X##_f[2], Y##_f[0]); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \
                       _FP_FRAC_WORD_8 (R, 1), 0, \
                       _FP_MUL_MEAT_DW_4_wide_b_f1, \
                       _FP_MUL_MEAT_DW_4_wide_b_f0, \
                       0, 0, _FP_FRAC_WORD_8 (R, 1)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \
                       _FP_FRAC_WORD_8 (R, 1), 0, \
                       _FP_MUL_MEAT_DW_4_wide_c_f1, \
                       _FP_MUL_MEAT_DW_4_wide_c_f0, \
                       _FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \
                       _FP_FRAC_WORD_8 (R, 1)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2), 0, \
                       _FP_MUL_MEAT_DW_4_wide_d_f1, \
                       _FP_MUL_MEAT_DW_4_wide_d_f0, \
                       0, _FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2), 0, \
                       _FP_MUL_MEAT_DW_4_wide_e_f1, \
                       _FP_MUL_MEAT_DW_4_wide_e_f0, \
                       _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2), 0, \
                       _FP_MUL_MEAT_DW_4_wide_f_f1, \
                       _FP_MUL_MEAT_DW_4_wide_f_f0, \
                       _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2)); \
      /* Column 3.  */ \
      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, \
            _FP_MUL_MEAT_DW_4_wide_b_f0, X##_f[0], Y##_f[3]); \
      doit (_FP_MUL_MEAT_DW_4_wide_c_f1, \
            _FP_MUL_MEAT_DW_4_wide_c_f0, X##_f[3], Y##_f[0]); \
      doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0, \
            X##_f[1], Y##_f[2]); \
      doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0, \
            X##_f[2], Y##_f[1]); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3), 0, \
                       _FP_MUL_MEAT_DW_4_wide_b_f1, \
                       _FP_MUL_MEAT_DW_4_wide_b_f0, \
                       0, _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3), 0, \
                       _FP_MUL_MEAT_DW_4_wide_c_f1, \
                       _FP_MUL_MEAT_DW_4_wide_c_f0, \
                       _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3), 0, \
                       _FP_MUL_MEAT_DW_4_wide_d_f1, \
                       _FP_MUL_MEAT_DW_4_wide_d_f0, \
                       _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3), 0, \
                       _FP_MUL_MEAT_DW_4_wide_e_f1, \
                       _FP_MUL_MEAT_DW_4_wide_e_f0, \
                       _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3)); \
      /* Columns 4 and 5.  */ \
      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0, \
            X##_f[2], Y##_f[2]); \
      doit (_FP_MUL_MEAT_DW_4_wide_c_f1, _FP_MUL_MEAT_DW_4_wide_c_f0, \
            X##_f[1], Y##_f[3]); \
      doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0, \
            X##_f[3], Y##_f[1]); \
      doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0, \
            X##_f[2], Y##_f[3]); \
      doit (_FP_MUL_MEAT_DW_4_wide_f_f1, _FP_MUL_MEAT_DW_4_wide_f_f0, \
            X##_f[3], Y##_f[2]); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4), 0, \
                       _FP_MUL_MEAT_DW_4_wide_b_f1, \
                       _FP_MUL_MEAT_DW_4_wide_b_f0, \
                       0, _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4), 0, \
                       _FP_MUL_MEAT_DW_4_wide_c_f1, \
                       _FP_MUL_MEAT_DW_4_wide_c_f0, \
                       _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4), 0, \
                       _FP_MUL_MEAT_DW_4_wide_d_f1, \
                       _FP_MUL_MEAT_DW_4_wide_d_f0, \
                       _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
                       _FP_FRAC_WORD_8 (R, 5), 0, \
                       _FP_MUL_MEAT_DW_4_wide_e_f1, \
                       _FP_MUL_MEAT_DW_4_wide_e_f0, \
                       0, _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
                       _FP_FRAC_WORD_8 (R, 5), 0, \
                       _FP_MUL_MEAT_DW_4_wide_f_f1, \
                       _FP_MUL_MEAT_DW_4_wide_f_f0, \
                       _FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
                       _FP_FRAC_WORD_8 (R, 5)); \
      /* Column 6.  */ \
      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0, \
            X##_f[3], Y##_f[3]); \
      __FP_FRAC_ADD_2 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
                       _FP_MUL_MEAT_DW_4_wide_b_f1, \
                       _FP_MUL_MEAT_DW_4_wide_b_f0, \
                       _FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6)); \
    } \
  while (0)
|
||||
|
||||
/* Four-word multiply, "wide" variant.  Builds the eight-word product of
   X and Y with _FP_MUL_MEAT_DW_4_wide (DOIT is handed through to it
   unchanged), shifts it right with sticky by WFRACBITS-1 and stores the
   low four words of the shifted product in R.  */
#define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_8 (_FP_MUL_MEAT_4_wide_z); \
 \
      _FP_MUL_MEAT_DW_4_wide ((wfracbits), _FP_MUL_MEAT_4_wide_z, \
			      X, Y, doit); \
 \
      /* Normalize since we know where the msb of the multiplicands \
	 were (bit B), we know that the msb of the product is \
	 at either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_8 (_FP_MUL_MEAT_4_wide_z, (wfracbits)-1, \
		      2*(wfracbits)); \
      __FP_FRAC_SET_4 (R, _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 3), \
		       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 2), \
		       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 1), \
		       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 0)); \
    } \
  while (0)
|
||||
|
||||
/* Double-width four-word multiply via GMP: hands the four fraction words
   of X and of Y to mpn_mul_n and lets it write the product into R##_f.
   The operands are taken from the X and Y macro arguments rather than
   from fixed names, so the macro works whatever the caller calls its
   variables.  WFRACBITS is not used here.  */
#define _FP_MUL_MEAT_DW_4_gmp(wfracbits, R, X, Y) \
  do \
    { \
      mpn_mul_n (R##_f, X##_f, Y##_f, 4); \
    } \
  while (0)
|
||||
|
||||
/* Four-word multiply, GMP variant.  Same shape as the "wide" variant:
   build the eight-word product with _FP_MUL_MEAT_DW_4_gmp, shift it
   right with sticky by WFRACBITS-1 and store the low four words of the
   shifted product in R.  */
#define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y) \
  do \
    { \
      _FP_FRAC_DECL_8 (_FP_MUL_MEAT_4_gmp_z); \
 \
      _FP_MUL_MEAT_DW_4_gmp ((wfracbits), _FP_MUL_MEAT_4_gmp_z, X, Y); \
 \
      /* Normalize since we know where the msb of the multiplicands \
	 were (bit B), we know that the msb of the product is \
	 at either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_8 (_FP_MUL_MEAT_4_gmp_z, (wfracbits)-1, \
		      2*(wfracbits)); \
      __FP_FRAC_SET_4 (R, _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 3), \
		       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 2), \
		       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 1), \
		       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 0)); \
    } \
  while (0)
|
||||
|
||||
/* Helper utility for _FP_DIV_MEAT_4_udiv:
 * pppp = m * nnn.
 * Multiplies the single word M by the three-word value (N2, N1, N0) and
 * leaves the four-word product in (P3, P2, P1, P0).  Each partial product
 * comes from umul_ppmm; the low half of the next partial product is folded
 * into the words already produced with __FP_FRAC_ADDI_2, which carries
 * into the upper word.  */
#define umul_ppppmnnn(p3, p2, p1, p0, m, n2, n1, n0) \
  do \
    { \
      UWtype umul_ppppmnnn_t; \
      umul_ppmm (p1, p0, m, n0); \
      umul_ppmm (p2, umul_ppppmnnn_t, m, n1); \
      __FP_FRAC_ADDI_2 (p2, p1, umul_ppppmnnn_t); \
      umul_ppmm (p3, umul_ppppmnnn_t, m, n2); \
      __FP_FRAC_ADDI_2 (p3, p2, umul_ppppmnnn_t); \
    } \
  while (0)
|
||||
|
||||
/* Division algorithms: */
|
||||
|
||||
/* Four-word fraction division R = X / Y built on udiv_qrnnd.
   FS selects the format (it is pasted into _FP_WFRACXBITS_##fs).
   X and Y are modified in place: X is pre-shifted right by one bit when
   X >= Y (otherwise R##_e is decremented), and Y is shifted left so that
   its most significant bit is set.  The loop then produces one quotient
   word per iteration, from R##_f[3] down to R##_f[0]; each estimate is
   corrected downwards by at most two.  After the last word, the sticky
   bit is set in R##_f[0] if a remainder is left over.  */
#define _FP_DIV_MEAT_4_udiv(fs, R, X, Y) \
  do \
    { \
      int _FP_DIV_MEAT_4_udiv_i; \
      _FP_FRAC_DECL_4 (_FP_DIV_MEAT_4_udiv_n); \
      _FP_FRAC_DECL_4 (_FP_DIV_MEAT_4_udiv_m); \
      _FP_FRAC_SET_4 (_FP_DIV_MEAT_4_udiv_n, _FP_ZEROFRAC_4); \
      if (_FP_FRAC_GE_4 (X, Y)) \
	{ \
	  _FP_DIV_MEAT_4_udiv_n_f[3] \
	    = X##_f[0] << (_FP_W_TYPE_SIZE - 1); \
	  _FP_FRAC_SRL_4 (X, 1); \
	} \
      else \
	R##_e--; \
 \
      /* Normalize, i.e. make the most significant bit of the \
	 denominator set.  */ \
      _FP_FRAC_SLL_4 (Y, _FP_WFRACXBITS_##fs); \
 \
      for (_FP_DIV_MEAT_4_udiv_i = 3; ; _FP_DIV_MEAT_4_udiv_i--) \
	{ \
	  if (X##_f[3] == Y##_f[3]) \
	    { \
	      /* This is a special case, not an optimization \
		 (X##_f[3]/Y##_f[3] would not fit into UWtype). \
		 As X## is guaranteed to be < Y, \
		 R##_f[_FP_DIV_MEAT_4_udiv_i] can be either \
		 (UWtype)-1 or (UWtype)-2.  */ \
	      R##_f[_FP_DIV_MEAT_4_udiv_i] = -1; \
	      if (!_FP_DIV_MEAT_4_udiv_i) \
		break; \
	      __FP_FRAC_SUB_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
			       Y##_f[2], Y##_f[1], Y##_f[0], 0, \
			       X##_f[2], X##_f[1], X##_f[0], \
			       _FP_DIV_MEAT_4_udiv_n_f[_FP_DIV_MEAT_4_udiv_i]); \
	      _FP_FRAC_SUB_4 (X, Y, X); \
	      if (X##_f[3] > Y##_f[3]) \
		{ \
		  R##_f[_FP_DIV_MEAT_4_udiv_i] = -2; \
		  _FP_FRAC_ADD_4 (X, Y, X); \
		} \
	    } \
	  else \
	    { \
	      udiv_qrnnd (R##_f[_FP_DIV_MEAT_4_udiv_i], \
			  X##_f[3], X##_f[3], X##_f[2], Y##_f[3]); \
	      umul_ppppmnnn (_FP_DIV_MEAT_4_udiv_m_f[3], \
			     _FP_DIV_MEAT_4_udiv_m_f[2], \
			     _FP_DIV_MEAT_4_udiv_m_f[1], \
			     _FP_DIV_MEAT_4_udiv_m_f[0], \
			     R##_f[_FP_DIV_MEAT_4_udiv_i], \
			     Y##_f[2], Y##_f[1], Y##_f[0]); \
	      X##_f[2] = X##_f[1]; \
	      X##_f[1] = X##_f[0]; \
	      X##_f[0] \
		= _FP_DIV_MEAT_4_udiv_n_f[_FP_DIV_MEAT_4_udiv_i]; \
	      if (_FP_FRAC_GT_4 (_FP_DIV_MEAT_4_udiv_m, X)) \
		{ \
		  R##_f[_FP_DIV_MEAT_4_udiv_i]--; \
		  _FP_FRAC_ADD_4 (X, Y, X); \
		  if (_FP_FRAC_GE_4 (X, Y) \
		      && _FP_FRAC_GT_4 (_FP_DIV_MEAT_4_udiv_m, X)) \
		    { \
		      R##_f[_FP_DIV_MEAT_4_udiv_i]--; \
		      _FP_FRAC_ADD_4 (X, Y, X); \
		    } \
		} \
	      _FP_FRAC_DEC_4 (X, _FP_DIV_MEAT_4_udiv_m); \
	      if (!_FP_DIV_MEAT_4_udiv_i) \
		{ \
		  if (!_FP_FRAC_EQ_4 (X, _FP_DIV_MEAT_4_udiv_m)) \
		    R##_f[0] |= _FP_WORK_STICKY; \
		  break; \
		} \
	    } \
	} \
    } \
  while (0)
|
||||
|
||||
|
||||
/* Square root algorithms:
   We have just one right now, maybe Newton approximation
   should be added for those machines where division is fast.  */

/* Bit-by-bit square root over a four-word fraction.  Q is the trial bit:
   the caller supplies its starting value for the top word, and the macro
   resets it to the top bit of a word before each of the three lower
   words.  For every trial bit, T = S + Q is compared against the running
   remainder X; when T fits, T is subtracted from X, S is advanced by 2*Q
   (with carries into the higher words) and Q is added to the result R.
   X is shifted left by one after every step.  The lowest word stops at
   _FP_WORK_ROUND; a nonzero remainder then sets the sticky bit, and also
   the round bit when X > S.  R, S, T, X and Q are all modified.  */
#define _FP_SQRT_MEAT_4(R, S, T, X, q) \
  do \
    { \
      while (q) \
	{ \
	  T##_f[3] = S##_f[3] + (q); \
	  if (T##_f[3] <= X##_f[3]) \
	    { \
	      S##_f[3] = T##_f[3] + (q); \
	      X##_f[3] -= T##_f[3]; \
	      R##_f[3] += (q); \
	    } \
	  _FP_FRAC_SLL_4 (X, 1); \
	  (q) >>= 1; \
	} \
      (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
      while (q) \
	{ \
	  T##_f[2] = S##_f[2] + (q); \
	  T##_f[3] = S##_f[3]; \
	  if (T##_f[3] < X##_f[3] \
	      || (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2])) \
	    { \
	      S##_f[2] = T##_f[2] + (q); \
	      S##_f[3] += (T##_f[2] > S##_f[2]); \
	      __FP_FRAC_DEC_2 (X##_f[3], X##_f[2], \
			       T##_f[3], T##_f[2]); \
	      R##_f[2] += (q); \
	    } \
	  _FP_FRAC_SLL_4 (X, 1); \
	  (q) >>= 1; \
	} \
      (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
      while (q) \
	{ \
	  T##_f[1] = S##_f[1] + (q); \
	  T##_f[2] = S##_f[2]; \
	  T##_f[3] = S##_f[3]; \
	  if (T##_f[3] < X##_f[3] \
	      || (T##_f[3] == X##_f[3] \
		  && (T##_f[2] < X##_f[2] \
		      || (T##_f[2] == X##_f[2] \
			  && T##_f[1] <= X##_f[1])))) \
	    { \
	      S##_f[1] = T##_f[1] + (q); \
	      S##_f[2] += (T##_f[1] > S##_f[1]); \
	      S##_f[3] += (T##_f[2] > S##_f[2]); \
	      __FP_FRAC_DEC_3 (X##_f[3], X##_f[2], X##_f[1], \
			       T##_f[3], T##_f[2], T##_f[1]); \
	      R##_f[1] += (q); \
	    } \
	  _FP_FRAC_SLL_4 (X, 1); \
	  (q) >>= 1; \
	} \
      (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
      while ((q) != _FP_WORK_ROUND) \
	{ \
	  T##_f[0] = S##_f[0] + (q); \
	  T##_f[1] = S##_f[1]; \
	  T##_f[2] = S##_f[2]; \
	  T##_f[3] = S##_f[3]; \
	  if (_FP_FRAC_GE_4 (X, T)) \
	    { \
	      S##_f[0] = T##_f[0] + (q); \
	      S##_f[1] += (T##_f[0] > S##_f[0]); \
	      S##_f[2] += (T##_f[1] > S##_f[1]); \
	      S##_f[3] += (T##_f[2] > S##_f[2]); \
	      _FP_FRAC_DEC_4 (X, T); \
	      R##_f[0] += (q); \
	    } \
	  _FP_FRAC_SLL_4 (X, 1); \
	  (q) >>= 1; \
	} \
      if (!_FP_FRAC_ZEROP_4 (X)) \
	{ \
	  if (_FP_FRAC_GT_4 (X, S)) \
	    R##_f[0] |= _FP_WORK_ROUND; \
	  R##_f[0] |= _FP_WORK_STICKY; \
	} \
    } \
  while (0)
|
||||
|
||||
|
||||
/* Internals. */
|
||||
|
||||
/* Store four words into X's fraction; I3 is the most significant word,
   I0 the least.  Expands to a single comma expression.  */
#define __FP_FRAC_SET_4(X, I3, I2, I1, I0) \
  (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0)
|
||||
|
||||
#ifndef __FP_FRAC_ADD_3
/* Three-word add with carry propagation: (r2,r1,r0) = (x2,x1,x0) +
   (y2,y1,y0).  A carry out of a word is detected by the sum being smaller
   than one of its operands.  The carry out of the top word is dropped.
   Only used when no earlier definition of the same name is in effect.  */
# define __FP_FRAC_ADD_3(r2, r1, r0, x2, x1, x0, y2, y1, y0) \
  do \
    { \
      _FP_W_TYPE __FP_FRAC_ADD_3_c1, __FP_FRAC_ADD_3_c2; \
      r0 = x0 + y0; \
      __FP_FRAC_ADD_3_c1 = r0 < x0; \
      r1 = x1 + y1; \
      __FP_FRAC_ADD_3_c2 = r1 < x1; \
      r1 += __FP_FRAC_ADD_3_c1; \
      __FP_FRAC_ADD_3_c2 |= r1 < __FP_FRAC_ADD_3_c1; \
      r2 = x2 + y2 + __FP_FRAC_ADD_3_c2; \
    } \
  while (0)
#endif
|
||||
|
||||
#ifndef __FP_FRAC_ADD_4
/* Four-word add with carry propagation: (r3..r0) = (x3..x0) + (y3..y0).
   Same scheme as the three-word version, with one more carry stage.
   The carry out of the top word is dropped.  */
# define __FP_FRAC_ADD_4(r3, r2, r1, r0, x3, x2, x1, x0, y3, y2, y1, y0) \
  do \
    { \
      _FP_W_TYPE __FP_FRAC_ADD_4_c1, __FP_FRAC_ADD_4_c2; \
      _FP_W_TYPE __FP_FRAC_ADD_4_c3; \
      r0 = x0 + y0; \
      __FP_FRAC_ADD_4_c1 = r0 < x0; \
      r1 = x1 + y1; \
      __FP_FRAC_ADD_4_c2 = r1 < x1; \
      r1 += __FP_FRAC_ADD_4_c1; \
      __FP_FRAC_ADD_4_c2 |= r1 < __FP_FRAC_ADD_4_c1; \
      r2 = x2 + y2; \
      __FP_FRAC_ADD_4_c3 = r2 < x2; \
      r2 += __FP_FRAC_ADD_4_c2; \
      __FP_FRAC_ADD_4_c3 |= r2 < __FP_FRAC_ADD_4_c2; \
      r3 = x3 + y3 + __FP_FRAC_ADD_4_c3; \
    } \
  while (0)
#endif
|
||||
|
||||
#ifndef __FP_FRAC_SUB_3
/* Three-word subtract with borrow propagation: (r2,r1,r0) = (x2,x1,x0) -
   (y2,y1,y0).  The low two result words are computed into temporaries
   and only written to r1/r0 at the end, after every x and y operand has
   been read, so the outputs may name the same objects as the inputs.  */
# define __FP_FRAC_SUB_3(r2, r1, r0, x2, x1, x0, y2, y1, y0) \
  do \
    { \
      _FP_W_TYPE __FP_FRAC_SUB_3_tmp[2]; \
      _FP_W_TYPE __FP_FRAC_SUB_3_c1, __FP_FRAC_SUB_3_c2; \
      __FP_FRAC_SUB_3_tmp[0] = x0 - y0; \
      __FP_FRAC_SUB_3_c1 = __FP_FRAC_SUB_3_tmp[0] > x0; \
      __FP_FRAC_SUB_3_tmp[1] = x1 - y1; \
      __FP_FRAC_SUB_3_c2 = __FP_FRAC_SUB_3_tmp[1] > x1; \
      __FP_FRAC_SUB_3_tmp[1] -= __FP_FRAC_SUB_3_c1; \
      __FP_FRAC_SUB_3_c2 |= __FP_FRAC_SUB_3_c1 && (y1 == x1); \
      r2 = x2 - y2 - __FP_FRAC_SUB_3_c2; \
      r1 = __FP_FRAC_SUB_3_tmp[1]; \
      r0 = __FP_FRAC_SUB_3_tmp[0]; \
    } \
  while (0)
#endif
|
||||
|
||||
#ifndef __FP_FRAC_SUB_4
/* Four-word subtract with borrow propagation: (r3..r0) = (x3..x0) -
   (y3..y0).  The low three result words go through temporaries and are
   written to r2/r1/r0 only after every operand has been read, so the
   outputs may name the same objects as the inputs.  */
# define __FP_FRAC_SUB_4(r3, r2, r1, r0, x3, x2, x1, x0, y3, y2, y1, y0) \
  do \
    { \
      _FP_W_TYPE __FP_FRAC_SUB_4_tmp[3]; \
      _FP_W_TYPE __FP_FRAC_SUB_4_c1, __FP_FRAC_SUB_4_c2; \
      _FP_W_TYPE __FP_FRAC_SUB_4_c3; \
      __FP_FRAC_SUB_4_tmp[0] = x0 - y0; \
      __FP_FRAC_SUB_4_c1 = __FP_FRAC_SUB_4_tmp[0] > x0; \
      __FP_FRAC_SUB_4_tmp[1] = x1 - y1; \
      __FP_FRAC_SUB_4_c2 = __FP_FRAC_SUB_4_tmp[1] > x1; \
      __FP_FRAC_SUB_4_tmp[1] -= __FP_FRAC_SUB_4_c1; \
      __FP_FRAC_SUB_4_c2 |= __FP_FRAC_SUB_4_c1 && (y1 == x1); \
      __FP_FRAC_SUB_4_tmp[2] = x2 - y2; \
      __FP_FRAC_SUB_4_c3 = __FP_FRAC_SUB_4_tmp[2] > x2; \
      __FP_FRAC_SUB_4_tmp[2] -= __FP_FRAC_SUB_4_c2; \
      __FP_FRAC_SUB_4_c3 |= __FP_FRAC_SUB_4_c2 && (y2 == x2); \
      r3 = x3 - y3 - __FP_FRAC_SUB_4_c3; \
      r2 = __FP_FRAC_SUB_4_tmp[2]; \
      r1 = __FP_FRAC_SUB_4_tmp[1]; \
      r0 = __FP_FRAC_SUB_4_tmp[0]; \
    } \
  while (0)
#endif
|
||||
|
||||
#ifndef __FP_FRAC_DEC_3
/* In-place three-word decrement: (x2,x1,x0) -= (y2,y1,y0).  The current
   x words are copied into temporaries first and those copies are passed
   to __FP_FRAC_SUB_3 as the minuend, with x2/x1/x0 as the destination.  */
# define __FP_FRAC_DEC_3(x2, x1, x0, y2, y1, y0) \
  do \
    { \
      UWtype __FP_FRAC_DEC_3_t0, __FP_FRAC_DEC_3_t1; \
      UWtype __FP_FRAC_DEC_3_t2; \
      __FP_FRAC_DEC_3_t0 = x0; \
      __FP_FRAC_DEC_3_t1 = x1; \
      __FP_FRAC_DEC_3_t2 = x2; \
      __FP_FRAC_SUB_3 (x2, x1, x0, __FP_FRAC_DEC_3_t2, \
		       __FP_FRAC_DEC_3_t1, __FP_FRAC_DEC_3_t0, \
		       y2, y1, y0); \
    } \
  while (0)
#endif
|
||||
|
||||
#ifndef __FP_FRAC_DEC_4
/* In-place four-word decrement: (x3..x0) -= (y3..y0).  The current x
   words are copied into temporaries first and those copies are passed to
   __FP_FRAC_SUB_4 as the minuend, with x3..x0 as the destination.  */
# define __FP_FRAC_DEC_4(x3, x2, x1, x0, y3, y2, y1, y0) \
  do \
    { \
      UWtype __FP_FRAC_DEC_4_t0, __FP_FRAC_DEC_4_t1; \
      UWtype __FP_FRAC_DEC_4_t2, __FP_FRAC_DEC_4_t3; \
      __FP_FRAC_DEC_4_t0 = x0; \
      __FP_FRAC_DEC_4_t1 = x1; \
      __FP_FRAC_DEC_4_t2 = x2; \
      __FP_FRAC_DEC_4_t3 = x3; \
      __FP_FRAC_SUB_4 (x3, x2, x1, x0, __FP_FRAC_DEC_4_t3, \
		       __FP_FRAC_DEC_4_t2, __FP_FRAC_DEC_4_t1, \
		       __FP_FRAC_DEC_4_t0, y3, y2, y1, y0); \
    } \
  while (0)
#endif
|
||||
|
||||
#ifndef __FP_FRAC_ADDI_4
/* In-place add of the single word I to the four-word value (x3..x0).
   The carry out of each word is detected by the updated word being
   smaller than what was just added, and is rippled into the next word.
   The carry out of x3 is dropped.  */
# define __FP_FRAC_ADDI_4(x3, x2, x1, x0, i) \
  do \
    { \
      UWtype __FP_FRAC_ADDI_4_t; \
      __FP_FRAC_ADDI_4_t = ((x0 += i) < i); \
      x1 += __FP_FRAC_ADDI_4_t; \
      __FP_FRAC_ADDI_4_t = (x1 < __FP_FRAC_ADDI_4_t); \
      x2 += __FP_FRAC_ADDI_4_t; \
      __FP_FRAC_ADDI_4_t = (x2 < __FP_FRAC_ADDI_4_t); \
      x3 += __FP_FRAC_ADDI_4_t; \
    } \
  while (0)
#endif
|
||||
|
||||
/* Convert FP values between word sizes.  This appears to be more
   complicated than I'd have expected it to be, so these might be
   wrong...  These macros are in any case somewhat bogus because they
   use information about what various FRAC_n variables look like
   internally [eg, that 2 word vars are X_f0 and x_f1].  But so do
   the ones in op-2.h and op-1.h.  */

/* Narrow a four-word fraction S to the one-word fraction D by keeping
   only word 0.  */
#define _FP_FRAC_COPY_1_4(D, S) (D##_f = S##_f[0])

/* Narrow a four-word fraction S to the two-word fraction D by keeping
   words 0 and 1.  */
#define _FP_FRAC_COPY_2_4(D, S) \
  do \
    { \
      D##_f0 = S##_f[0]; \
      D##_f1 = S##_f[1]; \
    } \
  while (0)
|
||||
|
||||
/* Assembly/disassembly for converting to/from integral types.
   No shifting or overflow handled here.  */

/* Put the FP value X into r, which is an integer of size rsize.
   Depending on RSIZE, one, two or all four fraction words are
   concatenated into R, most significant word first.  Every shift is
   wrapped in a conditional on RSIZE so that no branch contains a shift by
   the full width of a one-word R.  */
#define _FP_FRAC_ASSEMBLE_4(r, X, rsize) \
  do \
    { \
      if ((rsize) <= _FP_W_TYPE_SIZE) \
	(r) = X##_f[0]; \
      else if ((rsize) <= 2*_FP_W_TYPE_SIZE) \
	{ \
	  (r) = X##_f[1]; \
	  (r) = ((rsize) <= _FP_W_TYPE_SIZE \
		 ? 0 \
		 : (r) << _FP_W_TYPE_SIZE); \
	  (r) += X##_f[0]; \
	} \
      else \
	{ \
	  /* I'm feeling lazy so we deal with int == 3words \
	     (implausible) and int == 4words as a single case.  */ \
	  (r) = X##_f[3]; \
	  (r) = ((rsize) <= _FP_W_TYPE_SIZE \
		 ? 0 \
		 : (r) << _FP_W_TYPE_SIZE); \
	  (r) += X##_f[2]; \
	  (r) = ((rsize) <= _FP_W_TYPE_SIZE \
		 ? 0 \
		 : (r) << _FP_W_TYPE_SIZE); \
	  (r) += X##_f[1]; \
	  (r) = ((rsize) <= _FP_W_TYPE_SIZE \
		 ? 0 \
		 : (r) << _FP_W_TYPE_SIZE); \
	  (r) += X##_f[0]; \
	} \
    } \
  while (0)
|
||||
|
||||
/* "No disassemble Number Five!" */
/* Move an integer of size rsize into X's fractional part.  We rely on
   the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
   having to mask the values we store into it.  Words that lie beyond
   RSIZE bits are set to zero.  */
#define _FP_FRAC_DISASSEMBLE_4(X, r, rsize) \
  do \
    { \
      X##_f[0] = (r); \
      X##_f[1] = ((rsize) <= _FP_W_TYPE_SIZE \
		  ? 0 \
		  : (r) >> _FP_W_TYPE_SIZE); \
      X##_f[2] = ((rsize) <= 2*_FP_W_TYPE_SIZE \
		  ? 0 \
		  : (r) >> 2*_FP_W_TYPE_SIZE); \
      X##_f[3] = ((rsize) <= 3*_FP_W_TYPE_SIZE \
		  ? 0 \
		  : (r) >> 3*_FP_W_TYPE_SIZE); \
    } \
  while (0)
|
||||
|
||||
/* Widen the one-word fraction S into the four-word fraction D: word 0
   receives S, the upper three words are cleared.  */
#define _FP_FRAC_COPY_4_1(D, S) \
  do \
    { \
      D##_f[0] = S##_f; \
      D##_f[1] = D##_f[2] = D##_f[3] = 0; \
    } \
  while (0)
|
||||
|
||||
/* Widen the two-word fraction S into the four-word fraction D: words 0
   and 1 receive S, the upper two words are cleared.  */
#define _FP_FRAC_COPY_4_2(D, S) \
  do \
    { \
      D##_f[0] = S##_f0; \
      D##_f[1] = S##_f1; \
      D##_f[2] = D##_f[3] = 0; \
    } \
  while (0)

/* Same-size copy; forwards to _FP_FRAC_COPY_4.  */
#define _FP_FRAC_COPY_4_4(D, S) _FP_FRAC_COPY_4 (D, S)
|
||||
|
||||
#endif /* !SOFT_FP_OP_4_H */
|
238
src/linpack/soft-fp/op-8.h
Normal file
238
src/linpack/soft-fp/op-8.h
Normal file
|
@ -0,0 +1,238 @@
|
|||
/* Software floating-point emulation.
   Basic eight-word fraction declaration and manipulation.
   Copyright (C) 1997-2019 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Richard Henderson (rth@cygnus.com),
   Jakub Jelinek (jj@ultra.linux.cz) and
   Peter Maydell (pmaydell@chiark.greenend.org.uk).

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   In addition to the permissions in the GNU Lesser General Public
   License, the Free Software Foundation gives you unlimited
   permission to link the compiled version of this file into
   combinations with other programs, and to distribute those
   combinations without any restriction coming from the use of this
   file. (The Lesser General Public License restrictions do apply in
   other respects; for example, they cover modification of the file,
   and distribution when not linked into a combine executable.)

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef SOFT_FP_OP_8_H
|
||||
#define SOFT_FP_OP_8_H 1
|
||||
|
||||
/* We need just a few things from here for op-4, if we ever need some
   other macros, they can be added.  */

/* An eight-word fraction named X is the array X##_f; index 0 is the
   least significant word and index 7 the most significant.  */
#define _FP_FRAC_DECL_8(X)	_FP_W_TYPE X##_f[8]
#define _FP_FRAC_SET_8(X, I)	__FP_FRAC_SET_8 (X, I)
#define _FP_FRAC_HIGH_8(X)	(X##_f[7])
#define _FP_FRAC_LOW_8(X)	(X##_f[0])
#define _FP_FRAC_WORD_8(X, w)	(X##_f[w])
|
||||
|
||||
/* Shift the eight-word fraction X left by N bits, in place.  N is split
   into a whole-word part (skip) and a bit part (up).  Words are moved
   from the top down so that no source word is overwritten before it has
   been read; the vacated low words are then cleared.  The whole-word
   case is handled separately, so the bit-shift loop never shifts by the
   full word width.  */
#define _FP_FRAC_SLL_8(X, N) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_SLL_8_up, _FP_FRAC_SLL_8_down; \
      _FP_I_TYPE _FP_FRAC_SLL_8_skip, _FP_FRAC_SLL_8_i; \
      _FP_FRAC_SLL_8_skip = (N) / _FP_W_TYPE_SIZE; \
      _FP_FRAC_SLL_8_up = (N) % _FP_W_TYPE_SIZE; \
      _FP_FRAC_SLL_8_down = _FP_W_TYPE_SIZE - _FP_FRAC_SLL_8_up; \
      if (!_FP_FRAC_SLL_8_up) \
	for (_FP_FRAC_SLL_8_i = 7; \
	     _FP_FRAC_SLL_8_i >= _FP_FRAC_SLL_8_skip; \
	     --_FP_FRAC_SLL_8_i) \
	  X##_f[_FP_FRAC_SLL_8_i] \
	    = X##_f[_FP_FRAC_SLL_8_i-_FP_FRAC_SLL_8_skip]; \
      else \
	{ \
	  for (_FP_FRAC_SLL_8_i = 7; \
	       _FP_FRAC_SLL_8_i > _FP_FRAC_SLL_8_skip; \
	       --_FP_FRAC_SLL_8_i) \
	    X##_f[_FP_FRAC_SLL_8_i] \
	      = ((X##_f[_FP_FRAC_SLL_8_i-_FP_FRAC_SLL_8_skip] \
		  << _FP_FRAC_SLL_8_up) \
		 | (X##_f[_FP_FRAC_SLL_8_i-_FP_FRAC_SLL_8_skip-1] \
		    >> _FP_FRAC_SLL_8_down)); \
	  X##_f[_FP_FRAC_SLL_8_i--] = X##_f[0] << _FP_FRAC_SLL_8_up; \
	} \
      for (; _FP_FRAC_SLL_8_i >= 0; --_FP_FRAC_SLL_8_i) \
	X##_f[_FP_FRAC_SLL_8_i] = 0; \
    } \
  while (0)
|
||||
|
||||
/* Shift the eight-word fraction X right by N bits, in place, discarding
   the bits shifted out.  N is split into a whole-word part (skip) and a
   bit part (down).  Words are moved from the bottom up so that no source
   word is overwritten before it has been read; the vacated high words
   are then cleared.  */
#define _FP_FRAC_SRL_8(X, N) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_SRL_8_up, _FP_FRAC_SRL_8_down; \
      _FP_I_TYPE _FP_FRAC_SRL_8_skip, _FP_FRAC_SRL_8_i; \
      _FP_FRAC_SRL_8_skip = (N) / _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRL_8_down = (N) % _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRL_8_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRL_8_down; \
      if (!_FP_FRAC_SRL_8_down) \
	for (_FP_FRAC_SRL_8_i = 0; \
	     _FP_FRAC_SRL_8_i <= 7-_FP_FRAC_SRL_8_skip; \
	     ++_FP_FRAC_SRL_8_i) \
	  X##_f[_FP_FRAC_SRL_8_i] \
	    = X##_f[_FP_FRAC_SRL_8_i+_FP_FRAC_SRL_8_skip]; \
      else \
	{ \
	  for (_FP_FRAC_SRL_8_i = 0; \
	       _FP_FRAC_SRL_8_i < 7-_FP_FRAC_SRL_8_skip; \
	       ++_FP_FRAC_SRL_8_i) \
	    X##_f[_FP_FRAC_SRL_8_i] \
	      = ((X##_f[_FP_FRAC_SRL_8_i+_FP_FRAC_SRL_8_skip] \
		  >> _FP_FRAC_SRL_8_down) \
		 | (X##_f[_FP_FRAC_SRL_8_i+_FP_FRAC_SRL_8_skip+1] \
		    << _FP_FRAC_SRL_8_up)); \
	  X##_f[_FP_FRAC_SRL_8_i++] = X##_f[7] >> _FP_FRAC_SRL_8_down; \
	} \
      for (; _FP_FRAC_SRL_8_i < 8; ++_FP_FRAC_SRL_8_i) \
	X##_f[_FP_FRAC_SRL_8_i] = 0; \
    } \
  while (0)
|
||||
|
||||
|
||||
/* Right shift with sticky-lsb.
   What this actually means is that we do a standard right-shift,
   but that if any of the bits that fall off the right hand side
   were one then we always set the LSbit.
   The discarded bits are ORed together into an accumulator before the
   words are moved; the accumulator is folded into bit 0 of X##_f[0] as
   the very last step.  The SIZE argument is not referenced.  */
#define _FP_FRAC_SRS_8(X, N, size) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_SRS_8_up, _FP_FRAC_SRS_8_down; \
      _FP_I_TYPE _FP_FRAC_SRS_8_skip, _FP_FRAC_SRS_8_i; \
      _FP_W_TYPE _FP_FRAC_SRS_8_s; \
      _FP_FRAC_SRS_8_skip = (N) / _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRS_8_down = (N) % _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRS_8_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRS_8_down; \
      for (_FP_FRAC_SRS_8_s = _FP_FRAC_SRS_8_i = 0; \
	   _FP_FRAC_SRS_8_i < _FP_FRAC_SRS_8_skip; \
	   ++_FP_FRAC_SRS_8_i) \
	_FP_FRAC_SRS_8_s |= X##_f[_FP_FRAC_SRS_8_i]; \
      if (!_FP_FRAC_SRS_8_down) \
	for (_FP_FRAC_SRS_8_i = 0; \
	     _FP_FRAC_SRS_8_i <= 7-_FP_FRAC_SRS_8_skip; \
	     ++_FP_FRAC_SRS_8_i) \
	  X##_f[_FP_FRAC_SRS_8_i] \
	    = X##_f[_FP_FRAC_SRS_8_i+_FP_FRAC_SRS_8_skip]; \
      else \
	{ \
	  _FP_FRAC_SRS_8_s \
	    |= X##_f[_FP_FRAC_SRS_8_i] << _FP_FRAC_SRS_8_up; \
	  for (_FP_FRAC_SRS_8_i = 0; \
	       _FP_FRAC_SRS_8_i < 7-_FP_FRAC_SRS_8_skip; \
	       ++_FP_FRAC_SRS_8_i) \
	    X##_f[_FP_FRAC_SRS_8_i] \
	      = ((X##_f[_FP_FRAC_SRS_8_i+_FP_FRAC_SRS_8_skip] \
		  >> _FP_FRAC_SRS_8_down) \
		 | (X##_f[_FP_FRAC_SRS_8_i+_FP_FRAC_SRS_8_skip+1] \
		    << _FP_FRAC_SRS_8_up)); \
	  X##_f[_FP_FRAC_SRS_8_i++] = X##_f[7] >> _FP_FRAC_SRS_8_down; \
	} \
      for (; _FP_FRAC_SRS_8_i < 8; ++_FP_FRAC_SRS_8_i) \
	X##_f[_FP_FRAC_SRS_8_i] = 0; \
      /* Don't fix the LSB until the very end when we're sure f[0] is \
	 stable.  */ \
      X##_f[0] |= (_FP_FRAC_SRS_8_s != 0); \
    } \
  while (0)
|
||||
|
||||
/* Eight-word add: R = X + Y, word by word from least to most
   significant.  The carry out of each word is recovered by comparing the
   sum with the X operand: with an incoming carry a sum <= X means
   overflow, without one a sum < X does.  The carry out of word 7 is
   dropped.  */
#define _FP_FRAC_ADD_8(R, X, Y) \
  do \
    { \
      _FP_W_TYPE _FP_FRAC_ADD_8_c = 0; \
      _FP_I_TYPE _FP_FRAC_ADD_8_i; \
      for (_FP_FRAC_ADD_8_i = 0; _FP_FRAC_ADD_8_i < 8; ++_FP_FRAC_ADD_8_i) \
	{ \
	  R##_f[_FP_FRAC_ADD_8_i] \
	    = (X##_f[_FP_FRAC_ADD_8_i] + Y##_f[_FP_FRAC_ADD_8_i] \
	       + _FP_FRAC_ADD_8_c); \
	  _FP_FRAC_ADD_8_c \
	    = (_FP_FRAC_ADD_8_c \
	       ? R##_f[_FP_FRAC_ADD_8_i] <= X##_f[_FP_FRAC_ADD_8_i] \
	       : R##_f[_FP_FRAC_ADD_8_i] < X##_f[_FP_FRAC_ADD_8_i]); \
	} \
    } \
  while (0)
|
||||
|
||||
/* Eight-word subtract: R = X - Y, word by word from least to most
   significant.  The borrow out of each word is recovered by comparing
   the difference with the X operand.  All eight result words are built
   in a temporary array and copied into R only after the borrow chain is
   complete, so R may be the same fraction as X or Y.  */
#define _FP_FRAC_SUB_8(R, X, Y) \
  do \
    { \
      _FP_W_TYPE _FP_FRAC_SUB_8_tmp[8]; \
      _FP_W_TYPE _FP_FRAC_SUB_8_c = 0; \
      _FP_I_TYPE _FP_FRAC_SUB_8_i; \
      for (_FP_FRAC_SUB_8_i = 0; _FP_FRAC_SUB_8_i < 8; ++_FP_FRAC_SUB_8_i) \
	{ \
	  _FP_FRAC_SUB_8_tmp[_FP_FRAC_SUB_8_i] \
	    = (X##_f[_FP_FRAC_SUB_8_i] - Y##_f[_FP_FRAC_SUB_8_i] \
	       - _FP_FRAC_SUB_8_c); \
	  _FP_FRAC_SUB_8_c \
	    = (_FP_FRAC_SUB_8_c \
	       ? (_FP_FRAC_SUB_8_tmp[_FP_FRAC_SUB_8_i] \
		  >= X##_f[_FP_FRAC_SUB_8_i]) \
	       : (_FP_FRAC_SUB_8_tmp[_FP_FRAC_SUB_8_i] \
		  > X##_f[_FP_FRAC_SUB_8_i])); \
	} \
      for (_FP_FRAC_SUB_8_i = 0; _FP_FRAC_SUB_8_i < 8; ++_FP_FRAC_SUB_8_i) \
	R##_f[_FP_FRAC_SUB_8_i] = _FP_FRAC_SUB_8_tmp[_FP_FRAC_SUB_8_i]; \
    } \
  while (0)
|
||||
|
||||
/* Count the leading zero bits of the eight-word fraction X into R.
   Scans down from word 7 to the first nonzero word (stopping at word 0
   even if it is zero), applies __FP_CLZ to that word and adds one full
   word width for every word skipped.  */
#define _FP_FRAC_CLZ_8(R, X) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_CLZ_8_i; \
      for (_FP_FRAC_CLZ_8_i = 7; _FP_FRAC_CLZ_8_i > 0; _FP_FRAC_CLZ_8_i--) \
	if (X##_f[_FP_FRAC_CLZ_8_i]) \
	  break; \
      __FP_CLZ ((R), X##_f[_FP_FRAC_CLZ_8_i]); \
      (R) += _FP_W_TYPE_SIZE * (7 - _FP_FRAC_CLZ_8_i); \
    } \
  while (0)
|
||||
|
||||
/* Word list for the smallest nonzero eight-word fraction, most
   significant word first.  */
#define _FP_MINFRAC_8	0, 0, 0, 0, 0, 0, 0, 1

/* Nonzero when the top word of X, read as a signed word, is negative.  */
#define _FP_FRAC_NEGP_8(X)	((_FP_WS_TYPE) X##_f[7] < 0)
/* Nonzero when all eight words of X are zero.  */
#define _FP_FRAC_ZEROP_8(X) \
  ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3] \
    | X##_f[4] | X##_f[5] | X##_f[6] | X##_f[7]) == 0)
/* Masks the format's double-width high word of X with the format's
   double-width high-bit constant; FS selects both.  */
#define _FP_FRAC_HIGHBIT_DW_8(fs, X) \
  (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
|
||||
|
||||
/* Narrow the eight-word fraction S to the four-word fraction D by
   copying words 0..3; the upper four words of S are ignored.  */
#define _FP_FRAC_COPY_4_8(D, S) \
  do \
    { \
      D##_f[0] = S##_f[0]; \
      D##_f[1] = S##_f[1]; \
      D##_f[2] = S##_f[2]; \
      D##_f[3] = S##_f[3]; \
    } \
  while (0)
|
||||
|
||||
/* Widen the four-word fraction S into the eight-word fraction D: words
   0..3 are copied and words 4..7 are cleared.  */
#define _FP_FRAC_COPY_8_4(D, S) \
  do \
    { \
      D##_f[0] = S##_f[0]; \
      D##_f[1] = S##_f[1]; \
      D##_f[2] = S##_f[2]; \
      D##_f[3] = S##_f[3]; \
      D##_f[4] = D##_f[5] = D##_f[6] = D##_f[7]= 0; \
    } \
  while (0)
|
||||
|
||||
/* Store eight words into X's fraction; I7 is the most significant word,
   I0 the least.  Expands to a single comma expression.  */
#define __FP_FRAC_SET_8(X, I7, I6, I5, I4, I3, I2, I1, I0) \
  (X##_f[7] = I7, X##_f[6] = I6, X##_f[5] = I5, X##_f[4] = I4, \
   X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0)
|
||||
|
||||
#endif /* !SOFT_FP_OP_8_H */
|
2155
src/linpack/soft-fp/op-common.h
Normal file
2155
src/linpack/soft-fp/op-common.h
Normal file
File diff suppressed because it is too large
Load diff
117
src/linpack/soft-fp/sfp-machine.h
Normal file
117
src/linpack/soft-fp/sfp-machine.h
Normal file
|
@ -0,0 +1,117 @@
|
|||
|
||||
/* Word-size dependent configuration.  When __riscv_xlen is 32 the
   emulation works on 32-bit words, so double uses the two-word and quad
   the four-word routines.  In every other case 64-bit words are used,
   so double fits in one word and quad in two.  Each branch also picks
   the multiply/divide implementation per format and the fraction words
   of the default NaN.  */
#if __riscv_xlen == 32

#define _FP_W_TYPE_SIZE		32
#define _FP_W_TYPE		unsigned long
#define _FP_WS_TYPE		signed long
#define _FP_I_TYPE		long

#define _FP_MUL_MEAT_S(R,X,Y) \
  _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm)
#define _FP_MUL_MEAT_D(R,X,Y) \
  _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
#define _FP_MUL_MEAT_Q(R,X,Y) \
  _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)

#define _FP_DIV_MEAT_S(R,X,Y)	_FP_DIV_MEAT_1_udiv_norm(S,R,X,Y)
#define _FP_DIV_MEAT_D(R,X,Y)	_FP_DIV_MEAT_2_udiv(D,R,X,Y)
#define _FP_DIV_MEAT_Q(R,X,Y)	_FP_DIV_MEAT_4_udiv(Q,R,X,Y)

#define _FP_NANFRAC_S		_FP_QNANBIT_S
#define _FP_NANFRAC_D		_FP_QNANBIT_D, 0
#define _FP_NANFRAC_Q		_FP_QNANBIT_Q, 0, 0, 0

#else

#define _FP_W_TYPE_SIZE		64
#define _FP_W_TYPE		unsigned long long
#define _FP_WS_TYPE		signed long long
#define _FP_I_TYPE		long long

#define _FP_MUL_MEAT_S(R,X,Y) \
  _FP_MUL_MEAT_1_imm(_FP_WFRACBITS_S,R,X,Y)
#define _FP_MUL_MEAT_D(R,X,Y) \
  _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
#define _FP_MUL_MEAT_Q(R,X,Y) \
  _FP_MUL_MEAT_2_wide_3mul(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)

#define _FP_DIV_MEAT_S(R,X,Y)	_FP_DIV_MEAT_1_imm(S,R,X,Y,_FP_DIV_HELP_imm)
#define _FP_DIV_MEAT_D(R,X,Y)	_FP_DIV_MEAT_1_udiv_norm(D,R,X,Y)
#define _FP_DIV_MEAT_Q(R,X,Y)	_FP_DIV_MEAT_2_udiv(Q,R,X,Y)

#define _FP_NANFRAC_S		_FP_QNANBIT_S
#define _FP_NANFRAC_D		_FP_QNANBIT_D
#define _FP_NANFRAC_Q		_FP_QNANBIT_Q, 0

#endif
|
||||
|
||||
/* Integer types declared with machine mode TI, plus their bit width.
   Only provided when __riscv_xlen is 64.  */
#if __riscv_xlen == 64
typedef int TItype __attribute__ ((mode (TI)));
typedef unsigned int UTItype __attribute__ ((mode (TI)));
#define TI_BITS (__CHAR_BIT__ * (int)sizeof(TItype))
#endif
|
||||
|
||||
/* The type of the result of a floating point comparison.  This must
   match __libgcc_cmp_return__ in GCC for the target.  */
typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__)));
#define CMPtype __gcc_CMPtype
|
||||
|
||||
/* Sign bit of the NaN that the emulation generates, per format.  */
#define _FP_NANSIGN_S		0
#define _FP_NANSIGN_D		0
#define _FP_NANSIGN_Q		0

/* NaN handling switches read by the generic soft-fp code; both are
   turned off here.  */
#define _FP_KEEPNANFRACP 0
#define _FP_QNANNEGATEDP 0
|
||||
|
||||
/* Choose the NaN result for an operation.  The operands X and Y and the
   operation OP are not consulted: R always becomes the default NaN of
   format FS (sign _FP_NANSIGN_##fs, fraction _FP_NANFRAC_##fs stored
   with the WC-word setter) and is classified FP_CLS_NAN.  */
#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \
  do { \
    R##_s = _FP_NANSIGN_##fs; \
    _FP_FRAC_SET_##wc(R,_FP_NANFRAC_##fs); \
    R##_c = FP_CLS_NAN; \
  } while (0)
|
||||
|
||||
/* Per-operation state: a local holding the rounding mode.  The macro
   carries its own trailing semicolon.  */
#define _FP_DECL_EX		int _frm __attribute__ ((unused));
#define FP_ROUNDMODE		_frm

/* Rounding mode encodings.  */
#define FP_RND_NEAREST		0x0
#define FP_RND_ZERO		0x1
#define FP_RND_PINF		0x3
#define FP_RND_MINF		0x2

/* Exception flag bits.  */
#define FP_EX_INVALID		0x10
#define FP_EX_OVERFLOW		0x04
#define FP_EX_UNDERFLOW		0x02
#define FP_EX_DIVZERO		0x08
#define FP_EX_INEXACT		0x01

#define _FP_TININESS_AFTER_ROUNDING 1
|
||||
|
||||
/* Hooks for the rounding mode and exception flags.  When __riscv_flen is
   defined, the rounding mode is read with "frrm" and accumulated
   exception bits (_fex) are set in fflags with "csrs", but only if any
   were raised.  Otherwise the rounding mode is fixed to round-to-nearest
   and no exception hook is defined here.  */
#ifdef __riscv_flen
#define FP_INIT_ROUNDMODE \
do { \
  __asm__ volatile ("frrm %0" : "=r" (_frm)); \
} while (0)

#define FP_HANDLE_EXCEPTIONS \
do { \
  if (__builtin_expect (_fex, 0)) \
    __asm__ volatile ("csrs fflags, %0" : : "rK" (_fex)); \
} while (0)
#else
#define FP_INIT_ROUNDMODE	_frm = FP_RND_NEAREST
#endif
|
||||
|
||||
/* Byte-order constants and selection.  __BYTE_ORDER follows the
   compiler's __BYTE_ORDER__ when that reports big-endian and is
   little-endian in every other case.  */
#define	__LITTLE_ENDIAN	1234
#define	__BIG_ENDIAN	4321

#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define __BYTE_ORDER __BIG_ENDIAN
#else
#define __BYTE_ORDER __LITTLE_ENDIAN
#endif
|
||||
|
||||
|
||||
/* Define ALIASNAME as a strong alias for NAME.  The two-level expansion
   lets NAME be macro-expanded before it is stringized for the alias
   attribute.  The generated declaration ends with its own semicolon.  */
# define strong_alias(name, aliasname) _strong_alias(name, aliasname)
# define _strong_alias(name, aliasname) \
  extern __typeof (name) aliasname __attribute__ ((alias (#name)));
|
199
src/linpack/soft-fp/single.h
Normal file
199
src/linpack/soft-fp/single.h
Normal file
|
@ -0,0 +1,199 @@
|
|||
/* Software floating-point emulation.
   Definitions for IEEE Single Precision.
   Copyright (C) 1997-2019 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Richard Henderson (rth@cygnus.com),
   Jakub Jelinek (jj@ultra.linux.cz),
   David S. Miller (davem@redhat.com) and
   Peter Maydell (pmaydell@chiark.greenend.org.uk).

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   In addition to the permissions in the GNU Lesser General Public
   License, the Free Software Foundation gives you unlimited
   permission to link the compiled version of this file into
   combinations with other programs, and to distribute those
   combinations without any restriction coming from the use of this
   file. (The Lesser General Public License restrictions do apply in
   other respects; for example, they cover modification of the file,
   and distribution when not linked into a combine executable.)

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef SOFT_FP_SINGLE_H
|
||||
#define SOFT_FP_SINGLE_H 1
|
||||
|
||||
#if _FP_W_TYPE_SIZE < 32
|
||||
# error "Here's a nickel kid. Go buy yourself a real computer."
|
||||
#endif
|
||||
|
||||
/* Layout constants for the single-precision format.  A single fraction
   occupies one word; the double-width fraction used for products takes
   two words when words are narrower than 64 bits and one word
   otherwise.  */
#define _FP_FRACTBITS_S		_FP_W_TYPE_SIZE

#if _FP_W_TYPE_SIZE < 64
# define _FP_FRACTBITS_DW_S	(2 * _FP_W_TYPE_SIZE)
#else
# define _FP_FRACTBITS_DW_S	_FP_W_TYPE_SIZE
#endif

#define _FP_FRACBITS_S		24
#define _FP_FRACXBITS_S		(_FP_FRACTBITS_S - _FP_FRACBITS_S)
#define _FP_WFRACBITS_S		(_FP_WORKBITS + _FP_FRACBITS_S)
#define _FP_WFRACXBITS_S	(_FP_FRACTBITS_S - _FP_WFRACBITS_S)
#define _FP_EXPBITS_S		8
#define _FP_EXPBIAS_S		127
#define _FP_EXPMAX_S		255
#define _FP_QNANBIT_S		((_FP_W_TYPE) 1 << (_FP_FRACBITS_S-2))
#define _FP_QNANBIT_SH_S	((_FP_W_TYPE) 1 << (_FP_FRACBITS_S-2+_FP_WORKBITS))
#define _FP_IMPLBIT_S		((_FP_W_TYPE) 1 << (_FP_FRACBITS_S-1))
#define _FP_IMPLBIT_SH_S	((_FP_W_TYPE) 1 << (_FP_FRACBITS_S-1+_FP_WORKBITS))
#define _FP_OVERFLOW_S		((_FP_W_TYPE) 1 << (_FP_WFRACBITS_S))

#define _FP_WFRACBITS_DW_S	(2 * _FP_WFRACBITS_S)
#define _FP_WFRACXBITS_DW_S	(_FP_FRACTBITS_DW_S - _FP_WFRACBITS_DW_S)
#define _FP_HIGHBIT_DW_S \
  ((_FP_W_TYPE) 1 << (_FP_WFRACBITS_DW_S - 1) % _FP_W_TYPE_SIZE)

/* The implementation of _FP_MUL_MEAT_S and _FP_DIV_MEAT_S should be
   chosen by the target machine.  */
|
||||
|
||||
typedef float SFtype __attribute__ ((mode (SF)));
|
||||
|
||||
union _FP_UNION_S
|
||||
{
|
||||
SFtype flt;
|
||||
struct _FP_STRUCT_LAYOUT
|
||||
{
|
||||
#if __BYTE_ORDER == __BIG_ENDIAN
|
||||
unsigned sign : 1;
|
||||
unsigned exp : _FP_EXPBITS_S;
|
||||
unsigned frac : _FP_FRACBITS_S - (_FP_IMPLBIT_S != 0);
|
||||
#else
|
||||
unsigned frac : _FP_FRACBITS_S - (_FP_IMPLBIT_S != 0);
|
||||
unsigned exp : _FP_EXPBITS_S;
|
||||
unsigned sign : 1;
|
||||
#endif
|
||||
} bits;
|
||||
};
|
||||
|
||||
/* Single-precision front ends for the generic one-word declare, unpack
   and pack primitives.  Each wrapper fixes the format tag to S and the
   word count to 1.  The *_SP variants forward to the *_P primitives,
   and the packing ones only store when FP_INHIBIT_RESULTS is zero.  */

#define FP_DECL_S(X) _FP_DECL (1, X)
#define FP_UNPACK_RAW_S(X, val) _FP_UNPACK_RAW_1 (S, X, (val))
#define FP_UNPACK_RAW_SP(X, val) _FP_UNPACK_RAW_1_P (S, X, (val))
#define FP_PACK_RAW_S(val, X) _FP_PACK_RAW_1 (S, (val), X)
#define FP_PACK_RAW_SP(val, X) \
  do \
    { \
      if (!FP_INHIBIT_RESULTS) \
        _FP_PACK_RAW_1_P (S, (val), X); \
    } \
  while (0)

/* Raw unpack followed by conversion to canonical form.  */
#define FP_UNPACK_S(X, val) \
  do \
    { \
      _FP_UNPACK_RAW_1 (S, X, (val)); \
      _FP_UNPACK_CANONICAL (S, 1, X); \
    } \
  while (0)

#define FP_UNPACK_SP(X, val) \
  do \
    { \
      _FP_UNPACK_RAW_1_P (S, X, (val)); \
      _FP_UNPACK_CANONICAL (S, 1, X); \
    } \
  while (0)

/* Raw unpack followed by conversion to semi-raw form.  */
#define FP_UNPACK_SEMIRAW_S(X, val) \
  do \
    { \
      _FP_UNPACK_RAW_1 (S, X, (val)); \
      _FP_UNPACK_SEMIRAW (S, 1, X); \
    } \
  while (0)

#define FP_UNPACK_SEMIRAW_SP(X, val) \
  do \
    { \
      _FP_UNPACK_RAW_1_P (S, X, (val)); \
      _FP_UNPACK_SEMIRAW (S, 1, X); \
    } \
  while (0)

/* Canonical pack followed by the raw store.  */
#define FP_PACK_S(val, X) \
  do \
    { \
      _FP_PACK_CANONICAL (S, 1, X); \
      _FP_PACK_RAW_1 (S, (val), X); \
    } \
  while (0)

#define FP_PACK_SP(val, X) \
  do \
    { \
      _FP_PACK_CANONICAL (S, 1, X); \
      if (!FP_INHIBIT_RESULTS) \
        _FP_PACK_RAW_1_P (S, (val), X); \
    } \
  while (0)

/* Semi-raw pack followed by the raw store.  */
#define FP_PACK_SEMIRAW_S(val, X) \
  do \
    { \
      _FP_PACK_SEMIRAW (S, 1, X); \
      _FP_PACK_RAW_1 (S, (val), X); \
    } \
  while (0)

#define FP_PACK_SEMIRAW_SP(val, X) \
  do \
    { \
      _FP_PACK_SEMIRAW (S, 1, X); \
      if (!FP_INHIBIT_RESULTS) \
        _FP_PACK_RAW_1_P (S, (val), X); \
    } \
  while (0)
|
||||
|
||||
/* Single-precision arithmetic, comparison and conversion entry points.
   Each forwards to the generic macro with format tag S and a word
   count of 1.  */
#define FP_ISSIGNAN_S(X) _FP_ISSIGNAN (S, 1, X)
#define FP_NEG_S(R, X) _FP_NEG (S, 1, R, X)
#define FP_ADD_S(R, X, Y) _FP_ADD (S, 1, R, X, Y)
#define FP_SUB_S(R, X, Y) _FP_SUB (S, 1, R, X, Y)
#define FP_MUL_S(R, X, Y) _FP_MUL (S, 1, R, X, Y)
#define FP_DIV_S(R, X, Y) _FP_DIV (S, 1, R, X, Y)
#define FP_SQRT_S(R, X) _FP_SQRT (S, 1, R, X)
#define _FP_SQRT_MEAT_S(R, S, T, X, Q) _FP_SQRT_MEAT_1 (R, S, T, X, (Q))

/* The third argument to _FP_FMA is 2 when the working word is narrower
   than 64 bits and 1 otherwise (same condition as _FP_FRAC_HIGH_DW_S
   below).  */
#if _FP_W_TYPE_SIZE < 64
# define FP_FMA_S(R, X, Y, Z) _FP_FMA (S, 1, 2, R, X, Y, Z)
#else
# define FP_FMA_S(R, X, Y, Z) _FP_FMA (S, 1, 1, R, X, Y, Z)
#endif

#define FP_CMP_S(r, X, Y, un, ex) _FP_CMP (S, 1, (r), X, Y, (un), (ex))
#define FP_CMP_EQ_S(r, X, Y, ex) _FP_CMP_EQ (S, 1, (r), X, Y, (ex))
#define FP_CMP_UNORD_S(r, X, Y, ex) _FP_CMP_UNORD (S, 1, (r), X, Y, (ex))

#define FP_TO_INT_S(r, X, rsz, rsg) _FP_TO_INT (S, 1, (r), X, (rsz), (rsg))
#define FP_TO_INT_ROUND_S(r, X, rsz, rsg) \
  _FP_TO_INT_ROUND (S, 1, (r), X, (rsz), (rsg))
#define FP_FROM_INT_S(X, r, rs, rt) _FP_FROM_INT (S, 1, X, (r), (rs), rt)

/* Accessors for the most significant fraction word.  */
#define _FP_FRAC_HIGH_S(X) _FP_FRAC_HIGH_1 (X)
#define _FP_FRAC_HIGH_RAW_S(X) _FP_FRAC_HIGH_1 (X)

#if _FP_W_TYPE_SIZE < 64
# define _FP_FRAC_HIGH_DW_S(X) _FP_FRAC_HIGH_2 (X)
#else
# define _FP_FRAC_HIGH_DW_S(X) _FP_FRAC_HIGH_1 (X)
#endif
|
||||
|
||||
#endif /* !SOFT_FP_SINGLE_H */
|
230
src/linpack/soft-fp/soft-fp.h
Normal file
230
src/linpack/soft-fp/soft-fp.h
Normal file
|
@ -0,0 +1,230 @@
|
|||
#ifndef __SOFT_FP_H__
|
||||
#define __SOFT_FP_H__
|
||||
|
||||
#include "sfp-machine.h"
|
||||
|
||||
/* Make `abort ()' expand to nothing.
   NOTE(review): after this point every `abort ()' in a translation unit
   that includes this header becomes an empty statement, including the
   fallback definition of _FP_UNREACHABLE below.  Presumably done because
   the target environment provides no abort -- confirm.  */
#define abort() // 54

/* For unreachable default cases in switch statements over bitwise OR
   of FP_CLS_* values.  */
#if (defined __GNUC__ \
     && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
# define _FP_UNREACHABLE __builtin_unreachable ()
#else
# define _FP_UNREACHABLE abort ()
#endif

// 63
/* Compile-time assertion.  Uses _Static_assert when the compiler is
   GCC 4.6 or later, or declares C11; otherwise falls back to a
   declaration whose bit-field width becomes negative (a compile error)
   when EXPR is false.  */
#if ((defined __GNUC__ \
      && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))) \
     || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 201112L))
# define _FP_STATIC_ASSERT(expr, msg) \
  _Static_assert ((expr), msg)
#else
# define _FP_STATIC_ASSERT(expr, msg) \
  extern int (*__Static_assert_function (void)) \
    [!!sizeof (struct { int __error_if_negative: (expr) ? 2 : -1; })]
#endif
|
||||
|
||||
|
||||
/* Initializer suffix: expands to `= 0' so that a declaration written
   as `T x _FP_ZERO_INIT;' is zero-initialized.  */
#define _FP_ZERO_INIT = 0 // 82

/* Number of extra low-order bits carried below the fraction, and the
   masks for the individual positions: bit 3 is the fraction's least
   significant bit, bits 2..0 are the extra bits.  */
#define _FP_WORKBITS 3 // 85
#define _FP_WORK_LSB ((_FP_W_TYPE) 1 << 3)
#define _FP_WORK_ROUND ((_FP_W_TYPE) 1 << 2) // 87
#define _FP_WORK_GUARD ((_FP_W_TYPE) 1 << 1)
#define _FP_WORK_STICKY ((_FP_W_TYPE) 1 << 0) // 89

/* Rounding-mode encodings, used unless they were already defined
   before this point.  */
#ifndef FP_RND_NEAREST
# define FP_RND_NEAREST 0
# define FP_RND_ZERO 1
# define FP_RND_PINF 2
# define FP_RND_MINF 3
#endif

/* Active rounding mode; defaults to round-to-nearest.  */
#ifndef FP_ROUNDMODE
# define FP_ROUNDMODE FP_RND_NEAREST
#endif
|
||||
|
||||
/* By default don't care about exceptions.  Every flag that was not
   defined before this point becomes 0, so setting it is a no-op.  */ // 101
#ifndef FP_EX_INVALID
# define FP_EX_INVALID 0
#endif
#ifndef FP_EX_OVERFLOW
# define FP_EX_OVERFLOW 0
#endif
#ifndef FP_EX_UNDERFLOW
# define FP_EX_UNDERFLOW 0
#endif
#ifndef FP_EX_DIVZERO
# define FP_EX_DIVZERO 0
#endif
#ifndef FP_EX_INEXACT
# define FP_EX_INEXACT 0
#endif
#ifndef FP_EX_DENORM
# define FP_EX_DENORM 0
#endif

/* Sub-exceptions of "invalid".  */ // 121
/* Signaling NaN operand.  */
#ifndef FP_EX_INVALID_SNAN
# define FP_EX_INVALID_SNAN 0
#endif
/* Inf * 0.  */ // 126
#ifndef FP_EX_INVALID_IMZ
# define FP_EX_INVALID_IMZ 0
#endif

/* Inf - Inf.  */ // 134
#ifndef FP_EX_INVALID_ISI
# define FP_EX_INVALID_ISI 0
#endif
/* 0 / 0.  */
#ifndef FP_EX_INVALID_ZDZ
# define FP_EX_INVALID_ZDZ 0
#endif
/* Inf / Inf.  */
#ifndef FP_EX_INVALID_IDI
# define FP_EX_INVALID_IDI 0
#endif

/* Invalid conversion to integer.  */
#ifndef FP_EX_INVALID_CVI
# define FP_EX_INVALID_CVI 0
#endif
/* Invalid comparison.  */ // 154
#ifndef FP_EX_INVALID_VC
# define FP_EX_INVALID_VC 0
#endif
|
||||
|
||||
/* _FP_STRUCT_LAYOUT may be defined as an attribute to determine the
   struct layout variant used for structures where bit-fields are used
   to access specific parts of binary floating-point numbers.  This is
   required for systems where the default ABI uses struct layout with
   differences in how consecutive bit-fields are laid out from the
   default expected by soft-fp.  When nothing defined it earlier it
   expands to nothing.  */
#ifndef _FP_STRUCT_LAYOUT
# define _FP_STRUCT_LAYOUT
#endif

// 169
/* Declares the local exception accumulator `_fex', initialized to 0,
   followed by _FP_DECL_EX when that macro is defined.  */
#ifdef _FP_DECL_EX
# define FP_DECL_EX \
  int _fex = 0; \
  _FP_DECL_EX
#else
# define FP_DECL_EX int _fex = 0
#endif
|
||||
|
||||
/* Exception and rounding-state hooks.  Every definition below is a
   default that applies only when the macro was not already defined
   (sfp-machine.h is included before this point), so a machine-specific
   override never clashes with it.  */

/* Initialize any machine-specific state used in FP_ROUNDMODE,
   FP_TRAPPING_EXCEPTIONS or FP_HANDLE_EXCEPTIONS.  */
#ifndef FP_INIT_ROUNDMODE
# define FP_INIT_ROUNDMODE do {} while (0)
#endif

/* Initialize any machine-specific state used in
   FP_TRAPPING_EXCEPTIONS or FP_HANDLE_EXCEPTIONS.  */
#ifndef FP_INIT_TRAPPING_EXCEPTIONS
# define FP_INIT_TRAPPING_EXCEPTIONS FP_INIT_ROUNDMODE // 186
#endif

/* Initialize any machine-specific state used in
   FP_HANDLE_EXCEPTIONS.  */
#ifndef FP_INIT_EXCEPTIONS
# define FP_INIT_EXCEPTIONS FP_INIT_TRAPPING_EXCEPTIONS // 192
#endif

/* Called at the end of each routine; does nothing by default.  */
#ifndef FP_HANDLE_EXCEPTIONS
# define FP_HANDLE_EXCEPTIONS do {} while (0) // 196
#endif

#ifndef FP_DENORM_ZERO
# define FP_DENORM_ZERO 0 // 201
#endif

/* The *_SP / *_RAW_SP packing macros test this before storing a
   result; default to always storing.  Guarded so an earlier definition
   wins.  NOTE(review): this default was absent from this copy of the
   header -- confirm no other header is expected to supply it.  */
#ifndef FP_INHIBIT_RESULTS
# define FP_INHIBIT_RESULTS 0
#endif

/* Accumulate exception flags in the `_fex' local declared by
   FP_DECL_EX.  */
#ifndef FP_SET_EXCEPTION
# define FP_SET_EXCEPTION(ex) _fex |= (ex) // 212
#endif

/* Flags accumulated so far.  */
#ifndef FP_CUR_EXCEPTIONS
# define FP_CUR_EXCEPTIONS (_fex) // 215
#endif

#ifndef FP_TRAPPING_EXCEPTIONS
# define FP_TRAPPING_EXCEPTIONS 0 // 219
#endif
|
||||
|
||||
|
||||
// 259
|
||||
/* Rounding of a working fraction X made of WC words.  The low
   _FP_WORKBITS (3) bits of the fraction are the extra bits; bit 3 is
   the least significant bit that survives.  */

/* Round to nearest: add _FP_WORK_ROUND unless the low four bits are
   exactly _FP_WORK_ROUND, i.e. unless the value is an exact tie whose
   surviving least significant bit is already 0.  */
#define _FP_ROUND_NEAREST(wc, X) \
  do \
    { \
      if ((_FP_FRAC_LOW_##wc (X) & 15) != _FP_WORK_ROUND) \
        _FP_FRAC_ADDI_##wc (X, _FP_WORK_ROUND); \
    } \
  while (0)

/* Round toward zero: nothing to add.  */
#define _FP_ROUND_ZERO(wc, X) (void) 0

/* Round toward +Inf: bump a non-negative value (X##_s == 0) that has
   any extra bit set.  */
#define _FP_ROUND_PINF(wc, X) \
  do \
    { \
      if (!X##_s && (_FP_FRAC_LOW_##wc (X) & 7)) \
        _FP_FRAC_ADDI_##wc (X, _FP_WORK_LSB); \
    } \
  while (0)

/* Round toward -Inf: bump a negative value (X##_s != 0) that has any
   extra bit set.  */
#define _FP_ROUND_MINF(wc, X) \
  do \
    { \
      if (X##_s && (_FP_FRAC_LOW_##wc (X) & 7)) \
        _FP_FRAC_ADDI_##wc (X, _FP_WORK_LSB); \
    } \
  while (0)

/* Entry point: when any extra bit is set, record FP_EX_INEXACT and
   apply the adjustment selected by FP_ROUNDMODE.  A mode value other
   than the four FP_RND_* constants leaves X unchanged.  */
#define _FP_ROUND(wc, X) \
  do \
    { \
      if (_FP_FRAC_LOW_##wc (X) & 7) \
        { \
          FP_SET_EXCEPTION (FP_EX_INEXACT); \
          switch (FP_ROUNDMODE) \
            { \
            case FP_RND_NEAREST: \
              _FP_ROUND_NEAREST (wc, X); \
              break; \
            case FP_RND_ZERO: \
              _FP_ROUND_ZERO (wc, X); \
              break; \
            case FP_RND_PINF: \
              _FP_ROUND_PINF (wc, X); \
              break; \
            case FP_RND_MINF: \
              _FP_ROUND_MINF (wc, X); \
              break; \
            } \
        } \
    } \
  while (0)
|
||||
|
||||
/* Operand classes.  Each value fits in two bits.  */
#define FP_CLS_NORMAL 0 // 310
#define FP_CLS_ZERO 1
#define FP_CLS_INF 2
#define FP_CLS_NAN 3

/* Pack two class values into one integer (x in bits 3..2, y in bits
   1..0) so that a pair of classes can be used as a single switch
   label.  */
#define _FP_CLS_COMBINE(x, y) (((x) << 2) | (y)) // 315
|
||||
|
||||
#include "op-1.h"
|
||||
#include "op-2.h"
|
||||
#include "op-4.h"
|
||||
#include "op-8.h"
|
||||
#include "op-common.h"
|
||||
|
||||
/* Sigh.  Silly things longlong.h needs: the names below are set up
   before longlong.h is included further down.  */
#define UWtype _FP_W_TYPE
#define W_TYPE_SIZE _FP_W_TYPE_SIZE

/* Integer types pinned to the compiler's QI, SI and DI machine modes
   via the `mode' attribute.  */
typedef int QItype __attribute__ ((mode (QI)));
typedef int SItype __attribute__ ((mode (SI)));
typedef int DItype __attribute__ ((mode (DI)));
typedef unsigned int UQItype __attribute__ ((mode (QI)));
typedef unsigned int USItype __attribute__ ((mode (SI)));
typedef unsigned int UDItype __attribute__ ((mode (DI)));

/* Unsigned half-word type: HI mode for a 32-bit working word, USItype
   for a 64-bit one.  For any other word size UHWtype is not declared
   here.  */
#if _FP_W_TYPE_SIZE == 32
typedef unsigned int UHWtype __attribute__ ((mode (HI)));
#elif _FP_W_TYPE_SIZE == 64
typedef USItype UHWtype;
#endif

/* Result type of the comparison routines unless defined earlier.  */
#ifndef CMPtype
# define CMPtype int
#endif

/* Bit widths of SItype and DItype.  */
#define SI_BITS (__CHAR_BIT__ * (int) sizeof (SItype))
#define DI_BITS (__CHAR_BIT__ * (int) sizeof (DItype))
|
||||
|
||||
#include "longlong.h"
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
21
src/linpack/soft-fp/subdf3.c
Normal file
21
src/linpack/soft-fp/subdf3.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
#include "soft-fp.h"
|
||||
#include "double.h"
|
||||
|
||||
DFtype
|
||||
__subdf3 (DFtype a, DFtype b)
|
||||
{
|
||||
FP_DECL_EX;
|
||||
FP_DECL_D (A);
|
||||
FP_DECL_D (B);
|
||||
FP_DECL_D (R);
|
||||
DFtype r;
|
||||
|
||||
FP_INIT_ROUNDMODE;
|
||||
FP_UNPACK_SEMIRAW_D (A, a);
|
||||
FP_UNPACK_SEMIRAW_D (B, b);
|
||||
FP_SUB_D (R, A, B);
|
||||
FP_PACK_SEMIRAW_D (r, R);
|
||||
FP_HANDLE_EXCEPTIONS;
|
||||
|
||||
return r;
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue