-
Notifications
You must be signed in to change notification settings - Fork 8
/
divsf3.c
252 lines (224 loc) · 10.1 KB
/
divsf3.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
//===-- lib/divsf3.c - Single-precision division ------------------*- C -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements single-precision soft-float division
// with the IEEE-754 default rounding (to nearest, ties to even).
//
// For simplicity, this implementation currently flushes denormals to zero.
// It should be a fairly straightforward exercise to implement gradual
// underflow with correct rounding.
//
//===----------------------------------------------------------------------===//
// This file is originally a part of the compiler-RT/LLVM compiler infrastructure
// and it has been edited to fit to FGPU
#include "fp_lib.h"
inline unsigned __muldsi3_high(unsigned a, unsigned b) {
const int bits_in_word_2 = 16;
const unsigned lower_mask = (unsigned)~0 >> bits_in_word_2;
unsigned ll = ((a & lower_mask) * (b & lower_mask));
unsigned lh = (a >> bits_in_word_2) * (b & lower_mask);
unsigned hl = (b >> bits_in_word_2) * (a & lower_mask);
unsigned hh = (a >> bits_in_word_2) * (b >> bits_in_word_2);
unsigned res = ll >> bits_in_word_2;
res += lh & lower_mask;
res += hl & lower_mask;
res >>= bits_in_word_2;
res += lh >> bits_in_word_2;
res += hl >> bits_in_word_2;
res += hh;
return res;
}
inline long muldsi3(unsigned a, unsigned b) {
dwords r;
const int bits_in_word_2 = 16;
const unsigned lower_mask = (unsigned)~0 >> bits_in_word_2;
r.s.low = (a & lower_mask) * (b & lower_mask);
unsigned t = r.s.low >> bits_in_word_2;
r.s.low &= lower_mask;
t += (a >> bits_in_word_2) * (b & lower_mask);
r.s.low += (t & lower_mask) << bits_in_word_2;
r.s.high = t >> bits_in_word_2;
t = r.s.low >> bits_in_word_2;
r.s.low &= lower_mask;
t += (b >> bits_in_word_2) * (a & lower_mask);
r.s.low += (t & lower_mask) << bits_in_word_2;
r.s.high += t >> bits_in_word_2;
r.s.high += (a >> bits_in_word_2) * (b >> bits_in_word_2);
return r.all;
}
float __divsf3(float a, float b) {
const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
const unsigned quotientSign = (toRep(a) ^ toRep(b)) & signBit;
unsigned aSignificand = toRep(a) & significandMask;
unsigned bSignificand = toRep(b) & significandMask;
int scale = 0;
// Detect if a or b is zero, denormal, infinity, or NaN.
if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) {
const unsigned aAbs = toRep(a) & absMask;
const unsigned bAbs = toRep(b) & absMask;
// NaN / anything = qNaN
if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
// anything / NaN = qNaN
if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
if (aAbs == infRep) {
// infinity / infinity = NaN
if (bAbs == infRep) return fromRep(qnanRep);
// infinity / anything else = +/- infinity
else return fromRep(aAbs | quotientSign);
}
// anything else / infinity = +/- 0
if (bAbs == infRep) return fromRep(quotientSign);
if (!aAbs) {
// zero / zero = NaN
if (!bAbs) return fromRep(qnanRep);
// zero / anything else = +/- zero
else return fromRep(quotientSign);
}
// anything else / zero = +/- infinity
if (!bAbs) return fromRep(infRep | quotientSign);
// one or both of a or b is denormal, the other (if applicable) is a
// normal number. Renormalize one or both of a and b, and set scale to
// include the necessary exponent adjustment.
if (aAbs < implicitBit) scale += normalize(&aSignificand);
if (bAbs < implicitBit) scale -= normalize(&bSignificand);
/* // NaN / anything = qNaN or anything / NaN = qNaN */
/* unsigned resNaN = aAbs > infRep || bAbs > infRep; */
/* // infinity / infinity = NaN */
/* resNaN = resNaN || (aAbs==infRep && bAbs==infRep); */
/* // zero / zero = NaN */
/* resNaN = resNaN || (!aAbs && !bAbs); */
/* if (resNaN) return fromRep(qnanRep); */
/* */
/* // infinity / anything else = +/- infinity */
/* // anything else / zero = +/- infinity */
/* unsigned resInf = aAbs==infRep || !bAbs; */
/* if (resInf) return fromRep(infRep | quotientSign); */
/* */
/* // anything else / infinity = +/- 0 */
/* // zero / anything else = +/- zero */
/* unsigned resZero = bAbs==infRep || !aAbs; */
/* if (resZero) return fromRep(quotientSign); */
/* */
/* // one or both of a or b is denormal, the other (if applicable) is a */
/* // normal number. Renormalize one or both of a and b, and set scale to */
/* // include the necessary exponent adjustment. */
/* if (aAbs < implicitBit) scale += normalize(&aSignificand); */
/* if (bAbs < implicitBit) scale -= normalize(&bSignificand); */
}
// Or in the implicit significand bit. (If we fell through from the
// denormal path it was already set by normalize( ), but setting it twice
// won't hurt anything.)
aSignificand |= implicitBit;
bSignificand |= implicitBit;
int quotientExponent = aExponent - bExponent + scale;
// Align the significand of b as a Q31 fixed-point number in the range
// [1, 2.0) and get a Q32 approximate reciprocal using a small minimax
// polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This
// is accurate to about 3.5 binary digits.
unsigned q31b = bSignificand << 8;
unsigned reciprocal = ((unsigned)0x7504f333) - q31b;
// Now refine the reciprocal estimate using a Newton-Raphson iteration:
//
// x1 = x0 * (2 - x0 * b)
//
// This doubles the number of correct binary digits in the approximation
// with each iteration, so after three iterations, we have about 28 binary
// digits of accuracy.
unsigned correction;
/* unsigned correctionOverflow; */
/* correction = -(__muldsi3_high(reciprocal, q31b)); */
/* correctionOverflow = correction & 0x80000000 ? reciprocal:0; */
/* reciprocal = __muldsi3_high(reciprocal, 2*correction) + correctionOverflow; */
/* correction = -(__muldsi3_high(reciprocal, q31b)); */
/* correctionOverflow = correction & 0x80000000 ? reciprocal:0; */
/* reciprocal = __muldsi3_high(reciprocal, 2*correction) + correctionOverflow; */
/* correction = -(__muldsi3_high(reciprocal, q31b)); */
/* correctionOverflow = correction & 0x80000000 ? reciprocal:0; */
/* reciprocal = __muldsi3_high(reciprocal, 2*correction) + correctionOverflow; */
correction = -(__muldsi3_high(reciprocal, q31b));
reciprocal = muldsi3(reciprocal, correction) >> 31;
correction = -(__muldsi3_high(reciprocal, q31b));
reciprocal = muldsi3(reciprocal, correction) >> 31;
correction = -(__muldsi3_high(reciprocal, q31b));
reciprocal = muldsi3(reciprocal, correction) >> 31;
// Exhaustive testing shows that the error in reciprocal after three steps
// is in the interval [-0x1.f58108p-31, 0x1.d0e48cp-29], in line with our
// expectations. We bump the reciprocal by a tiny value to force the error
// to be strictly positive (in the range [0x1.4fdfp-37,0x1.287246p-29], to
// be specific). This also causes 1/1 to give a sensible approximation
// instead of zero (due to overflow).
reciprocal -= 2;
// The numerical reciprocal is accurate to within 2^-28, lies in the
// interval [0x1.000000eep-1, 0x1.fffffffcp-1], and is strictly smaller
// than the true reciprocal of b. Multiplying a by this reciprocal thus
// gives a numerical q = a/b in Q24 with the following properties:
//
// 1. q < a/b
// 2. q is in the interval [0x1.000000eep-1, 0x1.fffffffcp0)
// 3. the error in q is at most 2^-24 + 2^-27 -- the 2^24 term comes
// from the fact that we truncate the product, and the 2^27 term
// is the error in the reciprocal of b scaled by the maximum
// possible value of a. As a consequence of this error bound,
// either q or nextafter(q) is the correctly rounded
unsigned quotient = __muldsi3_high(reciprocal, aSignificand << 1);
// unsigned quotient = (unsigned long)reciprocal*(aSignificand << 1) >> 32;
// Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).
// In either case, we are going to compute a residual of the form
//
// r = a - q*b
//
// We know from the construction of q that r satisfies:
//
// 0 <= r < ulp(q)*b
//
// if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we
// already have the correct result. The exact halfway case cannot occur.
// We also take this time to right shift quotient if it falls in the [1,2)
// range and adjust the exponent accordingly.
unsigned residual;
unsigned decreaseExponent = quotient < (implicitBit << 1);
quotientExponent -= decreaseExponent?1:0;
quotient >>= decreaseExponent?0:1;
residual = aSignificand << (decreaseExponent?24:23);
residual -= quotient * bSignificand;
/* if (quotient < (implicitBit << 1)) { */
/* residual = (aSignificand << 24) - quotient * bSignificand; */
/* quotientExponent--; */
/* } else { */
/* quotient >>= 1; */
/* residual = (aSignificand << 23) - quotient * bSignificand; */
/* } */
const int writtenExponent = quotientExponent + exponentBias;
if (writtenExponent >= maxExponent) {
// If we have overflowed the exponent, return infinity.
return fromRep(infRep | quotientSign);
}
else if (writtenExponent < 1) {
// Flush denormals to zero.
const bool round = (residual << 1) > bSignificand;
unsigned shift = 1-(unsigned)writtenExponent;
shift = shift > 30 ? 31:shift;
unsigned res = (quotient+round) >> shift;
res |= quotientSign;
return fromRep(res);
}
else {
const bool round = (residual << 1) > bSignificand;
// Clear the implicit bit
unsigned absResult = quotient & significandMask;
// Insert the exponent
absResult |= (unsigned)writtenExponent << significandBits;
// Round
absResult += round;
// cout << "quotient is " << quotient << "(0x" << std::hex << quotient << ")" << endl;
// Insert the sign and return
return fromRep(absResult | quotientSign);
}
}