-
Notifications
You must be signed in to change notification settings - Fork 0
/
binary16.c
106 lines (79 loc) · 1.98 KB
/
binary16.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#include <stdio.h> // printf
#include "binary16.h"
// Field widths, in bits, of the exponent and stored fraction of each format.
// The fraction widths count only the explicitly stored bits (the leading
// significand bit is implicit), so binary64 is 52, not 53.
#define EXP16_BITS (5)
#define FRC16_BITS (10)
#define EXP32_BITS (8)
#define FRC32_BITS (23)
#define EXP64_BITS (11)
#define FRC64_BITS (52)

// Unbiased exponent range of normal binary16 values.
#define EMIN16 (-14)
#define EMAX16 (15)

// Exponent biases: stored exponent field = unbiased exponent + bias.
#define EBIAS16 (15)
#define EBIAS32 (127)
#define EBIAS64 (1023)

// Largest value that fits in the binary16 fraction field.
#define FMAX16 (0x3ff)

// Mask with the low `nbits` bits set. `nbits` must be less than 64.
#define BIT_MASK(nbits) ((((uint64_t)1) << (nbits)) - 1)
#define EXP32_MASK (BIT_MASK(EXP32_BITS))
#define EXP64_MASK (BIT_MASK(EXP64_BITS))
#define FRC32_MASK (BIT_MASK(FRC32_BITS))
#define FRC64_MASK (BIT_MASK(FRC64_BITS))

// Combine raw binary16 fields: sign in bit 15, exponent field in bits 14..10,
// fraction in bits 9..0. No masking or biasing is done here; `exp` must
// already be the non-negative stored (biased) exponent field.
#define PACK(sign, exp, frac) (((sign) << 15) | ((exp) << 10) | (frac))

#define POSINF16 ((uint16_t)(0x7c00)) // 0111 1100 0000 0000
#define NEGINF16 ((uint16_t)(0xfc00)) // 1111 1100 0000 0000
// Returns the 1-based position of the highest set bit of `value`, i.e. the
// number of right shifts needed to reduce it to zero. Returns 0 for 0.
size_t
msb_ndx(size_t value) {
  size_t bits = 0;
  for (; value != 0; value >>= 1) {
    ++bits;
  }
  return bits;
}
binary16_t
binary16_from_components(
uint8_t sign, int16_t exp, uint64_t frac) {
uint16_t retval = 0;
if (exp < EMIN16) {
retval = POSINF16;
} else if (exp > EMAX16) {
retval = NEGINF16;
} else if (frac > FMAX16) {
// TODO
} else {
retval = PACK(sign, exp, frac);
}
return retval;
}
typedef union {
float f;
uint32_t i;
} binary32_t;
binary16_t
binary16_from_binary32(float x) {
binary32_t u;
u.f = x;
uint8_t sign = u.i >> 31;
int16_t exp = ((int16_t)((u.i >> FRC32_BITS) & EXP32_MASK)) - EBIAS32;
uint64_t frac = u.i & FRC32_MASK;
printf("%d %d %lx\n", sign, exp, frac);
return binary16_from_components(sign, exp, frac);
}
typedef union {
double f;
uint64_t i;
} binary64_t;
binary16_t
binary16_from_binary64(double x) {
binary64_t u;
u.f = x;
uint8_t sign = u.i >> 63;
int16_t exp = ((int16_t)((u.i >> FRC64_BITS) & EXP64_MASK)) - EBIAS64;
uint64_t frac = u.i & FRC64_MASK;
printf("%d %d %lx\n", sign, exp, frac);
return binary16_from_components(sign, exp, frac);
}
// Placeholder for binary16 addition. Not implemented: both operands are
// ignored and 0 is returned.
binary16_t
binary16_add(binary16_t x, binary16_t y) {
  // TODO
  (void)x;
  (void)y;
  return 0;
}