@@ -18,118 +18,85 @@ namespace __llvm_libc {
1818
1919namespace fma {
2020
21- static constexpr uint32_t FAST_PASS_BOUND = 0x5880 '0000U ; // 2^50
21+ static constexpr uint32_t FAST_PASS_BOUND = 0x5680 '0000U ; // 2^46
2222
2323// Digits of 1/pi, generated by Sollya with:
24- // > a0 = D(1 /pi);
25- // > a1 = D(1 /pi - a0);
26- // > a2 = D(1 /pi - a0 - a1);
27- // > a3 = D(1 /pi - a0 - a1 - a2);
28- static constexpr double ONE_OVER_PI [5 ] = {
29- 0x1 .45f306dc9c883p- 2 , -0x1 .6b01ec5417056p-56 , -0x1 .6447e493ad4cep-110 ,
30- 0x1 .e21c820ff28b2p -164 , -0x1 .508510ea79237p-219 };
24+ // > a0 = D(16 /pi);
25+ // > a1 = D(16 /pi - a0);
26+ // > a2 = D(16 /pi - a0 - a1);
27+ // > a3 = D(16 /pi - a0 - a1 - a2);
28+ static constexpr double SIXTEEN_OVER_PI [5 ] = {
29+ 0x1 .45f306dc9c883p+ 2 , -0x1 .6b01ec5417056p-52 , -0x1 .6447e493ad4cep-106 ,
30+ 0x1 .e21c820ff28b2p -160 , -0x1 .508510ea79237p-215 };
3131
3232// Return k and y, where
33- // k = round(x / pi) and y = (x / pi) - k.
33+ // k = round(x * 16 / pi) and y = (x * 16 / pi) - k.
3434// Assume x is non-negative.
3535static inline int64_t small_range_reduction (double x, double &y) {
36- double kd = fputil::nearest_integer (x * ONE_OVER_PI [0 ]);
37- y = fputil::fma (x, ONE_OVER_PI [0 ], -kd);
38- y = fputil::fma (x, ONE_OVER_PI [1 ], y);
36+ double kd = fputil::nearest_integer (x * SIXTEEN_OVER_PI [0 ]);
37+ y = fputil::fma (x, SIXTEEN_OVER_PI [0 ], -kd);
38+ y = fputil::fma (x, SIXTEEN_OVER_PI [1 ], y);
3939 return static_cast <int64_t >(kd);
4040}
4141
4242// Return k and y, where
43- // k = round(x / pi) and y = (x / pi) - k.
43+ // k = round(x * 16 / pi) and y = (x * 16 / pi) - k.
4444static inline int64_t large_range_reduction (double x, int x_exp, double &y) {
45- // 2^50 <= |x| < 2^104
46- if (x_exp < 103 ) {
47- // - When x < 2^104, the unit bit is contained in the full exact product of
48- // x * ONE_OVER_PI[0].
49- // - When 2^50 <= |x| < 2^55, the unit bit is contained
50- // in the last 8 bits of double(x * ONE_OVER_PI[0]).
51- // - When |x| >= 2^55, the LSB of double(x * ONE_OVER_PI[0]) is at least 2.
52- fputil::FPBits<double > prod_hi (x * ONE_OVER_PI[0 ]);
53- prod_hi.bits &= (x_exp < 55 ) ? (~0xffULL ) : (~0ULL ); // |x| < 2^55
45+ // 2^46 <= |x| < 2^99
46+ if (x_exp < 99 ) {
47+ // - When x < 2^99, the full exact product of x * SIXTEEN_OVER_PI[0]
48+ // contains at least one integral bit <= 2^4.
49+ // - When 2^46 <= |x| < 2^56, the lowest 5 unit bits are contained
50+ // in the last 10 bits of double(x * SIXTEEN_OVER_PI[0]).
51+ // - When |x| >= 2^56, the LSB of double(x * SIXTEEN_OVER_PI[0]) is at least
52+ // 32.
53+ fputil::FPBits<double > prod_hi (x * SIXTEEN_OVER_PI[0 ]);
54+ prod_hi.bits &= (x_exp < 56 ) ? (~0xfffULL ) : (~0ULL ); // |x| < 2^56
5455 double k_hi = fputil::nearest_integer (static_cast <double >(prod_hi));
55- double truncated_prod = fputil::fma (x, ONE_OVER_PI [0 ], -k_hi);
56- double prod_lo = fputil::fma (x, ONE_OVER_PI [1 ], truncated_prod);
56+ double truncated_prod = fputil::fma (x, SIXTEEN_OVER_PI [0 ], -k_hi);
57+ double prod_lo = fputil::fma (x, SIXTEEN_OVER_PI [1 ], truncated_prod);
5758 double k_lo = fputil::nearest_integer (prod_lo);
58- y = fputil::fma (x, ONE_OVER_PI [1 ], truncated_prod - k_lo);
59- y = fputil::fma (x, ONE_OVER_PI [2 ], y);
60- y = fputil::fma (x, ONE_OVER_PI [3 ], y);
59+ y = fputil::fma (x, SIXTEEN_OVER_PI [1 ], truncated_prod - k_lo);
60+ y = fputil::fma (x, SIXTEEN_OVER_PI [2 ], y);
61+ y = fputil::fma (x, SIXTEEN_OVER_PI [3 ], y);
6162
6263 return static_cast <int64_t >(k_lo);
6364 }
6465
65- // - When x >= 2^104, the full exact product of x * ONE_OVER_PI[0] does not
66- // contain the unit bit, so we can ignore it completely.
67- // - When 2^104 <= |x| < 2^109, the unit bit is contained
68- // in the last 8 bits of double(x * ONE_OVER_PI[1]).
69- // - When |x| >= 2^109, the LSB of double(x * ONE_OVER_PI[1]) is at least 2.
70- fputil::FPBits<double > prod_hi (x * ONE_OVER_PI[1 ]);
71- prod_hi.bits &= (x_exp < 109 ) ? (~0xffULL ) : (~0ULL ); // |x| < 2^55
66+ // - When x >= 2^110, the full exact product of x * SIXTEEN_OVER_PI[0] does
67+ // not contain any of the lowest 5 unit bits, so we can ignore it completely.
68+ // - When 2^99 <= |x| < 2^110, the lowest 5 unit bits are contained
69+ // in the last 12 bits of double(x * SIXTEEN_OVER_PI[1]).
70+ // - When |x| >= 2^110, the LSB of double(x * SIXTEEN_OVER_PI[1]) is at
71+ // least 32.
72+ fputil::FPBits<double > prod_hi (x * SIXTEEN_OVER_PI[1 ]);
73+ prod_hi.bits &= (x_exp < 110 ) ? (~0xfffULL ) : (~0ULL ); // |x| < 2^110
7274 double k_hi = fputil::nearest_integer (static_cast <double >(prod_hi));
73- double truncated_prod = fputil::fma (x, ONE_OVER_PI [1 ], -k_hi);
74- double prod_lo = fputil::fma (x, ONE_OVER_PI [2 ], truncated_prod);
75+ double truncated_prod = fputil::fma (x, SIXTEEN_OVER_PI [1 ], -k_hi);
76+ double prod_lo = fputil::fma (x, SIXTEEN_OVER_PI [2 ], truncated_prod);
7577 double k_lo = fputil::nearest_integer (prod_lo);
76- y = fputil::fma (x, ONE_OVER_PI [2 ], truncated_prod - k_lo);
77- y = fputil::fma (x, ONE_OVER_PI [3 ], y);
78- y = fputil::fma (x, ONE_OVER_PI [4 ], y);
78+ y = fputil::fma (x, SIXTEEN_OVER_PI [2 ], truncated_prod - k_lo);
79+ y = fputil::fma (x, SIXTEEN_OVER_PI [3 ], y);
80+ y = fputil::fma (x, SIXTEEN_OVER_PI [4 ], y);
7981
8082 return static_cast <int64_t >(k_lo);
8183}
8284
8385// Exceptional cases.
84- static constexpr int N_EXCEPT_SMALL = 9 ;
86+ static constexpr int N_EXCEPTS = 2 ;
8587
86- static constexpr fputil::ExceptionalValues<float , N_EXCEPT_SMALL> SmallExcepts {
88+ static constexpr fputil::ExceptionalValues<float , N_EXCEPTS> SinfExcepts {
8789 /* inputs */ {
8890 0x3fa7832a , // x = 0x1.4f0654p0
89- 0x40171973 , // x = 0x1.2e32e6p1
90- 0x4096cbe4 , // x = 0x1.2d97c8p2
91- 0x433b7490 , // x = 0x1.76e92p7
92- 0x437ce5f1 , // x = 0x1.f9cbe2p7
93- 0x46199998 , // x = 0x1.33333p13
94- 0x474d246f , // x = 0x1.9a48dep15
95- 0x4afdece4 , // x = 0x1.fbd9c8p22
9691 0x55cafb2a , // x = 0x1.95f654p44
9792 },
9893 /* outputs (RZ, RU offset, RD offset, RN offset) */
9994 {
10095 {0x3f7741b5 , 1 , 0 , 1 }, // x = 0x1.4f0654p0, sin(x) = 0x1.ee836ap-1 (RZ)
101- {0x3f34290f , 1 , 0 , 1 }, // x = 0x1.2e32e6p1, sin(x) = 0x1.68521ep-1 (RZ)
102- {0xbf7fffff , 0 , 1 , 1 }, // x = 0x1.2d97c8p2, sin(x) = -0x1.fffffep-1 (RZ)
103- {0xbf5cce62 , 0 , 1 , 0 }, // x = 0x1.76e92p7, sin(x) = -0x1.b99cc4p-1 (RZ)
104- {0x3f7fffff , 1 , 0 , 1 }, // x = 0x1.f9cbe2p7, sin(x) = 0x1.fffffep-1 (RZ)
105- {0xbeb1fa5d , 0 , 1 , 0 }, // x = 0x1.33333p13, sin(x) = -0x1.63f4bap-2 (RZ)
106- {0x3f7fffff , 1 , 0 , 1 }, // x = 0x1.9a48dep15, sin(x) = 0x1.fffffep-1 (RZ)
107- {0xbf7fb6e0 , 0 , 1 , 1 }, // x = 0x1.fbd9c8p22, sin(x) = -0x1.ff6dcp-1 (RZ)
10896 {0xbf7e7a16 , 0 , 1 ,
10997 1 }, // x = 0x1.95f654p44, sin(x) = -0x1.fcf42cp-1 (RZ)
11098 }};
11199
112- static constexpr int N_EXCEPT_LARGE = 5 ;
113-
114- static constexpr fputil::ExceptionalValues<float , N_EXCEPT_LARGE> LargeExcepts{
115- /* inputs */ {
116- 0x5ebcfdde , // x = 0x1.79fbbcp62
117- 0x5fa6eba7 , // x = 0x1.4dd74ep64
118- 0x6386134e , // x = 0x1.0c269cp72
119- 0x6a1976f1 , // x = 0x1.32ede2p85
120- 0x727669d4 , // x = 0x1.ecd3a8p101
121- },
122- /* outputs (RZ, RU offset, RD offset, RN offset) */
123- {
124- {0x3f50622d , 1 , 0 , 0 }, // x = 0x1.79fbbcp62, sin(x) = 0x1.a0c45ap-1 (RZ)
125- {0xbe52464a , 0 , 1 ,
126- 0 }, // x = 0x1.4dd74ep64, sin(x) = -0x1.a48c94p-3 (RZ)
127- {0x3f7cb2e7 , 1 , 0 , 0 }, // x = 0x1.0c269cp72, sin(x) = 0x1.f965cep-1 (RZ)
128- {0x3f7fffff , 1 , 0 , 1 }, // x = 0x1.32ede2p85, sin(x) = 0x1.fffffep-1 (RZ)
129- {0xbf7a781d , 0 , 1 ,
130- 0 }, // x = 0x1.ecd3a8p101, sin(x) = -0x1.f4f038p-1 (RZ)
131- }};
132-
133100} // namespace fma
134101
135102} // namespace __llvm_libc
0 commit comments