diyfp.h
1 // Tencent is pleased to support the open source community by making RapidJSON available.
2 //
3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
4 //
5 // Licensed under the MIT License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // http://opensource.org/licenses/MIT
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
15 // This is a C++ header-only implementation of Grisu2 algorithm from the publication:
16 // Loitsch, Florian. "Printing floating-point numbers quickly and accurately with
17 // integers." ACM Sigplan Notices 45.6 (2010): 233-243.
18 
19 #ifndef RAPIDJSON_DIYFP_H_
20 #define RAPIDJSON_DIYFP_H_
21 
22 #include "../rapidjson.h"
23 #include <limits>
24 
25 #if defined(_MSC_VER) && defined(_M_AMD64) && !defined(__INTEL_COMPILER)
26 #include <intrin.h>
27 #pragma intrinsic(_BitScanReverse64)
28 #pragma intrinsic(_umul128)
29 #endif
30 
31 RAPIDJSON_NAMESPACE_BEGIN
32 namespace internal {
33 
34 #ifdef __GNUC__
35 RAPIDJSON_DIAG_PUSH
36 RAPIDJSON_DIAG_OFF(effc++)
37 #endif
38 
39 #ifdef __clang__
40 RAPIDJSON_DIAG_PUSH
41 RAPIDJSON_DIAG_OFF(padded)
42 #endif
43 
44 struct DiyFp {
45  DiyFp() : f(), e() {}
46 
47  DiyFp(uint64_t fp, int exp) : f(fp), e(exp) {}
48 
49  explicit DiyFp(double d) {
50  union {
51  double d;
52  uint64_t u64;
53  } u = { d };
54 
55  int biased_e = static_cast<int>((u.u64 & kDpExponentMask) >> kDpSignificandSize);
56  uint64_t significand = (u.u64 & kDpSignificandMask);
57  if (biased_e != 0) {
58  f = significand + kDpHiddenBit;
59  e = biased_e - kDpExponentBias;
60  }
61  else {
62  f = significand;
63  e = kDpMinExponent + 1;
64  }
65  }
66 
67  DiyFp operator-(const DiyFp& rhs) const {
68  return DiyFp(f - rhs.f, e);
69  }
70 
71  DiyFp operator*(const DiyFp& rhs) const {
72 #if defined(_MSC_VER) && defined(_M_AMD64)
73  uint64_t h;
74  uint64_t l = _umul128(f, rhs.f, &h);
75  if (l & (uint64_t(1) << 63)) // rounding
76  h++;
77  return DiyFp(h, e + rhs.e + 64);
78 #elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
79  __extension__ typedef unsigned __int128 uint128;
80  uint128 p = static_cast<uint128>(f) * static_cast<uint128>(rhs.f);
81  uint64_t h = static_cast<uint64_t>(p >> 64);
82  uint64_t l = static_cast<uint64_t>(p);
83  if (l & (uint64_t(1) << 63)) // rounding
84  h++;
85  return DiyFp(h, e + rhs.e + 64);
86 #else
87  const uint64_t M32 = 0xFFFFFFFF;
88  const uint64_t a = f >> 32;
89  const uint64_t b = f & M32;
90  const uint64_t c = rhs.f >> 32;
91  const uint64_t d = rhs.f & M32;
92  const uint64_t ac = a * c;
93  const uint64_t bc = b * c;
94  const uint64_t ad = a * d;
95  const uint64_t bd = b * d;
96  uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
97  tmp += 1U << 31; /// mult_round
98  return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
99 #endif
100  }
101 
102  DiyFp Normalize() const {
103  RAPIDJSON_ASSERT(f != 0); // https://stackoverflow.com/a/26809183/291737
104 #if defined(_MSC_VER) && defined(_M_AMD64)
105  unsigned long index;
106  _BitScanReverse64(&index, f);
107  return DiyFp(f << (63 - index), e - (63 - index));
108 #elif defined(__GNUC__) && __GNUC__ >= 4
109  int s = __builtin_clzll(f);
110  return DiyFp(f << s, e - s);
111 #else
112  DiyFp res = *this;
113  while (!(res.f & (static_cast<uint64_t>(1) << 63))) {
114  res.f <<= 1;
115  res.e--;
116  }
117  return res;
118 #endif
119  }
120 
121  DiyFp NormalizeBoundary() const {
122  DiyFp res = *this;
123  while (!(res.f & (kDpHiddenBit << 1))) {
124  res.f <<= 1;
125  res.e--;
126  }
127  res.f <<= (kDiySignificandSize - kDpSignificandSize - 2);
128  res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2);
129  return res;
130  }
131 
132  void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const {
133  DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
134  DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
135  mi.f <<= mi.e - pl.e;
136  mi.e = pl.e;
137  *plus = pl;
138  *minus = mi;
139  }
140 
141  double ToDouble() const {
142  union {
143  double d;
144  uint64_t u64;
145  }u;
146  RAPIDJSON_ASSERT(f <= kDpHiddenBit + kDpSignificandMask);
147  if (e < kDpDenormalExponent) {
148  // Underflow.
149  return 0.0;
150  }
151  if (e >= kDpMaxExponent) {
152  // Overflow.
153  return std::numeric_limits<double>::infinity();
154  }
155  const uint64_t be = (e == kDpDenormalExponent && (f & kDpHiddenBit) == 0) ? 0 :
156  static_cast<uint64_t>(e + kDpExponentBias);
157  u.u64 = (f & kDpSignificandMask) | (be << kDpSignificandSize);
158  return u.d;
159  }
160 
161  static const int kDiySignificandSize = 64;
162  static const int kDpSignificandSize = 52;
163  static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
164  static const int kDpMaxExponent = 0x7FF - kDpExponentBias;
165  static const int kDpMinExponent = -kDpExponentBias;
166  static const int kDpDenormalExponent = -kDpExponentBias + 1;
167  static const uint64_t kDpExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
168  static const uint64_t kDpSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
169  static const uint64_t kDpHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
170 
171  uint64_t f;
172  int e;
173 };
174 
175 inline DiyFp GetCachedPowerByIndex(size_t index) {
176  // 10^-348, 10^-340, ..., 10^340
177  static const uint64_t kCachedPowers_F[] = {
178  RAPIDJSON_UINT64_C2(0xfa8fd5a0, 0x081c0288), RAPIDJSON_UINT64_C2(0xbaaee17f, 0xa23ebf76),
179  RAPIDJSON_UINT64_C2(0x8b16fb20, 0x3055ac76), RAPIDJSON_UINT64_C2(0xcf42894a, 0x5dce35ea),
180  RAPIDJSON_UINT64_C2(0x9a6bb0aa, 0x55653b2d), RAPIDJSON_UINT64_C2(0xe61acf03, 0x3d1a45df),
181  RAPIDJSON_UINT64_C2(0xab70fe17, 0xc79ac6ca), RAPIDJSON_UINT64_C2(0xff77b1fc, 0xbebcdc4f),
182  RAPIDJSON_UINT64_C2(0xbe5691ef, 0x416bd60c), RAPIDJSON_UINT64_C2(0x8dd01fad, 0x907ffc3c),
183  RAPIDJSON_UINT64_C2(0xd3515c28, 0x31559a83), RAPIDJSON_UINT64_C2(0x9d71ac8f, 0xada6c9b5),
184  RAPIDJSON_UINT64_C2(0xea9c2277, 0x23ee8bcb), RAPIDJSON_UINT64_C2(0xaecc4991, 0x4078536d),
185  RAPIDJSON_UINT64_C2(0x823c1279, 0x5db6ce57), RAPIDJSON_UINT64_C2(0xc2109436, 0x4dfb5637),
186  RAPIDJSON_UINT64_C2(0x9096ea6f, 0x3848984f), RAPIDJSON_UINT64_C2(0xd77485cb, 0x25823ac7),
187  RAPIDJSON_UINT64_C2(0xa086cfcd, 0x97bf97f4), RAPIDJSON_UINT64_C2(0xef340a98, 0x172aace5),
188  RAPIDJSON_UINT64_C2(0xb23867fb, 0x2a35b28e), RAPIDJSON_UINT64_C2(0x84c8d4df, 0xd2c63f3b),
189  RAPIDJSON_UINT64_C2(0xc5dd4427, 0x1ad3cdba), RAPIDJSON_UINT64_C2(0x936b9fce, 0xbb25c996),
190  RAPIDJSON_UINT64_C2(0xdbac6c24, 0x7d62a584), RAPIDJSON_UINT64_C2(0xa3ab6658, 0x0d5fdaf6),
191  RAPIDJSON_UINT64_C2(0xf3e2f893, 0xdec3f126), RAPIDJSON_UINT64_C2(0xb5b5ada8, 0xaaff80b8),
192  RAPIDJSON_UINT64_C2(0x87625f05, 0x6c7c4a8b), RAPIDJSON_UINT64_C2(0xc9bcff60, 0x34c13053),
193  RAPIDJSON_UINT64_C2(0x964e858c, 0x91ba2655), RAPIDJSON_UINT64_C2(0xdff97724, 0x70297ebd),
194  RAPIDJSON_UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), RAPIDJSON_UINT64_C2(0xf8a95fcf, 0x88747d94),
195  RAPIDJSON_UINT64_C2(0xb9447093, 0x8fa89bcf), RAPIDJSON_UINT64_C2(0x8a08f0f8, 0xbf0f156b),
196  RAPIDJSON_UINT64_C2(0xcdb02555, 0x653131b6), RAPIDJSON_UINT64_C2(0x993fe2c6, 0xd07b7fac),
197  RAPIDJSON_UINT64_C2(0xe45c10c4, 0x2a2b3b06), RAPIDJSON_UINT64_C2(0xaa242499, 0x697392d3),
198  RAPIDJSON_UINT64_C2(0xfd87b5f2, 0x8300ca0e), RAPIDJSON_UINT64_C2(0xbce50864, 0x92111aeb),
199  RAPIDJSON_UINT64_C2(0x8cbccc09, 0x6f5088cc), RAPIDJSON_UINT64_C2(0xd1b71758, 0xe219652c),
200  RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), RAPIDJSON_UINT64_C2(0xe8d4a510, 0x00000000),
201  RAPIDJSON_UINT64_C2(0xad78ebc5, 0xac620000), RAPIDJSON_UINT64_C2(0x813f3978, 0xf8940984),
202  RAPIDJSON_UINT64_C2(0xc097ce7b, 0xc90715b3), RAPIDJSON_UINT64_C2(0x8f7e32ce, 0x7bea5c70),
203  RAPIDJSON_UINT64_C2(0xd5d238a4, 0xabe98068), RAPIDJSON_UINT64_C2(0x9f4f2726, 0x179a2245),
204  RAPIDJSON_UINT64_C2(0xed63a231, 0xd4c4fb27), RAPIDJSON_UINT64_C2(0xb0de6538, 0x8cc8ada8),
205  RAPIDJSON_UINT64_C2(0x83c7088e, 0x1aab65db), RAPIDJSON_UINT64_C2(0xc45d1df9, 0x42711d9a),
206  RAPIDJSON_UINT64_C2(0x924d692c, 0xa61be758), RAPIDJSON_UINT64_C2(0xda01ee64, 0x1a708dea),
207  RAPIDJSON_UINT64_C2(0xa26da399, 0x9aef774a), RAPIDJSON_UINT64_C2(0xf209787b, 0xb47d6b85),
208  RAPIDJSON_UINT64_C2(0xb454e4a1, 0x79dd1877), RAPIDJSON_UINT64_C2(0x865b8692, 0x5b9bc5c2),
209  RAPIDJSON_UINT64_C2(0xc83553c5, 0xc8965d3d), RAPIDJSON_UINT64_C2(0x952ab45c, 0xfa97a0b3),
210  RAPIDJSON_UINT64_C2(0xde469fbd, 0x99a05fe3), RAPIDJSON_UINT64_C2(0xa59bc234, 0xdb398c25),
211  RAPIDJSON_UINT64_C2(0xf6c69a72, 0xa3989f5c), RAPIDJSON_UINT64_C2(0xb7dcbf53, 0x54e9bece),
212  RAPIDJSON_UINT64_C2(0x88fcf317, 0xf22241e2), RAPIDJSON_UINT64_C2(0xcc20ce9b, 0xd35c78a5),
213  RAPIDJSON_UINT64_C2(0x98165af3, 0x7b2153df), RAPIDJSON_UINT64_C2(0xe2a0b5dc, 0x971f303a),
214  RAPIDJSON_UINT64_C2(0xa8d9d153, 0x5ce3b396), RAPIDJSON_UINT64_C2(0xfb9b7cd9, 0xa4a7443c),
215  RAPIDJSON_UINT64_C2(0xbb764c4c, 0xa7a44410), RAPIDJSON_UINT64_C2(0x8bab8eef, 0xb6409c1a),
216  RAPIDJSON_UINT64_C2(0xd01fef10, 0xa657842c), RAPIDJSON_UINT64_C2(0x9b10a4e5, 0xe9913129),
217  RAPIDJSON_UINT64_C2(0xe7109bfb, 0xa19c0c9d), RAPIDJSON_UINT64_C2(0xac2820d9, 0x623bf429),
218  RAPIDJSON_UINT64_C2(0x80444b5e, 0x7aa7cf85), RAPIDJSON_UINT64_C2(0xbf21e440, 0x03acdd2d),
219  RAPIDJSON_UINT64_C2(0x8e679c2f, 0x5e44ff8f), RAPIDJSON_UINT64_C2(0xd433179d, 0x9c8cb841),
220  RAPIDJSON_UINT64_C2(0x9e19db92, 0xb4e31ba9), RAPIDJSON_UINT64_C2(0xeb96bf6e, 0xbadf77d9),
221  RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b)
222  };
223  static const int16_t kCachedPowers_E[] = {
224  -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980,
225  -954, -927, -901, -874, -847, -821, -794, -768, -741, -715,
226  -688, -661, -635, -608, -582, -555, -529, -502, -475, -449,
227  -422, -396, -369, -343, -316, -289, -263, -236, -210, -183,
228  -157, -130, -103, -77, -50, -24, 3, 30, 56, 83,
229  109, 136, 162, 189, 216, 242, 269, 295, 322, 348,
230  375, 402, 428, 455, 481, 508, 534, 561, 588, 614,
231  641, 667, 694, 720, 747, 774, 800, 827, 853, 880,
232  907, 933, 960, 986, 1013, 1039, 1066
233  };
234  RAPIDJSON_ASSERT(index < 87);
235  return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]);
236 }
237 
238 inline DiyFp GetCachedPower(int e, int* K) {
239 
240  //int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374;
241  double dk = (-61 - e) * 0.30102999566398114 + 347; // dk must be positive, so can do ceiling in positive
242  int k = static_cast<int>(dk);
243  if (dk - k > 0.0)
244  k++;
245 
246  unsigned index = static_cast<unsigned>((k >> 3) + 1);
247  *K = -(-348 + static_cast<int>(index << 3)); // decimal exponent no need lookup table
248 
249  return GetCachedPowerByIndex(index);
250 }
251 
252 inline DiyFp GetCachedPower10(int exp, int *outExp) {
253  RAPIDJSON_ASSERT(exp >= -348);
254  unsigned index = static_cast<unsigned>(exp + 348) / 8u;
255  *outExp = -348 + static_cast<int>(index) * 8;
256  return GetCachedPowerByIndex(index);
257 }
258 
259 #ifdef __GNUC__
260 RAPIDJSON_DIAG_POP
261 #endif
262 
263 #ifdef __clang__
264 RAPIDJSON_DIAG_POP
265 RAPIDJSON_DIAG_OFF(padded)
266 #endif
267 
268 } // namespace internal
269 RAPIDJSON_NAMESPACE_END
270 
271 #endif // RAPIDJSON_DIYFP_H_
#define RAPIDJSON_UINT64_C2(high32, low32)
Construct a 64-bit literal by a pair of 32-bit integer.
Definition: rapidjson.h:289
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406