diyfp.h
1 // Tencent is pleased to support the open source community by making RapidJSON available.
2 //
3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
4 //
5 // Licensed under the MIT License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // http://opensource.org/licenses/MIT
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
15 // This is a C++ header-only implementation of Grisu2 algorithm from the publication:
16 // Loitsch, Florian. "Printing floating-point numbers quickly and accurately with
17 // integers." ACM Sigplan Notices 45.6 (2010): 233-243.
18 
19 #ifndef RAPIDJSON_DIYFP_H_
20 #define RAPIDJSON_DIYFP_H_
21 
22 #include "../rapidjson.h"
23 #include "clzll.h"
24 #include <limits>
25 
26 #if defined(_MSC_VER) && defined(_M_AMD64) && !defined(__INTEL_COMPILER)
27 #include <intrin.h>
28 #pragma intrinsic(_umul128)
29 #endif
30 
31 RAPIDJSON_NAMESPACE_BEGIN
32 namespace internal {
33 
34 #ifdef __GNUC__
35 RAPIDJSON_DIAG_PUSH
36 RAPIDJSON_DIAG_OFF(effc++)
37 #endif
38 
39 #ifdef __clang__
40 RAPIDJSON_DIAG_PUSH
41 RAPIDJSON_DIAG_OFF(padded)
42 #endif
43 
44 struct DiyFp {
45  DiyFp() : f(), e() {}
46 
47  DiyFp(uint64_t fp, int exp) : f(fp), e(exp) {}
48 
49  explicit DiyFp(double d) {
50  union {
51  double d;
52  uint64_t u64;
53  } u = { d };
54 
55  int biased_e = static_cast<int>((u.u64 & kDpExponentMask) >> kDpSignificandSize);
56  uint64_t significand = (u.u64 & kDpSignificandMask);
57  if (biased_e != 0) {
58  f = significand + kDpHiddenBit;
59  e = biased_e - kDpExponentBias;
60  }
61  else {
62  f = significand;
63  e = kDpMinExponent + 1;
64  }
65  }
66 
67  DiyFp operator-(const DiyFp& rhs) const {
68  return DiyFp(f - rhs.f, e);
69  }
70 
71  DiyFp operator*(const DiyFp& rhs) const {
72 #if defined(_MSC_VER) && defined(_M_AMD64)
73  uint64_t h;
74  uint64_t l = _umul128(f, rhs.f, &h);
75  if (l & (uint64_t(1) << 63)) // rounding
76  h++;
77  return DiyFp(h, e + rhs.e + 64);
78 #elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
79  __extension__ typedef unsigned __int128 uint128;
80  uint128 p = static_cast<uint128>(f) * static_cast<uint128>(rhs.f);
81  uint64_t h = static_cast<uint64_t>(p >> 64);
82  uint64_t l = static_cast<uint64_t>(p);
83  if (l & (uint64_t(1) << 63)) // rounding
84  h++;
85  return DiyFp(h, e + rhs.e + 64);
86 #else
87  const uint64_t M32 = 0xFFFFFFFF;
88  const uint64_t a = f >> 32;
89  const uint64_t b = f & M32;
90  const uint64_t c = rhs.f >> 32;
91  const uint64_t d = rhs.f & M32;
92  const uint64_t ac = a * c;
93  const uint64_t bc = b * c;
94  const uint64_t ad = a * d;
95  const uint64_t bd = b * d;
96  uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
97  tmp += 1U << 31; /// mult_round
98  return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
99 #endif
100  }
101 
102  DiyFp Normalize() const {
103  int s = static_cast<int>(clzll(f));
104  return DiyFp(f << s, e - s);
105  }
106 
107  DiyFp NormalizeBoundary() const {
108  DiyFp res = *this;
109  while (!(res.f & (kDpHiddenBit << 1))) {
110  res.f <<= 1;
111  res.e--;
112  }
113  res.f <<= (kDiySignificandSize - kDpSignificandSize - 2);
114  res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2);
115  return res;
116  }
117 
118  void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const {
119  DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
120  DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
121  mi.f <<= mi.e - pl.e;
122  mi.e = pl.e;
123  *plus = pl;
124  *minus = mi;
125  }
126 
127  double ToDouble() const {
128  union {
129  double d;
130  uint64_t u64;
131  }u;
132  RAPIDJSON_ASSERT(f <= kDpHiddenBit + kDpSignificandMask);
133  if (e < kDpDenormalExponent) {
134  // Underflow.
135  return 0.0;
136  }
137  if (e >= kDpMaxExponent) {
138  // Overflow.
139  return std::numeric_limits<double>::infinity();
140  }
141  const uint64_t be = (e == kDpDenormalExponent && (f & kDpHiddenBit) == 0) ? 0 :
142  static_cast<uint64_t>(e + kDpExponentBias);
143  u.u64 = (f & kDpSignificandMask) | (be << kDpSignificandSize);
144  return u.d;
145  }
146 
147  static const int kDiySignificandSize = 64;
148  static const int kDpSignificandSize = 52;
149  static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
150  static const int kDpMaxExponent = 0x7FF - kDpExponentBias;
151  static const int kDpMinExponent = -kDpExponentBias;
152  static const int kDpDenormalExponent = -kDpExponentBias + 1;
153  static const uint64_t kDpExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
154  static const uint64_t kDpSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
155  static const uint64_t kDpHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
156 
157  uint64_t f;
158  int e;
159 };
160 
161 inline DiyFp GetCachedPowerByIndex(size_t index) {
162  // 10^-348, 10^-340, ..., 10^340
163  static const uint64_t kCachedPowers_F[] = {
164  RAPIDJSON_UINT64_C2(0xfa8fd5a0, 0x081c0288), RAPIDJSON_UINT64_C2(0xbaaee17f, 0xa23ebf76),
165  RAPIDJSON_UINT64_C2(0x8b16fb20, 0x3055ac76), RAPIDJSON_UINT64_C2(0xcf42894a, 0x5dce35ea),
166  RAPIDJSON_UINT64_C2(0x9a6bb0aa, 0x55653b2d), RAPIDJSON_UINT64_C2(0xe61acf03, 0x3d1a45df),
167  RAPIDJSON_UINT64_C2(0xab70fe17, 0xc79ac6ca), RAPIDJSON_UINT64_C2(0xff77b1fc, 0xbebcdc4f),
168  RAPIDJSON_UINT64_C2(0xbe5691ef, 0x416bd60c), RAPIDJSON_UINT64_C2(0x8dd01fad, 0x907ffc3c),
169  RAPIDJSON_UINT64_C2(0xd3515c28, 0x31559a83), RAPIDJSON_UINT64_C2(0x9d71ac8f, 0xada6c9b5),
170  RAPIDJSON_UINT64_C2(0xea9c2277, 0x23ee8bcb), RAPIDJSON_UINT64_C2(0xaecc4991, 0x4078536d),
171  RAPIDJSON_UINT64_C2(0x823c1279, 0x5db6ce57), RAPIDJSON_UINT64_C2(0xc2109436, 0x4dfb5637),
172  RAPIDJSON_UINT64_C2(0x9096ea6f, 0x3848984f), RAPIDJSON_UINT64_C2(0xd77485cb, 0x25823ac7),
173  RAPIDJSON_UINT64_C2(0xa086cfcd, 0x97bf97f4), RAPIDJSON_UINT64_C2(0xef340a98, 0x172aace5),
174  RAPIDJSON_UINT64_C2(0xb23867fb, 0x2a35b28e), RAPIDJSON_UINT64_C2(0x84c8d4df, 0xd2c63f3b),
175  RAPIDJSON_UINT64_C2(0xc5dd4427, 0x1ad3cdba), RAPIDJSON_UINT64_C2(0x936b9fce, 0xbb25c996),
176  RAPIDJSON_UINT64_C2(0xdbac6c24, 0x7d62a584), RAPIDJSON_UINT64_C2(0xa3ab6658, 0x0d5fdaf6),
177  RAPIDJSON_UINT64_C2(0xf3e2f893, 0xdec3f126), RAPIDJSON_UINT64_C2(0xb5b5ada8, 0xaaff80b8),
178  RAPIDJSON_UINT64_C2(0x87625f05, 0x6c7c4a8b), RAPIDJSON_UINT64_C2(0xc9bcff60, 0x34c13053),
179  RAPIDJSON_UINT64_C2(0x964e858c, 0x91ba2655), RAPIDJSON_UINT64_C2(0xdff97724, 0x70297ebd),
180  RAPIDJSON_UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), RAPIDJSON_UINT64_C2(0xf8a95fcf, 0x88747d94),
181  RAPIDJSON_UINT64_C2(0xb9447093, 0x8fa89bcf), RAPIDJSON_UINT64_C2(0x8a08f0f8, 0xbf0f156b),
182  RAPIDJSON_UINT64_C2(0xcdb02555, 0x653131b6), RAPIDJSON_UINT64_C2(0x993fe2c6, 0xd07b7fac),
183  RAPIDJSON_UINT64_C2(0xe45c10c4, 0x2a2b3b06), RAPIDJSON_UINT64_C2(0xaa242499, 0x697392d3),
184  RAPIDJSON_UINT64_C2(0xfd87b5f2, 0x8300ca0e), RAPIDJSON_UINT64_C2(0xbce50864, 0x92111aeb),
185  RAPIDJSON_UINT64_C2(0x8cbccc09, 0x6f5088cc), RAPIDJSON_UINT64_C2(0xd1b71758, 0xe219652c),
186  RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), RAPIDJSON_UINT64_C2(0xe8d4a510, 0x00000000),
187  RAPIDJSON_UINT64_C2(0xad78ebc5, 0xac620000), RAPIDJSON_UINT64_C2(0x813f3978, 0xf8940984),
188  RAPIDJSON_UINT64_C2(0xc097ce7b, 0xc90715b3), RAPIDJSON_UINT64_C2(0x8f7e32ce, 0x7bea5c70),
189  RAPIDJSON_UINT64_C2(0xd5d238a4, 0xabe98068), RAPIDJSON_UINT64_C2(0x9f4f2726, 0x179a2245),
190  RAPIDJSON_UINT64_C2(0xed63a231, 0xd4c4fb27), RAPIDJSON_UINT64_C2(0xb0de6538, 0x8cc8ada8),
191  RAPIDJSON_UINT64_C2(0x83c7088e, 0x1aab65db), RAPIDJSON_UINT64_C2(0xc45d1df9, 0x42711d9a),
192  RAPIDJSON_UINT64_C2(0x924d692c, 0xa61be758), RAPIDJSON_UINT64_C2(0xda01ee64, 0x1a708dea),
193  RAPIDJSON_UINT64_C2(0xa26da399, 0x9aef774a), RAPIDJSON_UINT64_C2(0xf209787b, 0xb47d6b85),
194  RAPIDJSON_UINT64_C2(0xb454e4a1, 0x79dd1877), RAPIDJSON_UINT64_C2(0x865b8692, 0x5b9bc5c2),
195  RAPIDJSON_UINT64_C2(0xc83553c5, 0xc8965d3d), RAPIDJSON_UINT64_C2(0x952ab45c, 0xfa97a0b3),
196  RAPIDJSON_UINT64_C2(0xde469fbd, 0x99a05fe3), RAPIDJSON_UINT64_C2(0xa59bc234, 0xdb398c25),
197  RAPIDJSON_UINT64_C2(0xf6c69a72, 0xa3989f5c), RAPIDJSON_UINT64_C2(0xb7dcbf53, 0x54e9bece),
198  RAPIDJSON_UINT64_C2(0x88fcf317, 0xf22241e2), RAPIDJSON_UINT64_C2(0xcc20ce9b, 0xd35c78a5),
199  RAPIDJSON_UINT64_C2(0x98165af3, 0x7b2153df), RAPIDJSON_UINT64_C2(0xe2a0b5dc, 0x971f303a),
200  RAPIDJSON_UINT64_C2(0xa8d9d153, 0x5ce3b396), RAPIDJSON_UINT64_C2(0xfb9b7cd9, 0xa4a7443c),
201  RAPIDJSON_UINT64_C2(0xbb764c4c, 0xa7a44410), RAPIDJSON_UINT64_C2(0x8bab8eef, 0xb6409c1a),
202  RAPIDJSON_UINT64_C2(0xd01fef10, 0xa657842c), RAPIDJSON_UINT64_C2(0x9b10a4e5, 0xe9913129),
203  RAPIDJSON_UINT64_C2(0xe7109bfb, 0xa19c0c9d), RAPIDJSON_UINT64_C2(0xac2820d9, 0x623bf429),
204  RAPIDJSON_UINT64_C2(0x80444b5e, 0x7aa7cf85), RAPIDJSON_UINT64_C2(0xbf21e440, 0x03acdd2d),
205  RAPIDJSON_UINT64_C2(0x8e679c2f, 0x5e44ff8f), RAPIDJSON_UINT64_C2(0xd433179d, 0x9c8cb841),
206  RAPIDJSON_UINT64_C2(0x9e19db92, 0xb4e31ba9), RAPIDJSON_UINT64_C2(0xeb96bf6e, 0xbadf77d9),
207  RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b)
208  };
209  static const int16_t kCachedPowers_E[] = {
210  -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980,
211  -954, -927, -901, -874, -847, -821, -794, -768, -741, -715,
212  -688, -661, -635, -608, -582, -555, -529, -502, -475, -449,
213  -422, -396, -369, -343, -316, -289, -263, -236, -210, -183,
214  -157, -130, -103, -77, -50, -24, 3, 30, 56, 83,
215  109, 136, 162, 189, 216, 242, 269, 295, 322, 348,
216  375, 402, 428, 455, 481, 508, 534, 561, 588, 614,
217  641, 667, 694, 720, 747, 774, 800, 827, 853, 880,
218  907, 933, 960, 986, 1013, 1039, 1066
219  };
220  RAPIDJSON_ASSERT(index < 87);
221  return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]);
222 }
223 
224 inline DiyFp GetCachedPower(int e, int* K) {
225 
226  //int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374;
227  double dk = (-61 - e) * 0.30102999566398114 + 347; // dk must be positive, so can do ceiling in positive
228  int k = static_cast<int>(dk);
229  if (dk - k > 0.0)
230  k++;
231 
232  unsigned index = static_cast<unsigned>((k >> 3) + 1);
233  *K = -(-348 + static_cast<int>(index << 3)); // decimal exponent no need lookup table
234 
235  return GetCachedPowerByIndex(index);
236 }
237 
238 inline DiyFp GetCachedPower10(int exp, int *outExp) {
239  RAPIDJSON_ASSERT(exp >= -348);
240  unsigned index = static_cast<unsigned>(exp + 348) / 8u;
241  *outExp = -348 + static_cast<int>(index) * 8;
242  return GetCachedPowerByIndex(index);
243 }
244 
245 #ifdef __GNUC__
246 RAPIDJSON_DIAG_POP
247 #endif
248 
249 #ifdef __clang__
250 RAPIDJSON_DIAG_POP
251 RAPIDJSON_DIAG_OFF(padded)
252 #endif
253 
254 } // namespace internal
255 RAPIDJSON_NAMESPACE_END
256 
257 #endif // RAPIDJSON_DIYFP_H_
RAPIDJSON_ASSERT
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406
RAPIDJSON_UINT64_C2
#define RAPIDJSON_UINT64_C2(high32, low32)
Construct a 64-bit literal by a pair of 32-bit integer.
Definition: rapidjson.h:289