// Tencent is pleased to support the open source community by making RapidJSON available.
//
// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
//
// Licensed under the MIT License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// http://opensource.org/licenses/MIT
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

// This is a C++ header-only implementation of the Grisu2 algorithm from the publication:
// Loitsch, Florian. "Printing floating-point numbers quickly and accurately with
// integers." ACM Sigplan Notices 45.6 (2010): 233-243.

#ifndef _DTOA_
#define _DTOA_

#if defined(_MSC_VER) && defined(_M_AMD64)
#include <intrin.h>
#pragma intrinsic(_BitScanReverse64)
#pragma intrinsic(_umul128)
#endif

#define UINT64_C2(h, l) ((static_cast<uint64_t>(h) << 32) | static_cast<uint64_t>(l))

struct DiyFp {
    DiyFp() : f(), e() {}

    DiyFp(uint64_t fp, int exp) : f(fp), e(exp) {}

    explicit DiyFp(double d) {
        union {
            double d;
            uint64_t u64;
        } u = { d };

        int biased_e = static_cast<int>((u.u64 & kDpExponentMask) >> kDpSignificandSize);
        uint64_t significand = (u.u64 & kDpSignificandMask);

        if (biased_e != 0) {
            f = significand + kDpHiddenBit;
            e = biased_e - kDpExponentBias;
        }
        else {
            f = significand;
            e = kDpMinExponent + 1;
        }
    }

    DiyFp operator-(const DiyFp& rhs) const {
        return DiyFp(f - rhs.f, e);
    }

    DiyFp operator*(const DiyFp& rhs) const {
#if defined(_MSC_VER) && defined(_M_AMD64)
        uint64_t h;
        uint64_t l = _umul128(f, rhs.f, &h);
        if (l & (uint64_t(1) << 63)) // rounding
            h++;
        return DiyFp(h, e + rhs.e + 64);
#elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
        __extension__ typedef unsigned __int128 uint128;
        uint128 p = static_cast<uint128>(f) * static_cast<uint128>(rhs.f);
        uint64_t h = static_cast<uint64_t>(p >> 64);
        uint64_t l = static_cast<uint64_t>(p);
        if (l & (uint64_t(1) << 63)) // rounding
            h++;
        return DiyFp(h, e + rhs.e + 64);
#else
        const uint64_t M32 = 0xFFFFFFFF;
        const uint64_t a = f >> 32;
        const uint64_t b = f & M32;
        const uint64_t c = rhs.f >> 32;
        const uint64_t d = rhs.f & M32;
        const uint64_t ac = a * c;
        const uint64_t bc = b * c;
        const uint64_t ad = a * d;
        const uint64_t bd = b * d;
        uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
        tmp += 1U << 31; /// mult_round
        return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
#endif
    }

    DiyFp Normalize() const {
#if defined(_MSC_VER) && defined(_M_AMD64)
        unsigned long index;
        _BitScanReverse64(&index, f);
        return DiyFp(f << (63 - index), e - (63 - index));
#elif defined(__GNUC__) && __GNUC__ >= 4
        int s = __builtin_clzll(f);
        return DiyFp(f << s, e - s);
#else
        DiyFp res = *this;
        while (!(res.f & (static_cast<uint64_t>(1) << 63))) {
            res.f <<= 1;
            res.e--;
        }
        return res;
#endif
    }

    DiyFp NormalizeBoundary() const __pure {
        DiyFp res = *this;
        while (!(res.f & (kDpHiddenBit << 1))) {
            res.f <<= 1;
            res.e--;
        }
        res.f <<= (kDiySignificandSize - kDpSignificandSize - 2);
        res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2);
        return res;
    }

    void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const {
        DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
        DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
        mi.f <<= mi.e - pl.e;
        mi.e = pl.e;
        *plus = pl;
        *minus = mi;
    }

    double ToDouble() const {
        union {
            double d;
            uint64_t u64;
        } u;
        const uint64_t be = (e == kDpDenormalExponent && (f & kDpHiddenBit) == 0) ? 0 : static_cast<uint64_t>(e + kDpExponentBias);
        u.u64 = (f & kDpSignificandMask) | (be << kDpSignificandSize);
        return u.d;
    }

    static const int kDiySignificandSize = 64;
    static const int kDpSignificandSize = 52;
    static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
    static const int kDpMaxExponent = 0x7FF - kDpExponentBias;
    static const int kDpMinExponent = -kDpExponentBias;
    static const int kDpDenormalExponent = -kDpExponentBias + 1;
    static const uint64_t kDpExponentMask = UINT64_C2(0x7FF00000, 0x00000000);
    static const uint64_t kDpSignificandMask = UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
    static const uint64_t kDpHiddenBit = UINT64_C2(0x00100000, 0x00000000);

    uint64_t f;
    int e;
};
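
// Cached powers of ten, precomputed as DiyFp values: kCachedPowers_F holds the
// normalized 64-bit significands and kCachedPowers_E the matching binary exponents
// for 10^-348, 10^-340, ..., 10^340 (87 entries, spaced 8 decimal exponents apart).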
inline DiyFp GetCachedPowerByIndex(size_t index) {
    // 10^-348, 10^-340, ..., 10^340
    static const uint64_t kCachedPowers_F[] = {
        UINT64_C2(0xfa8fd5a0, 0x081c0288), UINT64_C2(0xbaaee17f, 0xa23ebf76),
        UINT64_C2(0x8b16fb20, 0x3055ac76), UINT64_C2(0xcf42894a, 0x5dce35ea),
        UINT64_C2(0x9a6bb0aa, 0x55653b2d), UINT64_C2(0xe61acf03, 0x3d1a45df),
        UINT64_C2(0xab70fe17, 0xc79ac6ca), UINT64_C2(0xff77b1fc, 0xbebcdc4f),
        UINT64_C2(0xbe5691ef, 0x416bd60c), UINT64_C2(0x8dd01fad, 0x907ffc3c),
        UINT64_C2(0xd3515c28, 0x31559a83), UINT64_C2(0x9d71ac8f, 0xada6c9b5),
        UINT64_C2(0xea9c2277, 0x23ee8bcb), UINT64_C2(0xaecc4991, 0x4078536d),
        UINT64_C2(0x823c1279, 0x5db6ce57), UINT64_C2(0xc2109436, 0x4dfb5637),
        UINT64_C2(0x9096ea6f, 0x3848984f), UINT64_C2(0xd77485cb, 0x25823ac7),
        UINT64_C2(0xa086cfcd, 0x97bf97f4), UINT64_C2(0xef340a98, 0x172aace5),
        UINT64_C2(0xb23867fb, 0x2a35b28e), UINT64_C2(0x84c8d4df, 0xd2c63f3b),
        UINT64_C2(0xc5dd4427, 0x1ad3cdba), UINT64_C2(0x936b9fce, 0xbb25c996),
        UINT64_C2(0xdbac6c24, 0x7d62a584), UINT64_C2(0xa3ab6658, 0x0d5fdaf6),
        UINT64_C2(0xf3e2f893, 0xdec3f126), UINT64_C2(0xb5b5ada8, 0xaaff80b8),
        UINT64_C2(0x87625f05, 0x6c7c4a8b), UINT64_C2(0xc9bcff60, 0x34c13053),
        UINT64_C2(0x964e858c, 0x91ba2655), UINT64_C2(0xdff97724, 0x70297ebd),
        UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), UINT64_C2(0xf8a95fcf, 0x88747d94),
        UINT64_C2(0xb9447093, 0x8fa89bcf), UINT64_C2(0x8a08f0f8, 0xbf0f156b),
        UINT64_C2(0xcdb02555, 0x653131b6), UINT64_C2(0x993fe2c6, 0xd07b7fac),
        UINT64_C2(0xe45c10c4, 0x2a2b3b06), UINT64_C2(0xaa242499, 0x697392d3),
        UINT64_C2(0xfd87b5f2, 0x8300ca0e), UINT64_C2(0xbce50864, 0x92111aeb),
        UINT64_C2(0x8cbccc09, 0x6f5088cc), UINT64_C2(0xd1b71758, 0xe219652c),
        UINT64_C2(0x9c400000, 0x00000000), UINT64_C2(0xe8d4a510, 0x00000000),
        UINT64_C2(0xad78ebc5, 0xac620000), UINT64_C2(0x813f3978, 0xf8940984),
        UINT64_C2(0xc097ce7b, 0xc90715b3), UINT64_C2(0x8f7e32ce, 0x7bea5c70),
        UINT64_C2(0xd5d238a4, 0xabe98068), UINT64_C2(0x9f4f2726, 0x179a2245),
        UINT64_C2(0xed63a231, 0xd4c4fb27), UINT64_C2(0xb0de6538, 0x8cc8ada8),
        UINT64_C2(0x83c7088e, 0x1aab65db), UINT64_C2(0xc45d1df9, 0x42711d9a),
        UINT64_C2(0x924d692c, 0xa61be758), UINT64_C2(0xda01ee64, 0x1a708dea),
        UINT64_C2(0xa26da399, 0x9aef774a), UINT64_C2(0xf209787b, 0xb47d6b85),
        UINT64_C2(0xb454e4a1, 0x79dd1877), UINT64_C2(0x865b8692, 0x5b9bc5c2),
        UINT64_C2(0xc83553c5, 0xc8965d3d), UINT64_C2(0x952ab45c, 0xfa97a0b3),
        UINT64_C2(0xde469fbd, 0x99a05fe3), UINT64_C2(0xa59bc234, 0xdb398c25),
        UINT64_C2(0xf6c69a72, 0xa3989f5c), UINT64_C2(0xb7dcbf53, 0x54e9bece),
        UINT64_C2(0x88fcf317, 0xf22241e2), UINT64_C2(0xcc20ce9b, 0xd35c78a5),
        UINT64_C2(0x98165af3, 0x7b2153df), UINT64_C2(0xe2a0b5dc, 0x971f303a),
        UINT64_C2(0xa8d9d153, 0x5ce3b396), UINT64_C2(0xfb9b7cd9, 0xa4a7443c),
        UINT64_C2(0xbb764c4c, 0xa7a44410), UINT64_C2(0x8bab8eef, 0xb6409c1a),
        UINT64_C2(0xd01fef10, 0xa657842c), UINT64_C2(0x9b10a4e5, 0xe9913129),
        UINT64_C2(0xe7109bfb, 0xa19c0c9d), UINT64_C2(0xac2820d9, 0x623bf429),
        UINT64_C2(0x80444b5e, 0x7aa7cf85), UINT64_C2(0xbf21e440, 0x03acdd2d),
        UINT64_C2(0x8e679c2f, 0x5e44ff8f), UINT64_C2(0xd433179d, 0x9c8cb841),
        UINT64_C2(0x9e19db92, 0xb4e31ba9), UINT64_C2(0xeb96bf6e, 0xbadf77d9),
        UINT64_C2(0xaf87023b, 0x9bf0ee6b)
    };

    static const int16_t kCachedPowers_E[] = {
        -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980,
         -954,  -927,  -901,  -874,  -847,  -821,  -794,  -768,  -741, -715,
         -688,  -661,  -635,  -608,  -582,  -555,  -529,  -502,  -475, -449,
         -422,  -396,  -369,  -343,  -316,  -289,  -263,  -236,  -210, -183,
         -157,  -130,  -103,   -77,   -50,   -24,     3,    30,    56,   83,
          109,   136,   162,   189,   216,   242,   269,   295,   322,  348,
          375,   402,   428,   455,   481,   508,   534,   561,   588,  614,
          641,   667,   694,   720,   747,   774,   800,   827,   853,  880,
          907,   933,   960,   986,  1013,  1039,  1066
    };

    return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]);
}
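
// GetCachedPower() maps the binary exponent e of a normalized DiyFp to a table index:
// it estimates the decimal exponent with log10(2) ~= 0.30102999566398114, takes the
// ceiling, and rounds to the table's step of 8. *K is set to the negation of the
// selected power's decimal exponent, which the caller later combines with the number
// of generated digits to place the decimal point.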
inline DiyFp GetCachedPower(int e, int* K) {
    //int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374;
    double dk = (-61 - e) * 0.30102999566398114 + 347; // dk must be positive, so can do ceiling in positive
    int k = static_cast<int>(dk);
    if (dk - k > 0.0)
        k++;

    unsigned index = static_cast<unsigned>((k >> 3) + 1);
    *K = -(-348 + static_cast<int>(index << 3)); // decimal exponent no need lookup table

    return GetCachedPowerByIndex(index);
}

inline DiyFp GetCachedPower10(int exp, int* outExp) {
    unsigned index = (static_cast<unsigned>(exp) + 348u) / 8u;
    *outExp = -348 + static_cast<int>(index) * 8;
    return GetCachedPowerByIndex(index);
}

class Double {
public:
    Double() {}
    Double(double d) : d_(d) {}
    Double(uint64_t u) : u_(u) {}

    double Value() const { return d_; }
    uint64_t Uint64Value() const { return u_; }

    double NextPositiveDouble() const { return Double(u_ + 1).Value(); }

    bool Sign() const { return (u_ & kSignMask) != 0; }
    uint64_t Significand() const { return u_ & kSignificandMask; }
    int Exponent() const { return static_cast<int>(((u_ & kExponentMask) >> kSignificandSize) - kExponentBias); }

    bool IsNan() const { return (u_ & kExponentMask) == kExponentMask && Significand() != 0; }
    bool IsInf() const { return (u_ & kExponentMask) == kExponentMask && Significand() == 0; }
    bool IsNanOrInf() const { return (u_ & kExponentMask) == kExponentMask; }
    bool IsNormal() const { return (u_ & kExponentMask) != 0 || Significand() == 0; }
    bool IsZero() const { return (u_ & (kExponentMask | kSignificandMask)) == 0; }

    uint64_t IntegerSignificand() const { return IsNormal() ? Significand() | kHiddenBit : Significand(); }
    int IntegerExponent() const { return (IsNormal() ? Exponent() : kDenormalExponent) - kSignificandSize; }
    uint64_t ToBias() const { return (u_ & kSignMask) ? ~u_ + 1 : u_ | kSignMask; }

    static unsigned EffectiveSignificandSize(int order) {
        if (order >= -1021) return 53;
        else if (order <= -1074) return 0;
        else return static_cast<unsigned>(order) + 1074;
    }

private:
    static const int kSignificandSize = 52;
    static const int kExponentBias = 0x3FF;
    static const int kDenormalExponent = 1 - kExponentBias;
    static const uint64_t kSignMask = UINT64_C2(0x80000000, 0x00000000);
    static const uint64_t kExponentMask = UINT64_C2(0x7FF00000, 0x00000000);
    static const uint64_t kSignificandMask = UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
    static const uint64_t kHiddenBit = UINT64_C2(0x00100000, 0x00000000);

    union {
        double d_;
        uint64_t u_;
    };
};
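
// GrisuRound() implements the weeding step of Grisu2: it decrements the last generated
// digit as long as the remainder (rest) stays within delta and each step brings the
// output closer to wp_w (the distance from the upper boundary to the original value),
// so the printed number stays as close as possible to the input.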
inline void GrisuRound(char* buffer, int len, uint64_t delta, uint64_t rest, uint64_t ten_kappa, uint64_t wp_w) {
    while (rest < wp_w &&
           delta - rest >= ten_kappa &&
           (rest + ten_kappa < wp_w || /// closer
            wp_w - rest > rest + ten_kappa - wp_w)) {
        buffer[len - 1]--;
        rest += ten_kappa;
    }
}

inline unsigned CountDecimalDigit32(uint32_t n) {
    // Simple pure C++ implementation was faster than __builtin_clz version in this situation.
    if (n < 10) return 1;
    if (n < 100) return 2;
    if (n < 1000) return 3;
    if (n < 10000) return 4;
    if (n < 100000) return 5;
    if (n < 1000000) return 6;
    if (n < 10000000) return 7;
    if (n < 100000000) return 8;
    // Will not reach 10 digits in DigitGen()
    //if (n < 1000000000) return 9;
    //return 10;
    return 9;
}

inline void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buffer, int* len, int* K) {
    const DiyFp one(uint64_t(1) << -Mp.e, Mp.e);
    const DiyFp wp_w = Mp - W;
    uint32_t p1 = static_cast<uint32_t>(Mp.f >> -one.e);
    uint64_t p2 = Mp.f & (one.f - 1);
    unsigned kappa = CountDecimalDigit32(p1); // kappa in [0, 9]
    *len = 0;

    while (kappa > 0) {
        uint32_t d = 0;
        switch (kappa) {
            case 9: d = p1 / 100000000; p1 %= 100000000; break;
            case 8: d = p1 /  10000000; p1 %=  10000000; break;
            case 7: d = p1 /   1000000; p1 %=   1000000; break;
            case 6: d = p1 /    100000; p1 %=    100000; break;
            case 5: d = p1 /     10000; p1 %=     10000; break;
            case 4: d = p1 /      1000; p1 %=      1000; break;
            case 3: d = p1 /       100; p1 %=       100; break;
            case 2: d = p1 /        10; p1 %=        10; break;
            case 1: d = p1;             p1 =          0; break;
            default:;
        }

        if (d || *len) buffer[(*len)++] = static_cast<char>('0' + static_cast<char>(d));

        kappa--;

        uint64_t tmp = (static_cast<uint64_t>(p1) << -one.e) + p2;

        if (tmp <= delta) {
            *K += kappa;
# ifdef HAVE_CXX11
            GrisuRound(buffer, *len, delta, tmp, pow10(kappa) << -one.e, wp_w.f);
# else
            GrisuRound(buffer, *len, delta, tmp, (uint64_t)u_pow10[kappa] << -one.e, wp_w.f);
# endif
            return;
        }
    }

    // kappa = 0
    for (;;) {
        p2 *= 10;
        delta *= 10;

        char d = static_cast<char>(p2 >> -one.e);

        if (d || *len) buffer[(*len)++] = static_cast<char>('0' + d);

        p2 &= one.f - 1;

        kappa--;

        if (p2 < delta) {
            *K += kappa;

            int index = -static_cast<int>(kappa);
# ifdef HAVE_CXX11
            GrisuRound(buffer, *len, delta, p2, one.f, wp_w.f * (index < 9 ? pow10(index) : 0));
# else
            GrisuRound(buffer, *len, delta, p2, one.f, wp_w.f * (index < 9 ? (uint64_t)u_pow10[index] : 0));
# endif
            return;
        }
    }
}
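
// dtoa_rapidjson() is the Grisu2 driver. It computes the normalized boundaries of the
// value, scales them by a cached power of ten, lets DigitGen() emit the shortest digit
// sequence, and formats the digits according to kk = length + k
// (so that 10^(kk-1) <= value < 10^kk): plain fixed notation while kk <= 21, a
// "0.00..." prefix while -6 < kk <= 0, and exponent notation otherwise; the return
// value points just past the generated text. The helpers u_put_unalignedp16/32,
// U_MULTICHAR_CONSTANT16/32, u_dd and u_pow10 are assumed to be provided by the
// surrounding project; they are not defined in this header.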
static char* dtoa_rapidjson(double value, char* buffer) {
    Double d(value);

    if (d.IsZero()) {
        if (d.Sign()) {
            u_put_unalignedp32(buffer, U_MULTICHAR_CONSTANT32('-','0','.','0'));
            return buffer+4;
        }
        u_put_unalignedp32(buffer, U_MULTICHAR_CONSTANT32('0','.','0','\0'));
        return buffer+3;
    }

    int length, k;
    const DiyFp v(value);
    DiyFp w_m, w_p;
    v.NormalizedBoundaries(&w_m, &w_p);

    const DiyFp c_mk = GetCachedPower(w_p.e, &k);
    const DiyFp W = v.Normalize() * c_mk;
    DiyFp Wp = w_p * c_mk;
    DiyFp Wm = w_m * c_mk;
    Wm.f++;
    Wp.f--;

    DigitGen(W, Wp, Wp.f - Wm.f, buffer, &length, &k);

    int kk = length + k; // 10^(kk-1) <= v < 10^kk

    if (0 <= k && kk <= 21) {
        // 1234e7 -> 12340000000
# ifndef U_COVERITY_FALSE_POSITIVE
        memset(buffer+length, '0', k);
# endif
        buffer += kk;
        u_put_unalignedp16(buffer, U_MULTICHAR_CONSTANT16('.','0'));
        return buffer+2;
    }

    if (0 < kk && kk <= 21) {
        // 1234e-2 -> 12.34
        char* ptr = buffer+kk;
        memmove(ptr+1, ptr, static_cast<size_t>(length - kk));
        *ptr = '.';
        return buffer+length+1;
    }

    if (-6 < kk && kk <= 0) {
        // 1234e-6 -> 0.001234
        const int offset = 2 - kk;
        memmove(buffer+offset, buffer, static_cast<size_t>(length));
        u_put_unalignedp16(buffer, U_MULTICHAR_CONSTANT16('0','.'));
# ifndef U_COVERITY_FALSE_POSITIVE
        memset(buffer+2, '0', -kk);
# endif
        return buffer+length+offset;
    }

    if (length == 1) {
        // 1e30
        buffer[1] = 'e';
        buffer += 2;
        goto exp;
    }

    // 1234e30 -> 1.234e33
    memmove(buffer+2, buffer+1, static_cast<size_t>(length - 1));
    buffer[1] = '.';
    buffer[length+1] = 'e';
    buffer += length+2;

exp:
    if (--kk < 0) {
        kk = -kk;
        *buffer++ = '-';
    }

    if (kk >= 100) {
        *buffer++ = '0' + (kk / 100);
        kk %= 100;
        goto next;
    }

    if (kk >= 10) {
next:
        u_put_unalignedp16(buffer, u_dd(kk * 2));
        return buffer + 2;
    }

    *buffer++ = '0' + kk;
    return buffer;
}

#endif