viết thử cái floating point inspection: https://rextester.com/UAZZN4341
#include <cstdint>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <limits>
#include <map>
#include <string>
#include <type_traits>
#include <utility>
// Lookup table used to describe a floating-point rounding style.
// Key:   the integer value of a std::float_round_style enumerator
//        (the table is indexed with std::numeric_limits<T>::round_style).
// Value: {enumerator name, human-readable description}.
const std::map<int, std::pair<std::string, std::string>> kFloatRoundStyleDefs {
    {-1, {"std::round_indeterminate",      "Rounding style cannot be determined"}},
    { 0, {"std::round_toward_zero",        "Rounding toward zero"}},
    { 1, {"std::round_to_nearest",         "Rounding toward nearest representable value"}},
    { 2, {"std::round_toward_infinity",    "Rounding toward positive infinity"}},
    // The description used to end with a stray space that leaked into the output.
    { 3, {"std::round_toward_neg_infinity", "Rounding toward negative infinity"}},
};
/// Prints a summary of the floating-point type `Real` to std::cout.
///
/// First the rounding style (name and description looked up in
/// kFloatRoundStyleDefs) is printed. If `Real` does not report
/// std::numeric_limits<Real>::is_iec559, a message is written to std::cerr
/// and nothing else is printed. Otherwise the sign/exponent/significand
/// widths and the exponent bias are printed, followed by a blank line.
///
/// NOTE(review): the widths are derived from sizeof(Real) and
/// numeric_limits::digits, which assumes a format with one sign bit, an
/// implicit leading significand bit and no padding bits (true for IEEE 754
/// binary32/binary64) - confirm before using with other types.
template <class Real>
void printInfo() {
    using Limits = std::numeric_limits<Real>;

    const auto& [rsName, rsDef] = kFloatRoundStyleDefs.at(Limits::round_style);
    std::cout << rsName << ": " << rsDef << "\n";

    if (!Limits::is_iec559) {
        std::cerr << "Real type does NOT fulfill the requirements of IEEE 754 standard\n";
        return;
    }
    std::cout << "Real type fulfills the requirements of IEEE 754 standard\n";

    // `digits` counts the implicit leading bit, which is not stored.
    const int kSignificandBitStored = Limits::digits - 1;
    const int kHighestBit = static_cast<int>(sizeof(Real)) * std::numeric_limits<unsigned char>::digits - 1;
    // Everything between the sign bit and the stored significand is exponent.
    const int kExponentBitCount = kHighestBit - kSignificandBitStored;
    // IEEE 754 bias is 2^(k-1) - 1 (127 for binary32, 1023 for binary64),
    // not 2^(k-1).
    const int kExponentBias = (1 << (kExponentBitCount - 1)) - 1;

    std::cout << "Sign bit: 1 bit\n";
    std::cout << "Exponent width: " << kExponentBitCount << " bits (exponent bias = " << kExponentBias << ")\n";
    std::cout << "Significand precision: " << Limits::digits << " bits (" << kSignificandBitStored << " bits stored)\n";
    std::cout << "\n";
}
/// Prints the sign bit of `bits` to std::cout.
///
/// @param bits     raw bit pattern to inspect
/// @param fromBit  index of the sign bit (0 = least significant bit)
/// @param width    field width in which the 0/1 digit is right-aligned
///
/// Output format: "<digit>: <+ or ->" followed by a newline.
template <class Uint>
void printSign(Uint bits, int fromBit, int width) {
    const Uint signMask = Uint{1} << fromBit;
    const bool isNegative = (bits & signMask) != 0;
    const char symbol = isNegative ? '-' : '+';

    std::cout << std::setw(width) << isNegative;
    std::cout << ": " << symbol << "\n";
}
/// Prints the exponent field of `bits` to std::cout, followed by its meaning.
///
/// @param bits          raw bit pattern to inspect
/// @param fromBit       index of the most significant exponent bit
/// @param bitCount      number of exponent bits, read from `fromBit` downwards
/// @param width         field width of the single-space filler printed before
///                      the bits (used by the caller for column alignment)
/// @param exponentBias  bias subtracted from the stored exponent
///
/// For an ordinary (normal) value the line reads
/// "<bits>: 2^(<stored>-<bias>) = 2^<stored - bias>".
/// The two reserved encodings are reported explicitly instead of being shown
/// as ordinary powers of two:
///   - all bits zero: zero or subnormal, whose scale is 2^(1 - bias);
///   - all bits one:  infinity or NaN.
template <class Uint>
void printExponent(Uint bits, int fromBit, int bitCount, int width, int exponentBias) {
    std::cout << std::setw(width) << " ";

    unsigned exponentValue = 0;
    bool allOnes = bitCount > 0;
    Uint mask = Uint{1} << fromBit;
    for (int i = 0; i < bitCount; ++i, mask >>= 1) {
        const bool bitValue = static_cast<bool>(bits & mask);
        exponentValue = (exponentValue << 1) | bitValue;
        allOnes = allOnes && bitValue;
        std::cout << bitValue;
    }

    std::cout << ": ";
    if (allOnes) {
        std::cout << "infinity or NaN (exponent field is all ones)\n";
    } else if (bitCount > 0 && exponentValue == 0) {
        // A zero field does not mean 2^(0 - bias): the scale is fixed at 2^(1 - bias).
        std::cout << "zero or subnormal, scale = 2^(1-" << exponentBias << ") = 2^" << 1 - exponentBias << "\n";
    } else {
        std::cout << "2^(" << exponentValue << "-" << exponentBias << ") = 2^"
                  << static_cast<int>(exponentValue) - exponentBias << "\n";
    }
}
/// Prints the stored significand bits of `bits` to std::cout twice on one
/// line: first as the raw bit string, then after a "1." prefix.
///
/// @param bits      raw bit pattern to inspect
/// @param fromBit   index of the most significant stored significand bit
/// @param bitCount  number of bits to print, read from `fromBit` downwards
///
/// Output format: "<bits>: 1.<bits>" followed by a newline.
template <class Uint>
void printSignificand(Uint bits, int fromBit, int bitCount) {
    // Writes the `bitCount` bits starting at `fromBit`, most significant first.
    const auto emitBits = [bits, fromBit, bitCount] {
        Uint mask = Uint{1} << fromBit;
        int remaining = bitCount;
        while (remaining-- != 0) {
            std::cout << static_cast<bool>(bits & mask);
            mask >>= 1;
        }
    };

    emitBits();
    std::cout << ": 1.";
    emitBits();
    std::cout << "\n";
}
/// Prints the bit-level breakdown of the floating-point value `r` to
/// std::cout: one line each for the sign bit, the exponent field and the
/// stored significand, then the value itself in fixed notation with
/// digits10 + 5 fractional digits, followed by a blank line.
///
/// If `Real` does not report std::numeric_limits<Real>::is_iec559, a message
/// is written to std::cerr and nothing else is printed.
///
/// Only 32-bit and 64-bit IEEE 754 types are supported; any other IEEE 754
/// type is rejected at compile time, because its bits would not fit the
/// integer used to inspect them.
///
/// The formatting flags and precision of std::cout are restored before
/// returning, so the call does not affect later output.
template <class Real>
void printBinary(Real r) {
    using Limits = std::numeric_limits<Real>;

    if constexpr (!Limits::is_iec559) {
        std::cerr<< "Real type does NOT fulfill the requirements of IEEE 754 standard\n";
        return;
    } else {
        using Uint = std::conditional_t<sizeof(Real) == sizeof(float), std::uint32_t, std::uint64_t>;
        static_assert(sizeof(Uint) == sizeof(Real),
                      "printBinary supports only 32-bit and 64-bit IEEE 754 types");

        // `digits` counts the implicit leading bit, which is not stored.
        constexpr int kSignificandBitStored = Limits::digits - 1;
        constexpr int kHighestBit = static_cast<int>(sizeof(Real)) * std::numeric_limits<unsigned char>::digits - 1;
        constexpr int kExponentBitCount = kHighestBit - kSignificandBitStored;
        // IEEE 754 bias is 2^(k-1) - 1 (127 for binary32, 1023 for binary64).
        constexpr int kExponentBias = (1 << (kExponentBitCount - 1)) - 1;

        // memcpy is the well-defined way to read the object representation.
        Uint bits;
        std::memcpy(&bits, &r, sizeof bits);

        // The widths right-align all three rows to the significand's width.
        printSign(bits, kHighestBit, kSignificandBitStored);
        printExponent(bits, kHighestBit - 1, kExponentBitCount, kSignificandBitStored - kExponentBitCount, kExponentBias);
        printSignificand(bits, kSignificandBitStored - 1, kSignificandBitStored);

        // std::fixed and setprecision are sticky; put the stream back afterwards.
        const std::ios_base::fmtflags savedFlags = std::cout.flags();
        const std::streamsize savedPrecision = std::cout.precision();
        std::cout << "Value = " << std::fixed << std::setprecision(Limits::digits10 + 5) << r << "\n\n";
        std::cout.flags(savedFlags);
        std::cout.precision(savedPrecision);
    }
}
// Entry point: prints the format summary for float and double, then the
// bit-level breakdown of 0.1, 0.2 and 0.3 in each of the two types.
int main() {
    std::cout << "float:\n";
    printInfo<float>();

    std::cout << "double:\n";
    printInfo<double>();

    for (const float sample : {0.1f, 0.2f, 0.3f}) {
        printBinary(sample);
    }
    for (const double sample : {0.1, 0.2, 0.3}) {
        printBinary(sample);
    }
}
output:
float:
std::round_to_nearest: Rounding toward nearest representable value
Real type fulfills the requirements of IEEE 754 standard
Sign bit: 1 bit
Exponent width: 8 bits (exponent bias = 128)
Significand precision: 24 bits (23 bits stored)
double:
std::round_to_nearest: Rounding toward nearest representable value
Real type fulfills the requirements of IEEE 754 standard
Sign bit: 1 bit
Exponent width: 11 bits (exponent bias = 1024)
Significand precision: 53 bits (52 bits stored)
0: +
01111011: 2^(123-128) = 2^-5
10011001100110011001101: 1.10011001100110011001101
Value = 0.10000000149
0: +
01111100: 2^(124-128) = 2^-4
10011001100110011001101: 1.10011001100110011001101
Value = 0.20000000298
0: +
01111101: 2^(125-128) = 2^-3
00110011001100110011010: 1.00110011001100110011010
Value = 0.30000001192
0: +
01111111011: 2^(1019-1024) = 2^-5
1001100110011001100110011001100110011001100110011010: 1.1001100110011001100110011001100110011001100110011010
Value = 0.10000000000000000555
0: +
01111111100: 2^(1020-1024) = 2^-4
1001100110011001100110011001100110011001100110011010: 1.1001100110011001100110011001100110011001100110011010
Value = 0.20000000000000001110
0: +
01111111101: 2^(1021-1024) = 2^-3
0011001100110011001100110011001100110011001100110011: 1.0011001100110011001100110011001100110011001100110011
Value = 0.29999999999999998890
thì thấy rounding style của float/double là nearest value, trên cppref cũng có ghi là usually là nearest value: https://en.cppreference.com/w/cpp/types/numeric_limits/round_style
kiểu float làm tròn theo nearest value thì 0.1f, 0.2f, 0.3f đều được làm tròn lên, nên nếu in ra quá digits10 của nó thì nó sẽ in ra giá trị lớn hơn 0.1, 0.2, 0.3: 0.10000000149, 0.20000000298, 0.30000001192, nên về mặt logic thì có thể 0.1f + 0.2f == 0.3f
kiểu double làm tròn theo nearest value thì 0.1, 0.2 được làm tròn lên 0.10000000000000000555, 0.20000000000000001110 nhưng 0.3 lại bị làm tròn xuống 0.29999999999999998890; tổng 0.1 + 0.2 lớn hơn 0.3 thật và được làm tròn thành 0.30000000000000004441, nên bảo đảm là 0.1 + 0.2 != 0.3 :V