Tại sao cùng một biểu thức 0.1 + 0.2 != 0.3 nhưng khi gán cho biến float thì kết quả so sánh lại khác?

tntxtnt · May 18, 2022, 1:04pm

Mình viết thử một chương trình nhỏ để xem cách biểu diễn số dấu phẩy động (floating point inspection): https://rextester.com/UAZZN4341

#include <cstdint>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <limits>
#include <map>
#include <string>
#include <type_traits>
#include <utility>

// Lookup table from the integer value of a std::float_round_style enumerator
// to {enumerator spelling, human-readable description}.
// Keys are written with the enumerators themselves; they convert to the
// integers -1, 0, 1, 2 and 3 respectively.
const std::map<int, std::pair<std::string, std::string>> kFloatRoundStyleDefs {
    {std::round_indeterminate,
        {"std::round_indeterminate", "Rounding style cannot be determined"}},
    {std::round_toward_zero,
        {"std::round_toward_zero", "Rounding toward zero"}},
    {std::round_to_nearest,
        {"std::round_to_nearest", "Rounding toward nearest representable value"}},
    {std::round_toward_infinity,
        {"std::round_toward_infinity", "Rounding toward positive infinity"}},
    {std::round_toward_neg_infinity,
        {"std::round_toward_neg_infinity", "Rounding toward negative infinity "}},
};

// Prints a short description of the floating-point type `Real` to std::cout:
// its rounding style and, when the type reports IEEE 754 conformance
// (std::numeric_limits<Real>::is_iec559), the sign / exponent / significand
// field widths and the exponent bias.
//
// For a non-IEEE-754 type, only the rounding style is printed, a diagnostic is
// written to std::cerr, and the function returns early.
//
// The exponent width is derived as "total bits - sign bit - stored significand
// bits", so it is only correct for types whose object representation consists
// of exactly those three fields (binary32 / binary64). Types with padding
// bits or an explicitly stored integer bit would report a wrong width.
template <class Real>
void printInfo() {
    using Limits = std::numeric_limits<Real>;

    const auto& [rsName, rsDef] = kFloatRoundStyleDefs.at(Limits::round_style);
    std::cout << rsName << ": " << rsDef << "\n";
    if (!Limits::is_iec559) {
        std::cerr << "Real type does NOT fulfill the requirements of IEEE 754 standard\n";
        return;
    }
    std::cout << "Real type fulfills the requirements of IEEE 754 standard\n";

    // `digits` counts the implicit leading 1, which is not stored.
    const int kSignificandBitStored = Limits::digits - 1;
    const int kHighestBit = static_cast<int>(sizeof(Real)) * std::numeric_limits<unsigned char>::digits - 1;
    const int kExponentBitCount = kHighestBit - kSignificandBitStored;
    // IEEE 754 bias is 2^(w-1) - 1 (127 for binary32, 1023 for binary64),
    // not 2^(w-1).
    const int kExponentBias = (1 << (kExponentBitCount - 1)) - 1;

    std::cout << "Sign bit: 1 bit\n";
    std::cout << "Exponent width: " << kExponentBitCount << " bits (exponent bias = " << kExponentBias << ")\n";
    std::cout << "Significand precision: " << Limits::digits << " bits (" << kSignificandBitStored << " bits stored)\n";
    std::cout << "\n";
}
    
// Prints the sign bit of `bits` on its own line.
//
// bits    - raw bit pattern of the floating-point value
// fromBit - index (0 = least significant) of the sign bit
// width   - field width used to right-align the printed 0/1
//
// Output format: "<0|1 padded to width>: <+|->\n".
template <class Uint>
void printSign(Uint bits, int fromBit, int width) {
    const Uint signMask = Uint{1} << fromBit;
    const bool isNegative = (bits & signMask) != 0;
    const char symbol = isNegative ? '-' : '+';
    std::cout << std::setw(width) << isNegative << ": " << symbol << "\n";
}

// Prints the exponent field of `bits` as binary digits, followed by its
// biased and unbiased interpretation.
//
// bits         - raw bit pattern of the floating-point value
// fromBit      - index of the most significant exponent bit
// bitCount     - number of exponent bits, read from fromBit downwards
// width        - field width of the blank padding printed before the digits
// exponentBias - bias subtracted from the stored exponent
//
// Output format: "<padding><bits>: 2^(<stored>-<bias>) = 2^<stored - bias>\n".
template <class Uint>
void printExponent(Uint bits, int fromBit, int bitCount, int width, int exponentBias) {
    // Left padding so the digits line up with the other printed fields.
    std::cout << std::setw(width) << " ";

    unsigned storedExponent = 0;
    Uint probe = Uint{1} << fromBit;
    while (bitCount-- != 0) {
        const bool isSet = (bits & probe) != 0;
        std::cout << isSet;
        // Accumulate the field MSB-first.
        storedExponent = (storedExponent << 1) | isSet;
        probe >>= 1;
    }

    const int unbiased = static_cast<int>(storedExponent) - exponentBias;
    std::cout << ": " << "2^(" << storedExponent << "-" << exponentBias << ") = 2^" << unbiased << "\n";
}

// Prints the stored significand bits of `bits` twice on one line: first raw,
// then prefixed with "1." (the implicit leading one).
//
// bits     - raw bit pattern of the floating-point value
// fromBit  - index of the most significant stored significand bit
// bitCount - number of significand bits, read from fromBit downwards
//
// Output format: "<bits>: 1.<bits>\n".
template <class Uint>
void printSignificand(Uint bits, int fromBit, int bitCount) {
    const Uint topMask = Uint{1} << fromBit;

    // Streams `remaining` bits MSB-first, starting at the top mask.
    const auto emitBits = [bits, topMask](int remaining) {
        Uint cursor = topMask;
        while (remaining-- != 0) {
            std::cout << ((bits & cursor) != 0);
            cursor >>= 1;
        }
    };

    emitBits(bitCount);
    std::cout << ": 1.";
    emitBits(bitCount);
    std::cout << "\n";
}

// Prints the IEEE 754 decomposition of `r` to std::cout: the sign bit, the
// exponent field (with its biased and unbiased value), the stored significand
// bits, and finally the decimal value with digits10 + 5 fractional digits.
//
// If `Real` does not report IEEE 754 conformance
// (std::numeric_limits<Real>::is_iec559), a diagnostic is written to std::cerr
// and nothing else is printed.
//
// Only 32-bit and 64-bit types are supported, because the value is copied into
// a uint32_t / uint64_t; any other size is rejected at compile time instead of
// copying only part of the object and shifting out of range.
//
// Note: the significand is always shown with an implicit leading "1.", so the
// printed decomposition is only meaningful for normal numbers (not for zero,
// subnormals, infinities or NaNs).
template <class Real>
void printBinary(Real r) {
    using Limits = std::numeric_limits<Real>;

    if constexpr (!Limits::is_iec559) {
        std::cerr<< "Real type does NOT fulfill the requirements of IEEE 754 standard\n";
        return;
    } else {
        using Uint = std::conditional_t<sizeof(Real) == sizeof(float), std::uint32_t, std::uint64_t>;
        static_assert(sizeof(Real) == sizeof(Uint),
                      "printBinary supports only 32-bit and 64-bit IEEE 754 types");

        // `digits` counts the implicit leading 1, which is not stored.
        constexpr int kSignificandBitStored = Limits::digits - 1;
        constexpr int kHighestBit = static_cast<int>(sizeof(Real)) * std::numeric_limits<unsigned char>::digits - 1;
        constexpr int kExponentBitCount = kHighestBit - kSignificandBitStored;
        // IEEE 754 bias is 2^(w-1) - 1 (127 for binary32, 1023 for binary64),
        // not 2^(w-1).
        constexpr int kExponentBias = (1 << (kExponentBitCount - 1)) - 1;

        // memcpy is the well-defined way to read the object representation.
        Uint bits;
        std::memcpy(&bits, &r, sizeof(bits));

        printSign(bits, kHighestBit, kSignificandBitStored);
        printExponent(bits, kHighestBit - 1, kExponentBitCount, kSignificandBitStored - kExponentBitCount, kExponentBias);
        printSignificand(bits, kSignificandBitStored - 1, kSignificandBitStored);

        // std::fixed and the precision are sticky; restore them afterwards so
        // this call does not change how later output on std::cout is formatted.
        const std::ios_base::fmtflags savedFlags = std::cout.flags();
        const std::streamsize savedPrecision = std::cout.precision();
        std::cout << "Value = " << std::fixed << std::setprecision(Limits::digits10 + 5) << r << "\n\n";
        std::cout.flags(savedFlags);
        std::cout.precision(savedPrecision);
    }
}

// Entry point: prints the format summary for float and double, then the bit
// decomposition of 0.1, 0.2 and 0.3 in each of the two types.
int main() {
    std::cout << "float:\n";
    printInfo<float>();
    std::cout << "double:\n";
    printInfo<double>();

    // Single-precision samples first, then the same values in double precision.
    for (const float single : {0.1f, 0.2f, 0.3f})
        printBinary(single);
    for (const double wide : {0.1, 0.2, 0.3})
        printBinary(wide);
}

output:

float:
std::round_to_nearest: Rounding toward nearest representable value
Real type fulfills the requirements of IEEE 754 standard
Sign bit: 1 bit
Exponent width: 8 bits (exponent bias = 128)
Significand precision: 24 bits (23 bits stored)

double:
std::round_to_nearest: Rounding toward nearest representable value
Real type fulfills the requirements of IEEE 754 standard
Sign bit: 1 bit
Exponent width: 11 bits (exponent bias = 1024)
Significand precision: 53 bits (52 bits stored)

                      0: +
               01111011: 2^(123-128) = 2^-5
10011001100110011001101: 1.10011001100110011001101
Value = 0.10000000149

                      0: +
               01111100: 2^(124-128) = 2^-4
10011001100110011001101: 1.10011001100110011001101
Value = 0.20000000298

                      0: +
               01111101: 2^(125-128) = 2^-3
00110011001100110011010: 1.00110011001100110011010
Value = 0.30000001192

                                                   0: +
                                         01111111011: 2^(1019-1024) = 2^-5
1001100110011001100110011001100110011001100110011010: 1.1001100110011001100110011001100110011001100110011010
Value = 0.10000000000000000555

                                                   0: +
                                         01111111100: 2^(1020-1024) = 2^-4
1001100110011001100110011001100110011001100110011010: 1.1001100110011001100110011001100110011001100110011010
Value = 0.20000000000000001110

                                                   0: +
                                         01111111101: 2^(1021-1024) = 2^-3
0011001100110011001100110011001100110011001100110011: 1.0011001100110011001100110011001100110011001100110011
Value = 0.29999999999999998890

Chạy thử thì thấy rounding style của cả float và double đều là làm tròn về giá trị gần nhất (round to nearest); trên cppreference cũng ghi rằng thông thường nó là round to nearest: https://en.cppreference.com/w/cpp/types/numeric_limits/round_style

Với kiểu float, khi làm tròn về giá trị gần nhất thì 0.1f, 0.2f và 0.3f đều bị làm tròn lên, nên nếu in ra nhiều chữ số hơn digits10 của nó thì sẽ thấy các giá trị lớn hơn 0.1, 0.2, 0.3 một chút: 0.10000000149, 0.20000000298, 0.30000001192. Tổng 0.1f + 0.2f sau đó cũng được làm tròn về giá trị float gần nhất, nên về mặt logic thì có thể 0.1f + 0.2f == 0.3f.

Với kiểu double, khi làm tròn về giá trị gần nhất thì 0.1 và 0.2 được làm tròn lên thành 0.10000000000000000555 và 0.20000000000000001110, nhưng 0.3 lại bị làm tròn xuống thành 0.29999999999999998890, nên bảo đảm là 0.1 + 0.2 != 0.3 :V