decimal/rounding.go
Klaus Post cd690d0c9e Make NewFromFloat respect the precision of the input (#100)
* Make NewFromFloat respect the precision of the input

Restores the previous behaviour where input converted from float is truncated at the precision of the float.

The precision is depending on the actual value. Simply making it 15 digits would make it faster, but would in some cases lose some precision. So the code from the stdlib that does this calculation (very well) has been included.

Lots of good articles here: https://www.exploringbinary.com/decimal-precision-of-binary-floating-point-numbers/

Performance is around the same as the previous string roundtrip since it basically does the same, but allocations are a bit less.

`BenchmarkNewFromStringFloat` is the old method, `BenchmarkNewFromFloat` is the new.

```
BenchmarkNewFromFloatWithExponent-8     10000000               260 ns/op             174 B/op          4 allocs/op
BenchmarkNewFromFloat-8                  2000000               744 ns/op              90 B/op          2 allocs/op
BenchmarkNewFromStringFloat-8            2000000               822 ns/op             258 B/op          6 allocs/op
```

* Update Sin/Tan/Cos tests.
2018-07-09 16:31:17 -04:00

118 lines
3.8 KiB
Go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Multiprecision decimal numbers.
// For floating-point formatting only; not general purpose.
// Only operations are assign and (binary) left/right shift.
// Can do binary floating point in multiprecision decimal precisely
// because 2 divides 10; cannot do decimal floating point
// in multiprecision binary precisely.
package decimal
type floatInfo struct {
mantbits uint
expbits uint
bias int
}
var float32info = floatInfo{23, 8, -127}
var float64info = floatInfo{52, 11, -1023}
// roundShortest rounds d (= mant * 2^exp) to the shortest number of digits
// that will let the original floating point value be precisely reconstructed.
func roundShortest(d *decimal, mant uint64, exp int, flt *floatInfo) {
// If mantissa is zero, the number is zero; stop now.
if mant == 0 {
d.nd = 0
return
}
// Compute upper and lower such that any decimal number
// between upper and lower (possibly inclusive)
// will round to the original floating point number.
// We may see at once that the number is already shortest.
//
// Suppose d is not denormal, so that 2^exp <= d < 10^dp.
// The closest shorter number is at least 10^(dp-nd) away.
// The lower/upper bounds computed below are at distance
// at most 2^(exp-mantbits).
//
// So the number is already shortest if 10^(dp-nd) > 2^(exp-mantbits),
// or equivalently log2(10)*(dp-nd) > exp-mantbits.
// It is true if 332/100*(dp-nd) >= exp-mantbits (log2(10) > 3.32).
minexp := flt.bias + 1 // minimum possible exponent
if exp > minexp && 332*(d.dp-d.nd) >= 100*(exp-int(flt.mantbits)) {
// The number is already shortest.
return
}
// d = mant << (exp - mantbits)
// Next highest floating point number is mant+1 << exp-mantbits.
// Our upper bound is halfway between, mant*2+1 << exp-mantbits-1.
upper := new(decimal)
upper.Assign(mant*2 + 1)
upper.Shift(exp - int(flt.mantbits) - 1)
// d = mant << (exp - mantbits)
// Next lowest floating point number is mant-1 << exp-mantbits,
// unless mant-1 drops the significant bit and exp is not the minimum exp,
// in which case the next lowest is mant*2-1 << exp-mantbits-1.
// Either way, call it mantlo << explo-mantbits.
// Our lower bound is halfway between, mantlo*2+1 << explo-mantbits-1.
var mantlo uint64
var explo int
if mant > 1<<flt.mantbits || exp == minexp {
mantlo = mant - 1
explo = exp
} else {
mantlo = mant*2 - 1
explo = exp - 1
}
lower := new(decimal)
lower.Assign(mantlo*2 + 1)
lower.Shift(explo - int(flt.mantbits) - 1)
// The upper and lower bounds are possible outputs only if
// the original mantissa is even, so that IEEE round-to-even
// would round to the original mantissa and not the neighbors.
inclusive := mant%2 == 0
// Now we can figure out the minimum number of digits required.
// Walk along until d has distinguished itself from upper and lower.
for i := 0; i < d.nd; i++ {
l := byte('0') // lower digit
if i < lower.nd {
l = lower.d[i]
}
m := d.d[i] // middle digit
u := byte('0') // upper digit
if i < upper.nd {
u = upper.d[i]
}
// Okay to round down (truncate) if lower has a different digit
// or if lower is inclusive and is exactly the result of rounding
// down (i.e., and we have reached the final digit of lower).
okdown := l != m || inclusive && i+1 == lower.nd
// Okay to round up if upper has a different digit and either upper
// is inclusive or upper is bigger than the result of rounding up.
okup := m != u && (inclusive || m+1 < u || i+1 < upper.nd)
// If it's okay to do either, then round to the nearest one.
// If it's okay to do only one, do it.
switch {
case okdown && okup:
d.Round(i + 1)
return
case okdown:
d.RoundDown(i + 1)
return
case okup:
d.RoundUp(i + 1)
return
}
}
}