Skip to content

Commit 90391c2

Browse files
committed
math: add round assembly implementations on riscv64
This CL reapplies CL 504737 and adds integer precision limitation check, since CL 504737 only checks whether floating point number is +-Inf or NaN. This CL is also ~7% faster than CL 504737. Updates #68322 goos: linux goarch: riscv64 pkg: math │ math.old.bench │ math.new.bench │ │ sec/op │ sec/op vs base │ Ceil 54.09n ± 0% 18.72n ± 0% -65.39% (p=0.000 n=10) Floor 40.72n ± 0% 18.72n ± 0% -54.03% (p=0.000 n=10) Round 20.73n ± 0% 20.73n ± 0% ~ (p=1.000 n=10) RoundToEven 24.07n ± 0% 24.07n ± 0% ~ (p=1.000 n=10) Trunc 38.72n ± 0% 18.72n ± 0% -51.65% (p=0.000 n=10) geomean 33.56n 20.09n -40.13% Change-Id: I06cfe2cb9e2535cd705d40b6650a7e71fedd906c Reviewed-on: https://go-review.googlesource.com/c/go/+/600075 Reviewed-by: Keith Randall <khr@golang.org> Reviewed-by: Joel Sing <joel@sing.id.au> Reviewed-by: Keith Randall <khr@google.com> Reviewed-by: Michael Knyszek <mknyszek@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
1 parent 2982253 commit 90391c2

File tree

3 files changed

+50
-2
lines changed

3 files changed

+50
-2
lines changed

src/math/floor_asm.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// Use of this source code is governed by a BSD-style
33
// license that can be found in the LICENSE file.
44

5-
//go:build 386 || amd64 || arm64 || ppc64 || ppc64le || s390x || wasm
5+
//go:build 386 || amd64 || arm64 || ppc64 || ppc64le || riscv64 || s390x || wasm
66

77
package math
88

src/math/floor_noasm.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// Use of this source code is governed by a BSD-style
33
// license that can be found in the LICENSE file.
44

5-
//go:build !386 && !amd64 && !arm64 && !ppc64 && !ppc64le && !s390x && !wasm
5+
//go:build !386 && !amd64 && !arm64 && !ppc64 && !ppc64le && !riscv64 && !s390x && !wasm
66

77
package math
88

src/math/floor_riscv64.s

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// Copyright 2024 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
#include "textflag.h"
6+
7+
// RISC-V offers floating-point (FP) rounding through FP conversion instructions (FCVT)
8+
// with a rounding mode field.
9+
// As the Go spec expects the FP rounding result in FP, we have to use FCVT to convert the integer
10+
// back to FP (fp -> int -> fp).
11+
// RISC-V only sets the Invalid flag during an invalid FP-integer conversion without changing any data,
12+
// on the other hand, RISC-V sets out of integer represent range yet valid FP into NaN.
13+
// When it comes to integer-FP conversion, invalid FP like NaN, +-Inf will be
14+
// converted into the closest valid FP, for example:
15+
//
16+
// `Floor(-Inf) -> int64(0x8000000000000000) -> float64(-9.22e+18)`
17+
// `Floor(18446744073709549568.0) -> int64(0x7fffffffffffffff) -> float64(9.22e+18)`
18+
//
19+
// This ISA conversion limitation requires that we skip all invalid or out-of-range FP values
20+
// before any normal rounding operations.
21+
22+
#define ROUNDFN(NAME, MODE) \
23+
TEXT NAME(SB),NOSPLIT,$0; \
24+
MOVD x+0(FP), F10; \
25+
FMVXD F10, X10; \
26+
/* Shift out the 52 fraction bits, leaving the sign and exponent bits */;\
27+
SRL $52, X10, X12; \
28+
/* Mask to the 11-bit biased exponent, dropping the sign bit */; \
29+
AND $0x7FF, X12, X12;\
30+
/* Return the input unchanged if its biased exponent exceeds 1023+53: +-Inf and NaN (0x7FF) or beyond the precision limit */;\
31+
/* 1023: exponent bias; [-2^53, 2^53]: range in which every integer is exactly representable */;\
32+
MOV $1023+53, X11; \
33+
BLTU X11, X12, 4(PC);\
34+
FCVTLD.MODE F10, X11; \
35+
FCVTDL X11, F11; \
36+
/* A negative input that rounds to zero converts back as +0; restore the sign of the input */;\
37+
FSGNJD F10, F11, F10; \
38+
MOVD F10, ret+8(FP); \
39+
RET
40+
41+
// func archFloor(x float64) float64
// Instantiates ROUNDFN with rounding mode RDN (round down, toward -Inf).
42+
ROUNDFN(·archFloor, RDN)
43+
44+
// func archCeil(x float64) float64
// Instantiates ROUNDFN with rounding mode RUP (round up, toward +Inf).
45+
ROUNDFN(·archCeil, RUP)
46+
47+
// func archTrunc(x float64) float64
// Instantiates ROUNDFN with rounding mode RTZ (round toward zero).
48+
ROUNDFN(·archTrunc, RTZ)

0 commit comments

Comments
 (0)