Skip to content

Commit 515e6a9

Browse files
committed
runtime: use CreateWaitableTimerEx to implement usleep
@jstarks suggested that recent versions of Windows provide access to high resolution timers. See #8687 (comment) for details. I tried to run this C program on my Windows 10 computer: ``` #include <stdio.h> #include <Windows.h> #pragma comment(lib, "Winmm.lib") // Apparently this is already defined when I use msvc cl. //#define CREATE_WAITABLE_TIMER_HIGH_RESOLUTION = 0x00000002; int usleep(HANDLE timer, LONGLONG d) { LARGE_INTEGER liDueTime; DWORD ret; LARGE_INTEGER StartingTime, EndingTime, ElapsedMicroseconds; LARGE_INTEGER Frequency; QueryPerformanceFrequency(&Frequency); QueryPerformanceCounter(&StartingTime); liDueTime.QuadPart = d; liDueTime.QuadPart = liDueTime.QuadPart * 10; // us into 100ns units liDueTime.QuadPart = -liDueTime.QuadPart; // negative for relative due time if (!SetWaitableTimer(timer, &liDueTime, 0, NULL, NULL, 0)) { printf("SetWaitableTimer failed: errno=%d\n", GetLastError()); return 1; } ret = WaitForSingleObject(timer, INFINITE); if (ret != WAIT_OBJECT_0) { printf("WaitForSingleObject failed: ret=%d errno=%d\n", ret, GetLastError()); return 1; } QueryPerformanceCounter(&EndingTime); ElapsedMicroseconds.QuadPart = EndingTime.QuadPart - StartingTime.QuadPart; ElapsedMicroseconds.QuadPart *= 1000000; ElapsedMicroseconds.QuadPart /= Frequency.QuadPart; printf("delay is %lld us - slept for %lld us\n", d, ElapsedMicroseconds.QuadPart); return 0; } int testTimer(DWORD createFlag) { HANDLE timer; timer = CreateWaitableTimerEx(NULL, NULL, createFlag, TIMER_ALL_ACCESS); if (timer == NULL) { printf("CreateWaitableTimerEx failed: errno=%d\n", GetLastError()); return 1; } usleep(timer, 1000LL); usleep(timer, 100LL); usleep(timer, 10LL); usleep(timer, 1LL); CloseHandle(timer); return 0; } int main() { printf("\n1. CREATE_WAITABLE_TIMER_HIGH_RESOLUTION is off - timeBeginPeriod is off\n"); testTimer(0); printf("\n2. 
CREATE_WAITABLE_TIMER_HIGH_RESOLUTION is on - timeBeginPeriod is off\n"); testTimer(CREATE_WAITABLE_TIMER_HIGH_RESOLUTION); timeBeginPeriod(1); printf("\n3. CREATE_WAITABLE_TIMER_HIGH_RESOLUTION is off - timeBeginPeriod is on\n"); testTimer(0); printf("\n4. CREATE_WAITABLE_TIMER_HIGH_RESOLUTION is on - timeBeginPeriod is on\n"); testTimer(CREATE_WAITABLE_TIMER_HIGH_RESOLUTION); } ``` and I see this output ``` 1. CREATE_WAITABLE_TIMER_HIGH_RESOLUTION is off - timeBeginPeriod is off delay is 1000 us - slept for 4045 us delay is 100 us - slept for 3915 us delay is 10 us - slept for 3291 us delay is 1 us - slept for 2234 us 2. CREATE_WAITABLE_TIMER_HIGH_RESOLUTION is on - timeBeginPeriod is off delay is 1000 us - slept for 1076 us delay is 100 us - slept for 569 us delay is 10 us - slept for 585 us delay is 1 us - slept for 17 us 3. CREATE_WAITABLE_TIMER_HIGH_RESOLUTION is off - timeBeginPeriod is on delay is 1000 us - slept for 742 us delay is 100 us - slept for 893 us delay is 10 us - slept for 414 us delay is 1 us - slept for 920 us 4. CREATE_WAITABLE_TIMER_HIGH_RESOLUTION is on - timeBeginPeriod is on delay is 1000 us - slept for 1466 us delay is 100 us - slept for 559 us delay is 10 us - slept for 535 us delay is 1 us - slept for 5 us ``` That shows that using CREATE_WAITABLE_TIMER_HIGH_RESOLUTION does indeed provide sleeps as short as about 500 microseconds, while our current approach provides sleeps of about 1 millisecond. The new approach also does not require timeBeginPeriod to be on, so this change solves the long-standing problem of Go programs draining laptop batteries because the runtime calls timeBeginPeriod. This change only takes effect on systems where the CREATE_WAITABLE_TIMER_HIGH_RESOLUTION flag is available. If the flag is not available, the runtime falls back to the original code that uses timeBeginPeriod. 
This is how this change affects the benchmark reported in issue #14790: name               old time/op  new time/op  delta ChanToSyscallPing  1.05ms ± 2%  0.68ms ±11%  -35.43%  (p=0.000 n=10+10) The benchmark was run with GOMAXPROCS set to 1. Fixes #8687 Updates #14790 Change-Id: I5b97ba58289c088c17c05292e12e45285c467eae Reviewed-on: https://go-review.googlesource.com/c/go/+/248699 Run-TryBot: Alex Brainman <alex.brainman@gmail.com> TryBot-Result: Go Bot <gobot@golang.org> Trust: Alex Brainman <alex.brainman@gmail.com> Reviewed-by: Austin Clements <austin@google.com>
1 parent fc98165 commit 515e6a9

File tree

4 files changed

+144
-2
lines changed

4 files changed

+144
-2
lines changed

src/runtime/os_windows.go

Lines changed: 71 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ const (
2121
//go:cgo_import_dynamic runtime._CreateIoCompletionPort CreateIoCompletionPort%4 "kernel32.dll"
2222
//go:cgo_import_dynamic runtime._CreateThread CreateThread%6 "kernel32.dll"
2323
//go:cgo_import_dynamic runtime._CreateWaitableTimerA CreateWaitableTimerA%3 "kernel32.dll"
24+
//go:cgo_import_dynamic runtime._CreateWaitableTimerExW CreateWaitableTimerExW%4 "kernel32.dll"
2425
//go:cgo_import_dynamic runtime._DuplicateHandle DuplicateHandle%7 "kernel32.dll"
2526
//go:cgo_import_dynamic runtime._ExitProcess ExitProcess%1 "kernel32.dll"
2627
//go:cgo_import_dynamic runtime._FreeEnvironmentStringsW FreeEnvironmentStringsW%1 "kernel32.dll"
@@ -68,6 +69,7 @@ var (
6869
_CreateIoCompletionPort,
6970
_CreateThread,
7071
_CreateWaitableTimerA,
72+
_CreateWaitableTimerExW,
7173
_DuplicateHandle,
7274
_ExitProcess,
7375
_FreeEnvironmentStringsW,
@@ -151,6 +153,8 @@ type mOS struct {
151153
waitsema uintptr // semaphore for parking on locks
152154
resumesema uintptr // semaphore to indicate suspend/resume
153155

156+
highResTimer uintptr // high resolution timer handle used in usleep
157+
154158
// preemptExtLock synchronizes preemptM with entry/exit from
155159
// external C code.
156160
//
@@ -402,18 +406,64 @@ const osRelaxMinNS = 60 * 1e6
402406
// osRelax is called by the scheduler when transitioning to and from
403407
// all Ps being idle.
404408
//
405-
// On Windows, it adjusts the system-wide timer resolution. Go needs a
409+
// Some versions of Windows have a high resolution timer. For those
410+
// versions osRelax is a no-op.
411+
// For Windows versions without a high resolution timer, osRelax
412+
// adjusts the system-wide timer resolution. Go needs a
406413
// high resolution timer while running and there's little extra cost
407414
// if we're already using the CPU, but if all Ps are idle there's no
408415
// need to consume extra power to drive the high-res timer.
409416
func osRelax(relax bool) uint32 {
417+
if haveHighResTimer {
418+
// If the high resolution timer is available, the runtime uses the timer
419+
// to sleep for short durations. This means there's no need to adjust
420+
// the global clock frequency.
421+
return 0
422+
}
423+
410424
if relax {
411425
return uint32(stdcall1(_timeEndPeriod, 1))
412426
} else {
413427
return uint32(stdcall1(_timeBeginPeriod, 1))
414428
}
415429
}
416430

431+
// haveHighResTimer indicates that the CreateWaitableTimerEx
432+
// CREATE_WAITABLE_TIMER_HIGH_RESOLUTION flag is available.
433+
var haveHighResTimer = false
434+
435+
// createHighResTimer calls CreateWaitableTimerEx with
436+
// CREATE_WAITABLE_TIMER_HIGH_RESOLUTION flag to create high
437+
// resolution timer. createHighResTimer returns new timer
438+
// handle or 0, if CreateWaitableTimerEx failed.
439+
func createHighResTimer() uintptr {
440+
const (
441+
// As per @jstarks, see
442+
// https://github.com/golang/go/issues/8687#issuecomment-656259353
443+
_CREATE_WAITABLE_TIMER_HIGH_RESOLUTION = 0x00000002
444+
445+
_SYNCHRONIZE = 0x00100000
446+
_TIMER_QUERY_STATE = 0x0001
447+
_TIMER_MODIFY_STATE = 0x0002
448+
)
449+
return stdcall4(_CreateWaitableTimerExW, 0, 0,
450+
_CREATE_WAITABLE_TIMER_HIGH_RESOLUTION,
451+
_SYNCHRONIZE|_TIMER_QUERY_STATE|_TIMER_MODIFY_STATE)
452+
}
453+
454+
func initHighResTimer() {
455+
if GOARCH == "arm" {
456+
// TODO: Not yet implemented.
457+
return
458+
}
459+
h := createHighResTimer()
460+
if h != 0 {
461+
haveHighResTimer = true
462+
usleep2Addr = unsafe.Pointer(funcPC(usleep2HighRes))
463+
stdcall1(_CloseHandle, h)
464+
}
465+
}
466+
417467
func osinit() {
418468
asmstdcallAddr = unsafe.Pointer(funcPC(asmstdcall))
419469
usleep2Addr = unsafe.Pointer(funcPC(usleep2))
@@ -429,6 +479,7 @@ func osinit() {
429479

430480
stdcall2(_SetConsoleCtrlHandler, funcPC(ctrlhandler), 1)
431481

482+
initHighResTimer()
432483
timeBeginPeriodRetValue = osRelax(false)
433484

434485
ncpu = getproccount()
@@ -844,9 +895,20 @@ func minit() {
844895
var thandle uintptr
845896
stdcall7(_DuplicateHandle, currentProcess, currentThread, currentProcess, uintptr(unsafe.Pointer(&thandle)), 0, 0, _DUPLICATE_SAME_ACCESS)
846897

898+
// Configure usleep timer, if possible.
899+
var timer uintptr
900+
if haveHighResTimer {
901+
timer = createHighResTimer()
902+
if timer == 0 {
903+
print("runtime: CreateWaitableTimerEx failed; errno=", getlasterror(), "\n")
904+
throw("CreateWaitableTimerEx when creating timer failed")
905+
}
906+
}
907+
847908
mp := getg().m
848909
lock(&mp.threadLock)
849910
mp.thread = thandle
911+
mp.highResTimer = timer
850912
unlock(&mp.threadLock)
851913

852914
// Query the true stack base from the OS. Currently we're
@@ -884,6 +946,10 @@ func unminit() {
884946
lock(&mp.threadLock)
885947
stdcall1(_CloseHandle, mp.thread)
886948
mp.thread = 0
949+
if mp.highResTimer != 0 {
950+
stdcall1(_CloseHandle, mp.highResTimer)
951+
mp.highResTimer = 0
952+
}
887953
unlock(&mp.threadLock)
888954
}
889955

@@ -976,9 +1042,12 @@ func stdcall7(fn stdFunction, a0, a1, a2, a3, a4, a5, a6 uintptr) uintptr {
9761042
return stdcall(fn)
9771043
}
9781044

979-
// in sys_windows_386.s and sys_windows_amd64.s
1045+
// In sys_windows_386.s and sys_windows_amd64.s.
9801046
func onosstack(fn unsafe.Pointer, arg uint32)
1047+
1048+
// These are not callable functions. They should only be called via onosstack.
9811049
func usleep2(usec uint32)
1050+
func usleep2HighRes(usec uint32)
9821051
func switchtothread()
9831052

9841053
var usleep2Addr unsafe.Pointer

src/runtime/sys_windows_386.s

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,42 @@ TEXT runtime·usleep2(SB),NOSPLIT,$20
428428
MOVL BP, SP
429429
RET
430430

431+
// Runs on OS stack. duration (in 100ns units) is in BX.
432+
TEXT runtime·usleep2HighRes(SB),NOSPLIT,$36
433+
// Want negative 100ns units.
434+
NEGL BX
435+
MOVL $-1, hi-4(SP)
436+
MOVL BX, lo-8(SP)
437+
438+
get_tls(CX)
439+
MOVL g(CX), CX
440+
MOVL g_m(CX), CX
441+
MOVL (m_mOS+mOS_highResTimer)(CX), CX
442+
MOVL CX, saved_timer-12(SP)
443+
444+
MOVL $0, fResume-16(SP)
445+
MOVL $0, lpArgToCompletionRoutine-20(SP)
446+
MOVL $0, pfnCompletionRoutine-24(SP)
447+
MOVL $0, lPeriod-28(SP)
448+
LEAL lo-8(SP), BX
449+
MOVL BX, lpDueTime-32(SP)
450+
MOVL CX, hTimer-36(SP)
451+
MOVL SP, BP
452+
MOVL runtime·_SetWaitableTimer(SB), AX
453+
CALL AX
454+
MOVL BP, SP
455+
456+
MOVL $0, ptime-28(SP)
457+
MOVL $0, alertable-32(SP)
458+
MOVL saved_timer-12(SP), CX
459+
MOVL CX, handle-36(SP)
460+
MOVL SP, BP
461+
MOVL runtime·_NtWaitForSingleObject(SB), AX
462+
CALL AX
463+
MOVL BP, SP
464+
465+
RET
466+
431467
// Runs on OS stack.
432468
TEXT runtime·switchtothread(SB),NOSPLIT,$0
433469
MOVL SP, BP

src/runtime/sys_windows_amd64.s

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,38 @@ TEXT runtime·usleep2(SB),NOSPLIT|NOFRAME,$48
457457
MOVQ 40(SP), SP
458458
RET
459459

460+
// Runs on OS stack. duration (in 100ns units) is in BX.
461+
TEXT runtime·usleep2HighRes(SB),NOSPLIT|NOFRAME,$72
462+
MOVQ SP, AX
463+
ANDQ $~15, SP // alignment as per Windows requirement
464+
MOVQ AX, 64(SP)
465+
466+
get_tls(CX)
467+
MOVQ g(CX), CX
468+
MOVQ g_m(CX), CX
469+
MOVQ (m_mOS+mOS_highResTimer)(CX), CX // hTimer
470+
MOVQ CX, 48(SP) // save hTimer for later
471+
// Want negative 100ns units.
472+
NEGQ BX
473+
LEAQ 56(SP), DX // lpDueTime
474+
MOVQ BX, (DX)
475+
MOVQ $0, R8 // lPeriod
476+
MOVQ $0, R9 // pfnCompletionRoutine
477+
MOVQ $0, AX
478+
MOVQ AX, 32(SP) // lpArgToCompletionRoutine
479+
MOVQ AX, 40(SP) // fResume
480+
MOVQ runtime·_SetWaitableTimer(SB), AX
481+
CALL AX
482+
483+
MOVQ 48(SP), CX // handle
484+
MOVQ $0, DX // alertable
485+
MOVQ $0, R8 // ptime
486+
MOVQ runtime·_NtWaitForSingleObject(SB), AX
487+
CALL AX
488+
489+
MOVQ 64(SP), SP
490+
RET
491+
460492
// Runs on OS stack.
461493
TEXT runtime·switchtothread(SB),NOSPLIT|NOFRAME,$0
462494
MOVQ SP, AX

src/runtime/sys_windows_arm.s

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,11 @@ TEXT runtime·usleep2(SB),NOSPLIT|NOFRAME,$0
468468
MOVW R4, R13 // Restore SP
469469
MOVM.IA.W (R13), [R4, R15] // pop {R4, pc}
470470

471+
// Runs on OS stack. Duration (in 100ns units) is in R0.
472+
// TODO: needs to be implemented properly.
473+
TEXT runtime·usleep2HighRes(SB),NOSPLIT|NOFRAME,$0
474+
B runtime·abort(SB)
475+
471476
// Runs on OS stack.
472477
TEXT runtime·switchtothread(SB),NOSPLIT|NOFRAME,$0
473478
MOVM.DB.W [R4, R14], (R13) // push {R4, lr}

0 commit comments

Comments
 (0)