Skip to content

Commit c985648

Browse files
committed
Remove Windows function preloading
1 parent aac82a9 commit c985648

File tree

3 files changed

+86
-158
lines changed

3 files changed

+86
-158
lines changed

library/std/src/sys/windows/c.rs

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1250,19 +1250,15 @@ compat_fn_with_fallback! {
12501250
}
12511251
}
12521252

1253-
compat_fn_optional! {
1253+
compat_fn_with_fallback! {
12541254
pub static SYNCH_API: &CStr = ansi_str!("api-ms-win-core-synch-l1-2-0");
1255-
1256-
// >= Windows 8 / Server 2012
1257-
// https://docs.microsoft.com/en-us/windows/win32/api/synchapi/nf-synchapi-waitonaddress
1258-
pub fn WaitOnAddress(
1259-
Address: LPVOID,
1260-
CompareAddress: LPVOID,
1261-
AddressSize: SIZE_T,
1262-
dwMilliseconds: DWORD
1263-
) -> BOOL;
1264-
pub fn WakeByAddressSingle(Address: LPVOID) -> ();
1255+
#[allow(unused)]
1256+
fn WakeByAddressSingle(Address: LPVOID) -> () {
1257+
crate::sys::windows::thread_parker::unpark_keyed_event(Address)
1258+
}
12651259
}
1260+
pub use crate::sys::compat::WaitOnAddress;
1261+
pub use WakeByAddressSingle::call as wake_by_address_single_or_unpark_keyed_event;
12661262

12671263
compat_fn_with_fallback! {
12681264
pub static NTDLL: &CStr = ansi_str!("ntdll");

library/std/src/sys/windows/compat.rs

Lines changed: 63 additions & 132 deletions
Original file line numberDiff line numberDiff line change
@@ -7,47 +7,17 @@
77
//! `GetModuleHandle` and `GetProcAddress` to look up DLL entry points at
88
//! runtime.
99
//!
10-
//! This implementation uses a static initializer to look up the DLL entry
11-
//! points. The CRT (C runtime) executes static initializers before `main`
12-
//! is called (for binaries) and before `DllMain` is called (for DLLs).
13-
//! This is the ideal time to look up DLL imports, because we are guaranteed
14-
//! that no other threads will attempt to call these entry points. Thus,
15-
//! we can look up the imports and store them in `static mut` fields
16-
//! without any synchronization.
10+
//! This is implemented simply by storing a function pointer in an atomic
11+
//! and using relaxed ordering to load it. This means that calling it will be no
12+
//! more expensive then calling any other dynamically imported function.
1713
//!
18-
//! This has an additional advantage: Because the DLL import lookup happens
19-
//! at module initialization, the cost of these lookups is deterministic,
20-
//! and is removed from the code paths that actually call the DLL imports.
21-
//! That is, there is no unpredictable "cache miss" that occurs when calling
22-
//! a DLL import. For applications that benefit from predictable delays,
23-
//! this is a benefit. This also eliminates the comparison-and-branch
24-
//! from the hot path.
25-
//!
26-
//! Currently, the standard library uses only a small number of dynamic
27-
//! DLL imports. If this number grows substantially, then the cost of
28-
//! performing all of the lookups at initialization time might become
29-
//! substantial.
30-
//!
31-
//! The mechanism of registering a static initializer with the CRT is
32-
//! documented in
33-
//! [CRT Initialization](https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-initialization?view=msvc-160).
34-
//! It works by contributing a global symbol to the `.CRT$XCU` section.
35-
//! The linker builds a table of all static initializer functions.
36-
//! The CRT startup code then iterates that table, calling each
37-
//! initializer function.
38-
//!
39-
//! # **WARNING!!*
40-
//! The environment that a static initializer function runs in is highly
41-
//! constrained. There are **many** restrictions on what static initializers
42-
//! can safely do. Static initializer functions **MUST NOT** do any of the
43-
//! following (this list is not comprehensive):
44-
//! * touch any other static field that is used by a different static
45-
//! initializer, because the order that static initializers run in
46-
//! is not defined.
47-
//! * call `LoadLibrary` or any other function that acquires the DLL
48-
//! loader lock.
49-
//! * call any Rust function or CRT function that touches any static
50-
//! (global) state.
14+
//! The stored function pointer starts out as an importer function which will
15+
//! swap itself with the real function when it's called for the first time. If
16+
//! the real function can't be imported then a fallback function is used in its
17+
//! place. While this is zero cost for the happy path (where the function is
18+
//! already loaded) it does mean there's some overhead the first time the
19+
//! function is called. In the worst case, multiple threads may all end up
20+
//! importing the same function unnecessarily.
5121
5222
use crate::ffi::{c_void, CStr};
5323
use crate::ptr::NonNull;
@@ -85,39 +55,6 @@ pub(crate) const fn const_cstr_from_bytes(bytes: &'static [u8]) -> &'static CStr
8555
unsafe { crate::ffi::CStr::from_bytes_with_nul_unchecked(bytes) }
8656
}
8757

88-
#[used]
89-
#[link_section = ".CRT$XCU"]
90-
static INIT_TABLE_ENTRY: unsafe extern "C" fn() = init;
91-
92-
/// This is where the magic preloading of symbols happens.
93-
///
94-
/// Note that any functions included here will be unconditionally included in
95-
/// the final binary, regardless of whether or not they're actually used.
96-
///
97-
/// Therefore, this is limited to `compat_fn_optional` functions which must be
98-
/// preloaded and any functions which may be more time sensitive, even for the first call.
99-
unsafe extern "C" fn init() {
100-
// There is no locking here. This code is executed before main() is entered, and
101-
// is guaranteed to be single-threaded.
102-
//
103-
// DO NOT do anything interesting or complicated in this function! DO NOT call
104-
// any Rust functions or CRT functions if those functions touch any global state,
105-
// because this function runs during global initialization. For example, DO NOT
106-
// do any dynamic allocation, don't call LoadLibrary, etc.
107-
108-
if let Some(synch) = Module::new(c::SYNCH_API) {
109-
// These are optional and so we must manually attempt to load them
110-
// before they can be used.
111-
c::WaitOnAddress::preload(synch);
112-
c::WakeByAddressSingle::preload(synch);
113-
}
114-
115-
if let Some(kernel32) = Module::new(c::KERNEL32) {
116-
// Preloading this means getting a precise time will be as fast as possible.
117-
c::GetSystemTimePreciseAsFileTime::preload(kernel32);
118-
}
119-
}
120-
12158
/// Represents a loaded module.
12259
///
12360
/// Note that the modules std depends on must not be unloaded.
@@ -196,11 +133,6 @@ macro_rules! compat_fn_with_fallback {
196133
$fallback_body
197134
}
198135

199-
#[allow(unused)]
200-
pub(in crate::sys) fn preload(module: Module) {
201-
load_from_module(Some(module));
202-
}
203-
204136
#[inline(always)]
205137
pub unsafe fn call($($argname: $argtype),*) -> $rettype {
206138
let func: F = mem::transmute(PTR.load(Ordering::Relaxed));
@@ -212,62 +144,61 @@ macro_rules! compat_fn_with_fallback {
212144
)*)
213145
}
214146

215-
/// A function that either exists or doesn't.
147+
/// Optionally load `WaitOnAddress`.
148+
/// Unlike the dynamic loading described above, this does not have a fallback.
216149
///
217-
/// NOTE: Optional functions must be preloaded in the `init` function above, or they will always be None.
218-
macro_rules! compat_fn_optional {
219-
(pub static $module:ident: &CStr = $name:expr; $(
220-
$(#[$meta:meta])*
221-
pub fn $symbol:ident($($argname:ident: $argtype:ty),*) -> $rettype:ty;
222-
)*) => (
223-
pub static $module: &CStr = $name;
224-
$(
225-
$(#[$meta])*
226-
pub mod $symbol {
227-
#[allow(unused_imports)]
228-
use super::*;
229-
use crate::mem;
230-
use crate::sync::atomic::{AtomicPtr, Ordering};
231-
use crate::sys::compat::Module;
232-
use crate::ptr::{self, NonNull};
233-
234-
type F = unsafe extern "system" fn($($argtype),*) -> $rettype;
235-
236-
/// `PTR` will either be `null()` or set to the loaded function.
237-
static PTR: AtomicPtr<c_void> = AtomicPtr::new(ptr::null_mut());
238-
239-
/// Only allow access to the function if it has loaded successfully.
240-
#[inline(always)]
241-
#[cfg(not(miri))]
242-
pub fn option() -> Option<F> {
243-
unsafe {
244-
NonNull::new(PTR.load(Ordering::Relaxed)).map(|f| mem::transmute(f))
245-
}
246-
}
247-
248-
// Miri does not understand the way we do preloading
249-
// therefore load the function here instead.
250-
#[cfg(miri)]
251-
pub fn option() -> Option<F> {
252-
let mut func = NonNull::new(PTR.load(Ordering::Relaxed));
253-
if func.is_none() {
254-
unsafe { Module::new($module).map(preload) };
255-
func = NonNull::new(PTR.load(Ordering::Relaxed));
256-
}
257-
unsafe {
258-
func.map(|f| mem::transmute(f))
259-
}
260-
}
150+
/// This is rexported from sys::c. You should prefer to import
151+
/// from there in case this changes again in the future.
152+
pub mod WaitOnAddress {
153+
use super::*;
154+
use crate::mem;
155+
use crate::ptr;
156+
use crate::sync::atomic::{AtomicBool, AtomicPtr, Ordering};
157+
use crate::sys::c;
158+
159+
static MODULE_NAME: &CStr = ansi_str!("api-ms-win-core-synch-l1-2-0");
160+
static SYMBOL_NAME: &CStr = ansi_str!("WaitOnAddress");
161+
162+
// WaitOnAddress function signature.
163+
type F = unsafe extern "system" fn(
164+
Address: c::LPVOID,
165+
CompareAddress: c::LPVOID,
166+
AddressSize: c::SIZE_T,
167+
dwMilliseconds: c::DWORD,
168+
);
169+
170+
// A place to store the loaded function atomically.
171+
static WAIT_ON_ADDRESS: AtomicPtr<c_void> = AtomicPtr::new(ptr::null_mut());
172+
173+
// We can skip trying to load again if we already tried.
174+
static LOAD_MODULE: AtomicBool = AtomicBool::new(true);
175+
176+
#[inline(always)]
177+
pub fn option() -> Option<F> {
178+
let f = WAIT_ON_ADDRESS.load(Ordering::Relaxed);
179+
if !f.is_null() { Some(unsafe { mem::transmute(f) }) } else { try_load() }
180+
}
261181

262-
#[allow(unused)]
263-
pub(in crate::sys) fn preload(module: Module) {
264-
unsafe {
265-
static SYMBOL_NAME: &CStr = ansi_str!(sym $symbol);
266-
if let Some(f) = module.proc_address(SYMBOL_NAME) {
267-
PTR.store(f.as_ptr(), Ordering::Relaxed);
268-
}
269-
}
182+
#[cold]
183+
fn try_load() -> Option<F> {
184+
if LOAD_MODULE.load(Ordering::Acquire) {
185+
// load the module
186+
let mut wait_on_address = None;
187+
if let Some(func) = try_load_inner() {
188+
WAIT_ON_ADDRESS.store(func.as_ptr(), Ordering::Relaxed);
189+
wait_on_address = Some(unsafe { mem::transmute(func) });
270190
}
191+
// Don't try to load the module again even if loading failed.
192+
LOAD_MODULE.store(false, Ordering::Release);
193+
wait_on_address
194+
} else {
195+
None
271196
}
272-
)*)
197+
}
198+
199+
// In the future this could be a `try` block but until then I think it's a
200+
// little bit cleaner as a separate function.
201+
fn try_load_inner() -> Option<NonNull<c_void>> {
202+
unsafe { Module::new(MODULE_NAME)?.proc_address(SYMBOL_NAME) }
203+
}
273204
}

library/std/src/sys/windows/thread_parker.rs

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -197,21 +197,9 @@ impl Parker {
197197
// purpose, to make sure every unpark() has a release-acquire ordering
198198
// with park().
199199
if self.state.swap(NOTIFIED, Release) == PARKED {
200-
if let Some(wake_by_address_single) = c::WakeByAddressSingle::option() {
201-
unsafe {
202-
wake_by_address_single(self.ptr());
203-
}
204-
} else {
205-
// If we run NtReleaseKeyedEvent before the waiting thread runs
206-
// NtWaitForKeyedEvent, this (shortly) blocks until we can wake it up.
207-
// If the waiting thread wakes up before we run NtReleaseKeyedEvent
208-
// (e.g. due to a timeout), this blocks until we do wake up a thread.
209-
// To prevent this thread from blocking indefinitely in that case,
210-
// park_impl() will, after seeing the state set to NOTIFIED after
211-
// waking up, call NtWaitForKeyedEvent again to unblock us.
212-
unsafe {
213-
c::NtReleaseKeyedEvent(keyed_event_handle(), self.ptr(), 0, ptr::null_mut());
214-
}
200+
unsafe {
201+
// This calls either WakeByAddressSingle or unpark_keyed_event (see below).
202+
c::wake_by_address_single_or_unpark_keyed_event(self.ptr());
215203
}
216204
}
217205
}
@@ -221,6 +209,19 @@ impl Parker {
221209
}
222210
}
223211

212+
// This function signature makes it compatible with c::WakeByAddressSingle
213+
// so that it can be used as a fallback for that function.
214+
pub unsafe extern "C" fn unpark_keyed_event(address: c::LPVOID) {
215+
// If we run NtReleaseKeyedEvent before the waiting thread runs
216+
// NtWaitForKeyedEvent, this (shortly) blocks until we can wake it up.
217+
// If the waiting thread wakes up before we run NtReleaseKeyedEvent
218+
// (e.g. due to a timeout), this blocks until we do wake up a thread.
219+
// To prevent this thread from blocking indefinitely in that case,
220+
// park_impl() will, after seeing the state set to NOTIFIED after
221+
// waking up, call NtWaitForKeyedEvent again to unblock us.
222+
c::NtReleaseKeyedEvent(keyed_event_handle(), address, 0, ptr::null_mut());
223+
}
224+
224225
fn keyed_event_handle() -> c::HANDLE {
225226
const INVALID: c::HANDLE = ptr::invalid_mut(!0);
226227
static HANDLE: AtomicPtr<libc::c_void> = AtomicPtr::new(INVALID);

0 commit comments

Comments
 (0)