Skip to content

Commit 018727d

Browse files
committed
Auto merge of #130696 - scottmcm:reorder-inlining, r=<try>
Inline smaller callees first Then limit the total size and number of inlined things, to allow more top-down inlining (particularly important after calling something generic that couldn't inline internally) without getting exponential blowup. Fixes #130590 r? saethlin
2 parents 80aa6fa + 51efba2 commit 018727d

File tree

51 files changed

+1028
-865
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+1028
-865
lines changed

compiler/rustc_mir_transform/src/inline.rs

+89-62
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
//! Inlining pass for MIR functions.
22
3+
use std::cmp::Ordering;
4+
use std::collections::BinaryHeap;
35
use std::iter;
46
use std::ops::{Range, RangeFrom};
57

68
use rustc_attr::InlineAttr;
79
use rustc_hir::def::DefKind;
8-
use rustc_hir::def_id::DefId;
910
use rustc_index::bit_set::BitSet;
1011
use rustc_index::Idx;
1112
use rustc_middle::bug;
@@ -30,7 +31,7 @@ use crate::validate::validate_types;
3031

3132
pub(crate) mod cycle;
3233

33-
const TOP_DOWN_DEPTH_LIMIT: usize = 5;
34+
const MAX_INLINED_CALLEES_PER_BODY: usize = 9;
3435

3536
// Made public so that `mir_drops_elaborated_and_const_checked` can be overridden
3637
// by custom rustc drivers, running all the steps by themselves. See #114628.
@@ -102,46 +103,80 @@ fn inline<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) -> bool {
102103
tcx,
103104
param_env,
104105
codegen_fn_attrs,
105-
history: Vec::new(),
106-
changed: false,
106+
queue: BinaryHeap::new(),
107107
caller_is_inline_forwarder: matches!(
108108
codegen_fn_attrs.inline,
109109
InlineAttr::Hint | InlineAttr::Always
110110
) && body_is_forwarder(body),
111111
};
112-
let blocks = START_BLOCK..body.basic_blocks.next_index();
113-
this.process_blocks(body, blocks);
114-
this.changed
112+
let initial_blocks = START_BLOCK..body.basic_blocks.next_index();
113+
this.enqueue_new_candidates(body, initial_blocks);
114+
115+
let mut changed = false;
116+
let mut remaining_cost = tcx.sess.opts.unstable_opts.inline_mir_total_threshold.unwrap_or(300);
117+
let mut remaining_count = MAX_INLINED_CALLEES_PER_BODY;
118+
while remaining_count > 0
119+
&& let Some(candidate) = this.queue.pop()
120+
{
121+
let Some(c) = remaining_cost.checked_sub(candidate.cost) else {
122+
debug!(
123+
"next-cheapest candidate has cost {}, but only {} left",
124+
candidate.cost, remaining_cost,
125+
);
126+
break;
127+
};
128+
remaining_cost = c;
129+
remaining_count -= 1;
130+
131+
if let Ok(new_blocks) = this.try_inline_candidate(body, candidate) {
132+
changed = true;
133+
this.enqueue_new_candidates(body, new_blocks);
134+
}
135+
}
136+
137+
changed
115138
}
116139

117140
struct Inliner<'tcx> {
118141
tcx: TyCtxt<'tcx>,
119142
param_env: ParamEnv<'tcx>,
120143
/// Caller codegen attributes.
121144
codegen_fn_attrs: &'tcx CodegenFnAttrs,
122-
/// Stack of inlined instances.
123-
/// We only check the `DefId` and not the args because we want to
124-
/// avoid inlining cases of polymorphic recursion.
125-
/// The number of `DefId`s is finite, so checking history is enough
126-
/// to ensure that we do not loop endlessly while inlining.
127-
history: Vec<DefId>,
128-
/// Indicates that the caller body has been modified.
129-
changed: bool,
145+
/// The currently-known inlining candidates.
146+
queue: BinaryHeap<InlineCandidate<'tcx>>,
130147
/// Indicates that the caller is #[inline] and just calls another function,
131148
/// and thus we can inline less into it as it'll be inlined itself.
132149
caller_is_inline_forwarder: bool,
133150
}
134151

152+
struct InlineCandidate<'tcx> {
153+
cost: usize,
154+
callsite: CallSite<'tcx>,
155+
callee_body: &'tcx Body<'tcx>,
156+
destination_ty: Ty<'tcx>,
157+
}
158+
159+
/// Smallest-cost is "max" for use in `BinaryHeap`, with ties broken
160+
/// by preferring the one in the smaller-numbered `BasicBlock`.
161+
impl Ord for InlineCandidate<'_> {
162+
fn cmp(&self, other: &Self) -> Ordering {
163+
(other.cost, other.callsite.block).cmp(&(self.cost, self.callsite.block))
164+
}
165+
}
166+
impl PartialOrd for InlineCandidate<'_> {
167+
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
168+
Some(self.cmp(other))
169+
}
170+
}
171+
impl Eq for InlineCandidate<'_> {}
172+
impl PartialEq for InlineCandidate<'_> {
173+
fn eq(&self, other: &Self) -> bool {
174+
self.cmp(other) == Ordering::Equal
175+
}
176+
}
177+
135178
impl<'tcx> Inliner<'tcx> {
136-
fn process_blocks(&mut self, caller_body: &mut Body<'tcx>, blocks: Range<BasicBlock>) {
137-
// How many callsites in this body are we allowed to inline? We need to limit this in order
138-
// to prevent super-linear growth in MIR size
139-
let inline_limit = match self.history.len() {
140-
0 => usize::MAX,
141-
1..=TOP_DOWN_DEPTH_LIMIT => 1,
142-
_ => return,
143-
};
144-
let mut inlined_count = 0;
179+
fn enqueue_new_candidates(&mut self, caller_body: &Body<'tcx>, blocks: Range<BasicBlock>) {
145180
for bb in blocks {
146181
let bb_data = &caller_body[bb];
147182
if bb_data.is_cleanup {
@@ -152,44 +187,23 @@ impl<'tcx> Inliner<'tcx> {
152187
continue;
153188
};
154189

155-
let span = trace_span!("process_blocks", %callsite.callee, ?bb);
156-
let _guard = span.enter();
157-
158-
match self.try_inlining(caller_body, &callsite) {
159-
Err(reason) => {
160-
debug!("not-inlined {} [{}]", callsite.callee, reason);
161-
}
162-
Ok(new_blocks) => {
163-
debug!("inlined {}", callsite.callee);
164-
self.changed = true;
165-
166-
self.history.push(callsite.callee.def_id());
167-
self.process_blocks(caller_body, new_blocks);
168-
self.history.pop();
169-
170-
inlined_count += 1;
171-
if inlined_count == inline_limit {
172-
debug!("inline count reached");
173-
return;
174-
}
175-
}
176-
}
190+
let Ok(candidate) = self.validate_inlining_candidate(caller_body, callsite) else {
191+
continue;
192+
};
193+
self.queue.push(candidate);
177194
}
178195
}
179196

180-
/// Attempts to inline a callsite into the caller body. When successful returns basic blocks
181-
/// containing the inlined body. Otherwise returns an error describing why inlining didn't take
182-
/// place.
183-
fn try_inlining(
197+
fn validate_inlining_candidate(
184198
&self,
185-
caller_body: &mut Body<'tcx>,
186-
callsite: &CallSite<'tcx>,
187-
) -> Result<std::ops::Range<BasicBlock>, &'static str> {
199+
caller_body: &Body<'tcx>,
200+
callsite: CallSite<'tcx>,
201+
) -> Result<InlineCandidate<'tcx>, &'static str> {
188202
self.check_mir_is_available(caller_body, callsite.callee)?;
189203

190204
let callee_attrs = self.tcx.codegen_fn_attrs(callsite.callee.def_id());
191205
let cross_crate_inlinable = self.tcx.cross_crate_inlinable(callsite.callee.def_id());
192-
self.check_codegen_attributes(callsite, callee_attrs, cross_crate_inlinable)?;
206+
self.check_codegen_attributes(&callsite, callee_attrs, cross_crate_inlinable)?;
193207

194208
// Intrinsic fallback bodies are automatically made cross-crate inlineable,
195209
// but at this stage we don't know whether codegen knows the intrinsic,
@@ -210,7 +224,20 @@ impl<'tcx> Inliner<'tcx> {
210224
}
211225

212226
let callee_body = try_instance_mir(self.tcx, callsite.callee.def)?;
213-
self.check_mir_body(callsite, callee_body, callee_attrs, cross_crate_inlinable)?;
227+
let cost =
228+
self.check_mir_body(&callsite, callee_body, callee_attrs, cross_crate_inlinable)?;
229+
Ok(InlineCandidate { cost, callsite, callee_body, destination_ty })
230+
}
231+
232+
/// Attempts to inline a callsite into the caller body. When successful returns basic blocks
233+
/// containing the inlined body. Otherwise returns an error describing why inlining didn't take
234+
/// place.
235+
fn try_inline_candidate(
236+
&self,
237+
caller_body: &mut Body<'tcx>,
238+
candidate: InlineCandidate<'tcx>,
239+
) -> Result<std::ops::Range<BasicBlock>, &'static str> {
240+
let InlineCandidate { callsite, callee_body, destination_ty, .. } = candidate;
214241

215242
if !self.tcx.consider_optimizing(|| {
216243
format!("Inline {:?} into {:?}", callsite.callee, caller_body.source)
@@ -249,6 +276,10 @@ impl<'tcx> Inliner<'tcx> {
249276
trace!(?output_type, ?destination_ty);
250277
return Err("failed to normalize return type");
251278
}
279+
280+
let terminator = caller_body[callsite.block].terminator.as_ref().unwrap();
281+
let TerminatorKind::Call { args, .. } = &terminator.kind else { bug!() };
282+
252283
if callsite.fn_sig.abi() == Abi::RustCall {
253284
// FIXME: Don't inline user-written `extern "rust-call"` functions,
254285
// since this is generally perf-negative on rustc, and we hope that
@@ -294,7 +325,7 @@ impl<'tcx> Inliner<'tcx> {
294325
}
295326

296327
let old_blocks = caller_body.basic_blocks.next_index();
297-
self.inline_call(caller_body, callsite, callee_body);
328+
self.inline_call(caller_body, &callsite, callee_body);
298329
let new_blocks = old_blocks..caller_body.basic_blocks.next_index();
299330

300331
Ok(new_blocks)
@@ -396,10 +427,6 @@ impl<'tcx> Inliner<'tcx> {
396427
return None;
397428
}
398429

399-
if self.history.contains(&callee.def_id()) {
400-
return None;
401-
}
402-
403430
let fn_sig = self.tcx.fn_sig(def_id).instantiate(self.tcx, args);
404431

405432
// Additionally, check that the body that we're inlining actually agrees
@@ -493,7 +520,7 @@ impl<'tcx> Inliner<'tcx> {
493520
callee_body: &Body<'tcx>,
494521
callee_attrs: &CodegenFnAttrs,
495522
cross_crate_inlinable: bool,
496-
) -> Result<(), &'static str> {
523+
) -> Result<usize, &'static str> {
497524
let tcx = self.tcx;
498525

499526
if let Some(_) = callee_body.tainted_by_errors {
@@ -573,7 +600,7 @@ impl<'tcx> Inliner<'tcx> {
573600
let cost = checker.cost();
574601
if cost <= threshold {
575602
debug!("INLINING {:?} [cost={} <= threshold={}]", callsite, cost, threshold);
576-
Ok(())
603+
Ok(cost)
577604
} else {
578605
debug!("NOT inlining {:?} [cost={} > threshold={}]", callsite, cost, threshold);
579606
Err("cost above threshold")

compiler/rustc_session/src/options.rs

+3
Original file line numberDiff line numberDiff line change
@@ -1807,6 +1807,9 @@ options! {
18071807
(default: preserve for debuginfo != None, otherwise remove)"),
18081808
inline_mir_threshold: Option<usize> = (None, parse_opt_number, [TRACKED],
18091809
"a default MIR inlining threshold (default: 50)"),
1810+
inline_mir_total_threshold: Option<usize> = (None, parse_opt_number, [TRACKED],
1811+
"maximum total cost of functions inlined into one caller (default: 300), \
1812+
to limit eventual MIR size in functions which make many calls"),
18101813
input_stats: bool = (false, parse_bool, [UNTRACKED],
18111814
"gather statistics about the input (default: no)"),
18121815
instrument_mcount: bool = (false, parse_bool, [TRACKED],

tests/mir-opt/dest-prop/union.main.DestinationPropagation.panic-abort.diff

+2-2
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@
88
let mut _3: u32;
99
scope 1 {
1010
debug un => _1;
11-
scope 3 (inlined std::mem::drop::<u32>) {
11+
scope 2 (inlined std::mem::drop::<u32>) {
1212
debug _x => _3;
1313
}
1414
}
15-
scope 2 (inlined val) {
15+
scope 3 (inlined val) {
1616
}
1717

1818
bb0: {

tests/mir-opt/dest-prop/union.main.DestinationPropagation.panic-unwind.diff

+2-2
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@
88
let mut _3: u32;
99
scope 1 {
1010
debug un => _1;
11-
scope 3 (inlined std::mem::drop::<u32>) {
11+
scope 2 (inlined std::mem::drop::<u32>) {
1212
debug _x => _3;
1313
}
1414
}
15-
scope 2 (inlined val) {
15+
scope 3 (inlined val) {
1616
}
1717

1818
bb0: {

tests/mir-opt/funky_arms.float_to_exponential_common.GVN.panic-abort.diff

+2-2
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,12 @@
2828
scope 3 {
2929
debug precision => _8;
3030
let _8: usize;
31-
scope 5 (inlined Formatter::<'_>::precision) {
31+
scope 4 (inlined Formatter::<'_>::precision) {
3232
}
3333
}
3434
}
3535
}
36-
scope 4 (inlined Formatter::<'_>::sign_plus) {
36+
scope 5 (inlined Formatter::<'_>::sign_plus) {
3737
let mut _20: u32;
3838
let mut _21: u32;
3939
}

tests/mir-opt/funky_arms.float_to_exponential_common.GVN.panic-unwind.diff

+2-2
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,12 @@
2828
scope 3 {
2929
debug precision => _8;
3030
let _8: usize;
31-
scope 5 (inlined Formatter::<'_>::precision) {
31+
scope 4 (inlined Formatter::<'_>::precision) {
3232
}
3333
}
3434
}
3535
}
36-
scope 4 (inlined Formatter::<'_>::sign_plus) {
36+
scope 5 (inlined Formatter::<'_>::sign_plus) {
3737
let mut _20: u32;
3838
let mut _21: u32;
3939
}

0 commit comments

Comments
 (0)