Skip to content

Commit 7beb65a

Browse files
authored
[flang] Fixed LoopVersioning for array slices. (#65703)
The first test case added in the LIT test demonstrates the problem. Even though we did not consider the inner loop as a candidate for the transformation due to the array_coor with a slice, we decided to version the outer loop for the same function argument. During the cloning of the outer loop we dropped the slicing completely producing invalid code. I restructured the code so that we record all arg uses that cannot be transformed (regardless of the reason), and then fixup the usage information across the loop nests. I also noticed that we may generate redundant contiguity checks for the inner loops, so I fixed it since it was easy with the new way of keeping the usage data.
1 parent 8dd87a5 commit 7beb65a

File tree

2 files changed

+346
-43
lines changed

2 files changed

+346
-43
lines changed

flang/lib/Optimizer/Transforms/LoopVersioning.cpp

+176-41
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,72 @@ class LoopVersioningPass
7777
void runOnOperation() override;
7878
};
7979

80+
/// @struct ArgInfo
81+
/// A structure to hold an argument, the size of the argument and dimension
82+
/// information.
83+
struct ArgInfo {
84+
mlir::Value arg;
85+
size_t size;
86+
unsigned rank;
87+
fir::BoxDimsOp dims[CFI_MAX_RANK];
88+
};
89+
90+
/// @struct ArgsUsageInLoop
91+
/// A structure providing information about the function arguments
92+
/// usage by the instructions immediately nested in a loop.
93+
struct ArgsUsageInLoop {
94+
/// Mapping between the memref operand of an array indexing
95+
/// operation (e.g. fir.coordinate_of) and the argument information.
96+
llvm::DenseMap<mlir::Value, ArgInfo> usageInfo;
97+
/// Some array indexing operations inside a loop cannot be transformed.
98+
/// This vector holds the memref operands of such operations.
99+
/// The vector is used to make sure that we do not try to transform
100+
/// any outer loop, since this will imply the operation rewrite
101+
/// in this loop.
102+
llvm::SetVector<mlir::Value> cannotTransform;
103+
104+
// Debug dump of the structure members assuming that
105+
// the information has been collected for the given loop.
106+
void dump(fir::DoLoopOp loop) const {
107+
// clang-format off
108+
LLVM_DEBUG(
109+
mlir::OpPrintingFlags printFlags;
110+
printFlags.skipRegions();
111+
llvm::dbgs() << "Arguments usage info for loop:\n";
112+
loop.print(llvm::dbgs(), printFlags);
113+
llvm::dbgs() << "\nUsed args:\n";
114+
for (auto &use : usageInfo) {
115+
mlir::Value v = use.first;
116+
v.print(llvm::dbgs(), printFlags);
117+
llvm::dbgs() << "\n";
118+
}
119+
llvm::dbgs() << "\nCannot transform args:\n";
120+
for (mlir::Value arg : cannotTransform) {
121+
arg.print(llvm::dbgs(), printFlags);
122+
llvm::dbgs() << "\n";
123+
}
124+
llvm::dbgs() << "====\n"
125+
);
126+
// clang-format on
127+
}
128+
129+
// Erase usageInfo and cannotTransform entries for a set
130+
// of given arguments.
131+
void eraseUsage(const llvm::SetVector<mlir::Value> &args) {
132+
for (auto &arg : args)
133+
usageInfo.erase(arg);
134+
cannotTransform.set_subtract(args);
135+
}
136+
137+
// Erase usageInfo and cannotTransform entries for a set
138+
// of given arguments provided in the form of usageInfo map.
139+
void eraseUsage(const llvm::DenseMap<mlir::Value, ArgInfo> &args) {
140+
for (auto &arg : args) {
141+
usageInfo.erase(arg.first);
142+
cannotTransform.remove(arg.first);
143+
}
144+
}
145+
};
80146
} // namespace
81147

82148
/// @c replaceOuterUses - replace uses outside of @c op with result of @c
@@ -179,16 +245,6 @@ void LoopVersioningPass::runOnOperation() {
179245
LLVM_DEBUG(llvm::dbgs() << "=== Begin " DEBUG_TYPE " ===\n");
180246
mlir::func::FuncOp func = getOperation();
181247

182-
/// @c ArgInfo
183-
/// A structure to hold an argument, the size of the argument and dimension
184-
/// information.
185-
struct ArgInfo {
186-
mlir::Value arg;
187-
size_t size;
188-
unsigned rank;
189-
fir::BoxDimsOp dims[CFI_MAX_RANK];
190-
};
191-
192248
// First look for arguments with assumed shape = unknown extent in the lowest
193249
// dimension.
194250
LLVM_DEBUG(llvm::dbgs() << "Func-name:" << func.getSymName() << "\n");
@@ -224,58 +280,137 @@ void LoopVersioningPass::runOnOperation() {
224280
}
225281
}
226282

227-
if (argsOfInterest.empty())
283+
if (argsOfInterest.empty()) {
284+
LLVM_DEBUG(llvm::dbgs()
285+
<< "No suitable arguments.\n=== End " DEBUG_TYPE " ===\n");
228286
return;
287+
}
229288

230-
struct OpsWithArgs {
231-
mlir::Operation *op;
232-
mlir::SmallVector<ArgInfo, 4> argsAndDims;
233-
};
234-
// Now see if those arguments are used inside any loop.
235-
mlir::SmallVector<OpsWithArgs, 4> loopsOfInterest;
289+
// A list of all loops in the function in post-order.
290+
mlir::SmallVector<fir::DoLoopOp> originalLoops;
291+
// Information about the arguments usage by the instructions
292+
// immediately nested in a loop.
293+
llvm::DenseMap<fir::DoLoopOp, ArgsUsageInLoop> argsInLoops;
236294

295+
// Traverse the loops in post-order and see
296+
// if those arguments are used inside any loop.
237297
func.walk([&](fir::DoLoopOp loop) {
238298
mlir::Block &body = *loop.getBody();
239-
mlir::SmallVector<ArgInfo, 4> argsInLoop;
299+
auto &argsInLoop = argsInLoops[loop];
300+
originalLoops.push_back(loop);
240301
body.walk([&](mlir::Operation *op) {
241-
// support either fir.array_coor or fir.coordinate_of
242-
if (auto arrayCoor = mlir::dyn_cast<fir::ArrayCoorOp>(op)) {
243-
// no support currently for sliced arrays
244-
if (arrayCoor.getSlice())
245-
return;
246-
} else if (!mlir::isa<fir::CoordinateOp>(op)) {
302+
// Support either fir.array_coor or fir.coordinate_of.
303+
if (!mlir::isa<fir::ArrayCoorOp, fir::CoordinateOp>(op))
247304
return;
248-
}
249-
250-
// The current operation could be inside another loop than
251-
// the one we're currently processing. Skip it, we'll get
252-
// to it later.
305+
// Process only operations immediately nested in the current loop.
253306
if (op->getParentOfType<fir::DoLoopOp>() != loop)
254307
return;
255308
mlir::Value operand = op->getOperand(0);
256309
for (auto a : argsOfInterest) {
257310
if (a.arg == normaliseVal(operand)) {
258-
// use the reboxed value, not the block arg when re-creating the loop:
311+
// Use the reboxed value, not the block arg when re-creating the loop.
312+
// TODO: should we check that the operand dominates the loop?
313+
// If this might be a case, we should record such operands in
314+
// argsInLoop.cannotTransform, so that they disable the transformation
315+
// for the parent loops as well.
259316
a.arg = operand;
260-
// Only add if it's not already in the list.
261-
if (std::find_if(argsInLoop.begin(), argsInLoop.end(), [&](auto it) {
262-
return it.arg == a.arg;
263-
}) == argsInLoop.end()) {
264317

265-
argsInLoop.push_back(a);
318+
// No support currently for sliced arrays.
319+
// This means that we cannot transform properly
320+
// instructions referencing a.arg in the whole loop
321+
// nest this loop is located in.
322+
if (auto arrayCoor = mlir::dyn_cast<fir::ArrayCoorOp>(op))
323+
if (arrayCoor.getSlice())
324+
argsInLoop.cannotTransform.insert(a.arg);
325+
326+
if (argsInLoop.cannotTransform.contains(a.arg)) {
327+
// Remove any previously recorded usage, if any.
328+
argsInLoop.usageInfo.erase(a.arg);
266329
break;
267330
}
331+
332+
// Record the a.arg usage, if not recorded yet.
333+
argsInLoop.usageInfo.try_emplace(a.arg, a);
334+
break;
268335
}
269336
}
270337
});
271-
272-
if (!argsInLoop.empty()) {
273-
OpsWithArgs ops = {loop, argsInLoop};
274-
loopsOfInterest.push_back(ops);
275-
}
276338
});
277-
if (loopsOfInterest.empty())
339+
340+
// Dump loops info after initial collection.
341+
// clang-format off
342+
LLVM_DEBUG(
343+
llvm::dbgs() << "Initial usage info:\n";
344+
for (fir::DoLoopOp loop : originalLoops) {
345+
auto &argsInLoop = argsInLoops[loop];
346+
argsInLoop.dump(loop);
347+
}
348+
);
349+
// clang-format on
350+
351+
// Clear argument usage for parent loops if an inner loop
352+
// contains a non-transformable usage.
353+
for (fir::DoLoopOp loop : originalLoops) {
354+
auto &argsInLoop = argsInLoops[loop];
355+
if (argsInLoop.cannotTransform.empty())
356+
continue;
357+
358+
fir::DoLoopOp parent = loop;
359+
while ((parent = parent->getParentOfType<fir::DoLoopOp>()))
360+
argsInLoops[parent].eraseUsage(argsInLoop.cannotTransform);
361+
}
362+
363+
// If an argument access can be optimized in a loop and
364+
// its descendant loop, then it does not make sense to
365+
// generate the contiguity check for the descendant loop.
366+
// The check will be produced as part of the ancestor
367+
// loop's transformation. So we can clear the argument
368+
// usage for all descendant loops.
369+
for (fir::DoLoopOp loop : originalLoops) {
370+
auto &argsInLoop = argsInLoops[loop];
371+
if (argsInLoop.usageInfo.empty())
372+
continue;
373+
374+
loop.getBody()->walk([&](fir::DoLoopOp dloop) {
375+
argsInLoops[dloop].eraseUsage(argsInLoop.usageInfo);
376+
});
377+
}
378+
379+
// clang-format off
380+
LLVM_DEBUG(
381+
llvm::dbgs() << "Final usage info:\n";
382+
for (fir::DoLoopOp loop : originalLoops) {
383+
auto &argsInLoop = argsInLoops[loop];
384+
argsInLoop.dump(loop);
385+
}
386+
);
387+
// clang-format on
388+
389+
// Reduce the collected information to a list of loops
390+
// with attached arguments usage information.
391+
// The list must hold the loops in post order, so that
392+
// the inner loops are transformed before the outer loops.
393+
struct OpsWithArgs {
394+
mlir::Operation *op;
395+
mlir::SmallVector<ArgInfo, 4> argsAndDims;
396+
};
397+
mlir::SmallVector<OpsWithArgs, 4> loopsOfInterest;
398+
for (fir::DoLoopOp loop : originalLoops) {
399+
auto &argsInLoop = argsInLoops[loop];
400+
if (argsInLoop.usageInfo.empty())
401+
continue;
402+
OpsWithArgs info;
403+
info.op = loop;
404+
for (auto &arg : argsInLoop.usageInfo)
405+
info.argsAndDims.push_back(arg.second);
406+
loopsOfInterest.emplace_back(std::move(info));
407+
}
408+
409+
if (loopsOfInterest.empty()) {
410+
LLVM_DEBUG(llvm::dbgs()
411+
<< "No loops to transform.\n=== End " DEBUG_TYPE " ===\n");
278412
return;
413+
}
279414

280415
// If we get here, there are loops to process.
281416
fir::FirOpBuilder builder{module, std::move(kindMap)};

0 commit comments

Comments
 (0)