Skip to content

Compare performance: Detect 'shadowed' cache hits and a few other fixes #4018

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Apr 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions extensions/ql-vscode/src/log-insights/log-summary.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ type EvaluationStrategy =
| "EXTENSIONAL"
| "SENTINEL_EMPTY"
| "CACHACA"
| "CACHE_HIT";
| "CACHE_HIT"
| "NAMED_LOCAL";

interface SummaryEventBase {
evaluationStrategy: EvaluationStrategy;
Expand All @@ -28,6 +29,8 @@ interface SummaryEventBase {

interface ResultEventBase extends SummaryEventBase {
resultSize: number;
dependencies?: { [key: string]: string };
mainHash?: string;
}

export interface ComputeSimple extends ResultEventBase {
Expand Down Expand Up @@ -60,6 +63,15 @@ export interface InLayer extends ResultEventBase {
predicateIterationMillis: number[];
}

interface NamedLocal extends ResultEventBase {
evaluationStrategy: "NAMED_LOCAL";
deltaSizes: number[];
ra: Ra;
pipelineRuns: PipelineRun[];
queryCausingWork?: string;
predicateIterationMillis: number[];
}

interface ComputedExtensional extends ResultEventBase {
evaluationStrategy: "COMPUTED_EXTENSIONAL";
queryCausingWork?: string;
Expand Down Expand Up @@ -92,4 +104,5 @@ export type SummaryEvent =
| Extensional
| SentinelEmpty
| Cachaca
| CacheHit;
| CacheHit
| NamedLocal;
104 changes: 83 additions & 21 deletions extensions/ql-vscode/src/log-insights/performance-comparison.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import { createHash } from "crypto";
import type { SummaryEvent } from "./log-summary";

export interface PipelineSummary {
steps: string[];
/** Total counts for each step in the RA array, across all iterations */
counts: number[];
hash: string;
}

/**
Expand All @@ -26,6 +28,9 @@ export interface PerformanceComparisonDataFromLog {
*/
names: string[];

/** RA hash of the `i`th predicate event */
raHashes: string[];

/** Number of milliseconds spent evaluating the `i`th predicate from the `names` array. */
timeCosts: number[];

Expand All @@ -52,41 +57,51 @@ export interface PerformanceComparisonDataFromLog {
* All the pipeline runs seen for the `i`th predicate from the `names` array.
*/
pipelineSummaryList: Array<Record<string, PipelineSummary>>;

/** All dependencies of the `i`th predicate from the `names` array, encoded as a list of indices in `names`. */
dependencyLists: number[][];
}

export class PerformanceOverviewScanner {
private readonly nameToIndex = new Map<string, number>();
private readonly data: PerformanceComparisonDataFromLog = {
names: [],
raHashes: [],
timeCosts: [],
tupleCosts: [],
cacheHitIndices: [],
sentinelEmptyIndices: [],
pipelineSummaryList: [],
evaluationCounts: [],
iterationCounts: [],
dependencyLists: [],
};
private readonly raToIndex = new Map<string, number>();
private readonly mainHashToRepr = new Map<string, number>();
private readonly nameToIndex = new Map<string, number>();

private getPredicateIndex(name: string): number {
const { nameToIndex } = this;
let index = nameToIndex.get(name);
private getPredicateIndex(name: string, ra: string): number {
let index = this.raToIndex.get(ra);
if (index === undefined) {
index = nameToIndex.size;
nameToIndex.set(name, index);
index = this.raToIndex.size;
this.raToIndex.set(ra, index);
const {
names,
raHashes,
timeCosts,
tupleCosts,
iterationCounts,
evaluationCounts,
pipelineSummaryList,
dependencyLists,
} = this.data;
names.push(name);
raHashes.push(ra);
timeCosts.push(0);
tupleCosts.push(0);
iterationCounts.push(0);
evaluationCounts.push(0);
pipelineSummaryList.push({});
dependencyLists.push([]);
}
return index;
}
Expand All @@ -96,46 +111,63 @@ export class PerformanceOverviewScanner {
}

onEvent(event: SummaryEvent): void {
if (
event.completionType !== undefined &&
event.completionType !== "SUCCESS"
) {
const { completionType, evaluationStrategy, predicateName, raHash } = event;
if (completionType !== undefined && completionType !== "SUCCESS") {
return; // Skip any evaluation that wasn't successful
}

switch (event.evaluationStrategy) {
case "EXTENSIONAL":
switch (evaluationStrategy) {
case "EXTENSIONAL": {
break;
}
case "COMPUTED_EXTENSIONAL": {
if (predicateName.startsWith("cached_")) {
// Add a dependency from a cached COMPUTED_EXTENSIONAL to the predicate with the actual contents.
// The raHash of the this event may appear in a CACHE_HIT event in the other event log. The dependency
// we're adding here is needed in order to associate the original predicate with such a cache hit.
const originalName = predicateName.substring("cached_".length);
const originalIndex = this.nameToIndex.get(originalName);
if (originalIndex != null) {
const index = this.getPredicateIndex(predicateName, raHash);
this.data.dependencyLists[index].push(originalIndex);
}
}
break;
}
case "CACHE_HIT":
case "CACHACA": {
// Record a cache hit, but only if the predicate has not been seen before.
// We're mainly interested in the reuse of caches from an earlier query run as they can distort comparisons.
if (!this.nameToIndex.has(event.predicateName)) {
if (!this.raToIndex.has(raHash)) {
this.data.cacheHitIndices.push(
this.getPredicateIndex(event.predicateName),
this.getPredicateIndex(predicateName, raHash),
);
}
break;
}
case "SENTINEL_EMPTY": {
this.data.sentinelEmptyIndices.push(
this.getPredicateIndex(event.predicateName),
);
const index = this.getPredicateIndex(predicateName, raHash);
this.data.sentinelEmptyIndices.push(index);
const sentinelIndex = this.raToIndex.get(event.sentinelRaHash);
if (sentinelIndex != null) {
this.data.dependencyLists[index].push(sentinelIndex); // needed for matching up cache hits
}
break;
}
case "COMPUTE_RECURSIVE":
case "COMPUTE_SIMPLE":
case "NAMED_LOCAL":
case "IN_LAYER": {
const index = this.getPredicateIndex(event.predicateName);
const index = this.getPredicateIndex(predicateName, raHash);
this.nameToIndex.set(predicateName, index);
let totalTime = 0;
let totalTuples = 0;
if (event.evaluationStrategy !== "IN_LAYER") {
if (evaluationStrategy === "COMPUTE_SIMPLE") {
totalTime += event.millis;
} else {
// IN_LAYER events do no record of their total time.
// Make a best-effort estimate by adding up the positive iteration times (they can be negative).
// Make a best-effort estimate of the total time by adding up the positive iteration times (they can be negative).
// Note that for COMPUTE_RECURSIVE the "millis" field contain the total time of the SCC, not just that predicate,
// but we don't have a good way to show that in the UI, so we rely on the accumulated iteration times.
for (const millis of event.predicateIterationMillis ?? []) {
if (millis > 0) {
totalTime += millis;
Expand All @@ -148,13 +180,16 @@ export class PerformanceOverviewScanner {
iterationCounts,
evaluationCounts,
pipelineSummaryList,
dependencyLists,
} = this.data;
const pipelineSummaries = pipelineSummaryList[index];
const dependencyList = dependencyLists[index];
for (const { counts, raReference } of event.pipelineRuns ?? []) {
// Get or create the pipeline summary for this RA
const pipelineSummary = (pipelineSummaries[raReference] ??= {
steps: event.ra[raReference],
counts: counts.map(() => 0),
hash: getPipelineHash(event.ra[raReference]),
});
const { counts: totalTuplesPerStep } = pipelineSummary;
for (let i = 0, length = counts.length; i < length; ++i) {
Expand All @@ -169,6 +204,25 @@ export class PerformanceOverviewScanner {
totalTuplesPerStep[i] += count;
}
}
for (const dependencyHash of Object.values(event.dependencies ?? {})) {
const dependencyIndex = this.raToIndex.get(dependencyHash);
if (dependencyIndex != null) {
dependencyList.push(dependencyIndex);
}
}
// For predicates in the same SCC, add two-way dependencies with an arbitrary SCC member
const sccHash =
event.mainHash ??
(evaluationStrategy === "COMPUTE_RECURSIVE" ? raHash : null);
if (sccHash != null) {
const mainIndex = this.mainHashToRepr.get(sccHash);
if (mainIndex == null) {
this.mainHashToRepr.set(sccHash, index);
} else {
dependencyLists[index].push(mainIndex);
dependencyLists[mainIndex].push(index);
}
}
timeCosts[index] += totalTime;
tupleCosts[index] += totalTuples;
iterationCounts[index] += event.pipelineRuns?.length ?? 0;
Expand All @@ -178,3 +232,11 @@ export class PerformanceOverviewScanner {
}
}
}

function getPipelineHash(steps: string[]) {
const md5 = createHash("md5");
for (const step of steps) {
md5.write(step);
}
return md5.digest("base64");
}
Loading
Loading