Skip to content

gh-107674: Improve performance of sys.settrace #114986

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Feb 28, 2024
Merged
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improved the performance of :func:`sys.settrace` significantly
31 changes: 18 additions & 13 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -143,21 +143,26 @@ dummy_func(
inst(RESUME, (--)) {
TIER_ONE_ONLY
assert(frame == tstate->current_frame);
uintptr_t global_version =
_Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) &
~_PY_EVAL_EVENTS_MASK;
uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
assert((code_version & 255) == 0);
if (code_version != global_version) {
int err = _Py_Instrument(_PyFrame_GetCode(frame), tstate->interp);
ERROR_IF(err, error);
next_instr = this_instr;
}
else {
if (tstate->tracing == 0) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because RESUME converts itself to RESUME_CHECK, this shouldn't matter in terms of performance, so maybe keep it simple?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah I remembered why I did not set this instr to RESUME_CHECK when tracing != 0 - it will convert this instr to RESUME_CHECK but RESUME_CHECK will be deopted as the version does not add up.

For your question - the key is to stop the code from being instrumented, removing this check will still result in an instrumented code, and instrumented code is always slower.

However, we should probably check if the code is tracing and do not deopt RESUME_CHECK if so.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another way to do this is to DISPATCH() in the instrument case, so we don't have to write the "normal" case twice. Is it frown upon to have multiple DISPATCH() in one bytecode?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Multiple DISPATCH()s should be fine.

uintptr_t global_version =
_Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) &
~_PY_EVAL_EVENTS_MASK;
uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
assert((code_version & 255) == 0);
if (code_version != global_version) {
int err = _Py_Instrument(_PyFrame_GetCode(frame), tstate->interp);
ERROR_IF(err, error);
next_instr = this_instr;
} else {
if ((oparg & RESUME_OPARG_LOCATION_MASK) < RESUME_AFTER_YIELD_FROM) {
CHECK_EVAL_BREAKER();
}
this_instr->op.code = RESUME_CHECK;
}
} else {
if ((oparg & RESUME_OPARG_LOCATION_MASK) < RESUME_AFTER_YIELD_FROM) {
CHECK_EVAL_BREAKER();
}
this_instr->op.code = RESUME_CHECK;
}
}

Expand All @@ -175,7 +180,7 @@ dummy_func(
inst(INSTRUMENTED_RESUME, (--)) {
uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & ~_PY_EVAL_EVENTS_MASK;
uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
if (code_version != global_version) {
if (code_version != global_version && tstate->tracing == 0) {
if (_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp)) {
GOTO_ERROR(error);
}
Expand Down
28 changes: 17 additions & 11 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -795,17 +795,23 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
{
_Py_CODEUNIT *prev = frame->instr_ptr;
_Py_CODEUNIT *here = frame->instr_ptr = next_instr;
_PyFrame_SetStackPointer(frame, stack_pointer);
int original_opcode = _Py_call_instrumentation_line(
tstate, frame, here, prev);
stack_pointer = _PyFrame_GetStackPointer(frame);
if (original_opcode < 0) {
next_instr = here+1;
goto error;
}
next_instr = frame->instr_ptr;
if (next_instr != here) {
DISPATCH();
int original_opcode = 0;
if (tstate->tracing) {
PyCodeObject *code = _PyFrame_GetCode(frame);
original_opcode = code->_co_monitoring->lines[(int)(here - _PyCode_CODE(code))].original_opcode;
} else {
_PyFrame_SetStackPointer(frame, stack_pointer);
original_opcode = _Py_call_instrumentation_line(
tstate, frame, here, prev);
stack_pointer = _PyFrame_GetStackPointer(frame);
if (original_opcode < 0) {
next_instr = here+1;
goto error;
}
next_instr = frame->instr_ptr;
if (next_instr != here) {
DISPATCH();
}
}
if (_PyOpcode_Caches[original_opcode]) {
_PyBinaryOpCache *cache = (_PyBinaryOpCache *)(next_instr+1);
Expand Down
16 changes: 10 additions & 6 deletions Python/ceval_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -332,12 +332,16 @@ do { \
// for an exception handler, displaying the traceback, and so on
#define INSTRUMENTED_JUMP(src, dest, event) \
do { \
_PyFrame_SetStackPointer(frame, stack_pointer); \
next_instr = _Py_call_instrumentation_jump(tstate, event, frame, src, dest); \
stack_pointer = _PyFrame_GetStackPointer(frame); \
if (next_instr == NULL) { \
next_instr = (dest)+1; \
goto error; \
if (tstate->tracing) {\
next_instr = dest; \
} else { \
_PyFrame_SetStackPointer(frame, stack_pointer); \
next_instr = _Py_call_instrumentation_jump(tstate, event, frame, src, dest); \
stack_pointer = _PyFrame_GetStackPointer(frame); \
if (next_instr == NULL) { \
next_instr = (dest)+1; \
goto error; \
} \
} \
} while (0);

Expand Down
31 changes: 18 additions & 13 deletions Python/generated_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 1 addition & 3 deletions Python/instrumentation.c
Original file line number Diff line number Diff line change
Expand Up @@ -1130,16 +1130,14 @@ _Py_Instrumentation_GetLine(PyCodeObject *code, int index)
int
_Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame, _Py_CODEUNIT *instr, _Py_CODEUNIT *prev)
{
// The caller should guarantee tstate->tracing == 0
PyCodeObject *code = _PyFrame_GetCode(frame);
assert(is_version_up_to_date(code, tstate->interp));
assert(instrumentation_cross_checks(tstate->interp, code));
int i = (int)(instr - _PyCode_CODE(code));

_PyCoMonitoringData *monitoring = code->_co_monitoring;
_PyCoLineInstrumentationData *line_data = &monitoring->lines[i];
if (tstate->tracing) {
goto done;
}
PyInterpreterState *interp = tstate->interp;
int8_t line_delta = line_data->line_delta;
int line = compute_line(code, i, line_delta);
Expand Down