Commit 71a3d6ec authored by Damien George's avatar Damien George

py: Reduce size of mp_code_state_t structure.

Instead of caching data that is constant (code_info, const_table and
n_state), store just a pointer to the underlying function object from which
this data can be derived.

This helps reduce stack usage for the case when the mp_code_state_t
structure is stored on the stack, as well as heap usage when it's stored
on the heap.

The downside is that the VM becomes a little more complex because it now
needs to derive the data from the underlying function object.  But this
doesn't impact the performance by much (if at all) because most of the
decoding of data is done outside the main opcode loop.  Measurements using
pystone show that little to no performance is lost.

This patch also fixes a nasty bug whereby the bytecode can be reclaimed by
the GC during execution.  With this patch there is always a pointer to the
function object held by the VM during execution, since it's stored in the
mp_code_state_t structure.
parent eeff0c35
......@@ -86,25 +86,26 @@ STATIC void dump_args(const mp_obj_t *a, size_t sz) {
// On entry code_state should be allocated somewhere (stack/heap) and
// contain the following valid entries:
// - code_state->ip should contain the offset in bytes from the start of
// the bytecode chunk to just after n_state and n_exc_stack
// - code_state->n_state should be set to the state size (locals plus stack)
void mp_setup_code_state(mp_code_state_t *code_state, mp_obj_fun_bc_t *self, size_t n_args, size_t n_kw, const mp_obj_t *args) {
// - code_state->fun_bc should contain a pointer to the function object
// - code_state->ip should contain the offset in bytes from the pointer
// code_state->fun_bc->bytecode to the entry n_state (0 for bytecode, non-zero for native)
void mp_setup_code_state(mp_code_state_t *code_state, size_t n_args, size_t n_kw, const mp_obj_t *args) {
// This function is pretty complicated. It's main aim is to be efficient in speed and RAM
// usage for the common case of positional only args.
size_t n_state = code_state->n_state;
// get the function object that we want to set up (could be bytecode or native code)
mp_obj_fun_bc_t *self = code_state->fun_bc;
// ip comes in as an offset into bytecode, so turn it into a true pointer
code_state->ip = self->bytecode + (size_t)code_state->ip;
// store pointer to constant table
code_state->const_table = self->const_table;
#if MICROPY_STACKLESS
code_state->prev = NULL;
#endif
// get params
size_t n_state = mp_decode_uint(&code_state->ip);
mp_decode_uint(&code_state->ip); // skip n_exc_stack
size_t scope_flags = *code_state->ip++;
size_t n_pos_args = *code_state->ip++;
size_t n_kwonly_args = *code_state->ip++;
......@@ -168,7 +169,7 @@ void mp_setup_code_state(mp_code_state_t *code_state, mp_obj_fun_bc_t *self, siz
}
// get pointer to arg_names array
const mp_obj_t *arg_names = (const mp_obj_t*)code_state->const_table;
const mp_obj_t *arg_names = (const mp_obj_t*)self->const_table;
for (size_t i = 0; i < n_kw; i++) {
// the keys in kwargs are expected to be qstr objects
......@@ -244,9 +245,8 @@ continue2:;
// get the ip and skip argument names
const byte *ip = code_state->ip;
// store pointer to code_info and jump over it
// jump over code info (source file and line-number mapping)
{
code_state->code_info = ip;
const byte *ip2 = ip;
size_t code_info_size = mp_decode_uint(&ip2);
ip += code_info_size;
......
......@@ -28,6 +28,7 @@
#include "py/runtime.h"
#include "py/obj.h"
#include "py/objfun.h"
// bytecode layout:
//
......@@ -70,9 +71,12 @@ typedef struct _mp_exc_stack_t {
} mp_exc_stack_t;
typedef struct _mp_code_state_t {
const byte *code_info;
// The fun_bc entry points to the underlying function object that is being executed.
// It is needed to access the start of bytecode and the const_table.
// It is also needed to prevent the GC from reclaiming the bytecode during execution,
// because the ip pointer below will always point to the interior of the bytecode.
mp_obj_fun_bc_t *fun_bc;
const byte *ip;
const mp_uint_t *const_table;
mp_obj_t *sp;
// bit 0 is saved currently_in_except_block value
mp_exc_stack_t *exc_sp;
......@@ -80,7 +84,6 @@ typedef struct _mp_code_state_t {
#if MICROPY_STACKLESS
struct _mp_code_state_t *prev;
#endif
size_t n_state;
// Variable-length
mp_obj_t state[0];
// Variable-length, never accessed by name, only as (void*)(state + n_state)
......@@ -91,8 +94,7 @@ mp_uint_t mp_decode_uint(const byte **ptr);
mp_vm_return_kind_t mp_execute_bytecode(mp_code_state_t *code_state, volatile mp_obj_t inject_exc);
mp_code_state_t *mp_obj_fun_bc_prepare_codestate(mp_obj_t func, size_t n_args, size_t n_kw, const mp_obj_t *args);
struct _mp_obj_fun_bc_t;
void mp_setup_code_state(mp_code_state_t *code_state, struct _mp_obj_fun_bc_t *self, size_t n_args, size_t n_kw, const mp_obj_t *args);
void mp_setup_code_state(mp_code_state_t *code_state, size_t n_args, size_t n_kw, const mp_obj_t *args);
void mp_bytecode_print(const void *descr, const byte *code, mp_uint_t len, const mp_uint_t *const_table);
void mp_bytecode_print2(const byte *code, size_t len, const mp_uint_t *const_table);
const byte *mp_bytecode_print_str(const byte *ip);
......
......@@ -407,43 +407,29 @@ STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scop
#endif
// prepare incoming arguments for call to mp_setup_code_state
#if N_X86
asm_x86_mov_arg_to_r32(emit->as, 0, REG_ARG_2);
asm_x86_mov_arg_to_r32(emit->as, 1, REG_ARG_3);
asm_x86_mov_arg_to_r32(emit->as, 2, REG_ARG_4);
asm_x86_mov_arg_to_r32(emit->as, 3, REG_ARG_5);
#else
#if N_THUMB
ASM_MOV_REG_REG(emit->as, ASM_THUMB_REG_R4, REG_ARG_4);
#elif N_ARM
ASM_MOV_REG_REG(emit->as, ASM_ARM_REG_R4, REG_ARG_4);
#else
ASM_MOV_REG_REG(emit->as, REG_ARG_5, REG_ARG_4);
#endif
ASM_MOV_REG_REG(emit->as, REG_ARG_4, REG_ARG_3);
ASM_MOV_REG_REG(emit->as, REG_ARG_3, REG_ARG_2);
ASM_MOV_REG_REG(emit->as, REG_ARG_2, REG_ARG_1);
asm_x86_mov_arg_to_r32(emit->as, 0, REG_ARG_1);
asm_x86_mov_arg_to_r32(emit->as, 1, REG_ARG_2);
asm_x86_mov_arg_to_r32(emit->as, 2, REG_ARG_3);
asm_x86_mov_arg_to_r32(emit->as, 3, REG_ARG_4);
#endif
// set code_state.fun_bc
ASM_MOV_REG_TO_LOCAL(emit->as, REG_ARG_1, offsetof(mp_code_state_t, fun_bc) / sizeof(uintptr_t));
// set code_state.ip (offset from start of this function to prelude info)
// XXX this encoding may change size
ASM_MOV_IMM_TO_LOCAL_USING(emit->as, emit->prelude_offset, offsetof(mp_code_state_t, ip) / sizeof(mp_uint_t), REG_ARG_1);
// set code_state.n_state
ASM_MOV_IMM_TO_LOCAL_USING(emit->as, emit->n_state, offsetof(mp_code_state_t, n_state) / sizeof(mp_uint_t), REG_ARG_1);
ASM_MOV_IMM_TO_LOCAL_USING(emit->as, emit->prelude_offset, offsetof(mp_code_state_t, ip) / sizeof(uintptr_t), REG_ARG_1);
// put address of code_state into first arg
ASM_MOV_LOCAL_ADDR_TO_REG(emit->as, 0, REG_ARG_1);
// call mp_setup_code_state to prepare code_state structure
#if N_THUMB
asm_thumb_op16(emit->as, 0xb400 | (1 << ASM_THUMB_REG_R4)); // push 5th arg
asm_thumb_bl_ind(emit->as, mp_fun_table[MP_F_SETUP_CODE_STATE], MP_F_SETUP_CODE_STATE, ASM_THUMB_REG_R4);
asm_thumb_op16(emit->as, 0xbc00 | (1 << REG_RET)); // pop dummy (was 5th arg)
#elif N_ARM
asm_arm_push(emit->as, 1 << ASM_ARM_REG_R4); // push 5th arg
asm_arm_bl_ind(emit->as, mp_fun_table[MP_F_SETUP_CODE_STATE], MP_F_SETUP_CODE_STATE, ASM_ARM_REG_R4);
asm_arm_pop(emit->as, 1 << REG_RET); // pop dummy (was 5th arg)
#else
ASM_CALL_IND(emit->as, mp_fun_table[MP_F_SETUP_CODE_STATE], MP_F_SETUP_CODE_STATE);
#endif
......@@ -477,6 +463,9 @@ STATIC void emit_native_end_pass(emit_t *emit) {
if (!emit->do_viper_types) {
emit->prelude_offset = mp_asm_base_get_code_pos(&emit->as->base);
mp_asm_base_data(&emit->as->base, 1, 0x80 | ((emit->n_state >> 7) & 0x7f));
mp_asm_base_data(&emit->as->base, 1, emit->n_state & 0x7f);
mp_asm_base_data(&emit->as->base, 1, 0); // n_exc_stack
mp_asm_base_data(&emit->as->base, 1, emit->scope->scope_flags);
mp_asm_base_data(&emit->as->base, 1, emit->scope->num_pos_args);
mp_asm_base_data(&emit->as->base, 1, emit->scope->num_kwonly_args);
......
......@@ -220,9 +220,9 @@ mp_code_state_t *mp_obj_fun_bc_prepare_codestate(mp_obj_t self_in, size_t n_args
return NULL;
}
code_state->ip = (byte*)(ip - self->bytecode); // offset to after n_state/n_exc_stack
code_state->n_state = n_state;
mp_setup_code_state(code_state, self, n_args, n_kw, args);
code_state->fun_bc = self;
code_state->ip = 0;
mp_setup_code_state(code_state, n_args, n_kw, args);
// execute the byte code with the correct globals context
code_state->old_globals = mp_globals_get();
......@@ -265,9 +265,9 @@ STATIC mp_obj_t fun_bc_call(mp_obj_t self_in, size_t n_args, size_t n_kw, const
state_size = 0; // indicate that we allocated using alloca
}
code_state->ip = (byte*)(ip - self->bytecode); // offset to after n_state/n_exc_stack
code_state->n_state = n_state;
mp_setup_code_state(code_state, self, n_args, n_kw, args);
code_state->fun_bc = self;
code_state->ip = 0;
mp_setup_code_state(code_state, n_args, n_kw, args);
// execute the byte code with the correct globals context
code_state->old_globals = mp_globals_get();
......
......@@ -67,9 +67,9 @@ STATIC mp_obj_t gen_wrap_call(mp_obj_t self_in, size_t n_args, size_t n_kw, cons
o->base.type = &mp_type_gen_instance;
o->globals = self_fun->globals;
o->code_state.n_state = n_state;
o->code_state.ip = (byte*)(ip - self_fun->bytecode); // offset to prelude
mp_setup_code_state(&o->code_state, self_fun, n_args, n_kw, args);
o->code_state.fun_bc = self_fun;
o->code_state.ip = 0;
mp_setup_code_state(&o->code_state, n_args, n_kw, args);
return MP_OBJ_FROM_PTR(o);
}
......@@ -92,7 +92,7 @@ mp_obj_t mp_obj_new_gen_wrap(mp_obj_t fun) {
STATIC void gen_instance_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t kind) {
(void)kind;
mp_obj_gen_instance_t *self = MP_OBJ_TO_PTR(self_in);
mp_printf(print, "<generator object '%q' at %p>", mp_obj_code_get_name(self->code_state.code_info), self);
mp_printf(print, "<generator object '%q' at %p>", mp_obj_fun_get_name(MP_OBJ_FROM_PTR(self->code_state.fun_bc)), self);
}
mp_vm_return_kind_t mp_obj_gen_resume(mp_obj_t self_in, mp_obj_t send_value, mp_obj_t throw_value, mp_obj_t *ret_val) {
......@@ -134,10 +134,13 @@ mp_vm_return_kind_t mp_obj_gen_resume(mp_obj_t self_in, mp_obj_t send_value, mp_
}
break;
case MP_VM_RETURN_EXCEPTION:
case MP_VM_RETURN_EXCEPTION: {
const byte *bc = self->code_state.fun_bc->bytecode;
size_t n_state = mp_decode_uint(&bc);
self->code_state.ip = 0;
*ret_val = self->code_state.state[self->code_state.n_state - 1];
*ret_val = self->code_state.state[n_state - 1];
break;
}
}
return ret_kind;
......
......@@ -73,10 +73,10 @@ typedef enum {
ip += 2;
#define DECODE_PTR \
DECODE_UINT; \
void *ptr = (void*)(uintptr_t)code_state->const_table[unum]
void *ptr = (void*)(uintptr_t)code_state->fun_bc->const_table[unum]
#define DECODE_OBJ \
DECODE_UINT; \
mp_obj_t obj = (mp_obj_t)code_state->const_table[unum]
mp_obj_t obj = (mp_obj_t)code_state->fun_bc->const_table[unum]
#else
......@@ -162,8 +162,10 @@ mp_vm_return_kind_t mp_execute_bytecode(mp_code_state_t *code_state, volatile mp
run_code_state: ;
#endif
// Pointers which are constant for particular invocation of mp_execute_bytecode()
mp_obj_t * /*const*/ fastn = &code_state->state[code_state->n_state - 1];
mp_exc_stack_t * /*const*/ exc_stack = (mp_exc_stack_t*)(code_state->state + code_state->n_state);
const byte *temp_bc = code_state->fun_bc->bytecode;
size_t n_state = mp_decode_uint(&temp_bc);
mp_obj_t * /*const*/ fastn = &code_state->state[n_state - 1];
mp_exc_stack_t * /*const*/ exc_stack = (mp_exc_stack_t*)(code_state->state + n_state);
// variables that are visible to the exception handler (declared volatile)
volatile bool currently_in_except_block = MP_TAGPTR_TAG0(code_state->exc_sp); // 0 or 1, to detect nested exceptions
......@@ -1327,8 +1329,16 @@ unwind_loop:
// But consider how to handle nested exceptions.
// TODO need a better way of not adding traceback to constant objects (right now, just GeneratorExit_obj and MemoryError_obj)
if (nlr.ret_val != &mp_const_GeneratorExit_obj && nlr.ret_val != &mp_const_MemoryError_obj) {
const byte *ip = code_state->code_info;
const byte *ip = code_state->fun_bc->bytecode;
mp_decode_uint(&ip); // skip n_state
mp_decode_uint(&ip); // skip n_exc_stack
ip++; // skip scope_params
ip++; // skip n_pos_args
ip++; // skip n_kwonly_args
ip++; // skip n_def_pos_args
size_t bc = code_state->ip - ip;
size_t code_info_size = mp_decode_uint(&ip);
bc -= code_info_size;
#if MICROPY_PERSISTENT_CODE
qstr block_name = ip[0] | (ip[1] << 8);
qstr source_file = ip[2] | (ip[3] << 8);
......@@ -1337,7 +1347,6 @@ unwind_loop:
qstr block_name = mp_decode_uint(&ip);
qstr source_file = mp_decode_uint(&ip);
#endif
size_t bc = code_state->ip - code_state->code_info - code_info_size;
size_t source_line = 1;
size_t c;
while ((c = *ip)) {
......@@ -1393,8 +1402,8 @@ unwind_loop:
} else if (code_state->prev != NULL) {
mp_globals_set(code_state->old_globals);
code_state = code_state->prev;
fastn = &code_state->state[code_state->n_state - 1];
exc_stack = (mp_exc_stack_t*)(code_state->state + code_state->n_state);
fastn = &code_state->state[n_state - 1];
exc_stack = (mp_exc_stack_t*)(code_state->state + n_state);
// variables that are visible to the exception handler (declared volatile)
currently_in_except_block = MP_TAGPTR_TAG0(code_state->exc_sp); // 0 or 1, to detect nested exceptions
exc_sp = MP_TAGPTR_PTR(code_state->exc_sp); // stack grows up, exc_sp points to top of stack
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment