|
| 1 | +# code 对象 |
| 2 | + |
| 3 | +```c |
| 4 | +struct PyCodeObject { |
| 5 | + PyObject_HEAD |
| 6 | + int co_argcount; /* #arguments, except *args */ |
| 7 | + int co_posonlyargcount; /* #positional only arguments */ |
| 8 | + int co_kwonlyargcount; /* #keyword only arguments */ |
| 9 | + int co_nlocals; /* #local variables */ |
| 10 | + int co_stacksize; /* #entries needed for evaluation stack */ |
| 11 | + int co_flags; /* CO_..., see below */ |
| 12 | + int co_firstlineno; /* first source line number */ |
| 13 | + PyObject *co_code; /* instruction opcodes */ |
| 14 | + PyObject *co_consts; /* list (constants used) */ |
| 15 | + PyObject *co_names; /* list of strings (names used) */ |
| 16 | + PyObject *co_varnames; /* tuple of strings (local variable names) */ |
| 17 | + PyObject *co_freevars; /* tuple of strings (free variable names) */ |
| 18 | + PyObject *co_cellvars; /* tuple of strings (cell variable names) */ |
| 19 | + /* The rest aren't used in either hash or comparisons, except for co_name, |
| 20 | + used in both. This is done to preserve the name and line number |
| 21 | + for tracebacks and debuggers; otherwise, constant de-duplication |
| 22 | + would collapse identical functions/lambdas defined on different lines. |
| 23 | + */ |
| 24 | + Py_ssize_t *co_cell2arg; /* Maps cell vars which are arguments. */ |
| 25 | + PyObject *co_filename; /* unicode (where it was loaded from) */ |
| 26 | + PyObject *co_name; /* unicode (name, for reference) */ |
| 27 | + PyObject *co_lnotab; /* string (encoding addr<->lineno mapping) See |
| 28 | + Objects/lnotab_notes.txt for details. */ |
| 29 | + void *co_zombieframe; /* for optimization only (see frameobject.c) */ |
| 30 | + PyObject *co_weakreflist; /* to support weakrefs to code objects */ |
| 31 | + /* Scratch space for extra data relating to the code object. |
| 32 | + Type is a void* to keep the format private in codeobject.c to force |
| 33 | + people to go through the proper APIs. */ |
| 34 | + void *co_extra; |
| 35 | + |
| 36 | + /* Per opcodes just-in-time cache |
| 37 | + * |
| 38 | + * To reduce cache size, we use indirect mapping from opcode index to |
| 39 | + * cache object: |
| 40 | + * cache = co_opcache[co_opcache_map[next_instr - first_instr] - 1] |
| 41 | + */ |
| 42 | + |
| 43 | + // co_opcache_map is indexed by (next_instr - first_instr). |
| 44 | + // * 0 means there is no cache for this opcode. |
| 45 | + // * n > 0 means there is cache in co_opcache[n-1]. |
| 46 | + unsigned char *co_opcache_map; |
| 47 | + _PyOpcache *co_opcache; |
| 48 | + int co_opcache_flag; // used to determine when create a cache. |
| 49 | + unsigned char co_opcache_size; // length of co_opcache. |
| 50 | +}; |
| 51 | +``` |
| 52 | + |
| 53 | + |
| 54 | +## 字节码缓存(opcache) |
| 55 | + |
| 56 | +注意到 PyCodeObject 对象中有一个 co_opcache 属性, 似乎支持字节码缓存, 查看了其它代码发现字节码缓存功能目前只支持 LOAD_GLOBAL. |
| 57 | + |
| 58 | +字节码缓存的基本原理是保存字节码执行的结果, 当再次执行该字节码时, 可以直接返回缓存的结果, 从而提高字节码的执行效率. |
| 59 | + |
| 60 | +从定义 PyCodeObject 的结构体的代码注释中可以看出字节码缓存的实现原理, co_opcache_map 是一个 unsigned char 类型的数组, 索引是字节码的偏移量(`offset = next_instr - first_instr`), 如果 `co_opcache_map[offset]` 等于 0 说明该字节码没有缓存, 如果等于 n (n > 0), 说明该字节码的缓存保存在 `co_opcache[n - 1]`, 即 `co_opcache[co_opcache_map[offset] - 1]`. |
| 61 | + |
| 62 | +co_opcache 是一个 _PyOpcache 类型的数组, 代码如下: |
| 63 | + |
| 64 | +```c |
| 65 | +typedef struct { |
| 66 | + PyObject *ptr; /* Cached pointer (borrowed reference) */ |
| 67 | + uint64_t globals_ver; /* ma_version of global dict */ |
| 68 | + uint64_t builtins_ver; /* ma_version of builtin dict */ |
| 69 | +} _PyOpcache_LoadGlobal; |
| 70 | + |
| 71 | +struct _PyOpcache { |
| 72 | + union { |
| 73 | + _PyOpcache_LoadGlobal lg; |
| 74 | + } u; |
| 75 | + char optimized; |
| 76 | +}; |
| 77 | +``` |
| 78 | + |
| 79 | +`_PyOpcache_LoadGlobal.ptr` 指向缓存的数据, `_PyOpcache_LoadGlobal.globals_ver` 表示缓存数据时 globals(全局变量字典) 的版本, `_PyOpcache_LoadGlobal.builtins_ver` 表示缓存数据时 builtins 的版本. |
| 80 | + |
| 81 | +字典类型内部有一个版本字段 `ma_version_tag`, 每次字典被创建或修改时, 都会把全局计数器 `pydict_global_version` 加一, 并将新值写入该字段. 代码如下: |
| 82 | + |
| 83 | +```c |
| 84 | +/*Global counter used to set ma_version_tag field of dictionary. |
| 85 | + * It is incremented each time that a dictionary is created and each |
| 86 | + * time that a dictionary is modified. */ |
| 87 | +static uint64_t pydict_global_version = 0; |
| 88 | + |
| 89 | +#define DICT_NEXT_VERSION() (++pydict_global_version) |
| 90 | +``` |
| 91 | +
|
| 92 | +关于 `ma_version_tag` 的更多信息可以查看 [PEP 509 -- Add a private version to dict](https://www.python.org/dev/peps/pep-0509/). |
| 93 | +
|
| 94 | +当执行 `LOAD_GLOBAL` 时, 如果缓存存在并且缓存的版本号和当前版本号一致, 那么直接返回缓存的数据. |
| 95 | +
|
| 96 | +### 初始化 opcache |
| 97 | +
|
| 98 | +```c |
| 99 | +int |
| 100 | +_PyCode_InitOpcache(PyCodeObject *co) |
| 101 | +{ |
| 102 | + Py_ssize_t co_size = PyBytes_Size(co->co_code) / sizeof(_Py_CODEUNIT); |
| 103 | + co->co_opcache_map = (unsigned char *)PyMem_Calloc(co_size, 1); |
| 104 | + if (co->co_opcache_map == NULL) { |
| 105 | + return -1; |
| 106 | + } |
| 107 | +
|
| 108 | + _Py_CODEUNIT *opcodes = (_Py_CODEUNIT*)PyBytes_AS_STRING(co->co_code); |
| 109 | + Py_ssize_t opts = 0; |
| 110 | +
|
| 111 | + for (Py_ssize_t i = 0; i < co_size;) { |
| 112 | + unsigned char opcode = _Py_OPCODE(opcodes[i]); |
| 113 | + i++; // 'i' is now aligned to (next_instr - first_instr) |
| 114 | +
|
| 115 | + // TODO: LOAD_METHOD, LOAD_ATTR |
| 116 | + if (opcode == LOAD_GLOBAL) { |
| 117 | + opts++; |
| 118 | + co->co_opcache_map[i] = (unsigned char)opts; |
| 119 | + if (opts > 254) { |
| 120 | + break; |
| 121 | + } |
| 122 | + } |
| 123 | + } |
| 124 | +
|
| 125 | + if (opts) { |
| 126 | + co->co_opcache = (_PyOpcache *)PyMem_Calloc(opts, sizeof(_PyOpcache)); |
| 127 | + if (co->co_opcache == NULL) { |
| 128 | + PyMem_FREE(co->co_opcache_map); |
| 129 | + return -1; |
| 130 | + } |
| 131 | + } |
| 132 | + else { |
| 133 | + PyMem_FREE(co->co_opcache_map); |
| 134 | + co->co_opcache_map = NULL; |
| 135 | + co->co_opcache = NULL; |
| 136 | + } |
| 137 | +
|
| 138 | + co->co_opcache_size = (unsigned char)opts; |
| 139 | + return 0; |
| 140 | +} |
| 141 | +``` |
| 142 | + |
| 143 | +### LOAD_GLOBAL 检查 opcache |
| 144 | + |
| 145 | +```c |
| 146 | +case TARGET(LOAD_GLOBAL): { |
| 147 | + PyObject *name; |
| 148 | + PyObject *v; |
| 149 | + if (PyDict_CheckExact(f->f_globals) |
| 150 | + && PyDict_CheckExact(f->f_builtins)) |
| 151 | + { |
| 152 | + OPCACHE_CHECK(); |
| 153 | + if (co_opcache != NULL && co_opcache->optimized > 0) { |
| 154 | + _PyOpcache_LoadGlobal *lg = &co_opcache->u.lg; |
| 155 | + |
| 156 | + if (lg->globals_ver == |
| 157 | + ((PyDictObject *)f->f_globals)->ma_version_tag |
| 158 | + && lg->builtins_ver == |
| 159 | + ((PyDictObject *)f->f_builtins)->ma_version_tag) |
| 160 | + { |
| 161 | + PyObject *ptr = lg->ptr; |
| 162 | + OPCACHE_STAT_GLOBAL_HIT(); |
| 163 | + assert(ptr != NULL); |
| 164 | + Py_INCREF(ptr); |
| 165 | + PUSH(ptr); |
| 166 | + DISPATCH(); |
| 167 | + } |
| 168 | + } |
| 169 | + |
| 170 | + name = GETITEM(names, oparg); |
| 171 | + v = _PyDict_LoadGlobal((PyDictObject *)f->f_globals, |
| 172 | + (PyDictObject *)f->f_builtins, |
| 173 | + name); |
| 174 | + if (v == NULL) { |
| 175 | + if (!_PyErr_OCCURRED()) { |
| 176 | + /* _PyDict_LoadGlobal() returns NULL without raising |
| 177 | + * an exception if the key doesn't exist */ |
| 178 | + format_exc_check_arg(tstate, PyExc_NameError, |
| 179 | + NAME_ERROR_MSG, name); |
| 180 | + } |
| 181 | + goto error; |
| 182 | + } |
| 183 | + |
| 184 | + if (co_opcache != NULL) { |
| 185 | + _PyOpcache_LoadGlobal *lg = &co_opcache->u.lg; |
| 186 | + |
| 187 | + if (co_opcache->optimized == 0) { |
| 188 | + /* Wasn't optimized before. */ |
| 189 | + OPCACHE_STAT_GLOBAL_OPT(); |
| 190 | + } else { |
| 191 | + OPCACHE_STAT_GLOBAL_MISS(); |
| 192 | + } |
| 193 | + |
| 194 | + co_opcache->optimized = 1; |
| 195 | + lg->globals_ver = |
| 196 | + ((PyDictObject *)f->f_globals)->ma_version_tag; |
| 197 | + lg->builtins_ver = |
| 198 | + ((PyDictObject *)f->f_builtins)->ma_version_tag; |
| 199 | + lg->ptr = v; /* borrowed */ |
| 200 | + } |
| 201 | + |
| 202 | + Py_INCREF(v); |
| 203 | + } |
| 204 | +``` |
| 205 | +
|
| 206 | +网上搜 "Python opcache" 发现都是关于 PHP 的, 唯一比较有用的信息是一个 [issue](https://bugs.python.org/issue26219). 这个 issue 在 2016 年提出, 2019 年才合并到 python 3.8. 到目前为止只支持 LOAD_GLOBAL, 未来应该会支持 LOAD_ATTR 和 LOAD_METHOD. |
| 207 | +
|
| 208 | +突然想到一个手动优化读取全局变量的性能的方法: 在函数内使用一个局部变量保存全局变量的引用, 然后在之后的代码中都使用该局部变量. 这招对于比较长的属性访问也有帮助, 例如先用 `foo = obj.a.b.c.d` 把结果保存到局部变量, 之后直接使用 `foo`, 可以避免重复执行多次属性查找, 从而提高属性访问的速度. |
| 209 | +
|
| 210 | +一个例子: |
| 211 | +
|
| 212 | +```py |
| 213 | +class A: |
| 214 | + def __init__(self) -> None: |
| 215 | + self.a = 1 |
| 216 | +
|
| 217 | +class B: |
| 218 | + def __init__(self) -> None: |
| 219 | + self.a = A() |
| 220 | +
|
| 221 | +class C: |
| 222 | + def __init__(self) -> None: |
| 223 | + self.b = B() |
| 224 | +
|
| 225 | +c = C() |
| 226 | +print(c.b.a.a) |
| 227 | +``` |
| 228 | + |
| 229 | +属性访问的字节码: |
| 230 | + |
| 231 | +``` |
| 232 | + 15 48 LOAD_NAME 4 (print) |
| 233 | + 50 LOAD_NAME 3 (c) |
| 234 | + 52 LOAD_ATTR 5 (b) |
| 235 | + 54 LOAD_ATTR 6 (a) |
| 236 | + 56 LOAD_ATTR 6 (a) |
| 237 | + 58 CALL_FUNCTION 1 |
| 238 | +``` |
| 239 | + |
| 240 | + |
| 241 | + |
| 242 | + |
0 commit comments