Sep 4, 2015

Python源码阅读-闭包的实现

闭包

e.g.

# Closure example: add(x) returns a nested function that still sees x.
def add(x):
    def do_add(value):
        # x is not a local of do_add; it comes from the enclosing add() call
        return x + value
    return do_add

# add(5) returns do_add with x bound to 5
add_5 = add(5)
print add_5(1)  # 6
print add_5(2)  # 7

需要回答, 什么是闭包, CPython底层是如何实现的?

PyCodeObject

/* Code object: holds the instruction opcodes plus the constant and name
   tables they refer to. */
typedef struct {
    PyObject_HEAD
    int co_argcount;		/* #arguments, except *args */
    int co_nlocals;		    /* #local variables */
    int co_stacksize;		/* #entries needed for evaluation stack */
    int co_flags;		    /* CO_..., see below */
    PyObject *co_code;		/* instruction opcodes */
    PyObject *co_consts;	/* list (constants used) */
    PyObject *co_names;		/* list of strings (names used) */
    PyObject *co_varnames;	/* tuple of strings (local variable names) */

    // Names of variables from enclosing scopes that this code uses
    // (known at compile time; relevant when this code is the nested one)
    PyObject *co_freevars;	/* tuple of strings (free variable names) */
    // Names of this code's variables that nested scopes use
    // (known at compile time; relevant when this code contains nested functions)
    PyObject *co_cellvars;      /* tuple of strings (cell variable names) */


    /* The rest doesn't count for hash/cmp */
    PyObject *co_filename;	/* string (where it was loaded from) */
    PyObject *co_name;		/* string (name, for reference) */
    int co_firstlineno;		/* first source line number */
    PyObject *co_lnotab;	/* string (encoding addr<->lineno mapping) See
				   Objects/lnotab_notes.txt for details. */
    void *co_zombieframe;     /* for optimization only (see frameobject.c) */
    PyObject *co_weakreflist;   /* to support weakrefs to code objects */
} PyCodeObject;

我们关注两个, co_freevars 和 co_cellvars

co_freevars, 保存使用了的外层作用域中的变量名集合 (编译时就知道的! 被嵌套的时候有用)

co_cellvars, 保存当前作用域中被嵌套作用域(内层函数)使用的变量名集合, (编译时就知道的! 包含嵌套函数时有用)

对于我们上面的那个示例, add是外层函数, do_add是嵌套函数, 我们可以通过func_code打印看看

def add(x):    # outer function
    # Outer function: it uses no variable from an enclosing scope,
    # and its own variable 'x' is used by the nested function
    print add.func_code.co_freevars    # ()
    print add.func_code.co_cellvars    # ('x',)

    def do_add(value):  # nested function
        return x + value

    # Inner function: it uses the variable 'x' from the enclosing scope;
    # it contains no nested function, so its co_cellvars is empty
    print do_add.func_code.co_freevars # ('x',)
    print do_add.func_code.co_cellvars # ()
    return do_add

此时图示

closure

这时候, 只是记录了使用到的变量名, 标记下是否使用了外层的/被内层使用的变量

具体的值是在运行时确定的, 例如

add(5)

此时x=5, 这个是在add的名字空间里面的, 那么, x=5是怎么传递到嵌套函数内? 嵌套函数又是如何知晓x的值?

记住这两个问题, 然后我们首先来看一个新的数据结构

PyCellObject

  /* Cell object: a minimal container holding one reference to another object. */
  typedef struct {
    PyObject_HEAD
    PyObject *ob_ref;   /* Content of the cell or NULL when empty */ => 指向一个PyObject
  } PyCellObject;


  /* Create a new cell object whose ob_ref points at obj (obj may be NULL,
     giving an empty cell).  Returns NULL if the allocation fails. */
  PyObject *
  PyCell_New(PyObject *obj)
  {
      PyCellObject *op;

      op = (PyCellObject *)PyObject_GC_New(PyCellObject, &PyCell_Type);
      if (op == NULL)
          return NULL;
      op->ob_ref = obj;  // link the cell to the wrapped object
      Py_XINCREF(obj);   // X variant, because obj is allowed to be NULL

      _PyObject_GC_TRACK(op);
      return (PyObject *)op;
  }

这是个很简单的基本对象, 有一个ob_ref指向另一个PyObject, 仅此而已

图示

closure

作用呢?

值的确认与传递过程

调用

add(5)

此时, 开始调用函数

CALL_FUNCTION

=> x = call_function(&sp, oparg);

=> x = fast_function(func, pp_stack, n, na, nk);

      return PyEval_EvalCodeEx(co, globals,
                               (PyObject *)NULL, (*pp_stack)-n, na,
                               (*pp_stack)-2*nk, nk, d, nd,
                               PyFunction_GET_CLOSURE(func));

=>

PyEval_EvalCodeEx

调用add(5)时, 其co_cellvars = ('x',) 非空, 因此将会执行如下逻辑代码


      /* Allocate and initialize storage for cell vars, and copy free
         vars into frame.  This isn't too efficient right now. */
      if (PyTuple_GET_SIZE(co->co_cellvars)) {
          int i, j, nargs, found;
          char *cellname, *argname;
          PyObject *c;

          nargs = co->co_argcount;
          if (co->co_flags & CO_VARARGS)
              nargs++;
          if (co->co_flags & CO_VARKEYWORDS)
              nargs++;

          /* Initialize each cell var, taking into account
             cell vars that are initialized from arguments.

             Should arrange for the compiler to put cellvars
             that are arguments at the beginning of the cellvars
             list so that we can march over it more efficiently?
          */

          // Loop over co_cellvars; for add(5) this is ('x', ), so only i = 0
          for (i = 0; i < PyTuple_GET_SIZE(co->co_cellvars); ++i) {

              // for add(5): cellname = 'x'
              cellname = PyString_AS_STRING(
                  PyTuple_GET_ITEM(co->co_cellvars, i));
              found = 0;

              // Scan the function's argument names; for add(5): nargs = 1, so only j = 0
              for (j = 0; j < nargs; j++) {

                  // Argument names are the first nargs entries of co_varnames
                  argname = PyString_AS_STRING(
                      PyTuple_GET_ITEM(co->co_varnames, j));

                  // The cell variable is one of the arguments
                  if (strcmp(cellname, argname) == 0) {

                      // Create a PyCellObject whose ob_ref points at the argument's current value
                      c = PyCell_New(GETLOCAL(j));
                      if (c == NULL)
                          goto fail;

                      // #define GETLOCAL(i)     (fastlocals[i])
                      // fastlocals = f->f_localsplus;
                      // i.e. f->f_localsplus[co->co_nlocals + i] = c; this cell is what the
                      // nested function will later receive as its free variable
                      GETLOCAL(co->co_nlocals + i) = c;
                      found = 1;
                      break;
                  }
              }

              // Not an argument: create an empty cell (ob_ref == NULL) as a placeholder
              if (found == 0) {
                  c = PyCell_New(NULL);
                  if (c == NULL)
                      goto fail;
                  SETLOCAL(co->co_nlocals + i, c); // note the slot: cells sit after the co_nlocals locals
              }
          }
      }

逻辑即, 如果发现当前函数co_cellvars非空, 即表示存在被内层函数使用的变量, 那么遍历这个co_cellvars集合, 为其中每个变量名新建一个PyCellObject (若该变量是函数参数, cell的ob_ref指向参数当前的值; 否则先指向NULL占位), 然后把这个cell放到当前函数的f->f_localsplus中.

这里, 我们可以知道指向x=5的cell被放进去了

为什么放到f->f_localsplus中呢?

看看PyFrameObject

/* Frame object: the execution record for one run of a code object
   (f_code), with its symbol tables, block stack and locals+stack storage. */
typedef struct _frame {
    PyObject_VAR_HEAD
    struct _frame *f_back;	/* previous frame, or NULL */
    PyCodeObject *f_code;	/* code segment */
    PyObject *f_builtins;	/* builtin symbol table (PyDictObject) */
    PyObject *f_globals;	/* global symbol table (PyDictObject) */
    PyObject *f_locals;		/* local symbol table (any mapping) */
    PyObject **f_valuestack;	/* points after the last local */
    /* Next free slot in f_valuestack.  Frame creation sets to f_valuestack.
       Frame evaluation usually NULLs it, but a frame that yields sets it
       to the current stack top. */
    PyObject **f_stacktop;
    PyObject *f_trace;		/* Trace function */

    /* If an exception is raised in this frame, the next three are used to
     * record the exception info (if any) originally in the thread state.  See
     * comments before set_exc_info() -- it's not obvious.
     * Invariant:  if _type is NULL, then so are _value and _traceback.
     * Desired invariant:  all three are NULL, or all three are non-NULL.  That
     * one isn't currently true, but "should be".
     */
    PyObject *f_exc_type, *f_exc_value, *f_exc_traceback;

    PyThreadState *f_tstate;
    int f_lasti;		/* Last instruction if called */
    /* Call PyFrame_GetLineNumber() instead of reading this field
       directly.  As of 2.3 f_lineno is only valid when tracing is
       active (i.e. when f_trace is set).  At other times we use
       PyCode_Addr2Line to calculate the line from the current
       bytecode index. */
    int f_lineno;		/* Current line number */
    int f_iblock;		/* index in f_blockstack */
    PyTryBlock f_blockstack[CO_MAXBLOCKS]; /* for try and loop blocks */
    /* Last member, declared with a single element; the frame is allocated
       with extra trailing slots that are reached through this array. */
    PyObject *f_localsplus[1];	/* locals+stack, dynamically sized */
} PyFrameObject;

注意f_localsplus

f_localsplus为一个PyObject的指针数组，大小为1。

C语言中, 当申请的内存大小超过sizeof(PyFrameObject)时, 由于f_localsplus是结构体的最后一个成员, 超出的部分就可以通过f_localsplus[1], f_localsplus[2], ... 来访问, 相当于自动分配给了f_localsplus

创建过程

在call_function的时候, new了一个PyFrameObject

f = PyFrame_New(tstate, co, globals, locals);

=>

PyFrameObject *
PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals,
            PyObject *locals)
{
        Py_ssize_t extras, ncells, nfrees;
        ncells = PyTuple_GET_SIZE(code->co_cellvars);
        nfrees = PyTuple_GET_SIZE(code->co_freevars);
        extras = code->co_stacksize + code->co_nlocals + ncells +
            nfrees;

=>

f = PyObject_GC_NewVar(PyFrameObject, &PyFrame_Type, extras);

即

f_localsplus => 局部变量 + cell对象 + free对象 + 运行时栈

原因: 因为函数中的局部变量总是固定不变的, 在编译时就能确定局部变量使用的内存空间的位置, 也能确定访问局部变量的字节码应该如何访问内存, 有了这些信息, Python就能借助静态的方法实现局部变量, 而不是动态查找PyDictObject, 提高执行效率

closure

示例函数的f_localsplus

看一下上面赋值用的宏定义

  /* Fast-locals accessors: fastlocals aliases the frame's f_localsplus array. */
  fastlocals = f->f_localsplus;
  #define GETLOCAL(i)     (fastlocals[i])
  /* Store value in slot i, then release the reference to the previous
     occupant (Py_XDECREF, since the slot may have been NULL). */
  #define SETLOCAL(i, value)      do { PyObject *tmp = GETLOCAL(i); \
                                       GETLOCAL(i) = value; \
                                       Py_XDECREF(tmp); } while (0)

最终得到

closure

接下去呢? CALL_FUNCTION最后怎么处理将cell传入嵌套函数?

传递

CALL_FUNCTION 完成new一个PyFrameObject之后,

最终执行这个frame

retval = PyEval_EvalFrameEx(f,0);

PyEval_EvalFrameEx

  PyObject *
  PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
  {

    fastlocals = f->f_localsplus;
    freevars = f->f_localsplus + co->co_nlocals;


=> 此时涉及op_code的执行了

查看一下dis的结果

# The closure example again: add() creates do_add, which refers to add's x.
def add(x):
    def do_add(value):
        return x + value
    return do_add

  5           0 LOAD_CLOSURE             0 (x)
              3 BUILD_TUPLE              1
              6 LOAD_CONST               1 (<code object do_add at 0x10c9cec30, file "a.py", line 5>)
              9 MAKE_CLOSURE             0
             12 STORE_FAST               1 (do_add)

  7          15 LOAD_FAST                1 (do_add)
             18 RETURN_VALUE

首先LOAD_CLOSURE 0

          /* LOAD_CLOSURE: push the object stored at freevars[oparg]
             (a cell) onto the value stack. */
          case LOAD_CLOSURE:
              x = freevars[oparg];
              Py_INCREF(x);   /* the stack entry holds its own reference */
              PUSH(x);
              if (x != NULL) continue;   /* success: skip the break below */
              break;

入栈, 此时得到一个PyCellObject, 指向5 (即调用add(5)时传入的x), name='x'

LOAD_CLOSURE 指令是在编译时根据嵌套函数的 co_freevars 生成的, co_freevars 决定了要取哪些cell (位置) 以及取多少个 (个数)

然后, BUILD_TUPLE, 将cell对象打包成tuple, 得到一个只含x对应cell的tuple, 即 (<cell x>, )

然后, 开始, 载入嵌套函数do_add, 入栈

调用MAKE_CLOSURE

          /* MAKE_CLOSURE: build a function object from the code object on top
             of the stack and attach the closure tuple found beneath it. */
          case MAKE_CLOSURE:
          {
              v = POP(); /* code object */  // here: the code object of do_add
              x = PyFunction_New(v, f->f_globals); // bind the frame's globals to the new function
              // at this point x is a PyFunctionObject (or NULL on failure)

              Py_DECREF(v);
              if (x != NULL) {
                  v = POP();   // the tuple built by BUILD_TUPLE (holds the cell for 'x')

                  // key step: store the tuple on the function object
                  if (PyFunction_SetClosure(x, v) != 0) {
                      /* Can't happen unless bytecode is corrupt. */
                      why = WHY_EXCEPTION;
                  }
                  Py_DECREF(v);
              }
              ......
          }

来关注一下 PyFunction_SetClosure

/* Replace the closure stored on function object op with closure.
   The statements that precede these lines are elided in this excerpt. */
int
PyFunction_SetClosure(PyObject *op, PyObject *closure)
{
    ...
    Py_XDECREF(((PyFunctionObject *) op) -> func_closure);  // release the previous closure, if any
    ((PyFunctionObject *) op) -> func_closure = closure;  // key step: func_closure now points at the tuple
    return 0;
}

即do_add的 PyFunctionObject的func_closure指向一个tuple

注意: 这时候, 嵌套函数引用的外层变量所对应的cell已经固定下来了!!!!!! (固定的是cell对象本身, tuple里保存的是cell, 而不是值的拷贝)

然后, 在嵌套函数被调用的时候

CALL_FUNCTION

=>

x = call_function(&sp, oparg);

=>

x = fast_function(func, pp_stack, n, na, nk);

=>

      return PyEval_EvalCodeEx(co, globals,
                               (PyObject *)NULL, (*pp_stack)-n, na,
                               (*pp_stack)-2*nk, nk, d, nd,
                               PyFunction_GET_CLOSURE(func));

看下PyFunction_GET_CLOSURE

  #define PyFunction_GET_CLOSURE(func) \
      (((PyFunctionObject *)func) -> func_closure)

然后, 进入 PyEval_EvalCodeEx, 注意这里的closure参数即上一步取出来的func_closure, 即外层函数传进来的tuple

  PyObject *
  PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
             PyObject **args, int argcount, PyObject **kws, int kwcount,
             PyObject **defs, int defcount, PyObject *closure)
{
      ......
      // The nested function do_add uses a variable of the outer function,
      // so its co->co_freevars is non-empty: ('x', )
      if (PyTuple_GET_SIZE(co->co_freevars)) {
          int i;
          for (i = 0; i < PyTuple_GET_SIZE(co->co_freevars); ++i) {
              // closure is indexed in the same order as co_freevars
              PyObject *o = PyTuple_GET_ITEM(closure, i);
              Py_INCREF(o);
              // Store it in the freevars area, after this code's own cellvars.
              // The order was fixed at compile time and preserved by the outer
              // function's LOAD_CLOSURE ... BUILD_TUPLE sequence.
              freevars[PyTuple_GET_SIZE(co->co_cellvars) + i] = o;
          }
      }
      ......

最后, 再来看一个闭包的dis

注意BUILD_TUPLE

# Outer function whose two variables (x, y) are used by three nested functions.
def add(x, y):

    def do_add(value):      # uses x only
        return x + value

    def do_add2(value):     # uses y only
        return y + value

    def do_add3(value):     # uses both x and y
        return x + y + value

    return do_add

dis结果

 18           0 LOAD_CLOSURE             0 (x)
              3 BUILD_TUPLE              1
              6 LOAD_CONST               1 (<code object do_add at 0x10560dc30, file "a.py", line 18>)
              9 MAKE_CLOSURE             0
             12 STORE_FAST               2 (do_add)

 21          15 LOAD_CLOSURE             1 (y)
             18 BUILD_TUPLE              1
             21 LOAD_CONST               2 (<code object do_add2 at 0x10560d8b0, file "a.py", line 21>)
             24 MAKE_CLOSURE             0
             27 STORE_FAST               3 (do_add2)

 24          30 LOAD_CLOSURE             0 (x)
             33 LOAD_CLOSURE             1 (y)
             36 BUILD_TUPLE              2
             39 LOAD_CONST               3 (<code object do_add3 at 0x10560e3b0, file "a.py", line 24>)
             42 MAKE_CLOSURE             0
             45 STORE_FAST               4 (do_add3)

 32          48 LOAD_FAST                2 (do_add)
             51 RETURN_VALUE