【笔记】PHP7的基本变量:zval、gdb分析php、字符串写时复制

看视频啦、看书啥的笔记。

安装php

为之后gdb做准备

wget https://www.php.net/distributions/php-7.1.0.tar.gz

./configure '--prefix=/app/webserver/php710'  '--enable-debug' '--enable-fpm'

实际的生产环境的配置不止这么多。生产环境debug是disable的。

zval

./Zend/zend_types.h 文件。

/*
 * zval: the container PHP 7 uses for a variable's value.
 *
 * It pairs a zend_value payload with two 32-bit unions:
 *   - u1 carries the type information that says which member of `value`
 *     is meaningful (see the IS_* constants);
 *   - u2 is a spare 32-bit slot whose meaning depends on where the zval
 *     is used, as listed in the per-member comments below.
 */
struct _zval_struct {
        zend_value        value;                        /* value */
        union {
                /* Four one-byte fields; presumably ZEND_ENDIAN_LOHI_4 orders them
                 * according to host endianness -- confirm against its definition. */
                struct {
                        ZEND_ENDIAN_LOHI_4(
                                zend_uchar    type,                     /* active type */
                                zend_uchar    type_flags,
                                zend_uchar    const_flags,
                                zend_uchar    reserved)     /* call info for EX(This) */
                } v;
                /* The same storage as `v`, viewed as a single 32-bit integer. */
                uint32_t type_info;
        } u1;
        /* All members below alias the same 32-bit storage; only one is in use at a time. */
        union {
                uint32_t     next;                 /* hash collision chain */
                uint32_t     cache_slot;           /* literal cache slot */
                uint32_t     lineno;               /* line number (for ast nodes) */
                uint32_t     num_args;             /* arguments number for EX(This) */
                uint32_t     fe_pos;               /* foreach position */
                uint32_t     fe_iter_idx;          /* foreach iterator index */
                uint32_t     access_flags;         /* class constant access flags */
                uint32_t     property_guard;       /* single property guard */
        } u2;
};

zval 结构体大小是16字节。

zend_value

/*
 * zend_value: the payload of a zval.
 *
 * A union, so every member shares the same storage: scalars (lval, dval) are
 * stored directly, the other kinds are stored as a pointer to their own
 * structure. Which member to read is decided by the zval's type field.
 */
typedef union _zend_value {
        zend_long         lval;                         // integer
        double            dval;                         // floating point
        zend_refcounted  *counted;
        zend_string      *str;  // string
        zend_array       *arr;  // array
        zend_object      *obj;   // object
        zend_resource    *res;  // resource
        zend_reference   *ref;  // reference
        zend_ast_ref     *ast;
        zval             *zv;
        void             *ptr;
        zend_class_entry *ce;   // class
        zend_function    *func;  // function
        /* The same storage viewed as two 32-bit words. */
        struct {
                uint32_t w1;
                uint32_t w2;
        } ww;
} zend_value;

如何表示数据的类型呢?在u1中zend_uchar type

/* Type tags: the value stored in a zval's u1.v.type field to say what kind of value it holds. */
#define IS_UNDEF                                        0
#define IS_NULL                                         1
#define IS_FALSE                                        2
#define IS_TRUE                                         3
#define IS_LONG                                         4
#define IS_DOUBLE                                       5
#define IS_STRING                                       6
#define IS_ARRAY                                        7
#define IS_OBJECT                                       8
#define IS_RESOURCE                                     9
#define IS_REFERENCE                            10

/* constant expressions */
#define IS_CONSTANT                                     11
#define IS_CONSTANT_AST                         12

后面gdb调试的时候,注意看结构体的type数值。通过type类型,去取zend_value里面对应的值。

gdb 调试php程序

a.php

gdb ./php71/bin/php
b execute_ex 
r a.php

execute_ex()为opcode执行的handler,可以在这个函数中捕获所有的opcode的执行,局部变量分配在zend_execute_data。故在此打断点。

然后执行 n,直到看到if (UNEXPECTED((ret = ((opcode_handler_t)OPLINE->handler)(ZEND_OPCODE_HANDLER_ARGS_PASSTHRU)) != 0)) {
然后执行 s进入handler中。

然后看到 ZEND_ASSIGN_SPEC_CV_CONST_RETVAL_UNUSED_HANDLER 是把一个常量赋值给一个变量。

执行n,再回车(回车表示重复上一条命令)直到看到第二个 execute_ex

当我在调试过程中遇到了execute_ex (ex=<value optimized out>)。然后就跟教程不一样了。什么原因呢?是编译的时候'--disable-debug'了。如果你选择--enable-debug,在make的时候用的是gcc的-O0,这些地方就不再优化,就能看到完整的调试信息了。

于是我重新编译了一份。接着来。

(gdb) r a.php
Starting program: /app/webserver/php710/bin/php a.php
[Thread debugging using libthread_db enabled]

Breakpoint 1, execute_ex (ex=0x7ffff6014030)
    at /app/webserver/php-7.1.0/Zend/zend_vm_execute.h:417
417     zend_execute_data *execute_data = ex;
(gdb) n
422     ZEND_VM_LOOP_INTERRUPT_CHECK();
(gdb)
432         if (UNEXPECTED((ret = ((opcode_handler_t)OPLINE->handler)(ZEND_OPCODE_HANDLER_ARGS_PASSTHRU)) != 0)) {
(gdb) s
ZEND_ASSIGN_SPEC_CV_CONST_RETVAL_UNUSED_HANDLER (
    execute_data=0x7ffff6014030)
    at /app/webserver/php-7.1.0/Zend/zend_vm_execute.h:39435
39435       USE_OPLINE
(gdb) n
39441       value = EX_CONSTANT(opline->op2);
(gdb)
39442       variable_ptr = _get_zval_ptr_cv_undef_BP_VAR_W(execute_data, opline->op1.var);
(gdb)
39444       if (IS_CV == IS_VAR && UNEXPECTED(Z_ISERROR_P(variable_ptr))) {
(gdb)
39450           value = zend_assign_to_variable(variable_ptr, value, IS_CONST);
(gdb) p value
$15 = (zval *) 0x7ffff6064be0
(gdb) p *$15
$16 = {value = {lval = 10, dval = 4.9406564584124654e-323,
    counted = 0xa, str = 0xa, arr = 0xa, obj = 0xa, res = 0xa,
    ref = 0xa, ast = 0xa, zv = 0xa, ptr = 0xa, ce = 0xa, func = 0xa,
    ww = {w1 = 10, w2 = 0}}, u1 = {v = {type = 4 '\004',
      type_flags = 0 '\000', const_flags = 0 '\000',
      reserved = 0 '\000'}, type_info = 4}, u2 = {next = 4294967295,
    cache_slot = 4294967295, lineno = 4294967295,
    num_args = 4294967295, fe_pos = 4294967295,
    fe_iter_idx = 4294967295, access_flags = 4294967295,
    property_guard = 4294967295}}
(gdb) p (*$15)->value->lval
$17 = 10
(gdb) p (*$15)->u1->v.type
$19 = 4 '\004'

上面的命令是 `p (*$15)->value->lval` 和 `p (*$15)->u1->v.type`。对了,直接 p *value 即可,我上面用的是 p *$15。

我们再来一遍,我们在这个过程中打印下。变量的指针地址。

(gdb)
39450           value = zend_assign_to_variable(variable_ptr, value, IS_CONST);
(gdb) p variable_ptr
$21 = (zval *) 0x7ffff6014080

接着继续执行n。直到下一个execute_ex。

Breakpoint 2, ZEND_ECHO_SPEC_CV_HANDLER (execute_data=0x7ffff6014030)
    at /app/webserver/php-7.1.0/Zend/zend_vm_execute.h:34641
34641       z = _get_zval_ptr_cv_undef(execute_data, opline->op1.var);
(gdb) n
34643       if (Z_TYPE_P(z) == IS_STRING) {
(gdb) p z
$23 = (zval *) 0x7ffff6014080

ZEND_ECHO_SPEC_CV_HANDLER这是执行echo。然后打印下z的地址。发现跟上面一样。ok。教程中说到用echo是调试技巧。你也可以直接b ZEND_ECHO_SPEC_CV_HANDLER 这样就更方便看 z了。

剩下的php代码,如果你还想看,就是这样的调试方式。

字符串

/*
 * zend_string: PHP 7's string representation.
 *
 * The length is stored explicitly in `len` rather than being found by
 * scanning for a terminating '\0'.
 */
struct _zend_string {
        zend_refcounted_h gc;               /* reference-counting header (refcount plus type/flags info) */
        zend_ulong        h;                /* hash value */
        size_t            len;              /* number of bytes in the string */
        char              val[1];           /* first byte of the character data; the remaining bytes follow in memory */
};

char val[1] 是一个柔性数组。柔性数组是指结构体末尾的变长数组:因为字符串用到的长度是不定的,结构体里只声明了一个位置的char,实际的字符串长度由len决定。需要更多空间的时候,直接按实际长度在结构体后面多申请内存,字符串内容就紧跟在结构体后面存放。

这种字符串的实现是二进制安全的。

什么是非二进制安全?字符串以\0结尾,如果字符串中有\0那么就会被截断。

什么是二进制安全呢?可以通过字符串的len获取字符串的内容,即便字符串中有\0也不会因此而截断。

字符串与写时复制

Breakpoint 1, execute_ex (ex=0x7ffff6014030) at /app/webserver/php-7.1.0/Zend/zend_vm_execute.h:417
417     zend_execute_data *execute_data = ex;
(gdb) n
422     ZEND_VM_LOOP_INTERRUPT_CHECK();
(gdb) 
432         if (UNEXPECTED((ret = ((opcode_handler_t)OPLINE->handler)(ZEND_OPCODE_HANDLER_ARGS_PASSTHRU)) != 0)) {
(gdb) s
ZEND_INIT_FCALL_SPEC_CONST_HANDLER (execute_data=0x7ffff6014030) at /app/webserver/php-7.1.0/Zend/zend_vm_execute.h:2238
2238        USE_OPLINE

ZEND_INIT_FCALL_SPEC_CONST_HANDLER 执行的date函数

接着执行n。到第二个execute_ex。

execute_ex (ex=0x7ffff6014030) at /app/webserver/php-7.1.0/Zend/zend_vm_execute.h:453
453     }
(gdb) 
432         if (UNEXPECTED((ret = ((opcode_handler_t)OPLINE->handler)(ZEND_OPCODE_HANDLER_ARGS_PASSTHRU)) != 0)) {
(gdb) s
ZEND_SEND_VAL_SPEC_CONST_HANDLER (execute_data=0x7ffff6014030) at /app/webserver/php-7.1.0/Zend/zend_vm_execute.h:3060
3060        USE_OPLINE
(gdb) n
3064        value = EX_CONSTANT(opline->op1);
(gdb) 
3065        arg = ZEND_CALL_VAR(EX(call), opline->result.var);
(gdb) p *value
$6 = {value = {lval = 140737320971264, dval = 6.953347537963436e-310, counted = 0x7ffff605ec00, str = 0x7ffff605ec00, arr = 0x7ffff605ec00, 
    obj = 0x7ffff605ec00, res = 0x7ffff605ec00, ref = 0x7ffff605ec00, ast = 0x7ffff605ec00, zv = 0x7ffff605ec00, ptr = 0x7ffff605ec00, ce = 0x7ffff605ec00, 
    func = 0x7ffff605ec00, ww = {w1 = 4127583232, w2 = 32767}}, u1 = {v = {type = 6 '\006', type_flags = 0 '\000', const_flags = 0 '\000', 
      reserved = 0 '\000'}, type_info = 6}, u2 = {next = 4294967295, cache_slot = 4294967295, lineno = 4294967295, num_args = 4294967295, 
    fe_pos = 4294967295, fe_iter_idx = 4294967295, access_flags = 4294967295, property_guard = 4294967295}}
(gdb) p *value.value.str
$7 = {gc = {refcount = 0, u = {v = {type = 6 '\006', flags = 2 '\002', gc_info = 0}, type_info = 518}}, h = 9223372247549609961, len = 5, val = "Y"}
(gdb) p *value.value.str.val@5
$8 = "Y-m-d"

接着到下一个execute_ex,这个就先跳过。

execute_ex (ex=0x7ffff6014030) at /app/webserver/php-7.1.0/Zend/zend_vm_execute.h:453
453     }
(gdb) 
432         if (UNEXPECTED((ret = ((opcode_handler_t)OPLINE->handler)(ZEND_OPCODE_HANDLER_ARGS_PASSTHRU)) != 0)) {
(gdb) s
ZEND_DO_ICALL_SPEC_RETVAL_USED_HANDLER (execute_data=0x7ffff6014030) at /app/webserver/php-7.1.0/Zend/zend_vm_execute.h:660
660     USE_OPLINE
(gdb) n
661     zend_execute_data *call = EX(call);

又一个execute_ex。通过ZEND_CONCAT_SPEC_CONST_TMPVAR_HANDLER能看出来是字符串拼接。

execute_ex (ex=0x7ffff6014030) at /app/webserver/php-7.1.0/Zend/zend_vm_execute.h:453
453     }
(gdb) 
432         if (UNEXPECTED((ret = ((opcode_handler_t)OPLINE->handler)(ZEND_OPCODE_HANDLER_ARGS_PASSTHRU)) != 0)) {
(gdb) s
ZEND_CONCAT_SPEC_CONST_TMPVAR_HANDLER (execute_data=0x7ffff6014030) at /app/webserver/php-7.1.0/Zend/zend_vm_execute.h:10463
10463       USE_OPLINE

又一个execute_ex,从ZEND_ASSIGN_SPEC_CV_TMP_RETVAL_UNUSED_HANDLER能看出来是,变量赋值。也能猜出来是赋值给了$a。

execute_ex (ex=0x7ffff6014030) at /app/webserver/php-7.1.0/Zend/zend_vm_execute.h:453
453     }
(gdb) 
432         if (UNEXPECTED((ret = ((opcode_handler_t)OPLINE->handler)(ZEND_OPCODE_HANDLER_ARGS_PASSTHRU)) != 0)) {
(gdb) s
ZEND_ASSIGN_SPEC_CV_TMP_RETVAL_UNUSED_HANDLER (execute_data=0x7ffff6014030) at /app/webserver/php-7.1.0/Zend/zend_vm_execute.h:40728
40728       USE_OPLINE
(gdb) n
40734       value = _get_zval_ptr_tmp(opline->op2.var, execute_data, &free_op2);
(gdb) 
40735       variable_ptr = _get_zval_ptr_cv_undef_BP_VAR_W(execute_data, opline->op1.var);
(gdb) p *value.value.str
$9 = {gc = {refcount = 1, u = {v = {type = 6 '\006', flags = 0 '\000', gc_info = 0}, type_info = 6}}, h = 0, len = 16, val = "s"}
(gdb) p *value.value.str.val@16
$10 = "string2019-04-21"
(gdb) p value
$11 = (zval *) 0x7ffff60140

那上面是赋值给了$a,下一步就是echo的执行了。

execute_ex (ex=0x7ffff6014030) at /app/webserver/php-7.1.0/Zend/zend_vm_execute.h:453
453     }
(gdb) 
432         if (UNEXPECTED((ret = ((opcode_handler_t)OPLINE->handler)(ZEND_OPCODE_HANDLER_ARGS_PASSTHRU)) != 0)) {
(gdb) s
ZEND_ECHO_SPEC_CV_HANDLER (execute_data=0x7ffff6014030) at /app/webserver/php-7.1.0/Zend/zend_vm_execute.h:34636
34636       USE_OPLINE
(gdb) n
34641       z = _get_zval_ptr_cv_undef(execute_data, opline->op1.var);
(gdb) n
34643       if (Z_TYPE_P(z) == IS_STRING) {
(gdb) p *z
$16 = {value = {lval = 140737320995808, dval = 6.9533475391760707e-310, counted = 0x7ffff6064be0, str = 0x7ffff6064be0, arr = 0x7ffff6064be0, 
    obj = 0x7ffff6064be0, res = 0x7ffff6064be0, ref = 0x7ffff6064be0, ast = 0x7ffff6064be0, zv = 0x7ffff6064be0, ptr = 0x7ffff6064be0, ce = 0x7ffff6064be0, 
    func = 0x7ffff6064be0, ww = {w1 = 4127607776, w2 = 32767}}, u1 = {v = {type = 6 '\006', type_flags = 20 '\024', const_flags = 0 '\000', 
      reserved = 0 '\000'}, type_info = 5126}, u2 = {next = 0, cache_slot = 0, lineno = 0, num_args = 0, fe_pos = 0, fe_iter_idx = 0, access_flags = 0, 
    property_guard = 0}}
(gdb) p *z.value.str
$17 = {gc = {refcount = 1, u = {v = {type = 6 '\006', flags = 0 '\000', gc_info = 0}, type_info = 6}}, h = 0, len = 16, val = "s"}
(gdb) p *z.value.str.val@16
$18 = "string2019-04-21"

接着执行 $b = $a;

execute_ex (ex=0x7ffff6014030) at /app/webserver/php-7.1.0/Zend/zend_vm_execute.h:453
453     }
(gdb) 
432         if (UNEXPECTED((ret = ((opcode_handler_t)OPLINE->handler)(ZEND_OPCODE_HANDLER_ARGS_PASSTHRU)) != 0)) {
(gdb) s
ZEND_ASSIGN_SPEC_CV_CV_RETVAL_UNUSED_HANDLER (execute_data=0x7ffff6014030) at /app/webserver/php-7.1.0/Zend/zend_vm_execute.h:46355
46355       USE_OPLINE
(gdb) n
46361       value = _get_zval_ptr_cv_BP_VAR_R(execute_data, opline->op2.var);

echo $b; $b 跟 $a 指向的字符串地址一样(都是0x7ffff6064be0)。refcount为2,说明同一个字符串被引用了两次。

execute_ex (ex=0x7ffff6014030) at /app/webserver/php-7.1.0/Zend/zend_vm_execute.h:453
453     }
(gdb) 
432         if (UNEXPECTED((ret = ((opcode_handler_t)OPLINE->handler)(ZEND_OPCODE_HANDLER_ARGS_PASSTHRU)) != 0)) {
(gdb) s
ZEND_ECHO_SPEC_CV_HANDLER (execute_data=0x7ffff6014030) at /app/webserver/php-7.1.0/Zend/zend_vm_execute.h:34636
34636       USE_OPLINE
(gdb) n
34641       z = _get_zval_ptr_cv_undef(execute_data, opline->op1.var);
(gdb) 
34643       if (Z_TYPE_P(z) == IS_STRING) {
(gdb) p *z
$19 = {value = {lval = 140737320995808, dval = 6.9533475391760707e-310, counted = 0x7ffff6064be0, str = 0x7ffff6064be0, arr = 0x7ffff6064be0, 
    obj = 0x7ffff6064be0, res = 0x7ffff6064be0, ref = 0x7ffff6064be0, ast = 0x7ffff6064be0, zv = 0x7ffff6064be0, ptr = 0x7ffff6064be0, ce = 0x7ffff6064be0, 
    func = 0x7ffff6064be0, ww = {w1 = 4127607776, w2 = 32767}}, u1 = {v = {type = 6 '\006', type_flags = 20 '\024', const_flags = 0 '\000', 
      reserved = 0 '\000'}, type_info = 5126}, u2 = {next = 0, cache_slot = 0, lineno = 0, num_args = 0, fe_pos = 0, fe_iter_idx = 0, access_flags = 0, 
    property_guard = 0}}
(gdb) p *z.value.str
$20 = {gc = {refcount = 2, u = {v = {type = 6 '\006', flags = 0 '\000', gc_info = 0}, type_info = 6}}, h = 0, len = 16, val = "s"}

接着$b = 'new string';

execute_ex (ex=0x7ffff6014030) at /app/webserver/php-7.1.0/Zend/zend_vm_execute.h:453
453     }
(gdb) 
432         if (UNEXPECTED((ret = ((opcode_handler_t)OPLINE->handler)(ZEND_OPCODE_HANDLER_ARGS_PASSTHRU)) != 0)) {
(gdb) s
ZEND_ASSIGN_SPEC_CV_CONST_RETVAL_UNUSED_HANDLER (execute_data=0x7ffff6014030) at /app/webserver/php-7.1.0/Zend/zend_vm_execute.h:39435
39435       USE_OPLINE
(gdb) n
39441       value = EX_CONSTANT(opline->op2);
(gdb) n
39442       variable_ptr = _get_zval_ptr_cv_undef_BP_VAR_W(execute_data, opline->op1.var);
(gdb) p *value.value.str
$23 = {gc = {refcount = 0, u = {v = {type = 6 '\006', flags = 2 '\002', gc_info = 0}, type_info = 518}}, h = 17470050984681474246, len = 10, val = "n"}
(gdb) p *value.value.str.val@10
$24 = "new string"
(gdb) p *value
$25 = {value = {lval = 140737320995728, dval = 6.9533475391721181e-310, counted = 0x7ffff6064b90, str = 0x7ffff6064b90, arr = 0x7ffff6064b90, 
    obj = 0x7ffff6064b90, res = 0x7ffff6064b90, ref = 0x7ffff6064b90, ast = 0x7ffff6064b90, zv = 0x7ffff6064b90, ptr = 0x7ffff6064b90, ce = 0x7ffff6064b90, 
    func = 0x7ffff6064b90, ww = {w1 = 4127607696, w2 = 32767}}, u1 = {v = {type = 6 '\006', type_flags = 0 '\000', const_flags = 0 '\000', 
      reserved = 0 '\000'}, type_info = 6}, u2 = {next = 4294967295, cache_slot = 4294967295, lineno = 4294967295, num_args = 4294967295, 
    fe_pos = 4294967295, fe_iter_idx = 4294967295, access_flags = 4294967295, property_guard = 4294967295}}

我们看到str的地址是0x7ffff6064b90

echo $b;

execute_ex (ex=0x7ffff6014030) at /app/webserver/php-7.1.0/Zend/zend_vm_execute.h:453
453     }
(gdb) 
432         if (UNEXPECTED((ret = ((opcode_handler_t)OPLINE->handler)(ZEND_OPCODE_HANDLER_ARGS_PASSTHRU)) != 0)) {
(gdb) s
ZEND_ECHO_SPEC_CV_HANDLER (execute_data=0x7ffff6014030) at /app/webserver/php-7.1.0/Zend/zend_vm_execute.h:34636
34636       USE_OPLINE
(gdb) n
34641       z = _get_zval_ptr_cv_undef(execute_data, opline->op1.var);
(gdb) 
34643       if (Z_TYPE_P(z) == IS_STRING) {
(gdb) p *z
$26 = {value = {lval = 140737320995728, dval = 6.9533475391721181e-310, counted = 0x7ffff6064b90, str = 0x7ffff6064b90, arr = 0x7ffff6064b90, 
    obj = 0x7ffff6064b90, res = 0x7ffff6064b90, ref = 0x7ffff6064b90, ast = 0x7ffff6064b90, zv = 0x7ffff6064b90, ptr = 0x7ffff6064b90, ce = 0x7ffff6064b90, 
    func = 0x7ffff6064b90, ww = {w1 = 4127607696, w2 = 32767}}, u1 = {v = {type = 6 '\006', type_flags = 0 '\000', const_flags = 0 '\000', 
      reserved = 0 '\000'}, type_info = 6}, u2 = {next = 0, cache_slot = 0, lineno = 0, num_args = 0, fe_pos = 0, fe_iter_idx = 0, access_flags = 0, 
    property_guard = 0}}
(gdb) p *z.value.str
$27 = {gc = {refcount = 0, u = {v = {type = 6 '\006', flags = 2 '\002', gc_info = 0}, type_info = 518}}, h = 17470050984681474246, len = 10, val = "n"}
(gdb) p *z.value.str.val@10
$28 = "new string"

从上面看到$b地址变成了0x7ffff6064b90,然后引用计数(refcount )也是0。

打印下之前的地址。也就是现在$a还在用。然后看到refcount为1。

(gdb) p (zend_string*)0x7ffff6064be0
$31 = (zend_string *) 0x7ffff6064be0
(gdb) p *(zend_string*)0x7ffff6064be0
$32 = {gc = {refcount = 1, u = {v = {type = 6 '\006', flags = 0 '\000', gc_info = 0}, type_info = 6}}, h = 0, len = 16, val = "s"}
(gdb) p *((zend_string*)0x7ffff6064be0).val@16
$33 = "string2019-04-21"

上面的$a的引用计数为1。而$b指向的'new string'的refcount是0(flags = 2),这里涉及到了一个内部字符串(interned string)的概念,这些interned string字符串内容是不变的。等同于c语言中的静态变量区的字符串。程序执行完会统一销毁释放,无需通过引用计数管理。

参考资料:

  • 《gdb调试乱序,value optimized out解决方法》https://blog.csdn.net/lemonaha/article/details/76391683
  • 《PHP7的基本变量》https://www.imooc.com/video/18431
  • 《php内核剖析》秦朋
  • 《__stdcall》https://baike.baidu.com/item/__stdcall/9466040?fr=aladdin

你可能感兴趣的:(【笔记】PHP7的基本变量:zval、gdb分析php、字符串写时复制)