cJSON源码分析（二）

2021-05-07 21:57:27 阅读：184 来源： 互联网

标签：分析 return cJSON parse buffer 源码 offset input

在构建好一个JSON对象之后，如何访问呢？

首先试着把 JSON 字符串解析（反序列化）成 cJSON 对象，再将其格式化打印出来，看看整体结构：

char * string = "{\"name\":\"xxx\", \"name2\":\"xxx2\"}";cJSON * root = cJSON_Parse(string);//json字符串序列化printf("%s\n", cJSON_Print(root));//json格式化输出

在这里插入图片描述
看源码了解一下cJSON_Parse函数大致实现过程吧

//用来格式化json字符串所需要的缓存空间地带typedef struct{
    const unsigned char *content;//格式化的内容
    size_t length;//内容长度
    size_t offset;//偏移量
    size_t depth; /* How deeply nested (in arrays/objects) is the input at the current offset. 
    当前偏移量处的输入嵌套深度（在数组/对象中）有多深*/
    internal_hooks hooks;//内存分配} parse_buffer;/* check if the given size is left to read in a given parse buffer (starting with 1) 
检查给定的大小是否留在给定的解析缓冲区中读取（从1开始）
这里应该是通过给定buffer读取内存范围是否比buffer实际占据的范围小，保证不超出空间读取*/#define can_read(buffer, size) ((buffer != NULL) && (((buffer)->offset + size) length))/* check if the buffer can be accessed at the given index (starting with 0) 
检查是否可以在给定索引处访问缓冲区（从0开始）*/#define can_access_at_index(buffer, index) ((buffer != NULL) && (((buffer)->offset + index) < (buffer)->length))#define cannot_access_at_index(buffer, index) (!can_access_at_index(buffer, index))/* get a pointer to the buffer at the position 
获取指向位置处缓冲区的指针*/#define buffer_at_offset(buffer) ((buffer)->content + (buffer)->offset)/* Default options for cJSON_Parse 
cJSON_Parse的默认选项*/CJSON_PUBLIC(cJSON *) cJSON_Parse(const char *value){
    //传入json字符串，调用cJSON_ParseWithOpts进行序列化
    return cJSON_ParseWithOpts(value, 0, 0);//0为假，1为真
    //第二个参数为0，说明不需要获得字符串结尾的偏移量
    //第三个参数为0，说明解析字符串时不是以空白作为结尾，类似我们使用scanf读取一个带空格的字符串是，默认空格就是字符串的结尾。但是在这里，空格不是结尾
    }CJSON_PUBLIC(cJSON *) cJSON_ParseWithOpts(const char *value, const char **return_parse_end, cJSON_bool require_null_terminated){
    //return_parse_end  require_null_terminated为0，为空、为假
    size_t buffer_length;

    if (NULL == value)
    {
        return NULL;
    }

    /* Adding null character size due to require_null_terminated. 
    由require_null_terminated添加空字符大小*/
    buffer_length = strlen(value) + sizeof("");//统计json字符串长度，并在结尾加一个空字符串的长度，因为strlen统计长度是末尾的\0是不统计进去

    return cJSON_ParseWithLengthOpts(value, buffer_length, return_parse_end, require_null_terminated);}/* Parse an object - create a new root, and populate. 
解析一个对象-创建一个新的根，并填充*/CJSON_PUBLIC(cJSON *) cJSON_ParseWithLengthOpts(const char *value, size_t buffer_length, const char **return_parse_end, cJSON_bool require_null_terminated){
    parse_buffer buffer = { 0, 0, 0, 0, { 0, 0, 0 } };//首先都初始化为0
    cJSON *item = NULL;

    /* reset error position 复位错误位置*/
    //global_error是静态全集变量
    global_error.json = NULL;
    global_error.position = 0;

    if (value == NULL || 0 == buffer_length)
    {
        goto fail;
    }

    buffer.content = (const unsigned char*)value;
    buffer.length = buffer_length; 
    buffer.offset = 0;
    buffer.hooks = global_hooks;

    item = cJSON_New_Item(&global_hooks);//根据上面初始化后的buffer，构建一个json对象（具体实现前面的文章已有介绍）
    if (item == NULL) /* memory fail */
    {
        goto fail;//构建失败
    }

    /*
        总之parse_value主要是解析各种类型的数据，比如在json字符串里面的时候，有 "num":123, 那么123应该作为整形数据存储
        如果是 "num":"123"，那么"123"应该作为字符串类型数据(char *)进行存储
    */
    if (!parse_value(item, buffer_skip_whitespace(skip_utf8_bom(&buffer))))
    {
        /* parse failure. ep is set. */
        goto fail;//格式化失败
    }

    /* if we require null-terminated JSON without appended garbage, skip and then check for a null terminator 
    如果我们需要不附加垃圾的以空结尾的JSON，请跳过，然后检查空结尾符*/
    if (require_null_terminated)
    {
        buffer_skip_whitespace(&buffer);
        if ((buffer.offset >= buffer.length) || buffer_at_offset(&buffer)[0] != '\0')
        {
            goto fail;
        }
    }
    if (return_parse_end)
    {
        *return_parse_end = (const char*)buffer_at_offset(&buffer);
    }

    return item;fail:
    if (item != NULL)
    {
        cJSON_Delete(item);//前面的文章已有介绍过
    }

    if (value != NULL)
    {
        error local_error;
        local_error.json = (const unsigned char*)value;
        local_error.position = 0;

        if (buffer.offset < buffer.length)
        {
            local_error.position = buffer.offset;
        }
        else if (buffer.length > 0)
        {
            local_error.position = buffer.length - 1;
        }

        if (return_parse_end != NULL)
        {
            *return_parse_end = (const char*)local_error.json + local_error.position;
        }

        global_error = local_error;
    }

    return NULL;}/* Parser core - when encountering text, process appropriately. 
解析器核心-遇到文本时，进行适当的处理*/static cJSON_bool parse_value(cJSON * const item, parse_buffer * const input_buffer){
    if ((input_buffer == NULL) || (input_buffer->content == NULL))
    {
        return false; /* no input */
    }

    /* parse the different types of values 解析不同类型的值*/
    /* null */
    if (can_read(input_buffer, 4) && (strncmp((const char*)buffer_at_offset(input_buffer), "null", 4) == 0))
    {
        item->type = cJSON_NULL;
        input_buffer->offset += 4;
        return true;
    }
    /* false */
    if (can_read(input_buffer, 5) && (strncmp((const char*)buffer_at_offset(input_buffer), "false", 5) == 0))
    {
        item->type = cJSON_False;
        input_buffer->offset += 5;
        return true;
    }
    /* true */
    if (can_read(input_buffer, 4) && (strncmp((const char*)buffer_at_offset(input_buffer), "true", 4) == 0))
    {
        item->type = cJSON_True;
        item->valueint = 1;
        input_buffer->offset += 4;
        return true;
    }
    /* string */
    if (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == '\"'))
    {
        return parse_string(item, input_buffer);//解析字符串
    }
    /* number */
    if (can_access_at_index(input_buffer, 0) && ((buffer_at_offset(input_buffer)[0] == '-') || ((buffer_at_offset(input_buffer)[0] >= '0') && (buffer_at_offset(input_buffer)[0] <= '9'))))
    {
        return parse_number(item, input_buffer);//解析数字，原理同parse_string类似
    }
    /* array */
    if (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == '['))
    {
        return parse_array(item, input_buffer);//解析数组，原理同parse_string类似
    }
    /* object */
    if (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == '{'))
    {
        return parse_object(item, input_buffer);
    }

    return false;}/* Parse the input text into an unescaped cinput, and populate item. 
将输入文本解析为未转义的 cinput，并填充项
这里就是开始序列化json字符串了
*/static cJSON_bool parse_string(cJSON * const item, parse_buffer * const input_buffer){
    // 获得缓存正确的读取空间范围，防治越界
    const unsigned char *input_pointer = buffer_at_offset(input_buffer) + 1;
    const unsigned char *input_end = buffer_at_offset(input_buffer) + 1;
    unsigned char *output_pointer = NULL;
    unsigned char *output = NULL;

    /* not a string */
    if (buffer_at_offset(input_buffer)[0] != '\"')//字符串没有"为开头，那么该json字符串就有问题
    {
        goto fail;
    }

    {
        /* calculate approximate size of the output (overestimate) 计算输出的近似大小（高估）*/
        size_t allocation_length = 0;
        size_t skipped_bytes = 0;
        while (((size_t)(input_end - input_buffer->content) < input_buffer->length) && (*input_end != '\"'))
        {
            /* is escape sequence 
            是转义序列*/
            if (input_end[0] == '\\')
            {
                if ((size_t)(input_end + 1 - input_buffer->content) >= input_buffer->length)
                {
                    /* prevent buffer overflow when last input character is a backslash 
                    当最后一个输入字符是反斜杠时防止缓冲区溢出*/
                    goto fail;
                }
                skipped_bytes++;
                input_end++;
            }
            input_end++;
        }
        if (((size_t)(input_end - input_buffer->content) >= input_buffer->length) || (*input_end != '\"'))
        {
            goto fail; /* string ended unexpectedly 字符串意外结束，就是说明明还符合循环条件但是却运行到这里来*/
        }

        /* This is at most how much we need for the output 
        这是我们最多需要多少产出*/
        allocation_length = (size_t) (input_end - buffer_at_offset(input_buffer)) - skipped_bytes;
        output = (unsigned char*)input_buffer->hooks.allocate(allocation_length + sizeof(""));//多加一个""的大小，应该是为了存下每个字符串末尾的\0,strlen()计算长度时不考虑末尾的'\0'
        if (output == NULL)
        {
            goto fail; /* allocation failure 分配失败*/
        }
    }

    output_pointer = output;
    /* loop through the string literal 
    循环字符串文本*/
    while (input_pointer < input_end)
    {
        if (*input_pointer != '\\')
        {
            *output_pointer++ = *input_pointer++;
        }
        /* escape sequence 转义序列*/
        else
        {
            unsigned char sequence_length = 2;
            if ((input_end - input_pointer) < 1)
            {
                goto fail;
            }

            switch (input_pointer[1])
            {
                case 'b':
                    *output_pointer++ = '\b';
                    break;
                case 'f':
                    *output_pointer++ = '\f';
                    break;
                case 'n':
                    *output_pointer++ = '\n';
                    break;
                case 'r':
                    *output_pointer++ = '\r';
                    break;
                case 't':
                    *output_pointer++ = '\t';
                    break;
                case '\"':
                case '\\':
                case '/':
                    *output_pointer++ = input_pointer[1];
                    break;

                /* UTF-16 literal UTF-16文字*/
                case 'u':
                    sequence_length = utf16_literal_to_utf8(input_pointer, input_end, &output_pointer);
                    if (sequence_length == 0)
                    {
                        /* failed to convert UTF16-literal to UTF-8 无法将UTF16文本转换为UTF-8*/
                        goto fail;
                    }
                    break;

                default:
                    goto fail;
            }
            input_pointer += sequence_length;
        }
    }

    /* zero terminate the output 零终止输出*/
    *output_pointer = '\0';

    item->type = cJSON_String;
    item->valuestring = (char*)output;

    input_buffer->offset = (size_t) (input_end - input_buffer->content);
    input_buffer->offset++;

    return true;//序列化成功fail:
    if (output != NULL)
    {
        input_buffer->hooks.deallocate(output);
    }

    if (input_pointer != NULL)
    {
        input_buffer->offset = (size_t)(input_pointer - input_buffer->content);
    }

    return false;//序列化失败}/* skip the UTF-8 BOM (byte order mark) if it is at the beginning of a buffer 
如果UTF-8bom（字节顺序标记）位于缓冲区的开头，则跳过它*/static parse_buffer *skip_utf8_bom(parse_buffer * const buffer){
    if ((buffer == NULL) || (buffer->content == NULL) || (buffer->offset != 0))
    {
        return NULL;
    }

    if (can_access_at_index(buffer, 4) && (strncmp((const char*)buffer_at_offset(buffer), "\xEF\xBB\xBF", 3) == 0))
    {
        buffer->offset += 3;
    }

    return buffer;}

解析（反序列化）JSON 字符串确实繁琐，其主要工作量花在缓冲区边界的判定和数据类型的转换上。
大致了解一下工作流程，函数调用顺序大致如下（主要功能）：

cJSON_Parse   ==>    cJSON_ParseWithOpts  ==>   cJSON_ParseWithLengthOpts ==>parse_value和cJSON_New_Item

标签：分析,return,cJSON,parse,buffer,源码,offset,input
来源： https://blog.51cto.com/u_14175378/2759885

本站声明： 1. iCode9 技术分享网（下文简称本站）提供的所有内容，仅供技术学习、探讨和分享；
2. 关于本站的所有留言、评论、转载及引用，纯属内容发起人的个人观点，与本站观点和立场无关；
3. 关于本站的所有言论和文字，纯属内容发起人的个人观点，与本站观点和立场无关；
4. 本站文章均是网友提供，不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属；如您发现该文章侵犯了您的权益，可联系我们第一时间进行删除；
5. 本站为非盈利性的个人网站，所有内容不会用来进行牟利，也不会利用任何形式的广告来间接获益，纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。

ICode9

cJSON源码分析（二）