摘要：

本文作为 “x264视频编码器应用与实现” 系列博文的第六篇，主要讨论x264中图像与码流结构体的定义和使用。

1. 基本概念

我们知道，包括H.264在内的所有视频编码方法，其本质都是将像素格式的图像数据编码为数据量远小于原始数据的压缩码流格式。而在x264中，专门为像素格式和码流格式定义了相应的结构来表示，即：

x264_picture_t：x264的图像结构；
x264_nal_t：x264的码流结构；

其中，输入编码器的图像结构 x264_picture_t 的实例应当由调用者负责创建。

2. 图像结构 x264_picture_t

x264_picture_t 的定义如下所示：

/* Picture exchanged with the encoder.
 * Fields marked "In" are read from the caller's picture; fields marked "Out"
 * are filled in for the encoded picture.  Several fields serve both roles. */
typedef struct x264_picture_t
{
    /* In: force picture type (if not auto)
     *     If x264 encoding parameters are violated in the forcing of picture types,
     *     x264 will correct the input picture type and log a warning.
     * Out: type of the picture encoded */
    int     i_type;
    /* In: force quantizer for != X264_QP_AUTO */
    int     i_qpplus1;
    /* In: pic_struct, for pulldown/doubling/etc...used only if b_pic_struct=1.
     *     use pic_struct_e for pic_struct inputs
     * Out: pic_struct element associated with frame */
    int     i_pic_struct;
    /* Out: whether this frame is a keyframe.  Important when using modes that result in
     * SEI recovery points being used instead of IDR frames. */
    int     b_keyframe;
    /* In: user pts
     * Out: pts of encoded picture (user) */
    int64_t i_pts;
    /* Out: frame dts. When the pts of the first frame is close to zero,
     *      initial frames may have a negative dts which must be dealt with by any muxer */
    int64_t i_dts;
    /* In: custom encoding parameters to be set from this frame forwards
     *     (in coded order, not display order). If NULL, continue using
     *     parameters from the previous frame.  Some parameters, such as
     *     aspect ratio, can only be changed per-GOP due to the limitations
     *     of H.264 itself; in this case, the caller must force an IDR frame
     *     if it needs the changed parameter to apply immediately. */
    x264_param_t *param;
    /* In: raw image data
     * Out: reconstructed image data.  x264 may skip part of the reconstruction process,
     *      e.g. deblocking, in frames where it isn't necessary.  To force complete
     *      reconstruction, at a small speed cost, set b_full_recon. */
    x264_image_t img;
    /* In: optional information to modify encoder decisions for this frame
     * Out: information about the encoded frame */
    x264_image_properties_t prop;
    /* Out: HRD timing information. Output only when i_nal_hrd is set. */
    x264_hrd_t hrd_timing;
    /* In: arbitrary user SEI (e.g subtitles, AFDs) */
    x264_sei_t extra_sei;
    /* Private user data. Copied from input to output frames. */
    void *opaque;
} x264_picture_t;

围绕 x264_picture_t，x264 提供了三个配套函数：x264_picture_init、x264_picture_alloc 和 x264_picture_clean。下面分别对这三个函数进行研究。

3. 图像结构 x264_picture_t 的初始化和释放方法

3.1 x264_picture_init

从命名可知，x264_picture_init 方法的主要作用在于对图像结构进行初始化，其实现为：

/****************************************************************************
 * x264_picture_init:
 ****************************************************************************/
/* Put *pic into its default state.
 * Every byte of the structure is cleared first; afterwards the picture type,
 * the quantizer and the pic_struct are set to their respective "auto" values.
 * No memory is allocated here: the caller owns the storage behind pic. */
static void picture_init( x264_picture_t *pic )
{
    memset( pic, 0, sizeof( *pic ) );

    /* The three "auto" fields are set explicitly after the wipe. */
    pic->i_pic_struct = PIC_STRUCT_AUTO;
    pic->i_qpplus1    = X264_QP_AUTO;
    pic->i_type       = X264_TYPE_AUTO;
}

/* Public entry point for resetting a picture to its defaults.
 * Forwards pic to the static picture_init through x264_stack_align.
 * NOTE(review): x264_stack_align is defined elsewhere; presumably it fixes up
 * stack alignment before making the call -- confirm against its definition. */
void x264_picture_init( x264_picture_t *pic )
{
    x264_stack_align( picture_init, pic );
}

该函数在这三个之中最简单：它并不分配内存，只是用 memset 将调用者传入的 x264_picture_t 对象整体清零，并将其中三个数据成员（i_type、i_qpplus1、i_pic_struct）设为 AUTO 类型。

3.2 x264_picture_alloc

该函数相比 x264_picture_init，多了若干个输入参数，其实现如下：

/****************************************************************************
 * x264_picture_alloc:
 ****************************************************************************/
/* Initialise *pic and allocate one contiguous pixel buffer for it.
 *
 * pic      : picture to set up; it is reset with picture_init before use.
 * i_csp    : colorspace, optionally OR-ed with X264_CSP_HIGH_DEPTH (which
 *            doubles the stride of every plane).
 * i_width  : picture width, must not be negative.
 * i_height : picture height, must not be negative.
 *
 * Returns 0 on success.  Returns -1 if the colorspace is unsupported, a
 * dimension is negative, the required buffer size does not fit in a 32-bit
 * signed integer, or the allocation fails.  When -1 is returned because of the
 * colorspace or a negative dimension, *pic is left untouched; in the other
 * failure cases *pic has been reset and img.plane[0] is NULL.
 *
 * On success all planes live in the single buffer stored in img.plane[0];
 * the remaining plane pointers are offsets into that buffer. */
static int picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
{
    /* Per-colorspace layout: number of planes plus, for each plane, the
     * horizontal and vertical scale relative to the picture size, expressed
     * in units of 1/256 (hence the ">> 8" below). */
    typedef struct
    {
        int planes;
        int width_fix8[3];
        int height_fix8[3];
    } x264_csp_tab_t;

    static const x264_csp_tab_t csp_tab[] =
    {
        [X264_CSP_I420] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256/2, 256/2 } },
        [X264_CSP_YV12] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256/2, 256/2 } },
        [X264_CSP_NV12] = { 2, { 256*1, 256*1 },        { 256*1, 256/2 },       },
        [X264_CSP_NV21] = { 2, { 256*1, 256*1 },        { 256*1, 256/2 },       },
        [X264_CSP_I422] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256*1, 256*1 } },
        [X264_CSP_YV16] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256*1, 256*1 } },
        [X264_CSP_NV16] = { 2, { 256*1, 256*1 },        { 256*1, 256*1 },       },
        [X264_CSP_YUYV] = { 1, { 256*2 },               { 256*1 },              },
        [X264_CSP_UYVY] = { 1, { 256*2 },               { 256*1 },              },
        [X264_CSP_I444] = { 3, { 256*1, 256*1, 256*1 }, { 256*1, 256*1, 256*1 } },
        [X264_CSP_YV24] = { 3, { 256*1, 256*1, 256*1 }, { 256*1, 256*1, 256*1 } },
        [X264_CSP_BGR]  = { 1, { 256*3 },               { 256*1 },              },
        [X264_CSP_BGRA] = { 1, { 256*4 },               { 256*1 },              },
        [X264_CSP_RGB]  = { 1, { 256*3 },               { 256*1 },              },
    };

    /* Upper bound for a stride and for the whole buffer.  Strides are stored
     * in an int field, and the parameter type of x264_malloc is not visible
     * here, so sizes are limited to what a 32-bit signed int can hold. */
    const int64_t max_size = ((int64_t)1 << 31) - 1;

    int csp = i_csp & X264_CSP_MASK;
    if( csp <= X264_CSP_NONE || csp >= X264_CSP_MAX || csp == X264_CSP_V210 )
        return -1;
    /* Negative dimensions would produce negative strides and sizes. */
    if( i_width < 0 || i_height < 0 )
        return -1;
    picture_init( pic );
    pic->img.i_csp = i_csp;
    pic->img.i_plane = csp_tab[csp].planes;
    int depth_factor = i_csp & X264_CSP_HIGH_DEPTH ? 2 : 1;
    int64_t plane_offset[3] = {0};
    int64_t frame_size = 0;
    for( int i = 0; i < pic->img.i_plane; i++ )
    {
        /* All arithmetic is done in 64 bits; with non-negative int inputs and
         * scale factors of at most 256*4 none of these products can overflow
         * int64_t, so the range checks below are themselves well defined. */
        int64_t stride = (((int64_t)i_width * csp_tab[csp].width_fix8[i]) >> 8) * depth_factor;
        int64_t rows = ((int64_t)i_height * csp_tab[csp].height_fix8[i]) >> 8;
        /* Stride is checked first so that rows * stride stays below 2^62. */
        if( stride > max_size || rows * stride > max_size - frame_size )
            return -1;
        pic->img.i_stride[i] = (int)stride;
        plane_offset[i] = frame_size;
        frame_size += rows * stride;
    }
    pic->img.plane[0] = x264_malloc( frame_size );
    if( !pic->img.plane[0] )
        return -1;
    /* Remaining planes are carved out of the same allocation. */
    for( int i = 1; i < pic->img.i_plane; i++ )
        pic->img.plane[i] = pic->img.plane[0] + plane_offset[i];
    return 0;
}

/* Public entry point for allocating a picture.
 * Forwards all arguments unchanged to the static picture_alloc through
 * x264_stack_align and returns the result of that invocation; picture_alloc
 * itself returns 0 on success and -1 on failure.
 * NOTE(review): x264_stack_align is defined elsewhere; presumably it fixes up
 * stack alignment before making the call -- confirm against its definition. */
int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
{
    return x264_stack_align( picture_alloc, pic, i_csp, i_width, i_height );
}

从该函数的实现中可以看出，其内部也是调用了 picture_init 来初始化一个对象。另外，还进行了一个图像结构中像素存储空间的分配。

3.3 x264_picture_clean

该函数执行的是释放相应的图像结构，其实现为：

/****************************************************************************
 * x264_picture_clean:
 ****************************************************************************/
/* Release the pixel buffer attached to *pic and wipe the structure.
 * Only img.plane[0] is handed to x264_free; the other plane pointers are
 * not freed individually.  The x264_picture_t object itself is not freed. */
static void picture_clean( x264_picture_t *pic )
{
    void *pixel_buffer = pic->img.plane[0];

    x264_free( pixel_buffer );

    /* Clear every field so that no stale pointer or stride remains. */
    memset( pic, 0, sizeof( *pic ) );
}

/* Public entry point for releasing a picture's pixel buffer.
 * Forwards pic to the static picture_clean through x264_stack_align.
 * NOTE(review): x264_stack_align is defined elsewhere; presumably it fixes up
 * stack alignment before making the call -- confirm against its definition. */
void x264_picture_clean( x264_picture_t *pic )
{
    x264_stack_align( picture_clean, pic );
}

需注意的是，只有使用 x264_picture_alloc 创建的图像结构才应使用该函数进行清理，因为该函数会调用 x264_free 释放 img.plane[0] 所指向的像素缓冲区。

4. x264的码流结构 x264_nal_t

对于编码器输出的码流，x264 同样定义了专门的数据结构来保存，即 x264_nal_t。其定义如下:

/* Describes one NAL unit: its header-level attributes, the macroblock range
 * it covers when it is a slice, and the location and size of its payload. */
typedef struct x264_nal_t
{
    int i_ref_idc;  /* one of the nal_priority_e values */
    int i_type;     /* one of the nal_unit_type_e values */
    /* NOTE(review): presumably nonzero when the long form of the startcode is
     * used for this NAL -- confirm against the code that writes NAL units. */
    int b_long_startcode;
    int i_first_mb; /* If this NAL is a slice, the index of the first MB in the slice. */
    int i_last_mb;  /* If this NAL is a slice, the index of the last MB in the slice. */

    /* Size of payload (including any padding) in bytes. */
    int     i_payload;
    /* Payload bytes.
     * If param->b_annexb is set, Annex-B bytestream with startcode.
     * Otherwise, startcode is replaced with a 4-byte size.
     * This size is the size used in mp4/similar muxing; it is equal to i_payload-4 */
    uint8_t *p_payload;

    /* Size of padding in bytes. */
    int i_padding;
} x264_nal_t;