VideoToolBox 解码H265
2024-04-09 18:10:53  阅读数 1338

上一篇文章介绍了如何编码 H265,这篇主要介绍如何通过 VideoToolbox 解码 H265,以及如何在只有裸流的情况下区分 H265 和 H264。

maxresdefault.jpeg

首先拿到一个H265的裸流之后,我们已经知道保存的这种裸流格式是Annex-b格式的;

解码的步骤如下:

1.读取媒体文件找到第一个StartCode(0x00 00 00 01 / 0x00 00 01);
2.找到第一个StartCode后,找到紧挨着的第二个StartCode,提取NALU Unit
3.根据H265 Header的结构解析该NALU是什么类型 从而找到 VPS/SPS/PPS 以及P帧/I帧
4.通过VPS/SPS/PPS 创建解码器,先将第一帧(I帧)送入解码器,后续再将B/P帧送入解码器,得到解码后的pixelBuffer

以上解析逻辑和关键代码,如下代码中均有体现,觉得不够直观的可以直接看demo源码;

解析裸流文件

判断是否是 0x00 00 01 / 0x00 00 00 01 起始码,及起始码长度

/// Reports whether `data` begins with an Annex-B start code, i.e. the bytes held in
/// `startCode4` (00 00 00 01) or in `startCode3` (00 00 01).
/// The 4-byte comparison always runs first, so `data` must point at 4 or more readable bytes.
static BOOL isNaluStartCode(unsigned char *data) {
    if (memcmp(data, startCode4, 4) == 0) {
        return YES;
    }
    return memcmp(data, startCode3, 3) == 0;
}

/// Returns the size of the start code found at `data`: `sizeof(startCode4)` when the
/// bytes match the 4-byte code, `sizeof(startCode3)` when they match the 3-byte code,
/// and 0 when `data` does not begin with a start code.
/// The 4-byte form is tested first, so `data` must point at 4 or more readable bytes.
static int getNaluStartCodeLength(unsigned char *data) {
    const BOOL matchesLongCode = (memcmp(data, startCode4, 4) == 0);
    if (matchesLongCode) {
        return (int)sizeof(startCode4);
    }

    const BOOL matchesShortCode = (memcmp(data, startCode3, 3) == 0);
    return matchesShortCode ? (int)sizeof(startCode3) : 0;
}

定时器循环解析文件内的 NALU Unit

/// Returns the length in bytes (3 or 4) of the Annex-B start code that begins at `cursor`,
/// or 0 when no start code begins there. Never reads at or past `limit`.
static size_t annexBStartCodeLength(const uint8_t *cursor, const uint8_t *limit) {
    const size_t remaining = (size_t)(limit - cursor);
    if (remaining < 3 || cursor[0] != 0x00 || cursor[1] != 0x00) {
        return 0;
    }
    if (cursor[2] == 0x01) {
        return 3;
    }
    if (remaining >= 4 && cursor[2] == 0x00 && cursor[3] == 0x01) {
        return 4;
    }
    return 0;
}

/// Timer callback: removes the next NAL unit from the front of `_inputBuffer`, stores it in
/// `packetBuffer` / `packetSize` as a 4-byte big-endian length followed by the NAL payload,
/// and dispatches on the HEVC NAL unit type.
///
/// - VPS / SPS / PPS payloads are stored in `_vps` / `_sps` / `_pps`.
/// - Types 0x10-0x15 call `-_initDecodeSession` and then `-decodePacket`.
/// - Type 0x27 (SEI) is ignored.
/// - Every other type is passed to `-decodePacket`.
///
/// `-endDecode` is called when the buffer does not start with a start code, when nothing
/// follows the start code, or when memory cannot be allocated. A NAL unit too short to hold
/// the 2-byte HEVC header is consumed and skipped.
///
/// The work runs synchronously on `_decodeQueue`.
- (void)tick {
    
    dispatch_sync(_decodeQueue, ^{
        // Release the packet left over from the previous tick.
        packetSize = 0;
        if (packetBuffer) {
            free(packetBuffer);
            packetBuffer = NULL;
        }

        const long available = (long)_inputSize;
        if (available <= 0) {
            [self endDecode];
            return;
        }

        uint8_t *const head = _inputBuffer;          // first unread byte
        uint8_t *const tail = head + available;      // one past the last unread byte

        // The unread data must begin with a start code and carry at least one byte after it.
        const long prefixLength = (long)annexBStartCodeLength(head, tail);
        if (prefixLength == 0 || available <= prefixLength) {
            [self endDecode];
            return;
        }

        // Scan for the next start code; reaching `tail` means this is the last NAL unit.
        // The bounded helper keeps the probe inside the buffer even in the final bytes.
        uint8_t *next = head + prefixLength;
        while (next < tail && annexBStartCodeLength(next, tail) == 0) {
            ++next;
        }

        const long consumed = next - head;                 // start code + payload
        const long payloadSize = consumed - prefixLength;  // NAL header + body

        // Build the length-prefixed packet directly. Writing the 4-byte length in front of the
        // payload handles 3-byte and 4-byte start codes identically, for the final NAL unit
        // as well as for every earlier one.
        uint8_t *packet = NULL;
        if (payloadSize >= 2) {
            packet = malloc((size_t)payloadSize + 4);
            if (packet == NULL) {
                [self endDecode];
                return;
            }
            // Big-endian length written with shifts, so it does not depend on host byte order.
            const uint32_t nalSize = (uint32_t)payloadSize;
            packet[0] = (uint8_t)(nalSize >> 24);
            packet[1] = (uint8_t)(nalSize >> 16);
            packet[2] = (uint8_t)(nalSize >> 8);
            packet[3] = (uint8_t)nalSize;
            memcpy(packet + 4, head + prefixLength, (size_t)payloadSize);
        }

        // Shift the unread remainder to the front of the input buffer.
        memmove(head, next, (size_t)(tail - next));
        _inputSize -= consumed;

        if (packet == NULL) {
            // Payload shorter than the 2-byte HEVC NAL header: nothing to decode this tick.
            return;
        }
        packetBuffer = packet;
        packetSize = payloadSize + 4;

        // nal_unit_type occupies bits 1-6 of the first NAL header byte.
        const int nalType = (packetBuffer[4] & 0x7E) >> 1;
        switch (nalType) {
            case 0x10:
            case 0x11:
            case 0x12:
            case 0x13:
            case 0x14:
            case 0x15:
                {
                    // Random-access picture (BLA / IDR / CRA): set up the session, then decode.
                    [self _initDecodeSession];
                    [self decodePacket];
                }
                break;
            case 0x27:
                {
                    // SEI: ignored.
                }
                break;
            case 0x20:
                {
                    // VPS: keep the payload without the length prefix.
                    _vps = [NSData dataWithBytes:packetBuffer + 4 length:(NSUInteger)payloadSize];
                }
                break;
            case 0x21:
                {
                    // SPS
                    _sps = [NSData dataWithBytes:packetBuffer + 4 length:(NSUInteger)payloadSize];
                }
                break;
            case 0x22:
                {
                    // PPS
                    _pps = [NSData dataWithBytes:packetBuffer + 4 length:(NSUInteger)payloadSize];
                }
                break;
            default:
                {
                    // Everything else is treated as a B/P frame.
                    [self decodePacket];
                }
                break;
        }
    });

}

获取VPS/SPS/PPS 创建解码器

// Excerpt of the -tick body: length-prefix rewrite followed by the NAL-type dispatch.
//2. Overwrite the first 4 bytes of the packet with the NAL length (the start code may be 00000001 or 000001; both cases must be handled)
        uint32_t nalSize = (uint32_t)(packetSize - 4);
        uint8_t *pNalSize = (uint8_t*)(&nalSize);
        // Bytes of nalSize are written in reverse memory order; this yields a big-endian
        // length only on a little-endian host.
        packetBuffer[0] = pNalSize[3];
        packetBuffer[1] = pNalSize[2];
        packetBuffer[2] = pNalSize[1];
        packetBuffer[3] = pNalSize[0];
       
        //3. Determine the NAL unit type (per the bitstream layout, the type field immediately follows the start code)
        // The type is taken from bits 1-6 of the byte at offset 4.
        int nalType = (packetBuffer[4] & 0x7E) >> 1;
        switch (nalType) {
            case 0x10:
            case 0x11:
            case 0x12:
            case 0x13:
            case 0x14:
            case 0x15:
                {
                    //IDR frame
                    // NOTE(review): -_initDecodeSession is not shown in this excerpt; presumably it
                    // builds the decoder from the stored VPS/SPS/PPS - confirm.
                    [self _initDecodeSession];
                    [self decodePacket];
                }
                break;
            case 0x27:
                {
                    //SEI
                    // Intentionally ignored.
                }
                break;
            case 0x20:
                {
                    //vps
                    // Store the payload (everything after the 4-byte prefix) in _vps.
                    if (_vps) { _vps = nil;}
                    size_t vpsSize = (size_t) packetSize - 4;
                    uint8_t *vps = malloc(vpsSize);
                    memcpy(vps, packetBuffer + 4, vpsSize);
                    _vps = [NSData dataWithBytes:vps length:vpsSize];
                    free(vps);
                }
                break;
            case 0x21:
                {
                    //sps
                    // Store the payload (everything after the 4-byte prefix) in _sps.
                    if (_sps) { _sps = nil;}
                    size_t spsSize = (size_t) packetSize - 4;
                    uint8_t *sps = malloc(spsSize);
                    memcpy(sps, packetBuffer + 4, spsSize);
                    _sps = [NSData dataWithBytes:sps length:spsSize];
                    free(sps);
                }
                break;
            case 0x22:
                {
                    //pps
                    // Store the payload (everything after the 4-byte prefix) in _pps.
                    if (_pps) { _pps = nil; }
                    size_t ppsSize = (size_t) packetSize - 4;
                    uint8_t *pps = malloc(ppsSize);
                    memcpy(pps, packetBuffer + 4, ppsSize);
                    _pps = [NSData dataWithBytes:pps length:ppsSize];
                    free(pps);
                }
                break;
            default:
                {
                    // B/P frame
                    // Every NAL type not listed above is handed to -decodePacket.
                    [self decodePacket];
                }
                break;
        }
    });

通过VPS/SPS/PPS 创建H265类型VideoToolbox

/// Creates the HEVC decompression session from the stored `_vps` / `_sps` / `_pps`.
///
/// Does nothing when `_decodeSession` already exists. On success `_formatDescriptionOut`
/// holds the new format description and `_decodeSession` the new session, which delivers
/// decoded frames to `decodeCompressionOutputCallback` with `self` as its refCon.
/// Logs and returns when the format description cannot be created; logs and calls
/// `-endDecode` when the session cannot be created.
-(void)initVideoToolBox {
    
    if (_decodeSession) {
        return;
    }
    
    // All three parameter sets are required; a nil NSData would pass a NULL pointer below.
    if (_vps.length == 0 || _sps.length == 0 || _pps.length == 0) {
        NSLog(@"FormatDescriptionCreate fail");
        return;
    }
    
    //1. Build the format description from VPS/SPS/PPS (4-byte NAL length prefix).
    CMFormatDescriptionRef formatDescriptionOut = NULL;
    const uint8_t * const param[3] = {_vps.bytes,_sps.bytes,_pps.bytes};
    const size_t paramSize[3] = {_vps.length,_sps.length,_pps.length};
    OSStatus formateStatus =
    CMVideoFormatDescriptionCreateFromHEVCParameterSets(kCFAllocatorDefault, 3, param, paramSize, 4, NULL, &formatDescriptionOut);
    // formatDescriptionOut starts as NULL, so the ivar never receives an uninitialised pointer.
    _formatDescriptionOut = formatDescriptionOut;
    
    if (formateStatus!=noErr) {
        NSLog(@"FormatDescriptionCreate fail");
        return;
    }
    
    //2. Create the VTDecompressionSessionRef.
    // Requested output pixel format.
    uint32_t t = kCVPixelFormatType_420YpCbCr8BiPlanarFullRange;
    CFNumberRef pixelFormat = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &t);
    
    const void *keys[] = {kCVPixelBufferPixelFormatTypeKey};
    const void *values[] = {pixelFormat};
    
    // CFType callbacks make the dictionary retain its key and value, so the number can be
    // released here instead of being leaked.
    CFDictionaryRef att = CFDictionaryCreate(kCFAllocatorDefault,
                                             keys,
                                             values,
                                             1,
                                             &kCFTypeDictionaryKeyCallBacks,
                                             &kCFTypeDictionaryValueCallBacks);
    if (pixelFormat) {
        CFRelease(pixelFormat);
    }
    
    VTDecompressionOutputCallbackRecord callbackRecord;
    callbackRecord.decompressionOutputCallback = decodeCompressionOutputCallback;
    callbackRecord.decompressionOutputRefCon = (__bridge void * _Nullable)(self);
    
    OSStatus sessionStatus = VTDecompressionSessionCreate(NULL,
                                 formatDescriptionOut,
                                 NULL,
                                 att,
                                 &callbackRecord,
                                 &_decodeSession);
    if (att) {
        CFRelease(att);
    }
    if (sessionStatus != noErr) {
        NSLog(@"SessionCreate fail");
        [self endDecode];
    }
}

解码H265类型 NALU

如何辨别裸流是 H265 还是H264

探针的作用是试探当前的数据流是否是某一协议/编码格式,一般方法是进行部分预解析/解码,观察是否满足指定协议/编码的格式要求

以下代码参考FFMPEG 源码

/// Probes a raw Annex-B byte stream to decide whether it looks like HEVC (H.265).
///
/// Walks the buffer looking for 00 00 01 start codes and inspects the two-byte NAL header
/// that follows each one. Any header with one of the checked zero bits set rejects the
/// stream immediately.
///
/// @param buf    Start of the data to inspect.
/// @param length Number of readable bytes at `buf`.
/// @return 1 when at least one VPS, SPS, PPS and random-access (BLA/CRA/IDR) NAL unit
///         were seen; 0 otherwise, including for NULL or too-short input.
static int hevc_probe(unsigned char *buf, unsigned int length)
{
   uint32_t code = -1;
   int vps = 0, sps = 0, pps = 0, irap = 0;
   unsigned int i;

   // Each match reads buf[i + 1]; fewer than two bytes cannot hold anything useful.
   // This guard also avoids the unsigned wrap-around of `length - 1` when length is 0.
   if (buf == NULL || length < 2)
       return 0;

   for (i = 0; i + 1 < length; i++) {
       // Sliding 32-bit window over the last four bytes read.
       code = (code << 8) + buf[i];
       if ((code & 0xffffff00) == 0x100) {
           // The window holds 00 00 01 followed by the first NAL header byte.
           uint8_t nal2 = buf[i + 1];
           int type = (code & 0x7E) >> 1;

           if (code & 0x81) // forbidden and reserved zero bits
               return 0;

           if (nal2 & 0xf8) // reserved zero
               return 0;

           switch (type) {
           case HEVC_NAL_VPS:        vps++;  break;
           case HEVC_NAL_SPS:        sps++;  break;
           case HEVC_NAL_PPS:        pps++;  break;
           case HEVC_NAL_BLA_N_LP:
           case HEVC_NAL_BLA_W_LP:
           case HEVC_NAL_BLA_W_RADL:
           case HEVC_NAL_CRA_NUT:
           case HEVC_NAL_IDR_N_LP:
           case HEVC_NAL_IDR_W_RADL: irap++; break;
           }
       }
   }

   // All four kinds of NAL unit must be present for the stream to count as HEVC.
   if (vps && sps && pps && irap)
       return  1;
   return 0;
}

源码地址: https://github.com/hunter858/OpenGL_Study/AVFoundation/VideoToolBox-decoderH265