How to decode h264 byte stream on iOS 6+?
I am working on an iOS app that displays an H.264 video stream with AAC audio.
The stream I have is a custom one that doesn't use HLS or RTSP/RTMP, so I have my own code to handle the data. The data I receive comes in two parts: header data and frame data (for both audio and video). I would like to support iOS 6+, but will work something out if that isn't possible.
My initial idea was to convert the frame data from a byte array to a UIImage and keep a UIImageView updated with new frames. The problem is that the frames still need to be decoded first.
I looked at FFmpeg, but all the examples I've seen need either a URL or a local file, which doesn't work for me. I have also read that there may be licensing issues when using FFmpeg.
I also looked at OpenH264. I think this might be an option, but since I am developing for iOS I would still face those licensing issues.
Edit:
I was able to implement this feature on iOS 8+ using VideoToolbox and the provided sample. My problem was that I was receiving more data from my stream than in the example.
I am still looking for a way to do this on iOS 6 and 7.
So my question is, how should I handle decoding and displaying my frames?
I ended up getting this working with FFmpeg and without using the GPL license.
This is how I installed it:
I downloaded the iOS FFmpeg libraries from SourceForge. (You can also build them from scratch by downloading the build script from https://github.com/kewlbear/FFmpeg-iOS-build-script )
In the code, I added a check to see which OS version I am on:
uint8_t *data = (unsigned char*)buf;
float version = [[[UIDevice currentDevice] systemVersion] floatValue];
if (version >= 8.0)
{
[self receivedRawVideoFrame:data withSize:ret];
}
else if (version >= 6.0 && version < 8.0)
{
[self altDecodeFrame:data withSize:ret isConfigured:configured];
}
You can find the implementation for the VideoToolbox part here.
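For the FFmpeg path, the relevant headers need to be included and the decoder keeps some state between calls. Roughly, reconstructed from the code below (the class name here is just a placeholder, and your own declarations may differ):
#include <libavcodec/avcodec.h>
#include <libswscale/swscale.h>
@interface MyStreamDecoder () {
// sizes of the parameter-set NALUs, measured while parsing the stream
int _spsSize;
int _ppsSize;
int _seiSize;
// reusable decode state
AVPicture _pictureData;
AVFrame *_frm;
AVPacket _pkt;
}
@property (nonatomic) AVCodecContext *codec_context;
@property (nonatomic, strong) UIImage *lastImage;
@property (nonatomic, strong) UIImageView *imageView;
// (the viewController and its avSbdLayer referenced near the end belong to my UI layer and are not shown here)
@end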
- (void)altDecodeFrame:(uint8_t *)frame_bytes withSize:(int)frameSize isConfigured:(Boolean)configured
{
if (!configured) {
uint8_t *header = NULL;
// I know what my H.264 data source's NALUs look like, so I know the start code index is always 0.
// If you don't know where it starts, you can use a for loop similar to the ones that find the 2nd and 3rd start codes below.
int startCodeIndex = 0;
int secondStartCodeIndex = 0;
int thirdStartCodeIndex = 0;
int fourthStartCodeIndex = 0;
int nalu_type = (frame_bytes[startCodeIndex + 4] & 0x1F);
// NALU type 7 is the SPS parameter NALU
if (nalu_type == 7)
{
// find where the second start code begins (the 0x00 00 00 01 code),
// which also gives us the length of the first (SPS) NALU
for (int i = startCodeIndex + 4; i < startCodeIndex + 40; i++)
{
if (frame_bytes[i] == 0x00 && frame_bytes[i+1] == 0x00 && frame_bytes[i+2] == 0x00 && frame_bytes[i+3] == 0x01)
{
secondStartCodeIndex = i;
_spsSize = secondStartCodeIndex; // includes the header in the size
break;
}
}
// find what the second NALU type is
nalu_type = (frame_bytes[secondStartCodeIndex + 4] & 0x1F);
}
// type 8 is the PPS parameter NALU
if(nalu_type == 8)
{
// find where the NALU after this one starts so we know how long the PPS parameter is
for (int i = _spsSize + 4; i < _spsSize + 30; i++)
{
if (frame_bytes[i] == 0x00 && frame_bytes[i+1] == 0x00 && frame_bytes[i+2] == 0x00 && frame_bytes[i+3] == 0x01)
{
thirdStartCodeIndex = i;
_ppsSize = thirdStartCodeIndex - _spsSize;
break;
}
}
// allocate enough memory to hold the SPS and PPS NALUs
header = malloc(_ppsSize + _spsSize);
// copy in the actual SPS and PPS bytes; the sizes were measured from the start codes,
// so the two 4-byte start codes are included in the copy
memcpy(header, &frame_bytes[0], _ppsSize + _spsSize);
NSLog(@"refresh codec context");
avcodec_close(instance.codec_context);
int result;
// I know I have an H264 stream, so that is the codex I look for
AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_H264);
self.codec_context = avcodec_alloc_context3(codec);
//open codec
result = avcodec_open2(self.codec_context, codec,NULL);
if (result < 0) {
NSLog(@"avcodec_open2 returned %i", result);
}
if (header != NULL) {
//set the extra data for decoding
self.codec_context->extradata = header;
self.codec_context->extradata_size = _spsSize+_ppsSize;
self.codec_context->flags |= CODEC_FLAG_GLOBAL_HEADER;
free(header);
}
// allocate the picture that will hold the converted frames.
// The decoded frames come out in PIX_FMT_YUV420P, but they get converted to RGB24 further down,
// so this buffer is allocated as RGB24 (at a fixed 1280x720, matching the sws_scale target below).
avpicture_alloc(&_pictureData, PIX_FMT_RGB24, 1280, 720);
// After my SPS and PPS data I receive a SEI NALU
nalu_type = (frame_bytes[thirdStartCodeIndex + 4] & 0x1F);
}
if(nalu_type == 6)
{
for (int i = _spsSize +_ppsSize + 4; i < _spsSize +_ppsSize + 30; i++)
{
if (frame_bytes[i] == 0x00 && frame_bytes[i+1] == 0x00 && frame_bytes[i+2] == 0x00 && frame_bytes[i+3] == 0x01)
{
fourthStartCodeIndex = i;
_seiSize = fourthStartCodeIndex - (_spsSize + _ppsSize);
break;
}
}
// do stuff here
// [...]
nalu_type = (frame_bytes[fourthStartCodeIndex + 4] & 0x1F);
}
}
//I had some issues with a large build up of memory, so I created an autoreleasepool
@autoreleasepool {
_frm = av_frame_alloc();
int result;
//fill the packet with the frame data
av_init_packet(&_pkt);
_pkt.data = frame_bytes;
_pkt.size = frameSize;
_pkt.flags = AV_PKT_FLAG_KEY;
int got_frame;
//Decode the frame
result = avcodec_decode_video2(self.codec_context, _frm, &got_frame, &_pkt);
if (result < 0) {
NSLog(@"avcodec_decode_video2 returned %i", result);
}
if (!got_frame) {
//no complete picture was produced; release the packet and frame and wait for more data
av_free_packet(&_pkt);
av_frame_free(&_frm);
return;
}
else
{
//Here we convert from YUV420P to RGB24, scaling into the fixed 1280x720
//_pictureData buffer that was allocated above
static int sws_flags = SWS_FAST_BILINEAR;
struct SwsContext *img_convert_ctx = sws_getContext(self.codec_context->width, self.codec_context->height, self.codec_context->pix_fmt, 1280, 720, PIX_FMT_RGB24, sws_flags, NULL, NULL, NULL);
sws_scale(img_convert_ctx, (const uint8_t* const*)_frm->data, _frm->linesize, 0, _frm->height, _pictureData.data, _pictureData.linesize);
sws_freeContext(img_convert_ctx);
self.lastImage = [self imageFromAVPicture:_pictureData width:_frm->width height:_frm->height];
av_frame_unref(_frm);
}
if (!self.lastImage) {
//conversion failed; release the packet and frame before bailing out
av_free_packet(&_pkt);
av_frame_free(&_frm);
return;
}
//Normally we render on the AVSampleBufferDisplayLayer, so hide that.
//Add a UIImageView and display the image there.
dispatch_sync(dispatch_get_main_queue(), ^{
if (![[[self viewController] avSbdLayer] isHidden]) {
[[[self viewController] avSbdLayer] setHidden:true];
self.imageView = [[UIImageView alloc] initWithFrame:[[[self viewController] view] bounds]] ;
[[[self viewController] view] addSubview: self.imageView];
}
[[self imageView] setImage: self.lastImage];
});
// Free the allocated data (av_frame_free also NULLs _frm, so no extra av_free is needed)
av_free_packet(&_pkt);
av_frame_free(&_frm);
}
}
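For completeness, this is roughly how I tear the FFmpeg side down again when the stream ends (a sketch; the method name is mine and where you call it from will depend on your app):
- (void)shutdownAltDecoder
{
// release the codec context that altDecodeFrame allocated
if (self.codec_context) {
avcodec_close(self.codec_context);
av_freep(&self.codec_context->extradata); // extradata is not freed by avcodec_close in this old API
av_free(self.codec_context);
self.codec_context = NULL;
}
// release the RGB picture buffer that was allocated with avpicture_alloc
avpicture_free(&_pictureData);
}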
And this is how I created the UIImage from AVPicture
-(UIImage *)imageFromAVPicture:(AVPicture)pict width:(int)width height:(int)height {
CGBitmapInfo bitmapInfo = kCGBitmapByteOrderDefault;
// wrap the RGB24 buffer without copying it; kCFAllocatorNull stops CFData from freeing memory it doesn't own
CFDataRef data = CFDataCreateWithBytesNoCopy(kCFAllocatorDefault, pict.data[0], pict.linesize[0] * height, kCFAllocatorNull);
CGDataProviderRef provider = CGDataProviderCreateWithCFData(data);
CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
CGImageRef cgImage = CGImageCreate(width,
height,
8,                      // bits per component
24,                     // bits per pixel (RGB24)
pict.linesize[0],       // bytes per row
colorSpace,
bitmapInfo,
provider,
NULL,                   // decode array
NO,                     // should interpolate
kCGRenderingIntentDefault);
CGColorSpaceRelease(colorSpace);
UIImage *image = [UIImage imageWithCGImage:cgImage];
CGImageRelease(cgImage);
CGDataProviderRelease(provider);
CFRelease(data);
return image;
}
If anyone has any other (or better) solution please let me know.