From cf44c39aaf75f4ca36940579fa2b10860595c160 Mon Sep 17 00:00:00 2001 From: nillerusr Date: Thu, 13 Oct 2022 17:55:10 +0300 Subject: [PATCH] video: add initial implementation for video_bink --- video/video_bink/bink_common.h | 51 ++ video/video_bink/bink_material.cpp | 1079 +++++++++++++++++++++++ video/video_bink/bink_material.h | 220 +++++ video/video_bink/bink_video.cpp | 345 ++++++++ video/video_bink/bink_video.h | 109 +++ video/video_bink/wscript | 55 ++ video/video_bink/yuv_rgb.c | 1312 ++++++++++++++++++++++++++++ video/video_bink/yuv_rgb.h | 155 ++++ 8 files changed, 3326 insertions(+) create mode 100644 video/video_bink/bink_common.h create mode 100644 video/video_bink/bink_material.cpp create mode 100644 video/video_bink/bink_material.h create mode 100644 video/video_bink/bink_video.cpp create mode 100644 video/video_bink/bink_video.h create mode 100755 video/video_bink/wscript create mode 100644 video/video_bink/yuv_rgb.c create mode 100644 video/video_bink/yuv_rgb.h diff --git a/video/video_bink/bink_common.h b/video/video_bink/bink_common.h new file mode 100644 index 00000000..662b9b8e --- /dev/null +++ b/video/video_bink/bink_common.h @@ -0,0 +1,51 @@ +//========= Copyright Valve Corporation, All rights reserved. 
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// File: bink_common.h
//
// Limits and constants shared among all BINK video functions
// (guard/file names fixed — this file previously still claimed to be
// quicktime_common.h, from which it was derived)
//
//=============================================================================


#ifndef BINK_COMMON_H
#define BINK_COMMON_H

#ifdef _WIN32
#pragma once
#endif

// constants that define the bounds of various inputs
static const int cMinVideoFrameWidth = 16;
static const int cMinVideoFrameHeight = 16;
static const int cMaxVideoFrameWidth = 2 * 2048;
static const int cMaxVideoFrameHeight = 2 * 2048;

static const int cMinFPS = 1;
static const int cMaxFPS = 600;

static const float cMinDuration = 0.016666666f;		// 1/60th second
static const float cMaxDuration = 3600.0f;			// 1 Hour

static const int cMinSampleRate = 11025;			// 1/4 CD sample rate
static const int cMaxSampleRate = 88200;			// 2x CD rate

#define NO_MORE_INTERESTING_TIMES -2
#define END_OF_QUICKTIME_MOVIE -1

// Makes a heap-allocated copy of a string; caller owns the result (delete[]).
// Defined in bink_material.cpp.
extern char *COPY_STRING( const char *pString );

//-----------------------------------------------------------------------------
// Computes the smallest power of two that is >= the passed-in number
// (returns 1 for any n <= 1)
//-----------------------------------------------------------------------------
static inline int ComputeGreaterPowerOfTwo( int n )
{
	int i = 1;
	while ( i < n )
	{
		i <<= 1;
	}
	return i;
}


#endif // BINK_COMMON_H
============// +// +// Purpose: +// +//============================================================================= + + +#include "filesystem.h" +#include "tier1/strtools.h" +#include "tier1/utllinkedlist.h" +#include "tier1/KeyValues.h" +#include "materialsystem/imaterial.h" +#include "materialsystem/imaterialsystem.h" +#include "materialsystem/MaterialSystemUtil.h" +#include "materialsystem/itexture.h" +#include "vtf/vtf.h" +#include "pixelwriter.h" +#include "tier3/tier3.h" +#include "platform.h" +#include "bink_material.h" +#include "tier0/memdbgon.h" + +extern "C" { +#include "yuv_rgb.h" +} + + +// makes a copy of a string +char *COPY_STRING( const char *pString ) +{ + if ( pString == nullptr ) + return nullptr; + + size_t strLen = V_strlen( pString ); + + char *pNewStr = new char[ strLen+ 1 ]; + if ( strLen > 0 ) + V_memcpy( pNewStr, pString, strLen ); + + pNewStr[strLen] = nullchar; + + return pNewStr; +} + +int open_codec_context(int *stream_idx, AVCodecContext **dec_ctx, AVFormatContext *fmt_ctx, enum AVMediaType type) +{ + int ret, stream_index; + AVStream *st; + const AVCodec *dec = NULL; + + ret = av_find_best_stream(fmt_ctx, type, -1, -1, NULL, 0); + if (ret < 0) + { + Warning("Could not find %s stream\n", + av_get_media_type_string(type)); + return ret; + } + else + { + stream_index = ret; + st = fmt_ctx->streams[stream_index]; + + /* find decoder for the stream */ + dec = avcodec_find_decoder(st->codecpar->codec_id); + if (!dec) + { + Warning("Failed to find %s codec\n", + av_get_media_type_string(type)); + return AVERROR(EINVAL); + } + + /* Allocate a codec context for the decoder */ + *dec_ctx = avcodec_alloc_context3(dec); + if (!*dec_ctx) + { + Warning("Failed to allocate the %s codec context\n", + av_get_media_type_string(type)); + return AVERROR(ENOMEM); + } + + /* Copy codec parameters from input stream to output codec context */ + if ((ret = avcodec_parameters_to_context(*dec_ctx, st->codecpar)) < 0) + { + Warning("Failed to copy %s codec 
parameters to decoder context\n", + av_get_media_type_string(type)); + return ret; + } + + /* Init the decoders */ + if ((ret = avcodec_open2(*dec_ctx, dec, NULL)) < 0) + { + Warning("Failed to open %s codec\n", + av_get_media_type_string(type)); + return ret; + } + + *stream_idx = stream_index; + } + + return 0; +} + +// =========================================================================== +// CBinkMaterialRGBTextureRegenerator - Inherited from ITextureRegenerator +// Copies and converts the buffer bits to texture bits +// Currently only supports 32-bit BGR +// =========================================================================== +CBinkMaterialRGBTextureRegenerator::CBinkMaterialRGBTextureRegenerator() : + m_nSourceWidth( 0 ), + m_nSourceHeight( 0 ) +{ +} + + +CBinkMaterialRGBTextureRegenerator::~CBinkMaterialRGBTextureRegenerator() +{ + // nothing to do +} + +void CBinkMaterialRGBTextureRegenerator::SetSourceImage( uint8_t *SrcImage, int nWidth, int nHeight ) +{ + m_SrcImage = SrcImage; + m_nSourceWidth = nWidth; + m_nSourceHeight = nHeight; +} + +void CBinkMaterialRGBTextureRegenerator::RegenerateTextureBits( ITexture *pTexture, IVTFTexture *pVTFTexture, Rect_t *pRect ) +{ + AssertExit( pVTFTexture != nullptr ); + + // Error condition, should only have 1 frame, 1 face, 1 mip level + if ( ( pVTFTexture->FrameCount() > 1 ) || ( pVTFTexture->FaceCount() > 1 ) || ( pVTFTexture->MipCount() > 1 ) || ( pVTFTexture->Depth() > 1 ) ) + { + WarningAssert( "Texture Properties Incorrect "); + memset( pVTFTexture->ImageData(), 0xAA, pVTFTexture->ComputeTotalSize() ); + return; + } + + // Make sure we have a valid video image source +/* if ( m_SrcGWorld == nullptr ) + { + WarningAssert( "Video texture source not set" ); + memset( pVTFTexture->ImageData(), 0xCC, pVTFTexture->ComputeTotalSize() ); + return; + }*/ + + // Verify the destination texture is set up correctly + Assert( pVTFTexture->Format() == IMAGE_FORMAT_RGB888 ); + Assert( pVTFTexture->RowSizeInBytes( 0 
) >= pVTFTexture->Width() * 4 ); + Assert( pVTFTexture->Width() >= m_nSourceWidth ); + Assert( pVTFTexture->Height() >= m_nSourceHeight ); + + // Copy directly from the Quicktime GWorld + BYTE *pImageData = pVTFTexture->ImageData(); + int dstStride = pVTFTexture->RowSizeInBytes( 0 ); + + BYTE *pSrcData = m_SrcImage; + + for (int y = 0; y < m_nSourceHeight; y++ ) + { + memcpy( pImageData, pSrcData, m_nSourceWidth*3 ); + + pImageData += dstStride; + pSrcData += m_nSourceWidth*3; + } +} + + +void CBinkMaterialRGBTextureRegenerator::Release() +{ + // we don't invoke the destructor here, we're not using the no-release extensions +} + + + +// =========================================================================== +// CBinkMaterial class - creates a material, opens a QuickTime movie +// and plays the movie onto the material +// =========================================================================== + +//----------------------------------------------------------------------------- +// CBinkMaterial Constructor +//----------------------------------------------------------------------------- +CBinkMaterial::CBinkMaterial() : + m_pFileName( nullptr ), + m_bInitCalled( false ), + m_AVFrame( nullptr ), + m_AVPkt( nullptr ) +{ + memset( m_AVVideoData, 0, sizeof(m_AVVideoData) ); + memset( m_AVVideoLinesize, 0, sizeof(m_AVVideoLinesize) ); + + Reset(); +} + + +//----------------------------------------------------------------------------- +// CBinkMaterial Destructor +//----------------------------------------------------------------------------- +CBinkMaterial::~CBinkMaterial() +{ + SetFileName( nullptr ); + + DestroyProceduralTexture(); + DestroyProceduralMaterial(); + + av_frame_free( &m_AVFrame ); + av_packet_free( &m_AVPkt ); + + if( m_AVVideoData[0] ) + av_free(m_AVVideoData[0]); + + if( m_AVFmtCtx ) + avformat_close_input( &m_AVFmtCtx ); +} + + +void CBinkMaterial::Reset() +{ + printf("CBinkMaterial::Reset()\n"); + + SetFileName( nullptr ); + + 
DestroyProceduralTexture(); + DestroyProceduralMaterial(); + + m_TexCordU = 0.0f; + m_TexCordV = 0.0f; + + m_VideoFrameWidth = 0; + m_VideoFrameHeight = 0; + + m_AVPixFormat = 0; + m_PlaybackFlags = VideoPlaybackFlags::NO_PLAYBACK_OPTIONS; + + m_bMovieInitialized = false; + m_bMoviePlaying = false; + m_bMovieFinishedPlaying = false; + m_bMoviePaused = false; + m_bLoopMovie = false; + + m_bHasAudio = false; + m_bMuted = false; + + m_CurrentVolume = 0.0f; + + m_QTMovieTimeScale = 0; + m_QTMovieDuration = 0; + m_QTMovieDurationinSec = 0.0f; + m_QTMovieFrameRate.SetFPS( 0, false ); + + if( !m_AVFrame ) + m_AVFrame = av_frame_alloc(); + if( !m_AVPkt) + m_AVPkt = av_packet_alloc(); + + m_RGBData = nullptr; + + m_AVFmtCtx = nullptr; + m_AVAudioStream = nullptr; + m_AVVideoStream = nullptr; + + AssertMsg( m_AVFrame, "av_frame_alloc return nullptr\n" ); + AssertMsg( m_AVPkt, "av_packet_alloc return nullptr\n" ); + + if( m_AVVideoData[0] ) + { + av_free(m_AVVideoData[0]); + m_AVVideoData[0] = nullptr; + } + + if( m_AVFmtCtx ) + { + avformat_close_input( &m_AVFmtCtx ); + m_AVFmtCtx = nullptr; + } + + m_LastResult = VideoResult::SUCCESS; +} + + +void CBinkMaterial::SetFileName( const char *theMovieFileName ) +{ + SAFE_DELETE_ARRAY( m_pFileName ); + + if ( theMovieFileName != nullptr ) + { + AssertMsg( V_strlen( theQTMovieFileName ) <= MAX_FILENAME_LEN, "Bad Quicktime Movie Filename" ); + m_pFileName = COPY_STRING( theMovieFileName ); + } +} + + +VideoResult_t CBinkMaterial::SetResult( VideoResult_t status ) +{ + m_LastResult = status; + return status; +} + + +//----------------------------------------------------------------------------- +// Video information functions +//----------------------------------------------------------------------------- + + +//----------------------------------------------------------------------------- +// Returns the resolved filename of the video, as it might differ from +// what the user supplied, (also with absolute path) 
+//----------------------------------------------------------------------------- +const char *CBinkMaterial::GetVideoFileName() +{ + return m_pFileName; +} + + +VideoFrameRate_t &CBinkMaterial::GetVideoFrameRate() +{ + return m_QTMovieFrameRate; +} + + +VideoResult_t CBinkMaterial::GetLastResult() +{ + return m_LastResult; +} + + +//----------------------------------------------------------------------------- +// Audio Functions +//----------------------------------------------------------------------------- +bool CBinkMaterial::HasAudio() +{ + return m_bHasAudio; +} + + +bool CBinkMaterial::SetVolume( float fVolume ) +{ + clamp( fVolume, 0.0f, 1.0f ); + + m_CurrentVolume = fVolume; + + SetResult( VideoResult::AUDIO_ERROR_OCCURED ); + return false; +} + + +float CBinkMaterial::GetVolume() +{ + return m_CurrentVolume; +} + + +void CBinkMaterial::SetMuted( bool bMuteState ) +{ + AssertExitFunc( m_bMoviePlaying, SetResult( VideoResult::OPERATION_OUT_OF_SEQUENCE) ); + + SetResult( VideoResult::SUCCESS ); + + if ( bMuteState == m_bMuted ) // no change? + { + return; + } + + m_bMuted = bMuteState; + + if ( m_bHasAudio ) + { + + } + + SetResult( VideoResult::SUCCESS ); +} + + +bool CBinkMaterial::IsMuted() +{ + return m_bMuted; +} + + +VideoResult_t CBinkMaterial::SoundDeviceCommand( VideoSoundDeviceOperation_t operation, void *pDevice, void *pData ) +{ + AssertExitV( m_bMovieInitialized || m_bMoviePlaying, VideoResult::OPERATION_OUT_OF_SEQUENCE ); + + switch( operation ) + { + // On win32, we try and create an audio context from a GUID + case VideoSoundDeviceOperation::SET_DIRECT_SOUND_DEVICE: + { +#if defined ( WIN32 ) + SAFE_RELEASE_AUDIOCONTEXT( m_AudioContext ); + return ( CreateMovieAudioContext( m_bHasAudio, m_QTMovie, &m_AudioContext ) ? 
SetResult( VideoResult::SUCCESS ) : SetResult( VideoResult::AUDIO_ERROR_OCCURED ) ); +#else + // On any other OS, we don't support this operation + return SetResult( VideoResult::OPERATION_NOT_SUPPORTED ); +#endif + } + case VideoSoundDeviceOperation::SET_SOUND_MANAGER_DEVICE: + { +#if defined ( OSX ) + SAFE_RELEASE_AUDIOCONTEXT( m_AudioContext ); + return ( CreateMovieAudioContext( m_bHasAudio, m_QTMovie, &m_AudioContext ) ? SetResult( VideoResult::SUCCESS ) : SetResult( VideoResult::AUDIO_ERROR_OCCURED ) ); +#else + // On any other OS, we don't support this operation + return SetResult( VideoResult::OPERATION_NOT_SUPPORTED ); +#endif + } + case VideoSoundDeviceOperation::SET_LIB_AUDIO_DEVICE: + case VideoSoundDeviceOperation::HOOK_X_AUDIO: + case VideoSoundDeviceOperation::SET_MILES_SOUND_DEVICE: + { + return SetResult( VideoResult::OPERATION_NOT_SUPPORTED ); + } + default: + { + return SetResult( VideoResult::BAD_INPUT_PARAMETERS ); + } + } + +} + + +//----------------------------------------------------------------------------- +// Initializes the video material +//----------------------------------------------------------------------------- +bool CBinkMaterial::Init( const char *pMaterialName, const char *pFileName, VideoPlaybackFlags_t flags ) +{ + printf("CBinkMaterial::Init\n"); + + SetResult( VideoResult::BAD_INPUT_PARAMETERS ); + AssertExitF( IS_NOT_EMPTY( pFileName ) ); + AssertExitF( m_bInitCalled == false ); + + m_PlaybackFlags = flags; + + OpenMovie( pFileName ); // Open up the Quicktime file + + if ( !m_bMovieInitialized ) + { + return false; // Something bad happened when we went to open + } + + // Now we can properly setup our regenerators +// m_TextureRegen.SetSourceGWorld( m_MovieGWorld, m_VideoFrameWidth, m_VideoFrameHeight ); + + CreateProceduralTexture( pMaterialName ); + CreateProceduralMaterial( pMaterialName ); + + // Start movie playback + if ( !BITFLAGS_SET( m_PlaybackFlags, VideoPlaybackFlags::DONT_AUTO_START_VIDEO ) ) + { + 
StartVideo(); + } + + m_bInitCalled = true; // Look, if you only got one shot... + + return true; +} + + +void CBinkMaterial::Shutdown( void ) +{ + StopVideo(); + Reset(); +} + + +//----------------------------------------------------------------------------- +// Video playback state functions +//----------------------------------------------------------------------------- +bool CBinkMaterial::IsVideoReadyToPlay() +{ + return m_bMovieInitialized; +} + + +bool CBinkMaterial::IsVideoPlaying() +{ + return m_bMoviePlaying; +} + + +//----------------------------------------------------------------------------- +// Checks to see if the video has a new frame ready to be rendered and +// downloaded into the texture and eventually display +//----------------------------------------------------------------------------- +bool CBinkMaterial::IsNewFrameReady( void ) +{ + // Are we waiting to start playing the first frame? if so, tell them we are ready! + if ( m_bMovieInitialized == true ) + { + return true; + } + + // paused? + if ( m_bMoviePaused ) + { + return false; + } + +// float curMovieTime; + // Enough time passed to get to next frame?? +/* if ( curMovieTime < m_NextInterestingTimeToPlay ) + { + // nope.. use the previous frame + return false; + }*/ + + // we have a new frame we want then.. + return true; +} + + +bool CBinkMaterial::IsFinishedPlaying() +{ + return m_bMovieFinishedPlaying; +} + + +void CBinkMaterial::SetLooping( bool bLoopVideo ) +{ + m_bLoopMovie = bLoopVideo; +} + + +bool CBinkMaterial::IsLooping() +{ + return m_bLoopMovie; +} + + +void CBinkMaterial::SetPaused( bool bPauseState ) +{ + if ( !m_bMoviePlaying || m_bMoviePaused == bPauseState ) + { + Assert( m_bMoviePlaying ); + return; + } + + if ( bPauseState ) // Pausing the movie? 
+ { + // Save off current time and set paused state +// m_MoviePauseTime = GetMovieTime( m_QTMovie, nullptr ); +// StopMovie( m_QTMovie ); + } + else // unpausing the movie + { + // Reset the movie to the paused time +// SetMovieTimeValue( m_QTMovie, m_MoviePauseTime ); +// StartMovie( m_QTMovie ); +// Assert( GetMoviesError() == noErr ); + } + + m_bMoviePaused = bPauseState; +} + + +bool CBinkMaterial::IsPaused() +{ + return ( m_bMoviePlaying ) ? m_bMoviePaused : false; +} + + +// Begins playback of the movie +bool CBinkMaterial::StartVideo() +{ + if ( !m_bMovieInitialized ) + { + Assert( false ); + SetResult( VideoResult::OPERATION_ALREADY_PERFORMED ); + return false; + } + + m_NextInterestingTimeToPlay = Plat_FloatTime(); + + printf("Movie start time = %lf\n", Plat_FloatTime()); + + // Transition to playing state + m_bMovieInitialized = false; + m_bMoviePlaying = true; + + Update(); + + return true; +} + + +// stops movie for good, frees resources, but retains texture & material of last frame rendered +bool CBinkMaterial::StopVideo() +{ + if ( !m_bMoviePlaying ) + { + SetResult( VideoResult::OPERATION_OUT_OF_SEQUENCE ); + return false; + } + + m_bMoviePlaying = false; + m_bMoviePaused = false; + m_bMovieFinishedPlaying = true; + + // free resources + CloseFile(); + + SetResult( VideoResult::SUCCESS ); + return true; +} + + +//----------------------------------------------------------------------------- +// Purpose: Updates our scene +// Output : true = movie playing ok, false = time to end movie +// supposed to be: Returns true on a new frame of video being downloaded into the texture +//----------------------------------------------------------------------------- +bool CBinkMaterial::Update( void ) +{ + AssertExitF( m_bMoviePlaying ); + + + // are we paused? can't update if so... 
+ if ( m_bMoviePaused ) + return true; // reuse the last frame + + // Get current time in the movie + float curMovieTime; // = GetMovieTime( m_QTMovie, nullptr ); + + if( m_NextInterestingTimeToPlay > Plat_FloatTime() ) + return true; + + m_NextInterestingTimeToPlay += m_MovieFrameDuration; + + /* read frames from the file */ + + int ret; + while( (ret = av_read_frame(m_AVFmtCtx, m_AVPkt)) >= 0 ) + { + if (m_AVPkt->stream_index == m_AVVideoStreamID) + { + avcodec_send_packet(m_AVVideoDecCtx, m_AVPkt); + + ret = avcodec_receive_frame(m_AVVideoDecCtx, m_AVFrame); + if (ret < 0) + { + av_packet_unref(m_AVPkt); + return true; + } + + // write the frame data to output file + if (m_AVVideoDecCtx->codec->type == AVMEDIA_TYPE_VIDEO) + { + av_image_copy(m_AVVideoData, m_AVVideoLinesize, (const uint8_t **)(m_AVFrame->data), m_AVFrame->linesize, m_AVPixFormat, m_VideoFrameWidth, m_VideoFrameHeight); + } + + av_frame_unref(m_AVFrame); + break; + } + + av_packet_unref(m_AVPkt); + } + + + if( ret < 0 ) + { + StopVideo(); + return false; + } + + + + yuv420_rgb24_std( m_VideoFrameWidth, m_VideoFrameHeight, m_AVVideoData[0], + m_AVVideoData[0]+m_VideoFrameHeight*m_VideoFrameWidth, + m_AVVideoData[0]+m_VideoFrameWidth*m_VideoFrameHeight+((m_VideoFrameWidth+1)/2)*((m_VideoFrameHeight+1)/2), + m_VideoFrameWidth, (m_VideoFrameWidth+1)/2, m_RGBData, m_VideoFrameWidth*3, YCBCR_601 + ); + + m_Texture->Download(); + + SetResult( VideoResult::SUCCESS ); + return true; +} + + +//----------------------------------------------------------------------------- +// Returns the material +//----------------------------------------------------------------------------- +IMaterial *CBinkMaterial::GetMaterial() +{ + return m_Material; +} + + +//----------------------------------------------------------------------------- +// Returns the texcoord range +//----------------------------------------------------------------------------- +void CBinkMaterial::GetVideoTexCoordRange( float *pMaxU, float *pMaxV ) 
+{ + AssertExit( pMaxU != nullptr && pMaxV != nullptr ); + + if ( m_Texture == nullptr ) // no texture? + { + *pMaxU = *pMaxV = 1.0f; + return; + } + + *pMaxU = m_TexCordU; + *pMaxV = m_TexCordV; +} + + +//----------------------------------------------------------------------------- +// Returns the frame size of the QuickTime Video in pixels +//----------------------------------------------------------------------------- +void CBinkMaterial::GetVideoImageSize( int *pWidth, int *pHeight ) +{ + Assert( pWidth != nullptr && pHeight != nullptr ); + + *pWidth = m_VideoFrameWidth; + *pHeight = m_VideoFrameHeight; +} + + +float CBinkMaterial::GetVideoDuration() +{ + return m_QTMovieDurationinSec; +} + + +int CBinkMaterial::GetFrameCount() +{ + return m_QTMovieFrameCount; +} + + +//----------------------------------------------------------------------------- +// Sets the frame for an QuickTime Material (use instead of SetTime) +//----------------------------------------------------------------------------- +bool CBinkMaterial::SetFrame( int FrameNum ) +{ + if ( !m_bMoviePlaying ) + { + Assert( false ); + SetResult( VideoResult::OPERATION_OUT_OF_SEQUENCE ); + return false; + } + + float theTime = (float) FrameNum * m_QTMovieFrameRate.GetFPS(); + return SetTime( theTime ); +} + + +int CBinkMaterial::GetCurrentFrame() +{ + AssertExitV( m_bMoviePlaying, -1 ); + + float curTime; // = m_bMoviePaused ? m_MoviePauseTime : GetMovieTime( m_QTMovie, nullptr ); + + return curTime / m_QTMovieFrameRate.GetUnitsPerFrame(); +} + + +float CBinkMaterial::GetCurrentVideoTime() +{ + AssertExitV( m_bMoviePlaying, -1.0f ); + + float curTime; // = m_bMoviePaused ? 
m_MoviePauseTime : GetMovieTime( m_QTMovie, nullptr ); + + return curTime / m_QTMovieFrameRate.GetUnitsPerSecond(); +} + + +bool CBinkMaterial::SetTime( float flTime ) +{ + AssertExitF( m_bMoviePlaying ); + AssertExitF( flTime >= 0 && flTime < m_QTMovieDurationinSec ); + + float newTime = ( flTime * m_QTMovieFrameRate.GetUnitsPerSecond() + 0.5f) ; + + clamp( newTime, m_MovieFirstFrameTime, m_QTMovieDuration ); + + // Are we paused? + if ( m_bMoviePaused ) + { + m_MoviePauseTime = newTime; + return true; + } + + float curMovieTime; // = GetMovieTime( m_QTMovie, nullptr ); + + // Don't stop and reset movie if we are within 1 frame of the requested time + if ( newTime <= curMovieTime - m_QTMovieFrameRate.GetUnitsPerFrame() || newTime >= curMovieTime + m_QTMovieFrameRate.GetUnitsPerFrame() ) + { + // Reset the movie to the requested time +/* StopMovie( m_QTMovie ); + SetMovieTimeValue( m_QTMovie, newTime ); + StartMovie( m_QTMovie ); + + Assert( GetMoviesError() == noErr );*/ + } + + return true; +} + + +//----------------------------------------------------------------------------- +// Initializes, shuts down the procedural texture +//----------------------------------------------------------------------------- +void CBinkMaterial::CreateProceduralTexture( const char *pTextureName ) +{ + printf("CBinkMaterial::CreateProceduralTexture\n"); + + AssertIncRange( m_VideoFrameWidth, cMinVideoFrameWidth, cMaxVideoFrameWidth ); + AssertIncRange( m_VideoFrameHeight, cMinVideoFrameHeight, cMaxVideoFrameHeight ); + AssertStr( pTextureName ); + + // Either make the texture the same dimensions as the video, + // or choose power-of-two textures which are at least as big as the video + bool actualSizeTexture = BITFLAGS_SET( m_PlaybackFlags, VideoPlaybackFlags::TEXTURES_ACTUAL_SIZE ); + + int nWidth = ( actualSizeTexture ) ? ALIGN_VALUE( m_VideoFrameWidth, TEXTURE_SIZE_ALIGNMENT ) : ComputeGreaterPowerOfTwo( m_VideoFrameWidth ); + int nHeight = ( actualSizeTexture ) ? 
ALIGN_VALUE( m_VideoFrameHeight, TEXTURE_SIZE_ALIGNMENT ) : ComputeGreaterPowerOfTwo( m_VideoFrameHeight ); + + // initialize the procedural texture as 32-it RGBA, w/o mipmaps + m_Texture.InitProceduralTexture( pTextureName, "VideoCacheTextures", nWidth, nHeight, + IMAGE_FORMAT_RGB888, TEXTUREFLAGS_CLAMPS | TEXTUREFLAGS_CLAMPT | TEXTUREFLAGS_NOMIP | + TEXTUREFLAGS_PROCEDURAL | TEXTUREFLAGS_SINGLECOPY | TEXTUREFLAGS_NOLOD ); + + // Use this to get the updated frame from the remote connection + m_Texture->SetTextureRegenerator( &m_TextureRegen /* , false */ ); + + // compute the texcoords + int nTextureWidth = m_Texture->GetActualWidth(); + int nTextureHeight = m_Texture->GetActualHeight(); + + m_TexCordU = ( nTextureWidth > 0 ) ? (float) m_VideoFrameWidth / (float) nTextureWidth : 0.0f; + m_TexCordV = ( nTextureHeight > 0 ) ? (float) m_VideoFrameHeight / (float) nTextureHeight : 0.0f; +} + + +void CBinkMaterial::DestroyProceduralTexture() +{ + if ( m_Texture != nullptr ) + { + // DO NOT Call release on the Texture Regenerator, as it will destroy this object! bad bad bad + // instead we tell it to assign a NULL regenerator and flag it to not call release + m_Texture->SetTextureRegenerator( nullptr /*, false */ ); + // Texture, texture go away... 
+ m_Texture.Shutdown( true ); + } +} + + +//----------------------------------------------------------------------------- +// Initializes, shuts down the procedural material +//----------------------------------------------------------------------------- +void CBinkMaterial::CreateProceduralMaterial( const char *pMaterialName ) +{ + // create keyvalues if necessary + KeyValues *pVMTKeyValues = new KeyValues( "UnlitGeneric" ); + { + pVMTKeyValues->SetString( "$basetexture", m_Texture->GetName() ); + pVMTKeyValues->SetInt( "$nobasetexture", 1 ); + pVMTKeyValues->SetInt( "$nofog", 1 ); + pVMTKeyValues->SetInt( "$spriteorientation", 3 ); + pVMTKeyValues->SetInt( "$translucent", 1 ); + pVMTKeyValues->SetInt( "$nolod", 1 ); + pVMTKeyValues->SetInt( "$nomip", 1 ); + pVMTKeyValues->SetInt( "$gammacolorread", 0 ); + } + + // FIXME: gak, this is backwards. Why doesn't the material just see that it has a funky basetexture? + m_Material.Init( pMaterialName, pVMTKeyValues ); + m_Material->Refresh(); +} + + +void CBinkMaterial::DestroyProceduralMaterial() +{ + // Store the internal material pointer for later use + IMaterial *pMaterial = m_Material; + m_Material.Shutdown(); + materials->UncacheUnusedMaterials(); + + // Now be sure to free that material because we don't want to reference it again later, we'll recreate it! 
+ if ( pMaterial != nullptr ) + { + pMaterial->DeleteIfUnreferenced(); + } +} + + + +//----------------------------------------------------------------------------- +// Opens a movie file using quicktime +//----------------------------------------------------------------------------- +void CBinkMaterial::OpenMovie( const char *theMovieFileName ) +{ + AssertExit( IS_NOT_EMPTY( theMovieFileName ) ); +/* + // Set graphics port +#if defined ( WIN32 ) + SetGWorld ( (CGrafPtr) GetNativeWindowPort( nil ), nil ); +#elif defined ( OSX ) + SetGWorld( nil, nil ); +#endif +*/ + + SetFileName( theMovieFileName ); + printf("CBinkMaterial::OpenMovie( \"%s\" )\n", theMovieFileName); + + if (avformat_open_input(&m_AVFmtCtx, theMovieFileName, NULL, NULL) < 0) + { + Warning("Could not open source file %s\n", theMovieFileName); + SetResult( VideoResult::FILE_ERROR_OCCURED ) ; + Reset(); + return; + } + + if (avformat_find_stream_info(m_AVFmtCtx, NULL) < 0) + { + Warning("Could not find stream information for %s\n", theMovieFileName); + SetResult( VideoResult::FILE_ERROR_OCCURED ) ; + Reset(); + return; + } + + if (open_codec_context(&m_AVVideoStreamID, &m_AVVideoDecCtx, m_AVFmtCtx, AVMEDIA_TYPE_VIDEO) == 0) + { + m_AVVideoStream = m_AVFmtCtx->streams[m_AVVideoStreamID]; + + /* allocate image where the decoded image will be put */ + m_VideoFrameWidth = m_AVVideoDecCtx->width; + m_VideoFrameHeight = m_AVVideoDecCtx->height; + m_AVPixFormat = m_AVVideoDecCtx->pix_fmt; + size_t size = av_image_alloc(m_AVVideoData, m_AVVideoLinesize, + m_VideoFrameWidth, m_VideoFrameHeight, m_AVPixFormat, 1); + + m_RGBData = calloc( m_VideoFrameWidth*m_VideoFrameHeight*3, 1 ); + + printf("m_AVVideoData size = %zu\nm_VideoFrameWidth=%d\nm_VideoFrameHeight=%d\n", size, m_VideoFrameWidth, m_VideoFrameHeight); + + if (size < 0) + { + Warning("Could not allocate raw video buffer\n", theMovieFileName); + SetResult( VideoResult::SYSTEM_ERROR_OCCURED ) ; + Reset(); + return; + } + } + else + { + 
Warning("open_codec_context failed for %s\n", theMovieFileName); + SetResult( VideoResult::SYSTEM_ERROR_OCCURED ) ; + Reset(); + return; + } + + m_MovieFrameDuration = 1.0/((double)m_AVVideoStream->r_frame_rate.num/(double)m_AVVideoStream->r_frame_rate.den); + m_TextureRegen.SetSourceImage( m_RGBData, m_VideoFrameWidth, m_VideoFrameHeight ); + printf("Video FPS: %lf\n", (double)m_AVVideoStream->r_frame_rate.num/(double)m_AVVideoStream->r_frame_rate.den); + +#if 0 + Handle MovieFileDataRef = nullptr; + OSType MovieFileDataRefType = 0; + + CFStringRef imageStrRef = CFStringCreateWithCString ( NULL, theQTMovieFileName, 0 ); + AssertExitFunc( imageStrRef != nullptr, SetResult( VideoResult::SYSTEM_ERROR_OCCURED ) ); + + OSErr status = QTNewDataReferenceFromFullPathCFString( imageStrRef, (QTPathStyle) kQTNativeDefaultPathStyle, 0, &MovieFileDataRef, &MovieFileDataRefType ); + AssertExitFunc( status == noErr, SetResult( VideoResult::FILE_ERROR_OCCURED ) ); + + CFRelease( imageStrRef ); + +// status = NewMovieFromDataRef( &m_QTMovie, newMovieActive, nil, MovieFileDataRef, MovieFileDataRefType ); +// SAFE_DISPOSE_HANDLE( MovieFileDataRef ); + + if ( status != noErr ) + { + Assert( false ); + Reset(); + SetResult( VideoResult::VIDEO_ERROR_OCCURED ); + return; + } + + // disabling audio? + if ( BITFLAGS_SET( m_PlaybackFlags, VideoPlaybackFlags::NO_AUDIO ) ) + { + m_bHasAudio = false; + } + else + { + // does movie have audio? 
+// Track audioTrack = GetMovieIndTrackType( m_QTMovie, 1, SoundMediaType, movieTrackMediaType ); +// m_bHasAudio = ( audioTrack != nullptr ); + } + + // Now we need to extract the time info from the QT Movie +// m_QTMovieTimeScale = GetMovieTimeScale( m_QTMovie ); +// m_QTMovieDuration = GetMovieDuration( m_QTMovie ); + + // compute movie duration +/* m_QTMovieDurationinSec = float ( double( m_QTMovieDuration ) / double( m_QTMovieTimeScale ) ); + if ( !MovieGetStaticFrameRate( m_QTMovie, m_QTMovieFrameRate ) ) + { + WarningAssert( "Couldn't Get Frame Rate" ); + }*/ + + // and get an estimated frame count + m_QTMovieFrameCount = m_QTMovieDuration / m_QTMovieTimeScale; + + if ( m_QTMovieFrameRate.GetUnitsPerSecond() == m_QTMovieTimeScale ) + { + m_QTMovieFrameCount = m_QTMovieDuration / m_QTMovieFrameRate.GetUnitsPerFrame(); + } + else + { + m_QTMovieFrameCount = (int) ( (float) m_QTMovieDurationinSec * m_QTMovieFrameRate.GetFPS() + 0.5f ); + } + + // what size do we set the output rect to? +// GetMovieNaturalBoundsRect(m_QTMovie, &m_QTMovieRect); + + m_VideoFrameWidth = m_QTMovieRect.right; + m_VideoFrameHeight = m_QTMovieRect.bottom; + + // Sanity check... 
+ AssertExitFunc( m_QTMovieRect.top == 0 && m_QTMovieRect.left == 0 && + m_QTMovieRect.right >= cMinVideoFrameWidth && m_QTMovieRect.right <= cMaxVideoFrameWidth && + m_QTMovieRect.bottom >= cMinVideoFrameHeight && m_QTMovieRect.bottom <= cMaxVideoFrameHeight && + m_QTMovieRect.right % 4 == 0, + SetResult( VideoResult::VIDEO_ERROR_OCCURED ) ); + + // Setup the QuiuckTime Graphics World for the Movie +/* status = QTNewGWorld( &m_MovieGWorld, k32BGRAPixelFormat, &m_QTMovieRect, nil, nil, 0 ); + AssertExit( status == noErr ); + + // Setup the playback gamma according to the convar + SetGWorldDecodeGamma( m_MovieGWorld, VideoPlaybackGamma::USE_GAMMA_CONVAR ); + + // Assign the GWorld to this movie + SetMovieGWorld( m_QTMovie, m_MovieGWorld, nil ); + + // Setup Movie Audio, unless suppressed + if ( !CreateMovieAudioContext( m_bHasAudio, m_QTMovie, &m_AudioContext, true, &m_CurrentVolume ) ) + { + SetResult( VideoResult::AUDIO_ERROR_OCCURED ); + WarningAssert( "Couldn't Set Audio" ); + } + + // Get the time of the first frame + OSType qTypes[1] = { VisualMediaCharacteristic }; + short qFlags = nextTimeStep | nextTimeEdgeOK; // use nextTimeStep instead of nextTimeMediaSample for MPEG 1-2 compatibility + + GetMovieNextInterestingTime( m_QTMovie, qFlags, 1, qTypes, (TimeValue) 0, fixed1, &m_MovieFirstFrameTime, NULL ); + AssertExitFunc( GetMoviesError() == noErr, SetResult( VideoResult::VIDEO_ERROR_OCCURED ) ); + + // Preroll the movie + if ( BITFLAGS_SET( m_PlaybackFlags, VideoPlaybackFlags::PRELOAD_VIDEO ) ) + { + Fixed playRate = GetMoviePreferredRate( m_QTMovie ); + status = PrerollMovie( m_QTMovie, m_MovieFirstFrameTime, playRate ); + AssertExitFunc( status == noErr, SetResult( VideoResult::VIDEO_ERROR_OCCURED ) ); + }*/ + +#endif + m_bMovieInitialized = true; +} + + +void CBinkMaterial::CloseFile() +{ + av_freep( &m_AVVideoData[0] ); + avformat_close_input( &m_AVFmtCtx ); + m_AVFmtCtx = nullptr; + free(m_RGBData); + + SetFileName( nullptr ); +} + + diff --git 
a/video/video_bink/bink_material.h b/video/video_bink/bink_material.h new file mode 100644 index 00000000..b3a817af --- /dev/null +++ b/video/video_bink/bink_material.h @@ -0,0 +1,220 @@ +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +//============================================================================= +#ifndef BINK_MATERIAL_H +#define BINK_MATERIAL_H + +#ifdef _WIN32 +#pragma once +#endif + + +//----------------------------------------------------------------------------- +// Forward declarations +//----------------------------------------------------------------------------- +class IFileSystem; +class IMaterialSystem; +class CBinkMaterial; + +//----------------------------------------------------------------------------- +// Global interfaces - you already did the needed includes, right? +//----------------------------------------------------------------------------- +extern IFileSystem *g_pFileSystem; +extern IMaterialSystem *materials; + +#include "video/ivideoservices.h" + +#include "video_macros.h" +#include "bink_common.h" + +#include "materialsystem/itexture.h" +#include "materialsystem/imaterialsystem.h" +#include "materialsystem/MaterialSystemUtil.h" + +extern "C" +{ +#include +#include +#include +#include +#include +} + +// ----------------------------------------------------------------------------- +// Texture regenerator - callback to get new movie pixels into the texture +// ----------------------------------------------------------------------------- +class CBinkMaterialRGBTextureRegenerator : public ITextureRegenerator +{ + public: + CBinkMaterialRGBTextureRegenerator(); + ~CBinkMaterialRGBTextureRegenerator(); + + void SetSourceImage( uint8_t *SrcImage, int nWidth, int nHeight ); + + // Inherited from ITextureRegenerator + virtual void RegenerateTextureBits( ITexture *pTexture, IVTFTexture *pVTFTexture, Rect_t *pRect ); + virtual void Release(); + + private: + uint8_t *m_SrcImage; + 
int m_nSourceWidth; + int m_nSourceHeight; +}; + + + +// ----------------------------------------------------------------------------- +// Class used to play a QuickTime video onto a texture +// ----------------------------------------------------------------------------- +class CBinkMaterial : public IVideoMaterial +{ + public: + CBinkMaterial(); + ~CBinkMaterial(); + + static const int MAX_FILENAME_LEN = 255; + static const int MAX_MATERIAL_NAME_LEN = 255; + static const int TEXTURE_SIZE_ALIGNMENT = 8; + + // Initializes, shuts down the material + bool Init( const char *pMaterialName, const char *pFileName, VideoPlaybackFlags_t flags ); + void Shutdown(); + + // Video information functions + virtual const char *GetVideoFileName(); // Gets the file name of the video this material is playing + virtual VideoResult_t GetLastResult(); // Gets detailed info on the last operation + + virtual VideoFrameRate_t &GetVideoFrameRate(); // Returns the frame rate of the associated video in FPS + + // Audio Functions + virtual bool HasAudio(); // Query if the video has an audio track + + virtual bool SetVolume( float fVolume ); // Adjust the playback volume + virtual float GetVolume(); // Query the current volume + virtual void SetMuted( bool bMuteState ); // Mute/UnMutes the audio playback + virtual bool IsMuted(); // Query muted status + + virtual VideoResult_t SoundDeviceCommand( VideoSoundDeviceOperation_t operation, void *pDevice = nullptr, void *pData = nullptr ); // Assign Sound Device for this Video Material + + // Video playback state functions + virtual bool IsVideoReadyToPlay(); // Queries if the video material was initialized successfully and is ready for playback, but not playing or finished + virtual bool IsVideoPlaying(); // Is the video currently playing (and needs update calls, etc) + virtual bool IsNewFrameReady(); // Do we have a new frame to get & display? 
+ virtual bool IsFinishedPlaying(); // Have we reached the end of the movie + + virtual bool StartVideo(); // Starts the video playing + virtual bool StopVideo(); // Terminates the video playing + + virtual void SetLooping( bool bLoopVideo ); // Sets the video to loop (or not) + virtual bool IsLooping(); // Queries if the video is looping + + virtual void SetPaused( bool bPauseState ); // Pauses or Unpauses video playback + virtual bool IsPaused(); // Queries if the video is paused + + // Position in playback functions + virtual float GetVideoDuration(); // Returns the duration of the associated video in seconds + virtual int GetFrameCount(); // Returns the total number of (unique) frames in the video + + virtual bool SetFrame( int FrameNum ); // Sets the current frame # in the video to play next + virtual int GetCurrentFrame(); // Gets the current frame # for the video playback, 0 Based + + virtual bool SetTime( float flTime ); // Sets the video playback to specified time (in seconds) + virtual float GetCurrentVideoTime(); // Gets the current time in the video playback + + // Update function + virtual bool Update(); // Updates the video frame to reflect the time passed, true = new frame available + + // Material / Texture Info functions + virtual IMaterial *GetMaterial(); // Gets the IMaterial associated with an video material + + virtual void GetVideoTexCoordRange( float *pMaxU, float *pMaxV ) ; // Returns the max texture coordinate of the video portion of the material surface ( 0.0, 0.0 to U, V ) + virtual void GetVideoImageSize( int *pWidth, int *pHeight ); // Returns the frame size of the Video Image Frame in pixels ( the stored in a subrect of the material itself) + + private: + friend class CBinkMaterialRGBTextureRegenerator; + + void Reset(); // clears internal state + void SetFileName( const char *theMovieFileName ); + VideoResult_t SetResult( VideoResult_t status ); + + // Initializes, shuts down the video stream + void OpenMovie( const char 
*theMovieFileName ); + void CloseFile(); + + // Initializes, shuts down the procedural texture + void CreateProceduralTexture( const char *pTextureName ); + void DestroyProceduralTexture(); + + // Initializes, shuts down the procedural material + void CreateProceduralMaterial( const char *pMaterialName ); + void DestroyProceduralMaterial(); + + CBinkMaterialRGBTextureRegenerator m_TextureRegen; + + VideoResult_t m_LastResult; + + CMaterialReference m_Material; // Ref to Material used for rendering the video frame + CTextureReference m_Texture; // Ref to the renderable texture which contains the most recent video frame (in a sub-rect) + + float m_TexCordU; // Max U texture coordinate of the texture sub-rect which holds the video frame + float m_TexCordV; // Max V texture coordinate of the texture sub-rect which holds the video frame + + int m_VideoFrameWidth; // Size of the movie frame in pixels + int m_VideoFrameHeight; + + char *m_pFileName; // resolved filename of the movie being played + VideoPlaybackFlags_t m_PlaybackFlags; // option flags user supplied + + bool m_bInitCalled; + bool m_bMovieInitialized; + bool m_bMoviePlaying; + bool m_bMovieFinishedPlaying; + bool m_bMoviePaused; + bool m_bLoopMovie; + + bool m_bHasAudio; + bool m_bMuted; + + float m_CurrentVolume; + + // QuickTime Stuff + + float m_QTMovieTimeScale; + float m_QTMoviefloat; // Units per second + float m_QTMovieDuration; // movie duration in float Units Per Second + float m_QTMovieDurationinSec; // movie duration in seconds + VideoFrameRate_t m_QTMovieFrameRate; // Frame Rate of movie + int m_QTMovieFrameCount; + + double m_MovieFirstFrameTime; + double m_NextInterestingTimeToPlay; + float m_MoviePauseTime; + + // AV stuff + AVFrame *m_AVFrame; + AVPacket *m_AVPkt; + AVFormatContext *m_AVFmtCtx; + + int m_AVVideoStreamID; + int m_AVAudioStreamID; + + AVCodecContext *m_AVVideoDecCtx ; + AVCodecContext *m_AVAudioDecCtx ; + + AVStream *m_AVVideoStream; + AVStream *m_AVAudioStream; + + int 
m_AVPixFormat; + + double m_MovieFrameDuration; + + uint8_t *m_AVVideoData[4]; + uint8_t *m_RGBData; + + int m_AVVideoLinesize[4]; + +}; + +#endif // BINK_MATERIAL_H diff --git a/video/video_bink/bink_video.cpp b/video/video_bink/bink_video.cpp new file mode 100644 index 00000000..843a466f --- /dev/null +++ b/video/video_bink/bink_video.cpp @@ -0,0 +1,345 @@ +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +//============================================================================= + +#include "bink_video.h" +#include "video_macros.h" + +#include "filesystem.h" +#include "tier0/icommandline.h" +#include "tier1/strtools.h" +#include "tier1/utllinkedlist.h" +#include "tier1/KeyValues.h" +#include "materialsystem/imaterial.h" +#include "materialsystem/imaterialsystem.h" +#include "materialsystem/MaterialSystemUtil.h" +#include "materialsystem/itexture.h" +#include "vtf/vtf.h" +#include "pixelwriter.h" +#include "tier2/tier2.h" +#include "platform.h" + + +#include "tier0/memdbgon.h" +#include "bink_material.h" + +// =========================================================================== +// Singleton to expose Bink video subsystem +// =========================================================================== +static CBinkVideoSubSystem g_BinkSystem; +EXPOSE_SINGLE_INTERFACE_GLOBALVAR( CBinkVideoSubSystem, IVideoSubSystem, VIDEO_SUBSYSTEM_INTERFACE_VERSION, g_BinkSystem ); + + +// =========================================================================== +// List of file extensions and features supported by this subsystem +// =========================================================================== +VideoFileExtensionInfo_t s_BinkExtensions[] = +{ + { ".bik", VideoSystem::BINK, VideoSystemFeature::PLAY_VIDEO_FILE_IN_MATERIAL }, +}; + +const int s_BinkExtensionCount = ARRAYSIZE( s_BinkExtensions ); +const VideoSystemFeature_t CBinkVideoSubSystem::DEFAULT_FEATURE_SET = 
VideoSystemFeature::PLAY_VIDEO_FILE_IN_MATERIAL; + +// =========================================================================== +// CBinkVideoSubSystem class +// =========================================================================== +CBinkVideoSubSystem::CBinkVideoSubSystem() : + m_bBinkInitialized( false ), + m_LastResult( VideoResult::SUCCESS ), + m_CurrentStatus( VideoSystemStatus::NOT_INITIALIZED ), + m_AvailableFeatures( CBinkVideoSubSystem::DEFAULT_FEATURE_SET ), + m_pCommonServices( nullptr ) +{ + +} + +CBinkVideoSubSystem::~CBinkVideoSubSystem() +{ + ShutdownBink(); // Super redundant safety check +} + +// =========================================================================== +// IAppSystem methods +// =========================================================================== +bool CBinkVideoSubSystem::Connect( CreateInterfaceFn factory ) +{ + if ( !BaseClass::Connect( factory ) ) + { + return false; + } + + if ( g_pFullFileSystem == nullptr || materials == nullptr ) + { + Msg( "Bink video subsystem failed to connect to missing a required system\n" ); + return false; + } + return true; +} + +void CBinkVideoSubSystem::Disconnect() +{ + BaseClass::Disconnect(); +} + +void* CBinkVideoSubSystem::QueryInterface( const char *pInterfaceName ) +{ + + if ( IS_NOT_EMPTY( pInterfaceName ) ) + { + if ( V_strncmp( pInterfaceName, VIDEO_SUBSYSTEM_INTERFACE_VERSION, Q_strlen( VIDEO_SUBSYSTEM_INTERFACE_VERSION ) + 1) == STRINGS_MATCH ) + { + return (IVideoSubSystem*) this; + } + } + + return nullptr; +} + + +InitReturnVal_t CBinkVideoSubSystem::Init() +{ + InitReturnVal_t nRetVal = BaseClass::Init(); + if ( nRetVal != INIT_OK ) + { + return nRetVal; + } + + return INIT_OK; + +} + +void CBinkVideoSubSystem::Shutdown() +{ + // Make sure we shut down Bink + ShutdownBink(); + + BaseClass::Shutdown(); +} + + +// =========================================================================== +// IVideoSubSystem identification methods +// 
=========================================================================== +VideoSystem_t CBinkVideoSubSystem::GetSystemID() +{ + return VideoSystem::BINK; +} + + +VideoSystemStatus_t CBinkVideoSubSystem::GetSystemStatus() +{ + return m_CurrentStatus; +} + + +VideoSystemFeature_t CBinkVideoSubSystem::GetSupportedFeatures() +{ + return m_AvailableFeatures; +} + + +const char* CBinkVideoSubSystem::GetVideoSystemName() +{ + return "BINK"; +} + + +// =========================================================================== +// IVideoSubSystem setup and shutdown services +// =========================================================================== +bool CBinkVideoSubSystem::InitializeVideoSystem( IVideoCommonServices *pCommonServices ) +{ + m_AvailableFeatures = DEFAULT_FEATURE_SET; // Put here because of issue with static const int, binary OR and DEBUG builds + + AssertPtr( pCommonServices ); + m_pCommonServices = pCommonServices; + + return ( m_bBinkInitialized ) ? true : SetupBink(); +} + + +bool CBinkVideoSubSystem::ShutdownVideoSystem() +{ + return ( m_bBinkInitialized ) ? 
ShutdownBink() : true; +} + + +VideoResult_t CBinkVideoSubSystem::VideoSoundDeviceCMD( VideoSoundDeviceOperation_t operation, void *pDevice, void *pData ) +{ + switch ( operation ) + { + case VideoSoundDeviceOperation::SET_DIRECT_SOUND_DEVICE: + { + return SetResult( VideoResult::OPERATION_NOT_SUPPORTED ); + } + + case VideoSoundDeviceOperation::SET_MILES_SOUND_DEVICE: + case VideoSoundDeviceOperation::HOOK_X_AUDIO: + { + return SetResult( VideoResult::OPERATION_NOT_SUPPORTED ); + } + + default: + { + return SetResult( VideoResult::UNKNOWN_OPERATION ); + } + } +} + + +// =========================================================================== +// IVideoSubSystem supported extensions & features +// =========================================================================== +int CBinkVideoSubSystem::GetSupportedFileExtensionCount() +{ + return s_BinkExtensionCount; +} + + +const char* CBinkVideoSubSystem::GetSupportedFileExtension( int num ) +{ + return ( num < 0 || num >= s_BinkExtensionCount ) ? nullptr : s_BinkExtensions[num].m_FileExtension; +} + + +VideoSystemFeature_t CBinkVideoSubSystem::GetSupportedFileExtensionFeatures( int num ) +{ + return ( num < 0 || num >= s_BinkExtensionCount ) ? 
VideoSystemFeature::NO_FEATURES : s_BinkExtensions[num].m_VideoFeatures;
+}
+
+
+// ===========================================================================
+// IVideoSubSystem Video Playback and Recording Services
+// ===========================================================================
+VideoResult_t CBinkVideoSubSystem::PlayVideoFileFullScreen( const char *filename, void *mainWindow, int windowWidth, int windowHeight, int desktopWidth, int desktopHeight, bool windowed, float forcedMinTime, VideoPlaybackFlags_t playbackFlags )
+{
+	return SetResult( VideoResult::FEATURE_NOT_AVAILABLE );
+}
+
+
+// ===========================================================================
+// IVideoSubSystem Video Material Services
+// note that the filename is absolute and has already resolved any paths
+// ===========================================================================
+IVideoMaterial* CBinkVideoSubSystem::CreateVideoMaterial( const char *pMaterialName, const char *pVideoFileName, VideoPlaybackFlags_t flags )
+{
+	SetResult( VideoResult::BAD_INPUT_PARAMETERS );
+	// BUGFIX: was "A && B || C" — since '&&' binds tighter than '||', a
+	// non-empty file name alone satisfied the assert even when the subsystem
+	// was uninitialized or the material name was empty. Require an OK system
+	// status AND both names to be non-empty.
+	AssertExitN( m_CurrentStatus == VideoSystemStatus::OK && IS_NOT_EMPTY( pMaterialName ) && IS_NOT_EMPTY( pVideoFileName ) );
+
+	CBinkMaterial *pVideoMaterial = new CBinkMaterial();
+	if ( pVideoMaterial == nullptr || pVideoMaterial->Init( pMaterialName, pVideoFileName, flags ) == false )
+	{
+		SAFE_DELETE( pVideoMaterial );
+		SetResult( VideoResult::VIDEO_ERROR_OCCURED );
+		return nullptr;
+	}
+
+	IVideoMaterial *pInterface = (IVideoMaterial*) pVideoMaterial;
+	m_MaterialList.AddToTail( pInterface );
+
+	SetResult( VideoResult::SUCCESS );
+	return pInterface;
+}
+
+
+VideoResult_t CBinkVideoSubSystem::DestroyVideoMaterial( IVideoMaterial *pVideoMaterial )
+{
+	AssertExitV( m_CurrentStatus == VideoSystemStatus::OK, SetResult( VideoResult::SYSTEM_NOT_AVAILABLE ) );
+	AssertPtrExitV( pVideoMaterial, SetResult( VideoResult::BAD_INPUT_PARAMETERS ) );
+
+	if ( m_MaterialList.Find( pVideoMaterial ) != -1 
) + { + CBinkMaterial *pObject = (CBinkMaterial*) pVideoMaterial; + pObject->Shutdown(); + delete pObject; + + m_MaterialList.FindAndFastRemove( pVideoMaterial ); + + return SetResult( VideoResult::SUCCESS ); + } + + return SetResult (VideoResult::MATERIAL_NOT_FOUND ); +} + + +// =========================================================================== +// IVideoSubSystem Video Recorder Services +// =========================================================================== +IVideoRecorder* CBinkVideoSubSystem::CreateVideoRecorder() +{ + SetResult( VideoResult::FEATURE_NOT_AVAILABLE ); + return nullptr; +} + + +VideoResult_t CBinkVideoSubSystem::DestroyVideoRecorder( IVideoRecorder *pRecorder ) +{ + return SetResult( VideoResult::FEATURE_NOT_AVAILABLE ); +} + +VideoResult_t CBinkVideoSubSystem::CheckCodecAvailability( VideoEncodeCodec_t codec ) +{ + AssertExitV( m_CurrentStatus == VideoSystemStatus::OK, SetResult( VideoResult::SYSTEM_NOT_AVAILABLE ) ); + AssertExitV( codec >= VideoEncodeCodec::DEFAULT_CODEC && codec < VideoEncodeCodec::CODEC_COUNT, SetResult( VideoResult::BAD_INPUT_PARAMETERS ) ); + + return SetResult( VideoResult::FEATURE_NOT_AVAILABLE ); +} + + +// =========================================================================== +// Status support +// =========================================================================== +VideoResult_t CBinkVideoSubSystem::GetLastResult() +{ + return m_LastResult; +} + + +VideoResult_t CBinkVideoSubSystem::SetResult( VideoResult_t status ) +{ + m_LastResult = status; + return status; +} + + +// =========================================================================== +// Bink Initialization & Shutdown +// =========================================================================== +bool CBinkVideoSubSystem::SetupBink() +{ + SetResult( VideoResult::INITIALIZATION_ERROR_OCCURED); + AssertExitF( m_bBinkInitialized == false ); + + // This is set early to indicate we have already been through here, even if we error 
out for some reason + m_bBinkInitialized = true; + m_CurrentStatus = VideoSystemStatus::OK; + m_AvailableFeatures = DEFAULT_FEATURE_SET; + // $$INIT CODE HERE$$ + + + // Note that we are now open for business.... + m_bBinkInitialized = true; + SetResult( VideoResult::SUCCESS ); + + return true; +} + + +bool CBinkVideoSubSystem::ShutdownBink() +{ + if ( m_bBinkInitialized && m_CurrentStatus == VideoSystemStatus::OK ) + { + + } + + m_bBinkInitialized = false; + m_CurrentStatus = VideoSystemStatus::NOT_INITIALIZED; + m_AvailableFeatures = VideoSystemFeature::NO_FEATURES; + SetResult( VideoResult::SUCCESS ); + + return true; +} + diff --git a/video/video_bink/bink_video.h b/video/video_bink/bink_video.h new file mode 100644 index 00000000..5dc4f811 --- /dev/null +++ b/video/video_bink/bink_video.h @@ -0,0 +1,109 @@ +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +//============================================================================= + + +#ifndef BINK_VIDEO_H +#define BINK_VIDEO_H + +#ifdef _WIN32 +#pragma once +#endif + + +//----------------------------------------------------------------------------- +// Forward declarations +//----------------------------------------------------------------------------- +class IFileSystem; +class IMaterialSystem; +class CQuickTimeMaterial; + +//----------------------------------------------------------------------------- +// Global interfaces - you already did the needed includes, right? 
+//----------------------------------------------------------------------------- +extern IFileSystem *g_pFileSystem; +extern IMaterialSystem *materials; + +#include "video/ivideoservices.h" +#include "videosubsystem.h" + +#include "utlvector.h" +#include "tier1/KeyValues.h" +#include "tier0/platform.h" + +// ----------------------------------------------------------------------------- +// CQuickTimeVideoSubSystem - Implementation of IVideoSubSystem +// ----------------------------------------------------------------------------- +class CBinkVideoSubSystem : public CTier2AppSystem< IVideoSubSystem > +{ + typedef CTier2AppSystem< IVideoSubSystem > BaseClass; + + public: + CBinkVideoSubSystem(); + ~CBinkVideoSubSystem(); + + // Inherited from IAppSystem + virtual bool Connect( CreateInterfaceFn factory ); + virtual void Disconnect(); + virtual void *QueryInterface( const char *pInterfaceName ); + virtual InitReturnVal_t Init(); + virtual void Shutdown(); + + // Inherited from IVideoSubSystem + + // SubSystem Identification functions + virtual VideoSystem_t GetSystemID(); + virtual VideoSystemStatus_t GetSystemStatus(); + virtual VideoSystemFeature_t GetSupportedFeatures(); + virtual const char *GetVideoSystemName(); + + // Setup & Shutdown Services + virtual bool InitializeVideoSystem( IVideoCommonServices *pCommonServices ); + virtual bool ShutdownVideoSystem(); + + virtual VideoResult_t VideoSoundDeviceCMD( VideoSoundDeviceOperation_t operation, void *pDevice = nullptr, void *pData = nullptr ); + + // get list of file extensions and features we support + virtual int GetSupportedFileExtensionCount(); + virtual const char *GetSupportedFileExtension( int num ); + virtual VideoSystemFeature_t GetSupportedFileExtensionFeatures( int num ); + + // Video Playback and Recording Services + virtual VideoResult_t PlayVideoFileFullScreen( const char *filename, void *mainWindow, int windowWidth, int windowHeight, int desktopWidth, int desktopHeight, bool windowed, float 
forcedMinTime, VideoPlaybackFlags_t playbackFlags ); + + // Create/destroy a video material + virtual IVideoMaterial *CreateVideoMaterial( const char *pMaterialName, const char *pVideoFileName, VideoPlaybackFlags_t flags ); + virtual VideoResult_t DestroyVideoMaterial( IVideoMaterial *pVideoMaterial ); + + // Create/destroy a video encoder + virtual IVideoRecorder *CreateVideoRecorder(); + virtual VideoResult_t DestroyVideoRecorder( IVideoRecorder *pRecorder ); + + virtual VideoResult_t CheckCodecAvailability( VideoEncodeCodec_t codec ); + + virtual VideoResult_t GetLastResult(); + + private: + + bool SetupBink(); + bool ShutdownBink(); + + VideoResult_t SetResult( VideoResult_t status ); + + bool m_bBinkInitialized; + VideoResult_t m_LastResult; + + VideoSystemStatus_t m_CurrentStatus; + VideoSystemFeature_t m_AvailableFeatures; + + IVideoCommonServices *m_pCommonServices; + + CUtlVector< IVideoMaterial* > m_MaterialList; + CUtlVector< IVideoRecorder* > m_RecorderList; + + static const VideoSystemFeature_t DEFAULT_FEATURE_SET; +}; + +#endif // BINK_VIDEO_H diff --git a/video/video_bink/wscript b/video/video_bink/wscript new file mode 100755 index 00000000..fdcfa9bc --- /dev/null +++ b/video/video_bink/wscript @@ -0,0 +1,55 @@ +#! /usr/bin/env python +# encoding: utf-8 + +from waflib import Utils +import os + +top = '.' 
+PROJECT_NAME = 'video_bink' + +def options(opt): + # stub + return + +def configure(conf): + return + +def build(bld): + source = [ + 'bink_material.cpp', + 'bink_video.cpp', + 'yuv_rgb.c', + '../../public/tier0/memoverride.cpp' + ] + + includes = [ + '.', + '../../public', + '../../public/tier0', + '../../public/tier1', + '../../common', + '../' + ] + bld.env.INCLUDES_SDL2 + + defines = [] + + libs = ['tier0','tier1','tier2','tier3','vstdlib', 'AVCODEC', 'AVUTIL', 'AVFORMAT'] + + if bld.env.DEST_OS == 'win32': + libs += ['USER32'] + + install_path = bld.env.LIBDIR + + bld.shlib( + source = source, + target = PROJECT_NAME, + name = PROJECT_NAME, + features = 'c cxx', + includes = includes, + defines = defines, + use = libs, + install_path = install_path, + subsystem = bld.env.MSVC_SUBSYSTEM, + idx = bld.get_taskgen_count() + ) + diff --git a/video/video_bink/yuv_rgb.c b/video/video_bink/yuv_rgb.c new file mode 100644 index 00000000..041fbaa9 --- /dev/null +++ b/video/video_bink/yuv_rgb.c @@ -0,0 +1,1312 @@ +// Copyright 2016 Adrien Descamps +// Distributed under BSD 3-Clause License + +#include "yuv_rgb.h" + +#include + +#ifdef _MSC_VER +// MSVC does not have __SSE2__ macro + #if (defined(_M_AMD64) || defined(_M_X64) || (_M_IX86_FP == 2)) + #define _YUVRGB_SSE2_ + #endif +#else +// For everything else than MSVC + #ifdef __SSE2__ + #define _YUVRGB_SSE2_ + #endif // __SSE2__ +#endif // _MSC_VER + +uint8_t clamp(int16_t value) +{ + return value<0 ? 0 : (value>255 ? 255 : value); +} + +// Definitions +// +// E'R, E'G, E'B, E'Y, E'Cb and E'Cr refer to the analog signals +// E'R, E'G, E'B and E'Y range is [0:1], while E'Cb and E'Cr range is [-0.5:0.5] +// R, G, B, Y, Cb and Cr refer to the digitalized values +// The digitalized values can use their full range ([0:255] for 8bit values), +// or a subrange (typically [16:235] for Y and [16:240] for CbCr). +// We assume here that RGB range is always [0:255], since it is the case for +// most digitalized images. 
+// For 8bit values : +// * Y = round((YMax-YMin)*E'Y + YMin) +// * Cb = round((CbRange)*E'Cb + 128) +// * Cr = round((CrRange)*E'Cr + 128) +// Where *Min and *Max are the range of each channel +// +// In the analog domain , the RGB to YCbCr transformation is defined as: +// * E'Y = Rf*E'R + Gf*E'G + Bf*E'B +// Where Rf, Gf and Bf are constants defined in each standard, with +// Rf + Gf + Bf = 1 (necessary to ensure that E'Y range is [0:1]) +// * E'Cb = (E'B - E'Y) / CbNorm +// * E'Cr = (E'R - E'Y) / CrNorm +// Where CbNorm and CrNorm are constants, dependent of Rf, Gf, Bf, computed +// to normalize to a [-0.5:0.5] range : CbNorm=2*(1-Bf) and CrNorm=2*(1-Rf) +// +// Algorithms +// +// Most operations will be made in a fixed point format for speed, using +// N bits of precision. In next section the [x] convention is used for +// a fixed point rounded value, that is (int being the c type conversion) +// * [x] = int(x*(2^N)+0.5) +// N can be different for each factor, we simply use the highest value +// that will not overflow in 16 bits intermediate variables. +//. +// For RGB to YCbCr conversion, we start by generating a pseudo Y value +// (noted Y') in fixed point format, using the full range for now. 
+// * Y' = ([Rf]*R + [Gf]*G + [Bf]*B)>>N +// We can then compute Cb and Cr by +// * Cb = ((B - Y')*[CbRange/(255*CbNorm)])>>N + 128 +// * Cr = ((R - Y')*[CrRange/(255*CrNorm)])>>N + 128 +// And finally, we normalize Y to its digital range +// * Y = (Y'*[(YMax-YMin)/255])>>N + YMin +// +// For YCbCr to RGB conversion, we first compute the full range Y' value : +// * Y' = ((Y-YMin)*[255/(YMax-YMin)])>>N +// We can then compute B and R values by : +// * B = ((Cb-128)*[(255*CbNorm)/CbRange])>>N + Y' +// * R = ((Cr-128)*[(255*CrNorm)/CrRange])>>N + Y' +// And finally, for G we know that: +// * G = (Y' - (Rf*R + Bf*B)) / Gf +// From above: +// * G = (Y' - Rf * ((Cr-128)*(255*CrNorm)/CrRange + Y') - Bf * ((Cb-128)*(255*CbNorm)/CbRange + Y')) / Gf +// Since 1-Rf-Bf=Gf, we can take Y' out of the division by Gf, and we get: +// * G = Y' - (Cr-128)*Rf/Gf*(255*CrNorm)/CrRange - (Cb-128)*Bf/Gf*(255*CbNorm)/CbRange +// That we can compute, with fixed point arithmetic, by +// * G = Y' - ((Cr-128)*[Rf/Gf*(255*CrNorm)/CrRange] + (Cb-128)*[Bf/Gf*(255*CbNorm)/CbRange])>>N +// +// Note : in ITU-T T.871(JPEG), Y=Y', so that part could be optimized out + + +#define FIXED_POINT_VALUE(value, precision) ((int)(((value)*(1<r_factor*rgb_ptr1[0] + param->g_factor*rgb_ptr1[1] + param->b_factor*rgb_ptr1[2])>>8; + u_tmp = rgb_ptr1[2]-y_tmp; + v_tmp = rgb_ptr1[0]-y_tmp; + y_ptr1[0]=((y_tmp*param->y_factor)>>7) + param->y_offset; + + y_tmp = (param->r_factor*rgb_ptr1[3] + param->g_factor*rgb_ptr1[4] + param->b_factor*rgb_ptr1[5])>>8; + u_tmp += rgb_ptr1[5]-y_tmp; + v_tmp += rgb_ptr1[3]-y_tmp; + y_ptr1[1]=((y_tmp*param->y_factor)>>7) + param->y_offset; + + y_tmp = (param->r_factor*rgb_ptr2[0] + param->g_factor*rgb_ptr2[1] + param->b_factor*rgb_ptr2[2])>>8; + u_tmp += rgb_ptr2[2]-y_tmp; + v_tmp += rgb_ptr2[0]-y_tmp; + y_ptr2[0]=((y_tmp*param->y_factor)>>7) + param->y_offset; + + y_tmp = (param->r_factor*rgb_ptr2[3] + param->g_factor*rgb_ptr2[4] + param->b_factor*rgb_ptr2[5])>>8; + u_tmp += 
rgb_ptr2[5]-y_tmp;
+			v_tmp += rgb_ptr2[3]-y_tmp;
+			y_ptr2[1]=((y_tmp*param->y_factor)>>7) + param->y_offset;
+
+			u_ptr[0] = (((u_tmp>>2)*param->cb_factor)>>8) + 128;
+			v_ptr[0] = (((v_tmp>>2)*param->cr_factor)>>8) + 128;
+
+			rgb_ptr1 += 6;
+			rgb_ptr2 += 6;
+			y_ptr1 += 2;
+			y_ptr2 += 2;
+			u_ptr += 1;
+			v_ptr += 1;
+		}
+	}
+}
+
+// RGBA (4 bytes/pixel) -> planar YUV420. Processes 2x2 pixel quads: four Y
+// samples per quad, with the (B-Y') and (R-Y') terms of all four pixels
+// averaged (>>2) into one Cb and one Cr sample. Trailing odd row/column, if
+// any, is not written (loops stop at the last even index).
+void rgb32_yuv420_std(
+	uint32_t width, uint32_t height,
+	const uint8_t *RGBA, uint32_t RGBA_stride,
+	uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride,
+	YCbCrType yuv_type)
+{
+	const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]);
+
+	uint32_t x, y;
+	for(y=0; y<(height-1); y+=2)
+	{
+		const uint8_t *rgb_ptr1=RGBA+y*RGBA_stride,
+			*rgb_ptr2=RGBA+(y+1)*RGBA_stride;
+
+		uint8_t *y_ptr1=Y+y*Y_stride,
+			*y_ptr2=Y+(y+1)*Y_stride,
+			*u_ptr=U+(y/2)*UV_stride,
+			*v_ptr=V+(y/2)*UV_stride;
+
+		for(x=0; x<(width-1); x+=2)
+		{
+			// compute yuv for the four pixels, u and v values are summed
+			uint8_t y_tmp;
+			int16_t u_tmp, v_tmp;
+
+			y_tmp = (param->r_factor*rgb_ptr1[0] + param->g_factor*rgb_ptr1[1] + param->b_factor*rgb_ptr1[2])>>8;
+			u_tmp = rgb_ptr1[2]-y_tmp;
+			v_tmp = rgb_ptr1[0]-y_tmp;
+			y_ptr1[0]=((y_tmp*param->y_factor)>>7) + param->y_offset;
+
+			y_tmp = (param->r_factor*rgb_ptr1[4] + param->g_factor*rgb_ptr1[5] + param->b_factor*rgb_ptr1[6])>>8;
+			u_tmp += rgb_ptr1[6]-y_tmp;
+			v_tmp += rgb_ptr1[4]-y_tmp;
+			y_ptr1[1]=((y_tmp*param->y_factor)>>7) + param->y_offset;
+
+			y_tmp = (param->r_factor*rgb_ptr2[0] + param->g_factor*rgb_ptr2[1] + param->b_factor*rgb_ptr2[2])>>8;
+			u_tmp += rgb_ptr2[2]-y_tmp;
+			v_tmp += rgb_ptr2[0]-y_tmp;
+			y_ptr2[0]=((y_tmp*param->y_factor)>>7) + param->y_offset;
+
+			y_tmp = (param->r_factor*rgb_ptr2[4] + param->g_factor*rgb_ptr2[5] + param->b_factor*rgb_ptr2[6])>>8;
+			u_tmp += rgb_ptr2[6]-y_tmp;
+			v_tmp += rgb_ptr2[4]-y_tmp;
+			y_ptr2[1]=((y_tmp*param->y_factor)>>7) + param->y_offset;
+
+			u_ptr[0] = (((u_tmp>>2)*param->cb_factor)>>8) + 128;
+			// BUGFIX: the V (chroma-red) sample must be scaled by cr_factor;
+			// the original copy/paste used cb_factor here, skewing the V
+			// plane (compare rgb24_yuv420_std above and the upstream yuv2rgb
+			// reference implementation).
+			v_ptr[0] = (((v_tmp>>2)*param->cr_factor)>>8) + 
128;
+
+			rgb_ptr1 += 8;
+			rgb_ptr2 += 8;
+			y_ptr1 += 2;
+			y_ptr2 += 2;
+			u_ptr += 1;
+			v_ptr += 1;
+		}
+	}
+}
+
+
+// Planar YUV420 -> packed 24-bit output. Processes 2x2 pixel quads sharing
+// one Cb/Cr sample pair; the three chroma offsets are computed once per quad
+// and applied to all four Y samples.
+// NOTE(review): this variant writes red into byte 2 and blue into byte 0
+// (i.e. BGR memory order), while nv12_rgb24_std/nv21_rgb24_std below write
+// red into byte 0 — presumably intentional for the engine's BGRA-style
+// textures; confirm against the callers before "fixing".
+// Trailing odd row/column, if any, is left untouched (loops stop at the last
+// even index: y<(height-1), x<(width-1)).
+void yuv420_rgb24_std(
+	uint32_t width, uint32_t height,
+	const uint8_t *Y, const uint8_t *U, const uint8_t *V, uint32_t Y_stride, uint32_t UV_stride,
+	uint8_t *RGB, uint32_t RGB_stride,
+	YCbCrType yuv_type)
+{
+	const YUV2RGBParam *const param = &(YUV2RGB[yuv_type]);
+	uint32_t x, y;
+	for(y=0; y<(height-1); y+=2)
+	{
+		const uint8_t *y_ptr1=Y+y*Y_stride,
+			*y_ptr2=Y+(y+1)*Y_stride,
+			*u_ptr=U+(y/2)*UV_stride,
+			*v_ptr=V+(y/2)*UV_stride;
+
+		uint8_t *rgb_ptr1=RGB+y*RGB_stride,
+			*rgb_ptr2=RGB+(y+1)*RGB_stride;
+
+		for(x=0; x<(width-1); x+=2)
+		{
+			int8_t u_tmp, v_tmp;
+			u_tmp = u_ptr[0]-128;
+			v_tmp = v_ptr[0]-128;
+
+			//compute Cb Cr color offsets, common to four pixels
+			int16_t b_cb_offset, r_cr_offset, g_cbcr_offset;
+			b_cb_offset = (param->cb_factor*u_tmp)>>6;
+			r_cr_offset = (param->cr_factor*v_tmp)>>6;
+			g_cbcr_offset = (param->g_cb_factor*u_tmp + param->g_cr_factor*v_tmp)>>7;
+
+			int16_t y_tmp;
+			y_tmp = (param->y_factor*(y_ptr1[0]-param->y_offset))>>7;
+			rgb_ptr1[2] = clamp(y_tmp + r_cr_offset);
+			rgb_ptr1[1] = clamp(y_tmp - g_cbcr_offset);
+			rgb_ptr1[0] = clamp(y_tmp + b_cb_offset);
+
+			y_tmp = (param->y_factor*(y_ptr1[1]-param->y_offset))>>7;
+			rgb_ptr1[5] = clamp(y_tmp + r_cr_offset);
+			rgb_ptr1[4] = clamp(y_tmp - g_cbcr_offset);
+			rgb_ptr1[3] = clamp(y_tmp + b_cb_offset);
+
+			y_tmp = (param->y_factor*(y_ptr2[0]-param->y_offset))>>7;
+			rgb_ptr2[2] = clamp(y_tmp + r_cr_offset);
+			rgb_ptr2[1] = clamp(y_tmp - g_cbcr_offset);
+			rgb_ptr2[0] = clamp(y_tmp + b_cb_offset);
+
+			y_tmp = (param->y_factor*(y_ptr2[1]-param->y_offset))>>7;
+			rgb_ptr2[5] = clamp(y_tmp + r_cr_offset);
+			rgb_ptr2[4] = clamp(y_tmp - g_cbcr_offset);
+			rgb_ptr2[3] = clamp(y_tmp + b_cb_offset);
+
+			rgb_ptr1 += 6;
+			rgb_ptr2 += 6;
+			y_ptr1 += 2;
+			y_ptr2 += 2;
+			u_ptr += 1;
+			v_ptr += 1;
+		}
+	}
+}
+
+// Semi-planar NV12 (interleaved UV plane, U first) -> packed RGB24.
+void nv12_rgb24_std(
+	uint32_t width, uint32_t height,
+	const uint8_t 
*Y, const uint8_t *UV, uint32_t Y_stride, uint32_t UV_stride, + uint8_t *RGB, uint32_t RGB_stride, + YCbCrType yuv_type) +{ + const YUV2RGBParam *const param = &(YUV2RGB[yuv_type]); + uint32_t x, y; + for(y=0; y<(height-1); y+=2) + { + const uint8_t *y_ptr1=Y+y*Y_stride, + *y_ptr2=Y+(y+1)*Y_stride, + *uv_ptr=UV+(y/2)*UV_stride; + + uint8_t *rgb_ptr1=RGB+y*RGB_stride, + *rgb_ptr2=RGB+(y+1)*RGB_stride; + + for(x=0; x<(width-1); x+=2) + { + int8_t u_tmp, v_tmp; + u_tmp = uv_ptr[0]-128; + v_tmp = uv_ptr[1]-128; + + //compute Cb Cr color offsets, common to four pixels + int16_t b_cb_offset, r_cr_offset, g_cbcr_offset; + b_cb_offset = (param->cb_factor*u_tmp)>>6; + r_cr_offset = (param->cr_factor*v_tmp)>>6; + g_cbcr_offset = (param->g_cb_factor*u_tmp + param->g_cr_factor*v_tmp)>>7; + + int16_t y_tmp; + y_tmp = (param->y_factor*(y_ptr1[0]-param->y_offset))>>7; + rgb_ptr1[0] = clamp(y_tmp + r_cr_offset); + rgb_ptr1[1] = clamp(y_tmp - g_cbcr_offset); + rgb_ptr1[2] = clamp(y_tmp + b_cb_offset); + + y_tmp = (param->y_factor*(y_ptr1[1]-param->y_offset))>>7; + rgb_ptr1[3] = clamp(y_tmp + r_cr_offset); + rgb_ptr1[4] = clamp(y_tmp - g_cbcr_offset); + rgb_ptr1[5] = clamp(y_tmp + b_cb_offset); + + y_tmp = (param->y_factor*(y_ptr2[0]-param->y_offset))>>7; + rgb_ptr2[0] = clamp(y_tmp + r_cr_offset); + rgb_ptr2[1] = clamp(y_tmp - g_cbcr_offset); + rgb_ptr2[2] = clamp(y_tmp + b_cb_offset); + + y_tmp = (param->y_factor*(y_ptr2[1]-param->y_offset))>>7; + rgb_ptr2[3] = clamp(y_tmp + r_cr_offset); + rgb_ptr2[4] = clamp(y_tmp - g_cbcr_offset); + rgb_ptr2[5] = clamp(y_tmp + b_cb_offset); + + rgb_ptr1 += 6; + rgb_ptr2 += 6; + y_ptr1 += 2; + y_ptr2 += 2; + uv_ptr += 2; + } + } +} + +void nv21_rgb24_std( + uint32_t width, uint32_t height, + const uint8_t *Y, const uint8_t *UV, uint32_t Y_stride, uint32_t UV_stride, + uint8_t *RGB, uint32_t RGB_stride, + YCbCrType yuv_type) +{ + const YUV2RGBParam *const param = &(YUV2RGB[yuv_type]); + uint32_t x, y; + for(y=0; y<(height-1); y+=2) + { + const 
uint8_t *y_ptr1=Y+y*Y_stride, + *y_ptr2=Y+(y+1)*Y_stride, + *uv_ptr=UV+(y/2)*UV_stride; + + uint8_t *rgb_ptr1=RGB+y*RGB_stride, + *rgb_ptr2=RGB+(y+1)*RGB_stride; + + for(x=0; x<(width-1); x+=2) + { + int8_t u_tmp, v_tmp; + u_tmp = uv_ptr[1]-128; + v_tmp = uv_ptr[0]-128; + + //compute Cb Cr color offsets, common to four pixels + int16_t b_cb_offset, r_cr_offset, g_cbcr_offset; + b_cb_offset = (param->cb_factor*u_tmp)>>6; + r_cr_offset = (param->cr_factor*v_tmp)>>6; + g_cbcr_offset = (param->g_cb_factor*u_tmp + param->g_cr_factor*v_tmp)>>7; + + int16_t y_tmp; + y_tmp = (param->y_factor*(y_ptr1[0]-param->y_offset))>>7; + rgb_ptr1[0] = clamp(y_tmp + r_cr_offset); + rgb_ptr1[1] = clamp(y_tmp - g_cbcr_offset); + rgb_ptr1[2] = clamp(y_tmp + b_cb_offset); + + y_tmp = (param->y_factor*(y_ptr1[1]-param->y_offset))>>7; + rgb_ptr1[3] = clamp(y_tmp + r_cr_offset); + rgb_ptr1[4] = clamp(y_tmp - g_cbcr_offset); + rgb_ptr1[5] = clamp(y_tmp + b_cb_offset); + + y_tmp = (param->y_factor*(y_ptr2[0]-param->y_offset))>>7; + rgb_ptr2[0] = clamp(y_tmp + r_cr_offset); + rgb_ptr2[1] = clamp(y_tmp - g_cbcr_offset); + rgb_ptr2[2] = clamp(y_tmp + b_cb_offset); + + y_tmp = (param->y_factor*(y_ptr2[1]-param->y_offset))>>7; + rgb_ptr2[3] = clamp(y_tmp + r_cr_offset); + rgb_ptr2[4] = clamp(y_tmp - g_cbcr_offset); + rgb_ptr2[5] = clamp(y_tmp + b_cb_offset); + + rgb_ptr1 += 6; + rgb_ptr2 += 6; + y_ptr1 += 2; + y_ptr2 += 2; + uv_ptr += 2; + } + } +} + + +#ifdef _YUVRGB_SSE2_ + +//see rgb.txt +#define UNPACK_RGB24_32_STEP(RS1, RS2, RS3, RS4, RS5, RS6, RD1, RD2, RD3, RD4, RD5, RD6) \ +RD1 = _mm_unpacklo_epi8(RS1, RS4); \ +RD2 = _mm_unpackhi_epi8(RS1, RS4); \ +RD3 = _mm_unpacklo_epi8(RS2, RS5); \ +RD4 = _mm_unpackhi_epi8(RS2, RS5); \ +RD5 = _mm_unpacklo_epi8(RS3, RS6); \ +RD6 = _mm_unpackhi_epi8(RS3, RS6); + +#define RGB2YUV_16(R, G, B, Y, U, V) \ +Y = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->r_factor)), \ + _mm_mullo_epi16(G, _mm_set1_epi16(param->g_factor))); \ +Y = _mm_add_epi16(Y, 
_mm_mullo_epi16(B, _mm_set1_epi16(param->b_factor))); \ +Y = _mm_srli_epi16(Y, 8); \ +U = _mm_mullo_epi16(_mm_sub_epi16(B, Y), _mm_set1_epi16(param->cb_factor)); \ +U = _mm_add_epi16(_mm_srai_epi16(U, 8), _mm_set1_epi16(128)); \ +V = _mm_mullo_epi16(_mm_sub_epi16(R, Y), _mm_set1_epi16(param->cr_factor)); \ +V = _mm_add_epi16(_mm_srai_epi16(V, 8), _mm_set1_epi16(128)); \ +Y = _mm_add_epi16(_mm_srli_epi16(_mm_mullo_epi16(Y, _mm_set1_epi16(param->y_factor)), 7), _mm_set1_epi16(param->y_offset)); + +#define RGB2YUV_32 \ + __m128i r_16, g_16, b_16; \ + __m128i y1_16, y2_16, cb1_16, cb2_16, cr1_16, cr2_16, Y, cb, cr; \ + __m128i tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; \ + __m128i rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1)), \ + rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+16)), \ + rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+32)), \ + rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr2)), \ + rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2+16)), \ + rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+32)); \ + /* unpack rgb24 data to r, g and b data in separate channels*/ \ + /* see rgb.txt to get an idea of the algorithm, note that we only go to the next to last step*/ \ + /* here, because averaging in horizontal direction is easier like this*/ \ + /* The last step is applied further on the Y channel only*/ \ + UNPACK_RGB24_32_STEP(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6) \ + UNPACK_RGB24_32_STEP(tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, rgb1, rgb2, rgb3, rgb4, rgb5, rgb6) \ + UNPACK_RGB24_32_STEP(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6) \ + UNPACK_RGB24_32_STEP(tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, rgb1, rgb2, rgb3, rgb4, rgb5, rgb6) \ + /* first compute Y', (B-Y') and (R-Y'), in 16bits values, for the first line */ \ + /* Y is saved for each pixel, while only sums of (B-Y') and (R-Y') for pairs of adjacents pixels are saved*/ \ + r_16 = _mm_unpacklo_epi8(rgb1, _mm_setzero_si128()); \ + g_16 = _mm_unpacklo_epi8(rgb2, _mm_setzero_si128()); 
\ + b_16 = _mm_unpacklo_epi8(rgb3, _mm_setzero_si128()); \ + y1_16 = _mm_add_epi16(_mm_mullo_epi16(r_16, _mm_set1_epi16(param->r_factor)), \ + _mm_mullo_epi16(g_16, _mm_set1_epi16(param->g_factor))); \ + y1_16 = _mm_add_epi16(y1_16, _mm_mullo_epi16(b_16, _mm_set1_epi16(param->b_factor))); \ + y1_16 = _mm_srli_epi16(y1_16, 8); \ + cb1_16 = _mm_sub_epi16(b_16, y1_16); \ + cr1_16 = _mm_sub_epi16(r_16, y1_16); \ + r_16 = _mm_unpacklo_epi8(rgb4, _mm_setzero_si128()); \ + g_16 = _mm_unpacklo_epi8(rgb5, _mm_setzero_si128()); \ + b_16 = _mm_unpacklo_epi8(rgb6, _mm_setzero_si128()); \ + y2_16 = _mm_add_epi16(_mm_mullo_epi16(r_16, _mm_set1_epi16(param->r_factor)), \ + _mm_mullo_epi16(g_16, _mm_set1_epi16(param->g_factor))); \ + y2_16 = _mm_add_epi16(y2_16, _mm_mullo_epi16(b_16, _mm_set1_epi16(param->b_factor))); \ + y2_16 = _mm_srli_epi16(y2_16, 8); \ + cb1_16 = _mm_add_epi16(cb1_16, _mm_sub_epi16(b_16, y2_16)); \ + cr1_16 = _mm_add_epi16(cr1_16, _mm_sub_epi16(r_16, y2_16)); \ + /* Rescale Y' to Y, pack it to 8bit values and save it */ \ + y1_16 = _mm_add_epi16(_mm_srli_epi16(_mm_mullo_epi16(y1_16, _mm_set1_epi16(param->y_factor)), 7), _mm_set1_epi16(param->y_offset)); \ + y2_16 = _mm_add_epi16(_mm_srli_epi16(_mm_mullo_epi16(y2_16, _mm_set1_epi16(param->y_factor)), 7), _mm_set1_epi16(param->y_offset)); \ + Y = _mm_packus_epi16(y1_16, y2_16); \ + Y = _mm_unpackhi_epi8(_mm_slli_si128(Y, 8), Y); \ + SAVE_SI128((__m128i*)(y_ptr1), Y); \ + /* same for the second line, compute Y', (B-Y') and (R-Y'), in 16bits values */ \ + /* Y is saved for each pixel, while only sums of (B-Y') and (R-Y') for pairs of adjacents pixels are added to the previous values*/ \ + r_16 = _mm_unpackhi_epi8(rgb1, _mm_setzero_si128()); \ + g_16 = _mm_unpackhi_epi8(rgb2, _mm_setzero_si128()); \ + b_16 = _mm_unpackhi_epi8(rgb3, _mm_setzero_si128()); \ + y1_16 = _mm_add_epi16(_mm_mullo_epi16(r_16, _mm_set1_epi16(param->r_factor)), \ + _mm_mullo_epi16(g_16, _mm_set1_epi16(param->g_factor))); \ + y1_16 = 
_mm_add_epi16(y1_16, _mm_mullo_epi16(b_16, _mm_set1_epi16(param->b_factor))); \ + y1_16 = _mm_srli_epi16(y1_16, 8); \ + cb1_16 = _mm_add_epi16(cb1_16, _mm_sub_epi16(b_16, y1_16)); \ + cr1_16 = _mm_add_epi16(cr1_16, _mm_sub_epi16(r_16, y1_16)); \ + r_16 = _mm_unpackhi_epi8(rgb4, _mm_setzero_si128()); \ + g_16 = _mm_unpackhi_epi8(rgb5, _mm_setzero_si128()); \ + b_16 = _mm_unpackhi_epi8(rgb6, _mm_setzero_si128()); \ + y2_16 = _mm_add_epi16(_mm_mullo_epi16(r_16, _mm_set1_epi16(param->r_factor)), \ + _mm_mullo_epi16(g_16, _mm_set1_epi16(param->g_factor))); \ + y2_16 = _mm_add_epi16(y2_16, _mm_mullo_epi16(b_16, _mm_set1_epi16(param->b_factor))); \ + y2_16 = _mm_srli_epi16(y2_16, 8); \ + cb1_16 = _mm_add_epi16(cb1_16, _mm_sub_epi16(b_16, y2_16)); \ + cr1_16 = _mm_add_epi16(cr1_16, _mm_sub_epi16(r_16, y2_16)); \ + /* Rescale Y' to Y, pack it to 8bit values and save it */ \ + y1_16 = _mm_add_epi16(_mm_srli_epi16(_mm_mullo_epi16(y1_16, _mm_set1_epi16(param->y_factor)), 7), _mm_set1_epi16(param->y_offset)); \ + y2_16 = _mm_add_epi16(_mm_srli_epi16(_mm_mullo_epi16(y2_16, _mm_set1_epi16(param->y_factor)), 7), _mm_set1_epi16(param->y_offset)); \ + Y = _mm_packus_epi16(y1_16, y2_16); \ + Y = _mm_unpackhi_epi8(_mm_slli_si128(Y, 8), Y); \ + SAVE_SI128((__m128i*)(y_ptr2), Y); \ + /* Rescale Cb and Cr to their final range */ \ + cb1_16 = _mm_add_epi16(_mm_srai_epi16(_mm_mullo_epi16(_mm_srai_epi16(cb1_16, 2), _mm_set1_epi16(param->cb_factor)), 8), _mm_set1_epi16(128)); \ + cr1_16 = _mm_add_epi16(_mm_srai_epi16(_mm_mullo_epi16(_mm_srai_epi16(cr1_16, 2), _mm_set1_epi16(param->cr_factor)), 8), _mm_set1_epi16(128)); \ + \ + /* do the same again with next data */ \ + rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1+48)), \ + rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+64)), \ + rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+80)), \ + rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr2+48)), \ + rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2+64)), \ + rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+80)); \ + /* 
unpack rgb24 data to r, g and b data in separate channels*/ \ + /* see rgb.txt to get an idea of the algorithm, note that we only go to the next to last step*/ \ + /* here, because averaging in horizontal direction is easier like this*/ \ + /* The last step is applied further on the Y channel only*/ \ + UNPACK_RGB24_32_STEP(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6) \ + UNPACK_RGB24_32_STEP(tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, rgb1, rgb2, rgb3, rgb4, rgb5, rgb6) \ + UNPACK_RGB24_32_STEP(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6) \ + UNPACK_RGB24_32_STEP(tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, rgb1, rgb2, rgb3, rgb4, rgb5, rgb6) \ + /* first compute Y', (B-Y') and (R-Y'), in 16bits values, for the first line */ \ + /* Y is saved for each pixel, while only sums of (B-Y') and (R-Y') for pairs of adjacents pixels are saved*/ \ + r_16 = _mm_unpacklo_epi8(rgb1, _mm_setzero_si128()); \ + g_16 = _mm_unpacklo_epi8(rgb2, _mm_setzero_si128()); \ + b_16 = _mm_unpacklo_epi8(rgb3, _mm_setzero_si128()); \ + y1_16 = _mm_add_epi16(_mm_mullo_epi16(r_16, _mm_set1_epi16(param->r_factor)), \ + _mm_mullo_epi16(g_16, _mm_set1_epi16(param->g_factor))); \ + y1_16 = _mm_add_epi16(y1_16, _mm_mullo_epi16(b_16, _mm_set1_epi16(param->b_factor))); \ + y1_16 = _mm_srli_epi16(y1_16, 8); \ + cb2_16 = _mm_sub_epi16(b_16, y1_16); \ + cr2_16 = _mm_sub_epi16(r_16, y1_16); \ + r_16 = _mm_unpacklo_epi8(rgb4, _mm_setzero_si128()); \ + g_16 = _mm_unpacklo_epi8(rgb5, _mm_setzero_si128()); \ + b_16 = _mm_unpacklo_epi8(rgb6, _mm_setzero_si128()); \ + y2_16 = _mm_add_epi16(_mm_mullo_epi16(r_16, _mm_set1_epi16(param->r_factor)), \ + _mm_mullo_epi16(g_16, _mm_set1_epi16(param->g_factor))); \ + y2_16 = _mm_add_epi16(y2_16, _mm_mullo_epi16(b_16, _mm_set1_epi16(param->b_factor))); \ + y2_16 = _mm_srli_epi16(y2_16, 8); \ + cb2_16 = _mm_add_epi16(cb2_16, _mm_sub_epi16(b_16, y2_16)); \ + cr2_16 = _mm_add_epi16(cr2_16, _mm_sub_epi16(r_16, y2_16)); \ + /* Rescale Y' 
to Y, pack it to 8bit values and save it */ \ + y1_16 = _mm_add_epi16(_mm_srli_epi16(_mm_mullo_epi16(y1_16, _mm_set1_epi16(param->y_factor)), 7), _mm_set1_epi16(param->y_offset)); \ + y2_16 = _mm_add_epi16(_mm_srli_epi16(_mm_mullo_epi16(y2_16, _mm_set1_epi16(param->y_factor)), 7), _mm_set1_epi16(param->y_offset)); \ + Y = _mm_packus_epi16(y1_16, y2_16); \ + Y = _mm_unpackhi_epi8(_mm_slli_si128(Y, 8), Y); \ + SAVE_SI128((__m128i*)(y_ptr1+16), Y); \ + /* same for the second line, compute Y', (B-Y') and (R-Y'), in 16bits values */ \ + /* Y is saved for each pixel, while only sums of (B-Y') and (R-Y') for pairs of adjacents pixels are added to the previous values*/ \ + r_16 = _mm_unpackhi_epi8(rgb1, _mm_setzero_si128()); \ + g_16 = _mm_unpackhi_epi8(rgb2, _mm_setzero_si128()); \ + b_16 = _mm_unpackhi_epi8(rgb3, _mm_setzero_si128()); \ + y1_16 = _mm_add_epi16(_mm_mullo_epi16(r_16, _mm_set1_epi16(param->r_factor)), \ + _mm_mullo_epi16(g_16, _mm_set1_epi16(param->g_factor))); \ + y1_16 = _mm_add_epi16(y1_16, _mm_mullo_epi16(b_16, _mm_set1_epi16(param->b_factor))); \ + y1_16 = _mm_srli_epi16(y1_16, 8); \ + cb2_16 = _mm_add_epi16(cb2_16, _mm_sub_epi16(b_16, y1_16)); \ + cr2_16 = _mm_add_epi16(cr2_16, _mm_sub_epi16(r_16, y1_16)); \ + r_16 = _mm_unpackhi_epi8(rgb4, _mm_setzero_si128()); \ + g_16 = _mm_unpackhi_epi8(rgb5, _mm_setzero_si128()); \ + b_16 = _mm_unpackhi_epi8(rgb6, _mm_setzero_si128()); \ + y2_16 = _mm_add_epi16(_mm_mullo_epi16(r_16, _mm_set1_epi16(param->r_factor)), \ + _mm_mullo_epi16(g_16, _mm_set1_epi16(param->g_factor))); \ + y2_16 = _mm_add_epi16(y2_16, _mm_mullo_epi16(b_16, _mm_set1_epi16(param->b_factor))); \ + y2_16 = _mm_srli_epi16(y2_16, 8); \ + cb2_16 = _mm_add_epi16(cb2_16, _mm_sub_epi16(b_16, y2_16)); \ + cr2_16 = _mm_add_epi16(cr2_16, _mm_sub_epi16(r_16, y2_16)); \ + /* Rescale Y' to Y, pack it to 8bit values and save it */ \ + y1_16 = _mm_add_epi16(_mm_srli_epi16(_mm_mullo_epi16(y1_16, _mm_set1_epi16(param->y_factor)), 7), 
_mm_set1_epi16(param->y_offset)); \ + y2_16 = _mm_add_epi16(_mm_srli_epi16(_mm_mullo_epi16(y2_16, _mm_set1_epi16(param->y_factor)), 7), _mm_set1_epi16(param->y_offset)); \ + Y = _mm_packus_epi16(y1_16, y2_16); \ + Y = _mm_unpackhi_epi8(_mm_slli_si128(Y, 8), Y); \ + SAVE_SI128((__m128i*)(y_ptr2+16), Y); \ + /* Rescale Cb and Cr to their final range */ \ + cb2_16 = _mm_add_epi16(_mm_srai_epi16(_mm_mullo_epi16(_mm_srai_epi16(cb2_16, 2), _mm_set1_epi16(param->cb_factor)), 8), _mm_set1_epi16(128)); \ + cr2_16 = _mm_add_epi16(_mm_srai_epi16(_mm_mullo_epi16(_mm_srai_epi16(cr2_16, 2), _mm_set1_epi16(param->cr_factor)), 8), _mm_set1_epi16(128)); \ + /* Pack and save Cb Cr */ \ + cb = _mm_packus_epi16(cb1_16, cb2_16); \ + cr = _mm_packus_epi16(cr1_16, cr2_16); \ + SAVE_SI128((__m128i*)(u_ptr), cb); \ + SAVE_SI128((__m128i*)(v_ptr), cr); + + +void rgb24_yuv420_sse(uint32_t width, uint32_t height, + const uint8_t *RGB, uint32_t RGB_stride, + uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, + YCbCrType yuv_type) +{ + #define LOAD_SI128 _mm_load_si128 + #define SAVE_SI128 _mm_stream_si128 + const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]); + + uint32_t x, y; + for(y=0; y<(height-1); y+=2) + { + const uint8_t *rgb_ptr1=RGB+y*RGB_stride, + *rgb_ptr2=RGB+(y+1)*RGB_stride; + + uint8_t *y_ptr1=Y+y*Y_stride, + *y_ptr2=Y+(y+1)*Y_stride, + *u_ptr=U+(y/2)*UV_stride, + *v_ptr=V+(y/2)*UV_stride; + + for(x=0; x<(width-31); x+=32) + { + RGB2YUV_32 + + rgb_ptr1+=96; + rgb_ptr2+=96; + y_ptr1+=32; + y_ptr2+=32; + u_ptr+=16; + v_ptr+=16; + } + } + #undef LOAD_SI128 + #undef SAVE_SI128 +} + +void rgb24_yuv420_sseu(uint32_t width, uint32_t height, + const uint8_t *RGB, uint32_t RGB_stride, + uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, + YCbCrType yuv_type) +{ + #define LOAD_SI128 _mm_loadu_si128 + #define SAVE_SI128 _mm_storeu_si128 + const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]); + + uint32_t x, y; + for(y=0; y<(height-1); 
y+=2) + { + const uint8_t *rgb_ptr1=RGB+y*RGB_stride, + *rgb_ptr2=RGB+(y+1)*RGB_stride; + + uint8_t *y_ptr1=Y+y*Y_stride, + *y_ptr2=Y+(y+1)*Y_stride, + *u_ptr=U+(y/2)*UV_stride, + *v_ptr=V+(y/2)*UV_stride; + + for(x=0; x<(width-31); x+=32) + { + RGB2YUV_32 + + rgb_ptr1+=96; + rgb_ptr2+=96; + y_ptr1+=32; + y_ptr2+=32; + u_ptr+=16; + v_ptr+=16; + } + } + #undef LOAD_SI128 + #undef SAVE_SI128 +} + + +// see rgba.txt +#define UNPACK_RGB32_32_STEP(RS1, RS2, RS3, RS4, RS5, RS6, RS7, RS8, RD1, RD2, RD3, RD4, RD5, RD6, RD7, RD8) \ +RD1 = _mm_unpacklo_epi8(RS1, RS5); \ +RD2 = _mm_unpackhi_epi8(RS1, RS5); \ +RD3 = _mm_unpacklo_epi8(RS2, RS6); \ +RD4 = _mm_unpackhi_epi8(RS2, RS6); \ +RD5 = _mm_unpacklo_epi8(RS3, RS7); \ +RD6 = _mm_unpackhi_epi8(RS3, RS7); \ +RD7 = _mm_unpacklo_epi8(RS4, RS8); \ +RD8 = _mm_unpackhi_epi8(RS4, RS8); + + +#define RGBA2YUV_32 \ + __m128i r_16, g_16, b_16; \ + __m128i y1_16, y2_16, cb1_16, cb2_16, cr1_16, cr2_16, Y, cb, cr; \ + __m128i tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; \ + __m128i rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1)), \ + rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+16)), \ + rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+32)), \ + rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr1+48)), \ + rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2)), \ + rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+16)), \ + rgb7 = LOAD_SI128((const __m128i*)(rgb_ptr2+32)), \ + rgb8 = LOAD_SI128((const __m128i*)(rgb_ptr2+48)); \ + /* unpack rgb24 data to r, g and b data in separate channels*/ \ + /* see rgb.txt to get an idea of the algorithm, note that we only go to the next to last step*/ \ + /* here, because averaging in horizontal direction is easier like this*/ \ + /* The last step is applied further on the Y channel only*/ \ + UNPACK_RGB32_32_STEP(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, rgb7, rgb8, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8) \ + UNPACK_RGB32_32_STEP(tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, rgb7, rgb8) 
\ + UNPACK_RGB32_32_STEP(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, rgb7, rgb8, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8) \ + UNPACK_RGB32_32_STEP(tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, rgb7, rgb8) \ + /* first compute Y', (B-Y') and (R-Y'), in 16bits values, for the first line */ \ + /* Y is saved for each pixel, while only sums of (B-Y') and (R-Y') for pairs of adjacents pixels are saved*/ \ + r_16 = _mm_unpacklo_epi8(rgb1, _mm_setzero_si128()); \ + g_16 = _mm_unpacklo_epi8(rgb2, _mm_setzero_si128()); \ + b_16 = _mm_unpacklo_epi8(rgb3, _mm_setzero_si128()); \ + y1_16 = _mm_add_epi16(_mm_mullo_epi16(r_16, _mm_set1_epi16(param->r_factor)), \ + _mm_mullo_epi16(g_16, _mm_set1_epi16(param->g_factor))); \ + y1_16 = _mm_add_epi16(y1_16, _mm_mullo_epi16(b_16, _mm_set1_epi16(param->b_factor))); \ + y1_16 = _mm_srli_epi16(y1_16, 8); \ + cb1_16 = _mm_sub_epi16(b_16, y1_16); \ + cr1_16 = _mm_sub_epi16(r_16, y1_16); \ + r_16 = _mm_unpacklo_epi8(rgb5, _mm_setzero_si128()); \ + g_16 = _mm_unpacklo_epi8(rgb6, _mm_setzero_si128()); \ + b_16 = _mm_unpacklo_epi8(rgb7, _mm_setzero_si128()); \ + y2_16 = _mm_add_epi16(_mm_mullo_epi16(r_16, _mm_set1_epi16(param->r_factor)), \ + _mm_mullo_epi16(g_16, _mm_set1_epi16(param->g_factor))); \ + y2_16 = _mm_add_epi16(y2_16, _mm_mullo_epi16(b_16, _mm_set1_epi16(param->b_factor))); \ + y2_16 = _mm_srli_epi16(y2_16, 8); \ + cb1_16 = _mm_add_epi16(cb1_16, _mm_sub_epi16(b_16, y2_16)); \ + cr1_16 = _mm_add_epi16(cr1_16, _mm_sub_epi16(r_16, y2_16)); \ + /* Rescale Y' to Y, pack it to 8bit values and save it */ \ + y1_16 = _mm_add_epi16(_mm_srli_epi16(_mm_mullo_epi16(y1_16, _mm_set1_epi16(param->y_factor)), 7), _mm_set1_epi16(param->y_offset)); \ + y2_16 = _mm_add_epi16(_mm_srli_epi16(_mm_mullo_epi16(y2_16, _mm_set1_epi16(param->y_factor)), 7), _mm_set1_epi16(param->y_offset)); \ + Y = _mm_packus_epi16(y1_16, y2_16); \ + Y = _mm_unpackhi_epi8(_mm_slli_si128(Y, 8), Y); \ + SAVE_SI128((__m128i*)(y_ptr1), Y); \ 
+ /* same for the second line, compute Y', (B-Y') and (R-Y'), in 16bits values */ \ + /* Y is saved for each pixel, while only sums of (B-Y') and (R-Y') for pairs of adjacents pixels are added to the previous values*/ \ + r_16 = _mm_unpackhi_epi8(rgb1, _mm_setzero_si128()); \ + g_16 = _mm_unpackhi_epi8(rgb2, _mm_setzero_si128()); \ + b_16 = _mm_unpackhi_epi8(rgb3, _mm_setzero_si128()); \ + y1_16 = _mm_add_epi16(_mm_mullo_epi16(r_16, _mm_set1_epi16(param->r_factor)), \ + _mm_mullo_epi16(g_16, _mm_set1_epi16(param->g_factor))); \ + y1_16 = _mm_add_epi16(y1_16, _mm_mullo_epi16(b_16, _mm_set1_epi16(param->b_factor))); \ + y1_16 = _mm_srli_epi16(y1_16, 8); \ + cb1_16 = _mm_add_epi16(cb1_16, _mm_sub_epi16(b_16, y1_16)); \ + cr1_16 = _mm_add_epi16(cr1_16, _mm_sub_epi16(r_16, y1_16)); \ + r_16 = _mm_unpackhi_epi8(rgb5, _mm_setzero_si128()); \ + g_16 = _mm_unpackhi_epi8(rgb6, _mm_setzero_si128()); \ + b_16 = _mm_unpackhi_epi8(rgb7, _mm_setzero_si128()); \ + y2_16 = _mm_add_epi16(_mm_mullo_epi16(r_16, _mm_set1_epi16(param->r_factor)), \ + _mm_mullo_epi16(g_16, _mm_set1_epi16(param->g_factor))); \ + y2_16 = _mm_add_epi16(y2_16, _mm_mullo_epi16(b_16, _mm_set1_epi16(param->b_factor))); \ + y2_16 = _mm_srli_epi16(y2_16, 8); \ + cb1_16 = _mm_add_epi16(cb1_16, _mm_sub_epi16(b_16, y2_16)); \ + cr1_16 = _mm_add_epi16(cr1_16, _mm_sub_epi16(r_16, y2_16)); \ + /* Rescale Y' to Y, pack it to 8bit values and save it */ \ + y1_16 = _mm_add_epi16(_mm_srli_epi16(_mm_mullo_epi16(y1_16, _mm_set1_epi16(param->y_factor)), 7), _mm_set1_epi16(param->y_offset)); \ + y2_16 = _mm_add_epi16(_mm_srli_epi16(_mm_mullo_epi16(y2_16, _mm_set1_epi16(param->y_factor)), 7), _mm_set1_epi16(param->y_offset)); \ + Y = _mm_packus_epi16(y1_16, y2_16); \ + Y = _mm_unpackhi_epi8(_mm_slli_si128(Y, 8), Y); \ + SAVE_SI128((__m128i*)(y_ptr2), Y); \ + /* Rescale Cb and Cr to their final range */ \ + cb1_16 = _mm_add_epi16(_mm_srai_epi16(_mm_mullo_epi16(_mm_srai_epi16(cb1_16, 2), _mm_set1_epi16(param->cb_factor)), 8), 
_mm_set1_epi16(128)); \ + cr1_16 = _mm_add_epi16(_mm_srai_epi16(_mm_mullo_epi16(_mm_srai_epi16(cr1_16, 2), _mm_set1_epi16(param->cr_factor)), 8), _mm_set1_epi16(128)); \ + \ + /* do the same again with next data */ \ + rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1+64)), \ + rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+80)), \ + rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+96)), \ + rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr1+112)), \ + rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2+64)), \ + rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+80)), \ + rgb7 = LOAD_SI128((const __m128i*)(rgb_ptr2+96)), \ + rgb8 = LOAD_SI128((const __m128i*)(rgb_ptr2+112)); \ + /* unpack rgb24 data to r, g and b data in separate channels*/ \ + /* see rgb.txt to get an idea of the algorithm, note that we only go to the next to last step*/ \ + /* here, because averaging in horizontal direction is easier like this*/ \ + /* The last step is applied further on the Y channel only*/ \ + UNPACK_RGB32_32_STEP(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, rgb7, rgb8, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8) \ + UNPACK_RGB32_32_STEP(tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, rgb7, rgb8) \ + UNPACK_RGB32_32_STEP(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, rgb7, rgb8, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8) \ + UNPACK_RGB32_32_STEP(tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, rgb7, rgb8) \ + /* first compute Y', (B-Y') and (R-Y'), in 16bits values, for the first line */ \ + /* Y is saved for each pixel, while only sums of (B-Y') and (R-Y') for pairs of adjacents pixels are saved*/ \ + r_16 = _mm_unpacklo_epi8(rgb1, _mm_setzero_si128()); \ + g_16 = _mm_unpacklo_epi8(rgb2, _mm_setzero_si128()); \ + b_16 = _mm_unpacklo_epi8(rgb3, _mm_setzero_si128()); \ + y1_16 = _mm_add_epi16(_mm_mullo_epi16(r_16, _mm_set1_epi16(param->r_factor)), \ + _mm_mullo_epi16(g_16, _mm_set1_epi16(param->g_factor))); \ + y1_16 = _mm_add_epi16(y1_16, 
_mm_mullo_epi16(b_16, _mm_set1_epi16(param->b_factor))); \ + y1_16 = _mm_srli_epi16(y1_16, 8); \ + cb2_16 = _mm_sub_epi16(b_16, y1_16); \ + cr2_16 = _mm_sub_epi16(r_16, y1_16); \ + r_16 = _mm_unpacklo_epi8(rgb5, _mm_setzero_si128()); \ + g_16 = _mm_unpacklo_epi8(rgb6, _mm_setzero_si128()); \ + b_16 = _mm_unpacklo_epi8(rgb7, _mm_setzero_si128()); \ + y2_16 = _mm_add_epi16(_mm_mullo_epi16(r_16, _mm_set1_epi16(param->r_factor)), \ + _mm_mullo_epi16(g_16, _mm_set1_epi16(param->g_factor))); \ + y2_16 = _mm_add_epi16(y2_16, _mm_mullo_epi16(b_16, _mm_set1_epi16(param->b_factor))); \ + y2_16 = _mm_srli_epi16(y2_16, 8); \ + cb2_16 = _mm_add_epi16(cb2_16, _mm_sub_epi16(b_16, y2_16)); \ + cr2_16 = _mm_add_epi16(cr2_16, _mm_sub_epi16(r_16, y2_16)); \ + /* Rescale Y' to Y, pack it to 8bit values and save it */ \ + y1_16 = _mm_add_epi16(_mm_srli_epi16(_mm_mullo_epi16(y1_16, _mm_set1_epi16(param->y_factor)), 7), _mm_set1_epi16(param->y_offset)); \ + y2_16 = _mm_add_epi16(_mm_srli_epi16(_mm_mullo_epi16(y2_16, _mm_set1_epi16(param->y_factor)), 7), _mm_set1_epi16(param->y_offset)); \ + Y = _mm_packus_epi16(y1_16, y2_16); \ + Y = _mm_unpackhi_epi8(_mm_slli_si128(Y, 8), Y); \ + SAVE_SI128((__m128i*)(y_ptr1+16), Y); \ + /* same for the second line, compute Y', (B-Y') and (R-Y'), in 16bits values */ \ + /* Y is saved for each pixel, while only sums of (B-Y') and (R-Y') for pairs of adjacents pixels are added to the previous values*/ \ + r_16 = _mm_unpackhi_epi8(rgb1, _mm_setzero_si128()); \ + g_16 = _mm_unpackhi_epi8(rgb2, _mm_setzero_si128()); \ + b_16 = _mm_unpackhi_epi8(rgb3, _mm_setzero_si128()); \ + y1_16 = _mm_add_epi16(_mm_mullo_epi16(r_16, _mm_set1_epi16(param->r_factor)), \ + _mm_mullo_epi16(g_16, _mm_set1_epi16(param->g_factor))); \ + y1_16 = _mm_add_epi16(y1_16, _mm_mullo_epi16(b_16, _mm_set1_epi16(param->b_factor))); \ + y1_16 = _mm_srli_epi16(y1_16, 8); \ + cb2_16 = _mm_add_epi16(cb2_16, _mm_sub_epi16(b_16, y1_16)); \ + cr2_16 = _mm_add_epi16(cr2_16, _mm_sub_epi16(r_16, 
y1_16)); \ + r_16 = _mm_unpackhi_epi8(rgb5, _mm_setzero_si128()); \ + g_16 = _mm_unpackhi_epi8(rgb6, _mm_setzero_si128()); \ + b_16 = _mm_unpackhi_epi8(rgb7, _mm_setzero_si128()); \ + y2_16 = _mm_add_epi16(_mm_mullo_epi16(r_16, _mm_set1_epi16(param->r_factor)), \ + _mm_mullo_epi16(g_16, _mm_set1_epi16(param->g_factor))); \ + y2_16 = _mm_add_epi16(y2_16, _mm_mullo_epi16(b_16, _mm_set1_epi16(param->b_factor))); \ + y2_16 = _mm_srli_epi16(y2_16, 8); \ + cb2_16 = _mm_add_epi16(cb2_16, _mm_sub_epi16(b_16, y2_16)); \ + cr2_16 = _mm_add_epi16(cr2_16, _mm_sub_epi16(r_16, y2_16)); \ + /* Rescale Y' to Y, pack it to 8bit values and save it */ \ + y1_16 = _mm_add_epi16(_mm_srli_epi16(_mm_mullo_epi16(y1_16, _mm_set1_epi16(param->y_factor)), 7), _mm_set1_epi16(param->y_offset)); \ + y2_16 = _mm_add_epi16(_mm_srli_epi16(_mm_mullo_epi16(y2_16, _mm_set1_epi16(param->y_factor)), 7), _mm_set1_epi16(param->y_offset)); \ + Y = _mm_packus_epi16(y1_16, y2_16); \ + Y = _mm_unpackhi_epi8(_mm_slli_si128(Y, 8), Y); \ + SAVE_SI128((__m128i*)(y_ptr2+16), Y); \ + /* Rescale Cb and Cr to their final range */ \ + cb2_16 = _mm_add_epi16(_mm_srai_epi16(_mm_mullo_epi16(_mm_srai_epi16(cb2_16, 2), _mm_set1_epi16(param->cb_factor)), 8), _mm_set1_epi16(128)); \ + cr2_16 = _mm_add_epi16(_mm_srai_epi16(_mm_mullo_epi16(_mm_srai_epi16(cr2_16, 2), _mm_set1_epi16(param->cr_factor)), 8), _mm_set1_epi16(128)); \ + /* Pack and save Cb Cr */ \ + cb = _mm_packus_epi16(cb1_16, cb2_16); \ + cr = _mm_packus_epi16(cr1_16, cr2_16); \ + SAVE_SI128((__m128i*)(u_ptr), cb); \ + SAVE_SI128((__m128i*)(v_ptr), cr); + +void rgb32_yuv420_sse(uint32_t width, uint32_t height, + const uint8_t *RGBA, uint32_t RGBA_stride, + uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, + YCbCrType yuv_type) +{ + #define LOAD_SI128 _mm_load_si128 + #define SAVE_SI128 _mm_stream_si128 + const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]); + + uint32_t x, y; + for(y=0; y<(height-1); y+=2) + { + const uint8_t 
*rgb_ptr1=RGBA+y*RGBA_stride, + *rgb_ptr2=RGBA+(y+1)*RGBA_stride; + + uint8_t *y_ptr1=Y+y*Y_stride, + *y_ptr2=Y+(y+1)*Y_stride, + *u_ptr=U+(y/2)*UV_stride, + *v_ptr=V+(y/2)*UV_stride; + + for(x=0; x<(width-31); x+=32) + { + RGBA2YUV_32 + + rgb_ptr1+=128; + rgb_ptr2+=128; + y_ptr1+=32; + y_ptr2+=32; + u_ptr+=16; + v_ptr+=16; + } + } + #undef LOAD_SI128 + #undef SAVE_SI128 +} + +void rgb32_yuv420_sseu(uint32_t width, uint32_t height, + const uint8_t *RGBA, uint32_t RGBA_stride, + uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, + YCbCrType yuv_type) +{ + #define LOAD_SI128 _mm_loadu_si128 + #define SAVE_SI128 _mm_storeu_si128 + const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]); + + uint32_t x, y; + for(y=0; y<(height-1); y+=2) + { + const uint8_t *rgb_ptr1=RGBA+y*RGBA_stride, + *rgb_ptr2=RGBA+(y+1)*RGBA_stride; + + uint8_t *y_ptr1=Y+y*Y_stride, + *y_ptr2=Y+(y+1)*Y_stride, + *u_ptr=U+(y/2)*UV_stride, + *v_ptr=V+(y/2)*UV_stride; + + for(x=0; x<(width-31); x+=32) + { + RGBA2YUV_32 + + rgb_ptr1+=128; + rgb_ptr2+=128; + y_ptr1+=32; + y_ptr2+=32; + u_ptr+=16; + v_ptr+=16; + } + } + #undef LOAD_SI128 + #undef SAVE_SI128 +} + +#endif + +#ifdef _YUVRGB_SSE2_ + +#define UV2RGB_16(U,V,R1,G1,B1,R2,G2,B2) \ + r_tmp = _mm_srai_epi16(_mm_mullo_epi16(V, _mm_set1_epi16(param->cr_factor)), 6); \ + g_tmp = _mm_srai_epi16(_mm_add_epi16( \ + _mm_mullo_epi16(U, _mm_set1_epi16(param->g_cb_factor)), \ + _mm_mullo_epi16(V, _mm_set1_epi16(param->g_cr_factor))), 7); \ + b_tmp = _mm_srai_epi16(_mm_mullo_epi16(U, _mm_set1_epi16(param->cb_factor)), 6); \ + R1 = _mm_unpacklo_epi16(r_tmp, r_tmp); \ + G1 = _mm_unpacklo_epi16(g_tmp, g_tmp); \ + B1 = _mm_unpacklo_epi16(b_tmp, b_tmp); \ + R2 = _mm_unpackhi_epi16(r_tmp, r_tmp); \ + G2 = _mm_unpackhi_epi16(g_tmp, g_tmp); \ + B2 = _mm_unpackhi_epi16(b_tmp, b_tmp); \ + +#define ADD_Y2RGB_16(Y1,Y2,R1,G1,B1,R2,G2,B2) \ + Y1 = _mm_srli_epi16(_mm_mullo_epi16(Y1, _mm_set1_epi16(param->y_factor)), 7); \ + Y2 = 
_mm_srli_epi16(_mm_mullo_epi16(Y2, _mm_set1_epi16(param->y_factor)), 7); \ + \ + R1 = _mm_add_epi16(Y1, R1); \ + G1 = _mm_sub_epi16(Y1, G1); \ + B1 = _mm_add_epi16(Y1, B1); \ + R2 = _mm_add_epi16(Y2, R2); \ + G2 = _mm_sub_epi16(Y2, G2); \ + B2 = _mm_add_epi16(Y2, B2); \ + +#define PACK_RGB24_32_STEP(RS1, RS2, RS3, RS4, RS5, RS6, RD1, RD2, RD3, RD4, RD5, RD6) \ +RD1 = _mm_packus_epi16(_mm_and_si128(RS1,_mm_set1_epi16(0xFF)), _mm_and_si128(RS2,_mm_set1_epi16(0xFF))); \ +RD2 = _mm_packus_epi16(_mm_and_si128(RS3,_mm_set1_epi16(0xFF)), _mm_and_si128(RS4,_mm_set1_epi16(0xFF))); \ +RD3 = _mm_packus_epi16(_mm_and_si128(RS5,_mm_set1_epi16(0xFF)), _mm_and_si128(RS6,_mm_set1_epi16(0xFF))); \ +RD4 = _mm_packus_epi16(_mm_srli_epi16(RS1,8), _mm_srli_epi16(RS2,8)); \ +RD5 = _mm_packus_epi16(_mm_srli_epi16(RS3,8), _mm_srli_epi16(RS4,8)); \ +RD6 = _mm_packus_epi16(_mm_srli_epi16(RS5,8), _mm_srli_epi16(RS6,8)); \ + +#define PACK_RGB24_32(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6) \ +PACK_RGB24_32_STEP(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6) \ +PACK_RGB24_32_STEP(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ +PACK_RGB24_32_STEP(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6) \ +PACK_RGB24_32_STEP(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ +PACK_RGB24_32_STEP(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6) \ + +#define LOAD_UV_PLANAR \ + __m128i u = LOAD_SI128((const __m128i*)(u_ptr)); \ + __m128i v = LOAD_SI128((const __m128i*)(v_ptr)); \ + +#define LOAD_UV_NV12 \ + __m128i uv1 = LOAD_SI128((const __m128i*)(uv_ptr)); \ + __m128i uv2 = LOAD_SI128((const __m128i*)(uv_ptr+16)); \ + __m128i u = _mm_packus_epi16(_mm_and_si128(uv1, _mm_set1_epi16(255)), _mm_and_si128(uv2, _mm_set1_epi16(255))); \ + uv1 = _mm_srli_epi16(uv1, 8); \ + uv2 = _mm_srli_epi16(uv2, 8); \ + __m128i v = _mm_packus_epi16(_mm_and_si128(uv1, _mm_set1_epi16(255)), _mm_and_si128(uv2, _mm_set1_epi16(255))); \ + +#define 
LOAD_UV_NV21 \ + __m128i uv1 = LOAD_SI128((const __m128i*)(uv_ptr)); \ + __m128i uv2 = LOAD_SI128((const __m128i*)(uv_ptr+16)); \ + __m128i v = _mm_packus_epi16(_mm_and_si128(uv1, _mm_set1_epi16(255)), _mm_and_si128(uv2, _mm_set1_epi16(255))); \ + uv1 = _mm_srli_epi16(uv1, 8); \ + uv2 = _mm_srli_epi16(uv2, 8); \ + __m128i u = _mm_packus_epi16(_mm_and_si128(uv1, _mm_set1_epi16(255)), _mm_and_si128(uv2, _mm_set1_epi16(255))); \ + +#define YUV2RGB_32 \ + __m128i r_tmp, g_tmp, b_tmp; \ + __m128i r_16_1, g_16_1, b_16_1, r_16_2, g_16_2, b_16_2; \ + __m128i r_uv_16_1, g_uv_16_1, b_uv_16_1, r_uv_16_2, g_uv_16_2, b_uv_16_2; \ + __m128i y_16_1, y_16_2; \ + \ + u = _mm_add_epi8(u, _mm_set1_epi8(-128)); \ + v = _mm_add_epi8(v, _mm_set1_epi8(-128)); \ + \ + /* process first 16 pixels of first line */\ + __m128i u_16 = _mm_srai_epi16(_mm_unpacklo_epi8(u, u), 8); \ + __m128i v_16 = _mm_srai_epi16(_mm_unpacklo_epi8(v, v), 8); \ + \ + UV2RGB_16(u_16, v_16, r_uv_16_1, g_uv_16_1, b_uv_16_1, r_uv_16_2, g_uv_16_2, b_uv_16_2) \ + r_16_1=r_uv_16_1; g_16_1=g_uv_16_1; b_16_1=b_uv_16_1; \ + r_16_2=r_uv_16_2; g_16_2=g_uv_16_2; b_16_2=b_uv_16_2; \ + \ + __m128i y = LOAD_SI128((const __m128i*)(y_ptr1)); \ + y = _mm_subs_epu8(y, _mm_set1_epi8(param->y_offset)); \ + y_16_1 = _mm_unpacklo_epi8(y, _mm_setzero_si128()); \ + y_16_2 = _mm_unpackhi_epi8(y, _mm_setzero_si128()); \ + \ + ADD_Y2RGB_16(y_16_1, y_16_2, r_16_1, g_16_1, b_16_1, r_16_2, g_16_2, b_16_2) \ + \ + __m128i r_8_11 = _mm_packus_epi16(r_16_1, r_16_2); \ + __m128i g_8_11 = _mm_packus_epi16(g_16_1, g_16_2); \ + __m128i b_8_11 = _mm_packus_epi16(b_16_1, b_16_2); \ + \ + /* process first 16 pixels of second line */\ + r_16_1=r_uv_16_1; g_16_1=g_uv_16_1; b_16_1=b_uv_16_1; \ + r_16_2=r_uv_16_2; g_16_2=g_uv_16_2; b_16_2=b_uv_16_2; \ + \ + y = LOAD_SI128((const __m128i*)(y_ptr2)); \ + y = _mm_subs_epu8(y, _mm_set1_epi8(param->y_offset)); \ + y_16_1 = _mm_unpacklo_epi8(y, _mm_setzero_si128()); \ + y_16_2 = _mm_unpackhi_epi8(y, 
_mm_setzero_si128()); \ + \ + ADD_Y2RGB_16(y_16_1, y_16_2, r_16_1, g_16_1, b_16_1, r_16_2, g_16_2, b_16_2) \ + \ + __m128i r_8_21 = _mm_packus_epi16(r_16_1, r_16_2); \ + __m128i g_8_21 = _mm_packus_epi16(g_16_1, g_16_2); \ + __m128i b_8_21 = _mm_packus_epi16(b_16_1, b_16_2); \ + \ + /* process last 16 pixels of first line */\ + u_16 = _mm_srai_epi16(_mm_unpackhi_epi8(u, u), 8); \ + v_16 = _mm_srai_epi16(_mm_unpackhi_epi8(v, v), 8); \ + \ + UV2RGB_16(u_16, v_16, r_uv_16_1, g_uv_16_1, b_uv_16_1, r_uv_16_2, g_uv_16_2, b_uv_16_2) \ + r_16_1=r_uv_16_1; g_16_1=g_uv_16_1; b_16_1=b_uv_16_1; \ + r_16_2=r_uv_16_2; g_16_2=g_uv_16_2; b_16_2=b_uv_16_2; \ + \ + y = LOAD_SI128((const __m128i*)(y_ptr1+16)); \ + y = _mm_subs_epu8(y, _mm_set1_epi8(param->y_offset)); \ + y_16_1 = _mm_unpacklo_epi8(y, _mm_setzero_si128()); \ + y_16_2 = _mm_unpackhi_epi8(y, _mm_setzero_si128()); \ + \ + ADD_Y2RGB_16(y_16_1, y_16_2, r_16_1, g_16_1, b_16_1, r_16_2, g_16_2, b_16_2) \ + \ + __m128i r_8_12 = _mm_packus_epi16(r_16_1, r_16_2); \ + __m128i g_8_12 = _mm_packus_epi16(g_16_1, g_16_2); \ + __m128i b_8_12 = _mm_packus_epi16(b_16_1, b_16_2); \ + \ + /* process last 16 pixels of second line */\ + r_16_1=r_uv_16_1; g_16_1=g_uv_16_1; b_16_1=b_uv_16_1; \ + r_16_2=r_uv_16_2; g_16_2=g_uv_16_2; b_16_2=b_uv_16_2; \ + \ + y = LOAD_SI128((const __m128i*)(y_ptr2+16)); \ + y = _mm_subs_epu8(y, _mm_set1_epi8(param->y_offset)); \ + y_16_1 = _mm_unpacklo_epi8(y, _mm_setzero_si128()); \ + y_16_2 = _mm_unpackhi_epi8(y, _mm_setzero_si128()); \ + \ + ADD_Y2RGB_16(y_16_1, y_16_2, r_16_1, g_16_1, b_16_1, r_16_2, g_16_2, b_16_2) \ + \ + __m128i r_8_22 = _mm_packus_epi16(r_16_1, r_16_2); \ + __m128i g_8_22 = _mm_packus_epi16(g_16_1, g_16_2); \ + __m128i b_8_22 = _mm_packus_epi16(b_16_1, b_16_2); \ + \ + __m128i rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6; \ + \ + PACK_RGB24_32(r_8_11, r_8_12, g_8_11, g_8_12, b_8_11, b_8_12, rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6) \ + SAVE_SI128((__m128i*)(rgb_ptr1), rgb_1); \ + 
SAVE_SI128((__m128i*)(rgb_ptr1+16), rgb_2); \ + SAVE_SI128((__m128i*)(rgb_ptr1+32), rgb_3); \ + SAVE_SI128((__m128i*)(rgb_ptr1+48), rgb_4); \ + SAVE_SI128((__m128i*)(rgb_ptr1+64), rgb_5); \ + SAVE_SI128((__m128i*)(rgb_ptr1+80), rgb_6); \ + \ + PACK_RGB24_32(r_8_21, r_8_22, g_8_21, g_8_22, b_8_21, b_8_22, rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6) \ + SAVE_SI128((__m128i*)(rgb_ptr2), rgb_1); \ + SAVE_SI128((__m128i*)(rgb_ptr2+16), rgb_2); \ + SAVE_SI128((__m128i*)(rgb_ptr2+32), rgb_3); \ + SAVE_SI128((__m128i*)(rgb_ptr2+48), rgb_4); \ + SAVE_SI128((__m128i*)(rgb_ptr2+64), rgb_5); \ + SAVE_SI128((__m128i*)(rgb_ptr2+80), rgb_6); \ + +#define YUV2RGB_32_PLANAR \ + LOAD_UV_PLANAR \ + YUV2RGB_32 + +#define YUV2RGB_32_NV12 \ + LOAD_UV_NV12 \ + YUV2RGB_32 + +#define YUV2RGB_32_NV21 \ + LOAD_UV_NV21 \ + YUV2RGB_32 + + +void yuv420_rgb24_sse( + uint32_t width, uint32_t height, + const uint8_t *Y, const uint8_t *U, const uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, + uint8_t *RGB, uint32_t RGB_stride, + YCbCrType yuv_type) +{ + #define LOAD_SI128 _mm_load_si128 + #define SAVE_SI128 _mm_stream_si128 + const YUV2RGBParam *const param = &(YUV2RGB[yuv_type]); + + uint32_t x, y; + for(y=0; y<(height-1); y+=2) + { + const uint8_t *y_ptr1=Y+y*Y_stride, + *y_ptr2=Y+(y+1)*Y_stride, + *u_ptr=U+(y/2)*UV_stride, + *v_ptr=V+(y/2)*UV_stride; + + uint8_t *rgb_ptr1=RGB+y*RGB_stride, + *rgb_ptr2=RGB+(y+1)*RGB_stride; + + for(x=0; x<(width-31); x+=32) + { + YUV2RGB_32_PLANAR + + y_ptr1+=32; + y_ptr2+=32; + u_ptr+=16; + v_ptr+=16; + rgb_ptr1+=96; + rgb_ptr2+=96; + } + } + #undef LOAD_SI128 + #undef SAVE_SI128 +} + +void yuv420_rgb24_sseu( + uint32_t width, uint32_t height, + const uint8_t *Y, const uint8_t *U, const uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, + uint8_t *RGB, uint32_t RGB_stride, + YCbCrType yuv_type) +{ + #define LOAD_SI128 _mm_loadu_si128 + #define SAVE_SI128 _mm_storeu_si128 + const YUV2RGBParam *const param = &(YUV2RGB[yuv_type]); + + uint32_t x, y; + for(y=0; 
y<(height-1); y+=2) + { + const uint8_t *y_ptr1=Y+y*Y_stride, + *y_ptr2=Y+(y+1)*Y_stride, + *u_ptr=U+(y/2)*UV_stride, + *v_ptr=V+(y/2)*UV_stride; + + uint8_t *rgb_ptr1=RGB+y*RGB_stride, + *rgb_ptr2=RGB+(y+1)*RGB_stride; + + for(x=0; x<(width-31); x+=32) + { + YUV2RGB_32_PLANAR + + y_ptr1+=32; + y_ptr2+=32; + u_ptr+=16; + v_ptr+=16; + rgb_ptr1+=96; + rgb_ptr2+=96; + } + } + #undef LOAD_SI128 + #undef SAVE_SI128 +} + +void nv12_rgb24_sse( + uint32_t width, uint32_t height, + const uint8_t *Y, const uint8_t *UV, uint32_t Y_stride, uint32_t UV_stride, + uint8_t *RGB, uint32_t RGB_stride, + YCbCrType yuv_type) +{ + #define LOAD_SI128 _mm_load_si128 + #define SAVE_SI128 _mm_stream_si128 + const YUV2RGBParam *const param = &(YUV2RGB[yuv_type]); + + uint32_t x, y; + for(y=0; y<(height-1); y+=2) + { + const uint8_t *y_ptr1=Y+y*Y_stride, + *y_ptr2=Y+(y+1)*Y_stride, + *uv_ptr=UV+(y/2)*UV_stride; + + uint8_t *rgb_ptr1=RGB+y*RGB_stride, + *rgb_ptr2=RGB+(y+1)*RGB_stride; + + for(x=0; x<(width-31); x+=32) + { + YUV2RGB_32_NV12 + + y_ptr1+=32; + y_ptr2+=32; + uv_ptr+=32; + rgb_ptr1+=96; + rgb_ptr2+=96; + } + } + #undef LOAD_SI128 + #undef SAVE_SI128 +} + +void nv12_rgb24_sseu( + uint32_t width, uint32_t height, + const uint8_t *Y, const uint8_t *UV, uint32_t Y_stride, uint32_t UV_stride, + uint8_t *RGB, uint32_t RGB_stride, + YCbCrType yuv_type) +{ + #define LOAD_SI128 _mm_loadu_si128 + #define SAVE_SI128 _mm_storeu_si128 + const YUV2RGBParam *const param = &(YUV2RGB[yuv_type]); + + uint32_t x, y; + for(y=0; y<(height-1); y+=2) + { + const uint8_t *y_ptr1=Y+y*Y_stride, + *y_ptr2=Y+(y+1)*Y_stride, + *uv_ptr=UV+(y/2)*UV_stride; + + uint8_t *rgb_ptr1=RGB+y*RGB_stride, + *rgb_ptr2=RGB+(y+1)*RGB_stride; + + for(x=0; x<(width-31); x+=32) + { + YUV2RGB_32_NV12 + + y_ptr1+=32; + y_ptr2+=32; + uv_ptr+=32; + rgb_ptr1+=96; + rgb_ptr2+=96; + } + } + #undef LOAD_SI128 + #undef SAVE_SI128 +} + +void nv21_rgb24_sse( + uint32_t width, uint32_t height, + const uint8_t *Y, const uint8_t *UV, 
uint32_t Y_stride, uint32_t UV_stride, + uint8_t *RGB, uint32_t RGB_stride, + YCbCrType yuv_type) +{ + #define LOAD_SI128 _mm_load_si128 + #define SAVE_SI128 _mm_stream_si128 + const YUV2RGBParam *const param = &(YUV2RGB[yuv_type]); + + uint32_t x, y; + for(y=0; y<(height-1); y+=2) + { + const uint8_t *y_ptr1=Y+y*Y_stride, + *y_ptr2=Y+(y+1)*Y_stride, + *uv_ptr=UV+(y/2)*UV_stride; + + uint8_t *rgb_ptr1=RGB+y*RGB_stride, + *rgb_ptr2=RGB+(y+1)*RGB_stride; + + for(x=0; x<(width-31); x+=32) + { + YUV2RGB_32_NV21 + + y_ptr1+=32; + y_ptr2+=32; + uv_ptr+=32; + rgb_ptr1+=96; + rgb_ptr2+=96; + } + } + #undef LOAD_SI128 + #undef SAVE_SI128 +} + +void nv21_rgb24_sseu( + uint32_t width, uint32_t height, + const uint8_t *Y, const uint8_t *UV, uint32_t Y_stride, uint32_t UV_stride, + uint8_t *RGB, uint32_t RGB_stride, + YCbCrType yuv_type) +{ + #define LOAD_SI128 _mm_loadu_si128 + #define SAVE_SI128 _mm_storeu_si128 + const YUV2RGBParam *const param = &(YUV2RGB[yuv_type]); + + uint32_t x, y; + for(y=0; y<(height-1); y+=2) + { + const uint8_t *y_ptr1=Y+y*Y_stride, + *y_ptr2=Y+(y+1)*Y_stride, + *uv_ptr=UV+(y/2)*UV_stride; + + uint8_t *rgb_ptr1=RGB+y*RGB_stride, + *rgb_ptr2=RGB+(y+1)*RGB_stride; + + for(x=0; x<(width-31); x+=32) + { + YUV2RGB_32_NV21 + + y_ptr1+=32; + y_ptr2+=32; + uv_ptr+=32; + rgb_ptr1+=96; + rgb_ptr2+=96; + } + } + #undef LOAD_SI128 + #undef SAVE_SI128 +} + + + +#endif //_YUVRGB_SSE2_ diff --git a/video/video_bink/yuv_rgb.h b/video/video_bink/yuv_rgb.h new file mode 100644 index 00000000..9f431a6b --- /dev/null +++ b/video/video_bink/yuv_rgb.h @@ -0,0 +1,155 @@ +// Copyright 2016 Adrien Descamps +// Distributed under BSD 3-Clause License + +// Provide optimized functions to convert images from 8bits yuv420 to rgb24 format + +// There are a few slightly different variations of the YCbCr color space with different parameters that +// change the conversion matrix. 
// The three most common YCbCr color spaces, defined by BT.601, BT.709 and the JPEG standard, are implemented here.
// See the respective standards for details.
// The matrix values used are derived from http://www.equasys.de/colorconversion.html

// YUV420 is stored as three separate channels, with U and V (Cb and Cr) subsampled by a factor of 2.
// For conversion from yuv to rgb, no interpolation is done, and the same UV values are used for 4 rgb pixels.
// This is suboptimal for image quality, but by far the fastest method.

// For all methods, width and height should be even; if not, the last row/column of the result image won't be affected.
// For sse methods, if the width is not divisible by 32, the last (width%32) pixels of each line won't be affected.

// BUGFIX: the original line read a bare "#include" — the "<stdint.h>" argument
// was lost (stripped angle brackets). It is required for uint8_t/uint32_t below.
#include <stdint.h>

// Selects the conversion matrix used by all functions below.
typedef enum
{
	YCBCR_JPEG,
	YCBCR_601,
	YCBCR_709
} YCbCrType;

#ifdef __cplusplus
extern "C" {
#endif

// yuv to rgb, standard c implementation
void yuv420_rgb24_std(
	uint32_t width, uint32_t height,
	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
	uint8_t *rgb, uint32_t rgb_stride,
	YCbCrType yuv_type);

// yuv to rgb, yuv in nv12 semi planar format
void nv12_rgb24_std(
	uint32_t width, uint32_t height,
	const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
	uint8_t *rgb, uint32_t rgb_stride,
	YCbCrType yuv_type);

// yuv to rgb, yuv in nv21 semi planar format
// (fixed: comment previously said "nv12", copy-paste error)
void nv21_rgb24_std(
	uint32_t width, uint32_t height,
	const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
	uint8_t *rgb, uint32_t rgb_stride,
	YCbCrType yuv_type);

// yuv to rgb, sse implementation
// pointers must be 16 byte aligned, and strides must be divisible by 16
void yuv420_rgb24_sse(
	uint32_t width, uint32_t height,
	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
	uint8_t *rgb, uint32_t rgb_stride,
	YCbCrType yuv_type);

// yuv to rgb, sse implementation
// pointers do not need to be 16 byte aligned
void yuv420_rgb24_sseu(
	uint32_t width, uint32_t height,
	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
	uint8_t *rgb, uint32_t rgb_stride,
	YCbCrType yuv_type);

// yuv nv12 to rgb, sse implementation
// pointers must be 16 byte aligned, and strides must be divisible by 16
void nv12_rgb24_sse(
	uint32_t width, uint32_t height,
	const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
	uint8_t *rgb, uint32_t rgb_stride,
	YCbCrType yuv_type);

// yuv nv12 to rgb, sse implementation
// pointers do not need to be 16 byte aligned
void nv12_rgb24_sseu(
	uint32_t width, uint32_t height,
	const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
	uint8_t *rgb, uint32_t rgb_stride,
	YCbCrType yuv_type);

// yuv nv21 to rgb, sse implementation
// pointers must be 16 byte aligned, and strides must be divisible by 16
void nv21_rgb24_sse(
	uint32_t width, uint32_t height,
	const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
	uint8_t *rgb, uint32_t rgb_stride,
	YCbCrType yuv_type);

// yuv nv21 to rgb, sse implementation
// pointers do not need to be 16 byte aligned
void nv21_rgb24_sseu(
	uint32_t width, uint32_t height,
	const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
	uint8_t *rgb, uint32_t rgb_stride,
	YCbCrType yuv_type);




// rgb to yuv, standard c implementation
void rgb24_yuv420_std(
	uint32_t width, uint32_t height,
	const uint8_t *rgb, uint32_t rgb_stride,
	uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
	YCbCrType yuv_type);

// rgb to yuv, sse implementation
// pointers must be 16 byte aligned, and strides must be divisible by 16
void rgb24_yuv420_sse(
	uint32_t width, uint32_t height,
	const uint8_t *rgb, uint32_t rgb_stride,
	uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
	YCbCrType yuv_type);

// rgb to yuv, sse implementation
// pointers do not need to be 16 byte aligned
void rgb24_yuv420_sseu(
	uint32_t width, uint32_t height,
	const uint8_t *rgb, uint32_t rgb_stride,
	uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
	YCbCrType yuv_type);

// rgba to yuv, standard c implementation
// alpha channel is ignored
void rgb32_yuv420_std(
	uint32_t width, uint32_t height,
	const uint8_t *rgba, uint32_t rgba_stride,
	uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
	YCbCrType yuv_type);

// rgba to yuv, sse implementation
// pointers must be 16 byte aligned, and strides must be divisible by 16
// alpha channel is ignored
void rgb32_yuv420_sse(
	uint32_t width, uint32_t height,
	const uint8_t *rgba, uint32_t rgba_stride,
	uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
	YCbCrType yuv_type);

// rgba to yuv, sse implementation
// pointers do not need to be 16 byte aligned
// alpha channel is ignored
void rgb32_yuv420_sseu(
	uint32_t width, uint32_t height,
	const uint8_t *rgba, uint32_t rgba_stride,
	uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
	YCbCrType yuv_type);

#ifdef __cplusplus
}
#endif