You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
316 lines
9.6 KiB
316 lines
9.6 KiB
#include "tier0/dbg.h" |
|
#include "vstdlib/vstrtools.h" |
|
|
|
#if defined( _WIN32 ) && !defined( _X360 ) |
|
#include <windows.h> |
|
#endif |
|
#if defined(POSIX) && !defined(_PS3) |
|
#include <iconv.h> |
|
#endif |
|
|
|
#ifdef _PS3 |
|
#include <cell/sysmodule.h> |
|
#include <cell/l10n.h> |
|
|
|
class DummyInitL10N |
|
{ |
|
public: |
|
DummyInitL10N() |
|
{ |
|
int ret = cellSysmoduleLoadModule( CELL_SYSMODULE_L10N ); |
|
if( ret != CELL_OK ) |
|
{ |
|
Warning( "Cannot initialize l10n, unicode services will not work. Error %d\n", ret ); |
|
} |
|
} |
|
|
|
~DummyInitL10N() |
|
{ |
|
cellSysmoduleUnloadModule( CELL_SYSMODULE_L10N ); |
|
} |
|
}s_dummyInitL10N; |
|
#endif |
|
|
|
//----------------------------------------------------------------------------- |
|
// Purpose: Converts a UTF8 string into a unicode string |
|
//----------------------------------------------------------------------------- |
|
int V_UTF8ToUnicode( const char *pUTF8, wchar_t *pwchDest, int cubDestSizeInBytes ) |
|
{ |
|
if ( !pUTF8 ) |
|
return 0; |
|
|
|
AssertValidStringPtr(pUTF8); |
|
AssertValidWritePtr(pwchDest); |
|
|
|
pwchDest[0] = 0; |
|
#ifdef _WIN32 |
|
int cchResult = MultiByteToWideChar( CP_UTF8, 0, pUTF8, -1, pwchDest, cubDestSizeInBytes / sizeof(wchar_t) ); |
|
#elif defined( _PS3 ) |
|
size_t cchResult = cubDestSizeInBytes / sizeof( uint16 ), cchSrc = V_strlen( pUTF8 ) + 1; |
|
L10nResult result = UTF8stoUCS2s( ( const uint8 *) pUTF8, &cchSrc, ( uint16 * ) pwchDest, &cchResult ); |
|
Assert( result == ConversionOK ); |
|
cchResult *= sizeof( uint16 ); |
|
#elif POSIX |
|
iconv_t conv_t = iconv_open( "UTF-32LE", "UTF-8" ); |
|
int cchResult = -1; |
|
size_t nLenUnicde = cubDestSizeInBytes; |
|
size_t nMaxUTF8 = strlen(pUTF8) + 1; |
|
char *pIn = (char *)pUTF8; |
|
char *pOut = (char *)pwchDest; |
|
if ( conv_t > 0 ) |
|
{ |
|
cchResult = 0; |
|
cchResult = iconv( conv_t, &pIn, &nMaxUTF8, &pOut, &nLenUnicde ); |
|
iconv_close( conv_t ); |
|
if ( (int)cchResult < 0 ) |
|
cchResult = 0; |
|
else |
|
cchResult = nMaxUTF8; |
|
} |
|
#endif |
|
pwchDest[(cubDestSizeInBytes / sizeof(wchar_t)) - 1] = 0; |
|
return cchResult; |
|
} |
|
|
|
//----------------------------------------------------------------------------- |
|
// Purpose: Converts a unicode string into a UTF8 (standard) string |
|
//----------------------------------------------------------------------------- |
|
int V_UnicodeToUTF8( const wchar_t *pUnicode, char *pUTF8, int cubDestSizeInBytes ) |
|
{ |
|
AssertValidStringPtr(pUTF8, cubDestSizeInBytes); |
|
AssertValidReadPtr(pUnicode); |
|
|
|
if ( cubDestSizeInBytes > 0 ) |
|
{ |
|
pUTF8[0] = 0; |
|
} |
|
|
|
#ifdef _WIN32 |
|
int cchResult = WideCharToMultiByte( CP_UTF8, 0, pUnicode, -1, pUTF8, cubDestSizeInBytes, NULL, NULL ); |
|
#elif defined( _PS3 ) |
|
size_t cchResult = cubDestSizeInBytes, cchSrc = V_wcslen( pUnicode ) + 1; |
|
L10nResult result = UCS2stoUTF8s( ( const uint16 *) pUnicode, &cchSrc, ( uint8 * ) pUTF8, &cchResult ); |
|
Assert( result == ConversionOK ); |
|
#elif POSIX |
|
int cchResult = 0; |
|
if ( pUnicode && pUTF8 ) |
|
{ |
|
iconv_t conv_t = iconv_open( "UTF-8", "UTF-32LE" ); |
|
int cchResult = -1; |
|
size_t nLenUnicde = ( wcslen(pUnicode) + 1 ) * sizeof(wchar_t); // 4 bytes per wchar vs. 1 byte for utf8 for simple english |
|
size_t nMaxUTF8 = cubDestSizeInBytes; |
|
char *pIn = (char *)pUnicode; |
|
char *pOut = (char *)pUTF8; |
|
if ( conv_t > 0 ) |
|
{ |
|
cchResult = 0; |
|
cchResult = iconv( conv_t, &pIn, &nLenUnicde, &pOut, &nMaxUTF8 ); |
|
iconv_close( conv_t ); |
|
if ( (int)cchResult < 0 ) |
|
cchResult = 0; |
|
else |
|
cchResult = nMaxUTF8; |
|
} |
|
} |
|
#endif |
|
|
|
if ( cubDestSizeInBytes > 0 ) |
|
{ |
|
pUTF8[cubDestSizeInBytes - 1] = 0; |
|
} |
|
|
|
return cchResult; |
|
} |
|
|
|
|
|
//----------------------------------------------------------------------------- |
|
// Purpose: Converts a ucs2 string to a unicode (wchar_t) one, no-op on win32 |
|
//----------------------------------------------------------------------------- |
|
int V_UCS2ToUnicode( const ucs2 *pUCS2, wchar_t *pUnicode, int cubDestSizeInBytes ) |
|
{ |
|
AssertValidWritePtr(pUnicode); |
|
AssertValidReadPtr(pUCS2); |
|
|
|
pUnicode[0] = 0; |
|
#if defined( _WIN32 ) || defined( _PS3 ) |
|
int lenUCS2 = V_wcslen( pUCS2 ); |
|
int cchResult = MIN( (lenUCS2+1)*( int )sizeof(ucs2), cubDestSizeInBytes ); |
|
V_wcsncpy( (wchar_t*)pUCS2, pUnicode, cchResult ); |
|
#else |
|
iconv_t conv_t = iconv_open( "UCS-4LE", "UCS-2LE" ); |
|
int cchResult = -1; |
|
size_t nLenUnicde = cubDestSizeInBytes; |
|
size_t nMaxUTF8 = cubDestSizeInBytes; |
|
char *pIn = (char *)pUCS2; |
|
char *pOut = (char *)pUnicode; |
|
if ( conv_t > 0 ) |
|
{ |
|
cchResult = 0; |
|
cchResult = iconv( conv_t, &pIn, &nLenUnicde, &pOut, &nMaxUTF8 ); |
|
iconv_close( conv_t ); |
|
if ( (int)cchResult < 0 ) |
|
cchResult = 0; |
|
else |
|
cchResult = nMaxUTF8; |
|
} |
|
#endif |
|
pUnicode[(cubDestSizeInBytes / sizeof(wchar_t)) - 1] = 0; |
|
return cchResult; |
|
|
|
} |
|
|
|
|
|
//----------------------------------------------------------------------------- |
|
// Purpose: Converts a wchar_t string into a UCS2 string -noop on windows |
|
//----------------------------------------------------------------------------- |
|
int V_UnicodeToUCS2( const wchar_t *pUnicode, int cubSrcInBytes, char *pUCS2, int cubDestSizeInBytes ) |
|
{ |
|
// TODO: MACMERGE: Figure out how to convert from 2-byte Win32 wchars to platform wchar_t type that can be 4 bytes |
|
#if defined( _WIN32 ) || defined( _PS3 ) |
|
int cchResult = MIN( cubSrcInBytes, cubDestSizeInBytes ); |
|
V_wcsncpy( (wchar_t*)pUCS2, pUnicode, cchResult ); |
|
#elif defined (POSIX) |
|
iconv_t conv_t = iconv_open( "UCS-2LE", "UTF-32LE" ); |
|
size_t cchResult = -1; |
|
size_t nLenUnicde = cubSrcInBytes; |
|
size_t nMaxUCS2 = cubDestSizeInBytes; |
|
char *pIn = (char*)pUnicode; |
|
char *pOut = pUCS2; |
|
if ( conv_t > 0 ) |
|
{ |
|
cchResult = 0; |
|
cchResult = iconv( conv_t, &pIn, &nLenUnicde, &pOut, &nMaxUCS2 ); |
|
iconv_close( conv_t ); |
|
if ( (int)cchResult < 0 ) |
|
cchResult = 0; |
|
else |
|
cchResult = cubSrcInBytes / sizeof( wchar_t ); |
|
} |
|
#endif |
|
return cchResult; |
|
} |
|
|
|
// UTF-8 encodes each character (code point) in 1 to 4 octets (8-bit bytes).
// The first 128 characters of the Unicode character set (which correspond directly to ASCII)
// use a single octet with the same binary value as in ASCII.
// url: http://en.wikipedia.org/wiki/UTF-8
// Upper bound on the number of bytes a single UTF-8 encoded code point occupies.
#define MAX_UTF8_CHARACTER_BYTES 4
|
|
|
|
|
//----------------------------------------------------------------------------- |
|
// Purpose: Converts a ucs-2 (windows wchar_t) string into a UTF8 (standard) string |
|
//----------------------------------------------------------------------------- |
|
VSTRTOOLS_INTERFACE int V_UCS2ToUTF8( const ucs2 *pUCS2, char *pUTF8, int cubDestSizeInBytes ) |
|
{ |
|
AssertValidStringPtr(pUTF8, cubDestSizeInBytes); |
|
AssertValidReadPtr(pUCS2); |
|
Assert( cubDestSizeInBytes >= 1 ); // must have at least 1 byte to write the terminator character |
|
|
|
pUTF8[0] = '\0'; |
|
#ifdef _WIN32 |
|
// under win32 wchar_t == ucs2, sigh |
|
int cchResult = WideCharToMultiByte( CP_UTF8, 0, pUCS2, -1, pUTF8, cubDestSizeInBytes, NULL, NULL ); |
|
#elif defined( _PS3 ) |
|
size_t cchResult = cubDestSizeInBytes, cchSrc = V_wcslen( pUCS2 ) + 1; |
|
L10nResult result = UCS2stoUTF8s( ( const uint16 *) pUCS2, &cchSrc, ( uint8 * ) pUTF8, &cchResult ); |
|
Assert( result == ConversionOK ); |
|
#elif defined(POSIX) |
|
iconv_t conv_t = iconv_open( "UTF-8", "UCS-2LE" ); |
|
size_t cchResult = -1; |
|
size_t nLenUnicde = cubDestSizeInBytes; |
|
size_t nMaxUTF8 = cubDestSizeInBytes; |
|
char *pIn = (char *)pUCS2; |
|
char *pOut = (char *)pUTF8; |
|
if ( conv_t > 0 ) |
|
{ |
|
cchResult = 0; |
|
cchResult = iconv( conv_t, &pIn, &nLenUnicde, &pOut, &nMaxUTF8 ); |
|
iconv_close( conv_t ); |
|
if ( (int)cchResult < 0 ) |
|
cchResult = 0; |
|
else |
|
cchResult = nMaxUTF8; |
|
} |
|
#endif |
|
pUTF8[cubDestSizeInBytes - 1] = '\0'; |
|
return cchResult; |
|
} |
|
|
|
|
|
//----------------------------------------------------------------------------- |
|
// Purpose: Converts a UTF8 to ucs-2 (windows wchar_t) |
|
//----------------------------------------------------------------------------- |
|
VSTRTOOLS_INTERFACE int V_UTF8ToUCS2( const char *pUTF8, int cubSrcInBytes, ucs2 *pUCS2, int cubDestSizeInBytes ) |
|
{ |
|
AssertValidStringPtr(pUTF8, cubDestSizeInBytes); |
|
AssertValidReadPtr(pUCS2); |
|
|
|
pUCS2[0] = 0; |
|
#ifdef _WIN32 |
|
// under win32 wchar_t == ucs2, sigh |
|
int cchResult = MultiByteToWideChar( CP_UTF8, 0, pUTF8, -1, pUCS2, cubDestSizeInBytes / sizeof(wchar_t) ); |
|
#elif defined( _PS3 ) |
|
size_t cchResult = cubDestSizeInBytes / sizeof( uint16 ), cchSrc = cubSrcInBytes; |
|
L10nResult result = UTF8stoUCS2s( ( const uint8 *) pUTF8, &cchSrc, ( uint16 * ) pUCS2, &cchResult ); |
|
Assert( result == ConversionOK ); |
|
cchResult *= sizeof( uint16 ); |
|
#elif defined(POSIX) |
|
iconv_t conv_t = iconv_open( "UCS-2LE", "UTF-8" ); |
|
size_t cchResult = -1; |
|
size_t nLenUnicde = cubSrcInBytes; |
|
size_t nMaxUTF8 = cubDestSizeInBytes; |
|
char *pIn = (char *)pUTF8; |
|
char *pOut = (char *)pUCS2; |
|
if ( conv_t > 0 ) |
|
{ |
|
cchResult = 0; |
|
cchResult = iconv( conv_t, &pIn, &nLenUnicde, &pOut, &nMaxUTF8 ); |
|
iconv_close( conv_t ); |
|
if ( (int)cchResult < 0 ) |
|
cchResult = 0; |
|
else |
|
cchResult = cubSrcInBytes; |
|
|
|
} |
|
#endif |
|
pUCS2[ (cubDestSizeInBytes/sizeof(ucs2)) - 1] = 0; |
|
return cchResult; |
|
} |
|
|
|
//-----------------------------------------------------------------------------
// Purpose: copies at most nMaxBytes of the UTF-8 input data into the destination,
//			ensuring that a trailing multi-byte sequence isn't truncated.
// Input:	pDest - destination buffer of at least nMaxBytes bytes
//			pSrc - NUL-terminated UTF-8 source; NULL yields an empty string
//			nMaxBytes - capacity of pDest in bytes, terminator included
// Output:	pDest. The result is always NUL-terminated when nMaxBytes > 0. If the
//			copy ends in the middle of a multi-byte character, that partial
//			character is dropped entirely. Malformed trailing bytes that do not
//			form a cut-off sequence are left as they are.
//-----------------------------------------------------------------------------
VSTRTOOLS_INTERFACE void * V_UTF8_strncpy( char *pDest, const char *pSrc, size_t nMaxBytes )
{
	// Nothing - not even a terminator - fits into a missing or zero-sized buffer.
	if ( !pDest || nMaxBytes == 0 )
		return pDest;

	if ( !pSrc )
	{
		pDest[0] = 0;
		return pDest;
	}

	// strncpy zero-pads short sources but does not terminate long ones.
	strncpy( pDest, pSrc, nMaxBytes );
	pDest[nMaxBytes - 1] = 0;

	// An empty result has no trailing character to inspect; scanning backwards
	// here would walk off the front of the buffer.
	const size_t nLen = strlen( pDest );
	if ( nLen == 0 )
		return pDest;

	// http://en.wikipedia.org/wiki/UTF-8
	// Step back over trailing continuation bytes (10xxxxxx) to the byte that
	// should be the lead byte of the last character, never going below index 0.
	size_t iLead = nLen - 1;
	size_t nTrailSeen = 0;
	while ( iLead > 0 && ( (unsigned char)pDest[iLead] & 0xC0 ) == 0x80 )
	{
		++nTrailSeen;
		--iLead;
	}

	// The sequence length is encoded in the leading 1-bits of the lead byte only.
	// The bits after the first 0 are payload and must not be counted.
	const unsigned char chLead = (unsigned char)pDest[iLead];
	size_t nTrailExpected = 0;
	if ( ( chLead & 0xE0 ) == 0xC0 )
		nTrailExpected = 1;		// 110xxxxx
	else if ( ( chLead & 0xF0 ) == 0xE0 )
		nTrailExpected = 2;		// 1110xxxx
	else if ( ( chLead & 0xF8 ) == 0xF0 )
		nTrailExpected = 3;		// 11110xxx

	// Cut the string at the lead byte when the copy chopped off part of the character.
	if ( nTrailSeen < nTrailExpected )
		pDest[iLead] = 0;

	return pDest;
}