You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
942 lines
41 KiB
942 lines
41 KiB
/*
     File:       TextCommon.h

     Contains:   TextEncoding-related types and constants, and prototypes for related functions

     Version:    QuickTime 7.3

     Copyright:  (c) 2007 (c) 1995-2002 by Apple Computer, Inc., all rights reserved.

     Bugs?:      For bug reports, consult the following page on
                 the World Wide Web:

                     http://developer.apple.com/bugreporter/
*/
|
/* Include guard for this header. */
#ifndef __TEXTCOMMON__
#define __TEXTCOMMON__

/* Pull in the basic Mac types (UInt32, Str31, ...) unless already included. */
#ifndef __MACTYPES__
#include <MacTypes.h>
#endif

#if PRAGMA_ONCE
#pragma once
#endif

/* Give the declarations C linkage when compiled as C++. */
#ifdef __cplusplus
extern "C" {
#endif

#if PRAGMA_IMPORT
#pragma import on
#endif

/* Select 2-byte (mac68k) struct alignment using whichever pragma the compiler supports. */
#if PRAGMA_STRUCT_ALIGN
    #pragma options align=mac68k
#elif PRAGMA_STRUCT_PACKPUSH
    #pragma pack(push, 2)
#elif PRAGMA_STRUCT_PACK
    #pragma pack(2)
#endif
|
|
|
/* TextEncodingBase type & values */ |
|
/* (values 0-32 correspond to the Script Codes defined in Inside Macintosh: Text pages 6-52 and 6-53 */ |
|
typedef UInt32 TextEncodingBase; |
|
enum {
                                        /* Mac OS encodings*/
  kTextEncodingMacRoman           = 0L,
  kTextEncodingMacJapanese        = 1,
  kTextEncodingMacChineseTrad     = 2,
  kTextEncodingMacKorean          = 3,
  kTextEncodingMacArabic          = 4,
  kTextEncodingMacHebrew          = 5,
  kTextEncodingMacGreek           = 6,
  kTextEncodingMacCyrillic        = 7,
  kTextEncodingMacDevanagari      = 9,
  kTextEncodingMacGurmukhi        = 10,
  kTextEncodingMacGujarati        = 11,
  kTextEncodingMacOriya           = 12,
  kTextEncodingMacBengali         = 13,
  kTextEncodingMacTamil           = 14,
  kTextEncodingMacTelugu          = 15,
  kTextEncodingMacKannada         = 16,
  kTextEncodingMacMalayalam       = 17,
  kTextEncodingMacSinhalese       = 18,
  kTextEncodingMacBurmese         = 19,
  kTextEncodingMacKhmer           = 20,
  kTextEncodingMacThai            = 21,
  kTextEncodingMacLaotian         = 22,
  kTextEncodingMacGeorgian        = 23,
  kTextEncodingMacArmenian        = 24,
  kTextEncodingMacChineseSimp     = 25,
  kTextEncodingMacTibetan         = 26,
  kTextEncodingMacMongolian       = 27,
  kTextEncodingMacEthiopic        = 28,
  kTextEncodingMacCentralEurRoman = 29,
  kTextEncodingMacVietnamese      = 30,
  kTextEncodingMacExtArabic       = 31,
                                        /* The following use script code 0, smRoman*/
  kTextEncodingMacSymbol          = 33,
  kTextEncodingMacDingbats        = 34,
  kTextEncodingMacTurkish         = 35,
  kTextEncodingMacCroatian        = 36,
  kTextEncodingMacIcelandic       = 37,
  kTextEncodingMacRomanian        = 38,
  kTextEncodingMacCeltic          = 39,
  kTextEncodingMacGaelic          = 40,
  kTextEncodingMacKeyboardGlyphs  = 41
};
|
|
|
/* The following are older names for backward compatibility*/ |
|
enum { |
|
kTextEncodingMacTradChinese = kTextEncodingMacChineseTrad, |
|
kTextEncodingMacRSymbol = 8, |
|
kTextEncodingMacSimpChinese = kTextEncodingMacChineseSimp, |
|
kTextEncodingMacGeez = kTextEncodingMacEthiopic, |
|
kTextEncodingMacEastEurRoman = kTextEncodingMacCentralEurRoman, |
|
kTextEncodingMacUninterp = 32 |
|
}; |
|
|
|
|
|
/*
   Beginning in Mac OS 8.5, the following meta-value is used to indicate Unicode in some parts
   of the Mac OS which previously only expected a Mac OS script code. In some of these places,
   only 7 bits are available to indicate encoding (script code), so kTextEncodingUnicodeDefault
   cannot be used. For example, kTextEncodingMacUnicode can be used to indicate Unicode in the
   7-bit script code field of a Unicode input method's ComponentDescription.componentFlags field;
   it can also be used to indicate Unicode in the 16-bit script code field of an AppleEvent's
   typeIntlWritingCode text tag.
*/
enum {
  kTextEncodingMacUnicode       = 0x7E  /* Meta-value, Unicode as a Mac encoding*/
};
|
|
|
/* Variant Mac OS encodings that use script codes other than 0*/
enum {
                                        /* The following use script code 4, smArabic*/
  kTextEncodingMacFarsi         = 0x8C, /* Like MacArabic but uses Farsi digits*/
                                        /* The following use script code 7, smCyrillic*/
  kTextEncodingMacUkrainian     = 0x98, /* Meta-value in TEC 1.5 & later; maps to kTextEncodingMacCyrillic variant    */
                                        /* The following use script code 28, smEthiopic*/
  kTextEncodingMacInuit         = 0xEC,
                                        /* The following use script code 32, smUnimplemented*/
  kTextEncodingMacVT100         = 0xFC  /* VT100/102 font from Comm Toolbox: Latin-1 repertoire + box drawing etc*/
};
|
|
|
/* Special Mac OS encodings*/
enum {
  kTextEncodingMacHFS           = 0xFF  /* Meta-value, should never appear in a table.*/
};
|
|
|
/* Unicode & ISO UCS encodings begin at 0x100*/
enum {
  kTextEncodingUnicodeDefault   = 0x0100, /* Meta-value, should never appear in a table.*/
  kTextEncodingUnicodeV1_1      = 0x0101,
  kTextEncodingISO10646_1993    = 0x0101, /* Code points identical to Unicode 1.1*/
  kTextEncodingUnicodeV2_0      = 0x0103, /* New location for Korean Hangul*/
  kTextEncodingUnicodeV2_1      = 0x0103, /* We treat both Unicode 2.0 and Unicode 2.1 as 2.1*/
  kTextEncodingUnicodeV3_0      = 0x0104,
  kTextEncodingUnicodeV3_1      = 0x0105, /* Adds characters requiring surrogate pairs in UTF-16*/
  kTextEncodingUnicodeV3_2      = 0x0106
};
|
|
|
/* ISO 8-bit and 7-bit encodings begin at 0x200*/
enum {
  kTextEncodingISOLatin1        = 0x0201, /* ISO 8859-1*/
  kTextEncodingISOLatin2        = 0x0202, /* ISO 8859-2*/
  kTextEncodingISOLatin3        = 0x0203, /* ISO 8859-3*/
  kTextEncodingISOLatin4        = 0x0204, /* ISO 8859-4*/
  kTextEncodingISOLatinCyrillic = 0x0205, /* ISO 8859-5*/
  kTextEncodingISOLatinArabic   = 0x0206, /* ISO 8859-6, = ASMO 708, =DOS CP 708*/
  kTextEncodingISOLatinGreek    = 0x0207, /* ISO 8859-7*/
  kTextEncodingISOLatinHebrew   = 0x0208, /* ISO 8859-8*/
  kTextEncodingISOLatin5        = 0x0209, /* ISO 8859-9*/
  kTextEncodingISOLatin6        = 0x020A, /* ISO 8859-10                           */
  kTextEncodingISOLatin7        = 0x020D, /* ISO 8859-13, Baltic Rim                   */
  kTextEncodingISOLatin8        = 0x020E, /* ISO 8859-14, Celtic                    */
  kTextEncodingISOLatin9        = 0x020F  /* ISO 8859-15, 8859-1 changed for EURO & CP1252 letters  */
};
|
|
|
/* MS-DOS & Windows encodings begin at 0x400*/
enum {
  kTextEncodingDOSLatinUS         = 0x0400, /* code page 437*/
  kTextEncodingDOSGreek           = 0x0405, /* code page 737 (formerly code page 437G)*/
  kTextEncodingDOSBalticRim       = 0x0406, /* code page 775*/
  kTextEncodingDOSLatin1          = 0x0410, /* code page 850, "Multilingual"*/
  kTextEncodingDOSGreek1          = 0x0411, /* code page 851*/
  kTextEncodingDOSLatin2          = 0x0412, /* code page 852, Slavic*/
  kTextEncodingDOSCyrillic        = 0x0413, /* code page 855, IBM Cyrillic*/
  kTextEncodingDOSTurkish         = 0x0414, /* code page 857, IBM Turkish*/
  kTextEncodingDOSPortuguese      = 0x0415, /* code page 860*/
  kTextEncodingDOSIcelandic       = 0x0416, /* code page 861*/
  kTextEncodingDOSHebrew          = 0x0417, /* code page 862*/
  kTextEncodingDOSCanadianFrench  = 0x0418, /* code page 863*/
  kTextEncodingDOSArabic          = 0x0419, /* code page 864*/
  kTextEncodingDOSNordic          = 0x041A, /* code page 865*/
  kTextEncodingDOSRussian         = 0x041B, /* code page 866*/
  kTextEncodingDOSGreek2          = 0x041C, /* code page 869, IBM Modern Greek*/
  kTextEncodingDOSThai            = 0x041D, /* code page 874, also for Windows*/
  kTextEncodingDOSJapanese        = 0x0420, /* code page 932, also for Windows; Shift-JIS with additions*/
  kTextEncodingDOSChineseSimplif  = 0x0421, /* code page 936, also for Windows; was EUC-CN, now GBK (EUC-CN extended)*/
  kTextEncodingDOSKorean          = 0x0422, /* code page 949, also for Windows; Unified Hangul Code (EUC-KR extended)*/
  kTextEncodingDOSChineseTrad     = 0x0423, /* code page 950, also for Windows; Big-5*/
  kTextEncodingWindowsLatin1      = 0x0500, /* code page 1252*/
  kTextEncodingWindowsANSI        = 0x0500, /* code page 1252 (alternate name)*/
  kTextEncodingWindowsLatin2      = 0x0501, /* code page 1250, Central Europe*/
  kTextEncodingWindowsCyrillic    = 0x0502, /* code page 1251, Slavic Cyrillic*/
  kTextEncodingWindowsGreek       = 0x0503, /* code page 1253*/
  kTextEncodingWindowsLatin5      = 0x0504, /* code page 1254, Turkish*/
  kTextEncodingWindowsHebrew      = 0x0505, /* code page 1255*/
  kTextEncodingWindowsArabic      = 0x0506, /* code page 1256*/
  kTextEncodingWindowsBalticRim   = 0x0507, /* code page 1257*/
  kTextEncodingWindowsVietnamese  = 0x0508, /* code page 1258*/
  kTextEncodingWindowsKoreanJohab = 0x0510  /* code page 1361, for Windows NT*/
};
|
|
|
/* Various national standards begin at 0x600*/
enum {
  kTextEncodingUS_ASCII          = 0x0600,
  kTextEncodingJIS_X0201_76      = 0x0620, /* JIS Roman and 1-byte katakana (halfwidth)*/
  kTextEncodingJIS_X0208_83      = 0x0621,
  kTextEncodingJIS_X0208_90      = 0x0622,
  kTextEncodingJIS_X0212_90      = 0x0623,
  kTextEncodingJIS_C6226_78      = 0x0624,
  kTextEncodingShiftJIS_X0213_00 = 0x0628, /* Shift-JIS format encoding of JIS X0213 planes 1 and 2*/
  kTextEncodingGB_2312_80        = 0x0630,
  kTextEncodingGBK_95            = 0x0631, /* annex to GB 13000-93; for Windows 95; EUC-CN extended*/
  kTextEncodingGB_18030_2000     = 0x0632,
  kTextEncodingKSC_5601_87       = 0x0640, /* same as KSC 5601-92 without Johab annex*/
  kTextEncodingKSC_5601_92_Johab = 0x0641, /* KSC 5601-92 Johab annex*/
  kTextEncodingCNS_11643_92_P1   = 0x0651, /* CNS 11643-1992 plane 1*/
  kTextEncodingCNS_11643_92_P2   = 0x0652, /* CNS 11643-1992 plane 2*/
  kTextEncodingCNS_11643_92_P3   = 0x0653  /* CNS 11643-1992 plane 3 (was plane 14 in 1986 version)*/
};
|
|
|
/* ISO 2022 collections begin at 0x800*/
enum {
  kTextEncodingISO_2022_JP      = 0x0820, /* RFC 1468*/
  kTextEncodingISO_2022_JP_2    = 0x0821, /* RFC 1554*/
  kTextEncodingISO_2022_JP_1    = 0x0822, /* RFC 2237*/
  kTextEncodingISO_2022_JP_3    = 0x0823, /* JIS X0213*/
  kTextEncodingISO_2022_CN      = 0x0830,
  kTextEncodingISO_2022_CN_EXT  = 0x0831,
  kTextEncodingISO_2022_KR      = 0x0840
};
|
|
|
/* EUC collections begin at 0x900*/
enum {
  kTextEncodingEUC_JP           = 0x0920, /* ISO 646, 1-byte katakana, JIS 208, JIS 212*/
  kTextEncodingEUC_CN           = 0x0930, /* ISO 646, GB 2312-80*/
  kTextEncodingEUC_TW           = 0x0931, /* ISO 646, CNS 11643-1992 Planes 1-16*/
  kTextEncodingEUC_KR           = 0x0940  /* ISO 646, KS C 5601-1987*/
};
|
|
|
/* Misc standards begin at 0xA00*/
enum {
  kTextEncodingShiftJIS         = 0x0A01, /* plain Shift-JIS*/
  kTextEncodingKOI8_R           = 0x0A02, /* Russian internet standard*/
  kTextEncodingBig5             = 0x0A03, /* Big-5 (has variants)*/
  kTextEncodingMacRomanLatin1   = 0x0A04, /* Mac OS Roman permuted to align with ISO Latin-1*/
  kTextEncodingHZ_GB_2312       = 0x0A05, /* HZ (RFC 1842, for Chinese mail & news)*/
  kTextEncodingBig5_HKSCS_1999  = 0x0A06  /* Big-5 with Hong Kong special char set supplement*/
};
|
|
|
/* Other platform encodings*/
enum {
  kTextEncodingNextStepLatin    = 0x0B01, /* NextStep Latin encoding*/
  kTextEncodingNextStepJapanese = 0x0B02  /* NextStep Japanese encoding (variant of EUC-JP)*/
};
|
|
|
/* EBCDIC & IBM host encodings begin at 0xC00*/
enum {
  kTextEncodingEBCDIC_US        = 0x0C01, /* basic EBCDIC-US*/
  kTextEncodingEBCDIC_CP037     = 0x0C02  /* code page 037, extended EBCDIC (Latin-1 set) for US,Canada...*/
};
|
|
|
/* Special values*/
enum {
  kTextEncodingMultiRun         = 0x0FFF, /* Multi-encoding text with external run info*/
  kTextEncodingUnknown          = 0xFFFF  /* Unknown or unspecified                  */
};
|
|
|
|
|
/* TextEncodingVariant type & values */ |
|
typedef UInt32 TextEncodingVariant; |
|
/* Default TextEncodingVariant, for any TextEncodingBase*/
enum {
  kTextEncodingDefaultVariant   = 0
};
|
|
|
/* Variants of kTextEncodingMacRoman                                                        */
enum {
  kMacRomanDefaultVariant       = 0,    /* meta value, maps to 1 or 2 depending on System */
  kMacRomanCurrencySignVariant  = 1,    /* Mac OS version < 8.5, 0xDB is CURRENCY SIGN*/
  kMacRomanEuroSignVariant      = 2     /* Mac OS version >= 8.5, 0xDB is EURO SIGN      */
};
|
|
|
/* Variants of kTextEncodingMacCyrillic (for TEC 1.5 and later)                             */
enum {
  kMacCyrillicDefaultVariant     = 0,   /* meta value, maps to 1, 2, or 3 depending on System*/
  kMacCyrillicCurrSignStdVariant = 1,   /* Mac OS < 9.0 (RU,BG), 0xFF = CURRENCY SIGN, 0xA2/0xB6 = CENT / PARTIAL DIFF.*/
  kMacCyrillicCurrSignUkrVariant = 2,   /* Mac OS < 9.0 (UA,LangKit), 0xFF = CURRENCY SIGN, 0xA2/0xB6 = GHE WITH UPTURN*/
  kMacCyrillicEuroSignVariant    = 3    /* Mac OS >= 9.0, 0xFF is EURO SIGN, 0xA2/0xB6 = GHE WITH UPTURN*/
};
|
|
|
/* Variants of kTextEncodingMacIcelandic                                                    */
enum {
  kMacIcelandicStdDefaultVariant  = 0,  /* meta value, maps to 2 or 4 depending on System */
  kMacIcelandicTTDefaultVariant   = 1,  /* meta value, maps to 3 or 5 depending on System */
                                        /* The following are for Mac OS version < 8.5, 0xDB is CURRENCY SIGN */
  kMacIcelandicStdCurrSignVariant = 2,  /* 0xBB/0xBC are fem./masc. ordinal indicators*/
  kMacIcelandicTTCurrSignVariant  = 3,  /* 0xBB/0xBC are fi/fl ligatures*/
                                        /* The following are for Mac OS version >= 8.5, 0xDB is EURO SIGN */
  kMacIcelandicStdEuroSignVariant = 4,  /* 0xBB/0xBC are fem./masc. ordinal indicators*/
  kMacIcelandicTTEuroSignVariant  = 5   /* 0xBB/0xBC are fi/fl ligatures*/
};
|
|
|
/* Variants of kTextEncodingMacCroatian                                                     */
enum {
  kMacCroatianDefaultVariant      = 0,  /* meta value, maps to 1 or 2 depending on System */
  kMacCroatianCurrencySignVariant = 1,  /* Mac OS version < 8.5, 0xDB is CURRENCY SIGN    */
  kMacCroatianEuroSignVariant     = 2   /* Mac OS version >= 8.5, 0xDB is EURO SIGN      */
};
|
|
|
|
|
/* Variants of kTextEncodingMacRomanian                                                     */
enum {
  kMacRomanianDefaultVariant      = 0,  /* meta value, maps to 1 or 2 depending on System */
  kMacRomanianCurrencySignVariant = 1,  /* Mac OS version < 8.5, 0xDB is CURRENCY SIGN    */
  kMacRomanianEuroSignVariant     = 2   /* Mac OS version >= 8.5, 0xDB is EURO SIGN      */
};
|
|
|
|
|
/* Variants of kTextEncodingMacJapanese*/
enum {
  kMacJapaneseStandardVariant        = 0,
  kMacJapaneseStdNoVerticalsVariant  = 1,
  kMacJapaneseBasicVariant           = 2,
  kMacJapanesePostScriptScrnVariant  = 3,
  kMacJapanesePostScriptPrintVariant = 4,
  kMacJapaneseVertAtKuPlusTenVariant = 5
};
|
|
|
/* Variants of kTextEncodingMacArabic*/
enum {
  kMacArabicStandardVariant     = 0,    /* 0xC0 is 8-spoke asterisk, 0x2A & 0xAA are asterisk (e.g. Cairo)*/
  kMacArabicTrueTypeVariant     = 1,    /* 0xC0 is asterisk, 0x2A & 0xAA are multiply signs (e.g. Baghdad)*/
  kMacArabicThuluthVariant      = 2,    /* 0xC0 is Arabic five-point star, 0x2A & 0xAA are multiply signs*/
  kMacArabicAlBayanVariant      = 3     /* 8-spoke asterisk, multiply sign, Koranic ligatures & parens*/
};
|
|
|
/* Variants of kTextEncodingMacFarsi*/
enum {
  kMacFarsiStandardVariant      = 0,    /* 0xC0 is 8-spoke asterisk, 0x2A & 0xAA are asterisk (e.g. Tehran)*/
  kMacFarsiTrueTypeVariant      = 1     /* asterisk, multiply signs, Koranic ligatures, geometric shapes*/
};
|
|
|
/* Variants of kTextEncodingMacHebrew*/
enum {
  kMacHebrewStandardVariant     = 0,
  kMacHebrewFigureSpaceVariant  = 1
};
|
|
|
/* Variants of kTextEncodingMacVT100                                                        */
enum {
  kMacVT100DefaultVariant       = 0,    /* meta value, maps to 1 or 2 depending on System */
  kMacVT100CurrencySignVariant  = 1,    /* Mac OS version < 8.5, 0xDB is CURRENCY SIGN    */
  kMacVT100EuroSignVariant      = 2     /* Mac OS version >= 8.5, 0xDB is EURO SIGN      */
};
|
|
|
/* Variants of Unicode & ISO 10646 encodings*/
enum {
  kUnicodeNoSubset               = 0,
  kUnicodeCanonicalDecompVariant = 2,   /* canonical decomposition (NFD); excludes composed characters*/
  kUnicodeCanonicalCompVariant   = 3,   /* canonical composition (NFC); uses the composed chars as of Unicode 3.1*/
  kUnicodeHFSPlusDecompVariant   = 8,   /* decomposition for HFS+; doesn't decompose in 2000-2FFF, F900-FAFF, 2F800-2FAFF*/
  kUnicodeHFSPlusCompVariant     = 9    /* composition based on HFS+ decomposition*/
};
|
|
|
/* Variants of Big-5 encoding*/
enum {
  kBig5_BasicVariant            = 0,
  kBig5_StandardVariant         = 1,    /* 0xC6A1-0xC7FC: kana, Cyrillic, enclosed numerics*/
  kBig5_ETenVariant             = 2     /* adds kana, Cyrillic, radicals, etc with hi bytes C6-C8,F9*/
};
|
|
|
/* Variants of MacRomanLatin1                                                               */
enum {
  kMacRomanLatin1DefaultVariant   = 0,  /* meta value, maps to others depending on System*/
  kMacRomanLatin1StandardVariant  = 2,  /* permuted MacRoman, EuroSignVariant*/
  kMacRomanLatin1TurkishVariant   = 6,  /* permuted MacTurkish*/
  kMacRomanLatin1CroatianVariant  = 8,  /* permuted MacCroatian, EuroSignVariant*/
  kMacRomanLatin1IcelandicVariant = 11, /* permuted MacIcelandic, StdEuroSignVariant*/
  kMacRomanLatin1RomanianVariant  = 14  /* permuted MacRomanian, EuroSignVariant*/
};
|
|
|
/* Unicode variants not yet supported (and not fully defined)*/
enum {
  kUnicodeNoCompatibilityVariant = 1,
  kUnicodeNoCorporateVariant     = 4
};
|
|
|
/* The following are older names for backward compatibility*/
enum {
  kMacRomanStandardVariant        = 0,
  kMacIcelandicStandardVariant    = 0,
  kMacIcelandicTrueTypeVariant    = 1,
  kJapaneseStandardVariant        = 0,
  kJapaneseStdNoVerticalsVariant  = 1,
  kJapaneseBasicVariant           = 2,
  kJapanesePostScriptScrnVariant  = 3,
  kJapanesePostScriptPrintVariant = 4,
  kJapaneseVertAtKuPlusTenVariant = 5,
                                        /* kJapaneseStdNoOneByteKanaVariant = 6,  // replaced by kJapaneseNoOneByteKanaOption*/
                                        /* kJapaneseBasicNoOneByteKanaVariant = 7,    // replaced by kJapaneseNoOneByteKanaOption    */
  kHebrewStandardVariant          = 0,
  kHebrewFigureSpaceVariant       = 1,
  kUnicodeMaxDecomposedVariant    = 2,  /* replaced by kUnicodeCanonicalDecompVariant*/
  kUnicodeNoComposedVariant       = 3,  /* this really meant NoComposing; replaced by kUnicodeCanonicalCompVariant*/
                                        /* The following Japanese variant options were never supported and are now deprecated.*/
                                        /* In TEC 1.4 and later their functionality is replaced by the Unicode Converter options listed.*/
  kJapaneseNoOneByteKanaOption     = 0x20, /* replaced by UnicodeConverter option kUnicodeNoHalfwidthCharsBit*/
  kJapaneseUseAsciiBackslashOption = 0x40  /* replaced by UnicodeConverter option kUnicodeForceASCIIRangeBit*/
};
|
|
|
/* TextEncodingFormat type & values */ |
|
typedef UInt32 TextEncodingFormat; |
|
enum {
                                        /* Default TextEncodingFormat for any TextEncodingBase*/
  kTextEncodingDefaultFormat    = 0,
                                        /* Formats for Unicode & ISO 10646*/
  kUnicode16BitFormat           = 0,
  kUnicodeUTF7Format            = 1,
  kUnicodeUTF8Format            = 2,
  kUnicode32BitFormat           = 3,
                                        /* New constants since 10.3?*/
  kUnicodeUTF16Format           = 0,    /* UTF16 form (16-bit units), native or external byte order (see below)*/
  kUnicodeUTF32Format           = 3,    /* UTF32 form (32-bit units), native or external byte order (see below)*/
  kUnicodeUTF16BEFormat         = 4,    /* UTF16 form, explicit big-endian byte order, no BOM*/
  kUnicodeUTF16LEFormat         = 5,    /* UTF16 form, explicit little-endian byte order, no BOM*/
  kUnicodeUTF32BEFormat         = 6,    /* UTF32 form, explicit big-endian byte order, no BOM*/
  kUnicodeUTF32LEFormat         = 7,    /* UTF32 form, explicit little-endian byte order, no BOM*/
  kUnicodeSCSUFormat            = 8     /* Std. Compression Scheme for Unicode, Unicode Tech Std. #6*/
};
|
|
|
/*
   Note for kUnicodeUTF16Format and kUnicodeUTF32Format:
   - An array of UTF16Char (UniChar) or UTF32Char is normally understood to use "internal" or
     platform-native byte ordering for kUnicodeUTF16Format and kUnicodeUTF32Format; the array MAY
     begin with byte-order mark (BOM), but the BOM should match the internal ordering.
   - If an array of bytes (such as char *) that can be in various encodings is specified to be
     in Unicode with kUnicodeUTF16Format or kUnicodeUTF32Format (not explicitly BE or LE), then it
     is assumed to use "external" byte ordering, which means: If there is a BOM at the beginning
     of text, the BOM specifies the byte ordering, otherwise big-endian is assumed.
   Synonyms for some Unicode formats
*/
|
/* TextEncoding type */ |
|
typedef UInt32 TextEncoding; |
|
/* name part selector for GetTextEncodingName*/ |
|
typedef UInt32 TextEncodingNameSelector; |
|
/* Values for TextEncodingNameSelector */
enum {
  kTextEncodingFullName         = 0,
  kTextEncodingBaseName         = 1,
  kTextEncodingVariantName      = 2,
  kTextEncodingFormatName       = 3
};
|
|
|
/* Types used in conversion */ |
|
struct TextEncodingRun { |
|
ByteOffset offset; |
|
TextEncoding textEncoding; |
|
}; |
|
typedef struct TextEncodingRun TextEncodingRun; |
|
typedef TextEncodingRun * TextEncodingRunPtr; |
|
typedef const TextEncodingRun * ConstTextEncodingRunPtr; |
|
struct ScriptCodeRun { |
|
ByteOffset offset; |
|
ScriptCode script; |
|
}; |
|
typedef struct ScriptCodeRun ScriptCodeRun; |
|
typedef ScriptCodeRun * ScriptCodeRunPtr; |
|
typedef const ScriptCodeRun * ConstScriptCodeRunPtr; |
|
typedef UInt8 * TextPtr; |
|
typedef const UInt8 * ConstTextPtr; |
|
/* Basic types for Unicode characters and strings:*/ |
|
typedef UniChar * UniCharArrayPtr; |
|
typedef const UniChar * ConstUniCharArrayPtr; |
|
/* |
|
UniCharArrayHandle is a handle type to correspond to UniCharArrayPtr, |
|
i.e. a handle to an array of UniChars (UInt16s). |
|
*/ |
|
typedef UniCharArrayPtr * UniCharArrayHandle; |
|
/* |
|
UniCharArrayOffset is used to indicate an edge offset in an array |
|
of UniChars (UInt16s). |
|
*/ |
|
typedef UInt32 UniCharArrayOffset; |
|
/* enums for TextEncoding Conversion routines*/
enum {
  kTextScriptDontCare           = -128,
  kTextLanguageDontCare         = -128,
  kTextRegionDontCare           = -128
};
|
|
|
/* struct for TECGetInfo*/ |
|
|
|
struct TECInfo { |
|
UInt16 format; /* format code for this struct*/ |
|
UInt16 tecVersion; /* TEC version in BCD, e.g. 0x0121 for 1.2.1*/ |
|
UInt32 tecTextConverterFeatures; /* bitmask indicating TEC features/fixes*/ |
|
UInt32 tecUnicodeConverterFeatures; /* bitmask indicating UnicodeConverter features/fixes*/ |
|
UInt32 tecTextCommonFeatures; /* bitmask indicating TextCommon features/fixes*/ |
|
Str31 tecTextEncodingsFolderName; /* localized name of Text Encodings folder (pascal string)*/ |
|
Str31 tecExtensionFileName; /* localized name of TEC extension (pascal string)*/ |
|
UInt16 tecLowestTEFileVersion; /* Lowest version (BCD) of all files in Text Encodings folder*/ |
|
UInt16 tecHighestTEFileVersion; /* Highest version (BCD) of all files in Text Encodings folder*/ |
|
}; |
|
typedef struct TECInfo TECInfo; |
|
typedef TECInfo * TECInfoPtr; |
|
typedef TECInfoPtr * TECInfoHandle; |
|
/* Value for TECInfo format code*/
enum {
  kTECInfoCurrentFormat         = 2     /* any future formats will just add fields at the end*/
};
|
|
|
/*
   Defined feature/fix bits for tecUnicodeConverterFeatures field
   Bit:                             Meaning if set:
   ----                             ---------------
   kTECKeepInfoFixBit               Unicode Converter no longer ignores other control flags if
                                    kUnicodeKeepInfoBit is set. Bug fix in TEC Manager 1.2.1.
   kTECFallbackTextLengthFixBit     Unicode Converter honors the *srcConvLen and *destConvLen
                                    returned by caller-supplied fallback handler for any status it
                                    returns except for kTECUnmappableElementErr (previously it only
                                    honored these values if noErr was returned). Bug fix in TEC
                                    Manager 1.2.1.
   kTECTextRunBitClearFixBit        ConvertFromUnicodeToTextRun & ConvertFromUnicodeToScriptCodeRun
                                    function correctly if the kUnicodeTextRunBit is set (previously
                                    their determination of best target encoding was incorrect). Bug
                                    fix in TEC Manager 1.3.
   kTECTextToUnicodeScanFixBit      ConvertFromTextToUnicode uses an improved scanner and maintains
                                    some resulting state information, which it uses for mapping.
                                    This has several effects:
                                    - Improved mapping of 0x30-0x39 digits in Mac OS Arabic, fewer
                                      direction overrides when mapping Mac OS Arabic & Hebrew, and
                                      improved mapping of certain characters in Indic encodings.
                                    - Malformed input produces kTextMalformedInputErr.
                                    - ConvertFromTextToUnicode accepts and uses the control flags
                                      kUnicodeKeepInfoMask and kUnicodeStringUnterminatedMask.
                                    Bug fix and enhancement in TEC Manager 1.3.
   kTECAddForceASCIIChangesBit      Define new control flag bits kUnicodeForceASCIIRangeBit and
                                    kUnicodeNoHalfwidthCharsBit for use with
                                    ConvertFromTextToUnicode, ConvertFromUnicodeToText, etc.
                                    Enhancement in TEC Manager 1.4.
   kTECPreferredEncodingFixBit      CreateUnicodeToTextRunInfo and related functions fix a problem
                                    that occurred when a preferred encoding was specified that did
                                    not match the System script; the preferred script was not
                                    actually placed first in the ordered list of encodings to use.
                                    Bug fix in TEC Manager 1.4.
   kTECAddTextRunHeuristicsBit      Define new control flag bit kUnicodeTextRunHeuristicsBit for
                                    use with ConvertFromUnicodeToTextRun.
   kTECAddFallbackInterruptBit      Define new option kUnicodeFallbackInterruptSafeMask for use
                                    with SetFallbackUnicodeToText. If a client fallback handler is
                                    installed without specifying this bit, ConvertFromUnicodeToText
                                    will HLock the tables it uses (in case the fallback handler
                                    moves memory); otherwise, it won't.
*/

/* Bit positions of the feature/fix flags described above. */
enum {
  kTECKeepInfoFixBit            = 0,
  kTECFallbackTextLengthFixBit  = 1,
  kTECTextRunBitClearFixBit     = 2,
  kTECTextToUnicodeScanFixBit   = 3,
  kTECAddForceASCIIChangesBit   = 4,
  kTECPreferredEncodingFixBit   = 5,
  kTECAddTextRunHeuristicsBit   = 6,
  kTECAddFallbackInterruptBit   = 7
};

/* Single-bit masks, one per bit position above. */
enum {
  kTECKeepInfoFixMask           = 1L << kTECKeepInfoFixBit,
  kTECFallbackTextLengthFixMask = 1L << kTECFallbackTextLengthFixBit,
  kTECTextRunBitClearFixMask    = 1L << kTECTextRunBitClearFixBit,
  kTECTextToUnicodeScanFixMask  = 1L << kTECTextToUnicodeScanFixBit,
  kTECAddForceASCIIChangesMask  = 1L << kTECAddForceASCIIChangesBit,
  kTECPreferredEncodingFixMask  = 1L << kTECPreferredEncodingFixBit,
  kTECAddTextRunHeuristicsMask  = 1L << kTECAddTextRunHeuristicsBit,
  kTECAddFallbackInterruptMask  = 1L << kTECAddFallbackInterruptBit
};
|
|
|
/*
   -------------------------------------------------------------------------------------------------
   CONSTANTS for common and special Unicode code values
   -------------------------------------------------------------------------------------------------
*/

enum {
  kUnicodeByteOrderMark         = 0xFEFF,
  kUnicodeObjectReplacement     = 0xFFFC, /* placeholder for non-text object*/
  kUnicodeReplacementChar       = 0xFFFD, /* Unicode replacement for unconvertable input char*/
  kUnicodeSwappedByteOrderMark  = 0xFFFE, /* not a Unicode char; byte-swapped version of FEFF*/
  kUnicodeNotAChar              = 0xFFFF  /* not a Unicode char; may be used as a terminator*/
};
|
|
|
/* |
|
------------------------------------------------------------------------------------------------- |
|
CONSTANTS & DATA STRUCTURES for Unicode Properties |
|
------------------------------------------------------------------------------------------------- |
|
*/ |
|
typedef SInt32 UCCharPropertyType; |
|
/* Values for UCCharPropertyType */
enum {
  kUCCharPropTypeGenlCategory   = 1,    /* requests enumeration value*/
  kUCCharPropTypeCombiningClass = 2,    /* requests numeric value 0..255*/
  kUCCharPropTypeBidiCategory   = 3     /* requests enumeration value*/
};
|
|
|
typedef UInt32                          UCCharPropertyValue;
|
/* General Category enumeration values (requested by kUCCharPropTypeGenlCategory)*/
enum {
                                        /* Normative categories:*/
  kUCGenlCatOtherNotAssigned     = 0,   /* Cn Other, Not Assigned*/
  kUCGenlCatOtherControl         = 1,   /* Cc Other, Control*/
  kUCGenlCatOtherFormat          = 2,   /* Cf Other, Format*/
  kUCGenlCatOtherSurrogate       = 3,   /* Cs Other, Surrogate*/
  kUCGenlCatOtherPrivateUse      = 4,   /* Co Other, Private Use*/
  kUCGenlCatMarkNonSpacing       = 5,   /* Mn Mark, Non-Spacing*/
  kUCGenlCatMarkSpacingCombining = 6,   /* Mc Mark, Spacing Combining*/
  kUCGenlCatMarkEnclosing        = 7,   /* Me Mark, Enclosing*/
  kUCGenlCatNumberDecimalDigit   = 8,   /* Nd Number, Decimal Digit*/
  kUCGenlCatNumberLetter         = 9,   /* Nl Number, Letter*/
  kUCGenlCatNumberOther          = 10,  /* No Number, Other*/
  kUCGenlCatSeparatorSpace       = 11,  /* Zs Separator, Space*/
  kUCGenlCatSeparatorLine        = 12,  /* Zl Separator, Line*/
  kUCGenlCatSeparatorParagraph   = 13,  /* Zp Separator, Paragraph*/
  kUCGenlCatLetterUppercase      = 14,  /* Lu Letter, Uppercase*/
  kUCGenlCatLetterLowercase      = 15,  /* Ll Letter, Lowercase*/
  kUCGenlCatLetterTitlecase      = 16,  /* Lt Letter, Titlecase*/
                                        /* Informative categories:*/
  kUCGenlCatLetterModifier       = 17,  /* Lm Letter, Modifier*/
  kUCGenlCatLetterOther          = 18,  /* Lo Letter, Other*/
  kUCGenlCatPunctConnector       = 20,  /* Pc Punctuation, Connector*/
  kUCGenlCatPunctDash            = 21,  /* Pd Punctuation, Dash*/
  kUCGenlCatPunctOpen            = 22,  /* Ps Punctuation, Open*/
  kUCGenlCatPunctClose           = 23,  /* Pe Punctuation, Close*/
  kUCGenlCatPunctInitialQuote    = 24,  /* Pi Punctuation, Initial quote*/
  kUCGenlCatPunctFinalQuote      = 25,  /* Pf Punctuation, Final quote*/
  kUCGenlCatPunctOther           = 26,  /* Po Punctuation, Other*/
  kUCGenlCatSymbolMath           = 28,  /* Sm Symbol, Math*/
  kUCGenlCatSymbolCurrency       = 29,  /* Sc Symbol, Currency*/
  kUCGenlCatSymbolModifier       = 30,  /* Sk Symbol, Modifier*/
  kUCGenlCatSymbolOther          = 31   /* So Symbol, Other*/
};
|
|
|
/* Bidirectional Category enumeration values (requested by kUCCharPropTypeBidiCategory)*/
enum {
  kUCBidiCatNotApplicable         = 0,  /* for now use this for unassigned*/
                                        /* Strong types:*/
  kUCBidiCatLeftRight             = 1,  /* L  Left-to-Right*/
  kUCBidiCatRightLeft             = 2,  /* R  Right-to-Left*/
                                        /* Weak types:*/
  kUCBidiCatEuroNumber            = 3,  /* EN European Number*/
  kUCBidiCatEuroNumberSeparator   = 4,  /* ES European Number Separator*/
  kUCBidiCatEuroNumberTerminator  = 5,  /* ET European Number Terminator*/
  kUCBidiCatArabicNumber          = 6,  /* AN Arabic Number*/
  kUCBidiCatCommonNumberSeparator = 7,  /* CS Common Number Separator*/
                                        /* Separators:*/
  kUCBidiCatBlockSeparator        = 8,  /* B  Paragraph Separator (was Block Separator)*/
  kUCBidiCatSegmentSeparator      = 9,  /* S  Segment Separator*/
                                        /* Neutrals:*/
  kUCBidiCatWhitespace            = 10, /* WS Whitespace*/
  kUCBidiCatOtherNeutral          = 11, /* ON Other Neutrals (unassigned codes could use this)*/
                                        /* New categories for Unicode 3.0*/
  kUCBidiCatRightLeftArabic       = 12, /* AL Right-to-Left Arabic (was Arabic Letter)*/
  kUCBidiCatLeftRightEmbedding    = 13, /* LRE    Left-to-Right Embedding*/
  kUCBidiCatRightLeftEmbedding    = 14, /* RLE    Right-to-Left Embedding*/
  kUCBidiCatLeftRightOverride     = 15, /* LRO    Left-to-Right Override*/
  kUCBidiCatRightLeftOverride     = 16, /* RLO    Right-to-Left Override*/
  kUCBidiCatPopDirectionalFormat  = 17, /* PDF    Pop Directional Format*/
  kUCBidiCatNonSpacingMark        = 18, /* NSM    Non-Spacing Mark*/
  kUCBidiCatBoundaryNeutral       = 19  /* BN Boundary Neutral*/
};
|
|
|
/*
   -------------------------------------------------------------------------------------------------
   Prototypes for TextEncoding functions
   -------------------------------------------------------------------------------------------------
*/

/* |
|
* CreateTextEncoding() |
|
* |
|
* Availability: |
|
* Non-Carbon CFM: in TextCommon 1.1 and later |
|
* CarbonLib: in CarbonLib 1.0 and later |
|
* Mac OS X: in version 10.0 and later |
|
*/ |
|
EXTERN_API( TextEncoding ) |
|
CreateTextEncoding( |
|
TextEncodingBase encodingBase, |
|
TextEncodingVariant encodingVariant, |
|
TextEncodingFormat encodingFormat); |
|
|
|
|
|
/* |
|
* GetTextEncodingBase() |
|
* |
|
* Availability: |
|
* Non-Carbon CFM: in TextCommon 1.1 and later |
|
* CarbonLib: in CarbonLib 1.0 and later |
|
* Mac OS X: in version 10.0 and later |
|
*/ |
|
EXTERN_API( TextEncodingBase ) |
|
GetTextEncodingBase(TextEncoding encoding); |
|
|
|
|
|
/* |
|
* GetTextEncodingVariant() |
|
* |
|
* Availability: |
|
* Non-Carbon CFM: in TextCommon 1.1 and later |
|
* CarbonLib: in CarbonLib 1.0 and later |
|
* Mac OS X: in version 10.0 and later |
|
*/ |
|
EXTERN_API( TextEncodingVariant ) |
|
GetTextEncodingVariant(TextEncoding encoding); |
|
|
|
|
|
/* |
|
* GetTextEncodingFormat() |
|
* |
|
* Availability: |
|
* Non-Carbon CFM: in TextCommon 1.1 and later |
|
* CarbonLib: in CarbonLib 1.0 and later |
|
* Mac OS X: in version 10.0 and later |
|
*/ |
|
EXTERN_API( TextEncodingFormat ) |
|
GetTextEncodingFormat(TextEncoding encoding); |
|
|
|
|
|
/* |
|
* ResolveDefaultTextEncoding() |
|
* |
|
* Availability: |
|
* Non-Carbon CFM: in TextCommon 1.1 and later |
|
* CarbonLib: in CarbonLib 1.0 and later |
|
* Mac OS X: in version 10.0 and later |
|
*/ |
|
EXTERN_API( TextEncoding ) |
|
ResolveDefaultTextEncoding(TextEncoding encoding); |
|
|
|
|
|
/* |
|
* GetTextEncodingName() |
|
* |
|
* Availability: |
|
* Non-Carbon CFM: in TextCommon 1.1 and later |
|
* CarbonLib: in CarbonLib 1.0 and later |
|
* Mac OS X: in version 10.0 and later |
|
*/ |
|
EXTERN_API_C( OSStatus ) |
|
GetTextEncodingName( |
|
TextEncoding iEncoding, |
|
TextEncodingNameSelector iNamePartSelector, |
|
RegionCode iPreferredRegion, |
|
TextEncoding iPreferredEncoding, |
|
ByteCount iOutputBufLen, |
|
ByteCount * oNameLength, |
|
RegionCode * oActualRegion, /* can be NULL */ |
|
TextEncoding * oActualEncoding, /* can be NULL */ |
|
TextPtr oEncodingName); |
|
|
|
|
|
/* |
|
* TECGetInfo() |
|
* |
|
* Availability: |
|
* Non-Carbon CFM: in TextCommon 1.2.1 and later |
|
* CarbonLib: in CarbonLib 1.0 and later |
|
* Mac OS X: in version 10.0 and later |
|
*/ |
|
EXTERN_API( OSStatus ) |
|
TECGetInfo(TECInfoHandle * tecInfo); |
|
|
|
|
|
|
|
/* |
|
* UpgradeScriptInfoToTextEncoding() |
|
* |
|
* Availability: |
|
* Non-Carbon CFM: in TextCommon 1.1 and later |
|
* CarbonLib: in CarbonLib 1.0 and later |
|
* Mac OS X: in version 10.0 and later |
|
*/ |
|
EXTERN_API( OSStatus ) |
|
UpgradeScriptInfoToTextEncoding( |
|
ScriptCode iTextScriptID, |
|
LangCode iTextLanguageID, |
|
RegionCode iRegionID, |
|
ConstStr255Param iTextFontname, |
|
TextEncoding * oEncoding); |
|
|
|
|
|
/* |
|
* RevertTextEncodingToScriptInfo() |
|
* |
|
* Availability: |
|
* Non-Carbon CFM: in TextCommon 1.1 and later |
|
* CarbonLib: in CarbonLib 1.0 and later |
|
* Mac OS X: in version 10.0 and later |
|
*/ |
|
EXTERN_API( OSStatus ) |
|
RevertTextEncodingToScriptInfo( |
|
TextEncoding iEncoding, |
|
ScriptCode * oTextScriptID, |
|
LangCode * oTextLanguageID, /* can be NULL */ |
|
Str255 oTextFontname); /* can be NULL */ |
|
|
|
|
|
/* |
|
* GetTextEncodingFromScriptInfo() |
|
* |
|
* Availability: |
|
* Non-Carbon CFM: not available |
|
* CarbonLib: not available in CarbonLib 1.x, is available on Mac OS X version 10.2 and later |
|
* Mac OS X: in version 10.2 and later |
|
*/ |
|
EXTERN_API( OSStatus ) |
|
GetTextEncodingFromScriptInfo( |
|
ScriptCode iTextScriptID, |
|
LangCode iTextLanguageID, |
|
RegionCode iTextRegionID, |
|
TextEncoding * oEncoding); |
|
|
|
|
|
/* |
|
* GetScriptInfoFromTextEncoding() |
|
* |
|
* Availability: |
|
* Non-Carbon CFM: not available |
|
* CarbonLib: not available in CarbonLib 1.x, is available on Mac OS X version 10.2 and later |
|
* Mac OS X: in version 10.2 and later |
|
*/ |
|
EXTERN_API( OSStatus ) |
|
GetScriptInfoFromTextEncoding( |
|
TextEncoding iEncoding, |
|
ScriptCode * oTextScriptID, |
|
LangCode * oTextLanguageID); /* can be NULL */ |
|
|
|
|
|
/* |
|
* NearestMacTextEncodings() |
|
* |
|
* Availability: |
|
* Non-Carbon CFM: in TextCommon 1.5 and later |
|
* CarbonLib: in CarbonLib 1.0 and later |
|
* Mac OS X: in version 10.0 and later |
|
*/ |
|
EXTERN_API( OSStatus ) |
|
NearestMacTextEncodings( |
|
TextEncoding generalEncoding, |
|
TextEncoding * bestMacEncoding, |
|
TextEncoding * alternateMacEncoding); |
|
|
|
|
|
/* |
|
* UCGetCharProperty() |
|
* |
|
* Availability: |
|
* Non-Carbon CFM: in TextCommon 1.5 and later |
|
* CarbonLib: in CarbonLib 1.0 and later |
|
* Mac OS X: in version 10.0 and later |
|
*/ |
|
EXTERN_API_C( OSStatus ) |
|
UCGetCharProperty( |
|
const UniChar * charPtr, |
|
UniCharCount textLength, |
|
UCCharPropertyType propType, |
|
UCCharPropertyValue * propValue); |
|
|
|
|
|
/*
   -------------------------------------------------------------------------------------------------
   Surrogate pair utilities
   -------------------------------------------------------------------------------------------------
*/

/*
   UC_INLINE is the storage-class/inline prefix used by the inline helper
   functions defined in this header. A definition supplied before this
   header is included is left untouched. Otherwise the compiler-specific
   inline keyword is used where one is known to exist, and plain 'static'
   is the fallback.
*/
#if !defined(UC_INLINE)
    #if defined(__GNUC__)
        #define UC_INLINE static __inline__
    #elif defined(__MWERKS__) || defined(__cplusplus) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)
        /* 'inline' is a keyword in C++ and in C99 or later */
        #define UC_INLINE static inline
    #else
        #define UC_INLINE static
    #endif
#endif

/* Surrogate ranges: inclusive first and last values of the high and low surrogate ranges. */
enum {
  kUCHighSurrogateRangeStart    = 0xD800UL,
  kUCHighSurrogateRangeEnd      = 0xDBFFUL,
  kUCLowSurrogateRangeStart     = 0xDC00UL,
  kUCLowSurrogateRangeEnd       = 0xDFFFUL
};

/*! |
|
@function UCIsSurrogateHighCharacter |
|
Reports whether or not the character is a high surrogate. |
|
@param character The character to be checked. |
|
@result true, if character is a high surrogate, otherwise false. |
|
*/ |
|
UC_INLINE Boolean UCIsSurrogateHighCharacter( UniChar character ) { |
|
/* return ( ( character >= kUCHighSurrogateRangeStart ) && (character <= kUCHighSurrogateRangeEnd ) ? true : false ); */ |
|
return ( ( character & 0xFC00UL ) == kUCHighSurrogateRangeStart ); |
|
} |
|
|
|
/*! |
|
@function UCIsSurrogateLowCharacter |
|
Reports whether or not the character is a low surrogate. |
|
@param character The character to be checked. |
|
@result true, if character is a low surrogate, otherwise false. |
|
*/ |
|
UC_INLINE Boolean UCIsSurrogateLowCharacter( UniChar character ) { |
|
/* return ( ( character >= kUCLowSurrogateRangeStart ) && ( character <= kUCLowSurrogateRangeEnd ) ? true : false ); */ |
|
return ( ( character & 0xFC00UL ) == kUCLowSurrogateRangeStart ); |
|
} |
|
|
|
/*! |
|
@function UCGetUnicodeScalarValueForSurrogatePair |
|
Returns the UTF-32 value corresponding to the surrogate pair passed in. |
|
@param surrogateHigh The high surrogate character. If this parameter |
|
is not a valid high surrogate character, the behavior is undefined. |
|
@param surrogateLow The low surrogate character. If this parameter |
|
is not a valid low surrogate character, the behavior is undefined. |
|
@result The UTF-32 value for the surrogate pair. |
|
*/ |
|
UC_INLINE UnicodeScalarValue UCGetUnicodeScalarValueForSurrogatePair( UniChar surrogateHigh, UniChar surrogateLow ) { |
|
return ( ( surrogateHigh - kUCHighSurrogateRangeStart ) << 10 ) + ( surrogateLow - kUCLowSurrogateRangeStart ) + 0x0010000UL; |
|
} |
|
|
|
|
|
|
|
/* Undo the structure alignment / packing selected near the top of this header. */
#if PRAGMA_STRUCT_ALIGN
    #pragma options align=reset
#elif PRAGMA_STRUCT_PACKPUSH
    #pragma pack(pop)
#elif PRAGMA_STRUCT_PACK
    #pragma pack()
#endif

/* Undo the '#pragma import on' issued near the top of this header. */
#ifdef PRAGMA_IMPORT_OFF
#pragma import off
#elif PRAGMA_IMPORT
#pragma import reset
#endif

/* Close the extern "C" block opened near the top of this header for C++ compilers. */
#ifdef __cplusplus
}
#endif

#endif /* __TEXTCOMMON__ */