You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
392 lines
18 KiB
392 lines
18 KiB
/* CFCharacterSet.h |
|
Copyright (c) 1999-2003, Apple, Inc. All rights reserved. |
|
*/ |
|
|
|
/*! |
|
@header CFCharacterSet |
|
CFCharacterSet represents a set, or a bag, of Unicode characters. |
|
The API consists of 3 groups: |
|
1) creation/manipulation of CFCharacterSet instances, |
|
2) query of a single Unicode character membership, |
|
and 3) bitmap representation related (reading/writing). |
|
Conceptually, CFCharacterSet is a 136K byte bitmap array of |
|
which each bit represents a Unicode code point. It could |
|
contain the Unicode characters in ISO 10646 Basic Multilingual |
|
Plane (BMP) and characters in Plane 1 through Plane 16 |
|
accessible via surrogate paris in the Unicode Transformation |
|
Format, 16-bit encoding form (UTF-16). In other words, it can |
|
store values from 0x00000 to 0x10FFFF in the Unicode |
|
Transformation Format, 32-bit encoding form (UTF-32). However, |
|
in general, how CFCharacterSet stores the information is an |
|
implementation detail. Note even CFData used for the external |
|
bitmap representation rarely has 136K byte. For detailed |
|
discussion of the external bitmap representation, refer to the |
|
comments for CFCharacterSetCreateWithBitmapRepresentation below. |
|
Note that the existance of non-BMP characters in a character set |
|
does not imply the membership of the corresponding surrogate |
|
characters. For example, a character set with U+10000 does not |
|
match with U+D800. |
|
*/ |
|
|
|
#if !defined(__COREFOUNDATION_CFCHARACTERSET__) |
|
#define __COREFOUNDATION_CFCHARACTERSET__ 1 |
|
|
|
#include <CoreFoundation/CFBase.h> |
|
#include <CoreFoundation/CFData.h> |
|
|
|
#if defined(__cplusplus) |
|
extern "C" { |
|
#endif |
|
|
|
/*! |
|
@typedef CFCharacterSetRef |
|
This is the type of a reference to immutable CFCharacterSets. |
|
*/ |
|
typedef const struct __CFCharacterSet * CFCharacterSetRef; |
|
|
|
/*! |
|
@typedef CFMutableCharacterSetRef |
|
This is the type of a reference to mutable CFMutableCharacterSets. |
|
*/ |
|
typedef struct __CFCharacterSet * CFMutableCharacterSetRef; |
|
|
|
/*! |
|
@typedef CFCharacterSetPredefinedSet |
|
Type of the predefined CFCharacterSet selector values. |
|
*/ |
|
typedef enum { |
|
kCFCharacterSetControl = 1, /* Control character set (Unicode General Category Cc and Cf) */ |
|
kCFCharacterSetWhitespace, /* Whitespace character set (Unicode General Category Zs and U0009 CHARACTER TABULATION) */ |
|
kCFCharacterSetWhitespaceAndNewline, /* Whitespace and Newline character set (Unicode General Category Z*, U000A ~ U000D, and U0085) */ |
|
kCFCharacterSetDecimalDigit, /* Decimal digit character set */ |
|
kCFCharacterSetLetter, /* Letter character set (Unicode General Category L* & M*) */ |
|
kCFCharacterSetLowercaseLetter, /* Lowercase character set (Unicode General Category Ll) */ |
|
kCFCharacterSetUppercaseLetter, /* Uppercase character set (Unicode General Category Lu and Lt) */ |
|
kCFCharacterSetNonBase, /* Non-base character set (Unicode General Category M*) */ |
|
kCFCharacterSetDecomposable, /* Canonically decomposable character set */ |
|
kCFCharacterSetAlphaNumeric, /* Alpha Numeric character set (Unicode General Category L*, M*, & N*) */ |
|
kCFCharacterSetPunctuation, /* Punctuation character set (Unicode General Category P*) */ |
|
kCFCharacterSetIllegal /* Illegal character set */ |
|
#if MAC_OS_X_VERSION_10_2 <= MAC_OS_X_VERSION_MAX_ALLOWED |
|
, kCFCharacterSetCapitalizedLetter /* Titlecase character set (Unicode General Category Lt) */ |
|
#endif |
|
#if MAC_OS_X_VERSION_10_3 <= MAC_OS_X_VERSION_MAX_ALLOWED |
|
, kCFCharacterSetSymbol /* Symbol character set (Unicode General Category S*) */ |
|
#endif |
|
} CFCharacterSetPredefinedSet; |
|
|
|
/*! |
|
@function CFCharacterSetGetTypeID |
|
Returns the type identifier of all CFCharacterSet instances. |
|
*/ |
|
CF_EXPORT |
|
CFTypeID CFCharacterSetGetTypeID(void); |
|
|
|
/*! |
|
@function CFCharacterSetGetPredefined |
|
Returns a predefined CFCharacterSet instance. |
|
@param theSetIdentifier The CFCharacterSetPredefinedSet selector |
|
which specifies the predefined character set. If the |
|
value is not in CFCharacterSetPredefinedSet, the behavior |
|
is undefined. |
|
@result A reference to the predefined immutable CFCharacterSet. |
|
This instance is owned by CF. |
|
*/ |
|
CF_EXPORT |
|
CFCharacterSetRef CFCharacterSetGetPredefined(CFCharacterSetPredefinedSet theSetIdentifier); |
|
|
|
/*! |
|
@function CFCharacterSetCreateWithCharactersInRange |
|
Creates a new immutable character set with the values from the given range. |
|
@param alloc The CFAllocator which should be used to allocate |
|
memory for the array and its storage for values. This |
|
parameter may be NULL in which case the current default |
|
CFAllocator is used. If this reference is not a valid |
|
CFAllocator, the behavior is undefined. |
|
@param theRange The CFRange which should be used to specify the |
|
Unicode range the character set is filled with. It |
|
accepts the range in 32-bit in the UTF-32 format. The |
|
valid character point range is from 0x00000 to 0x10FFFF. |
|
If the range is outside of the valid Unicode character |
|
point, the behavior is undefined. |
|
@result A reference to the new immutable CFCharacterSet. |
|
*/ |
|
CF_EXPORT |
|
CFCharacterSetRef CFCharacterSetCreateWithCharactersInRange(CFAllocatorRef alloc, CFRange theRange); |
|
|
|
/*! |
|
@function CFCharacterSetCreateWithCharactersInString |
|
Creates a new immutable character set with the values in the given string. |
|
@param alloc The CFAllocator which should be used to allocate |
|
memory for the array and its storage for values. This |
|
parameter may be NULL in which case the current default |
|
CFAllocator is used. If this reference is not a valid |
|
CFAllocator, the behavior is undefined. |
|
@param theString The CFString which should be used to specify |
|
the Unicode characters the character set is filled with. |
|
If this parameter is not a valid CFString, the behavior |
|
is undefined. |
|
@result A reference to the new immutable CFCharacterSet. |
|
*/ |
|
CF_EXPORT |
|
CFCharacterSetRef CFCharacterSetCreateWithCharactersInString(CFAllocatorRef alloc, CFStringRef theString); |
|
|
|
/*! |
|
@function CFCharacterSetCreateWithBitmapRepresentation |
|
Creates a new immutable character set with the bitmap representtion in the given data. |
|
@param alloc The CFAllocator which should be used to allocate |
|
memory for the array and its storage for values. This |
|
parameter may be NULL in which case the current default |
|
CFAllocator is used. If this reference is not a valid |
|
CFAllocator, the behavior is undefined. |
|
@param theData The CFData which should be used to specify the |
|
bitmap representation of the Unicode character points |
|
the character set is filled with. The bitmap |
|
representation could contain all the Unicode character |
|
range starting from BMP to Plane 16. The first 8K bytes |
|
of the data represents the BMP range. The BMP range 8K |
|
bytes can be followed by zero to sixteen 8K byte |
|
bitmaps, each one with the plane index byte prepended. |
|
For example, the bitmap representing the BMP and Plane 2 |
|
has the size of 16385 bytes (8K bytes for BMP, 1 byte |
|
index + 8K bytes bitmap for Plane 2). The plane index |
|
byte, in this case, contains the integer value two. If |
|
this parameter is not a valid CFData or it contains a |
|
Plane index byte outside of the valid Plane range |
|
(1 to 16), the behavior is undefined. |
|
@result A reference to the new immutable CFCharacterSet. |
|
*/ |
|
CF_EXPORT |
|
CFCharacterSetRef CFCharacterSetCreateWithBitmapRepresentation(CFAllocatorRef alloc, CFDataRef theData); |
|
|
|
#if MAC_OS_X_VERSION_10_2 <= MAC_OS_X_VERSION_MAX_ALLOWED |
|
/*! |
|
@function CFCharacterSetCreateInvertedSet |
|
Creates a new immutable character set that is the invert of the specified character set. |
|
@param alloc The CFAllocator which should be used to allocate |
|
memory for the array and its storage for values. This |
|
parameter may be NULL in which case the current default |
|
CFAllocator is used. If this reference is not a valid |
|
CFAllocator, the behavior is undefined. |
|
@param theSet The CFCharacterSet which is to be inverted. If this |
|
parameter is not a valid CFCharacterSet, the behavior is |
|
undefined. |
|
@result A reference to the new immutable CFCharacterSet. |
|
*/ |
|
CF_EXPORT CFCharacterSetRef CFCharacterSetCreateInvertedSet(CFAllocatorRef alloc, CFCharacterSetRef theSet); |
|
|
|
/*! |
|
@function CFCharacterSetIsSupersetOfSet |
|
Reports whether or not the character set is a superset of the character set specified as the second parameter. |
|
@param theSet The character set to be checked for the membership of theOtherSet. |
|
If this parameter is not a valid CFCharacterSet, the behavior is undefined. |
|
@param theOtherset The character set to be checked whether or not it is a subset of theSet. |
|
If this parameter is not a valid CFCharacterSet, the behavior is undefined. |
|
*/ |
|
CF_EXPORT Boolean CFCharacterSetIsSupersetOfSet(CFCharacterSetRef theSet, CFCharacterSetRef theOtherset); |
|
|
|
/*! |
|
@function CFCharacterSetHasMemberInPlane |
|
Reports whether or not the character set contains at least one member character in the specified plane. |
|
@param theSet The character set to be checked for the membership. If this |
|
parameter is not a valid CFCharacterSet, the behavior is undefined. |
|
@param thePlane The plane number to be checked for the membership. |
|
The valid value range is from 0 to 16. If the value is outside of the valid |
|
plane number range, the behavior is undefined. |
|
*/ |
|
CF_EXPORT Boolean CFCharacterSetHasMemberInPlane(CFCharacterSetRef theSet, CFIndex thePlane); |
|
#endif |
|
|
|
/*! |
|
@function CFCharacterSetCreateMutable |
|
Creates a new empty mutable character set. |
|
@param allocator The CFAllocator which should be used to allocate |
|
memory for the array and its storage for values. This |
|
parameter may be NULL in which case the current default |
|
CFAllocator is used. If this reference is not a valid |
|
CFAllocator, the behavior is undefined. |
|
@result A reference to the new mutable CFCharacterSet. |
|
*/ |
|
CF_EXPORT |
|
CFMutableCharacterSetRef CFCharacterSetCreateMutable(CFAllocatorRef alloc); |
|
|
|
#if MAC_OS_X_VERSION_10_3 <= MAC_OS_X_VERSION_MAX_ALLOWED |
|
/*! |
|
@function CFCharacterSetCreateCopy |
|
Creates a new character set with the values from the given character set. This function tries to compact the backing store where applicable. |
|
@param allocator The CFAllocator which should be used to allocate |
|
memory for the array and its storage for values. This |
|
parameter may be NULL in which case the current default |
|
CFAllocator is used. If this reference is not a valid |
|
CFAllocator, the behavior is undefined. |
|
@param theSet The CFCharacterSet which is to be copied. If this |
|
parameter is not a valid CFCharacterSet, the behavior is |
|
undefined. |
|
@result A reference to the new CFCharacterSet. |
|
*/ |
|
CF_EXPORT |
|
CFCharacterSetRef CFCharacterSetCreateCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet) AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER; |
|
#endif /* MAC_OS_X_VERSION_10_3 <= MAC_OS_X_VERSION_MAX_ALLOWED */ |
|
|
|
/*! |
|
@function CFCharacterSetCreateMutableCopy |
|
Creates a new mutable character set with the values from the given character set. |
|
@param allocator The CFAllocator which should be used to allocate |
|
memory for the array and its storage for values. This |
|
parameter may be NULL in which case the current default |
|
CFAllocator is used. If this reference is not a valid |
|
CFAllocator, the behavior is undefined. |
|
@param theSet The CFCharacterSet which is to be copied. If this |
|
parameter is not a valid CFCharacterSet, the behavior is |
|
undefined. |
|
@result A reference to the new mutable CFCharacterSet. |
|
*/ |
|
CF_EXPORT |
|
CFMutableCharacterSetRef CFCharacterSetCreateMutableCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet); |
|
|
|
/*! |
|
@function CFCharacterSetIsCharacterMember |
|
Reports whether or not the Unicode character is in the character set. |
|
@param theSet The character set to be searched. If this parameter |
|
is not a valid CFCharacterSet, the behavior is undefined. |
|
@param theChar The Unicode character for which to test against the |
|
character set. Note that this function takes 16-bit Unicode |
|
character value; hence, it does not support access to the |
|
non-BMP planes. |
|
@result true, if the value is in the character set, otherwise false. |
|
*/ |
|
CF_EXPORT |
|
Boolean CFCharacterSetIsCharacterMember(CFCharacterSetRef theSet, UniChar theChar); |
|
|
|
#if MAC_OS_X_VERSION_10_2 <= MAC_OS_X_VERSION_MAX_ALLOWED |
|
/*! |
|
@function CFCharacterSetIsLongCharacterMember |
|
Reports whether or not the UTF-32 character is in the character set. |
|
@param theSet The character set to be searched. If this parameter |
|
is not a valid CFCharacterSet, the behavior is undefined. |
|
@param theChar The UTF-32 character for which to test against the |
|
character set. |
|
@result true, if the value is in the character set, otherwise false. |
|
*/ |
|
CF_EXPORT Boolean CFCharacterSetIsLongCharacterMember(CFCharacterSetRef theSet, UTF32Char theChar); |
|
#endif |
|
|
|
/*! |
|
@function CFCharacterSetCreateBitmapRepresentation |
|
Creates a new immutable data with the bitmap representation from the given character set. |
|
@param allocator The CFAllocator which should be used to allocate |
|
memory for the array and its storage for values. This |
|
parameter may be NULL in which case the current default |
|
CFAllocator is used. If this reference is not a valid |
|
CFAllocator, the behavior is undefined. |
|
@param theSet The CFCharacterSet which is to be used create the |
|
bitmap representation from. Refer to the comments for |
|
CFCharacterSetCreateWithBitmapRepresentation for the |
|
detailed discussion of the bitmap representation format. |
|
If this parameter is not a valid CFCharacterSet, the |
|
behavior is undefined. |
|
@result A reference to the new immutable CFData. |
|
*/ |
|
CF_EXPORT |
|
CFDataRef CFCharacterSetCreateBitmapRepresentation(CFAllocatorRef alloc, CFCharacterSetRef theSet); |
|
|
|
/*! |
|
@function CFCharacterSetAddCharactersInRange |
|
Adds the given range to the charaacter set. |
|
@param theSet The character set to which the range is to be added. |
|
If this parameter is not a valid mutable CFCharacterSet, |
|
the behavior is undefined. |
|
@param theRange The range to add to the character set. It accepts |
|
the range in 32-bit in the UTF-32 format. The valid |
|
character point range is from 0x00000 to 0x10FFFF. If the |
|
range is outside of the valid Unicode character point, |
|
the behavior is undefined. |
|
*/ |
|
CF_EXPORT |
|
void CFCharacterSetAddCharactersInRange(CFMutableCharacterSetRef theSet, CFRange theRange); |
|
|
|
/*! |
|
@function CFCharacterSetRemoveCharactersInRange |
|
Removes the given range from the charaacter set. |
|
@param theSet The character set from which the range is to be |
|
removed. If this parameter is not a valid mutable |
|
CFCharacterSet, the behavior is undefined. |
|
@param theRange The range to remove from the character set. |
|
It accepts the range in 32-bit in the UTF-32 format. |
|
The valid character point range is from 0x00000 to 0x10FFFF. |
|
If the range is outside of the valid Unicode character point, |
|
the behavior is undefined. |
|
*/ |
|
CF_EXPORT |
|
void CFCharacterSetRemoveCharactersInRange(CFMutableCharacterSetRef theSet, CFRange theRange); |
|
|
|
/*! |
|
@function CFCharacterSetAddCharactersInString |
|
Adds the characters in the given string to the charaacter set. |
|
@param theSet The character set to which the characters in the |
|
string are to be added. If this parameter is not a |
|
valid mutable CFCharacterSet, the behavior is undefined. |
|
@param theString The string to add to the character set. |
|
If this parameter is not a valid CFString, the behavior |
|
is undefined. |
|
*/ |
|
CF_EXPORT |
|
void CFCharacterSetAddCharactersInString(CFMutableCharacterSetRef theSet, CFStringRef theString); |
|
|
|
/*! |
|
@function CFCharacterSetRemoveCharactersInString |
|
Removes the characters in the given string from the charaacter set. |
|
@param theSet The character set from which the characters in the |
|
string are to be remove. If this parameter is not a |
|
valid mutable CFCharacterSet, the behavior is undefined. |
|
@param theString The string to remove from the character set. |
|
If this parameter is not a valid CFString, the behavior |
|
is undefined. |
|
*/ |
|
CF_EXPORT |
|
void CFCharacterSetRemoveCharactersInString(CFMutableCharacterSetRef theSet, CFStringRef theString); |
|
|
|
/*! |
|
@function CFCharacterSetUnion |
|
Forms the union with the given character set. |
|
@param theSet The destination character set into which the |
|
union of the two character sets is stored. If this |
|
parameter is not a valid mutable CFCharacterSet, the |
|
behavior is undefined. |
|
@param theOtherSet The character set with which the union is |
|
formed. If this parameter is not a valid CFCharacterSet, |
|
the behavior is undefined. |
|
*/ |
|
CF_EXPORT |
|
void CFCharacterSetUnion(CFMutableCharacterSetRef theSet, CFCharacterSetRef theOtherSet); |
|
|
|
/*! |
|
@function CFCharacterSetIntersect |
|
Forms the intersection with the given character set. |
|
@param theSet The destination character set into which the |
|
intersection of the two character sets is stored. |
|
If this parameter is not a valid mutable CFCharacterSet, |
|
the behavior is undefined. |
|
@param theOtherSet The character set with which the intersection |
|
is formed. If this parameter is not a valid CFCharacterSet, |
|
the behavior is undefined. |
|
*/ |
|
CF_EXPORT |
|
void CFCharacterSetIntersect(CFMutableCharacterSetRef theSet, CFCharacterSetRef theOtherSet); |
|
|
|
/*! |
|
@function CFCharacterSetInvert |
|
Inverts the content of the given character set. |
|
@param theSet The character set to be inverted. |
|
If this parameter is not a valid mutable CFCharacterSet, |
|
the behavior is undefined. |
|
*/ |
|
CF_EXPORT |
|
void CFCharacterSetInvert(CFMutableCharacterSetRef theSet); |
|
|
|
#if defined(__cplusplus) |
|
} |
|
#endif |
|
|
|
#endif /* !__COREFOUNDATION_CFCHARACTERSET__ */ |
|
|
|
|