Differential D3094 Diff 14018 ps/trunk/libraries/source/spidermonkey/include-win32-debug/js/CharacterEncoding.h
Changeset View
Changeset View
Standalone View
Standalone View
ps/trunk/libraries/source/spidermonkey/include-win32-debug/js/CharacterEncoding.h
Show All 25 Lines | |||||
* byte is treated as a 2-byte character, and there is no way to pass in a | * byte is treated as a 2-byte character, and there is no way to pass in a | ||||
* string containing characters beyond U+00FF. | * string containing characters beyond U+00FF. | ||||
*/ | */ | ||||
class Latin1Chars : public mozilla::Range<Latin1Char> | class Latin1Chars : public mozilla::Range<Latin1Char> | ||||
{ | { | ||||
typedef mozilla::Range<Latin1Char> Base; | typedef mozilla::Range<Latin1Char> Base; | ||||
public: | public: | ||||
using CharT = Latin1Char; | |||||
Latin1Chars() : Base() {} | Latin1Chars() : Base() {} | ||||
Latin1Chars(char* aBytes, size_t aLength) : Base(reinterpret_cast<Latin1Char*>(aBytes), aLength) {} | Latin1Chars(char* aBytes, size_t aLength) : Base(reinterpret_cast<Latin1Char*>(aBytes), aLength) {} | ||||
Latin1Chars(const Latin1Char* aBytes, size_t aLength) | Latin1Chars(const Latin1Char* aBytes, size_t aLength) | ||||
: Base(const_cast<Latin1Char*>(aBytes), aLength) | : Base(const_cast<Latin1Char*>(aBytes), aLength) | ||||
{} | {} | ||||
Latin1Chars(const char* aBytes, size_t aLength) | Latin1Chars(const char* aBytes, size_t aLength) | ||||
: Base(reinterpret_cast<Latin1Char*>(const_cast<char*>(aBytes)), aLength) | : Base(reinterpret_cast<Latin1Char*>(const_cast<char*>(aBytes)), aLength) | ||||
{} | {} | ||||
}; | }; | ||||
/* | /* | ||||
* A Latin1Chars, but with \0 termination for C compatibility. | * A Latin1Chars, but with \0 termination for C compatibility. | ||||
*/ | */ | ||||
class Latin1CharsZ : public mozilla::RangedPtr<Latin1Char> | class Latin1CharsZ : public mozilla::RangedPtr<Latin1Char> | ||||
{ | { | ||||
typedef mozilla::RangedPtr<Latin1Char> Base; | typedef mozilla::RangedPtr<Latin1Char> Base; | ||||
public: | public: | ||||
using CharT = Latin1Char; | |||||
Latin1CharsZ() : Base(nullptr, 0) {} | Latin1CharsZ() : Base(nullptr, 0) {} | ||||
Latin1CharsZ(char* aBytes, size_t aLength) | Latin1CharsZ(char* aBytes, size_t aLength) | ||||
: Base(reinterpret_cast<Latin1Char*>(aBytes), aLength) | : Base(reinterpret_cast<Latin1Char*>(aBytes), aLength) | ||||
{ | { | ||||
MOZ_ASSERT(aBytes[aLength] == '\0'); | MOZ_ASSERT(aBytes[aLength] == '\0'); | ||||
} | } | ||||
Latin1CharsZ(Latin1Char* aBytes, size_t aLength) | Latin1CharsZ(Latin1Char* aBytes, size_t aLength) | ||||
: Base(aBytes, aLength) | : Base(aBytes, aLength) | ||||
{ | { | ||||
MOZ_ASSERT(aBytes[aLength] == '\0'); | MOZ_ASSERT(aBytes[aLength] == '\0'); | ||||
} | } | ||||
using Base::operator=; | using Base::operator=; | ||||
char* c_str() { return reinterpret_cast<char*>(get()); } | char* c_str() { return reinterpret_cast<char*>(get()); } | ||||
}; | }; | ||||
class UTF8Chars : public mozilla::Range<unsigned char> | class UTF8Chars : public mozilla::Range<unsigned char> | ||||
{ | { | ||||
typedef mozilla::Range<unsigned char> Base; | typedef mozilla::Range<unsigned char> Base; | ||||
public: | public: | ||||
using CharT = unsigned char; | |||||
UTF8Chars() : Base() {} | UTF8Chars() : Base() {} | ||||
UTF8Chars(char* aBytes, size_t aLength) | UTF8Chars(char* aBytes, size_t aLength) | ||||
: Base(reinterpret_cast<unsigned char*>(aBytes), aLength) | : Base(reinterpret_cast<unsigned char*>(aBytes), aLength) | ||||
{} | {} | ||||
UTF8Chars(const char* aBytes, size_t aLength) | UTF8Chars(const char* aBytes, size_t aLength) | ||||
: Base(reinterpret_cast<unsigned char*>(const_cast<char*>(aBytes)), aLength) | : Base(reinterpret_cast<unsigned char*>(const_cast<char*>(aBytes)), aLength) | ||||
{} | {} | ||||
}; | }; | ||||
/* | /* | ||||
* SpiderMonkey also deals directly with UTF-8 encoded text in some places. | * SpiderMonkey also deals directly with UTF-8 encoded text in some places. | ||||
*/ | */ | ||||
class UTF8CharsZ : public mozilla::RangedPtr<unsigned char> | class UTF8CharsZ : public mozilla::RangedPtr<unsigned char> | ||||
{ | { | ||||
typedef mozilla::RangedPtr<unsigned char> Base; | typedef mozilla::RangedPtr<unsigned char> Base; | ||||
public: | public: | ||||
using CharT = unsigned char; | |||||
UTF8CharsZ() : Base(nullptr, 0) {} | UTF8CharsZ() : Base(nullptr, 0) {} | ||||
UTF8CharsZ(char* aBytes, size_t aLength) | UTF8CharsZ(char* aBytes, size_t aLength) | ||||
: Base(reinterpret_cast<unsigned char*>(aBytes), aLength) | : Base(reinterpret_cast<unsigned char*>(aBytes), aLength) | ||||
{ | { | ||||
MOZ_ASSERT(aBytes[aLength] == '\0'); | MOZ_ASSERT(aBytes[aLength] == '\0'); | ||||
} | } | ||||
UTF8CharsZ(unsigned char* aBytes, size_t aLength) | UTF8CharsZ(unsigned char* aBytes, size_t aLength) | ||||
: Base(aBytes, aLength) | : Base(aBytes, aLength) | ||||
{ | { | ||||
MOZ_ASSERT(aBytes[aLength] == '\0'); | MOZ_ASSERT(aBytes[aLength] == '\0'); | ||||
} | } | ||||
using Base::operator=; | using Base::operator=; | ||||
char* c_str() { return reinterpret_cast<char*>(get()); } | char* c_str() { return reinterpret_cast<char*>(get()); } | ||||
}; | }; | ||||
/* | /* | ||||
* A wrapper for a "const char*" that is encoded using UTF-8. | |||||
* This class does not manage ownership of the data; that is left | |||||
* to others. This differs from UTF8CharsZ in that the chars are | |||||
* const and it allows assignment. | |||||
*/ | |||||
class JS_PUBLIC_API(ConstUTF8CharsZ) | |||||
{ | |||||
const char* data_; | |||||
public: | |||||
using CharT = unsigned char; | |||||
ConstUTF8CharsZ() : data_(nullptr) | |||||
{} | |||||
ConstUTF8CharsZ(const char* aBytes, size_t aLength) | |||||
: data_(aBytes) | |||||
{ | |||||
MOZ_ASSERT(aBytes[aLength] == '\0'); | |||||
#ifdef DEBUG | |||||
validate(aLength); | |||||
#endif | |||||
} | |||||
const void* get() const { return data_; } | |||||
const char* c_str() const { return data_; } | |||||
explicit operator bool() const { return data_ != nullptr; } | |||||
private: | |||||
#ifdef DEBUG | |||||
void validate(size_t aLength); | |||||
#endif | |||||
}; | |||||
/* | |||||
* SpiderMonkey uses a 2-byte character representation: it is a | * SpiderMonkey uses a 2-byte character representation: it is a | ||||
* 2-byte-at-a-time view of a UTF-16 byte stream. This is similar to UCS-2, | * 2-byte-at-a-time view of a UTF-16 byte stream. This is similar to UCS-2, | ||||
* but unlike UCS-2, we do not strip UTF-16 extension bytes. This allows a | * but unlike UCS-2, we do not strip UTF-16 extension bytes. This allows a | ||||
* sufficiently dedicated JavaScript program to be fully unicode-aware by | * sufficiently dedicated JavaScript program to be fully unicode-aware by | ||||
* manually interpreting UTF-16 extension characters embedded in the JS | * manually interpreting UTF-16 extension characters embedded in the JS | ||||
* string. | * string. | ||||
*/ | */ | ||||
class TwoByteChars : public mozilla::Range<char16_t> | class TwoByteChars : public mozilla::Range<char16_t> | ||||
{ | { | ||||
typedef mozilla::Range<char16_t> Base; | typedef mozilla::Range<char16_t> Base; | ||||
public: | public: | ||||
using CharT = char16_t; | |||||
TwoByteChars() : Base() {} | TwoByteChars() : Base() {} | ||||
TwoByteChars(char16_t* aChars, size_t aLength) : Base(aChars, aLength) {} | TwoByteChars(char16_t* aChars, size_t aLength) : Base(aChars, aLength) {} | ||||
TwoByteChars(const char16_t* aChars, size_t aLength) : Base(const_cast<char16_t*>(aChars), aLength) {} | TwoByteChars(const char16_t* aChars, size_t aLength) : Base(const_cast<char16_t*>(aChars), aLength) {} | ||||
}; | }; | ||||
/* | /* | ||||
* A TwoByteChars, but \0 terminated for compatibility with JSFlatString. | * A TwoByteChars, but \0 terminated for compatibility with JSFlatString. | ||||
*/ | */ | ||||
class TwoByteCharsZ : public mozilla::RangedPtr<char16_t> | class TwoByteCharsZ : public mozilla::RangedPtr<char16_t> | ||||
{ | { | ||||
typedef mozilla::RangedPtr<char16_t> Base; | typedef mozilla::RangedPtr<char16_t> Base; | ||||
public: | public: | ||||
using CharT = char16_t; | |||||
TwoByteCharsZ() : Base(nullptr, 0) {} | TwoByteCharsZ() : Base(nullptr, 0) {} | ||||
TwoByteCharsZ(char16_t* chars, size_t length) | TwoByteCharsZ(char16_t* chars, size_t length) | ||||
: Base(chars, length) | : Base(chars, length) | ||||
{ | { | ||||
MOZ_ASSERT(chars[length] == '\0'); | MOZ_ASSERT(chars[length] == '\0'); | ||||
} | } | ||||
using Base::operator=; | using Base::operator=; | ||||
}; | }; | ||||
typedef mozilla::RangedPtr<const char16_t> ConstCharPtr; | typedef mozilla::RangedPtr<const char16_t> ConstCharPtr; | ||||
/* | /* | ||||
* Like TwoByteChars, but the chars are const. | * Like TwoByteChars, but the chars are const. | ||||
*/ | */ | ||||
class ConstTwoByteChars : public mozilla::Range<const char16_t> | class ConstTwoByteChars : public mozilla::Range<const char16_t> | ||||
{ | { | ||||
typedef mozilla::Range<const char16_t> Base; | typedef mozilla::Range<const char16_t> Base; | ||||
public: | public: | ||||
using CharT = char16_t; | |||||
ConstTwoByteChars() : Base() {} | ConstTwoByteChars() : Base() {} | ||||
ConstTwoByteChars(const char16_t* aChars, size_t aLength) : Base(aChars, aLength) {} | ConstTwoByteChars(const char16_t* aChars, size_t aLength) : Base(aChars, aLength) {} | ||||
}; | }; | ||||
/* | /* | ||||
* Convert a 2-byte character sequence to "ISO-Latin-1". This works by | * Convert a 2-byte character sequence to "ISO-Latin-1". This works by | ||||
* truncating each 2-byte pair in the sequence to a 1-byte pair. If the source | * truncating each 2-byte pair in the sequence to a 1-byte pair. If the source | ||||
* contains any UTF-16 extension characters, then this may give invalid Latin1 | * contains any UTF-16 extension characters, then this may give invalid Latin1 | ||||
* output. The returned string is zero terminated. The returned string or the | * output. The returned string is zero terminated. The returned string or the | ||||
* returned string's |start()| must be freed with JS_free or js_free, | * returned string's |start()| must be freed with JS_free or js_free, | ||||
* respectively. If allocation fails, an OOM error will be set and the method | * respectively. If allocation fails, an OOM error will be set and the method | ||||
* will return a nullptr chars (which can be tested for with the ! operator). | * will return a nullptr chars (which can be tested for with the ! operator). | ||||
* This method cannot trigger GC. | * This method cannot trigger GC. | ||||
*/ | */ | ||||
extern Latin1CharsZ | extern Latin1CharsZ | ||||
LossyTwoByteCharsToNewLatin1CharsZ(js::ExclusiveContext* cx, | LossyTwoByteCharsToNewLatin1CharsZ(js::ExclusiveContext* cx, | ||||
const mozilla::Range<const char16_t> tbchars); | const mozilla::Range<const char16_t> tbchars); | ||||
inline Latin1CharsZ | |||||
LossyTwoByteCharsToNewLatin1CharsZ(js::ExclusiveContext* cx, const char16_t* begin, size_t length) | |||||
{ | |||||
const mozilla::Range<const char16_t> tbchars(begin, length); | |||||
return JS::LossyTwoByteCharsToNewLatin1CharsZ(cx, tbchars); | |||||
} | |||||
template <typename CharT> | template <typename CharT> | ||||
extern UTF8CharsZ | extern UTF8CharsZ | ||||
CharsToNewUTF8CharsZ(js::ExclusiveContext* maybeCx, const mozilla::Range<const CharT> chars); | CharsToNewUTF8CharsZ(js::ExclusiveContext* maybeCx, const mozilla::Range<CharT> chars); | ||||
uint32_t | JS_PUBLIC_API(uint32_t) | ||||
Utf8ToOneUcs4Char(const uint8_t* utf8Buffer, int utf8Length); | Utf8ToOneUcs4Char(const uint8_t* utf8Buffer, int utf8Length); | ||||
/* | /* | ||||
* Inflate bytes in UTF-8 encoding to char16_t. | * Inflate bytes in UTF-8 encoding to char16_t. | ||||
* - On error, returns an empty TwoByteCharsZ. | * - On error, returns an empty TwoByteCharsZ. | ||||
* - On success, returns a malloc'd TwoByteCharsZ, and updates |outlen| to hold | * - On success, returns a malloc'd TwoByteCharsZ, and updates |outlen| to hold | ||||
* its length; the length value excludes the trailing null. | * its length; the length value excludes the trailing null. | ||||
*/ | */ | ||||
extern TwoByteCharsZ | extern JS_PUBLIC_API(TwoByteCharsZ) | ||||
UTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen); | UTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen); | ||||
/* | /* | ||||
* Like UTF8CharsToNewTwoByteCharsZ, but for ConstUTF8CharsZ. | |||||
*/ | |||||
extern JS_PUBLIC_API(TwoByteCharsZ) | |||||
UTF8CharsToNewTwoByteCharsZ(JSContext* cx, const ConstUTF8CharsZ& utf8, size_t* outlen); | |||||
/* | |||||
* The same as UTF8CharsToNewTwoByteCharsZ(), except that any malformed UTF-8 characters | * The same as UTF8CharsToNewTwoByteCharsZ(), except that any malformed UTF-8 characters | ||||
* will be replaced by \uFFFD. No exception will be thrown for malformed UTF-8 | * will be replaced by \uFFFD. No exception will be thrown for malformed UTF-8 | ||||
* input. | * input. | ||||
*/ | */ | ||||
extern TwoByteCharsZ | extern JS_PUBLIC_API(TwoByteCharsZ) | ||||
LossyUTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen); | LossyUTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen); | ||||
extern JS_PUBLIC_API(TwoByteCharsZ) | |||||
LossyUTF8CharsToNewTwoByteCharsZ(JSContext* cx, const ConstUTF8CharsZ& utf8, size_t* outlen); | |||||
/* | /* | ||||
* Returns the length of the char buffer required to encode |s| as UTF8. | * Returns the length of the char buffer required to encode |s| as UTF8. | ||||
* Does not include the null-terminator. | * Does not include the null-terminator. | ||||
*/ | */ | ||||
JS_PUBLIC_API(size_t) | JS_PUBLIC_API(size_t) | ||||
GetDeflatedUTF8StringLength(JSFlatString* s); | GetDeflatedUTF8StringLength(JSFlatString* s); | ||||
/* | /* | ||||
* Encode |src| as UTF8. The caller must ensure |dst| has enough space. | * Encode |src| as UTF8. The caller must either ensure |dst| has enough space | ||||
* Does not write the null terminator. | * to encode the entire string or pass the length of the buffer as |dstlenp|, | ||||
* in which case the function will encode characters from the string until | |||||
* the buffer is exhausted. Does not write the null terminator. | |||||
* | |||||
* If |dstlenp| is provided, it will be updated to hold the number of bytes | |||||
* written to the buffer. If |numcharsp| is provided, it will be updated to hold | |||||
* the number of Unicode characters written to the buffer (which can be less | |||||
* than the length of the string, if the buffer is exhausted before the string | |||||
* is fully encoded). | |||||
*/ | */ | ||||
JS_PUBLIC_API(void) | JS_PUBLIC_API(void) | ||||
DeflateStringToUTF8Buffer(JSFlatString* src, mozilla::RangedPtr<char> dst); | DeflateStringToUTF8Buffer(JSFlatString* src, mozilla::RangedPtr<char> dst, | ||||
size_t* dstlenp = nullptr, size_t* numcharsp = nullptr); | |||||
/* | |||||
* The smallest character encoding capable of fully representing a particular | |||||
* string. | |||||
*/ | |||||
enum class SmallestEncoding { | |||||
ASCII, | |||||
Latin1, | |||||
UTF16 | |||||
}; | |||||
/* | |||||
* Returns the smallest encoding possible for the given string: if all | |||||
* codepoints are <128 then ASCII, otherwise if all codepoints are <256 | |||||
* Latin-1, else UTF16. | |||||
*/ | |||||
JS_PUBLIC_API(SmallestEncoding) | |||||
FindSmallestEncoding(UTF8Chars utf8); | |||||
/* | |||||
* Return a null-terminated Latin-1 string copied from the input string, | |||||
* storing its length (excluding null terminator) in |*outlen|. Fail and | |||||
* report an error if the string contains non-Latin-1 codepoints. Returns | |||||
* Latin1CharsZ() on failure. | |||||
*/ | |||||
extern JS_PUBLIC_API(Latin1CharsZ) | |||||
UTF8CharsToNewLatin1CharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen); | |||||
/* | |||||
* Return a null-terminated Latin-1 string copied from the input string, | |||||
* storing its length (excluding null terminator) in |*outlen|. Non-Latin-1 | |||||
* codepoints are replaced by '?'. Returns Latin1CharsZ() on failure. | |||||
*/ | |||||
extern JS_PUBLIC_API(Latin1CharsZ) | |||||
LossyUTF8CharsToNewLatin1CharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen); | |||||
/* | |||||
* Returns true if all characters in the given null-terminated string are | |||||
* ASCII, i.e. < 0x80, false otherwise. | |||||
*/ | |||||
extern JS_PUBLIC_API(bool) | |||||
StringIsASCII(const char* s); | |||||
} // namespace JS | } // namespace JS | ||||
inline void JS_free(JS::Latin1CharsZ& ptr) { js_free((void*)ptr.get()); } | inline void JS_free(JS::Latin1CharsZ& ptr) { js_free((void*)ptr.get()); } | ||||
inline void JS_free(JS::UTF8CharsZ& ptr) { js_free((void*)ptr.get()); } | inline void JS_free(JS::UTF8CharsZ& ptr) { js_free((void*)ptr.get()); } | ||||
#endif /* js_CharacterEncoding_h */ | #endif /* js_CharacterEncoding_h */ |
Wildfire Games · Phabricator