Differential D3094 Diff 14018 ps/trunk/libraries/source/spidermonkey/include-win32-debug/js/CharacterEncoding.h

Changeset View

Standalone View

ps/trunk/libraries/source/spidermonkey/include-win32-debug/js/CharacterEncoding.h

	Show All 25 Lines
	* byte is treated as a 2-byte character, and there is no way to pass in a			* byte is treated as a 2-byte character, and there is no way to pass in a
	* string containing characters beyond U+00FF.			* string containing characters beyond U+00FF.
	*/			*/
	class Latin1Chars : public mozilla::Range<Latin1Char>			class Latin1Chars : public mozilla::Range<Latin1Char>
	{			{
	typedef mozilla::Range<Latin1Char> Base;			typedef mozilla::Range<Latin1Char> Base;

	public:			public:
				using CharT = Latin1Char;

	Latin1Chars() : Base() {}			Latin1Chars() : Base() {}
	Latin1Chars(char* aBytes, size_t aLength) : Base(reinterpret_cast<Latin1Char*>(aBytes), aLength) {}			Latin1Chars(char* aBytes, size_t aLength) : Base(reinterpret_cast<Latin1Char*>(aBytes), aLength) {}
	Latin1Chars(const Latin1Char* aBytes, size_t aLength)			Latin1Chars(const Latin1Char* aBytes, size_t aLength)
	: Base(const_cast<Latin1Char*>(aBytes), aLength)			: Base(const_cast<Latin1Char*>(aBytes), aLength)
	{}			{}
	Latin1Chars(const char* aBytes, size_t aLength)			Latin1Chars(const char* aBytes, size_t aLength)
	: Base(reinterpret_cast<Latin1Char*>(const_cast<char*>(aBytes)), aLength)			: Base(reinterpret_cast<Latin1Char*>(const_cast<char*>(aBytes)), aLength)
	{}			{}
	};			};

	/*			/*
	* A Latin1Chars, but with \0 termination for C compatibility.			* A Latin1Chars, but with \0 termination for C compatibility.
	*/			*/
	class Latin1CharsZ : public mozilla::RangedPtr<Latin1Char>			class Latin1CharsZ : public mozilla::RangedPtr<Latin1Char>
	{			{
	typedef mozilla::RangedPtr<Latin1Char> Base;			typedef mozilla::RangedPtr<Latin1Char> Base;

	public:			public:
				using CharT = Latin1Char;

	Latin1CharsZ() : Base(nullptr, 0) {}			Latin1CharsZ() : Base(nullptr, 0) {}

	Latin1CharsZ(char* aBytes, size_t aLength)			Latin1CharsZ(char* aBytes, size_t aLength)
	: Base(reinterpret_cast<Latin1Char*>(aBytes), aLength)			: Base(reinterpret_cast<Latin1Char*>(aBytes), aLength)
	{			{
	MOZ_ASSERT(aBytes[aLength] == '\0');			MOZ_ASSERT(aBytes[aLength] == '\0');
	}			}

	Latin1CharsZ(Latin1Char* aBytes, size_t aLength)			Latin1CharsZ(Latin1Char* aBytes, size_t aLength)
	: Base(aBytes, aLength)			: Base(aBytes, aLength)
	{			{
	MOZ_ASSERT(aBytes[aLength] == '\0');			MOZ_ASSERT(aBytes[aLength] == '\0');
	}			}

	using Base::operator=;			using Base::operator=;

	char* c_str() { return reinterpret_cast<char*>(get()); }			char* c_str() { return reinterpret_cast<char*>(get()); }
	};			};

	class UTF8Chars : public mozilla::Range<unsigned char>			class UTF8Chars : public mozilla::Range<unsigned char>
	{			{
	typedef mozilla::Range<unsigned char> Base;			typedef mozilla::Range<unsigned char> Base;

	public:			public:
				using CharT = unsigned char;

	UTF8Chars() : Base() {}			UTF8Chars() : Base() {}
	UTF8Chars(char* aBytes, size_t aLength)			UTF8Chars(char* aBytes, size_t aLength)
	: Base(reinterpret_cast<unsigned char*>(aBytes), aLength)			: Base(reinterpret_cast<unsigned char*>(aBytes), aLength)
	{}			{}
	UTF8Chars(const char* aBytes, size_t aLength)			UTF8Chars(const char* aBytes, size_t aLength)
	: Base(reinterpret_cast<unsigned char*>(const_cast<char*>(aBytes)), aLength)			: Base(reinterpret_cast<unsigned char*>(const_cast<char*>(aBytes)), aLength)
	{}			{}
	};			};

	/*			/*
	* SpiderMonkey also deals directly with UTF-8 encoded text in some places.			* SpiderMonkey also deals directly with UTF-8 encoded text in some places.
	*/			*/
	class UTF8CharsZ : public mozilla::RangedPtr<unsigned char>			class UTF8CharsZ : public mozilla::RangedPtr<unsigned char>
	{			{
	typedef mozilla::RangedPtr<unsigned char> Base;			typedef mozilla::RangedPtr<unsigned char> Base;

	public:			public:
				using CharT = unsigned char;

	UTF8CharsZ() : Base(nullptr, 0) {}			UTF8CharsZ() : Base(nullptr, 0) {}

	UTF8CharsZ(char* aBytes, size_t aLength)			UTF8CharsZ(char* aBytes, size_t aLength)
	: Base(reinterpret_cast<unsigned char*>(aBytes), aLength)			: Base(reinterpret_cast<unsigned char*>(aBytes), aLength)
	{			{
	MOZ_ASSERT(aBytes[aLength] == '\0');			MOZ_ASSERT(aBytes[aLength] == '\0');
	}			}

	UTF8CharsZ(unsigned char* aBytes, size_t aLength)			UTF8CharsZ(unsigned char* aBytes, size_t aLength)
	: Base(aBytes, aLength)			: Base(aBytes, aLength)
	{			{
	MOZ_ASSERT(aBytes[aLength] == '\0');			MOZ_ASSERT(aBytes[aLength] == '\0');
	}			}

	using Base::operator=;			using Base::operator=;

	char* c_str() { return reinterpret_cast<char*>(get()); }			char* c_str() { return reinterpret_cast<char*>(get()); }
	};			};

	/*			/*
				* A wrapper for a "const char*" that is encoded using UTF-8.
				* This class does not manage ownership of the data; that is left
				* to others. This differs from UTF8CharsZ in that the chars are
				* const and it allows assignment.
				*/
				class JS_PUBLIC_API(ConstUTF8CharsZ)
				{
				const char* data_;

				public:
				using CharT = unsigned char;

				ConstUTF8CharsZ() : data_(nullptr)
				{}

				ConstUTF8CharsZ(const char* aBytes, size_t aLength)
				: data_(aBytes)
				{
				MOZ_ASSERT(aBytes[aLength] == '\0');
				#ifdef DEBUG
				validate(aLength);
				#endif
				}

				const void* get() const { return data_; }

				const char* c_str() const { return data_; }

				explicit operator bool() const { return data_ != nullptr; }

				private:
				#ifdef DEBUG
				void validate(size_t aLength);
				#endif
				};

				/*
	* SpiderMonkey uses a 2-byte character representation: it is a			* SpiderMonkey uses a 2-byte character representation: it is a
	* 2-byte-at-a-time view of a UTF-16 byte stream. This is similar to UCS-2,			* 2-byte-at-a-time view of a UTF-16 byte stream. This is similar to UCS-2,
	* but unlike UCS-2, we do not strip UTF-16 extension bytes. This allows a			* but unlike UCS-2, we do not strip UTF-16 extension bytes. This allows a
	* sufficiently dedicated JavaScript program to be fully unicode-aware by			* sufficiently dedicated JavaScript program to be fully unicode-aware by
	* manually interpreting UTF-16 extension characters embedded in the JS			* manually interpreting UTF-16 extension characters embedded in the JS
	* string.			* string.
	*/			*/
	class TwoByteChars : public mozilla::Range<char16_t>			class TwoByteChars : public mozilla::Range<char16_t>
	{			{
	typedef mozilla::Range<char16_t> Base;			typedef mozilla::Range<char16_t> Base;

	public:			public:
				using CharT = char16_t;

	TwoByteChars() : Base() {}			TwoByteChars() : Base() {}
	TwoByteChars(char16_t* aChars, size_t aLength) : Base(aChars, aLength) {}			TwoByteChars(char16_t* aChars, size_t aLength) : Base(aChars, aLength) {}
	TwoByteChars(const char16_t* aChars, size_t aLength) : Base(const_cast<char16_t*>(aChars), aLength) {}			TwoByteChars(const char16_t* aChars, size_t aLength) : Base(const_cast<char16_t*>(aChars), aLength) {}
	};			};

	/*			/*
	* A TwoByteChars, but \0 terminated for compatibility with JSFlatString.			* A TwoByteChars, but \0 terminated for compatibility with JSFlatString.
	*/			*/
	class TwoByteCharsZ : public mozilla::RangedPtr<char16_t>			class TwoByteCharsZ : public mozilla::RangedPtr<char16_t>
	{			{
	typedef mozilla::RangedPtr<char16_t> Base;			typedef mozilla::RangedPtr<char16_t> Base;

	public:			public:
				using CharT = char16_t;

	TwoByteCharsZ() : Base(nullptr, 0) {}			TwoByteCharsZ() : Base(nullptr, 0) {}

	TwoByteCharsZ(char16_t* chars, size_t length)			TwoByteCharsZ(char16_t* chars, size_t length)
	: Base(chars, length)			: Base(chars, length)
	{			{
	MOZ_ASSERT(chars[length] == '\0');			MOZ_ASSERT(chars[length] == '\0');
	}			}

	using Base::operator=;			using Base::operator=;
	};			};

	typedef mozilla::RangedPtr<const char16_t> ConstCharPtr;			typedef mozilla::RangedPtr<const char16_t> ConstCharPtr;

	/*			/*
	* Like TwoByteChars, but the chars are const.			* Like TwoByteChars, but the chars are const.
	*/			*/
	class ConstTwoByteChars : public mozilla::Range<const char16_t>			class ConstTwoByteChars : public mozilla::Range<const char16_t>
	{			{
	typedef mozilla::Range<const char16_t> Base;			typedef mozilla::Range<const char16_t> Base;

	public:			public:
				using CharT = char16_t;

	ConstTwoByteChars() : Base() {}			ConstTwoByteChars() : Base() {}
	ConstTwoByteChars(const char16_t* aChars, size_t aLength) : Base(aChars, aLength) {}			ConstTwoByteChars(const char16_t* aChars, size_t aLength) : Base(aChars, aLength) {}
	};			};

	/*			/*
	* Convert a 2-byte character sequence to "ISO-Latin-1". This works by			* Convert a 2-byte character sequence to "ISO-Latin-1". This works by
	* truncating each 2-byte pair in the sequence to a 1-byte pair. If the source			* truncating each 2-byte pair in the sequence to a 1-byte pair. If the source
	* contains any UTF-16 extension characters, then this may give invalid Latin1			* contains any UTF-16 extension characters, then this may give invalid Latin1
	* output. The returned string is zero terminated. The returned string or the			* output. The returned string is zero terminated. The returned string or the
	* returned string's |start()| must be freed with JS_free or js_free,			* returned string's |start()| must be freed with JS_free or js_free,
	* respectively. If allocation fails, an OOM error will be set and the method			* respectively. If allocation fails, an OOM error will be set and the method
	* will return a nullptr chars (which can be tested for with the ! operator).			* will return a nullptr chars (which can be tested for with the ! operator).
	* This method cannot trigger GC.			* This method cannot trigger GC.
	*/			*/
	extern Latin1CharsZ			extern Latin1CharsZ
	LossyTwoByteCharsToNewLatin1CharsZ(js::ExclusiveContext* cx,			LossyTwoByteCharsToNewLatin1CharsZ(js::ExclusiveContext* cx,
	const mozilla::Range<const char16_t> tbchars);			const mozilla::Range<const char16_t> tbchars);

				inline Latin1CharsZ
				LossyTwoByteCharsToNewLatin1CharsZ(js::ExclusiveContext* cx, const char16_t* begin, size_t length)
				{
				const mozilla::Range<const char16_t> tbchars(begin, length);
				return JS::LossyTwoByteCharsToNewLatin1CharsZ(cx, tbchars);
				}

	template <typename CharT>			template <typename CharT>
	extern UTF8CharsZ			extern UTF8CharsZ
	CharsToNewUTF8CharsZ(js::ExclusiveContext* maybeCx, const mozilla::Range<const CharT> chars);			CharsToNewUTF8CharsZ(js::ExclusiveContext* maybeCx, const mozilla::Range<CharT> chars);

	uint32_t			JS_PUBLIC_API(uint32_t)
	Utf8ToOneUcs4Char(const uint8_t* utf8Buffer, int utf8Length);			Utf8ToOneUcs4Char(const uint8_t* utf8Buffer, int utf8Length);

	/*			/*
	* Inflate bytes in UTF-8 encoding to char16_t.			* Inflate bytes in UTF-8 encoding to char16_t.
	* - On error, returns an empty TwoByteCharsZ.			* - On error, returns an empty TwoByteCharsZ.
	* - On success, returns a malloc'd TwoByteCharsZ, and updates |outlen| to hold			* - On success, returns a malloc'd TwoByteCharsZ, and updates |outlen| to hold
	* its length; the length value excludes the trailing null.			* its length; the length value excludes the trailing null.
	*/			*/
	extern TwoByteCharsZ			extern JS_PUBLIC_API(TwoByteCharsZ)
	UTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen);			UTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen);

	/*			/*
				* Like UTF8CharsToNewTwoByteCharsZ, but for ConstUTF8CharsZ.
				*/
				extern JS_PUBLIC_API(TwoByteCharsZ)
				UTF8CharsToNewTwoByteCharsZ(JSContext* cx, const ConstUTF8CharsZ& utf8, size_t* outlen);

				/*
	* The same as UTF8CharsToNewTwoByteCharsZ(), except that any malformed UTF-8 characters			* The same as UTF8CharsToNewTwoByteCharsZ(), except that any malformed UTF-8 characters
	* will be replaced by \uFFFD. No exception will be thrown for malformed UTF-8			* will be replaced by \uFFFD. No exception will be thrown for malformed UTF-8
	* input.			* input.
	*/			*/
	extern TwoByteCharsZ			extern JS_PUBLIC_API(TwoByteCharsZ)
	LossyUTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen);			LossyUTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen);

				extern JS_PUBLIC_API(TwoByteCharsZ)
				LossyUTF8CharsToNewTwoByteCharsZ(JSContext* cx, const ConstUTF8CharsZ& utf8, size_t* outlen);

	/*			/*
	* Returns the length of the char buffer required to encode |s| as UTF8.			* Returns the length of the char buffer required to encode |s| as UTF8.
	* Does not include the null-terminator.			* Does not include the null-terminator.
	*/			*/
	JS_PUBLIC_API(size_t)			JS_PUBLIC_API(size_t)
	GetDeflatedUTF8StringLength(JSFlatString* s);			GetDeflatedUTF8StringLength(JSFlatString* s);

	/*			/*
	* Encode |src| as UTF8. The caller must ensure |dst| has enough space.			* Encode |src| as UTF8. The caller must either ensure |dst| has enough space
	* Does not write the null terminator.			* to encode the entire string or pass the length of the buffer as |dstlenp|,
				* in which case the function will encode characters from the string until
				* the buffer is exhausted. Does not write the null terminator.
				*
				* If |dstlenp| is provided, it will be updated to hold the number of bytes
				* written to the buffer. If |numcharsp| is provided, it will be updated to hold
				* the number of Unicode characters written to the buffer (which can be less
				* than the length of the string, if the buffer is exhausted before the string
				* is fully encoded).
	*/			*/
	JS_PUBLIC_API(void)			JS_PUBLIC_API(void)
	DeflateStringToUTF8Buffer(JSFlatString* src, mozilla::RangedPtr<char> dst);			DeflateStringToUTF8Buffer(JSFlatString* src, mozilla::RangedPtr<char> dst,
				size_t* dstlenp = nullptr, size_t* numcharsp = nullptr);

				/*
				* The smallest character encoding capable of fully representing a particular
				* string.
				*/
				enum class SmallestEncoding {
				ASCII,
				Latin1,
				UTF16
				};

				/*
				* Returns the smallest encoding possible for the given string: if all
				* codepoints are <128 then ASCII, otherwise if all codepoints are <256
				* Latin-1, else UTF16.
				*/
				JS_PUBLIC_API(SmallestEncoding)
				FindSmallestEncoding(UTF8Chars utf8);

				/*
				* Return a null-terminated Latin-1 string copied from the input string,
				* storing its length (excluding null terminator) in |*outlen|. Fail and
				* report an error if the string contains non-Latin-1 codepoints. Returns
				* Latin1CharsZ() on failure.
				*/
				extern JS_PUBLIC_API(Latin1CharsZ)
				UTF8CharsToNewLatin1CharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen);

				/*
				* Return a null-terminated Latin-1 string copied from the input string,
				* storing its length (excluding null terminator) in |*outlen|. Non-Latin-1
				* codepoints are replaced by '?'. Returns Latin1CharsZ() on failure.
				*/
				extern JS_PUBLIC_API(Latin1CharsZ)
				LossyUTF8CharsToNewLatin1CharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen);

				/*
				* Returns true if all characters in the given null-terminated string are
				* ASCII, i.e. < 0x80, false otherwise.
				*/
				extern JS_PUBLIC_API(bool)
				StringIsASCII(const char* s);

	} // namespace JS			} // namespace JS

	inline void JS_free(JS::Latin1CharsZ& ptr) { js_free((void*)ptr.get()); }			inline void JS_free(JS::Latin1CharsZ& ptr) { js_free((void*)ptr.get()); }
	inline void JS_free(JS::UTF8CharsZ& ptr) { js_free((void*)ptr.get()); }			inline void JS_free(JS::UTF8CharsZ& ptr) { js_free((void*)ptr.get()); }

	#endif /* js_CharacterEncoding_h */			#endif /* js_CharacterEncoding_h */