30#ifndef UTF8_CHECKED_HH
31#define UTF8_CHECKED_HH
/// Describes this exception as a fixed text message.
///
/// Overrides a virtual `what()` inherited from a base class that is not
/// visible in this excerpt. Declared `noexcept`; `[[nodiscard]]` makes the
/// compiler warn when the returned pointer is ignored.
///
/// @return Pointer to the string literal "Invalid code point".
45 [[nodiscard]]
const char*
what() const noexcept
override {
return "Invalid code point"; }
/// Describes this exception as a fixed text message.
///
/// Overrides a virtual `what()` inherited from a base class that is not
/// visible in this excerpt. Declared `noexcept`; `[[nodiscard]]` makes the
/// compiler warn when the returned pointer is ignored.
///
/// @return Pointer to the string literal "Invalid UTF-8".
54 [[nodiscard]]
const char*
what() const noexcept
override {
return "Invalid UTF-8"; }
/// Describes this exception as a fixed text message.
///
/// Overrides a virtual `what()` inherited from a base class that is not
/// visible in this excerpt. Declared `noexcept`; `[[nodiscard]]` makes the
/// compiler warn when the returned pointer is ignored.
///
/// @return Pointer to the string literal "Invalid UTF-16".
63 [[nodiscard]]
const char*
what() const noexcept
override {
return "Invalid UTF-16"; }
/// Accessor for the `u16` member.
///
/// Const member function: returns the stored value by copy and does not
/// modify the object.
/// NOTE(review): `u16` is presumably the UTF-16 code unit that triggered the
/// error; the code that assigns it is not visible in this excerpt - confirm.
///
/// @return The current value of `u16`.
64 [[nodiscard]] uint16_t
utf16_word()
const {
return u16; }
/// Describes this exception as a fixed text message.
///
/// Overrides a virtual `what()` inherited from a base class that is not
/// visible in this excerpt. Declared `noexcept`; `[[nodiscard]]` makes the
/// compiler warn when the returned pointer is ignored.
///
/// @return Pointer to the string literal "Not enough space".
70 [[nodiscard]]
const char*
what() const noexcept
override {
return "Not enough space"; }
75template<
typename octet_iterator,
typename output_iterator>
77 output_iterator out, uint32_t replacement)
79 while (start !=
end) {
80 auto sequence_start = start;
85 for (
auto it = sequence_start; it != start; ++it) {
95 case INCOMPLETE_SEQUENCE:
96 case OVERLONG_SEQUENCE:
97 case INVALID_CODE_POINT:
110template<
typename octet_iterator,
typename output_iterator>
117template<
typename octet_iterator>
118octet_iterator
append(uint32_t cp, octet_iterator result)
126 }
else if (cp < 0x800) {
128 *result++ = ((cp >> 6) & 0x1f) | 0xc0;
129 *result++ = ((cp >> 0) & 0x3f) | 0x80;
130 }
else if (cp < 0x10000) {
132 *result++ = ((cp >> 12) & 0x0f) | 0xe0;
133 *result++ = ((cp >> 6) & 0x3f) | 0x80;
134 *result++ = ((cp >> 0) & 0x3f) | 0x80;
137 *result++ = ((cp >> 18) & 0x07) | 0xf0;
138 *result++ = ((cp >> 12) & 0x3f) | 0x80;
139 *result++ = ((cp >> 6) & 0x3f) | 0x80;
140 *result++ = ((cp >> 0) & 0x3f) | 0x80;
147template<
typename octet_iterator>
148uint32_t
next(octet_iterator& it, octet_iterator
end)
156 case NOT_ENOUGH_ROOM:
159 case INCOMPLETE_SEQUENCE:
160 case OVERLONG_SEQUENCE:
162 case INVALID_CODE_POINT:
168template<
typename octet_iterator>
174template<
typename octet_iterator>
175uint32_t
prior(octet_iterator& it, octet_iterator start)
188template<
typename octet_iterator,
typename distance_type>
189void advance(octet_iterator& it, distance_type n, octet_iterator
end)
194template<
typename octet_iterator>
195[[nodiscard]]
auto distance(octet_iterator first, octet_iterator last)
197 typename std::iterator_traits<octet_iterator>::difference_type dist = 0;
198 while (first < last) {
205template<
typename u16bit_iterator,
typename octet_iterator>
206octet_iterator
utf16to8(u16bit_iterator start, u16bit_iterator
end,
207 octet_iterator result)
209 while (start !=
end) {
210 uint32_t cp = *start++;
216 auto trail_surrogate = *start++;
223 result =
append(cp, result);
228template<
typename u16bit_iterator,
typename octet_iterator>
230 u16bit_iterator result)
232 while (start !=
end) {
233 uint32_t cp =
next(start,
end);
244template<
typename octet_iterator,
typename u32bit_iterator>
245octet_iterator
utf32to8(u32bit_iterator start, u32bit_iterator
end,
246 octet_iterator result)
248 while (start !=
end) {
249 result =
append(*start++, result);
254template<
typename octet_iterator,
typename u32bit_iterator>
256 u32bit_iterator result)
258 while (start <
end) {
265template<
typename octet_iterator>
269 octet_iterator range_start;
270 octet_iterator range_end;
281 const octet_iterator& range_start_,
282 const octet_iterator& range_end_)
284 , range_start(range_start_)
285 , range_end(range_end_)
287 if (it < range_start || it > range_end) {
288 throw std::out_of_range(
"Invalid utf-8 iterator position");
/// Accessor for the `it` member.
///
/// Const member function: hands back a copy of the stored `octet_iterator`
/// and leaves this object unchanged.
///
/// @return A copy of `it`.
292 [[nodiscard]] octet_iterator
base()
const {
return it; }
296 return next(temp, range_end);
300 if ((range_start != rhs.range_start) ||
301 (range_end != rhs.range_end)) {
302 throw std::logic_error(
303 "Comparing utf-8 iterators defined with different ranges");
320 prior(it, range_start);
326 prior(it, range_start);
/// Non-template overload taking a whole `std::wstring`; declaration only,
/// no body appears in this excerpt.
///
/// NOTE(review): the name and the parameter name suggest this converts
/// UTF-16 text to UTF-8, but the definition is not visible here - confirm
/// the conversion and its error behaviour against the implementation.
///
/// @param utf16 Input string, taken by const reference.
/// @return A `std::string` by value; `[[nodiscard]]` flags ignored results.
334[[nodiscard]] std::string
utf16to8 (
const std::wstring& utf16);
uint32_t code_point() const
const char * what() const noexcept override
invalid_code_point(uint32_t cp_)
const char * what() const noexcept override
uint16_t utf16_word() const
invalid_utf16(uint16_t u)
const char * what() const noexcept override
uint8_t utf8_octet() const
uint32_t operator*() const
bool operator==(const iterator &rhs) const
octet_iterator base() const
std::bidirectional_iterator_tag iterator_category
iterator(const octet_iterator &octet_it, const octet_iterator &range_start_, const octet_iterator &range_end_)
ptrdiff_t difference_type
const char * what() const noexcept override
Like std::string_view, but with the extra guarantee that it refers to a zero-terminated string.
constexpr bool is_code_point_valid(uint32_t cp)
constexpr uint32_t SURROGATE_OFFSET
constexpr uint16_t TRAIL_SURROGATE_MAX
constexpr uint16_t TRAIL_SURROGATE_MIN
constexpr bool is_trail(uint8_t oc)
constexpr uint16_t LEAD_OFFSET
constexpr uint32_t CODE_POINT_MAX
constexpr bool is_surrogate(uint32_t cp)
constexpr utf_error validate_next(octet_iterator &it, octet_iterator end, uint32_t *code_point)
output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement)
octet_iterator append(uint32_t cp, octet_iterator result)
void advance(octet_iterator &it, distance_type n, octet_iterator end)
uint32_t peek_next(octet_iterator it, octet_iterator end)
octet_iterator utf32to8(u32bit_iterator start, u32bit_iterator end, octet_iterator result)
u16bit_iterator utf8to16(octet_iterator start, octet_iterator end, u16bit_iterator result)
u32bit_iterator utf8to32(octet_iterator start, octet_iterator end, u32bit_iterator result)
auto distance(octet_iterator first, octet_iterator last)
uint32_t prior(octet_iterator &it, octet_iterator start)
octet_iterator utf16to8(u16bit_iterator start, u16bit_iterator end, octet_iterator result)
uint32_t next(octet_iterator &it, octet_iterator end)
constexpr void repeat(T n, Op op)
Repeat the given operation 'op' 'n' times.
constexpr auto end(const zstring_view &x)