30#ifndef UTF8_UNCHECKED_HH
31#define UTF8_UNCHECKED_HH
// Writes the UTF-8 encoding of the code point `cp` through `result`, one
// octet per `*result++`. The visible branches cover the two-, three- and
// four-octet forms; every continuation octet has the shape 10xxxxxx
// (six payload bits OR-ed with 0x80).
// NOTE(review): this listing omits several original lines (opening brace,
// first branch condition, the final branch header and the return statement),
// so those parts are not described here.
40template<
typename octet_iterator>
41octet_iterator
append(uint32_t cp, octet_iterator result)
// One octet: `cp` is stored unchanged. The guarding condition is on a line
// that is not shown; presumably `cp < 0x80` -- confirm against the full file.
45 *result++ = narrow_cast<uint8_t>(cp);
46 }
else if (cp < 0x800) {
// Two octets: lead 110xxxxx carries bits 10..6, trail carries bits 5..0.
48 *result++ = narrow_cast<uint8_t>(((cp >> 6) & 0x1f) | 0xc0);
49 *result++ = narrow_cast<uint8_t>(((cp >> 0) & 0x3f) | 0x80);
50 }
else if (cp < 0x10000) {
// Three octets: lead 1110xxxx carries bits 15..12, then bits 11..6 and 5..0.
52 *result++ = narrow_cast<uint8_t>(((cp >> 12) & 0x0f) | 0xe0);
53 *result++ = narrow_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
54 *result++ = narrow_cast<uint8_t>(((cp >> 0) & 0x3f) | 0x80);
// Four octets: lead 11110xxx carries bits 20..18, then bits 17..12, 11..6
// and 5..0. (The branch header for this case is not visible in this listing.)
57 *result++ = narrow_cast<uint8_t>(((cp >> 18) & 0x07) | 0xf0);
58 *result++ = narrow_cast<uint8_t>(((cp >> 12) & 0x3f) | 0x80);
59 *result++ = narrow_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
60 *result++ = narrow_cast<uint8_t>(((cp >> 0) & 0x3f) | 0x80);
// Decodes a UTF-8 sequence read through `it` into a 32-bit code point.
// `it` is taken by reference; the statements that advance it, the dispatch
// on the sequence length and the return are on lines missing from this
// listing, so only the visible bit arithmetic is annotated below.
65template<
typename octet_iterator>
66uint32_t
next(octet_iterator& it)
// Start from the octet currently under `it`, reduced to 8 bits.
68 uint32_t cp = narrow_cast<uint8_t>(*it);
// 11-bit result: the bits kept from `cp` land in 10..6, the low six bits of
// the octet now under `it` in 5..0 (matches the two-octet layout).
74 cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
// 16-bit result: bits kept from `cp` land in 15..12, six bits of the current
// octet in 11..6 (matches the three-octet layout; the last six bits are
// presumably added on a line not shown).
78 cp = ((cp << 12) & 0xffff) + ((*it << 6) & 0xfff);
// 21-bit result: bits kept from `cp` land in 20..18, six bits of the current
// octet in 17..12 (matches the four-octet layout).
84 cp = ((cp << 18) & 0x1fffff) + ((*it << 12) & 0x3ffff);
// Adds six bits of the current octet at positions 11..6.
86 cp += (*it << 6) & 0xfff;
95template<
typename octet_iterator>
101template<
typename octet_iterator>
109template<
typename octet_iterator,
typename distance_type>
110void advance(octet_iterator& it, distance_type n)
// Computes a distance between `first` and `last`; the result must not be
// ignored ([[nodiscard]]). Only the counter setup and the loop header are
// visible in this listing -- the loop body and the return are missing.
115template<
typename octet_iterator>
116[[nodiscard]]
auto distance(octet_iterator first, octet_iterator last)
// Counter uses the iterator's own difference_type and starts at zero.
118 typename std::iterator_traits<octet_iterator>::difference_type dist = 0;
// NOTE(review): the loop compares with `<`, so octet_iterator must support
// ordering comparison; other loops in this file compare with `!=`.
119 while (first < last) {
// Converts the 16-bit units in [start, end) to UTF-8, writing through
// `result` via append(). Several original lines are missing from this
// listing (surrogate test, surrogate merge, closing braces, return).
126template<
typename u16bit_iterator,
typename octet_iterator>
127octet_iterator
utf16to8(u16bit_iterator start, u16bit_iterator
end,
128 octet_iterator result)
130 while (start !=
end) {
// Read one 16-bit unit, widened to 32 bits, and step past it.
131 uint32_t cp = *start++;
// A second unit is read as the trail surrogate. The condition selecting this
// path and the code combining it with `cp` are not shown -- presumably only
// taken when `cp` is a lead surrogate; confirm against the full file.
134 uint32_t trail_surrogate = *start++;
// Emit the code point as UTF-8; append() hands back the advanced output iterator.
137 result =
append(cp, result);
// NOTE(review): the line carrying this function's return type, name and
// leading parameters is missing from the listing; the visible template
// parameters and `u16bit_iterator result` match the `utf8to16` signature
// listed elsewhere in this file -- confirm.
// Visible behavior: loop until `start` equals `end`, decoding one code point
// per iteration with next(start). What is written to `result` is not shown.
142template<
typename u16bit_iterator,
typename octet_iterator>
144 u16bit_iterator result)
146 while (start !=
end) {
// next() takes its iterator by reference, so `start` is passed as an lvalue.
147 uint32_t cp =
next(start);
// Converts the 32-bit values in [start, end) to UTF-8: each value is passed
// to append(), which writes its octets through `result`.
// The closing braces and the return statement are missing from this listing.
159template<
typename octet_iterator,
typename u32bit_iterator>
160octet_iterator
utf32to8(u32bit_iterator start, u32bit_iterator
end,
161 octet_iterator result)
163 while (start !=
end) {
// Consume one input value and keep the output iterator returned by append().
164 result =
append(*start++, result);
// NOTE(review): the line carrying this function's return type, name and
// leading parameters is missing from the listing; the visible template
// parameters and `u32bit_iterator result` match the `utf8to32` signature
// listed elsewhere in this file -- confirm.
// Visible behavior: while `start < end`, decode one code point with
// next(start) and store it through `result`, post-incrementing `result`.
169template<
typename octet_iterator,
typename u32bit_iterator>
171 u32bit_iterator result)
// NOTE(review): this loop compares with `<` (needs ordered iterators), while
// the other conversion loops visible in this file compare with `!=`.
173 while (start <
end) {
174 *result++ =
next(start);
// Residue of a class template parameterized on `octet_iterator`; the class
// header and most members are missing from this listing.
180template<
typename octet_iterator>
// Returns a copy of the stored iterator `it`; const, and the result must not
// be ignored ([[nodiscard]]).
195 [[nodiscard]] octet_iterator
base()
const {
return it; }
// Copies `it` into a local. The enclosing member function is not visible
// here -- presumably so the stored iterator stays unchanged; confirm.
198 octet_iterator temp = it;
227[[nodiscard]]
inline size_t size(std::string_view
utf8)
// Returns a std::string_view derived from `utf8`, selected by `first`
// (default 0) and `len` (default npos). The result must not be ignored.
// NOTE(review): the lines that declare `b`, position it using `first`,
// initialize `e` and form the loop body are missing from this listing; the
// units of `first`/`len` (presumably code points, not bytes) need confirming.
231[[nodiscard]]
inline std::string_view
substr(std::string_view
utf8, std::string_view::size_type first = 0,
232 std::string_view::size_type len = std::string_view::npos)
// End position of the result; declared here, assigned on lines not shown.
236 std::string_view::const_iterator e;
// Only an explicit `len` takes this branch; npos is handled elsewhere (not shown).
237 if (len != std::string_view::npos) {
// Runs while `len` is non-zero and `e` has not reached the end of `utf8`.
239 while (len && (e !=
end(
utf8))) {
// The view starts at the address of `b` and spans `e - b` chars.
245 return {std::to_address(b), narrow<std::string_view::size_type>(e - b)};
ptrdiff_t difference_type
iterator(const octet_iterator &octet_it)
bool operator==(const iterator &) const =default
uint32_t operator*() const
std::bidirectional_iterator_tag iterator_category
octet_iterator base() const
constexpr uint32_t SURROGATE_OFFSET
constexpr uint16_t TRAIL_SURROGATE_MIN
constexpr bool is_trail(uint8_t oc)
constexpr uint16_t LEAD_OFFSET
constexpr bool is_surrogate(uint32_t cp)
constexpr unsigned sequence_length(uint8_t lead)
std::string_view substr(std::string_view utf8, std::string_view::size_type first=0, std::string_view::size_type len=std::string_view::npos)
void advance(octet_iterator &it, distance_type n)
size_t size(std::string_view utf8)
auto distance(octet_iterator first, octet_iterator last)
u32bit_iterator utf8to32(octet_iterator start, octet_iterator end, u32bit_iterator result)
uint32_t peek_next(octet_iterator it)
uint32_t next(octet_iterator &it)
octet_iterator append(uint32_t cp, octet_iterator result)
uint32_t prior(octet_iterator &it)
octet_iterator utf16to8(u16bit_iterator start, u16bit_iterator end, octet_iterator result)
u16bit_iterator utf8to16(octet_iterator start, octet_iterator end, u16bit_iterator result)
octet_iterator utf32to8(u32bit_iterator start, u32bit_iterator end, octet_iterator result)
constexpr void repeat(T n, Op op)
Repeat the given operation 'op' 'n' times.
constexpr auto begin(const zstring_view &x)
constexpr auto end(const zstring_view &x)