openMSX
small_compare.hh
Go to the documentation of this file.
1#ifndef SMALL_COMPARE_HH
2#define SMALL_COMPARE_HH
3
4// small_compare utility function
5//
6// This can be used to replace
7// string_view s1 = ...
8// if (s1 == "foo") { ... }
9// with
10// if (small_compare<"foo">(s1)) { ... }
11// This looks more ugly, but it can execute faster.
12//
13// The first variant internally calls memcmp() to do the actual string
14// comparison (after the string length has been checked).
15// Update: gcc-12 optimizes memcmp() on small string-literals better:
16// e.g. it performs a 7-char comparison by using 2 4-byte loads
17// where the middle byte overlaps (older versions did a 4-byte,
18// 2-byte and 1-byte load)
19// In the future, when gcc-12 is more common, we could remove this
20// small_compare utility.
21//
22// The second variant does a 4 byte unaligned load, masks the unnecessary 4th
23// byte (so and with 0x00ffffff) and then compares with 0x006f6f66 ('f'->0x66,
24// 'o'->0x6f) (on big endian system the mask and comparison constants are
25// different). So only 3 instructions instead of a call to memcmp().
26//
27// In the general case small_compare does a 1, 2, 4 or 8 byte unaligned load
28// (only strings upto 8 characters are supported). Possibly followed by a 'and'
29// instruction (only needed for lengths 3, 5, 6 or 7), followed by a compare
30// with a constant. This is much faster than memcmp().
31//
32// There are also some disadvantages.
33// - (minor) We need unaligned load instructions. Many CPU architectures
34// support these, but even in case these must be emulated it's still fast.
35// - (major) We read upto 8 bytes, and that might be past the end of the
36// input string. So it's only safe to use small_compare() if there's enough
37// padding at the end of the buffer.
38
39#include "aligned.hh"
40#include "endian.hh"
41
42#include <algorithm>
43#include <array>
44#include <bit>
45#include <cassert>
46#include <cstdint>
47#include <cstring>
48#include <string_view>
49#include <type_traits>
50
51// A wrapper around a (zero-terminated) string literal.
52// Could be moved to a separate header (when also needed in other contexts).
53template<size_t N> struct StringLiteral {
54 constexpr StringLiteral(const char (&str)[N])
55 {
56 assert(str[N - 1] == '\0');
57 std::copy_n(str, N - 1, value.data());
58 }
59
60 [[nodiscard]] constexpr size_t size() const { return value.size(); }
61 [[nodiscard]] constexpr const char* data() const { return value.data(); }
62
63 std::array<char, N - 1> value;
64};
65
66namespace detail {
67
68// Loader can load an 8/16/32/64 unaligned value.
69struct Load8 {
70 using type = uint8_t;
71 [[nodiscard]] type operator()(const void* p) const { return *std::bit_cast<const uint8_t*>(p); }
72};
73struct Load16 {
74 using type = uint16_t;
75 [[nodiscard]] type operator()(const void* p) const { return unalignedLoad16(p); }
76};
77struct Load32 {
78 using type = uint32_t;
79 [[nodiscard]] type operator()(const void* p) const { return unalignedLoad32(p); }
80};
81struct Load64 {
82 using type = uint64_t;
83 [[nodiscard]] type operator()(const void* p) const { return unalignedLoad64(p); }
84};
85struct ErrorNotSupportedLoader; // load only implemented up to 64 bit
86template<size_t N> struct SelectLoader
87 : std::conditional_t<N <= 1, Load8,
88 std::conditional_t<N <= 2, Load16,
89 std::conditional_t<N <= 4, Load32,
90 std::conditional_t<N <= 8, Load64,
91 ErrorNotSupportedLoader>>>> {};
92
93
94template<typename T> constexpr std::pair<T, T> calcValueMask(auto str)
95{
96 T v = 0;
97 T m = 0;
98 int s = Endian::LITTLE ? 0 : (8 * (sizeof(T) - 1));
99 for (size_t i = 0; i < str.size(); ++i) {
100 v = T(v + (T(str.data()[i]) << s));
101 m = T(m + (T(255) << s));
102 s += Endian::LITTLE ? 8 : -8;
103 }
104 return {v, m};
105}
106
107} // namespace detail
108
109template<StringLiteral Literal>
110[[nodiscard]] bool small_compare(const char* p)
111{
112 using Loader = detail::SelectLoader<Literal.size()>;
113 using Type = typename Loader::type;
114 static constexpr auto vm = detail::calcValueMask<Type>(Literal);
115 auto [value, mask] = vm;
116
117 Loader loader;
118 return (loader(p) & mask) == value;
119}
120
121template<StringLiteral Literal>
122[[nodiscard]] bool small_compare(std::string_view str)
123{
124 if (str.size() != Literal.size()) return false;
125 return small_compare<Literal>(str.data());
126}
127
128#endif
ALWAYS_INLINE uint64_t unalignedLoad64(const void *p)
Definition aligned.hh:47
ALWAYS_INLINE uint16_t unalignedLoad16(const void *p)
Definition aligned.hh:41
ALWAYS_INLINE uint32_t unalignedLoad32(const void *p)
Definition aligned.hh:44
Definition join.hh:10
constexpr const char * data() const
constexpr size_t size() const
std::array< char, N - 1 > value
constexpr StringLiteral(const char(&str)[N])
type operator()(const void *p) const
type operator()(const void *p) const
type operator()(const void *p) const
type operator()(const void *p) const