openMSX
small_compare.hh
Go to the documentation of this file.
1#ifndef SMALL_COMPARE_HH
2#define SMALL_COMPARE_HH
3
4// small_compare utility function
5//
6// This can be used to replace
7// string_view s1 = ...
8// if (s1 == "foo") { ... }
9// with
10// if (small_compare<"foo">(s1)) { ... }
11// This looks more ugly, but it can execute faster.
12//
13// The first variant internally calls memcmp() to do the actual string
14// comparison (after the string length has been checked).
15// Update: gcc-12 optimizes memcmp() on small string-literals better:
16// e.g. it performs a 7-char comparison by using 2 4-byte loads
17// where the middle byte overlaps (older versions did a 4-byte,
18// 2-byte and 1-byte load)
19// In the future, when gcc-12 is more common, we could remove this
20// small_compare utility.
21//
22// The second variant does a 4 byte unaligned load, masks the unnecessary 4th
23// byte (so and with 0x00ffffff) and then compares with 0x006f6f66 ('f'->0x66,
24// 'o'->0x6f) (on big endian system the mask and comparison constants are
25// different). So only 3 instructions instead of a call to memcmp().
26//
27// In the general case small_compare does a 1, 2, 4 or 8 byte unaligned load
28// (only strings upto 8 characters are supported). Possibly followed by a 'and'
29// instruction (only needed for lengths 3, 5, 6 or 7), followed by a compare
30// with a constant. This is much faster than memcmp().
31//
32// There are also some disadvantages.
33// - (minor) We need unaligned load instructions. Many CPU architectures
34// support these, but even in case these must be emulated it's still fast.
35// - (major) We read upto 8 bytes, and that might be past the end of the
36// input string. So it's only safe to use small_compare() if there's enough
37// padding at the end of the buffer.
38
39#include "aligned.hh"
40#include "endian.hh"
41#include <algorithm>
42#include <array>
43#include <cassert>
44#include <cstdint>
45#include <cstring>
46#include <string_view>
47#include <type_traits>
48
49// A wrapper around a (zero-terminated) string literal.
50// Could be moved to a separate header (when also needed in other contexts).
51template<size_t N> struct StringLiteral {
52 constexpr StringLiteral(const char (&str)[N])
53 {
54 assert(str[N - 1] == '\0');
55 std::copy_n(str, N - 1, value.data());
56 }
57
58 [[nodiscard]] constexpr size_t size() const { return value.size(); }
59 [[nodiscard]] constexpr const char* data() const { return value.data(); }
60
61 std::array<char, N - 1> value;
62};
63
64namespace detail {
65
66// Loader can load an 8/16/32/64 unaligned value.
67struct Load8 {
68 using type = uint8_t;
69 [[nodiscard]] type operator()(const void* p) { return *reinterpret_cast<const uint8_t*>(p); }
70};
71struct Load16 {
72 using type = uint16_t;
73 [[nodiscard]] type operator()(const void* p) { return unalignedLoad16(p); }
74};
75struct Load32 {
76 using type = uint32_t;
77 [[nodiscard]] type operator()(const void* p) { return unalignedLoad32(p); }
78};
79struct Load64 {
80 using type = uint64_t;
81 [[nodiscard]] type operator()(const void* p) { return unalignedLoad64(p); }
82};
83struct ErrorNotSupportedLoader; // load only implemented up to 64 bit
84template<size_t N> struct SelectLoader
85 : std::conditional_t<N <= 1, Load8,
86 std::conditional_t<N <= 2, Load16,
87 std::conditional_t<N <= 4, Load32,
88 std::conditional_t<N <= 8, Load64,
89 ErrorNotSupportedLoader>>>> {};
90
91
92template<typename T> constexpr std::pair<T, T> calcValueMask(auto str)
93{
94 T v = 0;
95 T m = 0;
96 int s = Endian::LITTLE ? 0 : (8 * (sizeof(T) - 1));
97 for (size_t i = 0; i < str.size(); ++i) {
98 v = T(v + (T(str.data()[i]) << s));
99 m = T(m + (T(255) << s));
100 s += Endian::LITTLE ? 8 : -8;
101 }
102 return {v, m};
103}
104
105} // namespace detail
106
107template<StringLiteral Literal>
108[[nodiscard]] bool small_compare(const char* p)
109{
110 using Loader = detail::SelectLoader<Literal.size()>;
111 using Type = typename Loader::type;
112 static constexpr auto vm = detail::calcValueMask<Type>(Literal);
113 auto [value, mask] = vm;
114
115 Loader loader;
116 return (loader(p) & mask) == value;
117}
118
119template<StringLiteral Literal>
120[[nodiscard]] bool small_compare(std::string_view str)
121{
122 if (str.size() != Literal.size()) return false;
123 return small_compare<Literal>(str.data());
124}
125
126#endif
ALWAYS_INLINE uint64_t unalignedLoad64(const void *p)
Definition aligned.hh:45
ALWAYS_INLINE uint16_t unalignedLoad16(const void *p)
Definition aligned.hh:39
ALWAYS_INLINE uint32_t unalignedLoad32(const void *p)
Definition aligned.hh:42
Definition join.hh:10
constexpr const char * data() const
constexpr size_t size() const
std::array< char, N - 1 > value
constexpr StringLiteral(const char(&str)[N])
type operator()(const void *p)
type operator()(const void *p)
type operator()(const void *p)
type operator()(const void *p)