#include <emmintrin.h>

/** Rotate a 32-bit value to the left.
 *
 * @param value The word to rotate.
 * @param bits  Number of bit positions to rotate by. Only the low 5 bits
 *              are used, so 0 and 32 both return `value` unchanged.
 * @return `value` rotated left by `bits` positions.
 */
inline static uint32_t rol32(uint32_t value, int bits)
{
	// Mask both shift counts: shifting a 32-bit value by 32 (which the
	// naive "value >> (32 - bits)" does for bits == 0) is undefined.
	const unsigned n = unsigned(bits) & 31;
	return (value << n) | (value >> ((32 - n) & 31));
}
// Replaces workspace word i, in place, with the result of Endian::readB32 on it.
// NOTE(review): presumably a big-endian load that converts the raw input bytes
// to host order -- confirm against the Endian helpers.
47 data[i] = Endian::readB32(&data[i]);
// XORs the four workspace words at offsets +13, +8, +2 and +0 from i (every
// index is wrapped with "& 15", so the 16-word array is reused as a circular
// buffer), passes the result through rol32 and stores it back in slot i & 15.
// The stored value is also the return value. The rotate amount is on a line
// that is not visible in this listing.
52 return data[i & 15] = rol32(
53 data[(i + 13) & 15] ^ data[(i + 8) & 15] ^
54 data[(i + 2) & 15] ^ data[ i & 15]
// Round step used by SHA1::transform for rounds 0-15.
// Adds to z: ((w & (x ^ y)) ^ y) -- bitwise "x where w is set, otherwise y" --
// plus the word returned by next0(i), the constant 0x5A827999 and v rotated
// left by 5 bits.
// NOTE(review): w is taken by non-const reference but is not written in the
// statement visible here. This listing appears to omit lines, so the update of
// w is presumably on one of them -- confirm against the full source.
62 void r0(uint32_t v, uint32_t& w, uint32_t x, uint32_t y, uint32_t& z,
int i)
64 z += ((w & (x ^ y)) ^ y) + next0(i) + 0x5A827999 + rol32(v, 5);
// Round step used by SHA1::transform for rounds 16-19.
// Same arithmetic as r0 (bit-select function and constant 0x5A827999), but the
// word comes from next(i) instead of next0(i).
// NOTE(review): w is taken by non-const reference but is not written in the
// statement visible here. This listing appears to omit lines -- confirm
// against the full source.
67 void r1(uint32_t v, uint32_t& w, uint32_t x, uint32_t y, uint32_t& z,
int i)
69 z += ((w & (x ^ y)) ^ y) + next(i) + 0x5A827999 + rol32(v, 5);
// Round step used by SHA1::transform for rounds 20-39.
// Adds to z: the bitwise parity (w ^ x ^ y), the word returned by next(i), the
// constant 0x6ED9EBA1 and v rotated left by 5 bits.
// NOTE(review): w is taken by non-const reference but is not written in the
// statement visible here. This listing appears to omit lines -- confirm
// against the full source.
72 void r2(uint32_t v, uint32_t& w, uint32_t x, uint32_t y, uint32_t& z,
int i)
74 z += (w ^ x ^ y) + next(i) + 0x6ED9EBA1 + rol32(v, 5);
// Round step used by SHA1::transform for rounds 40-59.
// Adds to z: (((w | x) & y) | (w & x)) -- the bitwise majority of w, x and y --
// plus the word returned by next(i), the constant 0x8F1BBCDC and v rotated
// left by 5 bits.
// NOTE(review): w is taken by non-const reference but is not written in the
// statement visible here. This listing appears to omit lines -- confirm
// against the full source.
77 void r3(uint32_t v, uint32_t& w, uint32_t x, uint32_t y, uint32_t& z,
int i)
79 z += (((w | x) & y) | (w & x)) + next(i) + 0x8F1BBCDC + rol32(v, 5);
// Round step used by SHA1::transform for rounds 60-79.
// Same parity function as r2, with the constant 0xCA62C1D6.
// NOTE(review): w is taken by non-const reference but is not written in the
// statement visible here. This listing appears to omit lines -- confirm
// against the full source.
82 void r4(uint32_t v, uint32_t& w, uint32_t x, uint32_t y, uint32_t& z,
int i)
84 z += (w ^ x ^ y) + next(i) + 0xCA62C1D6 + rol32(v, 5);
// Copies sizeof(data) bytes from the caller's buffer into the workspace array,
// so the rounds can modify the words without touching the input.
91 memcpy(data, buffer,
sizeof(data));
// Reject any input whose length is not exactly 40 characters; the offending
// text is appended to the exception message.
104 if (str.
size() != 40) {
105 throw MSXException(
"Invalid sha1, should be exactly 40 digits long: " + str);
// Converts one hexadecimal digit character to its numeric value (0-15).
// Accepts '0'-'9', 'a'-'f' and 'A'-'F'; note that upper-case digits are
// accepted even though the error text only mentions "a-f".
// Any other character raises MSXException. `str` is not used for the
// conversion itself; it is only in scope for building the error message
// (the rest of that expression is not visible in this listing).
110 static inline unsigned hex(
char x,
const char* str)
112 if ((
'0' <= x) && (x <=
'9'))
return x -
'0';
113 if ((
'a' <= x) && (x <=
'f'))
return x -
'a' + 10;
114 if ((
'A' <= x) && (x <=
'F'))
return x -
'A' + 10;
115 throw MSXException(
"Invalid sha1, digits should be 0-9, a-f: " +
// Per-byte unsigned "a >= b".
// SSE2 has no unsigned byte comparison, but a >= b holds exactly when
// max(a, b) == a. Each result byte is 0xFF where the relation holds and
// 0x00 where it does not.
static inline __m128i _mm_cmpge_epu8(__m128i a, __m128i b)
{
	const __m128i larger = _mm_max_epu8(a, b);
	return _mm_cmpeq_epi8(larger, a);
}
// Per-byte unsigned "a <= b".
// Equivalent to "b >= a": the relation holds exactly when max(b, a) == b.
// Each result byte is 0xFF where the relation holds and 0x00 where it does not.
static inline __m128i _mm_cmple_epu8(__m128i a, __m128i b)
{
	const __m128i larger = _mm_max_epu8(b, a);
	return _mm_cmpeq_epi8(larger, b);
}
132 static inline uint64_t loadSwap64(
const char* s)
134 return Endian::bswap64(*reinterpret_cast<const uint64_t*>(s));
// SSE2 path: validates and decodes all 40 hex characters of `str` at once.
//
// Load the text as five byte-swapped 64-bit groups. The swap puts the first
// character of each group in the most significant byte. The unused upper half
// of s2 is filled with '0' characters so that it passes validation and
// decodes to zero.
144 __m128i s0 = _mm_set_epi64x(loadSwap64(str + 8), loadSwap64(str + 0));
145 __m128i s1 = _mm_set_epi64x(loadSwap64(str + 24), loadSwap64(str + 16));
146 __m128i s2 = _mm_set_epi64x(
'0' * 0x0101010101010101, loadSwap64(str + 32));
// sN_0 = character - '0' (per byte, wrapping): 0..9 for decimal digits.
149 __m128i cc0 = _mm_set1_epi8(
char(-
'0'));
150 __m128i s0_0 = _mm_add_epi8(s0, cc0);
151 __m128i s1_0 = _mm_add_epi8(s1, cc0);
152 __m128i s2_0 = _mm_add_epi8(s2, cc0);
// sN_a = (character | 32) - 'a': OR-ing with 32 folds 'A'-'F' onto 'a'-'f',
// so both cases give 0..5.
155 __m128i c32 = _mm_set1_epi8(32);
156 __m128i cca = _mm_set1_epi8(
char(-
'a'));
157 __m128i s0_a = _mm_add_epi8(_mm_or_si128(s0, c32), cca);
158 __m128i s1_a = _mm_add_epi8(_mm_or_si128(s1, c32), cca);
159 __m128i s2_a = _mm_add_epi8(_mm_or_si128(s2, c32), cca);
// cN_0: byte mask (0xFF / 0x00) of positions holding '0'-'9' (unsigned <= 9).
162 __m128i c9 = _mm_set1_epi8(9);
163 __m128i c0_0 = _mm_cmple_epu8(s0_0, c9);
164 __m128i c1_0 = _mm_cmple_epu8(s1_0, c9);
165 __m128i c2_0 = _mm_cmple_epu8(s2_0, c9);
// cN_a: byte mask of positions holding 'a'-'f' or 'A'-'F' (unsigned <= 5).
168 __m128i c5 = _mm_set1_epi8(5);
169 __m128i c0_a = _mm_cmple_epu8(s0_a, c5);
170 __m128i c1_a = _mm_cmple_epu8(s1_a, c5);
171 __m128i c2_a = _mm_cmple_epu8(s2_a, c5);
// A position is valid when it matched either class; every one of the 48
// positions must be valid, otherwise an exception is thrown.
// NOTE(review): this path throws a `string`, whereas hex() and the length
// check throw MSXException -- confirm which type the callers catch.
174 __m128i ok0 = _mm_or_si128(c0_0, c0_a);
175 __m128i ok1 = _mm_or_si128(c1_0, c1_a);
176 __m128i ok2 = _mm_or_si128(c2_0, c2_a);
177 __m128i ok = _mm_and_si128(_mm_and_si128(ok0, ok1), ok2);
178 if (
unlikely(_mm_movemask_epi8(ok) != 0xffff)) {
179 throw string(
"Invalid sha1, digits should be 0-9, a-f: " +
// dN_0: numeric value where the character was a decimal digit, else 0.
184 __m128i d0_0 = _mm_and_si128(s0_0, c0_0);
185 __m128i d1_0 = _mm_and_si128(s1_0, c1_0);
186 __m128i d2_0 = _mm_and_si128(s2_0, c2_0);
// dN_a: numeric value (10..15) where the character was a letter, else 0.
189 __m128i c10 = _mm_set1_epi8(10);
190 __m128i d0_a = _mm_and_si128(_mm_add_epi8(s0_a, c10), c0_a);
191 __m128i d1_a = _mm_and_si128(_mm_add_epi8(s1_a, c10), c1_a);
192 __m128i d2_a = _mm_and_si128(_mm_add_epi8(s2_a, c10), c2_a);
// dN: one 4-bit value (0..15) per input character.
195 __m128i d0 = _mm_or_si128(d0_0, d0_a);
196 __m128i d1 = _mm_or_si128(d1_0, d1_a);
197 __m128i d2 = _mm_or_si128(d2_0, d2_a);
// Merge each pair of nibbles into one byte: within every 16-bit lane the high
// byte's nibble is shifted down into bits 4-7 of the low byte, then only the
// low byte is kept. packus then squeezes the eight 16-bit lanes into 8 bytes.
200 __m128i c00ff = _mm_set1_epi16(0x00ff);
201 __m128i e0 = _mm_and_si128(_mm_or_si128(d0, _mm_srli_epi16(d0, 4)), c00ff);
202 __m128i e1 = _mm_and_si128(_mm_or_si128(d1, _mm_srli_epi16(d1, 4)), c00ff);
203 __m128i e2 = _mm_and_si128(_mm_or_si128(d2, _mm_srli_epi16(d2, 4)), c00ff);
204 __m128i f0 = _mm_packus_epi16(e0, e0);
205 __m128i f1 = _mm_packus_epi16(e1, e1);
206 __m128i f2 = _mm_packus_epi16(e2, e2);
// Store the result: 16 bytes (low halves of f0 and f1) starting at a[0] with
// an unaligned store, and the low 32 bits of f2 into a[4].
209 _mm_storeu_si128(reinterpret_cast<__m128i*>(a), _mm_unpacklo_epi64(f0, f1));
210 a[4] = _mm_cvtsi128_si32(f2);
// NOTE(review): what follows looks like the start of a separate scalar code
// path (8 iterations per step); its body is not visible in this listing.
216 for (
int j = 0; j < 8; ++j) {
// Maps a value in the range 0..15 to its lower-case hexadecimal character
// ('0'-'9', 'a'-'f').
static inline char digit(unsigned x)
{
	if (x < 10) {
		return static_cast<char>('0' + x);
	}
	return static_cast<char>('a' + (x - 10));
}
// Writes every 32-bit word of `a` as 8 hexadecimal digits, most significant
// nibble first (shift counts 28, 24, ..., 0), and returns the 40 characters as
// a string. `buf` and `p` are declared on lines not visible in this listing.
233 for (
const auto& ai : a) {
234 for (
int j = 28; j >= 0; j -= 4) {
235 *p++ = digit((ai >> j) & 0xf);
238 return string(buf, 40);
// Scans the words of `a` and returns false as soon as one is non-zero.
// (The result for the all-zero case is on a line not visible in this listing.)
243 for (
const auto& ai : a) {
244 if (ai != 0)
return false;
// Resets every word of `a` to zero.
250 for (
auto& ai : a) ai = 0;
// Load the five state words with the standard SHA-1 initial hash values.
259 m_state.a[0] = 0x67452301;
260 m_state.a[1] = 0xEFCDAB89;
261 m_state.a[2] = 0x98BADCFE;
262 m_state.a[3] = 0x10325476;
263 m_state.a[4] = 0xC3D2E1F0;
// Runs the 80 SHA-1 rounds for one 64-byte input block.
//
// The five state words are copied into the locals a..e, which the round
// helpers then update in place. Rather than shuffling values between variables
// after each round, successive calls rotate the argument order
// (a,b,c,d,e) -> (e,a,b,c,d) -> (d,e,a,b,c) -> ..., so the same five variables
// take on each role in turn.
//
// Round groups, by the last argument (the round index):
//    0-15  r0
//   16-19  r1
//   20-39  r2
//   40-59  r3
//   60-79  r4
//
// NOTE(review): the declaration of `block` and whatever follows round 79
// (presumably folding a..e back into m_state) are on lines that are not
// visible in this listing -- confirm against the full source.
269 void SHA1::transform(
const uint8_t buffer[64])
274 uint32_t a = m_state.a[0];
275 uint32_t b = m_state.a[1];
276 uint32_t c = m_state.a[2];
277 uint32_t d = m_state.a[3];
278 uint32_t e = m_state.a[4];
281 block.
r0(a,b,c,d,e, 0); block.
r0(e,a,b,c,d, 1); block.
r0(d,e,a,b,c, 2);
282 block.
r0(c,d,e,a,b, 3); block.
r0(b,c,d,e,a, 4); block.
r0(a,b,c,d,e, 5);
283 block.
r0(e,a,b,c,d, 6); block.
r0(d,e,a,b,c, 7); block.
r0(c,d,e,a,b, 8);
284 block.
r0(b,c,d,e,a, 9); block.
r0(a,b,c,d,e,10); block.
r0(e,a,b,c,d,11);
285 block.
r0(d,e,a,b,c,12); block.
r0(c,d,e,a,b,13); block.
r0(b,c,d,e,a,14);
286 block.
r0(a,b,c,d,e,15); block.
r1(e,a,b,c,d,16); block.
r1(d,e,a,b,c,17);
287 block.
r1(c,d,e,a,b,18); block.
r1(b,c,d,e,a,19); block.
r2(a,b,c,d,e,20);
288 block.
r2(e,a,b,c,d,21); block.
r2(d,e,a,b,c,22); block.
r2(c,d,e,a,b,23);
289 block.
r2(b,c,d,e,a,24); block.
r2(a,b,c,d,e,25); block.
r2(e,a,b,c,d,26);
290 block.
r2(d,e,a,b,c,27); block.
r2(c,d,e,a,b,28); block.
r2(b,c,d,e,a,29);
291 block.
r2(a,b,c,d,e,30); block.
r2(e,a,b,c,d,31); block.
r2(d,e,a,b,c,32);
292 block.
r2(c,d,e,a,b,33); block.
r2(b,c,d,e,a,34); block.
r2(a,b,c,d,e,35);
293 block.
r2(e,a,b,c,d,36); block.
r2(d,e,a,b,c,37); block.
r2(c,d,e,a,b,38);
294 block.
r2(b,c,d,e,a,39); block.
r3(a,b,c,d,e,40); block.
r3(e,a,b,c,d,41);
295 block.
r3(d,e,a,b,c,42); block.
r3(c,d,e,a,b,43); block.
r3(b,c,d,e,a,44);
296 block.
r3(a,b,c,d,e,45); block.
r3(e,a,b,c,d,46); block.
r3(d,e,a,b,c,47);
297 block.
r3(c,d,e,a,b,48); block.
r3(b,c,d,e,a,49); block.
r3(a,b,c,d,e,50);
298 block.
r3(e,a,b,c,d,51); block.
r3(d,e,a,b,c,52); block.
r3(c,d,e,a,b,53);
299 block.
r3(b,c,d,e,a,54); block.
r3(a,b,c,d,e,55); block.
r3(e,a,b,c,d,56);
300 block.
r3(d,e,a,b,c,57); block.
r3(c,d,e,a,b,58); block.
r3(b,c,d,e,a,59);
301 block.
r4(a,b,c,d,e,60); block.
r4(e,a,b,c,d,61); block.
r4(d,e,a,b,c,62);
302 block.
r4(c,d,e,a,b,63); block.
r4(b,c,d,e,a,64); block.
r4(a,b,c,d,e,65);
303 block.
r4(e,a,b,c,d,66); block.
r4(d,e,a,b,c,67); block.
r4(c,d,e,a,b,68);
304 block.
r4(b,c,d,e,a,69); block.
r4(a,b,c,d,e,70); block.
r4(e,a,b,c,d,71);
305 block.
r4(d,e,a,b,c,72); block.
r4(c,d,e,a,b,73); block.
r4(b,c,d,e,a,74);
306 block.
r4(a,b,c,d,e,75); block.
r4(e,a,b,c,d,76); block.
r4(d,e,a,b,c,77);
307 block.
r4(c,d,e,a,b,78); block.
r4(b,c,d,e,a,79);
// Feeding more data after finalization is a programming error.
320 assert(!m_finalized);
// m_count is kept in bits: ">> 3" converts to bytes, "& 63" gives the number
// of bytes already waiting in the 64-byte buffer.
321 uint32_t j = (m_count >> 3) & 63;
323 m_count += uint64_t(len) << 3;
// If the new data completes the pending block, top the buffer up with the
// first (64 - j) input bytes; i records how much input has been consumed.
326 if ((j + len) > 63) {
327 memcpy(&m_buffer[j], data, (i = 64 - j));
// Walk over the remaining input in whole 64-byte blocks (loop body not visible
// in this listing).
329 for (; i + 63 < len; i += 64) {
// Keep the leftover tail (fewer than 64 bytes) in the buffer for a later call.
336 memcpy(&m_buffer[j], &data[i], len - i);
// Appends the SHA-1 padding and the message length to the data stream.
//
// Statement order matters: the length must be captured BEFORE padding, because
// every update() call below increases m_count.
// NOTE(review): at least one line after the last update() call is not visible
// in this listing (presumably where m_finalized is set) -- confirm against the
// full source.
339 void SHA1::finalize()
341 assert(!m_finalized);
// Serialize the 64-bit bit count, most significant byte first.
342 uint8_t finalcount[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
343 for (
int i = 0; i < 8; i++) {
344 finalcount[i] = uint8_t(m_count >> ((7 - i) * 8));
// Padding starts with a single 0x80 byte ("\200" in octal) ...
347 update(reinterpret_cast<const uint8_t*>(
"\200"), 1);
// ... followed by zero bytes until the buffered length is 56 bytes
// (448 bits; the mask 504 = 0x1F8 selects the bit count modulo 512 in whole
// bytes), leaving exactly 8 bytes in the block for the length.
348 while ((m_count & 504) != 448) {
349 update(reinterpret_cast<const uint8_t*>(
"\0"), 1);
// The length field completes the final 64-byte block.
351 update(finalcount, 8);
// Finalize on first use, so the padding is appended only once.
357 if (!m_finalized) finalize();
void update(const uint8_t *data, size_t len)
Incrementally calculate the hash value.
void r0(uint32_t v, uint32_t &w, uint32_t x, uint32_t y, uint32_t &z, int i)
void r4(uint32_t v, uint32_t &w, uint32_t x, uint32_t y, uint32_t &z, int i)
WorkspaceBlock(const uint8_t buffer[64])
static Sha1Sum calc(const uint8_t *data, size_t len)
Easier to use interface, if you can pass all data in one go.
This class implements a subset of the proposal for std::string_ref (proposed for the next C++ standard).
void r2(uint32_t v, uint32_t &w, uint32_t x, uint32_t y, uint32_t &z, int i)
const char * data() const
This class represents the result of a sha1 calculation (a 160-bit value).
Sha1Sum digest()
Get the final hash.
Thanks to enen for testing this on a real cartridge:
Helper class to perform a sha1 calculation.
std::string toString() const
void parse40(const char *str)
void r3(uint32_t v, uint32_t &w, uint32_t x, uint32_t y, uint32_t &z, int i)
void r1(uint32_t v, uint32_t &w, uint32_t x, uint32_t y, uint32_t &z, int i)