iclass.c: speeding up MAC calculation

This commit is contained in:
pwpiwi 2019-08-24 15:07:56 +02:00
parent 17505ce2a7
commit 1477ba8a3c
2 changed files with 119 additions and 89 deletions

View file

@ -1146,7 +1146,7 @@ int doIClassSimulation(int simulationMode, uint8_t *reader_mac_buf) {
memcpy(data_response, ToSend, ToSendMax); memcpy(data_response, ToSend, ToSendMax);
modulated_response = data_response; modulated_response = data_response;
modulated_response_size = ToSendMax; modulated_response_size = ToSendMax;
response_delay = 0;//We need to hurry here... response_delay = 0; //We need to hurry here... (but maybe not too much... ??)
//exitLoop = true; //exitLoop = true;
} else { //Not fullsim, we don't respond } else { //Not fullsim, we don't respond
// We do not know what to answer, so lets keep quiet // We do not know what to answer, so lets keep quiet

View file

@ -60,15 +60,63 @@
-- MHS 2015 -- MHS 2015
**/ **/
/**
The runtime of opt_doTagMAC_2() with the MHS optimized version was 403 microseconds on Proxmark3.
This was still too slow for some newer readers which didn't want to wait that long.
Further optimizations to speed up the MAC calculations:
* Optimized opt_Tt logic
* Look up table for opt_select
* Removing many unnecessary bit maskings (& 0x1)
* updating state in place instead of alternating use of a second state structure
* remove the necessity to reverse bits of input and output bytes
opt_doTagMAC_2() now completes in 270 microseconds.
-- piwi 2019
**/
#include "optimized_cipher.h" #include "optimized_cipher.h"
#include <stddef.h> #include <stddef.h>
#include <stdbool.h> #include <stdbool.h>
#include <stdint.h> #include <stdint.h>
#include "string.h"
/*
 * Precomputed 3-bit selector values (each entry is in the range 0..7),
 * indexed by an 8-bit value r.
 *
 * With r_ls2 = (uint8_t)(r << 2), entry r is
 *     (z0 & 4) | (z1 & 2) | (z2 & 1)
 * where
 *     z0 = ((r & r_ls2) >> 5) ^ ((r & ~r_ls2) >> 4) ^ ((r | r_ls2) >> 3)
 *     z1 = ((r | r_ls2) >> 6) ^ ((r | r_ls2) >> 1) ^ (r >> 5) ^ r
 *     z2 = ((r & ~r_ls2) >> 4) ^ ((r & r_ls2) >> 3) ^ r
 *
 * Values are written in plain decimal; 16 entries per row, and the
 * trailing marker gives the index of the first entry of that row.
 */
static const uint8_t opt_select_LUT[256] = {
    0, 3, 2, 1, 2, 3, 0, 1, 4, 7, 7, 4, 6, 7, 5, 4, /* 0x00 */
    1, 2, 3, 0, 2, 3, 0, 1, 5, 6, 6, 5, 6, 7, 5, 4, /* 0x10 */
    6, 5, 4, 7, 4, 5, 6, 7, 6, 5, 5, 6, 4, 5, 7, 6, /* 0x20 */
    7, 4, 5, 6, 4, 5, 6, 7, 7, 4, 4, 7, 4, 5, 7, 6, /* 0x30 */
    6, 5, 4, 7, 4, 5, 6, 7, 2, 1, 1, 2, 0, 1, 3, 2, /* 0x40 */
    3, 0, 1, 2, 0, 1, 2, 3, 7, 4, 4, 7, 4, 5, 7, 6, /* 0x50 */
    0, 3, 2, 1, 2, 3, 0, 1, 0, 3, 3, 0, 2, 3, 1, 0, /* 0x60 */
    5, 6, 7, 4, 6, 7, 4, 5, 5, 6, 6, 5, 6, 7, 5, 4, /* 0x70 */
    2, 1, 0, 3, 0, 1, 2, 3, 6, 5, 5, 6, 4, 5, 7, 6, /* 0x80 */
    3, 0, 1, 2, 0, 1, 2, 3, 7, 4, 4, 7, 4, 5, 7, 6, /* 0x90 */
    2, 1, 0, 3, 0, 1, 2, 3, 2, 1, 1, 2, 0, 1, 3, 2, /* 0xA0 */
    3, 0, 1, 2, 0, 1, 2, 3, 3, 0, 0, 3, 0, 1, 3, 2, /* 0xB0 */
    4, 7, 6, 5, 6, 7, 4, 5, 0, 3, 3, 0, 2, 3, 1, 0, /* 0xC0 */
    1, 2, 3, 0, 2, 3, 0, 1, 5, 6, 6, 5, 6, 7, 5, 4, /* 0xD0 */
    4, 7, 6, 5, 6, 7, 4, 5, 4, 7, 7, 4, 6, 7, 5, 4, /* 0xE0 */
    1, 2, 3, 0, 2, 3, 0, 1, 1, 2, 2, 1, 2, 3, 1, 0  /* 0xF0 */
};
#define opt_T(s) (0x1 & ((s->t >> 15) ^ (s->t >> 14)^ (s->t >> 10)^ (s->t >> 8)^ (s->t >> 5)^ (s->t >> 4)^ (s->t >> 1)^ s->t)) /********************** the table above has been generated with this code: ********
#include "util.h"
#define opt_B(s) (((s->b >> 6) ^ (s->b >> 5) ^ (s->b >> 4) ^ (s->b)) & 0x1) static void init_opt_select_LUT(void) {
for (int r = 0; r < 256; r++) {
uint8_t r_ls2 = r << 2;
uint8_t r_and_ls2 = r & r_ls2;
uint8_t r_or_ls2 = r | r_ls2;
uint8_t z0 = (r_and_ls2 >> 5) ^ ((r & ~r_ls2) >> 4) ^ ( r_or_ls2 >> 3);
uint8_t z1 = (r_or_ls2 >> 6) ^ ( r_or_ls2 >> 1) ^ (r >> 5) ^ r;
uint8_t z2 = ((r & ~r_ls2) >> 4) ^ (r_and_ls2 >> 3) ^ r;
opt_select_LUT[r] = (z0 & 4) | (z1 & 2) | (z2 & 1);
}
print_result("", opt_select_LUT, 256);
}
***********************************************************************************/
#define opt__select(x,y,r) (4 & (((r & (r << 2)) >> 5) ^ ((r & ~(r << 2)) >> 4) ^ ( (r | r << 2) >> 3)))\ #define opt__select(x,y,r) (4 & (((r & (r << 2)) >> 5) ^ ((r & ~(r << 2)) >> 4) ^ ( (r | r << 2) >> 3)))\
|(2 & (((r | r << 2) >> 6) ^ ( (r | r << 2) >> 1) ^ (r >> 5) ^ r ^ ((x^y) << 1)))\ |(2 & (((r | r << 2) >> 6) ^ ( (r | r << 2) >> 1) ^ (r >> 5) ^ r ^ ((x^y) << 1)))\
@ -78,9 +126,6 @@
* Some background on the expression above can be found here... * Some background on the expression above can be found here...
uint8_t xopt__select(bool x, bool y, uint8_t r) uint8_t xopt__select(bool x, bool y, uint8_t r)
{ {
uint8_t r_ls2 = r << 2;
uint8_t r_and_ls2 = r & r_ls2;
uint8_t r_or_ls2 = r | r_ls2;
//r: r0 r1 r2 r3 r4 r5 r6 r7 //r: r0 r1 r2 r3 r4 r5 r6 r7
//r_ls2: r2 r3 r4 r5 r6 r7 0 0 //r_ls2: r2 r3 r4 r5 r6 r7 0 0
@ -100,81 +145,95 @@ uint8_t xopt__select(bool x, bool y, uint8_t r)
} }
*/ */
void opt_successor(const uint8_t *k, State *s, bool y, State *successor) { static void opt_successor(const uint8_t *k, State *s, uint8_t y) {
uint8_t Tt = 1 & opt_T(s); // #define opt_T(s) (0x1 & ((s->t >> 15) ^ (s->t >> 14) ^ (s->t >> 10) ^ (s->t >> 8) ^ (s->t >> 5) ^ (s->t >> 4)^ (s->t >> 1) ^ s->t))
// uint8_t Tt = opt_T(s);
uint16_t Tt = s->t & 0xc533;
Tt = Tt ^ (Tt >> 1);
Tt = Tt ^ (Tt >> 4);
Tt = Tt ^ (Tt >> 10);
Tt = Tt ^ (Tt >> 8);
successor->t = (s->t >> 1); s->t = (s->t >> 1);
successor->t |= (Tt ^ (s->r >> 7 & 0x1) ^ (s->r >> 3 & 0x1)) << 15; s->t |= (Tt ^ (s->r >> 7) ^ (s->r >> 3)) << 15;
successor->b = s->b >> 1; uint8_t opt_B = s->b;
successor->b |= (opt_B(s) ^ (s->r & 0x1)) << 7; opt_B ^= s->b >> 6;
opt_B ^= s->b >> 5;
opt_B ^= s->b >> 4;
successor->r = (k[opt__select(Tt, y, s->r)] ^ successor->b) + s->l ; s->b = s->b >> 1;
successor->l = successor->r + s->r; s->b |= (opt_B ^ s->r) << 7;
uint8_t opt_select = opt_select_LUT[s->r] & 0x04;
opt_select |= (opt_select_LUT[s->r] ^ ((Tt ^ y) << 1)) & 0x02;
opt_select |= (opt_select_LUT[s->r] ^ Tt) & 0x01;
uint8_t r = s->r;
s->r = (k[opt_select] ^ s->b) + s->l ;
s->l = s->r + r;
} }
void opt_suc(const uint8_t *k, State *s, uint8_t *in, uint8_t length, bool add32Zeroes) { static void opt_suc(const uint8_t *k, State *s, uint8_t *in, uint8_t length, bool add32Zeroes) {
State x2;
for (int i = 0; i < length; i++) { for (int i = 0; i < length; i++) {
uint8_t head; uint8_t head;
head = 1 & (in[i] >> 7); head = in[i];
opt_successor(k, s, head, &x2); opt_successor(k, s, head);
head = 1 & (in[i] >> 6); head >>= 1;
opt_successor(k, &x2, head, s); opt_successor(k, s, head);
head = 1 & (in[i] >> 5); head >>= 1;
opt_successor(k, s, head, &x2); opt_successor(k, s, head);
head = 1 & (in[i] >> 4); head >>= 1;
opt_successor(k, &x2, head, s); opt_successor(k, s, head);
head = 1 & (in[i] >> 3); head >>= 1;
opt_successor(k, s, head, &x2); opt_successor(k, s, head);
head = 1 & (in[i] >> 2); head >>= 1;
opt_successor(k, &x2, head, s); opt_successor(k, s, head);
head = 1 & (in[i] >> 1); head >>= 1;
opt_successor(k, s, head, &x2); opt_successor(k, s, head);
head = 1 & in[i]; head >>= 1;
opt_successor(k, &x2, head, s); opt_successor(k, s, head);
} }
//For tag MAC, an additional 32 zeroes //For tag MAC, an additional 32 zeroes
if (add32Zeroes) { if (add32Zeroes) {
for(int i = 0; i < 16; i++) { for(int i = 0; i < 16; i++) {
opt_successor(k, s, 0, &x2); opt_successor(k, s, 0);
opt_successor(k, &x2, 0, s); opt_successor(k, s, 0);
} }
} }
} }
void opt_output(const uint8_t *k, State *s, uint8_t *buffer) { static void opt_output(const uint8_t *k, State *s, uint8_t *buffer) {
State temp = {0, 0, 0, 0};
for (uint8_t times = 0; times < 4; times++) { for (uint8_t times = 0; times < 4; times++) {
uint8_t bout = 0; uint8_t bout = 0;
bout |= (s->r & 0x4) << 5; bout |= (s->r & 0x4) >> 2;
opt_successor(k, s, 0, &temp); opt_successor(k, s, 0);
bout |= (temp.r & 0x4) << 4;
opt_successor(k, &temp, 0, s);
bout |= (s->r & 0x4) << 3;
opt_successor(k, s, 0, &temp);
bout |= (temp.r & 0x4) << 2;
opt_successor(k, &temp, 0, s);
bout |= (s->r & 0x4) << 1;
opt_successor(k, s, 0, &temp);
bout |= (temp.r & 0x4) ;
opt_successor(k, &temp, 0, s);
bout |= (s->r & 0x4) >> 1; bout |= (s->r & 0x4) >> 1;
opt_successor(k, s, 0, &temp); opt_successor(k, s, 0);
bout |= (temp.r & 0x4) >> 2; bout |= (s->r & 0x4);
opt_successor(k, &temp, 0, s); opt_successor(k, s, 0);
bout |= (s->r & 0x4) << 1;
opt_successor(k, s, 0);
bout |= (s->r & 0x4) << 2;
opt_successor(k, s, 0);
bout |= (s->r & 0x4) << 3;
opt_successor(k, s, 0);
bout |= (s->r & 0x4) << 4;
opt_successor(k, s, 0);
bout |= (s->r & 0x4) << 5;
opt_successor(k, s, 0);
buffer[times] = bout; buffer[times] = bout;
} }
} }
void opt_MAC(uint8_t *k, uint8_t *input, uint8_t *out) { static void opt_MAC(uint8_t *k, uint8_t *input, uint8_t *out) {
State _init = { State _init = {
((k[0] ^ 0x4c) + 0xEC) & 0xFF,// l ((k[0] ^ 0x4c) + 0xEC) & 0xFF,// l
((k[0] ^ 0x4c) + 0x21) & 0xFF,// r ((k[0] ^ 0x4c) + 0x21) & 0xFF,// r
@ -187,43 +246,22 @@ void opt_MAC(uint8_t *k, uint8_t *input, uint8_t *out) {
opt_output(k, &_init, out); opt_output(k, &_init, out);
} }
uint8_t rev_byte(uint8_t b) {
b = (b & 0xF0) >> 4 | (b & 0x0F) << 4;
b = (b & 0xCC) >> 2 | (b & 0x33) << 2;
b = (b & 0xAA) >> 1 | (b & 0x55) << 1;
return b;
}
void opt_reverse_arraybytecpy(uint8_t *dest, uint8_t *src, size_t len) {
for (size_t i = 0; i < len; i++) {
dest[i] = rev_byte(src[i]);
}
}
void opt_doReaderMAC(uint8_t *cc_nr_p, uint8_t *div_key_p, uint8_t mac[4]) { void opt_doReaderMAC(uint8_t *cc_nr_p, uint8_t *div_key_p, uint8_t mac[4]) {
static uint8_t cc_nr[12];
opt_reverse_arraybytecpy(cc_nr, cc_nr_p, 12);
uint8_t dest[] = {0, 0, 0, 0, 0, 0, 0, 0}; uint8_t dest[] = {0, 0, 0, 0, 0, 0, 0, 0};
opt_MAC(div_key_p, cc_nr, dest); opt_MAC(div_key_p, cc_nr_p, dest);
//The output MAC must also be reversed memcpy(mac, dest, 4);
opt_reverse_arraybytecpy(mac, dest, 4);
return; return;
} }
void opt_doTagMAC(uint8_t *cc_p, const uint8_t *div_key_p, uint8_t mac[4]) { void opt_doTagMAC(uint8_t *cc_p, const uint8_t *div_key_p, uint8_t mac[4]) {
static uint8_t cc_nr[8+4+4];
opt_reverse_arraybytecpy(cc_nr, cc_p, 12);
State _init = { State _init = {
((div_key_p[0] ^ 0x4c) + 0xEC) & 0xFF,// l ((div_key_p[0] ^ 0x4c) + 0xEC) & 0xFF,// l
((div_key_p[0] ^ 0x4c) + 0x21) & 0xFF,// r ((div_key_p[0] ^ 0x4c) + 0x21) & 0xFF,// r
0x4c, // b 0x4c, // b
0xE012 // t 0xE012 // t
}; };
opt_suc(div_key_p, &_init,cc_nr, 12, true); opt_suc(div_key_p, &_init, cc_p, 12, true);
uint8_t dest[] = {0, 0, 0, 0}; opt_output(div_key_p, &_init, mac);
opt_output(div_key_p, &_init, dest);
//The output MAC must also be reversed
opt_reverse_arraybytecpy(mac, dest, 4);
return; return;
} }
@ -236,15 +274,13 @@ void opt_doTagMAC(uint8_t *cc_p, const uint8_t *div_key_p, uint8_t mac[4]) {
* @return the cipher state * @return the cipher state
*/ */
State opt_doTagMAC_1(uint8_t *cc_p, const uint8_t *div_key_p) { State opt_doTagMAC_1(uint8_t *cc_p, const uint8_t *div_key_p) {
static uint8_t cc_nr[8];
opt_reverse_arraybytecpy(cc_nr, cc_p, 8);
State _init = { State _init = {
((div_key_p[0] ^ 0x4c) + 0xEC) & 0xFF,// l ((div_key_p[0] ^ 0x4c) + 0xEC) & 0xFF,// l
((div_key_p[0] ^ 0x4c) + 0x21) & 0xFF,// r ((div_key_p[0] ^ 0x4c) + 0x21) & 0xFF,// r
0x4c, // b 0x4c, // b
0xE012 // t 0xE012 // t
}; };
opt_suc(div_key_p, &_init, cc_nr, 8, false); opt_suc(div_key_p, &_init, cc_p, 8, false);
return _init; return _init;
} }
@ -258,13 +294,7 @@ State opt_doTagMAC_1(uint8_t *cc_p, const uint8_t *div_key_p) {
* @param div_key_p - the key to use * @param div_key_p - the key to use
*/ */
void opt_doTagMAC_2(State _init, uint8_t *nr, uint8_t mac[4], const uint8_t *div_key_p) { void opt_doTagMAC_2(State _init, uint8_t *nr, uint8_t mac[4], const uint8_t *div_key_p) {
static uint8_t _nr[4]; opt_suc(div_key_p, &_init, nr, 4, true);
opt_reverse_arraybytecpy(_nr, nr, 4); opt_output(div_key_p, &_init, mac);
opt_suc(div_key_p, &_init, _nr, 4, true);
//opt_suc(div_key_p, &_init,nr, 4, false);
uint8_t dest[] = {0, 0, 0, 0};
opt_output(div_key_p, &_init, dest);
//The output MAC must also be reversed
opt_reverse_arraybytecpy(mac, dest, 4);
return; return;
} }