mirror of
				https://github.com/Proxmark/proxmark3.git
				synced 2025-10-28 07:00:18 +08:00 
			
		
		
		
	
		
			
				
	
	
		
			664 lines
		
	
	
	
		
			28 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			664 lines
		
	
	
	
		
			28 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| //-----------------------------------------------------------------------------
 | |
| // Copyright (C) 2016, 2017 by piwi
 | |
| //
 | |
| // This code is licensed to you under the terms of the GNU GPL, version 2 or,
 | |
| // at your option, any later version. See the LICENSE.txt file for the text of
 | |
| // the license.
 | |
| //-----------------------------------------------------------------------------
 | |
| // Implements a card only attack based on crypto text (encrypted nonces
 | |
| // received during a nested authentication) only. Unlike other card only
 | |
| // attacks this doesn't rely on implementation errors but only on the
 | |
| // inherent weaknesses of the crypto1 cipher. Described in
 | |
| //   Carlo Meijer, Roel Verdult, "Ciphertext-only Cryptanalysis on Hardened
 | |
| //   Mifare Classic Cards" in Proceedings of the 22nd ACM SIGSAC Conference on 
 | |
| //   Computer and Communications Security, 2015
 | |
| //-----------------------------------------------------------------------------
 | |
| //
 | |
| // brute forcing is based on @aczid's bitsliced brute forcer
 | |
| // https://github.com/aczid/crypto1_bs with some modifications. Mainly:
 | |
| // - don't rollback. Start with 2nd byte of nonce instead
 | |
| // - reuse results of filter subfunctions
 | |
| // - reuse results of previous nonces if some first bits are identical
 | |
| // 
 | |
| //-----------------------------------------------------------------------------
 | |
| // aczid's Copyright notice:
 | |
| //
 | |
| // Bit-sliced Crypto-1 brute-forcing implementation
 | |
| // Builds on the data structures returned by CraptEV1 craptev1_get_space(nonces, threshold, uid)
 | |
| /*
 | |
| Copyright (c) 2015-2016 Aram Verstegen
 | |
| 
 | |
| Permission is hereby granted, free of charge, to any person obtaining a copy
 | |
| of this software and associated documentation files (the "Software"), to deal
 | |
| in the Software without restriction, including without limitation the rights
 | |
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 | |
| copies of the Software, and to permit persons to whom the Software is
 | |
| furnished to do so, subject to the following conditions:
 | |
| 
 | |
| The above copyright notice and this permission notice shall be included in
 | |
| all copies or substantial portions of the Software.
 | |
| 
 | |
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 | |
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 | |
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 | |
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 | |
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 | |
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 | |
| THE SOFTWARE.
 | |
| */
 | |
| 
 | |
| #include "hardnested_bf_core.h"
 | |
| 
 | |
| #include <stdint.h>
 | |
| #include <stdbool.h>
 | |
| #include <stdlib.h>
 | |
| #ifndef __APPLE__
 | |
| #include <malloc.h>
 | |
| #endif
 | |
| #include <stdio.h>
 | |
| #include <string.h>
 | |
| #include "crapto1/crapto1.h"
 | |
| #include "parity.h"
 | |
| 
 | |
// bitslice type
// MAX_BITSLICES is the number of candidate states processed in parallel (one per bit of a vector).
// While AVX supports 256 bit vector floating point operations, we need integer operations for
// boolean logic, hence AVX builds use the same 128 bit vectors as SSE2 builds.
// Using larger vectors works but seems to generate more register pressure.
#if defined(__AVX512F__)
#define MAX_BITSLICES 512
#elif defined(__AVX2__)
#define MAX_BITSLICES 256
#elif defined(__AVX__) || defined(__SSE2__)
#define MAX_BITSLICES 128
#else // MMX or SSE or NOSIMD
#define MAX_BITSLICES 64
#endif

// size of one bitslice vector in bytes
#define VECTOR_SIZE (MAX_BITSLICES/8)

// compiler vector type (GCC/clang vector extension) holding MAX_BITSLICES bits
typedef unsigned int __attribute__((aligned(VECTOR_SIZE))) __attribute__((vector_size(VECTOR_SIZE))) bitslice_value_t;

// one bitslice vector, accessible as a whole vector, as 64 bit words or as bytes
typedef union {
	bitslice_value_t value;
	uint64_t bytes64[MAX_BITSLICES/64];
	uint8_t bytes[MAX_BITSLICES/8];
} bitslice_t;
| 
 | |
// filter function (f20)
// sourced from ``Wirelessly Pickpocketing a Mifare Classic Card'' by Flavio Garcia, Peter van Rossum, Roel Verdult and Ronny Wichers Schreur
// Every macro argument is parenthesized so that compound expressions can be passed safely.
// Note that the arguments are evaluated more than once; do not pass expressions with side effects.
#define f20a(a,b,c,d) ((((a)|(b))^((a)&(d)))^((c)&(((a)^(b))|(d))))
#define f20b(a,b,c,d) ((((a)&(b))|(c))^(((a)^(b))&((c)|(d))))
#define f20c(a,b,c,d,e) (((a)|(((b)|(e))&((d)^(e))))^(((a)^((b)&(d)))&(((c)^(d))|((b)&(e)))))

// bit indexing
// get_bit: bit n (0 = least significant) of an integer word
#define get_bit(n, word) (((word) >> (n)) & 1)
// get_vector_bit: bit number 'slice' of a bitslice_t, addressed through its 64 bit words
#define get_vector_bit(slice, value) get_bit((slice)&0x3f, (value).bytes64[(slice)>>6])

// size of crypto-1 state
#define STATE_SIZE 48
// size of nonce to be decrypted
#define KEYSTREAM_SIZE 24

// endianness conversion (reverses the byte order of a 32 bit word)
#define rev32(word) ((((word) & 0xff) << 24) | ((((word) >> 8) & 0xff) << 16) | ((((word) >> 16) & 0xff) << 8) | ((((word) >> 24) & 0xff)))
| 
 | |
// This file needs to be compiled several times, once for each instruction set.
// For each instruction set, define a dedicated function name:
#if defined (__AVX512F__)
#define BITSLICE_TEST_NONCES bitslice_test_nonces_AVX512
#define CRACK_STATES_BITSLICED crack_states_bitsliced_AVX512
#elif defined (__AVX2__)
#define BITSLICE_TEST_NONCES bitslice_test_nonces_AVX2
#define CRACK_STATES_BITSLICED crack_states_bitsliced_AVX2
#elif defined (__AVX__)
#define BITSLICE_TEST_NONCES bitslice_test_nonces_AVX
#define CRACK_STATES_BITSLICED crack_states_bitsliced_AVX
#elif defined (__SSE2__)
#define BITSLICE_TEST_NONCES bitslice_test_nonces_SSE2
#define CRACK_STATES_BITSLICED crack_states_bitsliced_SSE2
#elif defined (__MMX__)
#define BITSLICE_TEST_NONCES bitslice_test_nonces_MMX
#define CRACK_STATES_BITSLICED crack_states_bitsliced_MMX
#else
#define BITSLICE_TEST_NONCES bitslice_test_nonces_NOSIMD
#define CRACK_STATES_BITSLICED crack_states_bitsliced_NOSIMD
#endif
| 
 | |
| // typedefs and declaration of functions:
 | |
| typedef const uint64_t crack_states_bitsliced_t(uint32_t, uint8_t*, statelist_t*, uint32_t*, uint64_t*, uint32_t, uint8_t*, noncelist_t*);
 | |
| crack_states_bitsliced_t crack_states_bitsliced_AVX512;
 | |
| crack_states_bitsliced_t crack_states_bitsliced_AVX2;
 | |
| crack_states_bitsliced_t crack_states_bitsliced_AVX;
 | |
| crack_states_bitsliced_t crack_states_bitsliced_SSE2;
 | |
| crack_states_bitsliced_t crack_states_bitsliced_MMX;
 | |
| crack_states_bitsliced_t crack_states_bitsliced_NOSIMD;
 | |
| crack_states_bitsliced_t crack_states_bitsliced_dispatch;
 | |
| 
 | |
| typedef void bitslice_test_nonces_t(uint32_t, uint32_t*, uint8_t*);
 | |
| bitslice_test_nonces_t bitslice_test_nonces_AVX512;
 | |
| bitslice_test_nonces_t bitslice_test_nonces_AVX2;
 | |
| bitslice_test_nonces_t bitslice_test_nonces_AVX;
 | |
| bitslice_test_nonces_t bitslice_test_nonces_SSE2;
 | |
| bitslice_test_nonces_t bitslice_test_nonces_MMX;
 | |
| bitslice_test_nonces_t bitslice_test_nonces_NOSIMD;
 | |
| bitslice_test_nonces_t bitslice_test_nonces_dispatch;
 | |
| 
 | |
// Allocation of memory aligned to the size of a bitslice vector (MAX_BITSLICES/8 bytes).
// malloc_bitslice(x) returns x bytes of suitably aligned memory or NULL on failure,
// free_bitslice(x) releases memory obtained from malloc_bitslice().
#if defined (_WIN32)
#define malloc_bitslice(x) __builtin_assume_aligned(_aligned_malloc((x), MAX_BITSLICES/8), MAX_BITSLICES/8)
#define free_bitslice(x) _aligned_free(x)
#elif defined (__APPLE__)
static void *malloc_bitslice(size_t x) {
	// posix_memalign() stores into a void*; use a real void* object instead of casting
	// the address of a differently typed pointer
	void *allocated_memory = NULL;
	if (posix_memalign(&allocated_memory, MAX_BITSLICES/8, x) != 0) {
		return NULL;
	}
	return __builtin_assume_aligned(allocated_memory, MAX_BITSLICES/8);
}
#define free_bitslice(x) free(x)
#else
#define malloc_bitslice(x) memalign(MAX_BITSLICES/8, (x))
#define free_bitslice(x) free(x)
#endif
| 
 | |
// index selecting the even or odd half of the candidate states (used to index p->states[] and p->len[])
typedef enum {
	EVEN_STATE = 0,
	ODD_STATE = 1
} odd_even_t;
| 
 | |
| 
 | |
| // arrays of bitsliced states with identical values in all slices
 | |
| static bitslice_t bitsliced_encrypted_nonces[256][KEYSTREAM_SIZE];
 | |
| static bitslice_t bitsliced_encrypted_parity_bits[256][4];
 | |
| // 1 and 0 vectors
 | |
| static bitslice_t bs_ones;
 | |
| static bitslice_t bs_zeroes;
 | |
| 
 | |
| 
 | |
| void BITSLICE_TEST_NONCES(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) {
 | |
| 
 | |
| 	// initialize 1 and 0 vectors
 | |
|     memset(bs_ones.bytes, 0xff, VECTOR_SIZE);
 | |
|     memset(bs_zeroes.bytes, 0x00, VECTOR_SIZE);
 | |
| 
 | |
| 	// bitslice nonces' 2nd to 4th byte
 | |
| 	for (uint32_t i = 0; i < nonces_to_bruteforce; i++) {
 | |
| 		for(uint32_t bit_idx = 0; bit_idx < KEYSTREAM_SIZE; bit_idx++){
 | |
| 			bool bit = get_bit(KEYSTREAM_SIZE-1-bit_idx, rev32(bf_test_nonce[i] << 8));
 | |
| 			if(bit){
 | |
| 				bitsliced_encrypted_nonces[i][bit_idx].value = bs_ones.value;
 | |
| 			} else {
 | |
| 				bitsliced_encrypted_nonces[i][bit_idx].value = bs_zeroes.value;
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 	// bitslice nonces' parity (4 bits)
 | |
| 	for (uint32_t i = 0; i < nonces_to_bruteforce; i++) {
 | |
| 		for(uint32_t bit_idx = 0; bit_idx < 4; bit_idx++){
 | |
| 			bool bit = get_bit(4-1-bit_idx, bf_test_nonce_par[i]);
 | |
| 			if(bit){
 | |
| 				bitsliced_encrypted_parity_bits[i][bit_idx].value = bs_ones.value;
 | |
| 			} else {
 | |
| 				bitsliced_encrypted_parity_bits[i][bit_idx].value = bs_zeroes.value;
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| }
 | |
| 
 | |
| 
 | |
| const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces){
 | |
| 
 | |
|     // Unlike aczid's implementation this doesn't roll back at all when performing bitsliced bruteforce.
 | |
| 	// We know that the best first byte is already shifted in. Testing with the remaining three bytes of 
 | |
| 	// the nonces is sufficient to eliminate most of them. The small rest is tested with a simple unsliced
 | |
| 	// brute forcing (including roll back).
 | |
| 
 | |
| 	bitslice_t states[KEYSTREAM_SIZE+STATE_SIZE];
 | |
| 	bitslice_t * restrict state_p;
 | |
|     uint64_t key = -1;
 | |
|     uint64_t bucket_states_tested = 0;
 | |
|     uint32_t bucket_size[(p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1];
 | |
|     uint32_t bitsliced_blocks = 0;
 | |
|     uint32_t const *restrict p_even_end = p->states[EVEN_STATE] + p->len[EVEN_STATE];
 | |
| #if defined (DEBUG_BRUTE_FORCE)
 | |
| 	uint32_t elimination_step = 0;
 | |
| 	#define MAX_ELIMINATION_STEP	32
 | |
| 	uint64_t keys_eliminated[MAX_ELIMINATION_STEP] = {0};
 | |
| #endif	
 | |
| #ifdef DEBUG_KEY_ELIMINATION
 | |
| 	bool bucket_contains_test_key[(p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1];
 | |
| #endif
 | |
| 
 | |
| 	// constant ones/zeroes
 | |
| 	bitslice_t bs_ones;
 | |
|     memset(bs_ones.bytes, 0xff, VECTOR_SIZE);
 | |
| 	bitslice_t bs_zeroes;
 | |
|     memset(bs_zeroes.bytes, 0x00, VECTOR_SIZE);
 | |
| 	
 | |
|     // bitslice all the even states
 | |
|     bitslice_t **restrict bitsliced_even_states = (bitslice_t **)malloc(((p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1) * sizeof(bitslice_t *));
 | |
| 	if (bitsliced_even_states == NULL) {
 | |
| 		printf("Out of memory error in brute_force. Aborting...");
 | |
| 		exit(4);
 | |
| 	}
 | |
|     bitslice_value_t *restrict bitsliced_even_feedback = malloc_bitslice(((p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1) * sizeof(bitslice_value_t));
 | |
| 	if (bitsliced_even_feedback == NULL) {
 | |
| 		printf("Out of memory error in brute_force. Aborting...");
 | |
| 		exit(4);
 | |
| 	}
 | |
|     for(uint32_t *restrict p_even = p->states[EVEN_STATE]; p_even < p_even_end; p_even += MAX_BITSLICES){
 | |
|         bitslice_t *restrict lstate_p = malloc_bitslice(STATE_SIZE/2*sizeof(bitslice_t));
 | |
| 		if (lstate_p == NULL) {
 | |
| 			printf("Out of memory error in brute_force. Aborting... \n");
 | |
| 			exit(4);
 | |
| 		}
 | |
|         memset(lstate_p, 0x00, STATE_SIZE/2*sizeof(bitslice_t)); // zero even bits
 | |
|         // bitslice even half-states
 | |
|         const uint32_t max_slices = (p_even_end-p_even) < MAX_BITSLICES ? p_even_end-p_even : MAX_BITSLICES;
 | |
|         bucket_size[bitsliced_blocks] = max_slices;
 | |
| #ifdef DEBUG_KEY_ELIMINATION
 | |
| 		bucket_contains_test_key[bitsliced_blocks] = false;
 | |
| #endif
 | |
| 		uint32_t slice_idx;
 | |
|         for(slice_idx = 0; slice_idx < max_slices; ++slice_idx){
 | |
|             uint32_t e = *(p_even+slice_idx);
 | |
| #ifdef DEBUG_KEY_ELIMINATION
 | |
| 			if (known_target_key != -1 && e == test_state[EVEN_STATE]) {
 | |
| 				bucket_contains_test_key[bitsliced_blocks] = true;
 | |
| 				// printf("bucket %d contains test key even state\n", bitsliced_blocks);
 | |
| 				// printf("in slice %d\n", slice_idx);
 | |
| 			}
 | |
| #endif
 | |
|             for(uint32_t bit_idx = 0; bit_idx < STATE_SIZE/2; bit_idx++, e >>= 1){
 | |
|                 // set even bits
 | |
|                 if(e&1){
 | |
|                     lstate_p[bit_idx].bytes64[slice_idx>>6] |= 1ull << (slice_idx & 0x3f);
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
| 		// padding with last even state
 | |
| 		for ( ; slice_idx < MAX_BITSLICES; ++slice_idx) {
 | |
|             uint32_t e = *(p_even_end-1);
 | |
|             for(uint32_t bit_idx = 0; bit_idx < STATE_SIZE/2; bit_idx++, e >>= 1){
 | |
|                 // set even bits
 | |
|                 if(e&1){
 | |
|                     lstate_p[bit_idx].bytes64[slice_idx>>6] |= 1ull << (slice_idx & 0x3f);
 | |
|                 }
 | |
|             }
 | |
| 		}			
 | |
|         bitsliced_even_states[bitsliced_blocks] = lstate_p;
 | |
| 		// bitsliced_even_feedback[bitsliced_blocks] = bs_ones;
 | |
| 		bitsliced_even_feedback[bitsliced_blocks] = lstate_p[(47- 0)/2].value ^ 
 | |
|                                                     lstate_p[(47-10)/2].value ^ lstate_p[(47-12)/2].value ^ lstate_p[(47-14)/2].value ^
 | |
|                                                     lstate_p[(47-24)/2].value ^ lstate_p[(47-42)/2].value;
 | |
| 		bitsliced_blocks++;
 | |
|     }
 | |
|     // bitslice every odd state to every block of even states
 | |
|     for(uint32_t const *restrict p_odd = p->states[ODD_STATE]; p_odd < p->states[ODD_STATE] + p->len[ODD_STATE]; ++p_odd){
 | |
|         // early abort
 | |
|         if(*keys_found){
 | |
|             goto out;
 | |
|         }
 | |
| 		
 | |
| 		// set odd state bits and pre-compute first keystream bit vector. This is the same for all blocks of even states
 | |
| 		
 | |
| 		state_p = &states[KEYSTREAM_SIZE];
 | |
| 		uint32_t o = *p_odd;
 | |
| 
 | |
|         // pre-compute the odd feedback bit
 | |
|         bool odd_feedback_bit = evenparity32(o&0x29ce5c);
 | |
|         const bitslice_value_t odd_feedback = odd_feedback_bit ? bs_ones.value : bs_zeroes.value;
 | |
| 
 | |
| 		// set odd state bits
 | |
| 		for (uint32_t state_idx = 0; state_idx < STATE_SIZE; o >>= 1, state_idx += 2) {
 | |
| 			if (o & 1){
 | |
| 				state_p[state_idx] = bs_ones;
 | |
| 			} else {
 | |
| 				state_p[state_idx] = bs_zeroes;
 | |
| 			}
 | |
| 		}
 | |
| 		
 | |
| 		bitslice_value_t crypto1_bs_f20b_2[16];
 | |
| 		bitslice_value_t crypto1_bs_f20b_3[8];
 | |
| 
 | |
| 		crypto1_bs_f20b_2[0] = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value);
 | |
| 		crypto1_bs_f20b_3[0] = f20b(state_p[47-41].value, state_p[47-43].value, state_p[47-45].value, state_p[47-47].value);
 | |
| 		
 | |
| 		bitslice_value_t ksb[8];
 | |
| 		ksb[0] = f20c(f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value),
 | |
| 		              f20b(state_p[47-17].value, state_p[47-19].value, state_p[47-21].value, state_p[47-23].value),
 | |
| 		              crypto1_bs_f20b_2[0],
 | |
| 		              f20a(state_p[47-33].value, state_p[47-35].value, state_p[47-37].value, state_p[47-39].value),
 | |
| 		              crypto1_bs_f20b_3[0]);
 | |
| 
 | |
| 		uint32_t *restrict p_even = p->states[EVEN_STATE];
 | |
|         for (uint32_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx, p_even += MAX_BITSLICES) {
 | |
| 
 | |
| #ifdef DEBUG_KEY_ELIMINATION
 | |
| 			// if (known_target_key != -1 && bucket_contains_test_key[block_idx] && *p_odd == test_state[ODD_STATE]) {
 | |
| 				// printf("Now testing known target key.\n");
 | |
| 				// printf("block_idx = %d/%d\n", block_idx, bitsliced_blocks);
 | |
| 			// }
 | |
| #endif
 | |
|             // add the even state bits
 | |
| 			const bitslice_t *restrict bitsliced_even_state = bitsliced_even_states[block_idx];
 | |
| 			for(uint32_t state_idx = 1; state_idx < STATE_SIZE; state_idx += 2) {
 | |
| 				state_p[state_idx] = bitsliced_even_state[state_idx/2];
 | |
| 			}
 | |
| 
 | |
| 			// pre-compute first feedback bit vector. This is the same for all nonces
 | |
| 			bitslice_value_t fbb[8];
 | |
|             fbb[0] = odd_feedback ^ bitsliced_even_feedback[block_idx]; 
 | |
| 
 | |
|             // vector to contain test results (1 = passed, 0 = failed)
 | |
|             bitslice_t results = bs_ones;
 | |
| 			
 | |
| 			// parity_bits
 | |
| 			bitslice_value_t par[8];
 | |
| 			par[0] = bs_zeroes.value;
 | |
| 			uint32_t next_common_bits = 0;
 | |
| 
 | |
|             for(uint32_t tests = 0; tests < nonces_to_bruteforce; ++tests){
 | |
| 				// common bits with preceding test nonce
 | |
| 				uint32_t common_bits = next_common_bits; //tests ? trailing_zeros(bf_test_nonce_2nd_byte[tests] ^ bf_test_nonce_2nd_byte[tests-1]) : 0;
 | |
| 				next_common_bits = tests < nonces_to_bruteforce - 1 ? trailing_zeros(bf_test_nonce_2nd_byte[tests] ^ bf_test_nonce_2nd_byte[tests+1]) : 0;
 | |
|                 uint32_t parity_bit_idx = 1;							// start checking with the parity of second nonce byte
 | |
|                 bitslice_value_t fb_bits = fbb[common_bits];		// start with precomputed feedback bits from previous nonce
 | |
|                 bitslice_value_t ks_bits = ksb[common_bits];		// dito for first keystream bits
 | |
|                 bitslice_value_t parity_bit_vector = par[common_bits]; // dito for first parity vector
 | |
| 				// bitslice_value_t fb_bits = fbb[0];		// start with precomputed feedback bits from previous nonce
 | |
| 				// bitslice_value_t ks_bits = ksb[0];		// dito for first keystream bits
 | |
| 				// bitslice_value_t parity_bit_vector = par[0]; // dito for first parity vector
 | |
| 				state_p -= common_bits;								// and reuse the already calculated state bits
 | |
|                 // highest bit is transmitted/received first. We start with Bit 23 (highest bit of second nonce byte),
 | |
| 				// or the highest bit which differs from the previous nonce
 | |
|                 for (int32_t ks_idx = KEYSTREAM_SIZE-1-common_bits; ks_idx >= 0; --ks_idx) {
 | |
| 
 | |
|                     // decrypt nonce bits
 | |
|                     const bitslice_value_t encrypted_nonce_bit_vector = bitsliced_encrypted_nonces[tests][ks_idx].value;
 | |
|                     const bitslice_value_t decrypted_nonce_bit_vector = encrypted_nonce_bit_vector ^ ks_bits;
 | |
| 
 | |
|                     // compute real parity bits on the fly
 | |
|                     parity_bit_vector ^= decrypted_nonce_bit_vector;
 | |
| 
 | |
|                     // update state
 | |
| 					state_p--;
 | |
|                     state_p[0].value = fb_bits ^ decrypted_nonce_bit_vector;
 | |
| 
 | |
| 					// update crypto1 subfunctions
 | |
| 					bitslice_value_t f20a_1, f20b_1, f20b_2, f20a_2, f20b_3;
 | |
| 					f20a_2 = f20a(state_p[47-33].value, state_p[47-35].value, state_p[47-37].value, state_p[47-39].value);
 | |
| 					f20b_3 = f20b(state_p[47-41].value, state_p[47-43].value, state_p[47-45].value, state_p[47-47].value);
 | |
| 					if (ks_idx > KEYSTREAM_SIZE - 8) {
 | |
| 						f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value);
 | |
| 						f20b_1 = f20b(state_p[47-17].value, state_p[47-19].value, state_p[47-21].value, state_p[47-23].value);
 | |
| 						f20b_2 = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value);
 | |
| 						crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx] = f20b_2;
 | |
| 						crypto1_bs_f20b_3[KEYSTREAM_SIZE - ks_idx] = f20b_3;
 | |
| 					} else if (ks_idx > KEYSTREAM_SIZE - 16) {
 | |
| 						f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value);
 | |
| 						f20b_1 = crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx - 8];
 | |
| 						f20b_2 = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value);
 | |
| 						crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx] = f20b_2; 
 | |
| 					} else if (ks_idx > KEYSTREAM_SIZE - 24){
 | |
| 						f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value);
 | |
| 						f20b_1 = crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx - 8];
 | |
| 						f20b_2 = crypto1_bs_f20b_3[KEYSTREAM_SIZE - ks_idx - 16];
 | |
| 					} else {
 | |
| 						f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value);
 | |
| 						f20b_1 = f20b(state_p[47-17].value, state_p[47-19].value, state_p[47-21].value, state_p[47-23].value);
 | |
| 						f20b_2 = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value);
 | |
| 					}						
 | |
| 					// update keystream bit
 | |
| 					ks_bits = f20c(f20a_1, f20b_1, f20b_2, f20a_2, f20b_3);
 | |
| 
 | |
|                     // for each completed byte:
 | |
|                     if ((ks_idx & 0x07) == 0) {
 | |
|                         // get encrypted parity bits
 | |
|                         const bitslice_value_t encrypted_parity_bit_vector = bitsliced_encrypted_parity_bits[tests][parity_bit_idx++].value;
 | |
| 
 | |
|                         // decrypt parity bits
 | |
|                         const bitslice_value_t decrypted_parity_bit_vector = encrypted_parity_bit_vector ^ ks_bits;
 | |
| 
 | |
|                         // compare actual parity bits with decrypted parity bits and take count in results vector
 | |
|                         results.value &= ~parity_bit_vector ^ decrypted_parity_bit_vector;
 | |
| 
 | |
|                         // make sure we still have a match in our set
 | |
|                         // if(memcmp(&results, &bs_zeroes, sizeof(bitslice_t)) == 0){
 | |
| 
 | |
|                         // this is much faster on my gcc, because somehow a memcmp needlessly spills/fills all the xmm registers to/from the stack - ???
 | |
|                         // the short-circuiting also helps
 | |
|                         if(results.bytes64[0] == 0
 | |
| #if MAX_BITSLICES > 64
 | |
|                            && results.bytes64[1] == 0
 | |
| #endif
 | |
| #if MAX_BITSLICES > 128
 | |
|                            && results.bytes64[2] == 0
 | |
|                            && results.bytes64[3] == 0
 | |
| #endif
 | |
|                           ) {
 | |
| #if defined (DEBUG_BRUTE_FORCE)						  
 | |
| 							if (elimination_step < MAX_ELIMINATION_STEP) {
 | |
| 								keys_eliminated[elimination_step] += MAX_BITSLICES;
 | |
| 							}
 | |
| #endif
 | |
| #ifdef DEBUG_KEY_ELIMINATION
 | |
| 							if (known_target_key != -1 && bucket_contains_test_key[block_idx] && *p_odd == test_state[ODD_STATE]) {
 | |
| 								printf("Known target key eliminated in brute_force.\n");
 | |
| 								printf("block_idx = %d/%d, nonce = %d/%d\n", block_idx, bitsliced_blocks, tests, nonces_to_bruteforce);
 | |
| 							}
 | |
| #endif
 | |
| 							goto stop_tests;
 | |
| 						}
 | |
| 						// prepare for next nonce byte
 | |
| #if defined (DEBUG_BRUTE_FORCE)							  
 | |
| 						elimination_step++;
 | |
| #endif
 | |
| 						parity_bit_vector = bs_zeroes.value;
 | |
| 					}						
 | |
| 					// update feedback bit vector
 | |
| 					if (ks_idx != 0) {
 | |
| 						fb_bits = 
 | |
| 								  (state_p[47- 0].value ^ state_p[47- 5].value ^ state_p[47- 9].value ^
 | |
| 								   state_p[47-10].value ^ state_p[47-12].value ^ state_p[47-14].value ^
 | |
| 								   state_p[47-15].value ^ state_p[47-17].value ^ state_p[47-19].value ^
 | |
| 								   state_p[47-24].value ^ state_p[47-25].value ^ state_p[47-27].value ^
 | |
| 								   state_p[47-29].value ^ state_p[47-35].value ^ state_p[47-39].value ^
 | |
| 								   state_p[47-41].value ^ state_p[47-42].value ^ state_p[47-43].value);
 | |
| 					}
 | |
| 					// remember feedback and keystream vectors for later use
 | |
| 					uint8_t bit = KEYSTREAM_SIZE - ks_idx;
 | |
| 					if (bit <= next_common_bits) {  // if needed and not yet stored
 | |
| 						fbb[bit] = fb_bits;
 | |
| 						ksb[bit] = ks_bits;
 | |
| 						par[bit] = parity_bit_vector;
 | |
| 					}
 | |
|                 }
 | |
| 				// prepare for next nonce. Revert to initial state
 | |
| 				state_p = &states[KEYSTREAM_SIZE];
 | |
|             }
 | |
| 
 | |
|             // all nonce tests were successful: we've found a possible key in this block!
 | |
| 			uint32_t *p_even_test = p_even;
 | |
|             for (uint32_t results_word = 0; results_word < MAX_BITSLICES / 64; ++results_word) {
 | |
| 				uint64_t results64 = results.bytes64[results_word];
 | |
| 				for (uint32_t results_bit = 0; results_bit < 64; results_bit++) {
 | |
| 					if (results64 & 0x01) {
 | |
| 						if (verify_key(cuid, nonces, best_first_bytes, *p_odd, *p_even_test)) {
 | |
| 							struct Crypto1State pcs;
 | |
| 							pcs.odd = *p_odd;
 | |
| 							pcs.even = *p_even_test;
 | |
| 							lfsr_rollback_byte(&pcs, (cuid >> 24) ^ best_first_bytes[0], true);
 | |
| 							crypto1_get_lfsr(&pcs, &key);
 | |
| 							bucket_states_tested += 64 * results_word + results_bit;
 | |
| 							goto out;
 | |
| 						}
 | |
| #ifdef DEBUG_KEY_ELIMINATION
 | |
| 						if (known_target_key != -1 && *p_even_test == test_state[EVEN_STATE] && *p_odd == test_state[ODD_STATE]) {
 | |
| 							printf("Known target key eliminated in brute_force verification.\n");
 | |
| 							printf("block_idx = %d/%d\n", block_idx, bitsliced_blocks);
 | |
| 						}
 | |
| #endif
 | |
| 					}
 | |
| #ifdef DEBUG_KEY_ELIMINATION
 | |
| 					if (known_target_key != -1 && *p_even_test == test_state[EVEN_STATE] && *p_odd == test_state[ODD_STATE]) {
 | |
| 						printf("Known target key eliminated in brute_force (results_bit == 0).\n");
 | |
| 						printf("block_idx = %d/%d\n", block_idx, bitsliced_blocks);
 | |
| 					}
 | |
| #endif
 | |
| 					results64 >>= 1;
 | |
| 					p_even_test++;
 | |
| 					if (p_even_test == p_even_end) {
 | |
| 						goto stop_tests;
 | |
| 					}
 | |
| 				}
 | |
|             }
 | |
| stop_tests:
 | |
| #if defined (DEBUG_BRUTE_FORCE)							  
 | |
| 			elimination_step = 0;
 | |
| #endif			
 | |
|             bucket_states_tested += bucket_size[block_idx];
 | |
|             // prepare to set new states
 | |
| 			state_p = &states[KEYSTREAM_SIZE];
 | |
|             continue;
 | |
|         }
 | |
|     }
 | |
| out:
 | |
|     for(uint32_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx){
 | |
|         free_bitslice(bitsliced_even_states[block_idx]);
 | |
|     }
 | |
| 	free(bitsliced_even_states);
 | |
| 	free_bitslice(bitsliced_even_feedback);
 | |
|     __sync_fetch_and_add(num_keys_tested, bucket_states_tested);
 | |
| 	
 | |
| #if defined (DEBUG_BRUTE_FORCE)	
 | |
| 	for (uint32_t i = 0; i < MAX_ELIMINATION_STEP; i++) {
 | |
| 		printf("Eliminated after %2u test_bytes: %5.2f%%\n", i+1, (float)keys_eliminated[i] / bucket_states_tested * 100);
 | |
| 	}
 | |
| #endif	
 | |
|     return key;
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| #ifndef __MMX__
 | |
| 
 | |
| // pointers to functions:
 | |
| crack_states_bitsliced_t *crack_states_bitsliced_function_p = &crack_states_bitsliced_dispatch;
 | |
| bitslice_test_nonces_t *bitslice_test_nonces_function_p = &bitslice_test_nonces_dispatch;
 | |
| 
 | |
| static SIMDExecInstr intSIMDInstr = SIMD_AUTO;
 | |
| 
 | |
| void SetSIMDInstr(SIMDExecInstr instr) {
 | |
| 	intSIMDInstr = instr;
 | |
| 	
 | |
| 	crack_states_bitsliced_function_p = &crack_states_bitsliced_dispatch;
 | |
| 	bitslice_test_nonces_function_p = &bitslice_test_nonces_dispatch;
 | |
| }
 | |
| 
 | |
| SIMDExecInstr GetSIMDInstr() {
 | |
| 	SIMDExecInstr instr = SIMD_NONE;
 | |
| 	
 | |
| #if defined (__i386__) || defined (__x86_64__)
 | |
| 	#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
 | |
| 		#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2) 
 | |
| 		if (__builtin_cpu_supports("avx512f")) instr = SIMD_AVX512;
 | |
| 		else if (__builtin_cpu_supports("avx2")) instr = SIMD_AVX2;
 | |
| 		#else
 | |
| 		if (__builtin_cpu_supports("avx2")) instr = SIMD_AVX2;
 | |
| 		#endif
 | |
| 		else if (__builtin_cpu_supports("avx")) instr = SIMD_AVX;
 | |
| 		else if (__builtin_cpu_supports("sse2")) instr = SIMD_SSE2;
 | |
| 		else if (__builtin_cpu_supports("mmx")) instr = SIMD_MMX;
 | |
| 		else
 | |
| 	#endif
 | |
| #endif
 | |
| 		instr = SIMD_NONE;
 | |
| 		
 | |
| 	return instr;
 | |
| }
 | |
| 
 | |
| SIMDExecInstr GetSIMDInstrAuto() {
 | |
| 	SIMDExecInstr instr = intSIMDInstr;
 | |
| 	if (instr == SIMD_AUTO)
 | |
| 		return GetSIMDInstr();
 | |
| 	
 | |
| 	return instr;
 | |
| }
 | |
| 
 | |
| // determine the available instruction set at runtime and call the correct function
 | |
| const uint64_t crack_states_bitsliced_dispatch(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces) {
 | |
| 	switch(GetSIMDInstrAuto()) {
 | |
| #if defined (__i386__) || defined (__x86_64__)
 | |
| #if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
 | |
| #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2) 
 | |
| 		case SIMD_AVX512:
 | |
| 			crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX512;
 | |
| 			break;
 | |
| #endif
 | |
| 		case SIMD_AVX2:
 | |
| 			crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX2;
 | |
| 			break;
 | |
| 		case SIMD_AVX:
 | |
| 			crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX;
 | |
| 			break;
 | |
| 		case SIMD_SSE2:
 | |
| 			crack_states_bitsliced_function_p = &crack_states_bitsliced_SSE2;
 | |
| 			break;
 | |
| 		case SIMD_MMX:
 | |
| 			crack_states_bitsliced_function_p = &crack_states_bitsliced_MMX;
 | |
| 			break;
 | |
| #endif
 | |
| #endif
 | |
| 		default:
 | |
| 			crack_states_bitsliced_function_p = &crack_states_bitsliced_NOSIMD;
 | |
| 			break;
 | |
| 	}	
 | |
| 
 | |
|     // call the most optimized function for this CPU
 | |
|     return (*crack_states_bitsliced_function_p)(cuid, best_first_bytes, p, keys_found, num_keys_tested, nonces_to_bruteforce, bf_test_nonce_2nd_byte, nonces);
 | |
| }
 | |
| 
 | |
| void bitslice_test_nonces_dispatch(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) {
 | |
| 	switch(GetSIMDInstrAuto()) {
 | |
| #if defined (__i386__) || defined (__x86_64__)
 | |
| #if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
 | |
| #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2) 
 | |
| 		case SIMD_AVX512:
 | |
| 			bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX512;
 | |
| 			break;
 | |
| #endif
 | |
| 		case SIMD_AVX2:
 | |
| 			bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX2;
 | |
| 			break;
 | |
| 		case SIMD_AVX:
 | |
| 			bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX;
 | |
| 			break;
 | |
| 		case SIMD_SSE2:
 | |
| 			bitslice_test_nonces_function_p = &bitslice_test_nonces_SSE2;
 | |
| 			break;
 | |
| 		case SIMD_MMX:
 | |
| 			bitslice_test_nonces_function_p = &bitslice_test_nonces_MMX;
 | |
| 			break;
 | |
| #endif
 | |
| #endif
 | |
| 		default:
 | |
| 			bitslice_test_nonces_function_p = &bitslice_test_nonces_NOSIMD;
 | |
| 			break;
 | |
| 	}	
 | |
| 
 | |
|     // call the most optimized function for this CPU
 | |
|     (*bitslice_test_nonces_function_p)(nonces_to_bruteforce, bf_test_nonce, bf_test_nonce_par);
 | |
| }
 | |
| 
 | |
| // Entries to dispatched function calls
 | |
| const uint64_t crack_states_bitsliced(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces) {
 | |
|     return (*crack_states_bitsliced_function_p)(cuid, best_first_bytes, p, keys_found, num_keys_tested, nonces_to_bruteforce, bf_test_nonce_2nd_byte, nonces);
 | |
| }
 | |
| 
 | |
| void bitslice_test_nonces(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) {
 | |
|     (*bitslice_test_nonces_function_p)(nonces_to_bruteforce, bf_test_nonce, bf_test_nonce_par);
 | |
| }
 | |
| 
 | |
| #endif
 |