Last active
January 13, 2017 22:05
-
-
Save philsmd/99aebdc722472b5322a4e0b0408d743a to your computer and use it in GitHub Desktop.
A proof of concept/test of what is needed for LZMA/LZMA2 decompression for -m 11600 = 7-Zip in hashcat
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// author: philsmd | |
// date: January 13, 2017 | |
// license: public domain | |
#include <stdio.h> | |
// extract the 7z-SDK - aka LZMA SDK - (http://www.7-zip.org/sdk.html) into lzma_sdk/ | |
#include "lzma_sdk/C/Alloc.h" | |
#include "lzma_sdk/C/LzmaDec.h" | |
#include "lzma_sdk/C/Lzma2Dec.h" | |
// actually we only need these files from the LZMA SDK: | |
// headers: 7zTypes.h Alloc.h Compiler.h Lzma2Dec.h LzmaDec.h Precomp.h | |
// code : Alloc.c Lzma2Dec.c LzmaDec.c | |
// compile this ANSI-C file like this:
// gcc lzma_sdk_use.c lzma_sdk/C/Alloc.c lzma_sdk/C/LzmaDec.c lzma_sdk/C/Lzma2Dec.c -o lzma_sdk_use | |
/* | |
* input/output examples: | |
*/ | |
// 1st example: | |
// LZMA2 compressed data within the file: dic3_pass_DDD_3.7z (from https://hashcat.net/forum/thread-4328-post-32826.html#pid32826) | |
// password = "DDD" | |
// Full content of file (bytes): | |
// 377abcaf271c0004cef9b14f30000000000000006a00000000000000a3b33894ba92081d93d4188a8166b7cc18ee20bb3374742d42757485f5fa85fb | |
// de7787c7d7145286db5eed0b938fa5f072afc3840104060001093000070b0100022406f107010a5307311a01fdc86ace3a2121010001000c2d3f0008 | |
// 0a014f565f1f00000501190900000000000000000011130064006900630033002e0074007800740000001900140a010000c79705926bc90115060100 | |
// 200000000000 | |
// output of 7z2hashcat.pl: | |
// $7z$2$19$0$$8$311a01fdc86ace3a0000000000000000$526341711$48$45$ba92081d93d4188a8166b7cc18ee20bb3374742d42757485f5fa85fbde7787c7d7145286db5eed0b938fa5f072afc384 | |
// "$7z$2$..." => that means LZMA2 compressed | |
// the main data of dic3_pass_DDD_3.7z: | |
// ba92081d93d4188a8166b7cc18ee20bb3374742d42757485f5fa85fbde7787c7d7145286db5eed0b938fa5f072afc384 | |
// AES decrypted (but still compressed): | |
// e0003e00255d0030833d461ac7ee33c8b839516343ff6c46347d54cca973d715c8fa0100b3ede066268f0a6000 | |
// LZMA2 decompressed: | |
// echo e0003e00255d0030833d461ac7ee33c8b839516343ff6c46347d54cca973d715c8fa0100b3ede066268f0a6000 | xxd -p -r | lzma --lzma2 -d --format=raw 2>/dev/null | xxd -p | tr -d '\n'; echo | |
// 610d0a61610d0a6161610d0a620d0a62620d0a6262620d0a630d0a63630d0a6363630d0a736f6c76653132330d0a736f6c76650d0a640d0a64640d0a646464 | |
// therefore for this example the input to LZMA2 is: | |
// e0003e00255d0030833d461ac7ee33c8b839516343ff6c46347d54cca973d715c8fa0100b3ede066268f0a6000 | |
// and the output is: | |
// 610d0a61610d0a6161610d0a620d0a62620d0a6262620d0a630d0a63630d0a6363630d0a736f6c76653132330d0a736f6c76650d0a640d0a64640d0a646464 | |
// validate data: | |
// crc32 <((echo 610d0a61610d0a6161610d0a620d0a62620d0a6262620d0a630d0a63630d0a6363630d0a736f6c76653132330d0a736f6c76650d0a640d0a64640d0a646464 | xxd -r -p)) | |
// 1f5f564f | |
// note: 0x1f5f564f == 526341711 | |
// 2nd example: | |
// LZMA1 compressed data within the file: test5.7z | |
// password = "a" | |
// Full content of file (bytes): | |
// 377abcaf271c0003a6a804a88e000000000000002000000000000000bcfdf24138fd091f2e5e1c264e34a96f2eaa42662fbcefb316f9dcebdbdfe7c9 | |
// 1db77e6a0000813307ae0fd00eb03c9f39109c9fa6d533da7f1f3415e51920eea1d3fb5e5fc3e12a7e213bdf78b2fd46712c52e0f990f20181ac9a95 | |
// 850e0394c5330d231063b1a9ae6e3c6fefcde18f9344ddee2d90c161db10b0df652fdd2e2f6633e2f6979a055ea54e23d5950a00000017062001096e | |
// 00070b01000123030101055d001000000c7d0a01012ecf000000 | |
// output of 7z2hashcat.pl: | |
// $7z$1$19$0$$16$38fd091f2e5e1c264e34a96f2eaa4266$1200128334$16$1$2fbcefb316f9dcebdbdfe7c91db77e6a | |
// "$7z$1$..." => that means LZMA1 compressed | |
// note here we can use the padding attack trick, without that attack: | |
// $7z$1$19$0$$8$f9ece801eae3ff0c0000000000000000$1200128334$32$17$38fd091f2e5e1c264e34a96f2eaa42662fbcefb316f9dcebdbdfe7c91db77e6a | |
// the main data of test5.7z: | |
// 38fd091f2e5e1c264e34a96f2eaa42662fbcefb316f9dcebdbdfe7c91db77e6a | |
// AES decrypted (but still compressed): | |
// 00309888913abf896b510827bb03e1b360 | |
// LZMA1 decompressed: | |
// echo 00309888913abf896b510827bb03e1b360 | xxd -p -r | lzma --lzma1 -d --format=raw 2>/dev/null | xxd -p | tr -d '\n'; echo | |
// 6162630a6465660a6768690a | |
// therefore for this example the input to LZMA1 is: | |
// 00309888913abf896b510827bb03e1b360 | |
// and the output is: | |
// 6162630a6465660a6768690a | |
// validate data (ATTENTION: the first file contains only the string "abc\n"): | |
// crc32 <((echo 6162630a | xxd -r -p)) | |
// 4788814e | |
// note: 0x4788814e = 1200128334 | |
// ATTENTION: we currently do not have this info within the hash format, we would need the content of | |
// $substreams_info->{'unpack_sizes'}[0] from 7z2hashcat.pl within the hash format (i.e. the length of the first "file") | |
// otherwise we could brute-force it (by generating CRC checksums of all input lengths starting with the total output length | |
// and decrease it). In this example we would first test with length 12, 11, ..., 4 (matches therefore we can stop) | |
// It would be better to not use this unnecessary brute-force technique here, therefore we have to change the hash format again. | |
// Since the CRC32 checksums are anyway generated incrementally, brute-forcing could be implemented by checking the checksums | |
// on each and every "update"/iteration | |
// ATTENTION: I just noticed that at least for example 2 (to be safe) we also need the output of: | |
// print unpack ("H*", $coder->{'attributes'}) | |
// ... from 7z2hashcat.pl within the hash format, i.e. the so-called 'coder attributes' that define the lc, pb, lp and the dicSize. | |
// Where: | |
// d = data[0]; | |
// lc = d % 9; | |
// d /= 9; | |
// pb = d / 5; | |
// lp = d % 5 | |
// dicSize = data[1] | (data[2] << 8) | (data[3] << 16) | (data[4] << 24); | |
// ATTENTION: I just realized we also need the value of the decompressed lengths (see below): | |
// $streams_info->{'unpack_info'}->{'unpack_sizes'}[$lzma_coder_id] | |
// (from 7z2hashcat.pl, i.e. another reason why we need to change the hash format?) | |
/* | |
* some wrappers/helper functions: | |
*/ | |
/*
 * Decompress a raw LZMA1 stream with a single call to LzmaDecode ().
 *
 * in      : LZMA_PROPS_SIZE bytes of coder properties, immediately followed by the compressed payload
 * in_len  : in:  total number of bytes available in `in` (properties + payload)
 *           out: written by LzmaDecode () (per LzmaDec.h: the processed input size, which here does
 *                not include the properties), 0 if the input is too short
 * out     : destination buffer
 * out_len : in:  capacity of `out`
 *           out: written by LzmaDecode () (per LzmaDec.h: the processed output size), 0 if the input
 *                is too short
 *
 * returns the result of LzmaDecode () (SZ_OK on success), or SZ_ERROR_INPUT_EOF if the input is too
 * short to even contain the properties
 */
int hc_lzma1_decompress (const unsigned char *in, SizeT /*unsigned int*/ *in_len, unsigned char *out, size_t *out_len)
{
  extern ISzAlloc g_Alloc;

  ELzmaStatus status;

  // without this check the subtraction below wraps around (SizeT is unsigned) and LzmaDecode () would
  // be told that a huge amount of input is available behind a too short buffer

  if (*in_len < LZMA_PROPS_SIZE)
  {
    *in_len  = 0;
    *out_len = 0;

    return SZ_ERROR_INPUT_EOF;
  }

  // only the payload length is handed over, the properties are passed separately

  *in_len -= LZMA_PROPS_SIZE;

  // parameters to LzmaDecode (): unsigned char *dest, size_t *destLen, const unsigned char *src,
  // size_t *srcLen, const unsigned char *props, unsigned propsSize, ELzmaFinishMode finishMode,
  // ELzmaStatus *status, ISzAlloc *alloc

  return LzmaDecode (out, out_len, in + LZMA_PROPS_SIZE, in_len, in, LZMA_PROPS_SIZE, LZMA_FINISH_ANY, &status, &g_Alloc);
}
// value passed as the "prop" (dictionary size) byte to Lzma2Decode (), because the real value is not
// known here (NOTE: 40 is presumably the largest value accepted by Lzma2Dec.c - verify against the SDK)
#define SZ_UNKNOWN_DICT_SIZE 40

/*
 * Decompress a raw LZMA2 stream with a single call to Lzma2Decode ().
 *
 * in      : the compressed data
 * in_len  : number of bytes available in `in`, handed to Lzma2Decode () which writes it back
 * out     : destination buffer
 * out_len : capacity of `out`, handed to Lzma2Decode () which writes it back
 *
 * returns the result of Lzma2Decode () unchanged (SZ_OK on success)
 */
int hc_lzma2_decompress (const unsigned char *in, SizeT /*unsigned int*/ *in_len, unsigned char *out, size_t *out_len)
{
  extern ISzAlloc g_Alloc;

  // required by the API, but not evaluated by this wrapper

  ELzmaStatus lzma_status;

  // argument order of Lzma2Decode (): dest, destLen, src, srcLen, prop, finishMode, status, alloc

  const int res = Lzma2Decode (out, out_len, in, in_len, SZ_UNKNOWN_DICT_SIZE, LZMA_FINISH_ANY, &lzma_status, &g_Alloc);

  return res;
}
// | |
// btw: the return codes are defined in lzma_sdk/C/7zTypes.h ("SZ" means "Seven Zip"): | |
// | |
// 0: SZ_OK | |
// 1: SZ_ERROR_DATA | |
// 2: SZ_ERROR_MEM | |
// 3: SZ_ERROR_CRC | |
// 4: SZ_ERROR_UNSUPPORTED | |
// 5: SZ_ERROR_PARAM | |
// 6: SZ_ERROR_INPUT_EOF | |
// 7: SZ_ERROR_OUTPUT_EOF | |
// 8: SZ_ERROR_READ | |
// 9: SZ_ERROR_WRITE | |
// 10: SZ_ERROR_PROGRESS | |
// 11: SZ_ERROR_FAIL | |
// 12: SZ_ERROR_THREAD | |
// | |
/* | |
* Main: | |
* ATTENTION: the commented lines correspond to example 1 (LZMA2), the other ones to example 2 (LZMA1) | |
*/ | |
int main () | |
{ | |
int ret = -1; | |
// the type of compression that is used (LZMA/LZMA/NONE): | |
//int compression_indicator = 2; // 0 = uncompressed, 1 = LZMA compressed, 2 = LZMA2 compressed | |
int compression_indicator = 1; | |
// the input data: | |
//unsigned long data_len = 45; | |
unsigned long data_len = LZMA_PROPS_SIZE + 17; | |
//unsigned char *data = "\xe0\x00\x3e\x00\x25\x5d\x00\x30\x83\x3d\x46\x1a\xc7\xee\x33\xc8" | |
// "\xb8\x39\x51\x63\x43\xff\x6c\x46\x34\x7d\x54\xcc\xa9\x73\xd7\x15" | |
// "\xc8\xfa\x01\x00\xb3\xed\xe0\x66\x26\x8f\x0a\x60\x00"; | |
// the coder attributes problem is explained above (here we use lc = 3, pb = 2, lp = 0 dicSize = 65536): | |
unsigned char *data = "\x5d\x00\x00\x01\x00" // this is from $coder->{'attributes'} not yet within the hash format | |
"\x00\x30\x98\x88\x91\x3a\xbf\x89\x6b\x51\x08\x27\xbb\x03\xe1\xb3" | |
"\x60"; | |
// the output data: | |
//size_t decompressed_len = 63; // this value is from $streams_info->{'unpack_info'}->{'unpack_sizes'}[$lzma_coder_id] | |
size_t decompressed_len = 12; | |
unsigned char data_decompressed[decompressed_len]; | |
// decompress it: | |
if (compression_indicator == 1) | |
{ | |
ret = hc_lzma1_decompress (data, &data_len, data_decompressed, &decompressed_len); | |
} | |
else if (compression_indicator == 2) | |
{ | |
ret = hc_lzma2_decompress (data, &data_len, data_decompressed, &decompressed_len); | |
} | |
// print the decompressed data: | |
if (ret == SZ_OK) | |
{ | |
int i; | |
for (i = 0; i < decompressed_len; i++) | |
{ | |
printf ("%02x", data_decompressed[i]); | |
} | |
printf ("\n"); | |
//for (i = 0; i < decompressed_len; i++) | |
//{ | |
// printf ("%c", data_decompressed[i]); | |
//} | |
//printf ("\n"); | |
} | |
return ret; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment