/* * MVCOMP compressor * Copyright (C) 2024 Mateusz Viste * * MIT License. * * This is a simple example implementation, not a production-ready tool. * * https://mvcomp.sourceforge.io */ #include #include /* write qlen literal bytes into dst, returns amount of "compressed" bytes */ static unsigned short mvcomp_litqueue_dump(unsigned short **dst, const unsigned char *q, unsigned short qlen) { unsigned short complen = 0; AGAIN: /* are we done? (also take care of guys calling me in for jokes) */ if (qlen == 0) return(complen); qlen--; /* now it's between 0 and 30 */ /* write the length and first char */ **dst = (unsigned short)((qlen / 2) << 8) | q[0]; *dst += 1; q++; complen += 2; /* anything left? */ if (qlen == 0) return(complen); /* write the pending words */ if (qlen > 1) { memcpy(*dst, q, (qlen/2)*2); *dst += qlen / 2; q += (qlen / 2) * 2; complen += (qlen / 2) * 2; qlen -= (qlen / 2) * 2; } /* one byte might still be left if it did not fit inside a word */ goto AGAIN; } /* mvcomp applies the MV-COMPRESSION algorithm to data and returns the compressed size */ static unsigned short mvcomp(void *dstbuf, size_t dstbufsz, const unsigned char *src, size_t len) { unsigned short complen = 0; unsigned short *dst = dstbuf; unsigned short bytesprocessed = 0; unsigned char litqueue[32]; unsigned char litqueuelen = 0; /* read src byte by byte, len times, each time look for a match of 15,14,13..2 chars in the back buffer */ while (len > 0) { unsigned short matchlen; unsigned short minmatch; unsigned short offset; matchlen = 16; if (len < matchlen) matchlen = (unsigned short)len; /* abort if no space in output buffer */ if (complen >= dstbufsz - 32) return(0); /* look for a minimum match of 2 bytes, unless I have some pending literal bytes * awaiting, in which case I am going through a new data pattern and it is more * efficient to wait for a 3-bytes match before breaking the literal string */ if (litqueuelen != 0) { minmatch = 3; } else { minmatch = 2; } for (; matchlen >= minmatch; matchlen--) { /* start at -matchlen and try to match something moving backward */ unsigned short maxoffset = 4096; if (maxoffset > bytesprocessed) maxoffset = bytesprocessed; for (offset = matchlen; offset <= maxoffset; offset++) { if (memcmp(src, src - offset, matchlen) == 0) { //printf("Found match of %u bytes at offset -%u: '%c%c%c...'\n", matchlen, offset, src[0], src[1], src[2]); goto FOUND; } } } /* if here: no match found, write a literal byte to queue */ litqueue[litqueuelen++] = *src; src++; bytesprocessed++; len--; /* dump literal queue to dst if max length reached */ if (litqueuelen == 31) { complen += mvcomp_litqueue_dump(&dst, litqueue, litqueuelen); litqueuelen = 0; } continue; FOUND: /* found a match of matchlen bytes at -offset */ /* dump awaiting literal queue to dst first */ if (litqueuelen != 0) { complen += mvcomp_litqueue_dump(&dst, litqueue, litqueuelen); litqueuelen = 0; } *dst = (unsigned short)((matchlen - 1) << 12) | (offset - 1); dst++; src += matchlen; bytesprocessed += matchlen; len -= matchlen; complen += 2; } /* dump awaiting literal queue to dst first */ if (litqueuelen != 0) { complen += mvcomp_litqueue_dump(&dst, litqueue, litqueuelen); litqueuelen = 0; } return(complen); } int main(int argc, char **argv) { FILE *fd; unsigned char bufin[65500]; unsigned char bufout[65500]; size_t bufin_len, bufout_len; /* help screen? */ if (argc != 3) { puts("usage: mvcomp file_in file_out"); return(1); } /* make sure output file does not exist */ fd = fopen(argv[2], "rb"); if (fd != NULL) { fclose(fd); puts("ERROR: output file already exists"); return(1); } /* load the input file */ fd = fopen(argv[1], "rb"); if (fd == NULL) { puts("ERROR: failed to open input file"); return(1); } bufin_len = fread(bufin, 1, sizeof(bufin), fd); fclose(fd); if (bufin_len == 0) { puts("ERROR: failed to load the input file"); return(1); } if (bufin_len == sizeof(bufin)) { printf("ERROR: input file too big, sorry. Max supported size is %zuK\n", sizeof(bufin) / 1024); return(1); } /* compress */ bufout_len = mvcomp(bufout, sizeof(bufout), bufin, bufin_len); if (bufout_len == 0) { puts("ERROR: compressed output too big"); return(1); } /* write to output */ fd = fopen(argv[2], "wb"); if (fd == NULL) { puts("ERROR: failed to open output file"); return(1); } if (fwrite(bufout, 1, bufout_len, fd) != bufout_len) { puts("ERROR: output file could not be written"); fclose(fd); return(1); } printf("%s -> %s [%zu%%] (%zu -> %zu)\n", argv[1], argv[2], (bufout_len * 100) / bufin_len, bufin_len, bufout_len); fclose(fd); return(0); }