// LZ77 Compresion Example // Daniel Hladek (c) 2019,2022 Technical University of Kosice // https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-wusp/fb98aa28-5cd7-407f-8869-a6cef1ff1ccb // // #define _GNU_SOURCE #include #include #include #include #define SIZE 1000 #define BSIZE 9 // head // | // v // DATA: 1 2 3 4 5 6 7 8 9 0 // ^ ^ ^ // | | | // buffer window end of window // |<--->|<--->| // bsz wsz int main(){ // Data that are encoded char data[SIZE]; memset(data,0,SIZE); int dsz = 0; // Load data into array while (dsz < SIZE){ int c = getchar(); if (c == EOF){ break; } data[dsz++] = c; } // 'head': coding position: The position of the byte in the input stream that is currently being coded (the beginning of the lookahead buffer). int head = 0; int osz = 0; while (head < dsz && data[head] > 0){ // Search buffer is before head // Search buffer size int bsz = BSIZE; if (head < bsz){ bsz = head; } // buffer: A buffer of size W that contains bytes from the coding position backward. The buffer is empty at the beginning of the compression, then grows to size BSIZE as the input stream is processed. Once it reaches size BSIZE, it then "slides" along with the coding position. char* buffer = data + head -bsz; // window: lookahead window: The byte sequence from the coding position to the end of the input stream. char* window = data + head; // Lookahead window size int wsz = BSIZE; if ((dsz - head) < wsz){ wsz = BSIZE - head; } // We search in search buffer // for the longest string in lookahead that starts with head // search and lookahead buffers can have different sizes // msz is adjusted wsz int msz = wsz; if (bsz < wsz){ // Max largest match size is the minimum msz = bsz; } // Offset of the match in the past int offset = -1; // Size of the match in the past int size = -1; for (int i = 1; i < msz; i++){ // Searches window,i in buffer,bsz char* r = memmem(buffer,bsz,window,i); if (r > 0){ offset = bsz - (r - buffer); size = i + 1 ; } else { break; } } // Print match if (size > 0){ printf("%d",offset); printf("%d",size); printf("%c\n",data[head]); head += size; } else { printf("00%c\n",data[head]); head += 1; } osz += 1; } printf("---\n"); printf ("%d characters into %d codewords\n",dsz,osz); return 0; }