diff --git a/compression/huffman/Makefile b/compression/huffman/Makefile
new file mode 100644
--- /dev/null
+++ b/compression/huffman/Makefile
@@ -0,0 +1,17 @@
+# Builds "project", the Huffman compressor/decompressor.
+CC = cc
+CFLAGS = -Wall
+OBJS = main.o compress.o decompress.o heap.o table.o tree.o
+
+project: $(OBJS)
+	$(CC) $(CFLAGS) $(OBJS) -o project
+
+# Objects are compiled by make's built-in .c.o rule; only the header
+# dependencies are listed. heap.h includes table.h, which includes tree.h.
+main.o compress.o heap.o: heap.h table.h tree.h
+table.o: table.h tree.h
+decompress.o tree.o: tree.h
+
+.PHONY: clean
+clean:
+	rm -f project $(OBJS)
diff --git a/compression/huffman/README.md b/compression/huffman/README.md
new file mode 100644
--- /dev/null
+++ b/compression/huffman/README.md
@@ -0,0 +1,11 @@
+Compression algorithms
+
+1) Huffman coding
+   * read the file and count the frequency of each byte
+   * create a one-node tree for each byte that occurs and add it to a min-heap
+     (no pseudo-EOF symbol is added; the number of valid bits in the last byte is stored instead)
+   * build the whole tree by repeatedly taking the two lightest trees from the heap and merging them until one tree remains
+   * create the code table from the tree
+   * write the compressed file: tree shape, leaf bytes, then the packed codes
+
+2) lzw
diff --git a/compression/huffman/compress.c b/compression/huffman/compress.c
new file mode 100644
--- /dev/null
+++ b/compression/huffman/compress.c
@@ -0,0 +1,144 @@
+/* Huffman compressor: builds the code tree for an input file and writes the
+ * compressed stream (tree shape, leaf bytes, then the packed code bits). */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include"heap.h"
+
+#define MAX 257
+
+/* Collects bits most-significant-bit first and writes each full byte to fd. */
+typedef struct bitwriter{
+    int fd;
+    unsigned char byte;
+    int bits;
+}bitwriter;
+
+/* Appends one bit (0 or 1) to the stream. */
+static void putbit(bitwriter *bw, int bit){
+    bw->byte = (unsigned char)((bw->byte << 1) | (bit & 1));
+    bw->bits++;
+    if(bw->bits == 8){
+        write(bw->fd, &bw->byte, sizeof(bw->byte));
+        bw->byte = 0;
+        bw->bits = 0;
+    }
+}
+
+/* Ends a bit stream: writes the last byte, left-aligned and zero-padded,
+ * followed by the number of valid bits in it as an ASCII digit ('0'..'7').
+ * The byte is written even when it is all zero: the decoder always treats
+ * the byte in front of the digit as the partial one, so leaving it out
+ * makes the decoder cut the last full byte of data short. */
+static void flushbits(bitwriter *bw){
+    char last = (char)(bw->bits + '0');
+    unsigned char w = (unsigned char)(bw->byte << (8 - bw->bits));
+    write(bw->fd, &w, sizeof(w));
+    write(bw->fd, &last, sizeof(last));
+    bw->byte = 0;
+    bw->bits = 0;
+}
+
+/* Records the shape of tree *t in pre-order: appends 1 to arr for a leaf
+ * (a node whose two child pointers are equal) and 0 for any other node,
+ * advancing *n past each entry. */
+void preorder(tree* t, int* arr, int* n){
+    if(*t == NULL)
+        return;
+    if((*t)->left == (*t)->right)
+        arr[(*n)++] = 1;
+    else
+        arr[(*n)++] = 0;
+    preorder(&((*t)->left), arr, n);
+    preorder(&((*t)->right), arr, n);
+}
+
+/* Writes the byte stored in every leaf of *t to fdw, in pre-order. */
+void writeleaves(tree* t, int fdw){
+    if(*t == NULL)
+        return;
+    if((*t)->left == NULL && (*t)->right == NULL){
+        write(fdw, &((*t)->d.ch), sizeof(char));
+    }
+    writeleaves(&((*t)->left), fdw);
+    writeleaves(&((*t)->right), fdw);
+}
+
+/* Writes the tree-shape header to fdw: the n entries of arr packed one bit
+ * each, the valid-bit count of the last byte as an ASCII digit, then a NUL. */
+void header(int fdw, int* arr, int n){
+    bitwriter bw = { fdw, 0, 0 };
+    char no = '\0';
+    int i;
+    for(i = 0; i < n; i++)
+        putbit(&bw, arr[i] == 1);
+    flushbits(&bw);
+    write(fdw, &no, sizeof(no));
+}
+
+/* Compresses everything readable from fdr into fdw.
+ * th is the code table and *t the code tree produced by initial() for the
+ * same input. Returns the number of input bytes read. When *t is NULL
+ * (empty input) nothing is written and 0 is returned. */
+int compress(int fdr, int fdw, table* th, tree* t){
+    int arr[1024];
+    int n = 0;
+    int c = 0;
+    unsigned char ch;
+    const char *code;
+    bitwriter bw = { fdw, 0, 0 };
+
+    if(*t == NULL)
+        return 0;
+    preorder(t, arr, &n);
+    header(fdw, arr, n);
+    writeleaves(t, fdw);
+    /* "== 1" rather than "!= 0" so that a read error (-1) ends the loop */
+    while(read(fdr, &ch, sizeof(ch)) == 1){
+        c++;
+        for(code = th[ch].str; *code != '\0'; code++)
+            putbit(&bw, *code == '1');
+    }
+    flushbits(&bw);
+    return c;
+}
+
+/* Reads all of fdr, builds the Huffman tree for its byte frequencies and
+ * fills th with the code of every byte that occurs.
+ * Returns the tree, or NULL when the input is empty (th is then untouched). */
+tree initial(int fdr, table *th){
+    data d[MAX];
+    tree root = NULL, first, second;
+    heap h;
+    int i;
+    unsigned char ch;
+    char str[1024];
+
+    InitHeap(&h);
+    for(i = 0; i < MAX; i++){
+        d[i].ch = (unsigned char)i;
+        d[i].count = 0;
+    }
+    while(read(fdr, &ch, sizeof(ch)) == 1)
+        d[ch].count++;
+    for(i = 0; i < MAX; i++){
+        if(d[i].count != 0){
+            first = CreateTreeOfOneNode(d[i]);
+            InsertInHeap(&h, &first);
+        }
+    }
+    /* Merge the two lightest trees until only one is left. */
+    while(!IsHeapEmpty(&h)){
+        root = RemoveFromHeap(&h);
+        if(IsHeapEmpty(&h))
+            break;
+        first = root;
+        second = RemoveFromHeap(&h);
+        root = CreateTreeFromTwoTree(&first, &second);
+        InsertInHeap(&h, &root);
+    }
+    if(root != NULL)
+        CreateTable(th, &root, str, 0);
+    return root;
+}
diff --git a/compression/huffman/decompress.c b/compression/huffman/decompress.c
new file mode 100644
--- /dev/null
+++ b/compression/huffman/decompress.c
@@ -0,0 +1,134 @@
+/* Huffman decompressor: rebuilds the code tree from the file header and
+ * decodes the packed bit stream that follows it. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include"tree.h"
+
+/* Capacity of the pre-order array that decompress() hands to getpreorder(). */
+#define PREORDER_MAX 1024
+
+/* Reads the tree shape from fdr into arr (1 = leaf, 0 = internal node),
+ * advancing *n past each entry; at most PREORDER_MAX entries are stored.
+ * Every internal node has two children, so the pre-order listing is complete
+ * as soon as no subtree is left open. Bits are read until then instead of
+ * scanning for the NUL terminator, because a header byte made of eight
+ * internal-node bits is itself 0. The bit-count digit and the NUL that
+ * follow the shape are then skipped. Empty input leaves *n unchanged. */
+void getpreorder(int fdr, int* arr, int* n){
+    unsigned char ch;
+    int pending = 1; /* subtrees still to be read, starting with the root */
+    int i, leaf;
+    while(pending > 0 && *n < PREORDER_MAX && read(fdr, &ch, sizeof(ch)) == 1){
+        for(i = 0; i < 8 && pending > 0 && *n < PREORDER_MAX; i++){
+            leaf = (ch & 0x80) != 0;
+            arr[(*n)++] = leaf;
+            /* a leaf closes one subtree; an internal node closes one and opens two */
+            pending += leaf ? -1 : 1;
+            ch = (unsigned char)(ch << 1);
+        }
+    }
+    /* skip the bit-count digit and the NUL terminator */
+    read(fdr, &ch, sizeof(ch));
+    read(fdr, &ch, sizeof(ch));
+}
+
+/* Builds the subtree whose pre-order listing starts at arr[*index] and
+ * advances *index past it. Returns NULL once the n entries are used up. */
+tree buildtreeconti(int* arr, int* index, int n){
+    int in = *index;
+    node* tmp;
+    if(in >= n)
+        return NULL;
+    tmp = malloc(sizeof(*tmp));
+    if(tmp == NULL){
+        perror("malloc");
+        exit(1);
+    }
+    tmp->d.ch = '\0';
+    tmp->d.count = 0;
+    tmp->isleaf = arr[in];
+    tmp->left = tmp->right = NULL;
+    (*index)++;
+    if(arr[in] == 0){
+        tmp->left = buildtreeconti(arr, index, n);
+        tmp->right = buildtreeconti(arr, index, n);
+    }
+    return tmp;
+}
+
+/* Builds the tree described by the n pre-order entries of arr. */
+tree buildtree(int*arr, int n){
+    int index = 0;
+    return buildtreeconti(arr, &index, n);
+}
+
+/* Fills in the byte of every leaf of *t, reading one byte per leaf from fdr
+ * in pre-order (the order in which the compressor wrote them). */
+void giveleaves(tree* t, int fdr){
+    if(*t == NULL)
+        return;
+    if((*t)->left == NULL && (*t)->right == NULL){
+        unsigned char ch = '\0';
+        read(fdr, &ch, sizeof(ch));
+        (*t)->d.ch = ch;
+    }
+    giveleaves(&((*t)->left), fdr);
+    giveleaves(&((*t)->right), fdr);
+}
+
+/* Follows the top `count` bits of byte down the tree from p (0 = left,
+ * 1 = right), writing the byte of each leaf reached to fdw and restarting
+ * from root. Returns the node reached after the last bit.
+ * A missing child leads back to root, so a tree that is a single leaf
+ * yields its byte for every bit. */
+static node *walkbits(int fdw, node *root, node *p, unsigned char byte, int count){
+    node *next;
+    int i;
+    for(i = 0; i < count; i++){
+        next = (byte & 0x80) ? p->right : p->left;
+        byte = (unsigned char)(byte << 1);
+        if(next == NULL)
+            next = root;
+        if(next->left == NULL && next->right == NULL){
+            write(fdw, &(next->d.ch), sizeof(next->d.ch));
+            next = root;
+        }
+        p = next;
+    }
+    return p;
+}
+
+/* Decodes the packed code bits from fdr into fdw using tree *t.
+ * The stream ends with a partial byte followed by an ASCII digit giving the
+ * number of valid bits in that byte, so reading runs two bytes ahead. */
+void decode(int fdr, int fdw, tree* t){
+    node* root = *t;
+    node* p = root;
+    unsigned char ch, next, nextkanext;
+    int use;
+    if(root == NULL)
+        return;
+    if(read(fdr, &ch, sizeof(ch)) != 1 || read(fdr, &next, sizeof(next)) != 1)
+        return;
+    while(read(fdr, &nextkanext, sizeof(nextkanext)) == 1){
+        p = walkbits(fdw, root, p, ch, 8);
+        ch = next;
+        next = nextkanext;
+    }
+    use = next - '0';
+    if(use > 8)
+        use = 8;
+    walkbits(fdw, root, p, ch, use);
+}
+
+/* Decompresses fdr into fdw. Empty input produces empty output. */
+void decompress(int fdr, int fdw){
+    int arr[PREORDER_MAX], n = 0;
+    tree t;
+    getpreorder(fdr, arr, &n);
+    t = buildtree(arr, n);
+    giveleaves(&t, fdr);
+    decode(fdr, fdw, &t);
+}
diff --git a/compression/huffman/heap.c b/compression/huffman/heap.c
new file mode 100644
--- /dev/null
+++ b/compression/huffman/heap.c
@@ -0,0 +1,90 @@
+/* Binary min-heap of trees, ordered by the count stored in each tree's root. */
+#include"heap.h"
+#include <stdio.h>
+
+/* Makes h empty. */
+void InitHeap(heap *h){
+    h->n = 0;
+}
+
+/* Adds *t to h and sifts it up to its place. A full heap is left unchanged. */
+void InsertInHeap(heap *h, tree *t){
+    int c, p;
+    tree tmp;
+    if(IsHeapFull(h))
+        return;
+    c = h->n;
+    h->t[c] = *t;
+    p = (c - 1)/2;
+    while(c > 0){
+        if((h->t[c])->d.count < (h->t[p])->d.count){
+            tmp = h->t[c];
+            h->t[c] = h->t[p];
+            h->t[p] = tmp;
+        }
+        else
+            break;
+        c = p;
+        p = (c - 1)/2;
+    }
+    (h->n)++;
+}
+
+/* Returns whichever of the indices x, y, z holds the tree with the smallest
+ * count; on ties the earlier argument wins. */
+int findmin(heap *h, int x, int y, int z){
+    if((h->t[x])->d.count <= (h->t[y])->d.count &&
+        (h->t[x])->d.count <= (h->t[z])->d.count)
+        return x;
+    else if((h->t[y])->d.count <= (h->t[x])->d.count &&
+        (h->t[y])->d.count <= (h->t[z])->d.count)
+        return y;
+    else
+        return z;
+}
+
+/* Removes and returns the tree with the smallest count, or NULL when h is
+ * empty. */
+tree RemoveFromHeap(heap *h){
+    tree ret, tmp;
+    int c1, c2, p, small;
+    if(IsHeapEmpty(h))
+        return NULL;
+    ret = h->t[0];
+    h->t[0] = h->t[--(h->n)];
+    p = 0;
+    c1 = 2*p + 1;
+    c2 = 2*p + 2;
+    while(c1 < h->n){
+        /* with only a left child, compare against it twice instead of
+         * reading the slot past the end of the heap */
+        small = findmin(h, p, c1, c2 < h->n ? c2 : c1);
+        if(small == p)
+            break;
+        tmp = h->t[small];
+        h->t[small] = h->t[p];
+        h->t[p] = tmp;
+        p = small;
+        c1 = 2*p + 1;
+        c2 = 2*p + 2;
+    }
+    return ret;
+}
+
+/* Returns non-zero when h holds no trees. */
+int IsHeapEmpty(heap *h){
+    return h->n == 0;
+}
+
+/* Returns non-zero when h holds SIZE trees. */
+int IsHeapFull(heap *h){
+    return h->n == SIZE;
+}
+
+/* Prints the count of every tree in h, in array order. */
+void printheap(heap *h){
+    int i = 0;
+    for(i = 0; i < h->n; i++){
+        printf("%d ", (h->t[i])->d.count);
+    }
+}
diff --git a/compression/huffman/heap.h b/compression/huffman/heap.h
new file mode 100644
--- /dev/null
+++ b/compression/huffman/heap.h
@@ -0,0 +1,18 @@
+/* Fixed-capacity binary min-heap of Huffman trees. */
+#ifndef HEAP_H
+#define HEAP_H
+
+#include"table.h"
+#define SIZE 257
+typedef struct heap{
+    tree t[SIZE]; /* heap array; t[0] has the smallest count */
+    int n; /* number of trees stored */
+}heap;
+void InitHeap(heap *h);
+void InsertInHeap(heap *h, tree *t);
+tree RemoveFromHeap(heap *h);
+int IsHeapEmpty(heap *h);
+int IsHeapFull(heap *h);
+void printheap(heap *h);
+
+#endif
diff --git a/compression/huffman/main.c b/compression/huffman/main.c
new file mode 100644
--- /dev/null
+++ b/compression/huffman/main.c
@@ -0,0 +1,83 @@
+/* Command-line front end: "-c1" compresses filename1 into filename2 and
+ * "-uc1" decompresses filename1 into filename2. */
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include"heap.h"
+
+#define SZ 257
+/* rw-r--r--: output files are plain data that the owner may rewrite. */
+#define OUTPUT_MODE (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)
+
+int compress(int fdr, int fdw, table* th, tree* t);
+void decompress(int fdr, int fdw); /* defined as void in decompress.c */
+tree initial(int fdr, table *th);
+
+/* Prints the usage text and exits with status 1. */
+static void usage(void){
+    printf("Usage: ./project operation filename1 filename2\n");
+    printf("operation :\n\t-c1 huffman compression\n\t-uc1 huffman decompression\n");
+    exit(1);
+}
+
+/* Opens path with the given flags (creating with OUTPUT_MODE if asked to).
+ * On failure reports msg through perror() and exits with the errno value. */
+static int openfile(const char *path, int flags, const char *msg){
+    int fd = open(path, flags, OUTPUT_MODE);
+    if(fd == -1){
+        int err = errno; /* saved because perror() may change errno */
+        perror(msg);
+        exit(err);
+    }
+    return fd;
+}
+
+/* Returns the size of the file at path in bytes, or -1 if stat() fails. */
+static long long filesize(const char *path){
+    struct stat st;
+    if(stat(path, &st) == -1)
+        return -1;
+    return (long long)st.st_size;
+}
+
+/* Entry point: dispatches on the operation argument. */
+int main(int agrc, char* agrv[]){
+    int fdr, fdw;
+    if(agrc != 4)
+        usage();
+    if(strcmp(agrv[1], "-c1") == 0){
+        static table th[SZ]; /* static: about 260 KB, too large for comfort on the stack */
+        tree t2;
+        int z;
+        /* first pass: count byte frequencies and build tree and table */
+        fdr = openfile(agrv[2], O_RDONLY, "can't open file");
+        InitTable(th, SZ);
+        t2 = initial(fdr, th);
+        close(fdr);
+        /* second pass: encode */
+        fdr = openfile(agrv[2], O_RDONLY, "can't open file");
+        /* O_TRUNC so that no bytes of an older, longer file survive */
+        fdw = openfile(agrv[3], O_WRONLY | O_CREAT | O_TRUNC, "Unable to create file");
+        z = compress(fdr, fdw, th, &t2);
+        close(fdr);
+        close(fdw);
+        printf("size of original file %d\n", z);
+        printf("size of compressed file %lld\n", filesize(agrv[3]));
+    }
+    else if(strcmp(agrv[1], "-uc1") == 0){
+        fdr = openfile(agrv[2], O_RDONLY, "can't open file");
+        fdw = openfile(agrv[3], O_WRONLY | O_CREAT | O_TRUNC, "can't open file");
+        decompress(fdr, fdw);
+        close(fdr);
+        close(fdw);
+        printf("size of compressed file %lld\n", filesize(agrv[2]));
+        printf("size of uncompressed file %lld\n", filesize(agrv[3]));
+    }
+    else
+        usage();
+    return 0;
+}
diff --git a/compression/huffman/table.c b/compression/huffman/table.c
new file mode 100644
--- /dev/null
+++ b/compression/huffman/table.c
@@ -0,0 +1,62 @@
+/* Code table: maps each byte value to its Huffman code, kept as a string of
+ * '0' and '1' characters. */
+#include"table.h"
+#include <stdio.h>
+#include <string.h>
+
+/* Clears the first n entries of th. */
+void InitTable(table *th, int n){
+    int i;
+    for(i = 0; i < n; i++){
+        th[i].ch = '\0';
+        th[i].count = 0;
+        th[i].str[0] = '\0';
+    }
+}
+
+#define leftd '0'
+#define rightd '1'
+
+/* Walks tree *t and stores the code of every leaf in th, indexed by the
+ * leaf's byte. str is scratch space for the path from the root and pos is
+ * the depth of *t (0 for the root); *t must not be NULL.
+ * Returns the number of leaves stored by all calls made so far (the counter
+ * is static). */
+int CreateTable(table* th, tree *t, char* str,int pos){
+    static int c = 0;
+    if((*t)->left){
+        str[pos] = leftd;
+        CreateTable(th, &((*t)->left), str, pos + 1);
+    }
+    if((*t)->right){
+        str[pos] = rightd;
+        CreateTable(th, &((*t)->right), str, pos + 1);
+    }
+    if((*t)->right == NULL && (*t)->left == NULL){
+        int index = (int)(*t)->d.ch;
+        /* A tree that is a single leaf has an empty path; give it a one-bit
+         * code so that every occurrence of the byte is still encoded. */
+        if(pos == 0)
+            str[pos++] = leftd;
+        str[pos] = '\0';
+        th[index].ch = (*t)->d.ch;
+        th[index].count = (*t)->d.count;
+        strcpy(th[index].str, str);
+        c++;
+    }
+    return c;
+}
+
+/* Prints index, byte, code and count of every used entry among the first n
+ * entries of th. Returns the number of entries printed. */
+int printtable(table *th, int n){
+    int i;
+    int c = 0;
+    for(i = 0; i < n; i++){
+        if(th[i].count != 0){
+            printf("%4d, %3c, %10s, %3d\n",i, th[i].ch, th[i].str, th[i].count);
+            c++;
+        }
+    }
+    return c;
+}
diff --git a/compression/huffman/table.h b/compression/huffman/table.h
new file mode 100644
--- /dev/null
+++ b/compression/huffman/table.h
@@ -0,0 +1,15 @@
+/* Huffman code table entry and the functions that fill and print the table. */
+#ifndef TABLE_H
+#define TABLE_H
+
+#include"tree.h"
+typedef struct table{
+    char ch; /* the byte this entry describes */
+    char str[1024]; /* its code as a string of '0'/'1' characters */
+    int count; /* how often the byte occurs; 0 marks an unused entry */
+}table;
+void InitTable(table *t, int n);
+int CreateTable(table* th, tree *t, char* str, int pos);
+int printtable(table *t, int n);
+
+#endif
diff --git a/compression/huffman/tree.c b/compression/huffman/tree.c
new file mode 100644
--- /dev/null
+++ b/compression/huffman/tree.c
@@ -0,0 +1,70 @@
+/* Huffman tree nodes: construction and debugging output. */
+#include"tree.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Allocates one node; reports the failure and exits if there is no memory. */
+static node *newnode(void){
+    node *tmp = malloc(sizeof(*tmp));
+    if(tmp == NULL){
+        perror("malloc");
+        exit(1);
+    }
+    return tmp;
+}
+
+/* Makes *t the empty tree. */
+void inittree(tree *t){
+    *t = NULL;
+}
+
+/* Returns a new single-leaf tree holding d. */
+tree CreateTreeOfOneNode(data d){
+    node* tmp = newnode();
+    tmp->d = d;
+    tmp->isleaf = 1;
+    tmp->left = tmp->right = NULL;
+    return tmp;
+}
+
+/* Returns a new tree whose root has *t1 as left child and *t2 as right child
+ * and whose count is the sum of their counts. */
+tree CreateTreeFromTwoTree(tree *t1, tree *t2){
+    node *tmp = newnode();
+    tmp->d.ch = '\0';
+    tmp->d.count = (*t1)->d.count + (*t2)->d.count;
+    tmp->isleaf = 0;
+    tmp->left = *t1;
+    tmp->right = *t2;
+    return tmp;
+}
+
+/* Prints the tree node by node, each node before its children (despite the
+ * name): "1 <byte> " for a leaf and "0 " for any other node. */
+void inorder(tree *t){
+    if(*t == NULL)
+        return;
+    if((*t)->left == (*t)->right){
+        printf("1 %c ", (*t)->d.ch);
+    }
+    else{
+        printf("0 ");
+    }
+    inorder(&((*t)->left));
+    inorder(&((*t)->right));
+}
+
+/* Prints count and byte of every leaf of *t. Returns the number of leaves
+ * printed by all calls made so far (the counter is static). */
+int printleaves(tree *t){
+    static int c = 0;
+    if(*t == NULL)
+        return 0;
+    if((*t)->left == NULL && (*t)->right == NULL){
+        printf("%d %c", (*t)->d.count, (*t)->d.ch);
+        c++;
+    }
+    printleaves(&((*t)->left));
+    printleaves(&((*t)->right));
+    return c;
+}
diff --git a/compression/huffman/tree.h b/compression/huffman/tree.h
new file mode 100644
--- /dev/null
+++ b/compression/huffman/tree.h
@@ -0,0 +1,23 @@
+/* Huffman tree node types and constructors. */
+#ifndef TREE_H
+#define TREE_H
+
+typedef struct data{
+    unsigned char ch; /* byte value (meaningful in leaves) */
+    int count; /* occurrences of the byte, or the sum over a subtree */
+}data;
+typedef struct node{
+    data d;
+    int isleaf; /* 1 for a leaf, 0 for an internal node */
+    struct node* left;
+    struct node* right;
+}node;
+typedef node* tree;
+
+void inittree(tree *t);
+tree CreateTreeOfOneNode(data d);
+tree CreateTreeFromTwoTree(tree *t1, tree *t2);
+void inorder(tree *t);
+int printleaves(tree *t);
+
+#endif