基于哈夫曼编码的文件压缩

143 阅读 0 评论 95 点赞

我是靠谱客的博主开心皮皮虾，这篇文章主要介绍基于哈夫曼编码的文件压缩，现在分享给大家，希望可以做个参考。

//Heap.h

复制代码

//堆类
#pragma once
#include<iostream>
using namespace std;
#include<vector>
#include<cassert>
//仿函数（函数对象）--建小堆
template<typename T>
struct Less
{
    bool operator()(const T& left,const T& right)
    {
        return left<right;
    }
};
//仿函数（函数对象）--建大堆
template<typename T>
struct Great
{
    bool operator()(const T& left,const T& right)
    {
        return left>right;
    }
};

template<typename T,typename Compare>
class Heap
{
public:
    Heap()//默认构造函数（全缺省构造函数）
    {}
    Heap(const T* arr,size_t sz)//构造函数
    {
         assert(arr);
         //1.将数组中的元素放入堆中
         for (int i=0;i<sz;++i)
         {
             _heap.push_back(arr[i]);
         }
         //2.从堆中(sz-2)/2的位置（即最后一个非叶子节点位置）开始利用向下调整建堆
         for (int i=(sz-2)/2;i>=0;++i)
         {
             AdjustDown(i);
         }
    }
    void Push(const T& x)//插入--插入到最后一个位置，然后沿着该节点到根结点的路径向上调整
    {
         _heap.push_back(x);
         AdjustUp(_heap.size()-1);
    }
    void Pop()//删除（堆顶元素）---将堆顶元素和堆底元素交换；删除堆底元素；然后从堆顶向下调整
    {
        if (_heap.empty())
        {
            cout<<"heap empty"<<endl;
        }
        else
        {
            //先交换堆底和堆顶元素
            std::swap(_heap[0],_heap[_heap.size()-1]);
            _heap.pop_back();
            //然后从堆顶开始向下调整
            AdjustDown(0);
        }
    }
    const T& Top()//取最值（堆顶元素）
    {
        if (!_heap.empty())
        {
             return _heap[0];
        }
        else
        {
            cout<<"heap empty!"<<endl;
        }
    }
    size_t Size()//返回堆中元素个数
    {
       return _heap.size();
    }
    bool Empty()//判断堆是否为空
    {
       return _heap.empty();
    }
private:
    //向下调整
    void AdjustDown(size_t index)
    {
        size_t leftchild=index*2+1;//左孩子
        Compare com;//大小堆

while (leftchild<_heap.size())
        {
            //1.寻找左右孩子的最值下标
            if ( leftchild+1<_heap.size() && com(_heap[leftchild+1],_heap[leftchild]))
            {
                leftchild++;//记录左右孩子的最值下标
            }
            //2.比较并决定是否交换
            //2.1 交换,继续向下调整
            if (com(_heap[leftchild],_heap[index]))
            {
                std::swap(_heap[leftchild],_heap[index]);
                //继续向下调整
                index=leftchild;
                leftchild=index*2+1;
            }
            //2.2不交换，那么不用继续向下调整
            else
                 break;
        }
    }
    //向上调整
    void AdjustUp(int index)
    {
        int father=(index-2)/2;//父节点
        Compare com;
        while (father>=0)
        {
            //交换,继续向上调整
            if (com(_heap[index],_heap[father]))
            {
                std::swap(_heap[index],_heap[father]);
                index=father;
                father=(index-2)/2;//继续向上调整
            }
            //不交换，不用向上调整
            else
                break;
        }
    }
private:
    vector<T> _heap;
};

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
//堆类
#pragma once
#include<iostream>
using namespace std;
#include<vector>
#include<cassert>
//仿函数（函数对象）--建小堆
template<typename T>
struct Less
{
    bool operator()(const T& left,const T& right)
    {
        return left<right;
    }
};
//仿函数（函数对象）--建大堆
template<typename T>
struct Great
{
    bool operator()(const T& left,const T& right)
    {
        return left>right;
    }
};

template<typename T,typename Compare>
class Heap
{
public:
    Heap()//默认构造函数（全缺省构造函数）
    {}
    Heap(const T* arr,size_t sz)//构造函数
    {
         assert(arr);
         //1.将数组中的元素放入堆中
         for (int i=0;i<sz;++i)
         {
             _heap.push_back(arr[i]);
         }
         //2.从堆中(sz-2)/2的位置（即最后一个非叶子节点位置）开始利用向下调整建堆
         for (int i=(sz-2)/2;i>=0;++i)
         {
             AdjustDown(i);
         }
    }
    void Push(const T& x)//插入--插入到最后一个位置，然后沿着该节点到根结点的路径向上调整
    {
         _heap.push_back(x);
         AdjustUp(_heap.size()-1);
    }
    void Pop()//删除（堆顶元素）---将堆顶元素和堆底元素交换；删除堆底元素；然后从堆顶向下调整
    {
        if (_heap.empty())
        {
            cout<<"heap empty"<<endl;
        }
        else
        {
            //先交换堆底和堆顶元素
            std::swap(_heap[0],_heap[_heap.size()-1]);
            _heap.pop_back();
            //然后从堆顶开始向下调整
            AdjustDown(0);
        }
    }
    const T& Top()//取最值（堆顶元素）
    {
        if (!_heap.empty())
        {
             return _heap[0];
        }
        else
        {
            cout<<"heap empty!"<<endl;
        }
    }
    size_t Size()//返回堆中元素个数
    {
       return _heap.size();
    }
    bool Empty()//判断堆是否为空
    {
       return _heap.empty();
    }
private:
    //向下调整
    void AdjustDown(size_t index)
    {
        size_t leftchild=index*2+1;//左孩子
        Compare com;//大小堆

        while (leftchild<_heap.size())
        {
            //1.寻找左右孩子的最值下标
            if ( leftchild+1<_heap.size() && com(_heap[leftchild+1],_heap[leftchild]))
            {
                leftchild++;//记录左右孩子的最值下标
            }
            //2.比较并决定是否交换
            //2.1 交换,继续向下调整
            if (com(_heap[leftchild],_heap[index]))
            {
                std::swap(_heap[leftchild],_heap[index]);
                //继续向下调整
                index=leftchild;
                leftchild=index*2+1;
            }
            //2.2不交换，那么不用继续向下调整
            else
                 break;
        }
    }
    //向上调整
    void AdjustUp(int index)
    {
        int father=(index-2)/2;//父节点
        Compare com;
        while (father>=0)
        {
            //交换,继续向上调整
            if (com(_heap[index],_heap[father]))
            {
                std::swap(_heap[index],_heap[father]);
                index=father;
                father=(index-2)/2;//继续向上调整
            }
            //不交换，不用向上调整
            else
                break;
        }
    }
private:
    vector<T> _heap;
};

//HuffmanTree.h

复制代码

//哈夫曼树类
#pragma  once
#include<iostream>
using namespace std;
#include<cassert>
#include "Heap.h"//借助堆构建哈夫曼树

template<typename T>
struct HuffmanTreeNode
{
    HuffmanTreeNode<T>* _leftchild;//左孩子
    HuffmanTreeNode<T>* _rightchild;//右孩子

T _weight;//权值
    HuffmanTreeNode(const T& weight=0)
        :_leftchild(NULL)
        ,_rightchild(NULL)
        ,_weight(weight)
    {}
};

template<typename T>
class HuffmanTree
{
    typedef HuffmanTreeNode<T> Node;
public:
    HuffmanTree()//默认构造函数（全缺省构造函数）
        :_root(NULL)
    {}
    //利用贪心算法构造哈夫曼树
    HuffmanTree(const T* arr,size_t sz,const T& invaild)//构造
    { 
        _root=CreatHuffmanTree(arr,sz,invaild);
    }
    ~HuffmanTree()//析构
    {
        _Destroy(_root);
    }
    Node* GetRoot()//获取哈夫曼树的根结点
    {
         return _root;
    }
private:
    //构建哈夫曼树
    Node* CreatHuffmanTree( const T* arr,size_t sz,const T& invaild)
    {
        struct LessNode
        {
             bool operator()(const Node* left,const  Node* right)
             {
                 return left->_weight < right->_weight;
             }
        };
         Heap<Node*,LessNode> MinHeap;
         //1.依据权值建结点的MinHeap
         for (size_t i=0;i<sz;i++)
         {
             if (arr[i]!=invaild)
             {
                 Node* node=new Node(arr[i]);
                 MinHeap.Push(node);
             }
         }
         //2.构建huffmanTree
        while (MinHeap.Size()>1)
        {
            //2.从堆中依次去除两个最小的结点建树
            Node* left=MinHeap.Top();
            MinHeap.Pop();
            Node* right=MinHeap.Top();
            MinHeap.Pop();
            Node* father=new Node(left->_weight+right->_weight);
            father->_leftchild=left;
            father->_rightchild=right;
            //3.将两个最小值的和入堆
            MinHeap.Push(father);
        }
        _root=MinHeap.Top();
        MinHeap.Pop();
        return _root;
    }

void _Destroy(Node* root)
    {
        if (root==NULL)
        {
            return ;
        }
        else
        {
            _Destroy(root->_leftchild);
            _Destroy(root->_rightchild);
            delete root;
            root=NULL;
        }
    }
private:
    Node* _root;
};

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
//哈夫曼树类
#pragma  once
#include<iostream>
using namespace std;
#include<cassert>
#include "Heap.h"//借助堆构建哈夫曼树

template<typename T>
struct HuffmanTreeNode
{
    HuffmanTreeNode<T>* _leftchild;//左孩子
    HuffmanTreeNode<T>* _rightchild;//右孩子

    T _weight;//权值
    HuffmanTreeNode(const T& weight=0)
        :_leftchild(NULL)
        ,_rightchild(NULL)
        ,_weight(weight)
    {}
};

template<typename T>
class HuffmanTree
{
    typedef HuffmanTreeNode<T> Node;
public:
    HuffmanTree()//默认构造函数（全缺省构造函数）
        :_root(NULL)
    {}
    //利用贪心算法构造哈夫曼树
    HuffmanTree(const T* arr,size_t sz,const T& invaild)//构造
    { 
        _root=CreatHuffmanTree(arr,sz,invaild);
    }
    ~HuffmanTree()//析构
    {
        _Destroy(_root);
    }
    Node* GetRoot()//获取哈夫曼树的根结点
    {
         return _root;
    }
private:
    //构建哈夫曼树
    Node* CreatHuffmanTree( const T* arr,size_t sz,const T& invaild)
    {
        struct LessNode
        {
             bool operator()(const Node* left,const  Node* right)
             {
                 return left->_weight < right->_weight;
             }
        };
         Heap<Node*,LessNode> MinHeap;
         //1.依据权值建结点的MinHeap
         for (size_t i=0;i<sz;i++)
         {
             if (arr[i]!=invaild)
             {
                 Node* node=new Node(arr[i]);
                 MinHeap.Push(node);
             }
         }
         //2.构建huffmanTree
        while (MinHeap.Size()>1)
        {
            //2.从堆中依次去除两个最小的结点建树
            Node* left=MinHeap.Top();
            MinHeap.Pop();
            Node* right=MinHeap.Top();
            MinHeap.Pop();
            Node* father=new Node(left->_weight+right->_weight);
            father->_leftchild=left;
            father->_rightchild=right;
            //3.将两个最小值的和入堆
            MinHeap.Push(father);
        }
        _root=MinHeap.Top();
        MinHeap.Pop();
        return _root;
    }

    void _Destroy(Node* root)
    {
        if (root==NULL)
        {
            return ;
        }
        else
        {
            _Destroy(root->_leftchild);
            _Destroy(root->_rightchild);
            delete root;
            root=NULL;
        }
    }
private:
    Node* _root;
};

//FileCompression.h

复制代码

//文件压缩类
#pragma once 
#include "HuffmanTree.h"
#include<string>
#include<cstdlib>
#include <cstdio>
typedef long long LongType;
//字符信息类---充当哈夫曼树的结点权值
struct CharInformation
{

unsigned char _c;//出现的字符

//ASCLL表中一共有0-255十进制编号的256个字符
    //unsigned char 是单字节的全8位表示字符类型，可以表示这256个字符

string _code;//该字符的哈夫曼编码
    LongType _count;//该字符在文件中出现的次数

CharInformation()//构造函数
        :_c(0)
        ,_count(0)
        //_code不用初始化，会调用string类的默认构造函数初始化
    {}

//运算符重载，让该自定义类也能像内置类型一样，进行运算
    //建哈夫曼树时，和非法值比较使用
    bool operator!=(const CharInformation& ch)const 
    {
        return _count!=ch._count;
    }

//建小堆使用
    bool operator<(const CharInformation& ch)const
    {
        return _count<ch._count;
    }

//构造父节点时使用
    CharInformation operator+(const CharInformation& ch)
    {
        CharInformation cf;
        cf._count=_count+ch._count; 
        return cf;
    }
};

//文件压缩与解压缩类
class FileCompression
{
public:
    //构造函数
    FileCompression()
    {
        //初始化字符表数组的每个字符信息的字符成员
        for (int i=0;i<256;++i)
        {
            _ArrCh[i]._c=i;
        }
    }
    //压缩函数
    //传入一个待压缩的原始文件名，返回一个压缩之后的文件名，方便解压缩函数传参
    void  Compression(string PrimaryFileName)
    {

//1.打开源文件
          FILE* fout=fopen(PrimaryFileName.c_str(),"rb");//以只读的二进制方式打开文件，否则读汉字有问题
          if (fout==NULL)
          {
              perror("fopen");
              exit(1);
          }
          //2.统计256个字符出现的次数，并赋值给相应的数组的字符
          int ch=getc(fout);
          while (!feof(fout))
          {
              _ArrCh[ch]._count++;
              ch=getc(fout);
          }
          //3.根据字符的出现次数构建哈夫曼树，从而得到每个字符对应的哈夫曼编码
            //3.1根据字符信息构建哈夫曼树
          CharInformation invaild;//count=0相当于一个非法值，不用构建到huffman树中
          HuffmanTree<CharInformation>  huffmantree(_ArrCh,256,invaild);
            //3.2根据哈夫曼树得到对应的哈夫曼编码，并赋值正确的字符信息中
          string code;//用于递归记录字符的哈夫曼编码
          GetHuffmanCode(huffmantree.GetRoot(),code);

//至此已经根据已经将源文件的全部信息（字符，字符的出现次数，字符对应的哈夫曼编码）
          //全部统计出来，放入成员变量字符表数组中，接下来就要操作数组，压缩文件

//4.压缩文件---即将字符的全部哈夫曼编码以字节为单位放入压缩文件中
            //4.1创建压缩文件
            string CompressFileName=PrimaryFileName;
            size_t index= CompressFileName.find('.',0);
            CompressFileName=CompressFileName.substr(0,index);
            CompressFileName+=".compress";
            FILE* Input=fopen(CompressFileName.c_str(),"wb");//以只写防止打开二进制文件,文件不存在则创建新文件
            if (Input==NULL)
            {
                perror("fopen");
                exit(1);
            }
            //4.2将所有字符的哈夫曼编码以一字节为单位放入压缩文件中
            fseek(fout,0,SEEK_SET);//更正文原件流的读写位置到文件起始处
            ch=getc(fout);
            char c=0;//以一字节为单位放入压缩文件
            int size=0;//记录一个字节八位是否写满
            while (!feof(fout))
            {
                //处理一个字符的所有哈夫曼编码
                 for (size_t i=0;i<_ArrCh[ch]._code.size();i++)
                 {
                      c<<=1;
                      if (_ArrCh[ch]._code[i]=='1')
                      {
                          c|=1;//先把1放到最低位
                      }
                      //如果为'0'不用管；
                      size++;
                      //当一个字节八位装满，则写入压缩文件
                      if (size==8)
                      {
                          //写入压缩文件
                          if (fputc(c,Input)==EOF)
                          {
                              cout<<"压缩时哈夫曼编码写入失败"<<endl;
                          }
                          //重置
                          size=0;
                          c=0;
                      }
                 }
                 //一个字符处理完，读取下一个字符
                 ch=getc(fout);
            }
            //处理最后一个不满八位的字符，后面全加0
            if (size>0)
            {
                c<<=(8-size);
                fputc(c,Input);
            }
            fclose(fout);//关闭源文件
            fclose(Input);//关闭压缩文件
            //至此压缩源文件完毕

//5.写配置文件---由于解压时，收件人没有原始文件，而解压压缩文件，要根据哈夫曼树
                            //恢复字符，形成文中信息；所以，在压缩函数中要写配置文件，记录
                            //哈夫曼树的相关信息，从而在解压缩文件时，可以根据此信息，重建
                            //哈夫曼树
             //配置文件格式：
             //a:5
             //b:3
              //5.1定义配置文件名
            string ConfigurationFile=PrimaryFileName;
            ConfigurationFile=ConfigurationFile.substr(0,index);
            ConfigurationFile+=". config";
            FILE* FinConfi=fopen(ConfigurationFile.c_str(),"wb");
              //5.2给配置文件写信息
            string line;//记录一个字符的全部信息的字符串
            for (int i=0;i<256;i++)
            {
                if (_ArrCh[i]._count!=0)
                {
                    line+=_ArrCh[i]._c;//字符
                    line+=',';//分隔符
                    char StringCount[25]={0};
                    _itoa(_ArrCh[i]._count,StringCount,10);//将次数转化为字符串
                    line+=StringCount;
                    line+='n';
                    if (fputs(line.c_str(),FinConfi)==EOF)
                    {
                        cout<<"配置文件写入失败"<<endl;
                    }
                    line.clear();//清除字符串信息，方便下一次统计
                }
            }

fclose(FinConfi);//关闭配置文件
    }
    //解压缩函数
    //传入一个压缩文件名
    void UnCompression(string CompressFileName)
    {
        //1.通过配置信息构建哈夫曼树
           //1.1还原字符表数组
        string ConfigurationFile=CompressFileName;
        size_t index=ConfigurationFile.find('.',0);
        ConfigurationFile=ConfigurationFile.substr(0,index);
        ConfigurationFile+=". config";
        FILE* FinConfi=fopen(ConfigurationFile.c_str(),"rb");
        if (FinConfi==NULL)
        {
            perror("fopen");
            exit(1);
        }
        string line;//记录每行配置文件的信息
        while(ReadLine(FinConfi,line))
        {
            //该行为空
            if (line.empty())
            {
                line+='n';
                continue;
            }
            else
            {
                unsigned char ch=line[0];
                //使用string::substr(pos)函数提取字符出现的次数
                _ArrCh[ch]._count=atoi(line.substr(2).c_str());
                line.clear();
            }
        }

//1.2构建哈夫曼树
        CharInformation invaild;
        HuffmanTree<CharInformation> ht(_ArrCh,256,invaild);

//1.3解压缩文件--通过从根结点遍历哈夫曼树得到原始文件
        string UnCompressFileName=CompressFileName;
        UnCompressFileName=UnCompressFileName.substr(0,index);
        UnCompressFileName+=" .unCompress";
        FILE* Func=fopen(UnCompressFileName.c_str(),"wb");
        if (Func==NULL)
        {
            perror("fopen");
            exit(1);
        }
           //1.4读取压缩文件
        FILE* Fcom=fopen(CompressFileName.c_str(),"rb");
        if (Fcom==NULL)
        {
            perror("fopen");
            exit(1);
        }

HuffmanTreeNode<CharInformation>* root=ht.GetRoot();
        LongType TotalCites=root->_weight._count;//原始文件所有字符的个数
        HuffmanTreeNode<CharInformation>* cur=root;//遍历二叉树
        int ch=fgetc(Fcom);
        int pos=7;
        while (TotalCites>0)
        {
             if (ch&(1<<pos))//右1
             {
                 cur=cur->_rightchild;
             }
             else//左0
             {
                 cur=cur->_leftchild;
             }

if (cur->_leftchild==NULL&&cur->_rightchild==NULL)
             {
                  fputc(cur->_weight._c,Func);//将对应的字符写入解压缩文件中
                  cur=root;//回到根结点继续寻找下一个字符
                  --TotalCites;//总还原字符减少1个
                  if(TotalCites==0)
                  {
                       break;
                  }
             }
             pos--;
             //一个压缩字符每位处理完毕
             if (pos<0)
             {
                 ch=fgetc(Fcom);//从压缩文件中读下一个压缩字符
                 pos=7;
             }
        }
        fclose(FinConfi);
        fclose(Fcom);
        fclose(Func);
    }
protected:
    //获取对应字符的哈夫曼编码
    void GetHuffmanCode(HuffmanTreeNode<CharInformation>* root,string code)
    {
        if (root==NULL)
        {
            return;
        }
        //遍历到叶子节点，即找到了对应字符的哈夫曼编码
        if (root->_leftchild==NULL&&root->_rightchild==NULL)
        {
            _ArrCh[root->_weight._c]._code=code;
        }
        //左0，右1
        GetHuffmanCode(root->_leftchild,code+'0');
        GetHuffmanCode(root->_rightchild,code+'1');

}
    //读取配置文件的一行
    bool ReadLine(FILE* FinConfi,string& line)
    {
        int ch=fgetc(FinConfi);
        if (feof(FinConfi))//如果读到文件末尾，返回假
        {
            return false;
        }
        while (!feof(FinConfi)&&ch!='n')
        {
            line+=ch;
            ch=fgetc(FinConfi);
        }
        //没有读到文件末尾
        return true;
    }
protected:
    CharInformation _ArrCh[256];//文件操作的对象，字符信息表
};

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
//文件压缩类
#pragma once 
#include "HuffmanTree.h"
#include<string>
#include<cstdlib>
#include <cstdio>
typedef long long LongType;
//字符信息类---充当哈夫曼树的结点权值
struct CharInformation
{

    unsigned char _c;//出现的字符

    //ASCLL表中一共有0-255十进制编号的256个字符
    //unsigned char 是单字节的全8位表示字符类型，可以表示这256个字符

    string _code;//该字符的哈夫曼编码
    LongType _count;//该字符在文件中出现的次数

    CharInformation()//构造函数
        :_c(0)
        ,_count(0)
        //_code不用初始化，会调用string类的默认构造函数初始化
    {}

    //运算符重载，让该自定义类也能像内置类型一样，进行运算
    //建哈夫曼树时，和非法值比较使用
    bool operator!=(const CharInformation& ch)const 
    {
        return _count!=ch._count;
    }

    //建小堆使用
    bool operator<(const CharInformation& ch)const
    {
        return _count<ch._count;
    }

    //构造父节点时使用
    CharInformation operator+(const CharInformation& ch)
    {
        CharInformation cf;
        cf._count=_count+ch._count; 
        return cf;
    }
};

//文件压缩与解压缩类
class FileCompression
{
public:
    //构造函数
    FileCompression()
    {
        //初始化字符表数组的每个字符信息的字符成员
        for (int i=0;i<256;++i)
        {
            _ArrCh[i]._c=i;
        }
    }
    //压缩函数
    //传入一个待压缩的原始文件名，返回一个压缩之后的文件名，方便解压缩函数传参
    void  Compression(string PrimaryFileName)
    {

          //1.打开源文件
          FILE* fout=fopen(PrimaryFileName.c_str(),"rb");//以只读的二进制方式打开文件，否则读汉字有问题
          if (fout==NULL)
          {
              perror("fopen");
              exit(1);
          }
          //2.统计256个字符出现的次数，并赋值给相应的数组的字符
          int ch=getc(fout);
          while (!feof(fout))
          {
              _ArrCh[ch]._count++;
              ch=getc(fout);
          }
          //3.根据字符的出现次数构建哈夫曼树，从而得到每个字符对应的哈夫曼编码
            //3.1根据字符信息构建哈夫曼树
          CharInformation invaild;//count=0相当于一个非法值，不用构建到huffman树中
          HuffmanTree<CharInformation>  huffmantree(_ArrCh,256,invaild);
            //3.2根据哈夫曼树得到对应的哈夫曼编码，并赋值正确的字符信息中
          string code;//用于递归记录字符的哈夫曼编码
          GetHuffmanCode(huffmantree.GetRoot(),code);

          //至此已经根据已经将源文件的全部信息（字符，字符的出现次数，字符对应的哈夫曼编码）
          //全部统计出来，放入成员变量字符表数组中，接下来就要操作数组，压缩文件

          //4.压缩文件---即将字符的全部哈夫曼编码以字节为单位放入压缩文件中
            //4.1创建压缩文件
            string CompressFileName=PrimaryFileName;
            size_t index= CompressFileName.find('.',0);
            CompressFileName=CompressFileName.substr(0,index);
            CompressFileName+=".compress";
            FILE* Input=fopen(CompressFileName.c_str(),"wb");//以只写防止打开二进制文件,文件不存在则创建新文件
            if (Input==NULL)
            {
                perror("fopen");
                exit(1);
            }
            //4.2将所有字符的哈夫曼编码以一字节为单位放入压缩文件中
            fseek(fout,0,SEEK_SET);//更正文原件流的读写位置到文件起始处
            ch=getc(fout);
            char c=0;//以一字节为单位放入压缩文件
            int size=0;//记录一个字节八位是否写满
            while (!feof(fout))
            {
                //处理一个字符的所有哈夫曼编码
                 for (size_t i=0;i<_ArrCh[ch]._code.size();i++)
                 {
                      c<<=1;
                      if (_ArrCh[ch]._code[i]=='1')
                      {
                          c|=1;//先把1放到最低位
                      }
                      //如果为'0'不用管；
                      size++;
                      //当一个字节八位装满，则写入压缩文件
                      if (size==8)
                      {
                          //写入压缩文件
                          if (fputc(c,Input)==EOF)
                          {
                              cout<<"压缩时哈夫曼编码写入失败"<<endl;
                          }
                          //重置
                          size=0;
                          c=0;
                      }
                 }
                 //一个字符处理完，读取下一个字符
                 ch=getc(fout);
            }
            //处理最后一个不满八位的字符，后面全加0
            if (size>0)
            {
                c<<=(8-size);
                fputc(c,Input);
            }
            fclose(fout);//关闭源文件
            fclose(Input);//关闭压缩文件
            //至此压缩源文件完毕

            //5.写配置文件---由于解压时，收件人没有原始文件，而解压压缩文件，要根据哈夫曼树
                            //恢复字符，形成文中信息；所以，在压缩函数中要写配置文件，记录
                            //哈夫曼树的相关信息，从而在解压缩文件时，可以根据此信息，重建
                            //哈夫曼树
             //配置文件格式：
             //a:5
             //b:3
              //5.1定义配置文件名
            string ConfigurationFile=PrimaryFileName;
            ConfigurationFile=ConfigurationFile.substr(0,index);
            ConfigurationFile+=". config";
            FILE* FinConfi=fopen(ConfigurationFile.c_str(),"wb");
              //5.2给配置文件写信息
            string line;//记录一个字符的全部信息的字符串
            for (int i=0;i<256;i++)
            {
                if (_ArrCh[i]._count!=0)
                {
                    line+=_ArrCh[i]._c;//字符
                    line+=',';//分隔符
                    char StringCount[25]={0};
                    _itoa(_ArrCh[i]._count,StringCount,10);//将次数转化为字符串
                    line+=StringCount;
                    line+='n';
                    if (fputs(line.c_str(),FinConfi)==EOF)
                    {
                        cout<<"配置文件写入失败"<<endl;
                    }
                    line.clear();//清除字符串信息，方便下一次统计
                }
            }

            fclose(FinConfi);//关闭配置文件
    }
    //解压缩函数
    //传入一个压缩文件名
    void UnCompression(string CompressFileName)
    {
        //1.通过配置信息构建哈夫曼树
           //1.1还原字符表数组
        string ConfigurationFile=CompressFileName;
        size_t index=ConfigurationFile.find('.',0);
        ConfigurationFile=ConfigurationFile.substr(0,index);
        ConfigurationFile+=". config";
        FILE* FinConfi=fopen(ConfigurationFile.c_str(),"rb");
        if (FinConfi==NULL)
        {
            perror("fopen");
            exit(1);
        }
        string line;//记录每行配置文件的信息
        while(ReadLine(FinConfi,line))
        {
            //该行为空
            if (line.empty())
            {
                line+='n';
                continue;
            }
            else
            {
                unsigned char ch=line[0];
                //使用string::substr(pos)函数提取字符出现的次数
                _ArrCh[ch]._count=atoi(line.substr(2).c_str());
                line.clear();
            }
        }

          //1.2构建哈夫曼树
        CharInformation invaild;
        HuffmanTree<CharInformation> ht(_ArrCh,256,invaild);

          //1.3解压缩文件--通过从根结点遍历哈夫曼树得到原始文件
        string UnCompressFileName=CompressFileName;
        UnCompressFileName=UnCompressFileName.substr(0,index);
        UnCompressFileName+=" .unCompress";
        FILE* Func=fopen(UnCompressFileName.c_str(),"wb");
        if (Func==NULL)
        {
            perror("fopen");
            exit(1);
        }
           //1.4读取压缩文件
        FILE* Fcom=fopen(CompressFileName.c_str(),"rb");
        if (Fcom==NULL)
        {
            perror("fopen");
            exit(1);
        }

        HuffmanTreeNode<CharInformation>* root=ht.GetRoot();
        LongType TotalCites=root->_weight._count;//原始文件所有字符的个数
        HuffmanTreeNode<CharInformation>* cur=root;//遍历二叉树
        int ch=fgetc(Fcom);
        int pos=7;
        while (TotalCites>0)
        {
             if (ch&(1<<pos))//右1
             {
                 cur=cur->_rightchild;
             }
             else//左0
             {
                 cur=cur->_leftchild;
             }

             if (cur->_leftchild==NULL&&cur->_rightchild==NULL)
             {
                  fputc(cur->_weight._c,Func);//将对应的字符写入解压缩文件中
                  cur=root;//回到根结点继续寻找下一个字符
                  --TotalCites;//总还原字符减少1个
                  if(TotalCites==0)
                  {
                       break;
                  }
             }
             pos--;
             //一个压缩字符每位处理完毕
             if (pos<0)
             {
                 ch=fgetc(Fcom);//从压缩文件中读下一个压缩字符
                 pos=7;
             }
        }
        fclose(FinConfi);
        fclose(Fcom);
        fclose(Func);
    }
protected:
    //获取对应字符的哈夫曼编码
    void GetHuffmanCode(HuffmanTreeNode<CharInformation>* root,string code)
    {
        if (root==NULL)
        {
            return;
        }
        //遍历到叶子节点，即找到了对应字符的哈夫曼编码
        if (root->_leftchild==NULL&&root->_rightchild==NULL)
        {
            _ArrCh[root->_weight._c]._code=code;
        }
        //左0，右1
        GetHuffmanCode(root->_leftchild,code+'0');
        GetHuffmanCode(root->_rightchild,code+'1');

    }
    //读取配置文件的一行
    bool ReadLine(FILE* FinConfi,string& line)
    {
        int ch=fgetc(FinConfi);
        if (feof(FinConfi))//如果读到文件末尾，返回假
        {
            return false;
        }
        while (!feof(FinConfi)&&ch!='n')
        {
            line+=ch;
            ch=fgetc(FinConfi);
        }
        //没有读到文件末尾
        return true;
    }
protected:
    CharInformation _ArrCh[256];//文件操作的对象，字符信息表
};

测试用例

复制代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#include "FileCompression.h"
#include<windows.h>
void test()
{

    FileCompression fc;

    cout<<"开始压缩"<<endl;
    cout<<"压缩用时";
    int start = GetTickCount();
    //第一组：文本文件
    //fc.Compression("123.txt");----✔

    //第二组：图片文件
    //fc.Compression("picture.png");//----✔

    //第三组：视频文件
    //fc.Compression("video.flv");
    //第四组：音频文件
    //fc.Compression("music.mp3");----✔

    //第五组：大文件字符串文章---压缩效果最好
    fc.Compression("yingyu.txt");

    int end=GetTickCount();
    cout<<"compress time:"<<(end-start)<<endl;

    cout<<"开始解压"<<endl;
    cout<<"解压用时:";
    start=GetTickCount();
    //第一组：文本文件
    //fc.UnCompression("123.compress");----✔

    //第二组：图片文件
    //fc.UnCompression("picture.compress");//----✔

    //第三组：视频文件
    //fc.UnCompression("video.compress");

    //第四组：音频文件
    //fc.UnCompression("music.compress");//----✔

    //第五组：大文件字符串文章---压缩效果最好
    fc.UnCompression("yingyu.compress");

    end=GetTickCount();
    cout<<"uncompress time:"<<(end-start)<<endl;

};