哈夫曼树的数组实现

原文链接：http://www.infocool.net/kb/CPlus/201610/207114.html

如需转载请联系听云College团队成员小尹邮箱：yinhy#tingyun.com

（本篇博客是本人第一篇数据结构的博客，有什么不足还望各位看官指出！！）

题目来源：SOJ 1000. Huffman Coding V1,V3

题目描述

V3：

Description

对输入的英文大写字母序列进行统计概率，然后构建Huffman树，得出每个字母的Huffman编码，输出字母序列的总编码长度。

Input

第一行是大写字母个数n(0<n<=100)

第二行为n个字母，中间以一个空格分隔。

Output

输出字母序列的总编码长度。

Sample Input
10
S S U U U S U L U U
Sample Output
14
V1:
Description

对输入的英文大写字母序列进行统计概率，然后构建Huffman树，输出按照概率降序排序输出Huffman编码。

Input

第一行是大写字母个数n(0<n<=100)

第二行为n个字母，中间以一个空格分隔。

建树过程把权值较小的子树作为左孩子，数据保证建树过程不会出现左右子树权值一样的情况。

Output

假设输入中共有m个不同的字母，按照出现概率降序输出，每个字母单独一行输出。格式如下：

　　字母1 出现次数 Huffman编码

　　字母2 出现次数 Huffman编码

　　字母3 出现次数 Huffman编码

　　…

　　字母m 出现次数 Huffman编码

Sample Input

Copy sample input to clipboard

S S U U U S U L U U

Sample Output

U 6 1

S 3 01

L 1 00

算法描述

其实哈夫曼树的建树规则的话网上都有不少资料可以看，此处不予赘述。讲讲个人的一些收获和想法：数组这种实现方法也是我在网上学习来的，简单讲就是先计算输入数据对应的字符的权重并进行记录。主要的结构体是哈夫曼树的节点，存储的是每个字符的权重以及左右子树的权重，还有就是很有用的一个数据：父节点的权重。这样就可以以权重代替指针域进行上下的寻址，可以减少由于指针使用不当带来的内存问题。然后写代码的过程中遇到的一个缠最久的bug就是在建立非字符节点时候查找无父节点的节点的函数select()中，遇到的很棘手的一个问题是已经标记为有父节点的节点仍未被识别，后来才发现问题是出现在查找权重最小的节点的过程中，for循环的边界写错了= =

不多说了，直接上代码吧

个人代码实现

#include <iostream>
#include <cstdio>
#include <cstring>
using namespace std;
#define N 26
#define M (2*N-1)
struct huffmanTreeNode
{
    int weight;
    int left, right, parent;
};
struct huffmanCode
{
    char data;
    int weight;
    char code[N];
};
int initialize(huffmanCode hfmCodeSet[], int n)
{
    int set[N + 1];
    char inputStr[5];
    memset(set, 0, sizeof(set));

    memset(inputStr, 0, sizeof(inputStr));
    int i, j;
    
    huffmanCode cd;
    cd.data = 0;
    cd.weight = 0;
    memset(cd.code, 0, sizeof(cd.code));
    for (i = 0; i < N + 1; i ++)
        hfmCodeSet[i] = cd;
    for (i = 0; i < n; i ++) {
        scanf("%s", inputStr);
        set[inputStr[0] - 'A']++;
    }
    j = 1;
    for (i = 0; i < N + 1; i ++) {
        if (set[i] > 0) {
            hfmCodeSet[j].data = 'A' + i;
            hfmCodeSet[j].weight = set[i];
            j++;
        }
    }
    return (j - 1);
}
void select(huffmanTreeNode hfmTree[],int idx, int* i1, int* i2)

{
    int i, j;
    for (i = 1; i <= idx; i ++) {
        if (hfmTree[i].parent == 0) {
            *i1 = i;
            break;
        }
    }
    for (i = 1; i <= idx; i ++) {
        if (hfmTree[i].parent == 0 &&
            hfmTree[i].weight < hfmTree[*i1].weight) {
            *i1 = i;
        }
    }
    for (i = 1; i <= idx; i ++) {
        
        if (i != *i1 && hfmTree[i].parent == 0) {
            *i2 = i;
            break;
        }
    }
    for (i = 1; i <= idx; i ++) {

        
        if (i != *i1) {
            if (hfmTree[i].parent == 0 &&
                hfmTree[i].weight < hfmTree[*i2].weight) {
                *i2 = i;
            } 
        }
    }
}
void hfmCoding(huffmanCode hfmCodeSet[], huffmanTreeNode hfmTree[], int n)
{
    int i;
    char tempCode[N + 1];
    for (i = 1; i <= 2 * n - 1; i ++) {
        hfmTree[i].weight = (i <= n ? hfmCodeSet[i].weight : 0);
        hfmTree[i].parent = hfmTree[i].left = hfmTree[i].right = 0;
    }
    int minIdx1, minIdx2;
    for (i = n + 1; i <= 2 * n - 1; i ++) {
        select(hfmTree, i - 1, &minIdx1, &minIdx2);
        hfmTree[i].weight = hfmTree[minIdx1].weight + hfmTree[minIdx2].weight;
        hfmTree[i].left = minIdx1;
        hfmTree[i].right = minIdx2;
        hfmTree[minIdx1].parent = i;
        hfmTree[minIdx2].parent = i;

    }
    int start, childIdx, parentIdx;
    for (i = 1; i <= n; i ++) {
        start = n - 1;
        tempCode[n] = '\0';     
        childIdx = i;
        parentIdx = hfmTree[childIdx].parent;
        while (parentIdx) {
            if (hfmTree[parentIdx].left == childIdx) {
                tempCode[--start] = '0';
            } else if (hfmTree[parentIdx].right == childIdx) {
                tempCode[--start] = '1';
            }
            childIdx = parentIdx;
            parentIdx = hfmTree[childIdx].parent;
        }
        strcpy(hfmCodeSet[i].code, &tempCode[start]);
    }
}
int totalLength(huffmanCode hfmCodeSet[])
{
    int i, sum = 0;
    for (i = 1; i <= N; i ++) {

        if (hfmCodeSet[i].weight > 0) {
            sum += ((hfmCodeSet[i].weight) * strlen(hfmCodeSet[i].code));
        }
    }
    return sum;
}
void sortDisplayCode(huffmanCode hfmCodeSet[], int n)
{
    int i, j;
    huffmanCode tempCode;
    for (i = 1; i <= n - 1; i ++) {
        for (j = 1; j <= n - 1; j ++) {
            if (hfmCodeSet[j].weight < hfmCodeSet[j + 1].weight) {
                tempCode = hfmCodeSet[j];
                hfmCodeSet[j] = hfmCodeSet[j + 1];
                hfmCodeSet[j + 1] = tempCode;
            }
        }
    }
    for (i = 1; i <= n; i ++) {
        printf("%c %d %s\n", hfmCodeSet[i].data, hfmCodeSet[i].weight, hfmCodeSet[i].code);
    }
}
int main(int argc, char const *argv[])

{
    huffmanCode hfmCodeSet[N + 1];
    huffmanTreeNode hfmTree[M + 1];
    int n;
    // input the number of letters
    scanf("%d", &n);
    
    // input the letters
    n = initialize(hfmCodeSet, n);
    
    // build a huffman tree using arrays and calculate the huffman codes
    hfmCoding(hfmCodeSet, hfmTree, n);
    if (n == 1) {
        printf("%d\n", strlen(hfmCodeSet[1].code));
        return 0;
    }
    // output the total length of huffman Code
    printf("%d\n", totalLength(hfmCodeSet));
    // output the weight and huffman Code of corresponding char
    sortDisplayCode(hfmCodeSet, n);
    return 0;
}

想阅读更多技术文章，请访问听云技术博客，访问听云官方网站感受更多应用性能优化魔力。