C言語-ハッシュ法(2)

C言語アルゴリズム-ハッシュ法

6 ハッシュ法(hashing) (2)

　QuickBASICの予約語ファイルqbfunc.datのデータをハッシュテーブルに格納する。[hashing2.c]

　予約語は、200個程度あるのでテーブルサイズは余裕を持って300にする。qbfunc.dat DOWNLOAD

　データファイルqbfunc.datは、次のような構造である。

ABS,1
ABSOLUTE,2
ACCESS,3
ALIAS,4
AS,5
ASC,6
ATN,7
BASE,8

　予約語とコード番号はカンマで区切られており、予約語は、アルファベット順に整列されている。コード番号は通し番号であるので、特に意味は持たない。

 1:
 2:
 3:
 4:
 5:
 6:
 7:
 8:
 9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
80:
81:
82:
83:

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>

struct yoyakugo {  /* 予約語 */
    char        name[12];
    int         code;
};
int     hashsize;                   /*ハッシュテーブルサイズ */
static struct yoyakugo  yy[300];    /* ハッシュテーブル */

int hashing(struct yoyakugo);   /* ハッシュテーブルへ登録 */
int hash1(char *);              /* ハッシュ関数1 */
int hash2(int);                 /* ハッシュ関数2 */
int primes(int);                /* 素数計算 */

int main(void)
{
    FILE    *fp;
    char    buff[80], *pb;
    int     i, cnt, cntmax = 1, line = 0;
    struct yoyakugo         ydata;

    hashsize = primes(300);         /* 300以下の最大の素数を計算 */
    printf("Hash Size=%d\n", hashsize);
    if((fp = fopen("qbfunc.dat", "r")) == NULL){    /* データファイル */
        printf("Can't open file.\n");
        exit(1);
    }
    while(fgets(buff, 80, fp) != NULL){       /* ハッシュテーブルへの登録 */
        pb = buff;
        do{         /* 小文字に変換 */
            *pb = tolower(*pb);
        }while(*++pb != '\n');
        *pb = '\0';
        sscanf(buff, "%[a-z0-9],%d", ydata.name, &ydata.code);
        cnt = hashing(ydata);
        if(cnt > cntmax)    /* 最大衝突回数 */
            cntmax = cnt;
        ++line;
        if(line >= 20){
            printf("--- Hit Return Key ---");
            gets(buff);
            line = 0;
        }
    }
    fclose(fp);
    printf("ハッシュ関数最大再計算回数 = %d\n", cntmax);
    printf("Return key\n");
    gets(buff);
    for(i = 0; i <= hashsize; ++i){       /* ハッシュテーブル格納状況表示 */
        if(yy[i].code)
            printf("1 ");
        else
            printf("0 ");
    }
    printf("\nReturn key\n");
    gets(buff);
    for(i = 0; i <= hashsize; ++i){       /* ハッシュテーブル内容表示 */
        printf("%s(%d) ", yy[i].name, yy[i].code);
    }
}
int hashing(struct yoyakugo yd)
{
    int     h, h1, h2 = 0;
    int     cnt = 1;    /* 衝突回数 */

    h1 = hash1(yd.name);    /* ハッシュ関数1値計算 */
    while(1){
        h = (h1 + h2 * cnt) % hashsize;   /* ハッシュテーブル番号計算 */
        if(yy[h].code == 0){        /* 登録 */
            strcpy(yy[h].name, yd.name);
            yy[h].code = yd.code;
            break;
        }
        h2 = hash2(h1);         /* ハッシュ関数値再計算 */
        ++cnt;      /* 衝突回数カウント */
    }
    --cnt;
    printf("Data=%-12s(%d)  Hash値=%3d  衝突回数=%4d\n", yd.name, yd.code, h, cnt);
    return(cnt);
}

　ユーザ関数hash1, hash2とprimesは前と同じであるので省略している。
　関数scanfは、標準入力からのデータを書式にしたがって取り込むが、37行目の関数sscanfは文字列データ(この場合文字配列buff)からデータを書式にしたがって取り込むものである。

sscanf(buff, "%[a-z0-9],%d", ydata.name, &ydata.code);

　%[a-z0-9]は、%sと同じ文字列の指定であるが、[ ]内の文字だけを取り込む。[a-z0-9]は、a～zまたは0～9のどれかに一致する文字だけを取り込み、それ以外の文字のときに取り込みを終える。

　QuickBASICの予約語ファイルqbfunc.datのデータをハッシュテーブル格納し、予約語を検索できるようにする。[hashing3.c]
　ユーザ関数hashing、hash1, hash2とprimesは省略している。
　47行目以降が検索の処理である。

 1:
 2:
 3:
 4:
 5:
 6:
 7:
 8:
 9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>

struct yoyakugo {   /* 予約語 */
    char        name[12];
    int         code;
};
int     hashsize;   /*ハッシュテーブルサイズ */
static struct yoyakugo  yy[300];    /* ハッシュテーブル */

int hashing(struct yoyakugo);   /* ハッシュテーブルへ登録 */
int hash1(char *);      /* ハッシュ関数1 */
int hash2(int);             /* ハッシュ関数2 */
int primes(int);        /* 素数計算 */

int main(void)
{
    FILE    *fp;
    char    buff[80], *pb;
    int     h, h1, h2;
    int     cnt, cntmax = 1;
    struct yoyakugo         ydata;

    hashsize = primes(300);         /* 300以下の最大の素数を計算 */
    printf("Hash Size=%d\n", hashsize);
    if((fp = fopen("qbfunc.dat", "r")) == NULL){    /* データファイル */
        printf("Can't open file.\n");
        exit(1);
    }
    /* ハッシュテーブルへの登録 */
    while(fgets(buff, 80, fp) != NULL){
        pb = buff;
        do{         /* 小文字に変換 */
            *pb = tolower(*pb);
        }while(*++pb != '\n');
        *pb = '\0';
        sscanf(buff, "%[a-z0-9],%d", ydata.name, &ydata.code);
        cnt = hashing(ydata);
        if(cnt > cntmax)    /* 最大衝突回数 */
            cntmax = cnt;
    }
    fclose(fp);
    printf("ハッシュ関数最大再計算回数 = %d\n", cntmax);

    while(1){
        printf("検索データ : ");
        gets(buff);
        if(!strcmp(buff, "end"))
            break;
        h1 = hash1(buff);
        h2 = 0;
        for(cnt = 1; cnt <= cntmax; ++cnt){
            h = (h1 + h2 * cnt) % hashsize;
            if(!strcmp(buff, yy[h].name)){
                printf("発見:%d回 Code=%d\n", cnt, yy[h].code);
                break;
            }
            h2 = hash2(h1);
        }
        if(cnt > cntmax)
            printf("見つかりませんでした\n");
    }
}