post
poster: psYchotic
description: Word frequency counter
language: C
[download]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "myarray.h"
#include "mystring.h"

/*
 * Word-frequency table stored as two parallel arrays: the count at index i
 * of `freqs` belongs to the word at index i of `words`.
 */
struct Frequencies {
    /* Distinct words seen so far; elements are MString values. */
    MArray words;
    /* Occurrence counts (unsigned long long), parallel to `words`. */
    MArray freqs;
    /* Length of the longest stored word; used as the column width when printing. */
    unsigned int longest;
    /* Running count of all words recorded (incremented once per freq_update call). */
    unsigned long long total;
};

/*
 * Sorts the frequency table in place, highest count first.
 *
 * `words` and `freqs` are parallel arrays, so every move is applied to both.
 * The algorithm is an insertion sort: the entry at i + 1 is lifted out of the
 * arrays and re-inserted into the already sorted prefix [0, i].  Entries with
 * equal counts keep their relative order.
 *
 * A table with fewer than two entries is left untouched.
 */
void sort_freqs(struct Frequencies *freq) {
    int i, j;

    /* Written as `i + 1 < len` rather than `i < len - 1` so that an empty
     * table cannot wrap the bound around should `len` be an unsigned type. */
    for (i = 0; i + 1 < freq->freqs->len; i++) {
        unsigned long long cur = myarray_get(freq->freqs, unsigned long long, i + 1);
        MString temp = myarray_get(freq->words, MString, i + 1);
        myarray_remove_index(freq->freqs, i + 1);
        myarray_remove_index(freq->words, i + 1);

        /* Walk left past every strictly smaller count. */
        j = i;
        while (j >= 0 && cur > myarray_get(freq->freqs, unsigned long long, j)) {
            j--;
        }

        /* j is the nearest entry with a count >= cur, or -1 if there is
         * none; in both cases the lifted entry belongs right after it. */
        myarray_insert(freq->freqs, cur, j + 1);
        myarray_insert(freq->words, temp, j + 1);
    }
}

/*
 * Looks up `word` in the frequency table.
 *
 * Returns the index of the first entry of freq->words whose text equals
 * word->str, or -1 when the word has not been stored yet.  The lookup is a
 * linear scan over all stored words.
 *
 * Uses strcmp(), which is declared in <string.h>.
 */
int indexOf(struct Frequencies *freq, MString word) {
    int i;

    for (i = 0; i < freq->words->len; i++) {
        MString candidate = myarray_get(freq->words, MString, i);

        /* Cheap length comparison first; only equal-length words need a
         * character-by-character comparison. */
        if (candidate->len != word->len) {
            continue;
        }
        if (strcmp(candidate->str, word->str) == 0) {
            return i;
        }
    }

    return -1;
}

/*
 * Records one occurrence of `word` in the table.
 *
 * An unknown word is duplicated (the caller keeps ownership of `word`),
 * appended to `words` with a matching zero entry in `freqs`, and may raise
 * `longest`.  In every case the word's count and the overall total are then
 * incremented by one.
 */
void freq_update(struct Frequencies *freq, MString word) {
    static const unsigned long long initial_count = 0;
    int slot = indexOf(freq, word);

    if (slot < 0) {
        /* First sighting: track the widest word for later column sizing. */
        if (freq->longest < word->len) {
            freq->longest = word->len;
        }

        /* Store a private copy together with a zeroed counter; both land at
         * the end of their arrays, so the new slot is the last index. */
        MString copy = mystring_dup(word);
        myarray_append(freq->words, copy);
        myarray_append(freq->freqs, initial_count);
        slot = freq->words->len - 1;
    }

    myarray_get(freq->freqs, unsigned long long, slot)++;
    freq->total += 1;
}

void print_stats(struct Frequencies *freq) {
    int i;
    printf("%-*s | %-20s | %s\n\n", freq->longest, "Word", "Frequency", "Percentage");
    for (i = 0; i < freq->words->len; i++) {
        printf("%-*s | %-20llu | %.1f\n", freq->longest, myarray_get(freq->words, MString, i)->str, myarray_get(freq->freqs, unsigned long long, i), (double) myarray_get(freq->freqs, unsigned long long, i)/freq->total);
    }
}

int main(int argc, char **argv) {
    if (argc != 2) {
        printf("I expect a single parameter, which should be a valid path to a file.\n");
        exit(EXIT_SUCCESS);
    }
    FILE *input = fopen(argv[1], "r");
    struct Frequencies *freq = malloc(sizeof(struct Frequencies));
    freq->words = myarray_new(1, 5, sizeof(MString));
    freq->freqs = myarray_new(1, 5, sizeof(unsigned long long));
    freq->longest = 0;
    MString word = mystring_new(1, 1);
    char ch;

    if (input == NULL) {
        perror("*ERROR* Can't open file");
    }

    while (1) {
        ch = fgetc(input);
        if (ch == EOF) {
            break;
        } else if (isalpha(ch)) {
            mystring_char_append(word, tolower(ch));
        } else {
            if (word->len > 0) {
                freq_update(freq, word);
            }
            mystring_clear(word);
        }
    }

    sort_freqs(freq);
    print_stats(freq);

    fclose(input);
    mystring_free(word);
    myarray_free(freq->words);
    myarray_free(freq->freqs);
    free(freq);

    return 0;
}