OK, here is the final version for stage 2 - I'm not planning to do anything more to it for now.
I improved it by moving more of the code from main() into functions, and I've made a custom isword() that's really a macro. It behaves exactly like isalnum() except that it uses a table that you can add characters to by specifying a character set file as the second parameter on the shell.
I jumped ahead of myself a bit by using a macro for that, but it seemed easy enough for this exercise.
I also redid the count loop the way falemagn suggested. His modification seems obvious in hindsight. I guess I have a while to go yet :-)
/*
Simple word count program stage 2 revised
counts words in text file passed as argument
presents summary of word lengths
uses custom character list for determining words
*/
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
/* data */
/* lookup table indexed by character code 0..255; non-zero marks a word character */
static char chartypetable[256] = {0};
/* prototypes */
void maketypetable(const char* charset);
int countwords(const char* filename, int* lengths);
void printsummary(const char* filename, int numwords, int* lengths);
/*
isword(c) is non-zero when c is marked in chartypetable.
The argument is converted to unsigned char before indexing so that a plain
char holding a negative value (bytes >= 0x80 where char is signed) still
lands inside the 256 entry table instead of reading before it.
Do not pass EOF: it would be folded onto an ordinary character code.
*/
#define isword(c) (chartypetable[(unsigned char)(c)])
/* main program */
int main(int argc, char** argv)
{
    int buckets[9] = {0};
    int total;
    const char* textpath;
    const char* charsetpath;

    /* the text file name is mandatory */
    if (argc < 2) {
        puts("Usage: wordcount <file name> [character set]");
        return 1;
    }
    textpath = argv[1];

    /* the character set file is optional; a null pointer means "none given" */
    charsetpath = (argc > 2) ? argv[2] : 0;

    /* the character table has to exist before any counting is done */
    maketypetable(charsetpath);

    /* count the words, then report the totals */
    total = countwords(textpath, buckets);
    printsummary(textpath, total, buckets);
    return 0;
}
/*
Prints the total word count followed by the breakdown by word length.
filename - name of the counted file, only used in the heading line
numwords - total number of words counted
lengths  - array of 9 counters: [0] holds words shorter than 4 characters,
           [1]..[7] hold words of exactly 4..10 characters,
           [8] holds words longer than 10 characters
*/
void printsummary(const char* filename, int numwords, int* lengths)
{
    int n;
    printf("Counted a total of %d word(s) in file '%s'\n", numwords, filename);
    printf("Words < 4 chars: %d\n", lengths[0]);
    /* bucket n holds words of n+3 characters; the loop has to reach bucket 7
       or the 10 character words are never reported */
    for (n=1; n<8; n++) {
        printf("Words of %d chars: %d\n", n+3, lengths[n]);
    }
    printf("Words > 10 chars: %d\n", lengths[8]);
}
/*
Counts the words in the text file 'filename'.
A word is a run of consecutive characters for which isword() is non-zero.
filename - file to read; if it cannot be opened an error is printed and the
           program exits with status 1
lengths  - array of at least 9 counters that are incremented per word:
           [0] for words shorter than 4 characters,
           [1]..[7] for words of exactly 4..10 characters,
           [8] for words longer than 10 characters
returns the total number of words found
*/
int countwords(const char* filename, int* lengths)
{
    int numwords = 0;
    int length = 0;
    int character;
    int atend = 0;
    FILE* file;

    file = fopen(filename, "r");
    if (!file) {
        printf("Error: couldn't open file '%s' for input\n", filename);
        exit(1);
    }

    /* the loop runs one extra time for EOF so that a word which ends exactly
       at the end of the file is still counted */
    while (!atend) {
        character = getc(file);
        atend = (character == EOF);
        /* EOF is never passed to isword(); it always acts as a terminator */
        if (!atend && isword(character)) {
            /* character is in a word so increment the length */
            length++;
        }
        else if (length > 0) {
            /* a word has just ended so update our counters */
            numwords++;
            /* fill a range bucket based on the word length */
            if (length < 4) {
                /* for lengths < 4 fill bucket 0 */
                lengths[0]++;
            }
            else if (length < 11) {
                /* for lengths 4...10 fill buckets 1-7 */
                lengths[length-3]++;
            }
            else {
                /* for lengths > 10 fill bucket 8 */
                lengths[8]++;
            }
            length = 0;
        }
    }

    fclose(file);
    return numwords;
}
void maketypetable(const char* charset)
{
    /*
    build the lookup table that isword() reads
    charset may be a null pointer, in which case only the isalnum()
    characters are marked
    */
    int code;
    FILE* extra;

    /* every character isalnum() accepts is a word character */
    code = 0;
    while (code < 256) {
        if (isalnum(code)) {
            chartypetable[code] = 1;
        }
        code++;
    }

    /* optionally merge in the non-space characters of a user supplied file */
    if (charset) {
        extra = fopen(charset, "r");
        if (!extra) {
            printf("Couldn't open character set definition file '%s'\n", charset);
        }
        else {
            for (code = getc(extra); code != EOF; code = getc(extra)) {
                if (isspace(code)) {
                    continue;
                }
                chartypetable[code] = 1;
            }
            fclose(extra);

            /* list every character that now counts as part of a word */
            printf("Added character set definition from file '%s'\nThe following characters will be treat as valid within words:\n", charset);
            code = 0;
            while (code < 256) {
                if (chartypetable[code]) {
                    printf("%c ", code);
                }
                code++;
            }
            putchar('\n');
        }
    }

    /* a blank line is always printed, with or without a character set file */
    printf("\n");
}
I think that's enough for one evening!