Exercise 6.1 - getword
Question
Our version of getword does not properly handle underscores, string constants, comments, or preprocessor control lines. Write a better version.
#include <stdio.h>
#include <ctype.h>
#include <string.h>
/*
 * One entry per C keyword: the keyword text and the number of times it
 * has been seen in the input.
 */
struct key {
    char *word;     /* keyword spelling */
    int count;      /* occurrences counted so far */
};

/*
 * Keyword table.  The entries must stay in ascending strcmp order
 * because the table is searched with a binary search.
 */
struct key keytab[] = {
    { "auto", 0 },
    { "break", 0 },
    { "case", 0 },
    { "char", 0 },
    { "const", 0 },
    { "continue", 0 },
    { "default", 0 },
    { "do", 0 },
    { "double", 0 },
    { "else", 0 },
    { "enum", 0 },
    { "extern", 0 },
    { "float", 0 },
    { "for", 0 },
    { "goto", 0 },
    { "if", 0 },
    { "int", 0 },
    { "long", 0 },
    { "register", 0 },
    { "return", 0 },
    { "short", 0 },
    { "signed", 0 },
    { "sizeof", 0 },
    { "static", 0 },
    { "struct", 0 },
    { "switch", 0 },
    { "typedef", 0 },
    { "union", 0 },
    { "unsigned", 0 },
    { "void", 0 },
    { "volatile", 0 },
    { "while", 0 }
};
/*
 * mygetword: read the next token from input into the caller's buffer
 * (first argument); the second argument is the buffer size.  main keeps
 * calling it until it returns EOF.
 */
int mygetword(char *, int);
/*
 * binsearch: look up a word in a table of struct key holding the given
 * number of entries; returns the index of the match, or -1 if none.
 */
int binsearch(char *, struct key*, int);
/* Number of entries in keytab, computed from the array's size. */
#define NKEYS (sizeof(keytab)/sizeof(keytab[0]))
/* Size of the word buffer that main hands to mygetword. */
#define MAXWORD 100
/*
 * main: count the C keywords appearing on standard input and print each
 * keyword that occurred at least once, preceded by its count.
 *
 * argc and argv are accepted but not used.  Returns 0.
 */
int main(int argc, char *argv[])
{
    char word[MAXWORD];
    int n;

    while (mygetword(word, MAXWORD) != EOF) {
        /*
         * The <ctype.h> functions are only defined for unsigned char
         * values and EOF, so a plain char must be converted first.
         */
        if (isalpha((unsigned char)word[0])) {
            n = binsearch(word, keytab, (int)NKEYS);
            if (n >= 0)
                keytab[n].count++;
        }
    }

    /* NKEYS has type size_t; cast so the comparison stays signed. */
    for (n = 0; n < (int)NKEYS; n++) {
        if (keytab[n].count > 0)
            printf("%4d %s\n", keytab[n].count, keytab[n].word);
    }
    return 0;
}
/*
 * binsearch: find word in keytab[0] .. keytab[n-1].
 *
 * The table must be sorted in ascending strcmp order on its word field.
 *
 * Returns the index of the entry whose word equals the argument, or -1
 * when there is no such entry (including when n <= 0).
 */
int binsearch(char *word, struct key keytab[], int n)
{
    int low = 0;
    int high = n - 1;

    while (low <= high) {
        /* Written this way so the midpoint cannot overflow int. */
        int mid = low + (high - low) / 2;
        int cond = strcmp(word, keytab[mid].word);

        if (cond < 0)
            high = mid - 1;
        else if (cond > 0)
            low = mid + 1;
        else
            return mid;
    }
    return -1;
}
/* Character input with pushback; both are defined later in this file. */
int getch(void);
void ungetch(int);

/*
 * skip_until: discard input up to and including the next unescaped
 * `stop` character.  A backslash always swallows the character after
 * it, which covers escaped quotes inside literals as well as
 * backslash-newline line continuation.
 *
 * Returns stop, or EOF if the input ran out first.
 */
static int skip_until(int stop)
{
    int c;

    while ((c = getch()) != EOF && c != stop) {
        if (c == '\\' && getch() == EOF)
            return EOF;
    }
    return c;
}

/*
 * skip_block_comment: discard the remainder of a block comment whose
 * opening slash-star has already been consumed.
 *
 * Returns '/' (the final character of the closing star-slash), or EOF
 * if the input ran out first.
 */
static int skip_block_comment(void)
{
    int prev = 0;
    int c;

    while ((c = getch()) != EOF) {
        if (prev == '*' && c == '/')
            return c;
        prev = c;
    }
    return EOF;
}

/*
 * mygetword: fetch the next token from input into word.
 *
 * White space, comments (block and line style) and preprocessor control
 * lines are skipped.  A string or character constant is consumed in
 * full and reported as its opening quote character.  An identifier is a
 * letter or underscore followed by letters, digits and underscores; any
 * other character is a one-character token.
 *
 * word  - destination buffer, always '\0'-terminated when lim > 0
 * lim   - size of word in bytes; at most lim-1 characters are stored
 *         and the rest of an over-long identifier is consumed but
 *         dropped, so its tail is never mistaken for a separate word
 *
 * Returns the first character of the token, or EOF at end of input (in
 * which case word is the empty string).
 */
int mygetword(char *word, int lim)
{
    char *w = word;
    int room = lim - 1;     /* characters that still fit before the '\0' */
    int first;
    int c;

    /* Skip everything that cannot contain a token of interest. */
    for (;;) {
        do {
            c = getch();
        } while (c != EOF && isspace((unsigned char)c));

        if (c == '#') {                 /* preprocessor control line */
            c = skip_until('\n');
        } else if (c == '/') {
            int next = getch();

            if (next == '/') {          /* line comment */
                c = skip_until('\n');
            } else if (next == '*') {   /* block comment */
                c = skip_block_comment();
            } else {                    /* an ordinary '/' operator */
                if (next != EOF)
                    ungetch(next);
                break;
            }
        } else {
            break;
        }
        if (c == EOF)
            break;
    }

    /* String and character constants: swallow through the closing quote. */
    if (c == '"' || c == '\'') {
        if (skip_until(c) == EOF)
            c = EOF;
    }

    if (c == EOF) {
        if (lim > 0)
            *w = '\0';
        return EOF;
    }

    first = c;
    if (isalpha((unsigned char)c) || c == '_') {
        do {
            if (room > 0) {
                *w++ = (char)c;
                room--;
            }
            c = getch();
        } while (c != EOF && (isalnum((unsigned char)c) || c == '_'));
        /* EOF is never pushed back; the next read reports it again. */
        if (c != EOF)
            ungetch(c);
    } else if (room > 0) {
        *w++ = (char)c;
    }
    if (lim > 0)
        *w = '\0';
    return first;
}
#define BUFSIZE 100

/*
 * Pushback stack shared by getch and ungetch.  The slots are int rather
 * than char so that every value getchar can return, including EOF and
 * characters above 127, is handed back unchanged.
 */
int buf[BUFSIZE];
int bufp = 0;           /* index of the next free slot in buf */

/*
 * getch: return the most recently pushed-back character if there is
 * one, otherwise read the next character from standard input.
 */
int getch(void)
{
    return (bufp > 0) ? buf[--bufp] : getchar();
}

/*
 * ungetch: push c back so that the next getch returns it.  When the
 * stack is already full the character is dropped and a message is
 * printed.
 */
void ungetch(int c)
{
    if (bufp >= BUFSIZE)
        printf("ungetch: too many characters\n");
    else
        buf[bufp++] = c;
}