/* * CSSE, Monash University * CSE2304 Prac 3 Group B * Semester 1, 2003 * Sample Solution * * wurts [-c|-C] [fileName] * Counts and displays the frequency of each wurt in the input file * (filename) or standard input. * A wurt is defined as one of: * a sequence of digits * a sequence of alphanumeric characters beginning with a letter * a sequence of non-alphanumeric printable ASCII characters * All other characters are ignored except to indicate wurt boundaries. */ #include #include #include #include #include /* For convenience; specifies how wurts and their frequency are output. */ /* Wurt followed by decimal number and newline */ #define OUTPUT_FORMAT(wurt, freq) "%s %d\n", wurt, freq /* Increment for reading (up to INC characters before realloc) */ #define INC 10 /* Tree node */ typedef struct TNode { struct TNode *left, *right; char *key; int count; } Node; /* Abstract "wurts" data structure */ typedef struct { Node *root; int cs; /* case sensitive */ } WurtsDS; /* killTree * Recursively frees a tree and its contents. */ void killTree(Node *cur) { if(cur) { /* Node exists. Destroy subtrees. */ killTree(cur->left); killTree(cur->right); /* Free data */ free(cur->key); /* Finished; free self. */ free(cur); } } /* displayWurtsCount1 * Helper function for displayWurtsCount: inorder tree traversal to output * wurts and their frequencies. */ void displayWurtsCount1(Node *cur) { if(cur) { /* Node exists, output left subtree */ displayWurtsCount1(cur->left); /* Output current node */ printf(OUTPUT_FORMAT(cur->key, cur->count)); /* Output right subtree */ displayWurtsCount1(cur->right); } } /* displayWurtsCount * Outputs all wurts and their frequencies. */ void displayWurtsCount(WurtsDS *w) { /* Call helper function */ displayWurtsCount1(w->root); } /* incWurtsCount1 * Helper function for incWurtsCount: binary search tree insertion/update to * increment the frequency count of the provided wurt. The parameter cs * indicates case sensitivity. Returns the current node or NULL on failure. * Note that if something goes wrong, the operation is aborted and the tree * is unrecoverably destroyed; memory is not freed. */ Node *incWurtsCount1(char *wurt, Node *cur, int cs) { int cmp; if(cur) { /* Node exists, compare key value */ cmp = (cs?strcmp:strcasecmp)(wurt, cur->key); if(cmp < 0) { /* Key is greater than wurt, go left */ cur->left = incWurtsCount1(wurt, cur->left, cs); } else if(cmp > 0) { /* Key is greater than wurt, go right */ cur->right = incWurtsCount1(wurt, cur->right, cs); } else { /* Found the appropriate key. Increment count. */ cur->count++; } } else { /* Empty subtree found, insert wurt as new node. */ cur = (Node *)malloc(sizeof(Node)); if(!cur) return NULL; /* Memory allocation failure */ cur->key = strdup(wurt); if(!cur->key) { /* Memory allocation failure, clean up and abort */ free(cur); return NULL; } /* No children */ cur->left = NULL; cur->right = NULL; /* Single occurrence */ cur->count = 1; } /* Pass node up the tree */ return cur; } /* incWurtsCount * Increments the frequency count of the provided wurt. Returns 1 if * successful, 0 otherwise. */ int incWurtsCount(char *wurt, WurtsDS *w) { /* Call helper function */ w->root = incWurtsCount1(wurt, w->root, w->cs); return w->root != NULL; } /* initWurtsDS * Sets up the data structure used to store the wurts. Returns 1 if * successful, 0 otherwise. */ int initWurtsDS(WurtsDS *w, int caseSensitive) { /* No root node */ w->root = NULL; /* Remember case sensitivity */ w->cs = caseSensitive; return 1; } /* destroyWurtsDS * Cleans up the data structure used to store the wurts. Returns 1 if * successful, 0 otherwise. */ int destroyWurtsDS(WurtsDS *w) { /* Destroy tree and its data */ killTree(w->root); return 1; } /* getNextWurt * Reads the next wurt from the provided input stream (source). Returns a * dynamically allocated string containing the wurt, or NULL to indicate EOF * or error. Caller must free the returned wurt. */ char *getNextWurt(FILE *source) { /* Allocate one "block" of memory */ char *t = (char *)malloc(sizeof(char) * INC); int i = 0, c, state; if(!t) /* Out of memory */ return NULL; while(isspace(c = getc(source)) || c > 127) ; /* eat irrelevant characters */ /* c contains the first character of our wurt. */ if (isdigit(c)) state = 1; /* number */ else if(isalpha(c)) state = 2; /* identifier */ else state = 3; /* symbol */ while(c != EOF) { if((state == 1 && !isdigit(c)) || (state == 2 && !isalnum(c)) || (state == 3 && (!ispunct(c) || c > 127))) /* Found unwanted character, finished reading. */ break; t[i++] = c; /* Add current character to string */ if(i % INC == 0) { /* Out of space, expand string */ t = (char *)realloc(t, sizeof(char) * (i + INC)); if(!t) /* Out of memory */ return NULL; } /* Get next character */ c = getc(source); } /* Pretend we didn't just read the next character */ ungetc(c, source); if(!i) { /* String length is zero, EOF. Clean up. */ free(t); return NULL; } /* Null-terminate and return string. */ t[i] = 0; return t; } /* wurts * Determines and outputs the frequency of each wurt in an input sequence. * Reads wurts from the input stream IN, which must be open for reading. * cs represents required case sensitivity (nonzero = case sensitive). * Returns 1 if successful, 0 otherwise (indicating memory allocation * failure). */ int wurts(int cs, FILE *in) { WurtsDS wurt; char *s; /* Set up */ if(!initWurtsDS(&wurt, cs)) /* Couldn't allocate space for data structure, clean up */ return 0; while((s = getNextWurt(in))) { /* Remember and free current wurt */ if(!incWurtsCount(s, &wurt)) /* Out of memory */ return 0; free(s); } /* All wurts read, output */ displayWurtsCount(&wurt); /* Clean up */ destroyWurtsDS(&wurt); return 1; } /* main * Error-checks parameters and handles error messages. */ int main(int argc, char *argv[]) { int cs = 1; FILE *source = stdin; /* Use stdin if no file supplied. */ /* Skip program name. */ argv++; argc--; /* Check for command-line options. */ if(argc) { if(!strcmp(*argv, "-c")) { cs = 0; /* insensitive */ argv++; argc--; } else if(!strcmp(*argv, "-C")) { argv++; argc--; /* default */ } } /* Check for filename. */ if(argc) { /* Found one. Try opening it. */ if(!(source = fopen(*argv, "r"))) { /* No luck; quit. */ fprintf(stderr, "error: could not open file '%s'\n", *argv); return 1; } } /* Call the wurts function to do all the dirty work. */ if(!wurts(cs, source)) /* Something went wrong allocating memory; quit. */ fprintf(stderr, "error: could not allocate memory\n"); /* Close any opened files. */ if(source != stdin) fclose(source); /* Finished. */ return 0; }