/*
 * routines.h -- word-scanning and dictionary helper routines, guarded by
 * ROUTINES_H. Depends on "basic.h" for FILE-independent project names used
 * below (OFF, ON, LIMIT, struct Element).
 *
 * NOTE(review): this copy of the file appears to have lost its line breaks
 * and every span of text that sat between a '<' and a following '>' (the
 * targets of the first three #include directives, the middle of NextWord,
 * most of size() and the headers of the routines that follow it). Because
 * the line breaks are gone, each "//" comment now swallows the remainder of
 * its physical line. Restore the file from a clean copy before compiling;
 * the notes below describe only what is still visible.
 *
 * NextWord(fd, buffer)
 *   Reads characters from fd with fgetc() and stores ASCII letters
 *   ('A'-'Z', 'a'-'z') into buffer. Codes 10 and 32 (newline and space) end
 *   the current word. Returns 1 when the loop stopped before EOF and 0 when
 *   EOF was reached.
 *
 * check(word, record, size)
 *   Linear scan that compares word against record[x].words with strcmp()
 *   and returns the index of the first match, or -1 when nothing matches.
 *   The binary search mentioned in the older comment is commented out.
 *   NOTE(review): the loop runs for x = 0 .. size inclusive, so record[size]
 *   is read. Confirm whether callers pass the element count (then this is
 *   an out-of-bounds read) or the last valid index.
 *
 * chopStem(word)
 *   Looks for "ed", "er", "ing", "s" in that order using strstr(). For the
 *   first of these that occurs anywhere in word: if that first occurrence
 *   is at the very end of the string, the string is truncated in place
 *   there and word is returned; otherwise NULL is returned. NULL is also
 *   returned when none of the four substrings occurs.
 *   The older comment that says 0/1 is returned does not match the code,
 *   which returns a char pointer (word or NULL).
 *   NOTE(review): because strstr() reports the first occurrence and the
 *   else-if chain stops at the first substring found, words such as
 *   "needed" or "seeds" are not stemmed even though they end in a listed
 *   suffix. Presumably a check against the end of the string was intended;
 *   verify against callers before changing.
 *
 * count(record, index)
 *   Increments record[index].count.
 *
 * size(fp) and the remaining code
 *   Only fragments are visible: a fragment that lower-cases 'A'-'Z' by
 *   adding 32 and copies buffer into record[i].words (setting length from
 *   strlen() and zeroing rank and count), and a fragment that assigns
 *   increasing rank values and prints each ranked word with its count.
 *   The enclosing function headers are not visible in this copy.
 */
#ifndef ROUTINES_H #define ROUTINES_H #include #include #include #include "basic.h" /* function NextWord will return the next word (if any) from a specified file descriptor */ int NextWord(FILE *fd, char *buffer) { int input; int i=0,idx; int is_identifier=OFF; /* the flag indicates whether the current buffer is an identifier or not */ while((input=fgetc(fd))!=EOF) { if((is_identifier==OFF)&&(i==0)) { if(((input>='A')&&(input<='Z'))||((input>='a')&&(input<='z'))) { buffer[i++]=input; is_identifier=ON; } else { // i++; /* commented later */ continue; } } else if((is_identifier==OFF)&&(i!=0)) { if((input==10)||(input==32)) { break; // for(i=0;i='A')&&(input<='Z'))||((input>='a')&&(input<='z'))) { if(i<(LIMIT-2)) buffer[i++]=input; else { i++; continue; } } else if ((input==10)||(input==32)) { is_identifier=OFF;/* added later */ break; } else { is_identifier=OFF; i++; continue; } } } if(input!=EOF) return 1; else return 0; } /* function check will look up word in the dictionary data-structure */ /* Binary search is used here for its efficiency */ /* it returns the index of the matching element in the records; if there is no matching element found, -1 is returned */ int check(char *word,struct Element *record, int size) { int l=0;int r=size;int x; int metric; for(x=l;x<=r;x++) /* woefully inefficient -- should use binary search */ { metric=strcmp(word,record[x].words); if(metric==0) return x; } // while(r>=l) // { // x=(l+r)/2; // metric=strcmp(word,record[x].words); // if (metric<0) r=x-1; // else l=x+1; // if(metric==0) //return x; // } return -1; } /* function chopStem will remove a stem, if possible */ /* for simplicity, we only consider "ed","er","ing","s" at this moment; however, this can be extended easily */ /* if the stemming is failed, returns 0; otherwise, 1 is returned */ char * chopStem(char *word ) { char *key; if((key=strstr(word,"ed"))!=NULL) { if(*(key+2)=='\0') { *key='\0'; return word; } else return NULL; } else if((key=strstr(word,"er"))!=NULL) { 
if(*(key+2)=='\0') { *key='\0'; return word; } else return NULL; } else if((key=strstr(word,"ing"))!=NULL) { if(*(key+3)=='\0') { *key='\0'; return word; } else return NULL; } else if((key=strstr(word,"s"))!=NULL) { if(*(key+1)=='\0') { *key='\0'; return word; } else return NULL; } else return NULL; } /* function count will add one to the counter for the word */ void count(struct Element *record,int index) { record[index].count++; return; } int size(FILE *fp) { int input; int i=0; int index; char buffer[LIMIT]; for(index=0;index='A')&&(buffer[idx]<='Z')) buffer[idx]+=32; /* if uppercase then convert it to lower case */ } strcpy(record[i].words,buffer); record[i].length=strlen(buffer); record[i].rank=0; record[i].count=0; /* the next line is for debugging purpose */ // printf("No.%d %s \n",i,record[i].words); for(idx=0;idx=LIMIT) idx=LIMIT-1; // record[i].length=idx; // record[i].rank=0; // record[i].count=0; // for(idx=0;idx 0) { record[i].rank=rank++; printf("\"%s\" has rank No.%d (%ix)\n", record[i].words, record[i].rank, record[i].count); } i++; } return; } #endif