/* Listing 1: A natural language processor -- NATURAL.C */


#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>

/* Restriction code that marks the ING form of a verb. 73 is the  */
/* ASCII code for 'I'; match_verb_ing() compares it against the   */
/* characters at indexes 24-32 of a dictionary record.            */
#define  ING   73        /* Restriction for ING word */

/* Set-up */
void initialize(void);
void reset_sentence(void);
/* Dictionary look-up. The indexing code reads a record as:       */
/*   indexes  0-13  word                                          */
/*   indexes 14-19  type (compared against "VERB", "NOUN", ...)   */
/*   indexes 24-32  restriction codes                             */
/*   indexes 34-47  root                                          */
void get_record(char *);
char *extract_word(void);
int  match_record(char *, int);
char *extract_root(void);
/* Sentence analysis */
void check_underlying(void);
int  check_type(char *,int);
void check_subject(void);
void check_action(void);
void check_place(void);
/* Response generation */
void make_response(void);
void make_answer(int);
void get_verb_ing(void);
int  match_verb_ing(void);

FILE *infile;                 /* dictionary file, opened in main()          */
char dic_record[80];          /* dictionary record most recently read       */
int  sentence;                /* index of the current input sentence        */
int  word_ct;                 /* index of the word being stored; after      */
                              /* tokenizing, the number of words stored     */
char word_array[10][15];      /* words of the current sentence, as typed    */
char root_array[10][15];      /* root of each word that matched a VERB      */
char prime_types[10][11];     /* type assigned per word by check_underlying */
char phrases[10][11];         /* phrase assigned per word by                */
                              /* check_underlying                           */
char type_array[10][5][11];   /* per word: types found in the dictionary,   */
                              /* or "NAME"/"NOTFOUND"; an empty string ends */
                              /* the list                                   */
char subjects[20][15];        /* per sentence: the word typed NAME          */
char actions[20][15];         /* per sentence: root of the word typed VERB  */
char places[20][31];          /* per sentence: PREPPHRASE words, each       */
                              /* preceded by a space                        */
char response[80];            /* reply text printed by main()               */


/*****************************************************/
/* Read sentences from standard input until an empty */
/* line, end of input, or 20 sentences have been     */
/* handled. Each sentence is split on blanks into at */
/* most 10 words, looked up in the dictionary,       */
/* analysed and answered with a one-line response.   */
/* Returns EXIT_FAILURE when the dictionary file     */
/* cannot be opened, otherwise 0.                    */
/*****************************************************/
int main(void)
{
    char  *cur_word;
    char  in_sentence[80];
    int   ch;

    initialize();
    /* The dictionary is only ever read, so do not ask for write access. */
    if ((infile = fopen("diction", "r")) == NULL) {
        printf ("\nError opening dictionary\n");
        return EXIT_FAILURE;
    }
    printf("\nSentence: ");
    fflush(stdout);

    /* fgets() bounds the read to the buffer; gets() could not. */
    while (fgets(in_sentence, sizeof in_sentence, stdin) != NULL) {
        size_t len = strcspn(in_sentence, "\r\n");

        if (in_sentence[len] == '\0') {
            /* No newline was stored: the line was longer than the  */
            /* buffer (or input ended). Discard the remainder so it */
            /* is not mistaken for the next sentence.               */
            while ((ch = getchar()) != '\n' && ch != EOF)
                ;
        }
        in_sentence[len] = '\0';   /* drop the newline fgets() keeps */

        if (in_sentence[0] == '\0') break;
        reset_sentence();

        /* Store at most 10 words; get_record() files each one */
        /* under the current value of word_ct.                 */
        cur_word = strtok(in_sentence, " ");
        while (cur_word != NULL && word_ct < 10) {
            get_record(cur_word);
            cur_word = strtok(NULL, " ");
            word_ct++;
        }

        check_underlying();

        check_subject();
        check_action();
        check_place();

        make_response();
        printf("Response: %s\n\nSentence: ", response);
        fflush(stdout);

        /* Only 20 sentences can be remembered. */
        if (++sentence > 19) break;
    }   /*  end while  */

    fclose(infile);
    return 0;
}

/*****************************************************/
/* Start-up initialization: empty the subject,       */
/* action and place entries kept for each of the 20  */
/* sentences and make sentence 0 the current one.    */
/*****************************************************/
void initialize()
{
    int slot = 0;

    sentence = 0;
    while (slot < 20) {
        places[slot][0]   = '\0';
        actions[slot][0]  = '\0';
        subjects[slot][0] = '\0';
        slot++;
    }
}

/*****************************************************/
/* These variables are initialized for each new      */
/* input sentence (each of the 10 word entries for   */
/* the input sentence has 5 type_array entries).     */
/*****************************************************/
void reset_sentence()
{
    int i,j;
    word_ct                     = 0;
    for (i=0; i<10; i++) {
        word_array[i][0]        = '\0';
        root_array[i][0]        = '\0';
        prime_types[i][0]       = '\0';
        phrases[i][0]           = '\0';
        for (j=0; j<5; j++)
            type_array[i][j][0] = '\0';
    }
    return;
}

/*****************************************************/
/* Scan the dictionary for every record whose word   */
/* matches pass_word and collect the types found in  */
/* type_array[word_ct] (at most 5). If no record     */
/* matches, the word is typed "NAME" when it starts  */
/* with an upper-case letter, otherwise "NOTFOUND".  */
/* The word itself is saved in word_array[word_ct],  */
/* truncated to the 14 characters an entry can hold. */
/*****************************************************/
void get_record(char *pass_word)
{
    const int max_words = (int) (sizeof word_array / sizeof word_array[0]);
    const int max_types =
        (int) (sizeof type_array[0] / sizeof type_array[0][0]);
    int types = 0;

    /* Nowhere to store the word: refuse rather than write out of bounds. */
    if (word_ct < 0 || word_ct >= max_words)
        return;

    rewind (infile);
    /* Loop on fgets() itself rather than feof(), so a last record  */
    /* without a trailing newline is still examined. Stop once all  */
    /* type slots for this word are used.                           */
    while (types < max_types &&
           fgets(dic_record, sizeof dic_record, infile) != NULL) {
        if (match_record(pass_word, types) == 0)
            types++;
    }

    if (types == 0) {
        /* <ctype.h> functions need an unsigned char value. */
        if (isupper( (unsigned char) pass_word[0]))
            strcpy(type_array[word_ct][0], "NAME");
        else
            strcpy(type_array[word_ct][0], "NOTFOUND");
    }

    /* Bounded copy: an input word may be longer than an entry. */
    strncpy(word_array[word_ct], pass_word,
            sizeof word_array[word_ct] - 1);
    word_array[word_ct][sizeof word_array[word_ct] - 1] = '\0';
    return;
}

/*****************************************************/
/* Compare the passed word (ignoring case) with the  */
/* word in the current dictionary record. On a match */
/* the record's type (NOUN, VERB, etc.) is stored in */
/* type_array[word_ct][types]; for a VERB the root   */
/* is also copied to root_array[word_ct].            */
/* Returns 0 on a match, 1 otherwise (including when */
/* types is not a valid slot number).                */
/*****************************************************/
int  match_record(char *pass_word, int types)
{
    int i, j;
    int record_len;
    char *dic_word;

    /* Each word has 5 type slots; never index outside them. */
    if (types < 0 ||
        types >= (int) (sizeof type_array[0] / sizeof type_array[0][0]))
        return(1);

    dic_word = extract_word();
    /* Check if passed word equals dictionary word   */
    if (strcmpi(pass_word, dic_word) != 0) return(1);

    /* Word found, get the type. The scan is limited to the text   */
    /* actually read: bytes beyond the terminator belong to an     */
    /* earlier, longer record.                                     */
    record_len = (int) strlen(dic_record);
    for (i=14,j=0; i<20 && i<record_len; i++) {
       if (isspace((unsigned char) dic_record[i])) break;
       type_array[word_ct][types][j++] = dic_record[i];
    }
    /* Trim the type                                 */
    type_array[word_ct][types][j] = '\0';

    if (strcmp(type_array[word_ct][types], "VERB") == 0)
        strcpy(root_array[word_ct], extract_root());

    return(0);
}

/*****************************************************/
/* Extract the word from the current dictionary      */
/* record. The word occupies up to 14 characters     */
/* starting in column 1 and ends at the first white- */
/* space character or at the end of the record.      */
/* Returns a pointer to a static buffer that is      */
/* overwritten by the next call.                     */
/*****************************************************/
char *extract_word()
{
    /* static: the buffer must outlive this call, because the  */
    /* caller reads it after the function has returned.        */
    static char dic_word[15];
    int i;

    for (i=0; i<14; i++) {
        /* <ctype.h> functions need an unsigned char value. */
        unsigned char ch = (unsigned char) dic_record[i];
        if (ch == '\0' || isspace(ch)) break;
        dic_word[i] = (char) ch;
    }
    /* Trim the dictionary word                      */
    dic_word[i] = '\0';
    return(dic_word);
}

/*****************************************************/
/* Extract the root from the current dictionary      */
/* record. The root identifies a group of similar    */
/* words (the root for run, ran, runs and running is */
/* run). It occupies up to 14 characters starting in */
/* column 35. A record too short to have a root      */
/* field yields an empty string.                     */
/* Returns a pointer to a static buffer that is      */
/* overwritten by the next call.                     */
/*****************************************************/
char *extract_root()
{
    /* static: the buffer must outlive this call, because the  */
    /* caller reads it after the function has returned.        */
    static char root[15];
    size_t record_len = strlen(dic_record);
    size_t i;
    int j = 0;

    /* Stay inside the text actually read: bytes beyond the    */
    /* terminator are left over from an earlier, longer record. */
    for (i=34; i<48 && i<record_len; i++) {
        if (isspace((unsigned char) dic_record[i])) break;
        root[j++] = dic_record[i];
    }
    /* Trim the root                                 */
    root[j] = '\0';
    return(root);
}

/*****************************************************/
/* Test the input sentence against the known         */
/* underlying structures, in order:                  */
/*   WH-AUX-NAME-VERB                                */
/*   NAME-AUX-VERB-PREP-DET-NOUN                     */
/* For the first structure whose types are all       */
/* possible for the words in those positions, record */
/* the type and phrase of each word. If none fits,   */
/* prime_types and phrases are left untouched.       */
/*****************************************************/
void check_underlying()
{
    static char *question_types[]    = { "WH", "AUX", "NAME", "VERB" };
    static char *question_phrases[]  = { "WHQUESTION", "VERBPHRASE",
                                         "NOUNPHRASE", "VERBPHRASE" };
    static char *statement_types[]   = { "NAME", "AUX", "VERB",
                                         "PREP", "DET", "NOUN" };
    static char *statement_phrases[] = { "NOUNPHRASE", "VERBPHRASE",
                                         "VERBPHRASE", "PREPPHRASE",
                                         "PREPPHRASE", "PREPPHRASE" };
    char **types;
    char **labels;
    int  length;
    int  pattern;
    int  pos;

    for (pattern = 0; pattern < 2; pattern++) {
        if (pattern == 0) {
            types  = question_types;
            labels = question_phrases;
            length = 4;
        } else {
            types  = statement_types;
            labels = statement_phrases;
            length = 6;
        }

        /* Every position must allow the expected type */
        for (pos = 0; pos < length; pos++) {
            if (check_type(types[pos], pos) != 0)
                break;
        }
        if (pos < length)
            continue;

        /* Structure recognised: fix types and phrases */
        for (pos = 0; pos < length; pos++) {
            strcpy(prime_types[pos], types[pos]);
            strcpy(phrases[pos],     labels[pos]);
        }
        return;
    }
}

/*****************************************************/
/* Compare the passed type with all the types stored */
/* for one word in type_array. pass_number is the    */
/* position of the word in the input sentence.       */
/* Returns 0 if the type is found, 1 if it is not    */
/* (or if pass_number is not a valid word position). */
/*****************************************************/
int check_type(char *pass_type, int pass_number)
{
    const int max_words = (int) (sizeof type_array / sizeof type_array[0]);
    const int max_types =
        (int) (sizeof type_array[0] / sizeof type_array[0][0]);
    int i;

    if (pass_number < 0 || pass_number >= max_words)
        return(1);

    /* The list ends at the first empty entry, but all slots may be */
    /* in use, so the slot count must bound the scan as well.       */
    for (i=0; i<max_types && type_array[pass_number][i][0]; i++) {
        if (strcmp(type_array[pass_number][i],
                    pass_type) == 0)
            /*  Passed type is found in array        */
            return(0);
    }
    /*  Passed type is not found in array            */
    return(1);
}

/*****************************************************/
/* Record the subject of the current sentence: the   */
/* first word whose assigned type is "NAME" is       */
/* copied to the subjects array. Nothing is stored   */
/* when no word has that type.                       */
/*****************************************************/
void check_subject()
{
    int pos = 0;

    /* Advance to the first NAME, if there is one */
    while (pos < word_ct && strcmp(prime_types[pos], "NAME") != 0)
        pos++;

    if (pos < word_ct)
        strcpy(subjects[sentence], word_array[pos]);
}

/*****************************************************/
/* Record the action of the current sentence: for    */
/* the first word whose assigned type is "VERB", the */
/* word's root is copied from the root array to the  */
/* actions array. Nothing is stored when no word has */
/* that type.                                        */
/*****************************************************/
void check_action()
{
    int pos;

    for (pos = 0; pos < word_ct; ++pos) {
        if (strcmp(prime_types[pos], "VERB") != 0)
            continue;
        strcpy(actions[sentence], root_array[pos]);
        return;
    }
}

/*****************************************************/
/* If the phrase is a "PREPPHRASE", then all the     */
/* words in the phrase refer to a place. Concatenate */
/* these words to the places array.                  */
/*****************************************************/
void check_place()
{
    int i;
    for (i=0; i<word_ct; i++) {
        if (strcmp(phrases[i], "PREPPHRASE") == 0) {
            strcat(places[sentence], " ");
            strcat(places[sentence], word_array[i]);
        }
    }
    return;
}

/*****************************************************/
/* Build the response to the current sentence.       */
/* Anything that does not begin with "where" (in any */
/* letter case) is acknowledged with "Ok". For a     */
/* where-question the earlier sentences are searched */
/* from newest to oldest for one with the same       */
/* subject and action and a known place; the first   */
/* one found supplies the answer. If there is none   */
/* the response is "I don't know".                   */
/*****************************************************/
void make_response()
{
    int prev;

    if (strcmpi(word_array[0],"where") != 0) {
        /* Not a where-question */
        strcpy(response, "Ok");
        return;
    }

    for (prev = sentence - 1; prev >= 0; --prev) {
        if (strcmp(subjects[prev], subjects[sentence]) != 0)
            continue;
        if (strcmp(actions[prev], actions[sentence]) != 0)
            continue;
        if (places[prev][0] == '\0')
            continue;

        /* Same subject and action, and the place is known */
        make_answer(prev);
        return;
    }

    /* Nothing remembered that answers the question */
    strcpy(response, "I don't know");
}

/*****************************************************/
/* Build the answer to a where-question from an      */
/* earlier sentence: its subject, the word "was",    */
/* whatever get_verb_ing() appends for the action,   */
/* and the place stored for that earlier sentence.   */
/*****************************************************/
void make_answer(int prev_sentence)
{
    /* "<subject> was " */
    sprintf(response, "%s was ", subjects[prev_sentence]);

    /* get_verb_ing() appends the verb to response itself */
    get_verb_ing();

    /* The stored place already begins with a space */
    strcat(response, places[prev_sentence]);
}

/*****************************************************/
/* Retrieve the ING version of the current action    */
/* from the dictionary (the ING version of run is    */
/* running). The dictionary is read from the start   */
/* until match_verb_ing() accepts a record; that     */
/* function appends the word to the response. If no  */
/* record is accepted, the response is unchanged.    */
/*****************************************************/
void get_verb_ing()
{
    rewind (infile);
    /* Loop on fgets() itself rather than feof(), so a last record */
    /* without a trailing newline is still examined.               */
    while (fgets(dic_record, sizeof dic_record, infile) != NULL) {
        if (match_verb_ing() == 0) break;
    }
    return;
}

/*****************************************************/
/* If the root in the current dictionary record      */
/* matches the action of the current sentence, and   */
/* the record's restriction field (columns 25-33,    */
/* ended early by white space) contains the ING      */
/* code, append the record's word to the response    */
/* and return 0. Otherwise return 1.                 */
/*****************************************************/
int  match_verb_ing()
{
    int i;
    int record_len;
    char *dic_word;

    if (strcmp(actions[sentence], extract_root()) != 0)
        return(1);

    /* Root found, look for ING restriction. Stay inside the text   */
    /* actually read: bytes beyond the terminator are left over     */
    /* from an earlier, longer record.                              */
    record_len = (int) strlen(dic_record);
    for (i=24; i<33 && i<record_len; i++) {
        if (isspace((unsigned char) dic_record[i])) break;
        if (dic_record[i] == ING) {
            dic_word = extract_word();
            /* Bounded append: never write past the end of response. */
            strncat(response, dic_word,
                    sizeof response - strlen(response) - 1);
            return(0);
        }
    }
    return(1);
}

