SCI: result_word_t -> ResultWord; switched to Common::List for storing lists of ResultWords

svn-id: r39278
This commit is contained in:
Max Horn 2009-03-09 22:25:33 +00:00
parent f3e50fba7c
commit 6c93249715
6 changed files with 84 additions and 120 deletions

View file

@ -195,7 +195,7 @@ static parse_rule_t *_vbuild_rule(parse_tree_branch_t *branch) {
return rule;
}
static parse_rule_t *_vsatisfy_rule(parse_rule_t *rule, result_word_t *input) {
static parse_rule_t *_vsatisfy_rule(parse_rule_t *rule, const ResultWord &input) {
int dep;
if (!rule->specials_nr)
@ -203,15 +203,15 @@ static parse_rule_t *_vsatisfy_rule(parse_rule_t *rule, result_word_t *input) {
dep = rule->data[rule->first_special];
if (((dep & TOKEN_TERMINAL_CLASS) && ((dep & 0xffff) & input->w_class)) ||
((dep & TOKEN_TERMINAL_GROUP) && ((dep & 0xffff) & input->group))) {
if (((dep & TOKEN_TERMINAL_CLASS) && ((dep & 0xffff) & input.w_class)) ||
((dep & TOKEN_TERMINAL_GROUP) && ((dep & 0xffff) & input.group))) {
parse_rule_t *retval = (parse_rule_t*)sci_malloc(sizeof(int) * (4 + rule->length));
++_allocd_rules;
retval->id = rule->id;
retval->specials_nr = rule->specials_nr - 1;
retval->length = rule->length;
memcpy(retval->data, rule->data, sizeof(int) * retval->length);
retval->data[rule->first_special] = TOKEN_STUFFING_WORD | input->group;
retval->data[rule->first_special] = TOKEN_STUFFING_WORD | input.group;
retval->first_special = 0;
if (retval->specials_nr) { // find first special, if it exists
@ -417,9 +417,9 @@ void vocab_gnf_dump(parse_tree_branch_t *branches, int branches_nr) {
vocab_free_rule_list(tlist);
}
int vocab_build_parse_tree(parse_tree_node_t *nodes, result_word_t *words, int words_nr,
int vocab_build_parse_tree(parse_tree_node_t *nodes, const ResultWordList &words,
parse_tree_branch_t *branch0, parse_rule_list_t *rules) {
return vocab_gnf_parse(nodes, words, words_nr, branch0, rules, 0);
return vocab_gnf_parse(nodes, words, branch0, rules, 0);
}
static int
@ -478,7 +478,7 @@ static int _vbpt_write_subexpression(parse_tree_node_t *nodes, int *pos, parse_r
else
writepos = _vbpt_append(nodes, pos, writepos, token & 0xffff);
} else {
sciprintf("\nError in parser (grammar.c, _vbpt_write_subexpression()): Rule data broken in rule ");
sciprintf("\nError in parser (grammar.cpp, _vbpt_write_subexpression()): Rule data broken in rule ");
vocab_print_rule(rule);
sciprintf(", at token position %d\n", *pos);
return rulepos;
@ -488,14 +488,16 @@ static int _vbpt_write_subexpression(parse_tree_node_t *nodes, int *pos, parse_r
return rulepos;
}
int vocab_gnf_parse(parse_tree_node_t *nodes, result_word_t *words, int words_nr,
int vocab_gnf_parse(parse_tree_node_t *nodes, const ResultWordList &words,
parse_tree_branch_t *branch0, parse_rule_list_t *tlist, int verbose) {
// Get the start rules:
parse_rule_list_t *work = _vocab_clone_rule_list_by_id(tlist, branch0->data[1]);
parse_rule_list_t *results = NULL;
int word;
int word = 0;
const int words_nr = words.size();
ResultWordList::const_iterator word_iter = words.begin();
for (word = 0; word < words_nr; word++) {
for (word_iter = words.begin(); word_iter != words.end(); ++word_iter, ++word) {
parse_rule_list_t *new_work = NULL;
parse_rule_list_t *reduced_rules = NULL;
parse_rule_list_t *seeker, *subseeker;
@ -506,7 +508,7 @@ int vocab_gnf_parse(parse_tree_node_t *nodes, result_word_t *words, int words_nr
seeker = work;
while (seeker) {
if (seeker->rule->specials_nr <= (words_nr - word))
reduced_rules = _vocab_add_rule(reduced_rules, _vsatisfy_rule(seeker->rule, words + word));
reduced_rules = _vocab_add_rule(reduced_rules, _vsatisfy_rule(seeker->rule, *word_iter));
seeker = seeker->next;
}

View file

@ -254,9 +254,8 @@ reg_t kSetSynonyms(EngineState *s, int funct_nr, int argc, reg_t *argv) {
reg_t kParse(EngineState *s, int funct_nr, int argc, reg_t *argv) {
reg_t stringpos = argv[0];
char *string = kernel_dereference_char_pointer(s, stringpos, 0);
int words_nr;
char *error;
result_word_t *words;
ResultWordList words;
reg_t event = argv[1];
s->parser_event = event;
@ -268,39 +267,35 @@ reg_t kParse(EngineState *s, int funct_nr, int argc, reg_t *argv) {
return s->r_acc;
}
words = vocab_tokenize_string(string, &words_nr,
words = vocab_tokenize_string(string,
s->parser_words, s->parser_words_nr,
s->_parserSuffixes,
&error);
s->parser_valid = 0; /* not valid */
if (words) {
if (!words.empty()) {
int syntax_fail = 0;
vocab_synonymize_tokens(words, words_nr, s->synonyms, s->synonyms_nr);
vocab_synonymize_tokens(words, s->synonyms, s->synonyms_nr);
s->r_acc = make_reg(0, 1);
if (s->debug_mode & (1 << SCIkPARSER_NR)) {
int i;
SCIkdebug(SCIkPARSER, "Parsed to the following blocks:\n", 0);
for (i = 0; i < words_nr; i++)
SCIkdebug(SCIkPARSER, " Type[%04x] Group[%04x]\n", words[i].w_class, words[i].group);
for (ResultWordList::const_iterator i = words.begin(); i != words.end(); ++i)
SCIkdebug(SCIkPARSER, " Type[%04x] Group[%04x]\n", i->w_class, i->group);
}
if (vocab_build_parse_tree(&(s->parser_nodes[0]), words, words_nr, s->parser_branches,
if (vocab_build_parse_tree(&(s->parser_nodes[0]), words, s->parser_branches,
s->parser_rules))
syntax_fail = 1; /* Building a tree failed */
#ifdef SCI_SIMPLE_SAID_CODE
vocab_build_simple_parse_tree(&(s->parser_nodes[0]), words, words_nr);
vocab_build_simple_parse_tree(&(s->parser_nodes[0]), words);
#endif /* SCI_SIMPLE_SAID_CODE */
free(words);
if (syntax_fail) {
s->r_acc = make_reg(0, 1);

View file

@ -113,7 +113,7 @@ int script_find_selector(EngineState *s, const char *selectorname) {
return pos;
}
warning("Could not map '%s' to any selector!", selectorname);
warning("Could not map '%s' to any selector", selectorname);
return -1;
}

View file

@ -786,17 +786,16 @@ int c_sim_parse(EngineState *s) {
if (!flag) {
char *openb = strchr(token, '['); // look for opening braces
result_word_t *result;
ResultWord result;
if (openb)
*openb = 0; // remove them and the rest
result = vocab_lookup_word(token, strlen(token), s->parser_words, s->parser_words_nr, s->_parserSuffixes);
if (result) {
if (result.w_class != -1) {
s->parser_nodes[i].type = 0;
s->parser_nodes[i].content.value = result->group;
free(result);
s->parser_nodes[i].content.value = result.group;
} else { // group name was specified directly?
int val = strtol(token, NULL, 0);
if (val) {
@ -1008,13 +1007,10 @@ int c_set_parse_nodes(EngineState *s) {
return 0;
}
int vocab_gnf_parse(parse_tree_node_t *nodes, result_word_t *words, int words_nr,
parse_tree_branch_t *branch0, parse_rule_list_t *tlist, int verbose);
// parses with a GNF rule set
int c_parse(EngineState *s) {
result_word_t *words;
int words_nr;
ResultWordList words;
char *error;
char *string;
@ -1025,23 +1021,21 @@ int c_parse(EngineState *s) {
string = cmd_params[0].str;
sciprintf("Parsing '%s'\n", string);
words = vocab_tokenize_string(string, &words_nr, s->parser_words, s->parser_words_nr,
words = vocab_tokenize_string(string, s->parser_words, s->parser_words_nr,
s->_parserSuffixes, &error);
if (words) {
int i, syntax_fail = 0;
if (!words.empty()) {
int syntax_fail = 0;
vocab_synonymize_tokens(words, words_nr, s->synonyms, s->synonyms_nr);
vocab_synonymize_tokens(words, s->synonyms, s->synonyms_nr);
sciprintf("Parsed to the following blocks:\n");
for (i = 0; i < words_nr; i++)
sciprintf(" Type[%04x] Group[%04x]\n", words[i].w_class, words[i].group);
for (ResultWordList::const_iterator i = words.begin(); i != words.end(); ++i)
sciprintf(" Type[%04x] Group[%04x]\n", i->w_class, i->group);
if (vocab_gnf_parse(&(s->parser_nodes[0]), words, words_nr, s->parser_branches, s->parser_rules, 1))
if (vocab_gnf_parse(&(s->parser_nodes[0]), words, s->parser_branches, s->parser_rules, 1))
syntax_fail = 1; // Building a tree failed
free(words);
if (syntax_fail)
sciprintf("Building a tree failed.\n");
else

View file

@ -261,12 +261,11 @@ parse_tree_branch_t *vocab_get_branches(ResourceManager * resmgr, int *branches_
}
result_word_t *vocab_lookup_word(char *word, int word_len, word_t **words, int words_nr,
ResultWord vocab_lookup_word(char *word, int word_len, word_t **words, int words_nr,
const SuffixList &suffixes) {
word_t *tempword = (word_t*)sci_malloc(sizeof(word_t) + word_len + 256);
// 256: For suffixes. Should suffice.
word_t **dict_word;
result_word_t *retval;
char *tester;
int word_len_tmp;
@ -277,17 +276,14 @@ result_word_t *vocab_lookup_word(char *word, int word_len, word_t **words, int w
while ((tester = strchr(tempword->word, '-')))
memmove(tester, tester + 1, (tempword->word + word_len_tmp--) - tester);
retval = (result_word_t *)sci_malloc(sizeof(result_word_t));
dict_word = (word_t **)bsearch(&tempword, words, words_nr, sizeof(word_t *), _vocab_cmp_words);
if (dict_word) {
free(tempword);
retval->w_class = (*dict_word)->w_class;
retval->group = (*dict_word)->group;
ResultWord tmp = { (*dict_word)->w_class, (*dict_word)->group };
return retval;
return tmp;
}
// Now try all suffixes
@ -307,10 +303,10 @@ result_word_t *vocab_lookup_word(char *word, int word_len, word_t **words, int w
if ((dict_word) && ((*dict_word)->w_class & suffix->class_mask)) { // Found it?
free(tempword);
retval->w_class = suffix->result_class; // Use suffix class
retval->group = (*dict_word)->group;
// Use suffix class
ResultWord tmp = { suffix->result_class, (*dict_word)->group };
return retval;
return tmp;
}
}
}
@ -324,19 +320,15 @@ result_word_t *vocab_lookup_word(char *word, int word_len, word_t **words, int w
while ((tester = strchr(tempword->word, '-')))
memmove(tester, tester + 1, (tempword->word + word_len--) - tester);
ResultWord retval = { -1, -1 };
if ((strtol(&(tempword->word[0]), &tester, 10) >= 0) && (*tester == '\0')) { // Do we have a complete number here?
free(tempword);
retval->group = VOCAB_MAGIC_NUMBER_GROUP;
retval->w_class = VOCAB_CLASS_NUMBER;
return(retval);
ResultWord tmp = { VOCAB_CLASS_NUMBER, VOCAB_MAGIC_NUMBER_GROUP };
retval = tmp;
}
free(tempword);
free(retval);
return NULL;
return retval;
}
void vocab_decypher_said_block(EngineState *s, byte *addr) {
@ -392,7 +384,7 @@ void vocab_decypher_said_block(EngineState *s, byte *addr) {
#ifdef SCI_SIMPLE_SAID_CODE
static short _related_words[][2] = { // 0 is backwards, 1 is forward
static const short _related_words[][2] = { // 0 is backwards, 1 is forward
{0x800, 0x180}, // preposition
{0x000, 0x180}, // article
{0x000, 0x180}, // adjective
@ -408,8 +400,8 @@ int vocab_build_simple_parse_tree(parse_tree_node_t *nodes, result_word_t *words
int i, length, pos = 0;
for (i = 0; i < words_nr; ++i) {
if (words[i].classID != VOCAB_CLASS_ANYWORD) {
nodes[pos].type = words[i].classID;
if (words[i].w_class != VOCAB_CLASS_ANYWORD) {
nodes[pos].type = words[i].w_class;
nodes[pos].content.value = words[i].group;
pos += 2; // Link information is filled in below
}
@ -485,18 +477,14 @@ int vocab_build_simple_parse_tree(parse_tree_node_t *nodes, result_word_t *words
}
#endif
result_word_t *vocab_tokenize_string(char *sentence, int *result_nr, word_t **words, int words_nr,
ResultWordList vocab_tokenize_string(char *sentence, word_t **words, int words_nr,
const SuffixList &suffixes, char **error) {
char *lastword = sentence;
int pos_in_sentence = 0;
char c;
int wordlen = 0;
result_word_t *retval = (result_word_t*)sci_malloc(sizeof(result_word_t));
// malloc'd size is always one result_word_t too big
ResultWordList retval;
result_word_t *lookup_result;
*result_nr = 0;
*error = NULL;
do {
@ -511,24 +499,19 @@ result_word_t *vocab_tokenize_string(char *sentence, int *result_nr, word_t **wo
else {
if (wordlen) { // Finished a word?
lookup_result =
ResultWord lookup_result =
vocab_lookup_word(lastword, wordlen, words, words_nr, suffixes);
// Look it up
if (!lookup_result) { // Not found?
if (lookup_result.w_class == -1) { // Not found?
*error = (char *)sci_calloc(wordlen + 1, 1);
strncpy(*error, lastword, wordlen); // Set the offending word
free(retval);
return NULL; // And return with error
retval.clear();
return retval; // And return with error
}
memcpy(retval + *result_nr, lookup_result, sizeof(result_word_t));
// Copy into list
++(*result_nr); // Increase number of resulting words
free(lookup_result);
retval = (result_word_t*)sci_realloc(retval, sizeof(result_word_t) * (*result_nr + 1));
retval.push_back(lookup_result);
}
lastword = sentence + pos_in_sentence;
@ -537,11 +520,6 @@ result_word_t *vocab_tokenize_string(char *sentence, int *result_nr, word_t **wo
} while (c); // Until terminator is hit
if (*result_nr == 0) {
free(retval);
return NULL;
}
return retval;
}
@ -621,16 +599,16 @@ void vocab_dump_parse_tree(const char *tree_name, parse_tree_node_t *nodes) {
sciprintf("))\n");
}
void vocab_synonymize_tokens(result_word_t *words, int words_nr, synonym_t *synonyms, int synonyms_nr) {
int i, sync;
void vocab_synonymize_tokens(ResultWordList &words, synonym_t *synonyms, int synonyms_nr) {
int sync;
if (!synonyms || !synonyms_nr)
return; // No synonyms: Nothing to check
for (i = 0; i < words_nr; i++)
for (ResultWordList::iterator i = words.begin(); i != words.end(); ++i)
for (sync = 0; sync < synonyms_nr; sync++)
if (words[i].group == synonyms[sync].replaceant)
words[i].group = synonyms[sync].replacement;
if (i->group == synonyms[sync].replaceant)
i->group = synonyms[sync].replacement;
}
} // End of namespace Sci

View file

@ -65,15 +65,17 @@ struct opcode {
#define VOCAB_RESOURCE_SCI1_SUFFIX_VOCAB 902
#define VOCAB_RESOURCE_SCI1_CHAR_TRANSFORMS 913
#define VOCAB_CLASS_PREPOSITION 0x01
#define VOCAB_CLASS_ARTICLE 0x02
#define VOCAB_CLASS_ADJECTIVE 0x04
#define VOCAB_CLASS_PRONOUN 0x08
#define VOCAB_CLASS_NOUN 0x10
#define VOCAB_CLASS_INDICATIVE_VERB 0x20
#define VOCAB_CLASS_ADVERB 0x40
#define VOCAB_CLASS_IMPERATIVE_VERB 0x80
#define VOCAB_CLASS_NUMBER 0x001
enum {
VOCAB_CLASS_PREPOSITION = 0x01,
VOCAB_CLASS_ARTICLE = 0x02,
VOCAB_CLASS_ADJECTIVE = 0x04,
VOCAB_CLASS_PRONOUN = 0x08,
VOCAB_CLASS_NOUN = 0x10,
VOCAB_CLASS_INDICATIVE_VERB = 0x20,
VOCAB_CLASS_ADVERB = 0x40,
VOCAB_CLASS_IMPERATIVE_VERB = 0x80,
VOCAB_CLASS_NUMBER = 0x001
};
extern const char *class_names[]; /* Vocabulary class names */
@ -112,12 +114,17 @@ extern const char *class_names[]; /* Vocabulary class names */
#define SAID_LONG(x) ((x) << 8)
struct word_t {
struct ResultWord {
int w_class; /* Word class */
int group; /* Word group */
};
typedef Common::List<ResultWord> ResultWordList;
struct word_t {
int w_class; /* Word class */
int group; /* Word group */
char word[1]; /* The actual word */
};
@ -153,14 +160,6 @@ struct suffix_t {
typedef Common::List<suffix_t> SuffixList;
struct result_word_t {
int w_class; /* Word class */
int group; /* Word group */
};
struct synonym_t {
int replaceant; /* The word group to replace */
int replacement; /* The replacement word group for this one */
@ -279,7 +278,7 @@ void vocab_free_branches(parse_tree_branch_t *parser_branches);
** Returns : (null)
*/
result_word_t *vocab_lookup_word(char *word, int word_len,
ResultWord vocab_lookup_word(char *word, int word_len,
word_t **words, int words_nr, const SuffixList &suffixes);
/* Looks up a single word in the words and suffixes list
** Parameters: (char *) word: Pointer to the word to look up
@ -287,16 +286,14 @@ result_word_t *vocab_lookup_word(char *word, int word_len,
** (word_t **) words: List of words
** (int) words_nr: Number of elements in 'words'
** (SuffixList) suffixes: List of suffixes
** Returns : (result_word_t *) A malloc'd result_word_t, or NULL if the word
** could not be found.
** Returns : (ResultWord) The looked-up word; its w_class is set to -1
**                        if the word could not be found
*/
result_word_t *vocab_tokenize_string(char *sentence, int *result_nr,
ResultWordList vocab_tokenize_string(char *sentence,
word_t **words, int words_nr, const SuffixList &suffixes, char **error);
/* Tokenizes a string and compiles it into a list of ResultWords.
** Parameters: (char *) sentence: The sentence to examine
** (int *) result_nr: The variable to store the resulting number of words in
** (word_t **) words: The words to scan for
** (int) words_nr: Number of words to scan for
** (SuffixList) suffixes: suffixes to scan for
@ -328,13 +325,12 @@ void vocab_free_rule_list(parse_rule_list_t *rule_list);
*/
int vocab_build_parse_tree(parse_tree_node_t *nodes, result_word_t *words, int words_nr,
int vocab_build_parse_tree(parse_tree_node_t *nodes, const ResultWordList &words,
parse_tree_branch_t *branch0, parse_rule_list_t *rules);
/* Builds a parse tree from a list of words
** Parameters: (parse_tree_node_t *) nodes: A node list to store the tree in (must have
** at least VOCAB_TREE_NODES entries)
** (result_word_t *) words: The words to build the tree from
** (int) words_nr: The number of words
** (const ResultWordList &) words: The words to build the tree from
** (parse_tree_branch_t *) branch0: The zeroth original branch of the
** original CNF parser grammar
** (parse_rule_list *) rules: The GNF ruleset to parse with
@ -378,15 +374,14 @@ void vocab_decypher_said_block(EngineState *s, byte *pos);
*/
void vocab_synonymize_tokens(result_word_t *words, int words_nr, synonym_t *synonyms, int synonyms_nr);
void vocab_synonymize_tokens(ResultWordList &words, synonym_t *synonyms, int synonyms_nr);
/* Synonymizes a token list
** Parameters: (result_wort_t *) words: The word list to synonymize
** (int) words_nr: Number of word_ts in the list
** Parameters: (ResultWordList &) words: The word list to synonymize
** (synonym_t *) synonyms: Synonym list
** (int) synonyms_nr: Number of synonyms in the list
*/
int vocab_gnf_parse(parse_tree_node_t *nodes, result_word_t *words, int words_nr,
int vocab_gnf_parse(parse_tree_node_t *nodes, const ResultWordList &words,
parse_tree_branch_t *branch0, parse_rule_list_t *tlist, int verbose);
void vocab_gnf_dump(parse_tree_branch_t *branches, int branches_nr);