SCI: Allow multiple word groups in parser

In SCI01 and up, each typed word may be interpreted as multiple
(class, group) pairs. This patch adds support to the vocabulary and
parser. It uses the matcher support added in r52985.

This fixes parser issues in German LSL3, but needs testing.

svn-id: r52989
This commit is contained in:
Willem Jan Palenstijn 2010-10-03 10:49:42 +00:00
parent 694758fd2a
commit f98536eef5
7 changed files with 208 additions and 67 deletions

View file

@ -150,6 +150,12 @@ public:
insert_aux(_storage + idx, &element, &element + 1); insert_aux(_storage + idx, &element, &element + 1);
} }
void insert_at(int idx, const Array<T> &array) {
	// Inserts the full contents of 'array' at position idx, shifting the
	// existing elements [idx, _size) towards the end. idx may equal _size,
	// in which case this appends.
	assert(idx >= 0 && (uint)idx <= _size);
	insert_aux(_storage + idx, array.begin(), array.end());
}
T remove_at(int idx) { T remove_at(int idx) {
assert(idx >= 0 && (uint)idx < _size); assert(idx >= 0 && (uint)idx < _size);
T tmp = _storage[idx]; T tmp = _storage[idx];

View file

@ -1204,7 +1204,6 @@ bool Console::cmdParse(int argc, const char **argv) {
return true; return true;
} }
ResultWordList words;
char *error; char *error;
char string[1000]; char string[1000];
@ -1216,6 +1215,8 @@ bool Console::cmdParse(int argc, const char **argv) {
} }
DebugPrintf("Parsing '%s'\n", string); DebugPrintf("Parsing '%s'\n", string);
ResultWordListList words;
bool res = _engine->getVocabulary()->tokenizeString(words, string, &error); bool res = _engine->getVocabulary()->tokenizeString(words, string, &error);
if (res && !words.empty()) { if (res && !words.empty()) {
int syntax_fail = 0; int syntax_fail = 0;
@ -1224,8 +1225,13 @@ bool Console::cmdParse(int argc, const char **argv) {
DebugPrintf("Parsed to the following blocks:\n"); DebugPrintf("Parsed to the following blocks:\n");
for (ResultWordList::const_iterator i = words.begin(); i != words.end(); ++i) for (ResultWordListList::const_iterator i = words.begin(); i != words.end(); ++i) {
DebugPrintf(" Type[%04x] Group[%04x]\n", i->_class, i->_group); DebugPrintf(" ");
for (ResultWordList::const_iterator j = i->begin(); j != i->end(); ++j) {
DebugPrintf("%sType[%04x] Group[%04x]", j == i->begin() ? "" : " / ", j->_class, j->_group);
}
DebugPrintf("\n");
}
if (_engine->getVocabulary()->parseGNF(words, true)) if (_engine->getVocabulary()->parseGNF(words, true))
syntax_fail = 1; // Building a tree failed syntax_fail = 1; // Building a tree failed
@ -1252,7 +1258,6 @@ bool Console::cmdSaid(int argc, const char **argv) {
return true; return true;
} }
ResultWordList words;
char *error; char *error;
char string[1000]; char string[1000];
byte spec[1000]; byte spec[1000];
@ -1326,6 +1331,7 @@ bool Console::cmdSaid(int argc, const char **argv) {
_engine->getVocabulary()->debugDecipherSaidBlock(spec); _engine->getVocabulary()->debugDecipherSaidBlock(spec);
printf("\n"); printf("\n");
ResultWordListList words;
bool res = _engine->getVocabulary()->tokenizeString(words, string, &error); bool res = _engine->getVocabulary()->tokenizeString(words, string, &error);
if (res && !words.empty()) { if (res && !words.empty()) {
int syntax_fail = 0; int syntax_fail = 0;
@ -1334,8 +1340,15 @@ bool Console::cmdSaid(int argc, const char **argv) {
DebugPrintf("Parsed to the following blocks:\n"); DebugPrintf("Parsed to the following blocks:\n");
for (ResultWordList::const_iterator i = words.begin(); i != words.end(); ++i) for (ResultWordListList::const_iterator i = words.begin(); i != words.end(); ++i) {
DebugPrintf(" Type[%04x] Group[%04x]\n", i->_class, i->_group); DebugPrintf(" ");
for (ResultWordList::const_iterator j = i->begin(); j != i->end(); ++j) {
DebugPrintf("%sType[%04x] Group[%04x]", j == i->begin() ? "" : " / ", j->_class, j->_group);
}
DebugPrintf("\n");
}
if (_engine->getVocabulary()->parseGNF(words, true)) if (_engine->getVocabulary()->parseGNF(words, true))
syntax_fail = 1; // Building a tree failed syntax_fail = 1; // Building a tree failed

View file

@ -93,13 +93,13 @@ reg_t kParse(EngineState *s, int argc, reg_t *argv) {
reg_t stringpos = argv[0]; reg_t stringpos = argv[0];
Common::String string = s->_segMan->getString(stringpos); Common::String string = s->_segMan->getString(stringpos);
char *error; char *error;
ResultWordList words;
reg_t event = argv[1]; reg_t event = argv[1];
g_sci->checkVocabularySwitch(); g_sci->checkVocabularySwitch();
Vocabulary *voc = g_sci->getVocabulary(); Vocabulary *voc = g_sci->getVocabulary();
voc->parser_event = event; voc->parser_event = event;
reg_t params[2] = { voc->parser_base, stringpos }; reg_t params[2] = { voc->parser_base, stringpos };
ResultWordListList words;
bool res = voc->tokenizeString(words, string.c_str(), &error); bool res = voc->tokenizeString(words, string.c_str(), &error);
voc->parserIsValid = false; /* not valid */ voc->parserIsValid = false; /* not valid */
@ -109,10 +109,15 @@ reg_t kParse(EngineState *s, int argc, reg_t *argv) {
s->r_acc = make_reg(0, 1); s->r_acc = make_reg(0, 1);
#ifdef DEBUG_PARSER #ifdef DEBUG_PARSER
debugC(2, kDebugLevelParser, "Parsed to the following blocks:"); debugC(2, kDebugLevelParser, "Parsed to the following blocks:");
for (ResultWordList::const_iterator i = words.begin(); i != words.end(); ++i) for (ResultWordListList::const_iterator i = words.begin(); i != words.end(); ++i) {
debugC(2, kDebugLevelParser, " Type[%04x] Group[%04x]", i->_class, i->_group); debugCN(2, kDebugLevelParser, " ");
for (ResultWordList::const_iterator j = i->begin(); j != i->end(); ++j) {
debugCN(2, kDebugLevelParser, "%sType[%04x] Group[%04x]", j == i->begin() ? "" : " / ", j->_class, j->_group);
}
debugCN(2, kDebugLevelParser, "\n");
}
#endif #endif
int syntax_fail = voc->parseGNF(words); int syntax_fail = voc->parseGNF(words);

View file

@ -38,8 +38,9 @@ namespace Sci {
#define TOKEN_CPAREN 0xfe000000 #define TOKEN_CPAREN 0xfe000000
#define TOKEN_TERMINAL_CLASS 0x10000 #define TOKEN_TERMINAL_CLASS 0x10000
#define TOKEN_TERMINAL_GROUP 0x20000 #define TOKEN_TERMINAL_GROUP 0x20000
#define TOKEN_STUFFING_WORD 0x40000 #define TOKEN_STUFFING_LEAF 0x40000
#define TOKEN_NON_NT (TOKEN_OPAREN | TOKEN_TERMINAL_CLASS | TOKEN_TERMINAL_GROUP | TOKEN_STUFFING_WORD) #define TOKEN_STUFFING_WORD 0x80000
#define TOKEN_NON_NT (TOKEN_OPAREN | TOKEN_TERMINAL_CLASS | TOKEN_TERMINAL_GROUP | TOKEN_STUFFING_LEAF | TOKEN_STUFFING_WORD)
#define TOKEN_TERMINAL (TOKEN_TERMINAL_CLASS | TOKEN_TERMINAL_GROUP) #define TOKEN_TERMINAL (TOKEN_TERMINAL_CLASS | TOKEN_TERMINAL_GROUP)
static int _allocd_rules = 0; // FIXME: Avoid non-const global vars static int _allocd_rules = 0; // FIXME: Avoid non-const global vars
@ -122,8 +123,10 @@ static void vocab_print_rule(ParseRule *rule) {
printf("C(%04x)", token & 0xffff); printf("C(%04x)", token & 0xffff);
else if (token & TOKEN_TERMINAL_GROUP) else if (token & TOKEN_TERMINAL_GROUP)
printf("G(%04x)", token & 0xffff); printf("G(%04x)", token & 0xffff);
else if (token & TOKEN_STUFFING_WORD) else if (token & TOKEN_STUFFING_LEAF)
printf("%03x", token & 0xffff); printf("%03x", token & 0xffff);
else if (token & TOKEN_STUFFING_WORD)
printf("{%03x}", token & 0xffff);
else else
printf("[%03x]", token); /* non-terminal */ printf("[%03x]", token); /* non-terminal */
wspace = 1; wspace = 1;
@ -206,8 +209,8 @@ static ParseRule *_vbuild_rule(const parse_tree_branch_t *branch) {
rule->_data[tokens++] = value | TOKEN_STUFFING_WORD; rule->_data[tokens++] = value | TOKEN_STUFFING_WORD;
else { // normal inductive rule else { // normal inductive rule
rule->_data[tokens++] = TOKEN_OPAREN; rule->_data[tokens++] = TOKEN_OPAREN;
rule->_data[tokens++] = type | TOKEN_STUFFING_WORD; rule->_data[tokens++] = type | TOKEN_STUFFING_LEAF;
rule->_data[tokens++] = value | TOKEN_STUFFING_WORD; rule->_data[tokens++] = value | TOKEN_STUFFING_LEAF;
if (i == 0) if (i == 0)
rule->_firstSpecial = tokens; rule->_firstSpecial = tokens;
@ -220,7 +223,7 @@ static ParseRule *_vbuild_rule(const parse_tree_branch_t *branch) {
return rule; return rule;
} }
static ParseRule *_vsatisfy_rule(ParseRule *rule, const ResultWord &input) { static ParseRule *_vsatisfy_rule(ParseRule *rule, const ResultWordList &input) {
int dep; int dep;
if (!rule->_numSpecials) if (!rule->_numSpecials)
@ -228,11 +231,32 @@ static ParseRule *_vsatisfy_rule(ParseRule *rule, const ResultWord &input) {
dep = rule->_data[rule->_firstSpecial]; dep = rule->_data[rule->_firstSpecial];
if (((dep & TOKEN_TERMINAL_CLASS) && ((dep & 0xffff) & input._class)) || int count = 0;
((dep & TOKEN_TERMINAL_GROUP) && ((dep & 0xffff) & input._group))) { int match = 0;
ResultWordList::const_iterator iter;
// TODO: Inserting an array in the middle of another array is slow
Common::Array<int> matches;
matches.reserve(input.size());
// We store the first match in 'match', and any subsequent matches in
// 'matches'. 'match' replaces the special in the rule, and 'matches' gets
// inserted after it.
for (iter = input.begin(); iter != input.end(); ++iter)
if (((dep & TOKEN_TERMINAL_CLASS) && ((dep & 0xffff) & iter->_class)) ||
((dep & TOKEN_TERMINAL_GROUP) && ((dep & 0xffff) & iter->_group))) {
if (count == 0)
match = TOKEN_STUFFING_WORD | iter->_group;
else
matches.push_back(TOKEN_STUFFING_WORD | iter->_group);
count++;
}
if (count) {
ParseRule *retval = new ParseRule(*rule); ParseRule *retval = new ParseRule(*rule);
++_allocd_rules; ++_allocd_rules;
retval->_data[rule->_firstSpecial] = TOKEN_STUFFING_WORD | input._group; retval->_data[rule->_firstSpecial] = match;
if (count > 1)
retval->_data.insert_at(rule->_firstSpecial+1, matches);
retval->_numSpecials--; retval->_numSpecials--;
retval->_firstSpecial = 0; retval->_firstSpecial = 0;
@ -458,6 +482,25 @@ static int _vbpt_terminate(ParseTreeNode *nodes, int *pos, int base, int value)
nodes[base].right = 0; nodes[base].right = 0;
return *pos; return *pos;
} }
static int _vbpt_append_word(ParseTreeNode *nodes, int *pos, int base, int value) {
	// Stores a word value in the node at 'base' and links in a fresh
	// branch node as its right sibling, ready for the next write.
	ParseTreeNode &cur = nodes[base];
	cur.type = kParseTreeWordNode;
	cur.value = value;

	const int next = ++(*pos);
	cur.right = &nodes[next];
	nodes[next].type = kParseTreeBranchNode;
	nodes[next].left = 0;
	nodes[next].right = 0;
	return next;
}
static int _vbpt_terminate_word(ParseTreeNode *nodes, int *pos, int base, int value) {
	// Terminates a chain: the pending fork node at 'base' becomes a
	// word leaf with no right sibling.
	ParseTreeNode &leaf = nodes[base];
	leaf.type = kParseTreeWordNode;
	leaf.value = value;
	leaf.right = 0;
	return *pos;
}
static int _vbpt_write_subexpression(ParseTreeNode *nodes, int *pos, ParseRule *rule, uint rulepos, int writepos) { static int _vbpt_write_subexpression(ParseTreeNode *nodes, int *pos, ParseRule *rule, uint rulepos, int writepos) {
uint token; uint token;
@ -470,11 +513,16 @@ static int _vbpt_write_subexpression(ParseTreeNode *nodes, int *pos, ParseRule *
nexttoken = (rulepos < rule->_data.size()) ? rule->_data[rulepos] : TOKEN_CPAREN; nexttoken = (rulepos < rule->_data.size()) ? rule->_data[rulepos] : TOKEN_CPAREN;
if (nexttoken != TOKEN_CPAREN) if (nexttoken != TOKEN_CPAREN)
writepos = _vbpt_parenc(nodes, pos, writepos); writepos = _vbpt_parenc(nodes, pos, writepos);
} else if (token & TOKEN_STUFFING_WORD) { } else if (token & TOKEN_STUFFING_LEAF) {
if (nexttoken == TOKEN_CPAREN) if (nexttoken == TOKEN_CPAREN)
writepos = _vbpt_terminate(nodes, pos, writepos, token & 0xffff); writepos = _vbpt_terminate(nodes, pos, writepos, token & 0xffff);
else else
writepos = _vbpt_append(nodes, pos, writepos, token & 0xffff); writepos = _vbpt_append(nodes, pos, writepos, token & 0xffff);
} else if (token & TOKEN_STUFFING_WORD) {
if (nexttoken == TOKEN_CPAREN)
writepos = _vbpt_terminate_word(nodes, pos, writepos, token & 0xffff);
else
writepos = _vbpt_append_word(nodes, pos, writepos, token & 0xffff);
} else { } else {
printf("\nError in parser (grammar.cpp, _vbpt_write_subexpression()): Rule data broken in rule "); printf("\nError in parser (grammar.cpp, _vbpt_write_subexpression()): Rule data broken in rule ");
vocab_print_rule(rule); vocab_print_rule(rule);
@ -486,16 +534,16 @@ static int _vbpt_write_subexpression(ParseTreeNode *nodes, int *pos, ParseRule *
return rulepos; return rulepos;
} }
int Vocabulary::parseGNF(const ResultWordList &words, bool verbose) { int Vocabulary::parseGNF(const ResultWordListList &words, bool verbose) {
Console *con = g_sci->getSciDebugger(); Console *con = g_sci->getSciDebugger();
// Get the start rules: // Get the start rules:
ParseRuleList *work = _vocab_clone_rule_list_by_id(_parserRules, _parserBranches[0].data[1]); ParseRuleList *work = _vocab_clone_rule_list_by_id(_parserRules, _parserBranches[0].data[1]);
ParseRuleList *results = NULL; ParseRuleList *results = NULL;
uint word = 0; uint word = 0;
const uint words_nr = words.size(); const uint words_nr = words.size();
ResultWordList::const_iterator word_iter = words.begin(); ResultWordListList::const_iterator words_iter;
for (word_iter = words.begin(); word_iter != words.end(); ++word_iter, ++word) { for (words_iter = words.begin(); words_iter != words.end(); ++words_iter, ++word) {
ParseRuleList *new_work = NULL; ParseRuleList *new_work = NULL;
ParseRuleList *reduced_rules = NULL; ParseRuleList *reduced_rules = NULL;
ParseRuleList *seeker, *subseeker; ParseRuleList *seeker, *subseeker;
@ -505,8 +553,9 @@ int Vocabulary::parseGNF(const ResultWordList &words, bool verbose) {
seeker = work; seeker = work;
while (seeker) { while (seeker) {
if (seeker->rule->_numSpecials <= (words_nr - word)) if (seeker->rule->_numSpecials <= (words_nr - word)) {
reduced_rules = _vocab_add_rule(reduced_rules, _vsatisfy_rule(seeker->rule, *word_iter)); reduced_rules = _vocab_add_rule(reduced_rules, _vsatisfy_rule(seeker->rule, *words_iter));
}
seeker = seeker->next; seeker = seeker->next;
} }

View file

@ -166,8 +166,14 @@ bool Vocabulary::loadParserWords() {
newWord._class = ((resource->data[seeker]) << 4) | ((c & 0xf0) >> 4); newWord._class = ((resource->data[seeker]) << 4) | ((c & 0xf0) >> 4);
newWord._group = (resource->data[seeker + 2]) | ((c & 0x0f) << 8); newWord._group = (resource->data[seeker + 2]) | ((c & 0x0f) << 8);
// Add the word to the list // SCI01 was the first version to support multiple class/group pairs
_parserWords[currentWord] = newWord; // per word, so we clear the list
// in earlier versions.
if (getSciVersion() < SCI_VERSION_01)
_parserWords[currentWord].clear();
// Add this to the list of possible class,group pairs for this word
_parserWords[currentWord].push_back(newWord);
seeker += 3; seeker += 3;
} }
@ -182,8 +188,9 @@ const char *Vocabulary::getAnyWordFromGroup(int group) {
return "{nothing}"; return "{nothing}";
for (WordMap::const_iterator i = _parserWords.begin(); i != _parserWords.end(); ++i) { for (WordMap::const_iterator i = _parserWords.begin(); i != _parserWords.end(); ++i) {
if (i->_value._group == group) for (ResultWordList::const_iterator j = i->_value.begin(); j != i->_value.end(); ++j)
return i->_key.c_str(); if (j->_group == group)
return i->_key.c_str();
} }
return "{invalid}"; return "{invalid}";
@ -266,7 +273,9 @@ bool Vocabulary::loadBranches() {
} }
// we assume that *word points to an already lowercased word // we assume that *word points to an already lowercased word
ResultWord Vocabulary::lookupWord(const char *word, int word_len) { void Vocabulary::lookupWord(ResultWordList& retval, const char *word, int word_len) {
retval.clear();
Common::String tempword(word, word_len); Common::String tempword(word, word_len);
// Remove all dashes from tempword // Remove all dashes from tempword
@ -278,15 +287,22 @@ ResultWord Vocabulary::lookupWord(const char *word, int word_len) {
} }
// Look it up: // Look it up:
WordMap::iterator dict_word = _parserWords.find(tempword); WordMap::iterator dict_words = _parserWords.find(tempword);
// Match found? Return it! // Match found? Return it!
if (dict_word != _parserWords.end()) { if (dict_words != _parserWords.end()) {
return dict_word->_value; retval = dict_words->_value;
// SCI01 was the first version to support
// multiple matches, so no need to look further
// in earlier versions.
if (getSciVersion() < SCI_VERSION_01)
return;
} }
// Now try all suffixes // Now try all suffixes
for (SuffixList::const_iterator suffix = _parserSuffixes.begin(); suffix != _parserSuffixes.end(); ++suffix) for (SuffixList::const_iterator suffix = _parserSuffixes.begin(); suffix != _parserSuffixes.end(); ++suffix) {
if (suffix->alt_suffix_length <= word_len) { if (suffix->alt_suffix_length <= word_len) {
int suff_index = word_len - suffix->alt_suffix_length; int suff_index = word_len - suffix->alt_suffix_length;
@ -299,27 +315,38 @@ ResultWord Vocabulary::lookupWord(const char *word, int word_len) {
// ...and append "correct" suffix // ...and append "correct" suffix
tempword2 += Common::String(suffix->word_suffix, suffix->word_suffix_length); tempword2 += Common::String(suffix->word_suffix, suffix->word_suffix_length);
dict_word = _parserWords.find(tempword2); dict_words = _parserWords.find(tempword2);
if ((dict_word != _parserWords.end()) && (dict_word->_value._class & suffix->class_mask)) { // Found it? if (dict_words != _parserWords.end()) {
// Use suffix class for (ResultWordList::const_iterator j = dict_words->_value.begin(); j != dict_words->_value.end(); ++j) {
ResultWord tmp = dict_word->_value; if (j->_class & suffix->class_mask) { // Found it?
tmp._class = suffix->result_class; // Use suffix class
return tmp; ResultWord tmp = *j;
tmp._class = suffix->result_class;
retval.push_back(tmp);
// SCI01 was the first version to support
// multiple matches, so no need to look further
// in earlier versions.
if (getSciVersion() < SCI_VERSION_01)
return;
}
}
} }
} }
} }
}
if (!retval.empty())
return;
// No match so far? Check if it's a number. // No match so far? Check if it's a number.
ResultWord retval = { -1, -1 };
char *tester; char *tester;
if ((strtol(tempword.c_str(), &tester, 10) >= 0) && (*tester == '\0')) { // Do we have a complete number here? if ((strtol(tempword.c_str(), &tester, 10) >= 0) && (*tester == '\0')) { // Do we have a complete number here?
ResultWord tmp = { VOCAB_CLASS_NUMBER, VOCAB_MAGIC_NUMBER_GROUP }; ResultWord tmp = { VOCAB_CLASS_NUMBER, VOCAB_MAGIC_NUMBER_GROUP };
retval = tmp; retval.push_back(tmp);
} }
return retval;
} }
void Vocabulary::debugDecipherSaidBlock(const byte *addr) { void Vocabulary::debugDecipherSaidBlock(const byte *addr) {
@ -398,7 +425,7 @@ static const byte lowerCaseMap[256] = {
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff // 0xf0 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff // 0xf0
}; };
bool Vocabulary::tokenizeString(ResultWordList &retval, const char *sentence, char **error) { bool Vocabulary::tokenizeString(ResultWordListList &retval, const char *sentence, char **error) {
char currentWord[VOCAB_MAX_WORDLENGTH] = ""; char currentWord[VOCAB_MAX_WORDLENGTH] = "";
int pos_in_sentence = 0; int pos_in_sentence = 0;
unsigned char c; unsigned char c;
@ -419,10 +446,12 @@ bool Vocabulary::tokenizeString(ResultWordList &retval, const char *sentence, ch
else { else {
if (wordLen) { // Finished a word? if (wordLen) { // Finished a word?
ResultWord lookup_result = lookupWord(currentWord, wordLen); ResultWordList lookup_result;
// Look it up
if (lookup_result._class == -1) { // Not found? // Look it up
lookupWord(lookup_result, currentWord, wordLen);
if (lookup_result.empty()) { // Not found?
*error = (char *)calloc(wordLen + 1, 1); *error = (char *)calloc(wordLen + 1, 1);
strncpy(*error, currentWord, wordLen); // Set the offending word strncpy(*error, currentWord, wordLen); // Set the offending word
retval.clear(); retval.clear();
@ -460,12 +489,14 @@ void Vocabulary::printSuffixes() const {
void Vocabulary::printParserWords() const { void Vocabulary::printParserWords() const {
Console *con = g_sci->getSciDebugger(); Console *con = g_sci->getSciDebugger();
int j = 0; int n = 0;
for (WordMap::iterator i = _parserWords.begin(); i != _parserWords.end(); ++i) { for (WordMap::iterator i = _parserWords.begin(); i != _parserWords.end(); ++i) {
con->DebugPrintf("%4d: %03x [%03x] %20s |", j, i->_value._class, i->_value._group, i->_key.c_str()); for (ResultWordList::iterator j = i->_value.begin(); j != i->_value.end(); ++j) {
if (j % 3 == 0) con->DebugPrintf("%4d: %03x [%03x] %20s |", n, j->_class, j->_group, i->_key.c_str());
con->DebugPrintf("\n"); if (n % 3 == 0)
j++; con->DebugPrintf("\n");
n++;
}
} }
con->DebugPrintf("\n"); con->DebugPrintf("\n");
@ -527,8 +558,13 @@ void _vocab_recursive_ptree_dump(ParseTreeNode *tree, int blanks) {
if (rbranch) { if (rbranch) {
if (rbranch->type == kParseTreeBranchNode) if (rbranch->type == kParseTreeBranchNode)
_vocab_recursive_ptree_dump(rbranch, blanks); _vocab_recursive_ptree_dump(rbranch, blanks);
else else {
printf("%x", rbranch->value); printf("%x", rbranch->value);
while (rbranch->right) {
rbranch = rbranch->right;
printf("/%x", rbranch->value);
}
}
}/* else printf("nil");*/ }/* else printf("nil");*/
} }
@ -546,14 +582,15 @@ void Vocabulary::dumpParseTree() {
printf("))\n"); printf("))\n");
} }
void Vocabulary::synonymizeTokens(ResultWordList &words) { void Vocabulary::synonymizeTokens(ResultWordListList &words) {
if (_synonyms.empty()) if (_synonyms.empty())
return; // No synonyms: Nothing to check return; // No synonyms: Nothing to check
for (ResultWordList::iterator i = words.begin(); i != words.end(); ++i) for (ResultWordListList::iterator i = words.begin(); i != words.end(); ++i)
for (SynonymList::const_iterator sync = _synonyms.begin(); sync != _synonyms.end(); ++sync) for (ResultWordList::iterator j = i->begin(); j != i->end(); ++j)
if (i->_group == sync->replaceant) for (SynonymList::const_iterator sync = _synonyms.begin(); sync != _synonyms.end(); ++sync)
i->_group = sync->replacement; if (j->_group == sync->replaceant)
j->_group = sync->replacement;
} }
void Vocabulary::printParserNodes(int num) { void Vocabulary::printParserNodes(int num) {

View file

@ -117,8 +117,9 @@ struct ResultWord {
}; };
typedef Common::List<ResultWord> ResultWordList; typedef Common::List<ResultWord> ResultWordList;
typedef Common::List<ResultWordList> ResultWordListList;
typedef Common::HashMap<Common::String, ResultWord, Common::CaseSensitiveString_Hash, Common::CaseSensitiveString_EqualTo> WordMap; typedef Common::HashMap<Common::String, ResultWordList, Common::CaseSensitiveString_Hash, Common::CaseSensitiveString_EqualTo> WordMap;
struct ParseRuleList; struct ParseRuleList;
@ -161,7 +162,7 @@ struct ParseTreeNode {
ParseTypes type; /**< leaf or branch */ ParseTypes type; /**< leaf or branch */
int value; /**< For leaves */ int value; /**< For leaves */
ParseTreeNode* left; /**< Left child, for branches */ ParseTreeNode* left; /**< Left child, for branches */
ParseTreeNode* right; /**< Right child, for branches */ ParseTreeNode* right; /**< Right child, for branches (and word leaves) */
}; };
enum VocabularyVersions { enum VocabularyVersions {
@ -186,11 +187,11 @@ public:
/** /**
* Looks up a single word in the words and suffixes list. * Looks up a single word in the words and suffixes list.
* @param retval the list of matches
* @param word pointer to the word to look up * @param word pointer to the word to look up
* @param word_len length of the word to look up * @param word_len length of the word to look up
* @return the matching word (or (-1,-1) if there was no match)
*/ */
ResultWord lookupWord(const char *word, int word_len); void lookupWord(ResultWordList &retval, const char *word, int word_len);
/** /**
@ -204,7 +205,7 @@ public:
* contain any useful words; if not, *error points to a malloc'd copy of * contain any useful words; if not, *error points to a malloc'd copy of
* the offending word. The returned list may contain any words. * the offending word. The returned list may contain any words.
*/ */
bool tokenizeString(ResultWordList &retval, const char *sentence, char **error); bool tokenizeString(ResultWordListList &retval, const char *sentence, char **error);
/** /**
* Builds a parse tree from a list of words, using a set of Greibach Normal * Builds a parse tree from a list of words, using a set of Greibach Normal
@ -215,7 +216,7 @@ public:
* nodes or if the sentence structure in 'words' is not part of the * nodes or if the sentence structure in 'words' is not part of the
* language described by the grammar passed in 'rules'. * language described by the grammar passed in 'rules'.
*/ */
int parseGNF(const ResultWordList &words, bool verbose = false); int parseGNF(const ResultWordListList &words, bool verbose = false);
/** /**
* Constructs the Greibach Normal Form of the grammar supplied in 'branches'. * Constructs the Greibach Normal Form of the grammar supplied in 'branches'.
@ -262,9 +263,9 @@ public:
/** /**
* Synonymizes a token list * Synonymizes a token list
* Parameters: (ResultWordList &) words: The word list to synonymize * Parameters: (ResultWordListList &) words: The word list to synonymize
*/ */
void synonymizeTokens(ResultWordList &words); void synonymizeTokens(ResultWordListList &words);
void printParserNodes(int num); void printParserNodes(int num);

View file

@ -78,6 +78,36 @@ class ArrayTestSuite : public CxxTest::TestSuite
TS_ASSERT_EQUALS(array.size(), (unsigned int)5); TS_ASSERT_EQUALS(array.size(), (unsigned int)5);
} }
void test_insert_at_array() {
Common::Array<int> array;
Common::Array<int> array2;
// First of all some data
array.push_back(-12);
array.push_back(17);
array.push_back(25);
array.push_back(-11);
array2.push_back(42);
array2.push_back(105);
array2.push_back(-1);
// Insert some data
array.insert_at(2, array2);
TS_ASSERT_EQUALS(array.size(), (unsigned int)7);
TS_ASSERT_EQUALS(array[0], -12);
TS_ASSERT_EQUALS(array[1], 17);
TS_ASSERT_EQUALS(array[2], 42);
TS_ASSERT_EQUALS(array[3], 105);
TS_ASSERT_EQUALS(array[4], -1);
TS_ASSERT_EQUALS(array[5], 25);
TS_ASSERT_EQUALS(array[6], -11);
}
void test_remove_at() { void test_remove_at() {
Common::Array<int> array; Common::Array<int> array;