formatting

svn-id: r38698
This commit is contained in:
Paweł Kołodziejski 2009-02-21 15:20:41 +00:00
parent 6e2848ff98
commit 58fa6822c6

View file

@ -23,8 +23,7 @@
* *
*/ */
/* Main vocabulary support functions and word lookup */ // Main vocabulary support functions and word lookup
#include "sci/include/sciresource.h" #include "sci/include/sciresource.h"
#include "sci/include/engine.h" #include "sci/include/engine.h"
@ -34,7 +33,7 @@ namespace Sci {
int vocab_version; int vocab_version;
#define VOCAB_RESOURCE_PARSE_TREE_BRANCHES vocab_version==1 ? \ #define VOCAB_RESOURCE_PARSE_TREE_BRANCHES vocab_version == 1 ? \
VOCAB_RESOURCE_SCI1_PARSE_TREE_BRANCHES : \ VOCAB_RESOURCE_SCI1_PARSE_TREE_BRANCHES : \
VOCAB_RESOURCE_SCI0_PARSE_TREE_BRANCHES VOCAB_RESOURCE_SCI0_PARSE_TREE_BRANCHES
@ -42,84 +41,76 @@ int vocab_version;
VOCAB_RESOURCE_SCI1_SUFFIX_VOCAB : \ VOCAB_RESOURCE_SCI1_SUFFIX_VOCAB : \
VOCAB_RESOURCE_SCI0_SUFFIX_VOCAB VOCAB_RESOURCE_SCI0_SUFFIX_VOCAB
const char *class_names[] = {"", /* These strange names were taken from an SCI01 interpreter */ const char *class_names[] = {"", // These strange names were taken from an SCI01 interpreter
"", "",
"conj", /* conjunction */ "conj", // conjunction
"ass", /* ? */ "ass", // ?
"pos", /* preposition ? */ "pos", // preposition ?
"art", /* article */ "art", // article
"adj", /* adjective */ "adj", // adjective
"pron", /* pronoun */ "pron", // pronoun
"noun", /* noun */ "noun", // noun
"auxv", /* auxillary verb */ "auxv", // auxillary verb
"adv", /* adverb */ "adv", // adverb
"verb", /* verb */ "verb", // verb
"", "",
"", "",
"", "",
"" ""
}; };
int _vocab_cmp_words(const void *word1, const void *word2) {
int return scumm_stricmp((*((word_t **) word1))->word, (*((word_t **)word2))->word);
_vocab_cmp_words(const void *word1, const void *word2) {
return scumm_stricmp((*((word_t **) word1))->word, (*((word_t **) word2))->word);
} }
word_t **vocab_get_words(ResourceManager *resmgr, int *word_counter) {
word_t **
vocab_get_words(ResourceManager *resmgr, int *word_counter) {
int counter = 0; int counter = 0;
unsigned int seeker; unsigned int seeker;
word_t **words; word_t **words;
char currentword[256] = ""; /* They're not going to use words longer than 255 ;-) */ char currentword[256] = ""; // They're not going to use words longer than 255 ;-)
int currentwordpos = 0; int currentwordpos = 0;
resource_t *resource; resource_t *resource;
/* First try to load the SCI0 vocab resource. */ // First try to load the SCI0 vocab resource.
resource = scir_find_resource(resmgr, sci_vocab, resource = scir_find_resource(resmgr, sci_vocab, VOCAB_RESOURCE_SCI0_MAIN_VOCAB, 0);
VOCAB_RESOURCE_SCI0_MAIN_VOCAB, 0);
vocab_version = 0; vocab_version = 0;
if (!resource) { if (!resource) {
warning("SCI0: Could not find a main vocabulary, trying SCI01"); warning("SCI0: Could not find a main vocabulary, trying SCI01");
resource = scir_find_resource(resmgr, sci_vocab, resource = scir_find_resource(resmgr, sci_vocab, VOCAB_RESOURCE_SCI1_MAIN_VOCAB, 0);
VOCAB_RESOURCE_SCI1_MAIN_VOCAB, 0);
vocab_version = 1; vocab_version = 1;
} }
if (!resource) { if (!resource) {
warning("SCI1: Could not find a main vocabulary"); warning("SCI1: Could not find a main vocabulary");
return NULL; /* NOT critical: SCI1 games and some demos don't have one! */ return NULL; // NOT critical: SCI1 games and some demos don't have one!
} }
if (vocab_version == 1) if (vocab_version == 1)
seeker = 255 * 2; /* vocab.900 starts with 255 16-bit pointers which we don't use */ seeker = 255 * 2; // vocab.900 starts with 255 16-bit pointers which we don't use
else else
seeker = 26 * 2; /* vocab.000 starts with 26 16-bit pointers which we don't use */ seeker = 26 * 2; // vocab.000 starts with 26 16-bit pointers which we don't use
if (resource->size < seeker) { if (resource->size < seeker) {
fprintf(stderr, "Invalid main vocabulary encountered: Too small\n"); fprintf(stderr, "Invalid main vocabulary encountered: Too small\n");
return NULL; return NULL;
/* Now this ought to be critical, but it'll just cause parse() and said() not to work */ // Now this ought to be critical, but it'll just cause parse() and said() not to work
} }
words = (word_t**)sci_malloc(sizeof(word_t *)); words = (word_t **)sci_malloc(sizeof(word_t *));
while (seeker < resource->size) { while (seeker < resource->size) {
byte c; byte c;
words = (word_t**)sci_realloc(words, (counter + 1) * sizeof(word_t *)); words = (word_t**)sci_realloc(words, (counter + 1) * sizeof(word_t *));
currentwordpos = resource->data[seeker++]; /* Parts of previous words may be re-used */ currentwordpos = resource->data[seeker++]; // Parts of previous words may be re-used
if (vocab_version == 1) { if (vocab_version == 1) {
c = 1; c = 1;
while (seeker < resource->size while (seeker < resource->size && currentwordpos < 255 && c) {
&& currentwordpos < 255
&& c) {
c = resource->data[seeker++]; c = resource->data[seeker++];
currentword[currentwordpos++] = c; currentword[currentwordpos++] = c;
} }
@ -131,37 +122,34 @@ vocab_get_words(ResourceManager *resmgr, int *word_counter) {
} else { } else {
do { do {
c = resource->data[seeker++]; c = resource->data[seeker++];
currentword[currentwordpos++] = c & 0x7f; /* 0x80 is used to terminate the string */ currentword[currentwordpos++] = c & 0x7f; // 0x80 is used to terminate the string
} while (c < 0x80); } while (c < 0x80);
} }
currentword[currentwordpos] = 0; currentword[currentwordpos] = 0;
words[counter] = (word_t*)sci_malloc(sizeof(word_t) + currentwordpos); words[counter] = (word_t*)sci_malloc(sizeof(word_t) + currentwordpos);
/* Allocate more memory, so that the word fits into the structure */ // Allocate more memory, so that the word fits into the structure
strcpy(&(words[counter]->word[0]), &(currentword[0])); /* Copy the word */ strcpy(&(words[counter]->word[0]), &(currentword[0])); // Copy the word
/* Now decode class and group: */ // Now decode class and group:
c = resource->data[seeker + 1]; c = resource->data[seeker + 1];
words[counter]->w_class = ((resource->data[seeker]) << 4) | ((c & 0xf0) >> 4); words[counter]->w_class = ((resource->data[seeker]) << 4) | ((c & 0xf0) >> 4);
words[counter]->group = (resource->data[seeker + 2]) | ((c & 0x0f) << 8); words[counter]->group = (resource->data[seeker + 2]) | ((c & 0x0f) << 8);
seeker += 3; seeker += 3;
++counter; ++counter;
} }
*word_counter = counter; *word_counter = counter;
qsort(words, counter, sizeof(word_t *), _vocab_cmp_words); /* Sort entries */ qsort(words, counter, sizeof(word_t *), _vocab_cmp_words); // Sort entries
return words; return words;
} }
void vocab_free_words(word_t **words, int words_nr) {
void
vocab_free_words(word_t **words, int words_nr) {
int i; int i;
for (i = 0; i < words_nr; i++) for (i = 0; i < words_nr; i++)
@ -170,9 +158,7 @@ vocab_free_words(word_t **words, int words_nr) {
free(words); free(words);
} }
const char *vocab_get_any_group_word(int group, word_t **words, int words_nr) {
const char *
vocab_get_any_group_word(int group, word_t **words, int words_nr) {
int i; int i;
if (group == VOCAB_MAGIC_NUMBER_GROUP) if (group == VOCAB_MAGIC_NUMBER_GROUP)
@ -185,14 +171,11 @@ vocab_get_any_group_word(int group, word_t **words, int words_nr) {
return "{invalid}"; return "{invalid}";
} }
static inline unsigned int inverse_16(unsigned int foo) {
static inline unsigned int
inverse_16(unsigned int foo) {
return (((foo & 0xff) << 8) | ((foo & 0xff00) >> 8)); return (((foo & 0xff) << 8) | ((foo & 0xff00) >> 8));
} }
suffix_t ** suffix_t **vocab_get_suffices(ResourceManager *resmgr, int *suffices_nr) {
vocab_get_suffices(ResourceManager *resmgr, int *suffices_nr) {
int counter = 0; int counter = 0;
suffix_t **suffices; suffix_t **suffices;
resource_t *resource = scir_find_resource(resmgr, sci_vocab, VOCAB_RESOURCE_SUFFIX_VOCAB, 1); resource_t *resource = scir_find_resource(resmgr, sci_vocab, VOCAB_RESOURCE_SUFFIX_VOCAB, 1);
@ -200,13 +183,12 @@ vocab_get_suffices(ResourceManager *resmgr, int *suffices_nr) {
if (!resource) { if (!resource) {
fprintf(stderr, "Could not find suffix vocabulary!\n"); fprintf(stderr, "Could not find suffix vocabulary!\n");
return NULL; /* Not critical */ return NULL; // Not critical
} }
suffices = (suffix_t**)sci_malloc(sizeof(suffix_t *)); suffices = (suffix_t**)sci_malloc(sizeof(suffix_t *));
while ((seeker < resource->size - 1) && (resource->data[seeker + 1] != 0xff)) { while ((seeker < resource->size - 1) && (resource->data[seeker + 1] != 0xff)) {
char *alt_suffix = (char *) resource->data + seeker; char *alt_suffix = (char *) resource->data + seeker;
int alt_len = strlen(alt_suffix); int alt_len = strlen(alt_suffix);
char *word_suffix; char *word_suffix;
@ -214,36 +196,33 @@ vocab_get_suffices(ResourceManager *resmgr, int *suffices_nr) {
suffices = (suffix_t**)sci_realloc(suffices, sizeof(suffix_t *) * (counter + 1)); suffices = (suffix_t**)sci_realloc(suffices, sizeof(suffix_t *) * (counter + 1));
seeker += alt_len + 1; /* Hit end of string */ seeker += alt_len + 1; // Hit end of string
word_suffix = (char *) resource->data + seeker + 3; /* Beginning of next string +1 (ignore '*') */ word_suffix = (char *)resource->data + seeker + 3; // Beginning of next string +1 (ignore '*')
word_len = strlen(word_suffix); word_len = strlen(word_suffix);
suffices[counter] = (suffix_t*)sci_malloc(sizeof(suffix_t)); suffices[counter] = (suffix_t *)sci_malloc(sizeof(suffix_t));
/* allocate enough memory to store the strings */ // allocate enough memory to store the strings
suffices[counter]->word_suffix = word_suffix; suffices[counter]->word_suffix = word_suffix;
suffices[counter]->alt_suffix = alt_suffix; suffices[counter]->alt_suffix = alt_suffix;
suffices[counter]->alt_suffix_length = alt_len; suffices[counter]->alt_suffix_length = alt_len;
suffices[counter]->word_suffix_length = word_len; suffices[counter]->word_suffix_length = word_len;
suffices[counter]->class_mask = inverse_16(getInt16(resource->data + seeker)); /* Inverse endianness */ suffices[counter]->class_mask = inverse_16(getInt16(resource->data + seeker)); // Inverse endianness
seeker += word_len + 4; seeker += word_len + 4;
suffices[counter]->result_class = inverse_16(getInt16(resource->data + seeker)); suffices[counter]->result_class = inverse_16(getInt16(resource->data + seeker));
seeker += 3; /* Next entry */ seeker += 3; // Next entry
++counter; ++counter;
} }
*suffices_nr = counter; *suffices_nr = counter;
return suffices; return suffices;
} }
void vocab_free_suffices(ResourceManager *resmgr, suffix_t **suffices, int suffices_nr) {
void
vocab_free_suffices(ResourceManager *resmgr, suffix_t **suffices, int suffices_nr) {
int i; int i;
scir_unlock_resource(resmgr, scir_find_resource(resmgr, sci_vocab, VOCAB_RESOURCE_SUFFIX_VOCAB, 0), scir_unlock_resource(resmgr, scir_find_resource(resmgr, sci_vocab, VOCAB_RESOURCE_SUFFIX_VOCAB, 0),
@ -255,18 +234,13 @@ vocab_free_suffices(ResourceManager *resmgr, suffix_t **suffices, int suffices_n
free(suffices); free(suffices);
} }
void vocab_free_branches(parse_tree_branch_t *parser_branches) {
void
vocab_free_branches(parse_tree_branch_t *parser_branches) {
if (parser_branches) if (parser_branches)
free(parser_branches); free(parser_branches);
} }
parse_tree_branch_t *vocab_get_branches(ResourceManager * resmgr, int *branches_nr) {
parse_tree_branch_t * resource_t *resource = scir_find_resource(resmgr, sci_vocab, VOCAB_RESOURCE_PARSE_TREE_BRANCHES, 0);
vocab_get_branches(ResourceManager * resmgr, int *branches_nr) {
resource_t *resource = scir_find_resource(resmgr, sci_vocab,
VOCAB_RESOURCE_PARSE_TREE_BRANCHES, 0);
parse_tree_branch_t *retval; parse_tree_branch_t *retval;
int i; int i;
@ -282,7 +256,7 @@ vocab_get_branches(ResourceManager * resmgr, int *branches_nr) {
return NULL; return NULL;
} }
retval = (parse_tree_branch_t*)sci_malloc(sizeof(parse_tree_branch_t) * *branches_nr); retval = (parse_tree_branch_t *)sci_malloc(sizeof(parse_tree_branch_t) * *branches_nr);
for (i = 0; i < *branches_nr; i++) { for (i = 0; i < *branches_nr; i++) {
int k; int k;
@ -294,7 +268,7 @@ vocab_get_branches(ResourceManager * resmgr, int *branches_nr) {
for (k = 0; k < 9; k++) for (k = 0; k < 9; k++)
retval[i].data[k] = getUInt16(base + 2 + 2 * k); retval[i].data[k] = getUInt16(base + 2 + 2 * k);
retval[i].data[9] = 0; /* Always terminate */ retval[i].data[9] = 0; // Always terminate
} }
if (!retval[*branches_nr - 1].id) /* branch lists may be terminated by empty rules */ if (!retval[*branches_nr - 1].id) /* branch lists may be terminated by empty rules */
@ -304,12 +278,10 @@ vocab_get_branches(ResourceManager * resmgr, int *branches_nr) {
} }
result_word_t * result_word_t *vocab_lookup_word(char *word, int word_len, word_t **words, int words_nr,
vocab_lookup_word(char *word, int word_len,
word_t **words, int words_nr,
suffix_t **suffices, int suffices_nr) { suffix_t **suffices, int suffices_nr) {
word_t *tempword = (word_t*)sci_malloc(sizeof(word_t) + word_len + 256); word_t *tempword = (word_t*)sci_malloc(sizeof(word_t) + word_len + 256);
/* 256: For suffices. Should suffice. */ // 256: For suffices. Should suffice.
word_t **dict_word; word_t **dict_word;
result_word_t *retval; result_word_t *retval;
char *tester; char *tester;
@ -322,9 +294,9 @@ vocab_lookup_word(char *word, int word_len,
while ((tester = strchr(tempword->word, '-'))) while ((tester = strchr(tempword->word, '-')))
memmove(tester, tester + 1, (tempword->word + word_len_tmp--) - tester); memmove(tester, tester + 1, (tempword->word + word_len_tmp--) - tester);
retval = (result_word_t*)sci_malloc(sizeof(result_word_t)); retval = (result_word_t *)sci_malloc(sizeof(result_word_t));
dict_word = (word_t**)bsearch(&tempword, words, words_nr, sizeof(word_t *), _vocab_cmp_words); dict_word = (word_t **)bsearch(&tempword, words, words_nr, sizeof(word_t *), _vocab_cmp_words);
if (dict_word) { if (dict_word) {
free(tempword); free(tempword);
@ -335,26 +307,24 @@ vocab_lookup_word(char *word, int word_len,
return retval; return retval;
} }
/* Now try all suffices */ // Now try all suffices
for (i = 0; i < suffices_nr; i++) for (i = 0; i < suffices_nr; i++)
if (suffices[i]->alt_suffix_length <= word_len) { if (suffices[i]->alt_suffix_length <= word_len) {
int suff_index = word_len - suffices[i]->alt_suffix_length; int suff_index = word_len - suffices[i]->alt_suffix_length;
/* Offset of the start of the suffix */ // Offset of the start of the suffix
if (scumm_strnicmp(suffices[i]->alt_suffix, word + suff_index,
suffices[i]->alt_suffix_length) == 0) { /* Suffix matched! */
if (scumm_strnicmp(suffices[i]->alt_suffix, word + suff_index, suffices[i]->alt_suffix_length) == 0) { // Suffix matched!
strncpy(&(tempword->word[0]), word, word_len); strncpy(&(tempword->word[0]), word, word_len);
tempword->word[suff_index] = 0; /* Terminate word at suffix start position... */ tempword->word[suff_index] = 0; // Terminate word at suffix start position...
strncat(&(tempword->word[0]), suffices[i]->word_suffix, suffices[i]->word_suffix_length); /* ...and append "correct" suffix */ strncat(&(tempword->word[0]), suffices[i]->word_suffix, suffices[i]->word_suffix_length); // ...and append "correct" suffix
dict_word = (word_t**)bsearch(&tempword, words, words_nr, sizeof(word_t *), _vocab_cmp_words); dict_word = (word_t**)bsearch(&tempword, words, words_nr, sizeof(word_t *), _vocab_cmp_words);
if ((dict_word) && ((*dict_word)->w_class & suffices[i]->class_mask)) { /* Found it? */ if ((dict_word) && ((*dict_word)->w_class & suffices[i]->class_mask)) { // Found it?
free(tempword); free(tempword);
retval->w_class = suffices[i]->result_class; /* Use suffix class */ retval->w_class = suffices[i]->result_class; // Use suffix class
retval->group = (*dict_word)->group; retval->group = (*dict_word)->group;
return retval; return retval;
@ -362,7 +332,7 @@ vocab_lookup_word(char *word, int word_len,
} }
} }
/* No match so far? Check if it's a number. */ // No match so far? Check if it's a number.
strncpy(&(tempword->word[0]), word, word_len); strncpy(&(tempword->word[0]), word, word_len);
tempword->word[word_len] = 0; tempword->word[word_len] = 0;
@ -371,8 +341,7 @@ vocab_lookup_word(char *word, int word_len,
while ((tester = strchr(tempword->word, '-'))) while ((tester = strchr(tempword->word, '-')))
memmove(tester, tester + 1, (tempword->word + word_len--) - tester); memmove(tester, tester + 1, (tempword->word + word_len--) - tester);
if ((strtol(&(tempword->word[0]), &tester, 10) >= 0) if ((strtol(&(tempword->word[0]), &tester, 10) >= 0) && (*tester == '\0')) { // Do we have a complete number here?
&& (*tester == '\0')) { /* Do we have a complete number here? */
free(tempword); free(tempword);
retval->group = VOCAB_MAGIC_NUMBER_GROUP; retval->group = VOCAB_MAGIC_NUMBER_GROUP;
@ -383,11 +352,11 @@ vocab_lookup_word(char *word, int word_len,
free(tempword); free(tempword);
free(retval); free(retval);
return NULL; return NULL;
} }
int int vocab_get_said_spec_length(byte *addr) {
vocab_get_said_spec_length(byte *addr) {
int result = 0; int result = 0;
while (*addr != 0xff) { while (*addr != 0xff) {
@ -403,8 +372,7 @@ vocab_get_said_spec_length(byte *addr) {
return result + 1; return result + 1;
} }
void void vocab_decypher_said_block(EngineState *s, byte *addr) {
vocab_decypher_said_block(EngineState *s, byte *addr) {
int nextitem; int nextitem;
do { do {
@ -412,10 +380,9 @@ vocab_decypher_said_block(EngineState *s, byte *addr) {
if (nextitem < 0xf0) { if (nextitem < 0xf0) {
nextitem = nextitem << 8 | *addr++; nextitem = nextitem << 8 | *addr++;
sciprintf(" %s[%03x]", vocab_get_any_group_word(nextitem, s->parser_words, s->parser_words_nr), sciprintf(" %s[%03x]", vocab_get_any_group_word(nextitem, s->parser_words, s->parser_words_nr), nextitem);
nextitem);
nextitem = 42; /* Make sure that group 0xff doesn't abort */ nextitem = 42; // Make sure that group 0xff doesn't abort
} else switch (nextitem) { } else switch (nextitem) {
case 0xf0: case 0xf0:
sciprintf(" ,"); sciprintf(" ,");
@ -458,33 +425,32 @@ vocab_decypher_said_block(EngineState *s, byte *addr) {
#ifdef SCI_SIMPLE_SAID_CODE #ifdef SCI_SIMPLE_SAID_CODE
static short _related_words[][2] = { /* 0 is backwards, 1 is forward */ static short _related_words[][2] = { // 0 is backwards, 1 is forward
{0x800, 0x180}, /* preposition */ {0x800, 0x180}, // preposition
{0x000, 0x180}, /* article */ {0x000, 0x180}, // article
{0x000, 0x180}, /* adjective */ {0x000, 0x180}, // adjective
{0x800, 0x000}, /* pronoun */ {0x800, 0x000}, // pronoun
{0x800, 0x180}, /* noun */ {0x800, 0x180}, // noun
{0x000, 0x800}, /* auxiliary verb */ {0x000, 0x800}, // auxiliary verb
{0x800, 0x800}, /* adverb */ {0x800, 0x800}, // adverb
{0x000, 0x180}, /* verb */ {0x000, 0x180}, // verb
{0x000, 0x180} /* number */ {0x000, 0x180} // number
}; };
int int vocab_build_simple_parse_tree(parse_tree_node_t *nodes, result_word_t *words, int words_nr) {
vocab_build_simple_parse_tree(parse_tree_node_t *nodes, result_word_t *words, int words_nr) {
int i, length, pos = 0; int i, length, pos = 0;
for (i = 0; i < words_nr; ++i) { for (i = 0; i < words_nr; ++i) {
if (words[i].classID != VOCAB_CLASS_ANYWORD) { if (words[i].classID != VOCAB_CLASS_ANYWORD) {
nodes[pos].type = words[i].classID; nodes[pos].type = words[i].classID;
nodes[pos].content.value = words[i].group; nodes[pos].content.value = words[i].group;
pos += 2; /* Link information is filled in below */ pos += 2; // Link information is filled in below
} }
} }
nodes[pos].type = -1; /* terminate */ nodes[pos].type = -1; // terminate
length = pos >> 1; length = pos >> 1;
/* now find all referenced words */ // now find all referenced words
#ifdef SCI_SIMPLE_SAID_DEBUG #ifdef SCI_SIMPLE_SAID_DEBUG
sciprintf("Semantic references:\n"); sciprintf("Semantic references:\n");
#endif #endif
@ -500,14 +466,14 @@ vocab_build_simple_parse_tree(parse_tree_node_t *nodes, result_word_t *words, in
if (type) { if (type) {
int found = -1; int found = -1;
type -= 5; /* 1 because ffs starts counting at 1, 4 because nodes[pos].type is a nibble off */ type -= 5; // 1 because ffs starts counting at 1, 4 because nodes[pos].type is a nibble off
if (type < 0) if (type < 0)
type = 0; type = 0;
#ifdef SCI_SIMPLE_SAID_DEBUG #ifdef SCI_SIMPLE_SAID_DEBUG
sciprintf("#%d: Word %04x: type %04x\n", i, nodes[pos].content.value, type); sciprintf("#%d: Word %04x: type %04x\n", i, nodes[pos].content.value, type);
#endif #endif
/* search backwards */ // search backwards
searchmask = _related_words[type][0]; searchmask = _related_words[type][0];
if (searchmask) { if (searchmask) {
for (j = i - 1; j >= 0; j--) for (j = i - 1; j >= 0; j--)
@ -522,7 +488,7 @@ vocab_build_simple_parse_tree(parse_tree_node_t *nodes, result_word_t *words, in
sciprintf(" %d <\n", found >> 1); sciprintf(" %d <\n", found >> 1);
#endif #endif
/* search forward */ // search forward
found = -1; found = -1;
searchmask = _related_words[type][1]; searchmask = _related_words[type][1];
if (searchmask) { if (searchmask) {
@ -536,13 +502,12 @@ vocab_build_simple_parse_tree(parse_tree_node_t *nodes, result_word_t *words, in
if (found > -1) if (found > -1)
sciprintf(" > %d\n", found >> 1); sciprintf(" > %d\n", found >> 1);
#endif #endif
} else { } else {
#ifdef SCI_SIMPLE_SAID_DEBUG #ifdef SCI_SIMPLE_SAID_DEBUG
sciprintf("#%d: Untypified word\n", i); /* Weird, but not fatal */ sciprintf("#%d: Untypified word\n", i); /* Weird, but not fatal */
#endif #endif
nodes[pos+1].content.branches[0] = -1; nodes[pos + 1].content.branches[0] = -1;
nodes[pos+1].content.branches[1] = -1; nodes[pos + 1].content.branches[1] = -1;
} }
} }
#ifdef SCI_SIMPLE_SAID_DEBUG #ifdef SCI_SIMPLE_SAID_DEBUG
@ -553,21 +518,17 @@ vocab_build_simple_parse_tree(parse_tree_node_t *nodes, result_word_t *words, in
} }
#endif #endif
result_word_t * result_word_t *vocab_tokenize_string(char *sentence, int *result_nr, word_t **words, int words_nr,
vocab_tokenize_string(char *sentence, int *result_nr, suffix_t **suffices, int suffices_nr, char **error) {
word_t **words, int words_nr,
suffix_t **suffices, int suffices_nr,
char **error) {
char *lastword = sentence; char *lastword = sentence;
int pos_in_sentence = 0; int pos_in_sentence = 0;
char c; char c;
int wordlen = 0; int wordlen = 0;
result_word_t *retval = (result_word_t*)sci_malloc(sizeof(result_word_t)); result_word_t *retval = (result_word_t*)sci_malloc(sizeof(result_word_t));
/* malloc'd size is always one result_word_t too big */ // malloc'd size is always one result_word_t too big
result_word_t *lookup_result; result_word_t *lookup_result;
*result_nr = 0; *result_nr = 0;
*error = NULL; *error = NULL;
@ -576,30 +537,27 @@ vocab_tokenize_string(char *sentence, int *result_nr,
c = sentence[pos_in_sentence++]; c = sentence[pos_in_sentence++];
if (isalnum(c) || (c == '-' && wordlen)) if (isalnum(c) || (c == '-' && wordlen))
++wordlen; /* Continue on this word */ ++wordlen;
/* Words may contain a '-', but may not // Continue on this word */
** start with one. */ // Words may contain a '-', but may not
// start with one.
else { else {
if (wordlen) { // Finished a word?
if (wordlen) { /* Finished a word? */
lookup_result = lookup_result =
vocab_lookup_word(lastword, wordlen, vocab_lookup_word(lastword, wordlen, words, words_nr, suffices, suffices_nr);
words, words_nr, // Look it up
suffices, suffices_nr);
/* Look it up */
if (!lookup_result) { /* Not found? */ if (!lookup_result) { // Not found?
*error = (char*)sci_calloc(wordlen + 1, 1); *error = (char *)sci_calloc(wordlen + 1, 1);
strncpy(*error, lastword, wordlen); /* Set the offending word */ strncpy(*error, lastword, wordlen); // Set the offending word
free(retval); free(retval);
return NULL; /* And return with error */ return NULL; // And return with error
} }
memcpy(retval + *result_nr, lookup_result, sizeof(result_word_t)); memcpy(retval + *result_nr, lookup_result, sizeof(result_word_t));
/* Copy into list */ // Copy into list
++(*result_nr); /* Increase number of resulting words */ ++(*result_nr); // Increase number of resulting words
free(lookup_result); free(lookup_result);
retval = (result_word_t*)sci_realloc(retval, sizeof(result_word_t) * (*result_nr + 1)); retval = (result_word_t*)sci_realloc(retval, sizeof(result_word_t) * (*result_nr + 1));
@ -610,7 +568,7 @@ vocab_tokenize_string(char *sentence, int *result_nr,
wordlen = 0; wordlen = 0;
} }
} while (c); /* Until terminator is hit */ } while (c); // Until terminator is hit
if (*result_nr == 0) { if (*result_nr == 0) {
free(retval); free(retval);
@ -620,39 +578,38 @@ vocab_tokenize_string(char *sentence, int *result_nr,
return retval; return retval;
} }
void _vocab_recursive_ptree_dump_treelike(parse_tree_node_t *nodes, int nr, int prevnr) {
void
_vocab_recursive_ptree_dump_treelike(parse_tree_node_t *nodes, int nr, int prevnr) {
if ((nr > VOCAB_TREE_NODES)/* || (nr < prevnr)*/) { if ((nr > VOCAB_TREE_NODES)/* || (nr < prevnr)*/) {
sciprintf("Error(%04x)", nr); sciprintf("Error(%04x)", nr);
return; return;
} }
if (nodes[nr].type == PARSE_TREE_NODE_LEAF) if (nodes[nr].type == PARSE_TREE_NODE_LEAF)
/* sciprintf("[%03x]%04x", nr, nodes[nr].content.value); */ //sciprintf("[%03x]%04x", nr, nodes[nr].content.value);
sciprintf("%x", nodes[nr].content.value); sciprintf("%x", nodes[nr].content.value);
else { else {
int lbranch = nodes[nr].content.branches[0]; int lbranch = nodes[nr].content.branches[0];
int rbranch = nodes[nr].content.branches[1]; int rbranch = nodes[nr].content.branches[1];
/* sciprintf("<[%03x]",nr); */ //sciprintf("<[%03x]", nr);
sciprintf("<"); sciprintf("<");
if (lbranch) if (lbranch)
_vocab_recursive_ptree_dump_treelike(nodes, lbranch, nr); _vocab_recursive_ptree_dump_treelike(nodes, lbranch, nr);
else sciprintf("NULL"); else
sciprintf("NULL");
sciprintf(","); sciprintf(",");
if (rbranch) if (rbranch)
_vocab_recursive_ptree_dump_treelike(nodes, rbranch, nr); _vocab_recursive_ptree_dump_treelike(nodes, rbranch, nr);
else sciprintf("NULL"); else
sciprintf("NULL");
sciprintf(">"); sciprintf(">");
} }
} }
void void _vocab_recursive_ptree_dump(parse_tree_node_t *nodes, int nr, int prevnr, int blanks) {
_vocab_recursive_ptree_dump(parse_tree_node_t *nodes, int nr, int prevnr, int blanks) {
int lbranch = nodes[nr].content.branches[0]; int lbranch = nodes[nr].content.branches[0];
int rbranch = nodes[nr].content.branches[1]; int rbranch = nodes[nr].content.branches[1];
int i; int i;
@ -680,7 +637,7 @@ _vocab_recursive_ptree_dump(parse_tree_node_t *nodes, int nr, int prevnr, int bl
} else } else
sciprintf("%x", nodes[lbranch].content.value); sciprintf("%x", nodes[lbranch].content.value);
sciprintf(" "); sciprintf(" ");
}/* else sciprintf ("nil"); */ }/* else sciprintf ("nil");*/
if (rbranch) { if (rbranch) {
if (nodes[rbranch].type == PARSE_TREE_NODE_BRANCH) if (nodes[rbranch].type == PARSE_TREE_NODE_BRANCH)
@ -690,20 +647,18 @@ _vocab_recursive_ptree_dump(parse_tree_node_t *nodes, int nr, int prevnr, int bl
}/* else sciprintf("nil");*/ }/* else sciprintf("nil");*/
} }
void void vocab_dump_parse_tree(const char *tree_name, parse_tree_node_t *nodes) {
vocab_dump_parse_tree(const char *tree_name, parse_tree_node_t *nodes) { //_vocab_recursive_ptree_dump_treelike(nodes, 0, 0);
/* _vocab_recursive_ptree_dump_treelike(nodes, 0, 0); */
sciprintf("(setq %s \n'(", tree_name); sciprintf("(setq %s \n'(", tree_name);
_vocab_recursive_ptree_dump(nodes, 0, 0, 1); _vocab_recursive_ptree_dump(nodes, 0, 0, 1);
sciprintf("))\n"); sciprintf("))\n");
} }
void void vocab_synonymize_tokens(result_word_t *words, int words_nr, synonym_t *synonyms, int synonyms_nr) {
vocab_synonymize_tokens(result_word_t *words, int words_nr, synonym_t *synonyms, int synonyms_nr) {
int i, sync; int i, sync;
if (!synonyms || !synonyms_nr) if (!synonyms || !synonyms_nr)
return; /* No synonyms: Nothing to check */ return; // No synonyms: Nothing to check
for (i = 0; i < words_nr; i++) for (i = 0; i < words_nr; i++)
for (sync = 0; sync < synonyms_nr; sync++) for (sync = 0; sync < synonyms_nr; sync++)