Alvis-NLPPlatform
view release on metacpan or search on metacpan
lib/Alvis/NLPPlatform/patches/link-4.1a-WithWhiteSpace.diff view on Meta::CPAN
sent->string_set = string_set_create();
! if (!separate_sentence(input_string, sent, opts)) {
string_set_delete(sent->string_set);
xfree(sent, sizeof(struct Sentence_s));
return NULL;
diff -cr ./src/command-line.c ../link-4.1a-WithWhiteSpace/src/command-line.c
*** ./src/command-line.c 2004-02-01 21:57:54.000000000 +0100
--- ../link-4.1a-WithWhiteSpace/src/command-line.c 2006-11-02 11:32:06.000000000 +0100
***************
*** 21,26 ****
--- 21,27 ----
int null_block;
int islands_ok;
int short_length;
+ int whitespace;
int batch_mode;
int panic_mode;
int allow_null;
***************
*** 64,69 ****
--- 65,71 ----
{"links", 1, "Showing of complete link data", &local.display_links},
{"walls", 1, "Showing of wall words", &local.display_walls},
{"union", 1, "Showing of 'union' linkage", &local.display_union},
+ {"whitespace", 1, "White space tokenization", &local.whitespace},
{NULL, 1, NULL, NULL}
};
***************
*** 229,234 ****
--- 231,237 ----
local.display_links = parse_options_get_display_links(opts);
local.display_walls = parse_options_get_display_walls(opts);
local.display_union = parse_options_get_display_union(opts);
+ local.whitespace = parse_options_get_whitespace(opts);
}
void put_local_vars_in_opts(Parse_Options opts) {
***************
*** 253,258 ****
--- 256,262 ----
parse_options_set_display_links(opts, local.display_links);
parse_options_set_display_walls(opts, local.display_walls);
parse_options_set_display_union(opts, local.display_union);
+ parse_options_set_whitespace(opts, local.whitespace);
}
void issue_special_command(char * line, Parse_Options opts, Dictionary dict) {
diff -cr ./src/parse.c ../link-4.1a-WithWhiteSpace/src/parse.c
*** ./src/parse.c 2004-02-01 21:57:55.000000000 +0100
--- ../link-4.1a-WithWhiteSpace/src/parse.c 2006-11-02 11:32:06.000000000 +0100
***************
*** 441,447 ****
label = strip_off_label(input_string);
}
! sent = sentence_create(input_string, dict);
if (sent == NULL) {
if (verbosity > 0) fprintf(stderr, "%s\n", lperrmsg);
--- 441,447 ----
label = strip_off_label(input_string);
}
! sent = sentence_create(input_string, dict, opts);
if (sent == NULL) {
if (verbosity > 0) fprintf(stderr, "%s\n", lperrmsg);
diff -cr ./src/tokenize.c ../link-4.1a-WithWhiteSpace/src/tokenize.c
*** ./src/tokenize.c 2004-02-01 21:57:55.000000000 +0100
--- ../link-4.1a-WithWhiteSpace/src/tokenize.c 2006-11-02 11:32:07.000000000 +0100
***************
*** 15,20 ****
--- 15,25 ----
#define MAX_STRIP 10
+ /* If nonzero, attempt to split each word into its word text and its POS
+ * (part-of-speech) tag, which are separated by a slash ("/").
+ */
+ static int attempt_to_find_POS_tag = 0;
+
int post_quote[MAX_SENTENCE];
/*static char * strip_left[] = {"(", "$", "``", NULL}; */
/*static char * strip_right[] = {")", "%", ",", ".", ":", ";", "?", "!", "''", "'", "'s", NULL};*/
***************
*** 376,388 ****
return TRUE;
}
! int separate_sentence(char * s, Sentence sent) {
/* The string s has just been read in from standard input.
This function breaks it up into words and stores these words in
the sent->word[] array. Returns TRUE if all is well, FALSE otherwise.
Quote marks are treated just like blanks.
*/
char *t;
int i, is_first, quote_found;
Dictionary dict = sent->dict;
--- 381,395 ----
return TRUE;
}
! int separate_sentence(char * s, Sentence sent, Parse_Options opts) {
/* The string s has just been read in from standard input.
This function breaks it up into words and stores these words in
the sent->word[] array. Returns TRUE if all is well, FALSE otherwise.
Quote marks are treated just like blanks.
*/
char *t;
+ int st_tokenization=!parse_options_get_whitespace(opts); /* Nonzero when the whitespace option is off, i.e. use the original Sleator & Temperley tokenization routine. */
+ char *tmp_word=(char *) xalloc(MAX_WORD+1);
int i, is_first, quote_found;
Dictionary dict = sent->dict;
***************
*** 401,407 ****
}
if (*s == '\0') break;
for (t=s; !((isspace((int)*t) || (*t == '\"')) || *t=='\0'); t++);
! if (!separate_word(sent, s, t, is_first, quote_found)) return FALSE;
is_first = FALSE;
s = t;
if (*s == '\0') break;
--- 408,439 ----
}
if (*s == '\0') break;
( run in 3.153 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )