Alvis-NLPPlatform

 view release on metacpan or search on metacpan

lib/Alvis/NLPPlatform/patches/link-4.1b-WithWhiteSpace.diff  view on Meta::CPAN

      sent->string_set = string_set_create();
  
!     if (!separate_sentence(input_string, sent, opts)) {
  	string_set_delete(sent->string_set);
  	xfree(sent, sizeof(struct Sentence_s));
  	return NULL;
diff -rc ./src/command-line.c ../link-4.1b-WithWhiteSpace/src/command-line.c
*** ./src/command-line.c	2005-01-12 18:09:54.000000000 +0100
--- ../link-4.1b-WithWhiteSpace/src/command-line.c	2006-11-02 14:08:24.000000000 +0100
***************
*** 21,26 ****
--- 21,27 ----
      int null_block;
      int islands_ok;
      int short_length;
+     int whitespace;
      int batch_mode;  
      int panic_mode;  
      int allow_null;
***************
*** 64,69 ****
--- 65,71 ----
      {"links",        1, "Showing of complete link data",    &local.display_links},
      {"walls",        1, "Showing of wall words",            &local.display_walls},
      {"union",        1, "Showing of 'union' linkage",       &local.display_union},
+     {"whitespace",   1, "White space tokenization",         &local.whitespace},
      {NULL,           1,  NULL,                              NULL}
  };
  
***************
*** 229,234 ****
--- 231,237 ----
      local.display_links = parse_options_get_display_links(opts);
      local.display_walls = parse_options_get_display_walls(opts);
      local.display_union = parse_options_get_display_union(opts);
+     local.whitespace = parse_options_get_whitespace(opts);
  }
  
  void put_local_vars_in_opts(Parse_Options opts) {
***************
*** 253,258 ****
--- 256,262 ----
      parse_options_set_display_links(opts, local.display_links);
      parse_options_set_display_walls(opts, local.display_walls);
      parse_options_set_display_union(opts, local.display_union);
+     parse_options_set_whitespace(opts, local.whitespace);
  }
  
  void issue_special_command(char * line, Parse_Options opts, Dictionary dict) {
diff -rc ./src/parse.c ../link-4.1b-WithWhiteSpace/src/parse.c
*** ./src/parse.c	2005-01-12 18:09:54.000000000 +0100
--- ../link-4.1b-WithWhiteSpace/src/parse.c	2006-11-02 14:08:24.000000000 +0100
***************
*** 441,447 ****
  	    label = strip_off_label(input_string);
  	}
  
! 	sent = sentence_create(input_string, dict);
  
  	if (sent == NULL) {
  	    if (verbosity > 0) fprintf(stderr, "%s\n", lperrmsg);
--- 441,447 ----
  	    label = strip_off_label(input_string);
  	}
  
! 	sent = sentence_create(input_string, dict, opts);
  
  	if (sent == NULL) {
  	    if (verbosity > 0) fprintf(stderr, "%s\n", lperrmsg);
diff -rc ./src/tokenize.c ../link-4.1b-WithWhiteSpace/src/tokenize.c
*** ./src/tokenize.c	2005-01-12 18:09:54.000000000 +0100
--- ../link-4.1b-WithWhiteSpace/src/tokenize.c	2006-11-02 14:08:24.000000000 +0100
***************
*** 15,20 ****
--- 15,25 ----
  
  #define MAX_STRIP 10
  
+ /* If true, attempt to break words into word text and POS tag parts,
+  * separated by a slash ("/").
+  */
+ static int attempt_to_find_POS_tag = 0;
+ 
  int post_quote[MAX_SENTENCE];
  /*static char * strip_left[] = {"(", "$", "``", NULL}; */
  /*static char * strip_right[] = {")", "%", ",", ".", ":", ";", "?", "!", "''", "'", "'s", NULL};*/
***************
*** 376,388 ****
      return TRUE;
  }
  
! int separate_sentence(char * s, Sentence sent) {
      /* The string s has just been read in from standard input.
         This function breaks it up into words and stores these words in
         the sent->word[] array.  Returns TRUE if all is well, FALSE otherwise.
         Quote marks are treated just like blanks.
         */
      char *t;
      int i, is_first, quote_found;
      Dictionary dict = sent->dict;
  
--- 381,395 ----
      return TRUE;
  }
  
! int separate_sentence(char * s, Sentence sent, Parse_Options opts) {
      /* The string s has just been read in from standard input.
         This function breaks it up into words and stores these words in
         the sent->word[] array.  Returns TRUE if all is well, FALSE otherwise.
         Quote marks are treated just like blanks.
         */
      char *t;
+     int st_tokenization=!parse_options_get_whitespace(opts); /* Whether to use the original Sleator&Temperley tokenization routine. */
+     char *tmp_word=(char *) xalloc(MAX_WORD+1);
      int i, is_first, quote_found;
      Dictionary dict = sent->dict;
  
***************
*** 401,407 ****
  	}
  	if (*s == '\0') break;
  	for (t=s; !((isspace((int)*t) || (*t == '\"')) || *t=='\0'); t++);
! 	if (!separate_word(sent, s, t, is_first, quote_found)) return FALSE;
  	is_first = FALSE;
  	s = t;
  	if (*s == '\0') break;
--- 408,439 ----
  	}
  	if (*s == '\0') break;



( run in 1.157 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )