AI-Categorizer

 view release on metacpan or  search on metacpan

lib/AI/Categorizer/Document/XML.pm  view on Meta::CPAN

  #$self->SUPER::start_document($doc);
}

# Input: None
# Output: None
# Description:
# 	it is called whenever the parser ends the document
# 	it will be called at once
#	Nothing to do
sub end_document{
  my ($self, $doc)= @_;

  #$self->SUPER::end_document($doc);
}

# Input
#	LocalName: 	$el->{LocalName}
#	NamespaceURI: 	$el->{NamespaceURI}
#	Name		$el->{Name}
#	Prefix		$el->{Prefix}
#	Attributes	$el->{Attributes}
#	for each attribute
#		LocalName: 	$el->{LocalName}
#		NamespaceURI: 	$el->{NamespaceURI}
#		Name		$el->{Name}
#		Prefix		$el->{Prefix}
#		Value		$el->{Value}
# Output: None
# Description:
# 	it is called whenever the parser meets the element
sub start_element{
  my ($self, $el)= @_;

  # find the last location of the content
  # its meaning is to append the new data at this location
  my $location= length $self->{content};

  # save the last location of the current content
  # so that at end_element the starting location of data of this element can be known
  $self->{locationArray}[$self->{levelPointer}] = $location;

  # for the next element, increase levelPointer
  $self->{levelPointer}++;

  #$self->SUPER::start_document($el);
}

# Input: None
# Output: None
# Description:
# 	it is called whenever the parser ends the element
sub end_element{
  my ($self, $el)= @_;

  $self->{levelPointer}--;
  my $location= $self->{locationArray}[$self->{levelPointer}];

  # find the name of element
  my $elementName= $el->{Name};

  # set the default weight
  my $weight= 1;

  # check if user give the weight to duplicate data
  $weight= $self->{weightHash}{$elementName} if exists $self->{weightHash}{$elementName};

  # 0 - remove all the data to be related to this element
  if($weight == 0){
    $self->{content} = substr($self->{content}, 0, $location);
    return;
  }

  # 1 - dont duplicate
  if($weight == 1){
    return;
  }
  
  # n - duplicate data by n times
  # get new content
  my $newContent= substr($self->{content}, $location);

  # start to copy
  for(my $i=1; $i<$weight;$i++){
    $self->{content} .= $newContent;
  }

  #$self->SUPER::end_document($el);
}

# Input: a hash which consists of pair <Data, Value>
# Output: None
# Description:
# 	it is called whenever the parser meets the text which comes from Text, CDataSection and etc
#	Value must be saved into content buffer.
sub characters{
  my ($self, $args)= @_;

  # save "data plus new line" into content
  $self->{content} .= "$args->{Data}\n";
}
	
# Input: a hash which consists of pair <Data, Value>
# Output: None
# Description:
# 	it is called whenever the parser meets the comment
#	Currently, it will be ignored
sub comment{
  my ($self, $args)= @_;
}

# Input: a hash which consists of pair <Data, Value> and <Target, Value>
# Output: None
# Description:
# 	it is called whenever the parser meets the processing_instructing
#	Currently, it will be ignored
sub processing_instruction{
  my ($self, $args)= @_;
}

# Input: None
# Output: the converted data, that is, content



( run in 0.811 second using v1.01-cache-2.11-cpan-39bf76dae61 )