Lingua-AlignmentSet
view release on metacpan or search on metacpan
AlignmentSet.pm view on Meta::CPAN
push @$st_alignments,$theRest;
while ($$alFH{sourceToTarget}->getline() =~ m/^$sentPairNum (.*)$/) {
push @$st_alignments,$1;
$fhPos = $$alFH{sourceToTarget}->getpos;
}
}
$$alFH{sourceToTarget}->setpos($fhPos); #if we changed sentences, we read the first line of next sentence=>go back one line
if ($$alFH{targetToSource}){
$fhPos = $$alFH{targetToSource}->getpos;
$alString = $$alFH{targetToSource}->getline();
my ($num,$theRest)=split " ",$alString,2;
if ($num==$sentPairNum){ #skip if there is no link for this sentence pair
$fhPos = $$alFH{targetToSource}->getpos;
push @$ts_alignments,$theRest;
while ($$alFH{targetToSource}->getline() =~ m/^$sentPairNum (.*)$/) {
push @$ts_alignments,$1;
$fhPos = $$alFH{targetToSource}->getpos;
}
}
$$alFH{targetToSource}->setpos($fhPos); #if we changed sentences, we read the first line of next sentence=>go back one line
}
$al = Lingua::Alignment->new;
$al->loadFromBlinker($st_alignments,$ts_alignments,$sourceString,$targetString);
}
} elsif ($alSet->{format} eq "BLINKER"){
if ($alSet->{lastSentPair} eq "eof"){
$theEnd = !(-e $alSet->{location}->{sourceToTarget}."/samp".$alSet->{location}->{sampleNum}.".SentPair".($sentPairNum-1));
}else{
$theEnd = !(-e $alSet->{location}->{sourceToTarget}."/samp".$alSet->{location}->{sampleNum}.".SentPair".($sentPairNum-1)) || $sentPairNum > $alSet->{lastSentPair};
}
if ($theEnd){
return 0;
}else{
if ($alFH->{source}){
$sourceString = $alFH->{source}->getline();
}
if ($alFH->{target}){
$targetString = $alFH->{target}->getline();
}
open(AL,"< ".$alSet->{location}->{sourceToTarget}."/samp".$alSet->{location}->{sampleNum}.".SentPair".($sentPairNum-1));
@$st_alignments = <AL>;
close(AL);
if ($alSet->{location}->{targetToSource}){
open(AL,"< ".$alSet->{location}->{targetToSource}."/samp".$alSet->{location}->{sampleNum}.".SentPair".($sentPairNum-1));
@$ts_alignments = <AL>;
close(AL);
}
$al = Lingua::Alignment->new;
$al->loadFromBlinker($st_alignments,$ts_alignments,$sourceString,$targetString);
}
}
if ($alignMode eq "null-align"){
$al->forceNullAlign();
}elsif ($alignMode eq "no-null-align"){
$al->forceNoNullAlign();
}
return [$al];
}
# Re-points an alignment set at a newly written file set, modifying $alSet in place.
#
# Arguments:
#   $alSet           - alignment set hash ref; its {location}, {format},
#                      {firstSentPair} and {lastSentPair} entries are updated
#   $newFormat       - format name of the new files (only "BLINKER" is special-cased here)
#   $newLocation     - hash ref; the keys read are sourceToTarget, targetToSource,
#                      source, target and (for BLINKER) sampleNum
#   $lastSentPairNum - value stored as the new {lastSentPair}
#
# There is no explicit return; the sub ends on the {lastSentPair} assignment.
sub updateObject {
    my ($alSet, $newFormat, $newLocation, $lastSentPairNum) = @_;

    # Both alignment directions are always overwritten, even when the new
    # location does not define one of them (the entry then becomes undef).
    for my $direction (qw(sourceToTarget targetToSource)) {
        $alSet->{location}{$direction} = $newLocation->{$direction};
    }

    # Sentence files: take the new path when one is supplied. Otherwise the old
    # path is kept only if the set already started at sentence pair 1 and the
    # format is unchanged; in any other case the numeration of the converted
    # alignment file and that of the (not converted) sentence file will not
    # correspond, so the stale entry is removed from the location hash.
    for my $side (qw(source target)) {
        if ($newLocation->{$side}) {
            $alSet->{location}{$side} = $newLocation->{$side};
            next;
        }
        my $numerationStillMatches =
            $alSet->{firstSentPair} == 1 && $alSet->{format} eq $newFormat;
        delete $alSet->{location}{$side} unless $numerationStillMatches;
    }

    $alSet->{format} = $newFormat;

    # sampleNum is only kept for BLINKER locations.
    if ($newFormat eq "BLINKER") {
        $alSet->{location}{sampleNum} = $newLocation->{sampleNum};
    }
    elsif (exists $alSet->{location}{sampleNum}) {
        delete $alSet->{location}{sampleNum};
    }

    # The object now always describes a range starting at sentence pair 1.
    $alSet->{firstSentPair} = 1;
    $alSet->{lastSentPair}  = $lastSentPairNum;
}
# Returns the alignment set, with a unique new file set that has the required location, format and range values.
# TODO: conversion to Giza++ format
sub convert {
my ($alSet,$newLocation,$newFormat,$alignMode,$AlignmentSub)=@_;
if (!defined($newFormat)){$newFormat="TALP"}
else {$newFormat = uc $newFormat}
$newLocation = readLocation($newLocation);
my $FH = $alSet->openFiles();
my $newFH = openLocation($newLocation,$newFormat,">",$alSet->{location});
my ($i,$al,$alSetChunk,$line,$lines);
my $inputSentPairNum=$alSet->{firstSentPair};
my $internalSentPairNum = 1;
while ($alSetChunk = $alSet->loadChunk($FH,$inputSentPairNum,$alignMode)){ # returns 0 if eof or last sentence pair
# print $inputSentPairNum."\n";
for ($i=0;$i<@$alSetChunk;$i++){
$al = $$alSetChunk[$i];
if (defined($AlignmentSub)){
#look if $AlignmentSub is a ref to an Array or a subroutine
if (ref($AlignmentSub) eq "ARRAY"){
my ($sub,@params) = @$AlignmentSub;
$al->$sub(@params);
}else{
$al->$AlignmentSub();
}
}
$al->output($FH,$newFormat,$newFH,$newLocation,$internalSentPairNum);
} #for
( run in 2.403 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )