Lingua-AlignmentSet

 view release on metacpan or  search on metacpan

AlignmentSet.pm  view on Meta::CPAN

		push @$st_alignments,$theRest;
		while ($$alFH{sourceToTarget}->getline() =~ m/^$sentPairNum (.*)$/) {
		    push @$st_alignments,$1;
		    $fhPos = $$alFH{sourceToTarget}->getpos;
		}
	    }
	    $$alFH{sourceToTarget}->setpos($fhPos); #if we changed sentences, we read the first line of next sentence=>go back one line
	    if ($$alFH{targetToSource}){
		$fhPos = $$alFH{targetToSource}->getpos; 
		$alString = $$alFH{targetToSource}->getline();
		my ($num,$theRest)=split " ",$alString,2;
		if ($num==$sentPairNum){	#skip if there is no link for this sentence pair
		    $fhPos = $$alFH{targetToSource}->getpos; 
		    push @$ts_alignments,$theRest;
		    while ($$alFH{targetToSource}->getline() =~ m/^$sentPairNum (.*)$/) {
			push @$ts_alignments,$1;
			$fhPos = $$alFH{targetToSource}->getpos; 
		    }
		}
		$$alFH{targetToSource}->setpos($fhPos); #if we changed sentences, we read the first line of next sentence=>go back one line
	    }
	    $al = Lingua::Alignment->new;
	    $al->loadFromBlinker($st_alignments,$ts_alignments,$sourceString,$targetString);
	}		
    } elsif ($alSet->{format} eq "BLINKER"){		
	if ($alSet->{lastSentPair} eq "eof"){
	    $theEnd = !(-e $alSet->{location}->{sourceToTarget}."/samp".$alSet->{location}->{sampleNum}.".SentPair".($sentPairNum-1));
	}else{
	    $theEnd = !(-e $alSet->{location}->{sourceToTarget}."/samp".$alSet->{location}->{sampleNum}.".SentPair".($sentPairNum-1)) || $sentPairNum > $alSet->{lastSentPair};
	}
	if ($theEnd){	
	    return 0;	
	}else{
	    if ($alFH->{source}){	
		$sourceString = $alFH->{source}->getline();
	    }
	    if ($alFH->{target}){	
		$targetString = $alFH->{target}->getline();
	    }
	    open(AL,"< ".$alSet->{location}->{sourceToTarget}."/samp".$alSet->{location}->{sampleNum}.".SentPair".($sentPairNum-1));
	    @$st_alignments = <AL>;			
	    close(AL);
	    if ($alSet->{location}->{targetToSource}){
		open(AL,"< ".$alSet->{location}->{targetToSource}."/samp".$alSet->{location}->{sampleNum}.".SentPair".($sentPairNum-1));
		@$ts_alignments = <AL>;			
		close(AL);
	    }
	    $al = Lingua::Alignment->new;
	    $al->loadFromBlinker($st_alignments,$ts_alignments,$sourceString,$targetString);		
	}
    } 
    if ($alignMode eq "null-align"){
	$al->forceNullAlign();
    }elsif ($alignMode eq "no-null-align"){
	$al->forceNoNullAlign();
    }
    return [$al];
}


# Re-points an alignment set object at a new set of files after its content
# has been written out in $newFormat.
#
# Arguments:
#   $alSet           - the alignment set (hash-based object) updated in place
#   $newFormat       - format name stored in $alSet->{format}
#   $newLocation     - hash ref; the keys read here are sourceToTarget,
#                      targetToSource, source, target and sampleNum
#   $lastSentPairNum - value stored in $alSet->{lastSentPair}
#
# There is no explicit return: the value of the final assignment
# ($lastSentPairNum) is what a caller would receive.
sub updateObject {
    my ($alSet, $newFormat, $newLocation, $lastSentPairNum) = @_;

    # Both alignment file entries are always overwritten, even when the new
    # location does not define them (the entry then becomes undef).
    for my $direction (qw(sourceToTarget targetToSource)) {
        $alSet->{location}{$direction} = $newLocation->{$direction};
    }

    # The sentence files are handled identically for both sides.
    for my $side (qw(source target)) {
        if ($newLocation->{$side}) {
            $alSet->{location}{$side} = $newLocation->{$side};
            next;
        }
        # No new sentence file was given. The old one is kept only if the set
        # started at sentence pair 1 and the format is unchanged; otherwise
        # the numeration of the converted alignment file and that of the (not
        # converted) sentence file will not correspond, so the entry is
        # removed (silently; the original warning is disabled).
        my $numerationKept = $alSet->{firstSentPair} == 1
                          && $alSet->{format} eq $newFormat;
        delete $alSet->{location}{$side} unless $numerationKept;
    }

    $alSet->{format} = $newFormat;

    # sampleNum is only kept for the BLINKER format.
    if ($newFormat ne "BLINKER") {
        delete $alSet->{location}{sampleNum}
            if exists $alSet->{location}{sampleNum};
    }
    else {
        $alSet->{location}{sampleNum} = $newLocation->{sampleNum};
    }

    # The object now always starts at sentence pair 1.
    $alSet->{firstSentPair} = 1;
    $alSet->{lastSentPair} = $lastSentPairNum;
}

# Returns the alignment set, with a unique new file set that has the required location, format and range values.
# TODO: conversion to Giza++ format
sub convert {
    my ($alSet,$newLocation,$newFormat,$alignMode,$AlignmentSub)=@_;
    if (!defined($newFormat)){$newFormat="TALP"}
    else {$newFormat = uc $newFormat}
    $newLocation = readLocation($newLocation);
    my $FH = $alSet->openFiles();
    my $newFH = openLocation($newLocation,$newFormat,">",$alSet->{location});
    my ($i,$al,$alSetChunk,$line,$lines);
    my $inputSentPairNum=$alSet->{firstSentPair};
    my $internalSentPairNum = 1;	
    while ($alSetChunk = $alSet->loadChunk($FH,$inputSentPairNum,$alignMode)){	# returns 0 if eof or last sentence pair
#	    print $inputSentPairNum."\n";
	for ($i=0;$i<@$alSetChunk;$i++){
	    $al = $$alSetChunk[$i];
	    if (defined($AlignmentSub)){
		#look if $AlignmentSub is a ref to an Array or a subroutine
		if (ref($AlignmentSub) eq "ARRAY"){
		    my ($sub,@params) = @$AlignmentSub;
		    $al->$sub(@params);	
		}else{
		    $al->$AlignmentSub();	
		}
	    }
	    $al->output($FH,$newFormat,$newFH,$newLocation,$internalSentPairNum);
	} #for



( run in 2.403 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )