App-Anchr

 view release on metacpan or  search on metacpan

share/createSuperReadsForDirectory.perl  view on Meta::CPAN

my $normalFileSizeMinimum = 1;

#
# joinKUnitigs
#
$cmd
    = "createKUnitigMaxOverlaps $kUnitigsFile "
    . "-kmervalue $merLen -largest-kunitig-number "
    . ( int($maxKUnitigNumber) + 1 )
    . " $prefixForOverlapsBetweenKUnitigs";
&runCommandAndExitIfBad( $cmd, $kUnitigOverlapsFile, 0, "createKUnitigMaxOverlaps",
    $kUnitigOverlapsFile, "$workingDirectory/overlap.coords" );

# Find the matches of k-unitigs to reads and pipe to the shooting method
$cmd
    = "findMatchesBetweenKUnitigsAndReads "
    . " -m $merLen -t $numProcessors -o /dev/fd/1 "
    . "$kUnitigsFile $maxKUnitigNumberFile $minSizeNeededForTable $cor_fa_fn "
    . " | joinKUnitigs_v3"
    . " --max-nodes-allowed $maxNodes --mean-and-stdev-by-prefix-file $meanAndStdevByPrefixFile"
    . " --num-stdevs-allowed $numStdevsAllowed --unitig-lengths-file $kUnitigLengthsFile "
    . " --num-kunitigs-file $maxKUnitigNumberFile --overlaps-file $kUnitigOverlapsFile"
    . " --min-overlap-length $merLenMinus1 -o $joinerOutput"
    . " -t $numProcessors /dev/fd/0";
&runCommandAndExitIfBad( $cmd, $joinerOutput, $normalFileSizeMinimum, "joinKUnitigs",
    $joinerOutput );

my $tempSize = -s $joinerOutput;
if ( $tempSize == 0 ) {
    $cmd = "touch $superReadCountsFile";
    print "$cmd\n";
    system($cmd);
}
else {
    my $minFileSizeToPass;
    if ( $minReadsInSuperRead > 1 ) {
        $minFileSizeToPass = 0;
        $cmd
            = "getSuperReadInsertCountsFromReadPlacementFileTwoPasses -n `cat $numKUnitigsFile "
            . " | perl -ane '{print \$F[0]*20}'` -o $superReadCountsFile $joinerOutput";
    }
    else {
        $minFileSizeToPass = $normalFileSizeMinimum;
        $cmd
            = "getSuperReadInsertCountsFromReadPlacementFile -n `cat $numKUnitigsFile "
            . "| perl -ane '{print \$F[0]*20}'` -o $superReadCountsFile -i $joinerOutput";
    }
    &runCommandAndExitIfBad( $cmd, $superReadCountsFile, $minFileSizeToPass,
        "getSuperReadInsertCounts", $superReadCountsFile );
}

my $goodSuperReadsNamesFile  = "$workingDirectory/superReadNames.txt";
my $fastaSuperReadErrorsFile = "$workingDirectory/createFastaSuperReadSequences.errors.txt";

#
# reduce SR
#
my $localGoodSequenceOutputFile = "${finalSuperReadSequenceFile}.all";
my $superReadNameAndLengthsFile = "$workingDirectory/sr_sizes.tmp";
my $reduceFile                  = "$workingDirectory/reduce.tmp";
my $reduceFileTranslated        = "$workingDirectory/reduce.tmp.renamed";
my $tflag                       = "-rename-super-reads";
if ($keepKUnitigsInSuperreadNames) {
    $tflag = "";
}
$cmd
    = "cat $superReadCountsFile "
    . "| createFastaSuperReadSequences $workingDirectory /dev/fd/0 "
    . "-seqdiffmax $seqDiffMax -min-ovl-len $merLenMinus1 -minreadsinsuperread $minReadsInSuperRead "
    . " -good-sr-filename $goodSuperReadsNamesFile "
    . "-kunitigsfile $kUnitigsFile -good-sequence-output-file $localGoodSequenceOutputFile "
    . "-super-read-name-and-lengths-file $superReadNameAndLengthsFile $tflag 2> $sequenceCreationErrorFile";
&runCommandAndExitIfBad( $cmd, $superReadNameAndLengthsFile, $normalFileSizeMinimum,
    "createFastaSuperReadSequences",
    $localGoodSequenceOutputFile, $goodSuperReadsNamesFile, $superReadNameAndLengthsFile );

$cmd
    = "reduce_sr $maxKUnitigNumber $kUnitigLengthsFile $merLen $superReadNameAndLengthsFile -o $reduceFile";
&runCommandAndExitIfBad( $cmd, $reduceFile, $normalFileSizeMinimum, "reduceSuperReads",
    $reduceFile, $fastaSuperReadErrorsFile );

if ( !$keepKUnitigsInSuperreadNames ) {
    $cmd
        = "translateReduceFile.perl $goodSuperReadsNamesFile $workingDirectory/reduce.tmp > $reduceFileTranslated";
    &runCommandAndExitIfBad( $cmd, $reduceFileTranslated, $normalFileSizeMinimum,
        "translateReduceFile", $reduceFileTranslated );
}

$tflag = "--translate-super-read-names";
if ($keepKUnitigsInSuperreadNames) {
    $reduceFileTranslated = $reduceFile;
    $tflag                = "";
}
$cmd
    = "eliminateBadSuperReadsUsingList --read-placement-file $joinerOutput --good-super-reads-file $goodSuperReadsNamesFile $tflag --reduce-file $reduceFileTranslated > $finalReadPlacementFile";
&runCommandAndExitIfBad( $cmd, $finalReadPlacementFile, $normalFileSizeMinimum,
    "createFinalReadPlacementFile",
    $finalReadPlacementFile );

$cmd
    = "outputRecordsNotOnList $reduceFileTranslated $localGoodSequenceOutputFile 0 --fld-num 0 > $finalSuperReadSequenceFile";
&runCommandAndExitIfBad( $cmd, $finalSuperReadSequenceFile, $normalFileSizeMinimum,
    "createFinalSuperReadFastaSequences",
    $finalSuperReadSequenceFile );

$cmd = "touch $successFile";
system($cmd);

exit(0);

# If localCmd is set, it captures the return code and makes
# sure it ran properly. Otherwise it exits.
# If one sets the fileName then we assume it must exist
# With minSize one can set the minimum output file size
sub runCommandAndExitIfBad {
    my ( $localCmd, $fileName, $minSize, $stepName, @filesCreated ) = @_;
    my ( $retCode, $exitValue,    $sz );
    my ( $totSize, $tempFilename, $local_cmd );
    my ( $success_fn, $failDir );

    #    sleep (5); # For testing only
    $failDir = $workingDirectory . "/" . $stepName . ".Failed";



( run in 0.792 second using v1.01-cache-2.11-cpan-39bf76dae61 )