BioPerl
view release on metacpan or search on metacpan
Bio/Assembly/IO/tigr.pm view on Meta::CPAN
);
# Add read location and sequence to contig (in 'gapped consensus' coordinates)
$$readinfo{'aln_start'} = $$readinfo{'offset'} + 1; # seq offset is in gapped coordinates
$$readinfo{'aln_end'} = $$readinfo{'aln_start'} + length($$readinfo{'lsequence'}) - 1; # lsequence is aligned seq
my $alncoord = Bio::SeqFeature::Generic->new(
-primary => $readobj->id,
-start => $$readinfo{'aln_start'},
-end => $$readinfo{'aln_end'},
-strand => $$readinfo{'strand'},
-tag => { 'contig' => $contigobj->id() }
);
$contigobj->set_seq_coord($alncoord, $readobj);
# Add quality clipping read information in contig features
# (from 'aligned read' to 'gapped consensus' coordinates)
$$readinfo{'clip_start'} = $contigobj->change_coord('aligned '.$readobj->id, 'gapped consensus', $$readinfo{'seq_lend'});
$$readinfo{'clip_end'} = $contigobj->change_coord('aligned '.$readobj->id, 'gapped consensus', $$readinfo{'seq_rend'});
my $clipcoord = Bio::SeqFeature::Generic->new(
-primary => '_quality_clipping',
-source => $readobj->id,
-start => $$readinfo{'clip_start'},
-end => $$readinfo{'clip_end'},
-strand => $$readinfo{'strand'}
);
$clipcoord->attach_seq($readobj);
$contigobj->add_features([ $clipcoord ], 0);
# Add other misc read information as subsequence feature
my $readtags = Bio::SeqFeature::Generic->new(
-primary => '_main_read_feature',
-source => $readobj->id,
-start => $$readinfo{'aln_start'},
-end => $$readinfo{'aln_end'},
-strand => $$readinfo{'strand'},
-tag => { 'best' => $$readinfo{'best'},
'comment' => $$readinfo{'comment'} }
);
$contigobj->get_features_collection->add_features([$readtags]);
$contigobj->get_features_collection->add_SeqFeature($alncoord, $readtags);
return $readobj;
}
=head2 _store_singlet
Title : _store_singlet
Usage : my $singletobj = $self->_store_read(\%readinfo, \%contiginfo);
Function: store information of a singlet belonging to a scaffold in a singlet object
Returns : Bio::Assembly::Singlet
Args : hash, hash
=cut
sub _store_singlet {
my ($self, $readinfo, $contiginfo) = @_;
# Singlets in TIGR_Assembler are represented as a contig of one sequence
# We try to simulate this duality by playing around with the Singlet object
my $contigid = $$contiginfo{'asmbl_id'};
my $readid = $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'});
# Create a sequence object
#$$contiginfo{'llength'} = length($$contiginfo{'lsequence'});
my $seqobj = Bio::Seq::Quality->new(
-primary_id => $readid,
-display_id => $readid,
-seq => $$contiginfo{'lsequence'}, # do not use $$readinfo as ambiguities are uppercase
-start => 1,
-strand => $$readinfo{'strand'},
-alphabet => 'dna',
-qual => $self->_qual_hex2dec($$contiginfo{'quality'})
);
# Create singlet from sequence and add it to scaffold
my $singletobj = Bio::Assembly::Singlet->new(
-id => $contigid,
-seqref => $seqobj
);
# Add other misc contig information as features of the singlet
my $contigtags = Bio::SeqFeature::Generic->new(
-primary => '_main_contig_feature',
-source => $contigid,
-start => 1,
-end => $singletobj->get_consensus_length(),
-strand => 1,
-tag => { 'seq_id' => $$contiginfo{'seq_id'},
'com_name' => $$contiginfo{'com_name'},
'type' => $$contiginfo{'type'},
'method' => $$contiginfo{'method'},
'ed_status' => $$contiginfo{'ed_status'},
'full_cds' => $$contiginfo{'full_cds'},
'cds_start' => $$contiginfo{'cds_start'},
'cds_end' => $$contiginfo{'cds_end'},
'ed_pn' => $$contiginfo{'ed_pn'},
'ed_date' => $$contiginfo{'ed_date'},
'comment' => $$contiginfo{'comment'},
'frameshift' => $$contiginfo{'frameshift'} }
);
$singletobj->add_features([ $contigtags ], 1);
# Add read location and sequence to singlet features (in 'gapped consensus' coordinates)
$$readinfo{'aln_start'} = $$readinfo{'offset'} + 1; # seq offset is in gapped coordinates
$$readinfo{'aln_end'} = $$readinfo{'aln_start'} + length($$readinfo{'lsequence'}) - 1; # lsequence is aligned seq
my $alncoord = Bio::SeqFeature::Generic->new(
-primary => '_aligned_coord',
-source => $readid,
-start => $$readinfo{'aln_start'},
-end => $$readinfo{'aln_end'},
-strand => $$readinfo{'strand'},
-tag => { 'contig' => $contigid }
);
$alncoord->attach_seq($singletobj->seqref);
$singletobj->add_features([ $alncoord ], 0);
# Add quality clipping read information in singlet features
# (from 'aligned read' to 'gapped consensus' coordinates)
$$readinfo{'clip_start'} = $$readinfo{'seq_lend'};
$$readinfo{'clip_end'} = $$readinfo{'seq_rend'};
my $clipcoord = Bio::SeqFeature::Generic->new(
-primary => '_quality_clipping',
-source => $readid,
-start => $$readinfo{'clip_start'},
-end => $$readinfo{'clip_end'},
-strand => $$readinfo{'strand'},
-tag => { 'contig' => $contigid }
);
$clipcoord->attach_seq($singletobj->seqref);
$singletobj->add_features([ $clipcoord ], 0);
# Add other misc read information as subsequence feature
my $readtags = Bio::SeqFeature::Generic->new(
-primary => '_main_read_feature',
-source => $readid,
-start => $$readinfo{'aln_start'},
-end => $$readinfo{'aln_end'},
-strand => $$readinfo{'strand'},
-tag => { 'best' => $$readinfo{'best'},
'comment' => $$readinfo{'comment'} }
);
$singletobj->get_features_collection->add_features([$readtags]);
$singletobj->get_features_collection->add_SeqFeature($alncoord, $readtags);
return $singletobj;
}
=head2 write_assembly
Title : write_assembly
Usage : $asmio->write_assembly($assembly)
Function: Write the assembly object in TIGR Assembler compatible format. The
contig IDs are sorted naturally if the Sort::Naturally module is
present, or lexically otherwise. Internally, write_assembly use
the write_contig, write_footer and write_header methods. Use these
methods if you want more control on the writing process.
Returns : 1 on success, 0 for error
Args : A Bio::Assembly::Scaffold object
1 to write singlets in the assembly file, 0 otherwise
=cut
=head2 write_contig
Title : write_contig
Usage : $asmio->write_contig($contig)
Function: Write a contig or singlet object in TIGR compatible format. Quality
scores are automatically generated if the contig does not contain
any
Returns : 1 on success, 0 for error
Args : A Bio::Assembly::Contig or Singlet object
=cut
sub write_contig {
my ($self, @args) = @_;
my ($contigobj) = $self->_rearrange([qw(CONTIG)], @args);
# Sanity check
if ( !$contigobj || !$contigobj->isa('Bio::Assembly::Contig') ) {
$self->throw("Must provide a Bio::Assembly::Contig or Singlet object when calling write_contig");
}
my $decimal_format = '%.2f';
my $contigid = $contigobj->id;
my $numseqs = $contigobj->num_sequences;
if ( $contigobj->isa('Bio::Assembly::Singlet') ) {
# This is a singlet
my $readid = $contigobj->seqref->id;
my $singletobj = $contigobj;
# Get contig information
my ($contanno) = $singletobj->get_features_collection->get_features_by_type("_main_contig_feature:$contigid");
my %contiginfo;
$contiginfo{'sequence'} = $singletobj->seqref->seq;
$contiginfo{'lsequence'} = $contiginfo{'sequence'};
$contiginfo{'quality'} = $self->_qual_dec2hex(
join ' ', @{$singletobj->get_consensus_quality->qual} );
$contiginfo{'asmbl_id'} = $contigid;
$contiginfo{'seq_id'} = ($contanno->get_tag_values('seq_id'))[0];
$contiginfo{'com_name'} = ($contanno->get_tag_values('com_name'))[0];
$contiginfo{'type'} = ($contanno->get_tag_values('type'))[0];
$contiginfo{'method'} = ($contanno->get_tag_values('method'))[0];
$contiginfo{'ed_status'} = ($contanno->get_tag_values('ed_status'))[0];
$contiginfo{'redundancy'} = sprintf($decimal_format, 1);
$contiginfo{'perc_N'} = sprintf(
$decimal_format, $self->_perc_N($contiginfo{'sequence'}));
$contiginfo{'seqnum'} = 1;
$contiginfo{'full_cds'} = ($contanno->get_tag_values('full_cds'))[0];
$contiginfo{'cds_start'} = ($contanno->get_tag_values('cds_start'))[0];
$contiginfo{'cds_end'} = ($contanno->get_tag_values('cds_end'))[0];
$contiginfo{'ed_pn'} = ($contanno->get_tag_values('ed_pn'))[0];
$contiginfo{'ed_date'} = $self->_date_time;
$contiginfo{'comment'} = ($contanno->get_tag_values('comment'))[0];
$contiginfo{'frameshift'} = ($contanno->get_tag_values('frameshift'))[0];
# Check that no tag value is undef
$contiginfo{'seq_id'} = '' unless defined $contiginfo{'seq_id'};
$contiginfo{'com_name'} = '' unless defined $contiginfo{'com_name'};
$contiginfo{'type'} = '' unless defined $contiginfo{'type'};
$contiginfo{'method'} = '' unless defined $contiginfo{'method'};
$contiginfo{'ed_status'} = '' unless defined $contiginfo{'ed_status'};
$contiginfo{'full_cds'} = '' unless defined $contiginfo{'full_cds'};
$contiginfo{'cds_start'} = '' unless defined $contiginfo{'cds_start'};
$contiginfo{'cds_end'} = '' unless defined $contiginfo{'cds_end'};
$contiginfo{'ed_pn'} = '' unless defined $contiginfo{'ed_pn'};
$contiginfo{'comment'} = '' unless defined $contiginfo{'comment'};
$contiginfo{'frameshift'} = '' unless defined $contiginfo{'frameshift'};
# Print singlet information
$self->_print(
"sequence\t$contiginfo{'sequence'}\n".
"lsequence\t$contiginfo{'lsequence'}\n".
"quality\t$contiginfo{'quality'}\n".
"asmbl_id\t$contiginfo{'asmbl_id'}\n".
"seq_id\t$contiginfo{'seq_id'}\n".
"com_name\t$contiginfo{'com_name'}\n".
"type\t$contiginfo{'type'}\n".
"method\t$contiginfo{'method'}\n".
"ed_status\t$contiginfo{'ed_status'}\n".
"redundancy\t$contiginfo{'redundancy'}\n".
"perc_N\t$contiginfo{'perc_N'}\n".
"seq#\t$contiginfo{'seqnum'}\n".
"full_cds\t$contiginfo{'full_cds'}\n".
"cds_start\t$contiginfo{'cds_start'}\n".
"cds_end\t$contiginfo{'cds_end'}\n".
"ed_pn\t$contiginfo{'ed_pn'}\n".
"ed_date\t$contiginfo{'ed_date'}\n".
"comment\t$contiginfo{'comment'}\n".
"frameshift\t$contiginfo{'frameshift'}\n".
"\n"
);
# Get read information
my ($seq_name, $db) = $self->_split_seq_name_and_db($readid);
my ($clipcoord) = $singletobj->get_features_collection->get_features_by_type("_quality_clipping:$readid");
my ($alncoord) = $singletobj->get_features_collection->get_features_by_type("_aligned_coord:$readid");
my ($readanno) = $singletobj->get_features_collection->get_features_by_type("_main_read_feature:$readid");
my %readinfo;
$readinfo{'seq_name'} = $seq_name;
$readinfo{'asm_lend'} = $alncoord->location->start;
$readinfo{'asm_rend'} = $alncoord->location->end;
$readinfo{'seq_lend'} = $clipcoord->location->start;
$readinfo{'seq_rend'} = $clipcoord->location->end;
$readinfo{'best'} = ($readanno->get_tag_values('best'))[0];
$readinfo{'comment'} = ($readanno->get_tag_values('comment'))[0];
$readinfo{'db'} = $db;
$readinfo{'offset'} = 0;
# ambiguities in read sequence are uppercase
$readinfo{'lsequence'} = uc($contiginfo{'lsequence'});
# Check that no tag value is undef
$readinfo{'best'} = '' unless defined $readinfo{'best'};
$readinfo{'comment'} = '' unless defined $readinfo{'comment'};
# Print read information
$self->_print(
"seq_name\t$readinfo{'seq_name'}\n".
"asm_lend\t$readinfo{'asm_lend'}\n".
"asm_rend\t$readinfo{'asm_rend'}\n".
"seq_lend\t$readinfo{'seq_lend'}\n".
"seq_rend\t$readinfo{'seq_rend'}\n".
"best\t$readinfo{'best'}\n".
"comment\t$readinfo{'comment'}\n".
"db\t$readinfo{'db'}\n".
"offset\t$readinfo{'offset'}\n".
"lsequence\t$readinfo{'lsequence'}\n"
);
$self->_print("|\n");
} else {
# This is a contig
# Get contig information
my ($contanno) = $contigobj->get_features_collection->get_features_by_type("_main_contig_feature:$contigid");
my %contiginfo;
$contiginfo{'sequence'} = $self->_ungap(
$contigobj->get_consensus_sequence->seq);
$contiginfo{'lsequence'} = $contigobj->get_consensus_sequence->seq;
$contiginfo{'quality'} = $self->_qual_dec2hex(
join ' ', @{$contigobj->get_consensus_quality->qual});
$contiginfo{'asmbl_id'} = $contigid;
$contiginfo{'seq_id'} = ($contanno->get_tag_values('seq_id'))[0];
$contiginfo{'com_name'} = ($contanno->get_tag_values('com_name'))[0];
$contiginfo{'type'} = ($contanno->get_tag_values('type'))[0];
$contiginfo{'method'} = ($contanno->get_tag_values('method'))[0];
$contiginfo{'ed_status'} = ($contanno->get_tag_values('ed_status'))[0];
$contiginfo{'redundancy'} = sprintf(
$decimal_format, $self->_redundancy($contigobj));
$contiginfo{'perc_N'} = sprintf(
$decimal_format, $self->_perc_N($contiginfo{'sequence'}));
$contiginfo{'seqnum'} = $contigobj->num_sequences;
$contiginfo{'full_cds'} = ($contanno->get_tag_values('full_cds'))[0];
$contiginfo{'cds_start'} = ($contanno->get_tag_values('cds_start'))[0];
$contiginfo{'cds_end'} = ($contanno->get_tag_values('cds_end'))[0];
$contiginfo{'ed_pn'} = ($contanno->get_tag_values('ed_pn'))[0];
$contiginfo{'ed_date'} = $self->_date_time;
$contiginfo{'comment'} = ($contanno->get_tag_values('comment'))[0];
$contiginfo{'frameshift'} = ($contanno->get_tag_values('frameshift'))[0];
# Check that no tag value is undef
$contiginfo{'seq_id'} = '' unless defined $contiginfo{'seq_id'};
$contiginfo{'com_name'} = '' unless defined $contiginfo{'com_name'};
$contiginfo{'type'} = '' unless defined $contiginfo{'type'};
$contiginfo{'method'} = '' unless defined $contiginfo{'method'};
$contiginfo{'ed_status'} = '' unless defined $contiginfo{'ed_status'};
$contiginfo{'full_cds'} = '' unless defined $contiginfo{'full_cds'};
$contiginfo{'cds_start'} = '' unless defined $contiginfo{'cds_start'};
$contiginfo{'cds_end'} = '' unless defined $contiginfo{'cds_end'};
$contiginfo{'ed_pn'} = '' unless defined $contiginfo{'ed_pn'};
$contiginfo{'comment'} = '' unless defined $contiginfo{'comment'};
$contiginfo{'frameshift'} = '' unless defined $contiginfo{'frameshift'};
# Print contig information
$self->_print(
"sequence\t$contiginfo{'sequence'}\n".
"lsequence\t$contiginfo{'lsequence'}\n".
"quality\t$contiginfo{'quality'}\n".
"asmbl_id\t$contiginfo{'asmbl_id'}\n".
"seq_id\t$contiginfo{'seq_id'}\n".
"com_name\t$contiginfo{'com_name'}\n".
"type\t$contiginfo{'type'}\n".
"method\t$contiginfo{'method'}\n".
"ed_status\t$contiginfo{'ed_status'}\n".
"redundancy\t$contiginfo{'redundancy'}\n".
"perc_N\t$contiginfo{'perc_N'}\n".
"seq#\t$contiginfo{'seqnum'}\n".
"full_cds\t$contiginfo{'full_cds'}\n".
"cds_start\t$contiginfo{'cds_start'}\n".
"cds_end\t$contiginfo{'cds_end'}\n".
"ed_pn\t$contiginfo{'ed_pn'}\n".
"ed_date\t$contiginfo{'ed_date'}\n".
"comment\t$contiginfo{'comment'}\n".
"frameshift\t$contiginfo{'frameshift'}\n".
"\n"
);
my $seqno = 0;
for my $readobj ( $contigobj->each_seq() ) {
$seqno++;
# Get read information
my ($seq_name, $db) = $self->_split_seq_name_and_db($readobj->id);
( run in 0.600 second using v1.01-cache-2.11-cpan-5735350b133 )