Bio-Affymetrix
view release on metacpan or search on metacpan
lib/Bio/Affymetrix/CHP.pm view on Meta::CPAN
my $self=shift;
if (my $q=shift) {
$self->{"no_rows"}=$q;
}
return $self->{"no_rows"};
}
# Number of probes claimed
=head2 original_number_of_probes
Arg [0] : none
Example : my $original_probes=$chp->original_number_of_probes()
Description: Gets the original number of probes reported in the
array.
The CHP files have the number of probes stored in
them, and this function lets you read this number as
it was stored in the file originally.
A better way of finding out the current number of
probes is to count the number in the probe_set_results
hash, like so: scalar(keys %{$chp->probe_set_results()});
Returntype : integer
Exceptions : none
Caller : general
=cut
sub original_number_of_probes {
my $self=shift;
return $self->{"no_units"};
}
# Number of qc units
=head2 original_number_qc_units
Arg [0] : none
Example : my $original_qc=$chp->original_number_qc_units()
Description: Gets the original number of QC units in the file.
Returntype : integer
Exceptions : none
Caller : general
=cut
sub original_number_qc_units {
my $self=shift;
return $self->{"no_qc_units"};
}
# MS COM prog ID
=head2 original_com_progid
Arg [0] : none
Example : my $com_id=$chp->original_com_progid()
Description: Gets the progid of the original Microsoft COM object that made
this CHP file
Returntype : string
Exceptions : none
Caller : general
=cut
sub original_com_progid {
my $self=shift;
return $self->{"com_progid"};
}
# CEL file name this CHP file originated from
=head2 CEL_file_name
Arg [1] : string $cel_file_name (optional)
Example : my $cel_file_name=$chp->CEL_file_name();
Description: Get/set the CEL file this CHP file was made from
Returntype : string
Exceptions : none
Caller : general
=cut
sub CEL_file_name {
my $self=shift;
if (my $q=shift) {
$self->{"cel_file_name"}=$q;
}
return $self->{"cel_file_name"};
}
=head2 original_file_name
Arg [0] : none
Example : my $chp_file_name=$chp->original_file_name();
Description: If this object was created using parse_from_file, the original filename. Otherwise undef.
Returntype : string
Exceptions : none
Caller : general
=cut
sub original_file_name {
my $self=shift;
return $self->{"file_name"};
}
# ATH1, etc.
=head2 probe_array_type
Arg [1] : string $array_type (optional)
Example : my $probe_array_type=$chp->probe_array_type();
Description: Get/set the Affymetrix chip type used in the
production of this CHP file. String is same format as CDF file name,
for example ATH1-121501
Returntype : string
Exceptions : none
Caller : general
=cut
lib/Bio/Affymetrix/CHP.pm view on Meta::CPAN
binmode $fh;
# First step- detect whether it's a GCOS or MAS5 file
# A buffer for reading things into
my $buffer;
# XDA files have their first feature as a magic number, 65
read ($fh, $buffer, 4);
my $magic_number = unpack("V", $buffer);
if ($magic_number==65) {
$self->_parse_xda($fh);
return;
}
# MAS5 files have their first feature as a string
read ($fh, $buffer, 18, 4);
$magic_number=unpack("A22",$buffer);
if ($magic_number eq "GeneChip Sequence File") {
# It's a MAS5/GCOS v1.0 "v3 file"! Yippee!
$self->_parse_mas5($fh);
return;
}
croak "This doesn't look like a CHP file to me. I can only understand certain CHP filetypes, however\n";
}
sub _parse_xda {
my ($self,$fh) = @_;
$self->{"format"}="XDA";
my $buffer;
# First some trivia
read ($fh, $buffer, 4);
$self->{"version"}= unpack ("V", $buffer);
if ($self->{"version"}!=1) {
carp "This CHP file is newer than the software parsing them. Results may be suspect."; # die here, perhaps?
}
read ($fh, $buffer, 12);
($self->{"no_cols"},$self->{"no_rows"},$self->{"no_units"},$self->{"no_qc_units"})= unpack ("S2V2", $buffer);
read ($fh, $buffer, 4);
$self->{"chip_type"}=unpack ("V", $buffer);
if ($self->{"chip_type"}!=0) {
croak "This software does not process non-expression arrays";
}
$self->{"com_progid"}=$self->unpack_length_string($fh);
$self->{"cel_file_name"}=$self->unpack_length_string($fh);
$self->{"probe_array_type"}=$self->unpack_length_string($fh);
$self->{"algorithm_name"}=$self->unpack_length_string($fh);
$self->{"algorithm_version"}=$self->unpack_length_string($fh);
# Algorithm parameters
{
read ($fh, $buffer, 4);
my $no_algorithm_params = unpack ("V", $buffer);
# get varying number of parameter names and values and read into hash
my %algorithm_params;
for (my $i=0;$i<$no_algorithm_params;$i++) {
my $name=$self->unpack_length_string($fh);
my $value=$self->unpack_length_string($fh);
$algorithm_params{$name}=$value;
}
$self->{"algorithm_params"}=\%algorithm_params;
}
# Summary statistics
{
read ($fh, $buffer, 4);
my $no_stats = unpack ("V", $buffer);
# get varying number of parameter names and values and read into hash
my %h;
for (my $i=0;$i<$no_stats;$i++) {
my $name=$self->unpack_length_string($fh);
my $value=$self->unpack_length_string($fh);
$h{$name}=$value;
}
$self->{"summary_statistics"}=\%h;
}
# Background calculation
{
read ($fh, $buffer, 8);
my $m;
($m,$self->{"smooth_factor"}) = unpack ("Vf", $buffer);
$self->{"zones"}=[];
my %zone_info;
for (my $i=0;$i<$m;$i++) {
read ($fh, $buffer, 12);
my @zone = unpack ("f3", $buffer);
push @{$self->{"zones"}},\@zone;
}
}
lib/Bio/Affymetrix/CHP.pm view on Meta::CPAN
if ($self->{"version"}==13) {
carp "The authors of this module have never seen a genuine GCOS v1.0 CHP file. This program can parse them, but we're only relying on the specification supplied by Affymetrix- we've not tested this at all. Suspect results therefore are highly likely...
}
# Trivia section
$self->{"algorithm_name"}=$self->unpack_length_string($fh);
$self->{"algorithm_version"}=$self->unpack_length_string($fh);
# Parse algorithm parameters to maintain compatability with XDA format
{
my %algorithm_params;
my $parse_me=$self->unpack_length_string($fh);
foreach my $i (split / /,$parse_me) {
my ($name,$value)=split /=/,$i;
$algorithm_params{$name}=$value;
}
$self->{"algorithm_params"}=\%algorithm_params;
}
# Summary statistics
{
my %summary_stats;
my $parse_me=$self->unpack_length_string($fh);
foreach my $i (split / /,$parse_me) {
my ($name,$value)=split /=/,$i;
$summary_stats{$name}=$value;
}
$self->{"summary_statistics"}=\%summary_stats;
}
read ($fh, $buffer, 20);
my $max_probeset_num;
($self->{"no_rows"},$self->{"no_cols"},$self->{"no_units"},$max_probeset_num,$self->{"no_qc_units"})= unpack ("V5", $buffer);
# THROW AWAY PROBESET NUMBER FOR EACH PROBESET
read ($fh, $buffer, 4*$self->{"no_units"});
# THROW AWAY NUMBER OF PROBE PAIRS FOR EACH PROBESET
read ($fh, $buffer, 4*$max_probeset_num);
# THROW AWAY PROBESETTYPE FOR EACH PROBESET
read ($fh, $buffer, 4*$max_probeset_num); # Should test
# THROW AWAY PROBESET NUMBER FOR EACH PROBESET
read ($fh, $buffer, 4*$self->{"no_units"});
read ($fh, $buffer, 512);
($self->{"probe_array_type"},$self->{"cel_file_name"})=unpack ("Z256Z256",$buffer);
if ($self->{"probe_array_type"} ne $self->{"cdf"}->name()) {
carp "The CDF object you have supplied does not have the same name as the CDF file used to make this CHP file. Results may be dubious";
}
$self->{"com_progid"}=$self->unpack_length_string($fh);
# Actual data. This is the bit that would need to be added to, if we did SNP etc. arrays
{
my %data;
if ($self->{"version"}==12) {
my %results;
my $probesetlist=$self->{"cdf"}->probesets();
foreach my $i (sort {int($a)<=>int($b)} keys(%$probesetlist)) {
# Non-comparison analysis
read ($fh, $buffer, 44);
my @results=unpack("V7f3V", $buffer);
my %h;
if ($results[10]==0) {
$h{"DetectionCall"}="P";
} elsif ($results[10]==1) {
$h{"DetectionCall"}="M";
} elsif ($results[10]==2) {
$h{"DetectionCall"}="A";
} elsif ($results[10]==3) {
$h{"DetectionCall"}="N";
}
$h{"DetectionPValue"}=$results[7];
$h{"Signal"}=$results[9];
$h{"StatPairs"}=$results[0];
$h{"StatPairsUsed"}=$results[1];
$h{"Probeset"}=$probesetlist->{$i};
# Blimey-o-reilly! There's an entire CEL file in here! Let's ditch that.
read ($fh, $buffer, $h{"StatPairs"}*52); #52 bytes of junk per probeset?
read ($fh, $buffer, 4);
# Comparison analysis
if (unpack("V",$buffer)==1) {
$self->{"comparison"}=1;
read ($fh, $buffer, 58);
my @results=unpack("V5c2l9",$buffer);
$h{"Change"}=$results[4];
$h{"ChangePValue"}=$results[15]/1000;
$h{"SignalLogRatio"}=$results[12]/1000;
$h{"SignalLogRatioLow"}=$results[14]/1000;
$h{"SignalLogRatioHigh"}=$results[9]/1000;
$h{"CommonPairs"}=$results[0];
$h{"BaselineAbsent"}=$results[5];
$h{"Probeset"}=$probesetlist->{$i};
}
( run in 2.000 seconds using v1.01-cache-2.11-cpan-5735350b133 )