Bio-Affymetrix

 view release on metacpan or  search on metacpan

lib/Bio/Affymetrix/CHP.pm  view on Meta::CPAN

    my $self=shift;

    if (my $q=shift) {
	$self->{"no_rows"}=$q;
    }
    return $self->{"no_rows"};
}


# Number of probes claimed


=head2 original_number_of_probes

  Arg [0]    : 	none
  Example    : 	my $original_probes=$chp->original_number_of_probes()
  Description:	Gets the original number of probes reported in the
				array.

				The CHP files have the number of probes stored in
				them, and this function lets you read this number as
				it was stored in the file originally.

				A better way of finding out the current number of
				probes is to count the number in the probe_set_results
				hash, like so: scalar(keys %{$chp->probe_set_results()});
		
  Returntype :	integer
  Exceptions : 	none
  Caller     : 	general

=cut


sub original_number_of_probes {
    my $self=shift;
    return $self->{"no_units"};
}

# Number of qc units

=head2 original_number_qc_units

  Arg [0]    : 	none
  Example    : 	my $original_qc=$chp->original_number_qc_units()
  Description:	Gets the original number of QC units in the file.
  Returntype :	integer
  Exceptions : 	none
  Caller     : 	general

=cut


sub original_number_qc_units {
    my $self=shift;
    return $self->{"no_qc_units"};
}

# MS COM prog ID

=head2 original_com_progid

  Arg [0]    : 	none
  Example    : 	my $com_id=$chp->original_com_progid()
  Description:	Gets the progid of the original Microsoft COM object that made
				this CHP file
  Returntype :	string
  Exceptions : 	none
  Caller     : 	general

=cut

sub original_com_progid {
    my $self=shift;
    return $self->{"com_progid"};
}

# CEL file name this CHP file originated from

=head2 CEL_file_name

  Arg [1]    : 	string $cel_file_name (optional)
  Example    : 	my $cel_file_name=$chp->CEL_file_name();
  Description:	Get/set the CEL file this CHP file was made from
  Returntype :	string
  Exceptions : 	none
  Caller     : 	general

=cut


sub CEL_file_name {
    my $self=shift;

    if (my $q=shift) {
	$self->{"cel_file_name"}=$q;
    }
    return $self->{"cel_file_name"};
}

=head2 original_file_name

  Arg [0]    : 	none
  Example    : 	my $chp_file_name=$chp->original_file_name();
  Description:	If this object was created using parse_from_file, the original filename. Otherwise undef.
  Returntype :	string
  Exceptions : 	none
  Caller     : 	general

=cut


sub original_file_name {
    my $self=shift;

    return $self->{"file_name"};
}



# ATH1, etc.

=head2 probe_array_type

  Arg [1]    : 	string $array_type (optional)
  Example    : 	my $probe_array_type=$chp->probe_array_type();
  Description:	Get/set the Affymetrix chip type used in the
production of this CHP file. String is same format as CDF file name,
for example ATH1-121501
  Returntype :	string
  Exceptions : 	none
  Caller     : 	general

=cut

lib/Bio/Affymetrix/CHP.pm  view on Meta::CPAN


    binmode $fh;
    
    # First step- detect whether it's a GCOS or MAS5 file
    
    # A buffer for reading things into
    my $buffer; 

    # XDA files have their first feature as a magic number, 65

    read ($fh, $buffer, 4);
    my $magic_number = unpack("V", $buffer);

    if ($magic_number==65) {
	$self->_parse_xda($fh);
	return;
    }

    # MAS5 files have their first feature as a string

    read ($fh, $buffer, 18, 4);
    
    $magic_number=unpack("A22",$buffer);

    if ($magic_number eq "GeneChip Sequence File") {
	# It's a MAS5/GCOS v1.0 "v3 file"! Yippee!
	$self->_parse_mas5($fh);
	return;
    }

    croak "This doesn't look like a CHP file to me. I can only understand certain CHP filetypes, however\n";
}

sub _parse_xda {
    my ($self,$fh) = @_;

    $self->{"format"}="XDA";
    
    my $buffer; 

    # First some trivia

    read ($fh, $buffer, 4);
    $self->{"version"}= unpack ("V", $buffer);
    
    if ($self->{"version"}!=1) {
	carp "This CHP file is newer than the software parsing them. Results may be suspect."; # die here, perhaps?
    }

    read ($fh, $buffer, 12);
    ($self->{"no_cols"},$self->{"no_rows"},$self->{"no_units"},$self->{"no_qc_units"})= unpack ("S2V2", $buffer);

    read ($fh, $buffer, 4);
    
    $self->{"chip_type"}=unpack ("V", $buffer);

    if ($self->{"chip_type"}!=0) {
	croak "This software does not process non-expression arrays";
    }
    
    $self->{"com_progid"}=$self->unpack_length_string($fh);
    
    $self->{"cel_file_name"}=$self->unpack_length_string($fh);

    $self->{"probe_array_type"}=$self->unpack_length_string($fh);

    $self->{"algorithm_name"}=$self->unpack_length_string($fh);

    $self->{"algorithm_version"}=$self->unpack_length_string($fh);

    # Algorithm parameters
    
    {
	read ($fh, $buffer, 4);
	my $no_algorithm_params = unpack ("V", $buffer);

	# get varying number of parameter names and values and read into hash 
	my %algorithm_params; 
	for (my $i=0;$i<$no_algorithm_params;$i++) {
	    my $name=$self->unpack_length_string($fh);
	    my $value=$self->unpack_length_string($fh);
	    $algorithm_params{$name}=$value;
	}

	$self->{"algorithm_params"}=\%algorithm_params;
    }

    # Summary statistics

    {
	read ($fh, $buffer, 4);
	my $no_stats = unpack ("V", $buffer);

	# get varying number of parameter names and values and read into hash 
	my %h;
	for (my $i=0;$i<$no_stats;$i++) {
	    my $name=$self->unpack_length_string($fh);
	    my $value=$self->unpack_length_string($fh);
	    $h{$name}=$value;
	}

	$self->{"summary_statistics"}=\%h;
    }

    # Background calculation

    {
	read ($fh, $buffer, 8);
	my $m;
	($m,$self->{"smooth_factor"}) = unpack ("Vf", $buffer);
	
	$self->{"zones"}=[];

	my %zone_info; 
	for (my $i=0;$i<$m;$i++) {
	    read ($fh, $buffer, 12);
	    my @zone = unpack ("f3", $buffer);
	    push @{$self->{"zones"}},\@zone;
	}
    }

lib/Bio/Affymetrix/CHP.pm  view on Meta::CPAN


    if ($self->{"version"}==13) {
	carp "The authors of this module have never seen a genuine GCOS v1.0 CHP file. This program can parse them, but we're only relying on the specification supplied by Affymetrix- we've not tested this at all. Suspect results therefore are highly likely...
    }
    # Trivia section

    $self->{"algorithm_name"}=$self->unpack_length_string($fh);

    $self->{"algorithm_version"}=$self->unpack_length_string($fh);

    
    # Parse algorithm parameters to maintain compatability with XDA format

    {
	my %algorithm_params;
	my $parse_me=$self->unpack_length_string($fh);
	foreach my $i (split / /,$parse_me) {
	    my ($name,$value)=split /=/,$i;
	    $algorithm_params{$name}=$value;
	}

	$self->{"algorithm_params"}=\%algorithm_params;
    }

    # Summary statistics

    {
	my %summary_stats;
	my $parse_me=$self->unpack_length_string($fh);
	foreach my $i (split / /,$parse_me) {
	    my ($name,$value)=split /=/,$i;
	    $summary_stats{$name}=$value;
	}

	$self->{"summary_statistics"}=\%summary_stats;
    }

    read ($fh, $buffer, 20);
    my $max_probeset_num;
    ($self->{"no_rows"},$self->{"no_cols"},$self->{"no_units"},$max_probeset_num,$self->{"no_qc_units"})= unpack ("V5", $buffer);
    
    # THROW AWAY PROBESET NUMBER FOR EACH PROBESET
    read ($fh, $buffer, 4*$self->{"no_units"});

    # THROW AWAY NUMBER OF PROBE PAIRS FOR EACH PROBESET
    read ($fh, $buffer, 4*$max_probeset_num);

    # THROW AWAY PROBESETTYPE FOR EACH PROBESET
    read ($fh, $buffer, 4*$max_probeset_num); # Should test

    # THROW AWAY PROBESET NUMBER FOR EACH PROBESET
    read ($fh, $buffer, 4*$self->{"no_units"});

    read ($fh, $buffer, 512);
    ($self->{"probe_array_type"},$self->{"cel_file_name"})=unpack ("Z256Z256",$buffer);

    if ($self->{"probe_array_type"} ne $self->{"cdf"}->name()) {
	carp "The CDF object you have supplied does not have the same name as the CDF file used to make this CHP file. Results may be dubious";
    }

    $self->{"com_progid"}=$self->unpack_length_string($fh);

    # Actual data. This is the bit that would need to be added to, if we did SNP etc. arrays
    {
	my %data;
	
	if ($self->{"version"}==12) {
	    
	    my %results;
	    
	    my $probesetlist=$self->{"cdf"}->probesets();
	    
	    foreach my $i (sort {int($a)<=>int($b)} keys(%$probesetlist)) {

		# Non-comparison analysis

		read ($fh, $buffer, 44);
		
		my @results=unpack("V7f3V", $buffer);

		my %h;

		if ($results[10]==0) {
		    $h{"DetectionCall"}="P";
		} elsif ($results[10]==1) {
		    $h{"DetectionCall"}="M";
		} elsif ($results[10]==2) {
		    $h{"DetectionCall"}="A";
		} elsif ($results[10]==3) {
		    $h{"DetectionCall"}="N";
		}

		$h{"DetectionPValue"}=$results[7];
		$h{"Signal"}=$results[9];
		$h{"StatPairs"}=$results[0];
		$h{"StatPairsUsed"}=$results[1];
		$h{"Probeset"}=$probesetlist->{$i};

		# Blimey-o-reilly! There's an entire CEL file in here! Let's ditch that.
		
		read ($fh, $buffer, $h{"StatPairs"}*52); #52 bytes of junk per probeset?

		read ($fh, $buffer, 4);
		
		# Comparison analysis

		if (unpack("V",$buffer)==1) {
		    $self->{"comparison"}=1;
		    read ($fh, $buffer, 58);
		    my @results=unpack("V5c2l9",$buffer);

		    $h{"Change"}=$results[4];
		    $h{"ChangePValue"}=$results[15]/1000;
		    $h{"SignalLogRatio"}=$results[12]/1000;
		    $h{"SignalLogRatioLow"}=$results[14]/1000;
		    $h{"SignalLogRatioHigh"}=$results[9]/1000;
		    $h{"CommonPairs"}=$results[0];
		    $h{"BaselineAbsent"}=$results[5];
		    $h{"Probeset"}=$probesetlist->{$i};
		}
		



( run in 2.000 seconds using v1.01-cache-2.11-cpan-5735350b133 )