Bio-Graphics

 view release on metacpan or  search on metacpan

lib/Bio/Graphics/Wiggle/Loader.pm  view on Meta::CPAN


  return 1;
}

sub process_track_line {
  my $self      = shift;
  my $line      = shift;
  my @tokens    = shellwords($line);
  shift @tokens;
  my %options = map {split '='} @tokens;
  $options{type} eq 'wiggle_0' or croak "invalid/unknown wiggle track type $options{type}";
  delete $options{type};
  $self->{tracknum}++;
  $self->current_track->{display_options} = \%options;
}

sub process_fixed_step_declaration {
  my $self  = shift;
  my $line  = shift;
  my @tokens    = shellwords($line);
  shift @tokens;
  my %options = map {split '='} @tokens;
  exists $options{chrom}        or croak "invalid fixedStep line: need a chrom option";
  exists $options{start}        or croak "invalid fixedStep line: need a start option";
  exists $options{step}         or croak "invalid fixedStep line: need a step  option";
  $self->{track_options} = \%options;
}

sub process_variable_step_declaration {
  my $self  = shift;
  my $line  = shift;
  my @tokens    = shellwords($line);
  shift @tokens;
  my %options = map {split '='} @tokens;
  exists $options{chrom}        or croak "invalid variableStep line: need a chrom option";
  $self->{track_options} = \%options;
}

sub process_first_bedline {
  my $self  = shift;
  my $line  = shift;
  my @tokens    = shellwords($line);
  $self->{track_options} = {chrom => $tokens[0]};
}

sub current_track {
  my $self = shift;
  return $self->{tracks}{$self->{tracknum}} ||= {};
}

sub minmax {
  my $self   = shift;
  my ($infh,$bedline) = @_;
  local $_;

  my $transform  = $self->get_transform;

  my $seqids = ($self->current_track->{seqids} ||= {});
  my $chrom  = $self->{track_options}{chrom};

  if ($self->allow_sampling && (my $size = stat($infh)->size) > BIG_FILE) {
      warn "Wiggle file is very large; resorting to genome-wide sample statistics for $chrom.\n";
      $self->{FILEWIDE_STATS} ||= $self->sample_file($infh,BIG_FILE_SAMPLES);
      for (keys %{$self->{FILEWIDE_STATS}}) {
	$seqids->{$chrom}{$_} = $self->{FILEWIDE_STATS}{$_};
      }
      return;
  }

  my %stats;
  if ($bedline) {  # left-over BED line
      my @tokens = split /\s+/,$bedline;
      my $seqid  = $tokens[0];
      my $value  = $tokens[-1];
      $value = $transform->($self,$value) if $transform;
      $stats{$seqid} ||= Statistics::Descriptive::Sparse->new();
      $stats{$seqid}->add_data($value);
  }

  while (<$infh>) {
      last if /^track/;
      last if /chrom=(\S+)/ && $1 ne $chrom;
      next if /^\#|fixedStep|variableStep/;
      my @tokens = split(/\s+/,$_) or next;
      my $seqid  = @tokens > 3 ? $tokens[0] : $chrom;
      my $value  = $tokens[-1];
      $value = $transform->($self,$value) if $transform;
      $stats{$seqid} ||= Statistics::Descriptive::Sparse->new();
      $stats{$seqid}->add_data($value);
  }

  for my $seqid (keys %stats) {
      $seqids->{$seqid}{min}    = $stats{$seqid}->min();
      $seqids->{$seqid}{max}    = $stats{$seqid}->max();
      $seqids->{$seqid}{mean}   = $stats{$seqid}->mean();
      $seqids->{$seqid}{stdev}  = $stats{$seqid}->standard_deviation();
  }
}

sub sample_file {
    my $self = shift;

    my ($fh,$samples) = @_;

    my $transform  = $self->get_transform;

    my $stats = Statistics::Descriptive::Sparse->new();

    my $size = stat($fh)->size;
    my $count=0;
    while ($count < $samples) {
	seek($fh,int(rand $size),0) or die;
	scalar <$fh>; # toss first line
	my $line = <$fh>; # next full line
	$line or next;
	my @tokens = split /\s+/,$line;
	my $value  = $tokens[-1];
	next unless $value =~ /^[\d\seE.+-]+$/; # non-numeric
	$value = $transform->($self,$value) if $transform;
	$stats->add_data($value);
	$count++;
    }

    return {
	min   => $stats->min,
	max   => $stats->max,
	mean  => $stats->mean,
	stdev => $stats->standard_deviation,
    };
}

sub get_transform {
    my $self = shift;
    my $transform  = $self->current_track->{display_options}{transform};
    return $self->can($transform) if $transform;
}

# one and only transform currently defined
# Natural log of the square of the value.
# Return 0 if the value is 0
sub logsquared {
    my $self  = shift;
    my $value = shift;
    return 0 if $value == 0;
    return log($value**2);
}

sub logtransform {
  my $self  = shift;
  my $value = shift;
  return 0 if $value == 0;
  if ($value < 0) {
    return -log(-$value);
  } else {
    return log($value);
  }
}

sub process_bed {
  my $self = shift;
  my $infh = shift;
  my $oops = shift;
  my $transform  = $self->get_transform;
  $self->process_bedline($oops) if $oops;
  while (<$infh>) {
    last if /^track/;
    next if /^#/;
    chomp;
    $self->process_bedline($_);



( run in 0.941 second using v1.01-cache-2.11-cpan-39bf76dae61 )