CLIPSeqTools

 view release on metacpan or  search on metacpan

lib/CLIPSeqTools/CompareApp/compare_counts.pm  view on Meta::CPAN

	is            => 'rw',
	isa           => 'Str',
	required      => 1,
	documentation => 'name of column with values to be normalized.',
);

option 'val_thres' => (
	is            => 'rw',
	isa           => 'Num',
	default       => 0,
	documentation => 'rows with value lower or equal than val_thres are not '.
						'used for normalization.',
);

option 't_name' => (
	is            => 'rw',
	isa           => 'ArrayRef[Str]',
	documentation => 'table name. Use option multiple times to give names '.
						'to all the tables. The number of table names must '.
						'match the number of input table files. If not set, '.
						'numbers are used instead. These names will be used '.
						'in the output files.',
);

#######################################################################
##########################   Consume Roles   ##########################
#######################################################################
with
	"CLIPSeqTools::Role::Option::Plot" => {
		-alias    => { validate_args => '_validate_args_for_plot' },
		-excludes => 'validate_args',
	},
	"CLIPSeqTools::Role::Option::OutputPrefix" => {
		-alias    => { validate_args => '_validate_args_for_output_prefix' },
		-excludes => 'validate_args',
	};

#######################################################################
########################   Interface Methods   ########################
#######################################################################
sub validate_args {
	my ($self) = @_;

	$self->_validate_args_for_plot;
	$self->_validate_args_for_output_prefix;
}

sub run {
	my ($self) = @_;

	warn "Starting analysis: compare_counts\n";

	warn "Validating arguments\n" if $self->verbose;
	$self->validate_args();

	warn "Reading input files\n" if $self->verbose;
	my @tables = map {Data::Table::fromFile($_)} @{$self->table};
	die "Error: Input table sizes differ. Aborting.\n" if not all_tables_of_equal_size(@tables);
	$self->t_name([(1..@tables)]) if !defined $self->t_name;

	warn "Searching for 25th percentile\n" if $self->verbose;
	my $uq_idx = _quantile_idx_from_table_with_fewer(
		$self->val_col, $self->val_thres, $self->key_col, @tables);

	warn "Normalizing the values\n" if $self->verbose;
	#Normalized column name is value column + "_uq" suffix
	_build_normalized_column_in_tables($self->val_col, $uq_idx, @tables);

	warn "Writing output files\n" if $self->verbose;
	my @output_files = map {
		$self->o_prefix . $_ . '.counts.uq.tab'} @{$self->t_name};
	for (my $i=0; $i<@output_files; $i++) {
		my (undef, $dir, undef) = File::Spec->splitpath($output_files[$i]);
		make_path($dir);
		open (my $OUT, '>', $output_files[$i]);
		print $OUT $tables[$i]->tsv;
		close $OUT;
	}

	if ($self->plot) {
		warn "Creating plots\n" if $self->verbose;
		for (my $i = 0; $i < @output_files; $i++) {
			for (my $j = $i+1; $j < @output_files; $j++) {
				CLIPSeqTools::PlotApp->initialize_command_class(
					'CLIPSeqTools::PlotApp::scatterplot',
					table1   => $output_files[$i],
					table2   => $output_files[$j],
					key_col  => $self->key_col,
					val_col  => $self->val_col . '_uq',
					name1    => $self->t_name->[$i],
					name2    => $self->t_name->[$j],
					verbose  => $self->verbose,
					o_prefix => $self->o_prefix .
								$self->t_name->[$i].'_vs_'.$self->t_name->[$j].
								'.counts.uq.',
				)->run();
			}
		}
	}
}


#######################################################################
############################   Functions   ############################
#######################################################################
sub _build_normalized_column_in_tables {
	my ($value_col, $uq_idx, @tables) = @_;

	# Sort the tables by descending value
	map{$_->sort($value_col, Data::Table::NUMBER, Data::Table::DESC)} @tables;
	foreach my $table (@tables) {
		my $uq = $table->elm($uq_idx, $value_col);
		my @normalized_values = map {$_ / $uq} $table->col($value_col);
		$table->addCol(\@normalized_values, $value_col."_uq");
	}
}

sub _quantile_idx_from_table_with_fewer {
	my ($value_col, $val_thres, $key_cols, @tables) = @_;

	# Sort tables according to keys



( run in 0.790 second using v1.01-cache-2.11-cpan-39bf76dae61 )