Bio-ToolBox
view release on metacpan or search on metacpan
scripts/pull_features.pl view on Meta::CPAN
# look for the corresponding data index if list was specified
elsif ( not defined $data_index and defined $list_index ) {
# we have the list index but need the data index
# get the column header name
my $lookup = $List->name($list_index);
# find it in the list
my $possible = $Data->find_column("^$lookup\$");
# check
if ( defined $possible ) {
# found something
$data_index = $possible;
printf " found column '%s', using Data index %d\n",
$Data->name($data_index), $data_index;
return;
}
else {
# did not find something
# is it possible it is a simple list of names?
print " could not find corresponding Data column index for '$lookup'\n";
if ( $List->number_columns == 1 ) {
# we likely only have a simple list of features
# adjust the data table accordingly by adding a name
# this will avoid not finding the first element in the output file
# if this is not true, then the user will just get an error that
# one feature cannot be found.....
#### WE ARE MESSING WITH THE INTERNALS OF THE OBJECT HERE
#### DONT DO THIS!!!!!!!
unshift @{ $List->{'data_table'} }, 'Name';
$List->{1}{'name'} = 'Name';
$List->{'last_row'}++;
}
}
}
# neither was specified
elsif ( not defined $data_index and not defined $list_index ) {
# try with common name columns
$data_index = $Data->name_column;
$list_index = $List->name_column;
if ( defined $data_index
and defined $list_index
and $Data->name($data_index) eq $List->name($list_index) )
{
# report
printf " using List column '%s', index $list_index\n",
$List->name($list_index);
printf " using Data column '%s', index $data_index\n",
$Data->name($data_index);
return;
}
else {
# nope, do not have a match, forget our guesses
undef $data_index;
undef $list_index;
}
# try with primary_id column
$data_index = $Data->id_column;
$list_index = $List->id_column;
if ( defined $data_index and defined $list_index ) {
# report
printf " using List column '%s', index $list_index\n",
$List->name($list_index);
printf " using Data column '%s', index $data_index\n",
$Data->name($data_index);
return;
}
else {
# nope, do not have a match, forget our guesses
undef $data_index;
undef $list_index;
}
}
# check for group number
if ( $List->number_columns > 1 and not defined $group_index ) {
my $i = $List->find_column('group');
if ( defined $i ) {
$group_index = $i;
}
# do we ask for a group or not????? probably not.... keep original functionality
}
if ( defined $group_index and not $List->name($group_index) ) {
die " invalid group index!\n";
}
# End automatic guessing of index numbers, ask the user
unless ( defined $list_index ) {
$list_index = ask_user_for_index( $List,
" Enter the unique identifier lookup column index from the List file "
);
}
unless ( defined $data_index ) {
$data_index = ask_user_for_index( $Data,
" Enter the unique identifier lookup column index from the Data file "
);
}
printf " We are using\n List lookup index %d, '%s'\n Data lookup index %d, '%s'\n",
$list_index, $List->name($list_index), $data_index, $Data->name($data_index);
if ( defined $group_index ) {
printf " group index %d, '%s'\n", $group_index, $List->name($group_index);
}
}
### Subroutine to collect list values from a file
sub collect_request_list {
my %requests; # the identifier values to look up
# request{ unique_id } = group#
# for KGG lists where we are splitting each of the groups into
# separate files, we need to know how many and which ones
# can't trust whether all groups are in the KGG file or just a few
my %pulled;
# hash pulled{ group# } -> { unique_id } = [ line_data ]
# still need a list of the order:
# hash pulled{ group# } -> { 'feature_order' } = [unique_id,...]
# check if we have multiple groups to work with
if ( defined $group_index ) {
$List->iterate(
sub {
my $row = shift;
my $id = $row->value($list_index);
my $group = $row->value($group_index);
# store the identifier in the requests hash
# the gene identifier is the key, the cluster group number is the value
$requests{$id} = $group;
# prepare the pulled data hash
$pulled{$group}->{$id} = [];
# record the ID for use in writing in the file list order
if ( exists $pulled{$group}{'list_order'} ) {
push @{ $pulled{$group}{'list_order'} }, $id;
}
else {
$pulled{$group}{'list_order'} = [ ($id) ];
}
}
);
}
# otherwise a simple one group list file
else {
# prepare the order array
( run in 0.679 second using v1.01-cache-2.11-cpan-39bf76dae61 )