Bio-ToolBox

 view release on metacpan or  search on metacpan

scripts/pull_features.pl  view on Meta::CPAN

	# look for the corresponding data index if list was specified
	elsif ( not defined $data_index and defined $list_index ) {

		# we have the list index but need the data index

		# get the column header name
		my $lookup = $List->name($list_index);

		# find it in the list
		my $possible = $Data->find_column("^$lookup\$");

		# check
		if ( defined $possible ) {

			# found something
			$data_index = $possible;
			printf "  found column '%s', using Data index %d\n",
				$Data->name($data_index), $data_index;
			return;
		}
		else {
			# did not find something
			# is it possible it is a simple list of names?
			print "  could not find corresponding Data column index for '$lookup'\n";
			if ( $List->number_columns == 1 ) {

				# we likely only have a simple list of features
				# adjust the data table accordingly by adding a name
				# this will avoid not finding the first element in the output file
				# if this is not true, then the user will just get an error that
				# one feature cannot be found.....
				#### WE ARE MESSING WITH THE INTERNALS OF THE OBJECT HERE
				#### DONT DO THIS!!!!!!!
				unshift @{ $List->{'data_table'} }, 'Name';
				$List->{1}{'name'} = 'Name';
				$List->{'last_row'}++;
			}
		}
	}

	# neither was specified
	elsif ( not defined $data_index and not defined $list_index ) {

		# try with common name columns
		$data_index = $Data->name_column;
		$list_index = $List->name_column;

		if (    defined $data_index
			and defined $list_index
			and $Data->name($data_index) eq $List->name($list_index) )
		{

			# report
			printf "  using List column '%s', index $list_index\n",
				$List->name($list_index);
			printf "  using Data column '%s', index $data_index\n",
				$Data->name($data_index);
			return;
		}
		else {
			# nope, do not have a match, forget our guesses
			undef $data_index;
			undef $list_index;
		}

		# try with primary_id column
		$data_index = $Data->id_column;
		$list_index = $List->id_column;

		if ( defined $data_index and defined $list_index ) {

			# report
			printf "  using List column '%s', index $list_index\n",
				$List->name($list_index);
			printf "  using Data column '%s', index $data_index\n",
				$Data->name($data_index);
			return;
		}
		else {
			# nope, do not have a match, forget our guesses
			undef $data_index;
			undef $list_index;
		}
	}

	# check for group number
	if ( $List->number_columns > 1 and not defined $group_index ) {
		my $i = $List->find_column('group');
		if ( defined $i ) {
			$group_index = $i;
		}

		# do we ask for a group or not????? probably not.... keep original functionality
	}
	if ( defined $group_index and not $List->name($group_index) ) {
		die " invalid group index!\n";
	}

	# End automatic guessing of index numbers, ask the user
	unless ( defined $list_index ) {
		$list_index = ask_user_for_index( $List,
			" Enter the unique identifier lookup column index from the List file    "
		);
	}
	unless ( defined $data_index ) {
		$data_index = ask_user_for_index( $Data,
			" Enter the unique identifier lookup column index from the Data file    "
		);
	}
	printf " We are using\n  List lookup index %d, '%s'\n  Data lookup index %d, '%s'\n",
		$list_index, $List->name($list_index), $data_index, $Data->name($data_index);
	if ( defined $group_index ) {
		printf "  group index %d, '%s'\n", $group_index, $List->name($group_index);
	}
}

### Subroutine to collect list values from a file
sub collect_request_list {

	my %requests;    # the identifier values to look up
					 # request{ unique_id } = group#
					 # for KGG lists where we are splitting each of the groups into
					 # separate files, we need to know how many and which ones
					 # can't trust whether all groups are in the KGG file or just a few
	my %pulled;

	# hash pulled{ group# } -> { unique_id } = [ line_data ]
	# still need a list of the order:
	# hash pulled{ group# } -> { 'feature_order' } = [unique_id,...]

	# check if we have multiple groups to work with
	if ( defined $group_index ) {
		$List->iterate(
			sub {
				my $row   = shift;
				my $id    = $row->value($list_index);
				my $group = $row->value($group_index);

				# store the identifier in the requests hash
				# the gene identifier is the key, the cluster group number is the value
				$requests{$id} = $group;

				# prepare the pulled data hash
				$pulled{$group}->{$id} = [];

				# record the ID for use in writing in the file list order
				if ( exists $pulled{$group}{'list_order'} ) {
					push @{ $pulled{$group}{'list_order'} }, $id;
				}
				else {
					$pulled{$group}{'list_order'} = [ ($id) ];
				}
			}
		);
	}

	# otherwise a simple one group list file
	else {
		# prepare the order array



( run in 0.679 second using v1.01-cache-2.11-cpan-39bf76dae61 )