Algorithm-SixDegrees

 view release on metacpan or  search on metacpan

lib/Algorithm/SixDegrees.pm  view on Meta::CPAN

	# If $altsource gets defined, that means there are two sources used.
	my $altsource;

	unless (exists($self->{'_sources'}) && reftype($self->{'_sources'}) eq 'ARRAY') {
		$ERROR = 'No data sources defined';
		return;
	}
	my @sources = @{$self->{'_sources'}};
	my $source_exists = 0;
	foreach my $source (@sources) {
		if ($mainsource eq $source) {
			$source_exists = 1;
			$leftside{$source} = {$start, undef};
			$rightside{$source} = {$end, undef};
		} else {
			$altsource = $source;
			$leftside{$source} = {};
			$rightside{$source} = {};
		}
		unless (ref($self->{'_source_left'}) && 
			ref($self->{'_source_left'}{$source}) &&
			reftype($self->{'_source_left'}{$source}{'sub'}) eq 'CODE') {
			$ERROR = "Source '$source' does not have a valid forward subroutine";
			return;
		}
		unless (ref($self->{'_source_right'}) && 
			ref($self->{'_source_right'}{$source}) &&
			reftype($self->{'_source_right'}{$source}{'sub'}) eq 'CODE') {
			$ERROR = "Source '$source' does not have a valid reverse subroutine";
			return;
		}
		$self->{'_investigated'}{$source} = {};
	}
	unless ($source_exists) {
		$ERROR = "Source '$mainsource' was not defined";
		return;
	}
	if (scalar(keys(%leftside)) > 2) {
		$ERROR = 'Too many defined data sources; maximum is 2';
		return;
	}


	if ($start eq $end) {
		# Only one element if the start and end are the same.
		return wantarray ? ($start) : [$start];
	}

	my $leftcount = 1;
	my $rightcount = 1;

	# If altsource exists, pull the left side main, then pull the right side main,
	# and check for middle matches.  This reduces database hits as opposed to
	# where it's pulled left main - left alt; left alt >= 1 at that point, whereas
	# right main on the first loop == 1.  Following that, pull the left alt and 
	# then the right alt, which gets the CHAINLOOP back in synch.

	if (defined($altsource)) {
		my ($count,$id,$err) = $self->_match('left',$mainsource,$altsource,\%leftside,\%rightside);
		if (defined($err)) { $ERROR = $err; return; };
		if (defined($id)) { $ERROR = 'Internal error, id cannot match here'; return; };
		return if !defined($count) || $count == 0;

		($count,$id,$err) = $self->_match('right',$mainsource,$altsource,\%rightside,\%leftside);
		if (defined($err)) { $ERROR = $err; return; };
		if (defined($id)) { 
			my @abc = ($leftside{$altsource}{$id},$id,$rightside{$altsource}{$id});
			return wantarray ? @abc : \@abc;
		};
		return if !defined($count) || $count == 0;

		($leftcount,$id,$err) = $self->_match('left',$altsource,$mainsource,\%leftside,\%rightside);
		if (defined($err)) { $ERROR = $err; return; };
		if (defined($id)) { $ERROR = 'Internal error, id cannot match here'; return; };
		return if !defined($leftcount) || $leftcount == 0;

		($rightcount,$id,$err) = $self->_match('right',$altsource,$mainsource,\%rightside,\%leftside);
		if (defined($err)) { $ERROR = $err; return; };
		if (defined($id)) { 
			my $la = $leftside{$mainsource}{$id};
			my $lm = $leftside{$altsource}{$la};
			my $ra = $rightside{$mainsource}{$id};
			my $rm = $rightside{$altsource}{$ra};
			unless (defined($la) && defined($lm) && defined($ra) && defined($rm)) {
				$ERROR = 'Internal error, identifier not defined';
				return;
			}
			return wantarray ? ($lm,$la,$id,$ra,$rm) : [$lm,$la,$id,$ra,$rm];
		};
		return if !defined($rightcount) || $rightcount == 0;

	}

	# There is bias here, but the tie needs to be broken, so in the
	# event of a tie, move left to right in the chain.

	CHAINLOOP: {
		my $id;
		my $err;
		if ($leftcount <= $rightcount) {
			if (defined($altsource)) {
				($leftcount,$id,$err) = $self->_match_two('left',$mainsource,$altsource,\%leftside,\%rightside);
			} else {
				($leftcount,$id,$err) = $self->_match_one('left',$mainsource,\%leftside,\%rightside);
			}
		} else {
			if (defined($altsource)) {
				($rightcount,$id,$err) = $self->_match_two('right',$mainsource,$altsource,\%rightside,\%leftside);
			} else {
				($rightcount,$id,$err) = $self->_match_one('right',$mainsource,\%rightside,\%leftside);
			}
		}
		if(defined($err)) {
			$ERROR = $err;
			return;
		}
		if(defined($id)) {
			# If _match returns an id, that means a match was found.
			# To get it, we simply have to trace out from the "middle"
			# to get the full link.
			my @match = ($id);
			# middle, building to left.
			while($match[0] ne $start) {
				unshift(@match,$leftside{$mainsource}{$match[0]});
				unshift(@match,$leftside{$altsource}{$match[0]}) if defined($altsource);
				if (!defined($match[0])) {
					$ERROR = 'Internal error, left identifier was not defined';
					return;
				}
			}
			# middle building to right
			while($match[-1] ne $end) {
				push(@match,$rightside{$mainsource}{$match[-1]});
				push(@match,$rightside{$altsource}{$match[-1]}) if defined($altsource);
				if (!defined($match[-1])) {
					$ERROR = 'Internal error, right identifier was not defined';
					return;
				}
			}
			return wantarray ? @match : \@match;
		} 
		if ($leftcount == 0 || $rightcount == 0) {
			last CHAINLOOP;
		}
		redo CHAINLOOP;
	}

	return wantarray ? () : [];
}

=head2 error

Returns the current value of C<$Algorithm::SixDegrees::ERROR>.  See
L</SUBROUTINE RULES>.

=cut

sub error {
	return $ERROR;
}

sub _match_two {
	my ($self,$side,$mainsource,$altsource,$thisside,$thatside) = @_;
	# Assume $self is OK since this is an internal function
	my ($count,$id,$err) = $self->_match($side,$mainsource,$altsource,$thisside,$thatside);
	return (undef,undef,$err) if defined($err);
	return ($count,$id,$err) if defined($id);
	return (0,undef,undef) if !defined($count) || $count == 0;
	# mental note: this should never return an id
	# after all, you can't have two mains together in a true
	# alternating chain
	return $self->_match($side,$altsource,$mainsource,$thisside,$thatside);
}

sub _match_one {
	my ($self,$side,$source,$thisside,$thatside) = @_;
	# Assume $self is OK since this is an internal function
	return $self->_match($side,$source,$source,$thisside,$thatside);
}

sub _match {
	my ($self,$side,$fromsource,$tosource,$thisside,$thatside) = @_;
	# Assume $self is OK since this is an internal function
	return (undef,undef,'Internal error: missing code') unless reftype($self->{"_source_$side"}{$fromsource}{'sub'}) eq 'CODE';
	return (undef,undef,'Internal error: missing side (1)') unless reftype($thisside) eq 'HASH';
	return (undef,undef,'Internal error: missing side (2)') unless exists($thisside->{$fromsource});
	return (undef,undef,'Internal error: missing side (3)') unless reftype($thatside) eq 'HASH';
	return (undef,undef,'Internal error: missing side (4)') unless exists($thatside->{$tosource});

	my $newsidecount = 0;
	foreach my $id (keys %{$thisside->{$fromsource}}) {
		next if exists($self->{"_investigated"}{$fromsource}{$id});
		$self->{"_investigated"}{$fromsource}{$id} = 1;

		my $use_args = reftype($self->{"_source_$side"}{$fromsource}{'args'}) eq 'ARRAY' ? 1 : 0;

		my @ids = &{$self->{"_source_$side"}{$fromsource}{'sub'}}($id,($use_args?@{$self->{"_source_$side"}{$fromsource}{'args'}}:()));
		return (undef,undef,$ERROR) if scalar(@ids) == 1 && !defined($ids[0]);
		foreach my $thisid (@ids) {
			unless (exists($thisside->{$tosource}{$thisid})) {
				$thisside->{$tosource}{$thisid} = $id;
				$newsidecount++;
			}
			return (0,$thisid,undef) if exists($thatside->{$tosource}{$thisid});
		}
	}

	return $newsidecount;
}

=head1 SUBROUTINE RULES

Passed-in subroutines should take at least one argument, which
should be some form of unique identifier, and return a list of
unique identifiers that have a relation to the argument.

The unique identifiers must be able to be compared with C<eq>.

The identifiers should be unique in datatype; that is, in an
actor/movie relationship, "Kevin Bacon" can be both the name of an
actor and a movie.

A linked data type must return identifiers that relate across the
link; that is, for an actor/movie relationship, an actor subroutine
should return movies, and a movie subroutine should return actors.

Additional arguments can be provided; these will be stored in the
object and passed through as the second and further arguments to
the subroutine.  This may be useful, for example, if you're using
some form of results caching and need to pass a C<tie>d handle
around.

If you return explicit undef, please set C<$Algorithm::SixDegrees::ERROR>
with an error code.  Explicit undef means that an error occurred
that should terminate the search; it should be returned as a
one-element list.

=head1 AUTHOR

Peter Krawczyk, C<< <petek@cpan.org> >>

=head1 BUGS

Please report any bugs or feature requests to
C<bug-algorithm-sixdegrees@rt.cpan.org>, or through the web interface at
L<http://rt.cpan.org>.  I will be notified, and then you'll automatically
be notified of progress on your bug as I make changes.

=head1 ACKNOWLEDGEMENTS

Thank you to Slaven Rezić and Peter John Acklam for bug reports.
Apologies it took so long to see and fix them. (So much for
notifications...)

Andy Lester and Ricardo Signes wrote Module::Starter, which helped
get the framework up and running fairly quickly.

Brad Fitzpatrick of L<http://livejournal.com> for giving me access
to a LiveJournal interface to determine linking information on that
site, which enabled me to write the algorithm that has been reduced
into this module.

=head1 COPYRIGHT & LICENSE

Copyright 2005-2020 Peter Krawczyk, All Rights Reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut

1; # End of Algorithm::SixDegrees



( run in 2.231 seconds using v1.01-cache-2.11-cpan-140bd7fdf52 )