Astro-SpaceTrack

 view release on metacpan or  search on metacpan

xt/author/celestrak_datasets.t  view on Meta::CPAN

}

# diag 'Debug - got ', explain \%got;

%expect = %{ Astro::SpaceTrack->__catalog( 'celestrak_supplemental' ) };
%{ $_ } = ( %{ $_ }, ignore => 0 ) for values %expect;

foreach my $key ( keys %got ) {
    if ( $got{$key}{name} =~ m/ \b (
	pre-launch | post-deployment | backup \s+ launch \s+ opportunity
	) \b /smxi ) {
	$expect{$key}{note} = "\u$1 data sets are temporary";
	$expect{$key}{name} ||= $got{$key}{name};
	$expect{$key}{ignore} = 1;
    }
}

# diag 'Debug - want ', explain \%expect;

foreach my $key (sort keys %expect) {
    my $source = $expect{$key}{source} || $key;
    if ($expect{$key}{ignore}) {
	my $presence = delete $got{$source} ? 'present' : 'not present';
	note "Ignored - $key (@{[($got{$source} ||
		$expect{$key})->{name}]}): $presence";
	$expect{$key}{note} and note( "    $expect{$key}{note}" );
    } else {
	ok delete $got{$source}, $expect{$key}{name};
	$expect{$key}{note} and note "    $expect{$key}{note}";
    }
}

ok ( ! keys %got, 'The above is all there is' ) or do {
    diag( 'The following supplemental data sets have been added:' );
    foreach (sort keys %got) {
	diag( "    $got{$_}{source} $_ => '$got{$_}{name}'" );
    }
};

done_testing;

sub parse_string {
    my ( $string, @extra ) = @_;

=begin comment

    # The following horrible hack became unnecessary as of December 30
    # 2024.

    # The following horrible hack is because as of December 28 2024 the
    # Celestrak supplemental dataset ends in an un-terminated comment
    # which HTML::TreeBuilder 5.07 handles incorrectly (IM(NS)HO). So:
    {	# Single-iteration loop
	$string =~ m/ <!-- /smxgc
	    or last;
	$string =~ m/ --> /smxgc
	    and redo;
	$string .= ' -->';
    }
    # I wish I could capture the HTML::Parser events corresponding to
    # the above, but it looks like there are none, so I have to hack it
    # with regexen. GAH!

=end comment

=cut

    my $tree = HTML::TreeBuilder->new_from_content( $string );
    my %data;
    foreach my $anchor ( $tree->look_down( _tag => 'a' ) ) {
	my $href = $anchor->attr( 'href' )
	    or next;

	my $parent = $anchor->parent();
	my @sibs = $parent->content_list();

	# Handle the case where the name of the data set is an anchor.
	ref $sibs[0]
	    and $sibs[0] == $anchor
	    and next;
	ref $sibs[0]
	    and $sibs[0] = $sibs[0]->as_trimmed_text();

	# Exclude pre-launch and post-deployment data sets, which are
	# ephemeral.
	not ref $sibs[0]
	    and $sibs[0] =~ m/ \b (?:
		pre-launch | post-deployment |
		backup \s+ launch \s+ opportunity
		) \b /smxi
	    and next;

	if ( $href =~ m/ \b (?: sup- )? gp\.php \b /smx ) {
	    my $uri = URI->new( $href );
	    # NOTE convenient in this case but technically incorrect as
	    # it is legal for keys to repeat.
	    my %query = $uri->query_form();
	    #         Celestrak        Celestrak Supplemental
	    $href = ( $query{GROUP} || $query{FILE} )
		or next;
	} else {
	    $href =~ s/ [.] txt \z //smx
		or next;
	    $href =~ m{ / }smx
		and next;
	}
	my $name = $anchor->as_trimmed_text();
	$name eq ''
	    and not ref $sibs[0]
	    and $name = $sibs[0];
	$data{$href} = {
	    name	=> $name,
	    @extra,
	};

    }
    return %data;
}

1;



( run in 2.656 seconds using v1.01-cache-2.11-cpan-d7f47b0818f )