Astro-satpass

 view release on metacpan or  search on metacpan

tools/heavens-above-mag  view on Meta::CPAN


# This mess exists because Heavens-Above enclosed multiple table rows in
# a <span>..</span>. This is a standards violation which (e.g.) Firefox
# tolerates, but HTML::TreeBuilder's parse emits such spans as empty
# tags at some indeterminate point in the parse. Unless this is fixed,
# we will have to just hope they continue to honor the Accept-Language
# request header, or that the default language remains English.
sub find_td_by_content {
    my ( $tree, $re ) = @_;

    my $ele = $tree->look_down( _tag => 'td', sub {
	    ( local $_ ) = $_[0]->content_list();
	    defined
		or return $_;
	    return $_ =~ $re;
	} )
	or return;

    my ( $val ) = $ele->content_list();
    $val =~ s/ \A \s+ //smx;
    $val =~ s/ \s+ .* //smx;
    return $val;
}

sub find_span {
    my ( $tree, $id ) = @_;
    my $ele = $tree->look_down( _tag => 'span', id => $id )
	or die "Bug - span id='$id' not found";
    my ( $val ) = $ele->content_list();
    $val =~ s/ \A \s+ //smx;
    $val =~ s/ \s+ \z //smx;
    return $val;
}

sub get_cached {
    my ( $file, $uri ) = @_;

    my $path = join '/', CACHE_DIR, $file;
    -d CACHE_DIR
	or mkdir CACHE_DIR
	or die "Failed to create directory @{[ CACHE_DIR ]}/: $!\n";

    {
	my @stat;
	@stat = stat $path
	    and $stat[9] + $opt{age} > time
	    and do {
	    open my $fh, '<:encoding(utf-8)', $path
		or die "Failed to open $path for input: $!\n";
	    local $/ = undef;
	    local $YAML::LoadBlessed = 1;
	    return Load( <$fh> );
	};
    }

    state $ua = LWP::UserAgent->new();
    my $resp = $ua->get( $uri );
    $resp->is_success()
	or die "Failed to fetch $uri: ", $resp->status_line();
    my ( $last_modified ) = $resp->header( 'Last-Modified' );
    my $content = $resp->decoded_content();
    $content =~ s/ \r (?= \n ) //smxg;
    my $data = {
	last_modified	=> $last_modified,
	content		=> $content,
    };
    open my $fh, '>:encoding(utf-8)', $path
	or die "Failed to open $path for output: $!\n";
    print { $fh } Dump( $resp );
    return $resp;
}

sub get_file {
    my ( $fn ) = @_;
    local $/ = undef;
    open my $fh, '<:encoding(utf-8)', $fn
	or die "Unable to open $fn: $!\n";
    my $content = <$fh>;
    close $fh;
    return $content;
}

sub get_html {
    my ( $oid ) = @_;
    my $url = heavens_above_url( $oid );
    my $resp = get_cached( "oid-$oid", $url );
    return $resp->decoded_content();
}

sub heavens_above_url {
    my ( $oid ) = @_;
    $oid =~ m/ \A [0-9]+ \z /smx
	or die "OID '$oid' not numeric\n";
    return sprintf 'https://www.heavens-above.com/SatInfo.aspx?satid=%05d', $oid;
}

sub open_file_for_output {
    my ( $path ) = @_;
    open my $fh, '>:encoding(utf-8)', $path
	or die "Failed to open $path: $!\n";
    return $fh;
}

sub print_perl {
    my ( $oid, $name, $mag ) = @_;
    if ( defined $mag ) {
	# die "Debug - $oid ($name) '$mag'";
	printf "  '%05d' => %5.1f, # %s\n", $oid, $mag, $name;
    } else {
	printf "  '%05d' => undef, # %s has no recorded magnitude\n", $oid, $name;
    }
    return;
}

sub process_celestrak {
    my $visual = get_cached( VISUAL_YML, VISUAL_URL );
    my ( $last_modified ) = $visual->header( 'Last-Modified' );
    my ( @preamble, @postamble );
    my @options = qw{ --celestrak };
    if ( $opt{update} ) {
	open my $fh, '<:encoding(utf-8)', MODIFY_FILE
	    or die "Failed to open @{[ MODIFY_FILE ]}: $!\n";
	local $_ = undef;
	while ( <$fh> ) {
	    push @preamble, $_;
	    $_ eq MARK_BEGIN_MAG . "\n"
		and last;
	}
	defined $_
	    or die q/Failed to find '/, MARK_BEGIN_MAG, q/' in /,
		MODIFY_FILE, "\n";
	push @preamble, "\n";
	while ( <$fh> ) {
	    $_ eq MARK_END . "\n"
		and last;
	}
	defined $_
	    or die q/Failed to find '/, MARK_END, q/' in /, MODIFY_FILE, "\n";
	push @postamble, "\n", $_;
	while ( <$fh> ) {
	    push @postamble, $_;
	}
	close $fh;
	push @options, qw{ --update };
    }
    $opt{update}
	and local *STDOUT = open_file_for_output( MODIFY_FILE );
    print @preamble;
    print <<"EOD";
# The following is all the Celestrak visual list that have magnitudes in
# Heavens Above. These data are generated by the following:
#
#   \$ tools/heavens-above-mag @options
#
# Last-Modified: @{[ $last_modified // 'unknown' ]}

%magnitude_table = (
EOD

    my %oid = parse_visual( $visual );
    process_perl( sort { $a <=> $b } keys %oid );
    say ');';
    print @postamble;
    return;
}

sub parse_visual {
    my ( $resp ) = @_;
    my $content = $resp->decoded_content();
    local $_ = undef;	# while (<>) ... does not localize $_.
    my %oid;
    open my $fh, '<', \$content;
    local $_ = undef;  # while (<>) ... does not localize $_.
    while ( <$fh> ) {
	my ( $id, $name ) = unpack 'A5A*';
	$oid{$id} = $name;
    }
    close $fh;
    foreach (
	[ '53807', 'Bluewalker 3' ],
    ) {
	my ( $extra, $name ) = @{ $_ };
	if ( defined $oid{$extra} ) {
	    warn "OID $extra is already in visual.txt\n";
	} else {
	    $opt{verbose} and warn "Adding OID $extra\n";
	    $oid{$extra} = $name;
	}
    }
    return %oid;
}

sub process_get {
    my ( $spec ) = @_;
    my @rslt;
    process_parse(
	sub {
	    push @rslt, [ @_ ];
	    return;
	},
	$spec,
    );
    return @rslt;
}

sub process_open {
    my @arg = @_;
    require Browser::Open;
    my $cmd = Browser::Open::open_browser_cmd();
    foreach my $oid ( @arg ) {
	my $url = heavens_above_url( $oid );
	system { $cmd } $cmd, $url;
    }
    return;
}

sub process_parse {
    my ( $handler, @arg ) = @_;
    foreach my $spec ( @arg ) {
	my $get = $spec =~ m/ \A [0-9]+ \z /smx ? \&get_html : \&get_file;
	my $tree = HTML::TreeBuilder->new_from_content( $get->( $spec ) );
	# print $tree->as_HTML();
	# Should return a <span>..</span> containing the OID
	my $oid = find_span( $tree, 'ctl00_cph1_lblSatID' );
	# Should return a <span>..</span> containing the name
	my $name = find_span( $tree, 'ctl00_cph1_lblOIGName' );
	# We would like to look for id ctl00_cph1_lblBrightness here, but it
	# contains table rows (which it should not), so HTML::TreeBuilder
	# spits it out any old where. Sometimes before the table, sometimes



( run in 1.361 second using v1.01-cache-2.11-cpan-98e64b0badf )