App-DistSync

 view release on metacpan or  search on metacpan

lib/App/DistSync.pm  view on Meta::CPAN

package App::DistSync;
use strict;
use warnings;
use utf8;
use feature qw/say/;

=encoding utf-8

=head1 NAME

App::DistSync - Utility for synchronizing distribution mirrors

=head1 SYNOPSIS

    use App::DistSync;

    my $ds = App::DistSync->new(
            dir => "/var/www/www.example.com/dist",
            pid => $$,
            timeout => 60,
            proxy => 'http://http.example.com:8001/',
        );

    $ds->init or die "Initialization error";

    $ds->sync or die "Sync error";

=head1 DESCRIPTION

Utility for synchronizing distribution mirrors

=head1 METHODS

This module implements the following methods

=head2 new

    my $ds = new App::DistSync(
            dir => "/var/www/www.example.com/dist",
            pid => $$,
            timeout => 60,
            proxy => 'http://http.example.com:8001/',
        );

Returns the object

=head2 dir

    my $abs_dir = $ds->dir;

Returns absolute pathname of working directory

=head2 fetch

    my $struct = $self->fetch( $URI_STRING, "path/to/file.txt", "/tmp/file.txt" );

Fetching file from remote resource by URI and filename.
The result will be written to the specified file. For example: "/tmp/file.txt"

Returns structure, contains:

    {
        status  => 1,         # Status. 0 - Errors; 1 - OK
        mtime   => 123456789, # Last-Modified in ctime format or 0 in case of errors
        size    => 123,       # Content-length
        code    => 200,       # HTTP Status code
    };

=head2 init

    $ds->init or die ("Initialization error");

Initializing the mirror in the specified directory

=head2 mkmani

    $ds->mkmani;

Generation the new MANIFEST file

=head2 pid

    my $pid = $ds->pid;

Returns the pid of current process

=head2 status

    $ds->status;

lib/App/DistSync.pm  view on Meta::CPAN

    # Files
    $props{file_meta}       = File::Spec->catfile($dir, METAFILE);
    $props{file_manifest}   = File::Spec->catfile($dir, MANIFEST);
    $props{file_maniskip}   = File::Spec->catfile($dir, MANISKIP);
    $props{file_manilock}   = File::Spec->catfile($dir, MANILOCK);
    $props{file_manitemp}   = File::Spec->catfile($dir, MANITEMP);
    $props{file_manidel}    = File::Spec->catfile($dir, MANIDEL);
    $props{file_mirrors}    = File::Spec->catfile($dir, MIRRORS);
    $props{file_readme}     = File::Spec->catfile($dir, README);
    $props{file_temp}       = File::Spec->catfile(File::Spec->tmpdir(), TEMPFILE);

    # Read META file as YAML
    my $meta = read_yaml($props{file_meta});
    $props{meta} = $meta;

    # Create current static dates
    $props{mtime_manifest} = (-e $props{file_manifest}) && -s $props{file_manifest}
        ? (stat($props{file_manifest}))[9]
        : 0;
    $props{mtime_manidel}  = (-e $props{file_manidel}) && -s $props{file_manidel}
        ? (stat($props{file_manidel}))[9]
        : 0;
    $props{mtime_mirrors}  = (-e $props{file_mirrors}) && -s $props{file_mirrors}
        ? (stat($props{file_mirrors}))[9]
        : 0;

    # Set TimeOut
    my $to = _expire($props{timeout} // TIMEOUT);
    croak("Can't use specified timeout") unless $to =~ /^[0-9]{1,11}$/;

    # Instance
    my $self = bless({%props}, $class);

    # User Agent
    my $ua = $self->{ua} = LWP::UserAgent->new();
    $ua->timeout($to) if $to;
    $ua->agent(sprintf("%s/%s", __PACKAGE__, $VERSION));
    $ua->env_proxy;
    $ua->proxy(['http', 'https'] => $props{proxy}) if $props{proxy};
    $ua->ssl_opts(
        verify_hostname => 0,
        SSL_verify_mode => 0x00
    ) if $props{insecure};

    return $self;
}
sub verbose { !!shift->{verbose} }
sub dir { shift->{dir} }
sub pid { shift->{pid} }
sub ua { shift->{ua} }
sub init { # Initialization
    my $self = shift;
    my $stamp = scalar(localtime($self->{started}));
    my $status = 1;

    # MANIFEST.SKIP
    printf "%s... ", $self->{file_maniskip};
    if (touch($self->{file_maniskip}) && (-e $self->{file_maniskip}) && -z $self->{file_maniskip}) {
        my @content = (
            "# Generated on $stamp",
            "# List of files that should not be synchronized",
            "#",
            "# Format of file:",
            "#",
            "# dir1/dir2/.../dirn/foo.txt        any comment, for example blah-blah-blah",
            "# bar.txt                           any comment, for example blah-blah-blah",
            "# baz.txt",
            "# 'spaced dir1/foo.txt'             any comment, for example blah-blah-blah",
            "# 'spaced dir1/foo.txt'             any comment, for example blah-blah-blah",
            "# !!perl/regexp (?i-xsm:\\.bak\$)     avoid all bak files",
            "#",
            "# See also MANIFEST.SKIP file of ExtUtils::Manifest v1.68 or later",
            "#",
            "",
            "# Avoid version control files.",
            "!!perl/regexp (?i-xsm:\\bRCS\\b)",
            "!!perl/regexp (?i-xsm:\\bCVS\\b)",
            "!!perl/regexp (?i-xsm:\\bSCCS\\b)",
            "!!perl/regexp (?i-xsm:,v\$)",
            "!!perl/regexp (?i-xsm:\\B\\.svn\\b)",
            "!!perl/regexp (?i-xsm:\\B\\.git\\b)",
            "!!perl/regexp (?i-xsm:\\B\\.gitignore\\b)",
            "!!perl/regexp (?i-xsm:\\b_darcs\\b)",
            "!!perl/regexp (?i-xsm:\\B\\.cvsignore\$)",
            "",
            "# Avoid temp and backup files.",
            "!!perl/regexp (?i-xsm:~\$)",
            "!!perl/regexp (?i-xsm:\\.(old|bak|back|tmp|temp|rej)\$)",
            "!!perl/regexp (?i-xsm:\\#\$)",
            "!!perl/regexp (?i-xsm:\\b\\.#)",
            "!!perl/regexp (?i-xsm:\\.#)",
            "!!perl/regexp (?i-xsm:\\..*\\.sw.?\$)",
            "",
            "# Avoid prove files",
            "!!perl/regexp (?i-xsm:\\B\\.prove\$)",
            "",
            "# Avoid MYMETA files",
            "!!perl/regexp (?i-xsm:^MYMETA\\.)",
            "",
            "# Avoid Apache and building files",
            "!!perl/regexp (?i-xsm:\\B\\.ht.+\$)",
            "!!perl/regexp (?i-xsm:\\B\\.exists\$)",
            "",
            "# Skip TEMP files",
            "!!perl/regexp (?i-xsm:\\.TEMP\\-\\d+\$)",
            "\n",
        );
        if (spew($self->{file_maniskip}, join("\n", @content))) {
            say "ok";
        } else {
            say "fail";
            $status = 0;
        }
    } else {
        say "skip";
    }

    # MANIFEST.DEL
    printf "%s... ", $self->{file_manidel};
    if (touch($self->{file_manidel}) && (-e $self->{file_manidel}) && -z $self->{file_manidel}) {
        my @content = (

lib/App/DistSync.pm  view on Meta::CPAN

        my @skip_keys = @{(SKIPFILES)};
        my $maniskip = maniread($self->{file_maniskip}, SKIPMODE); # MANIFEST.SKIP
        push @skip_keys, keys %$maniskip if ref($maniskip) eq 'HASH';
        for (@skip_keys) {$skips{$_} = qrreconstruct($_)}
        debug("Found %d keys in the list of skipped files", scalar(keys %skips));
        #debug(Data::Dumper::Dumper(\%skips)) && return 0;
    }

    # Deleting files listed in the MANIFEST.DEL file but not in the exclusion list
    {
        debug("Deleting files from list: %s", MANIDEL);
        my $delfile = $self->{file_manidel};  # MANIFEST.DEL
        my $deltime = $self->{mtime_manidel}; # Modify time in seconds
        my $dellist = maniread($delfile) // {}; # { file => expire };
        my $expire = 0;
        foreach (values %$dellist) {
            my $dt = _expire($_->[0] || 0);
            $_ = [$dt];
            $expire = $dt if $dt > $expire;
        }
        $expire = _expire(EXPIRE) unless $expire > 0;
        debug("The file '$delfile' will expire on %s", scalar(localtime($deltime + $expire)))
            if $deltime;
        #debug(Data::Dumper::Dumper($dellist)) && return 0;
        if ($deltime && (time - $deltime) > $expire) { # MANIFEST.DEL is expired!
            # Delete files physically if they exist physically and are not on the exclusion list!
            foreach my $k (keys %$dellist) {
                if (_skipcheck(\%skips, $k)) { # The file is in the exclusion list.
                    debug("> [SKIPPED] %s", $k);
                    next;
                }
                my $f = File::Spec->canonpath(File::Spec->catfile($self->dir, $k));
                if (-e $f) {
                    fdelete($f);
                    debug("> [DELETED] %s", $k);
                } else {
                    debug("> [SKIPPED] %s (%s)", $k, $f);
                }
            }

            # Deleting the MANIFEST.DEL file and immediately creating a new one
            fdelete($delfile);
            touch($delfile);
        } else {
            if ($deltime) {
                debug("Skipped. Deletion is not required yet because the scheduled time has not arrived");
                if ($self->verbose) {
                    debug("  File    : %s", MANIDEL);
                    debug("  Created : %s", scalar(localtime($deltime)));
                    debug("  Expires : %s", scalar(localtime($deltime + $expire)));
                }
            } else {
                debug("Skipped. File %s not exists",  MANIDEL);
            }
        }

        # Adding files listed in MANIFEST.DEL to the exclusion list
        for (keys %$dellist) {$skips{$_} = qrreconstruct($_)}
    }

    # Reading the MIRRORS file and deciding whether to synchronize or not
    debug("Synchronization");
    my $mirrors_mani = maniread($self->{file_mirrors}) // {}; # MIRRORS
    my @mirrors = sort {$a cmp $b} keys %$mirrors_mani;
    if (scalar(@mirrors)) {
        foreach my $url (@mirrors) {
            debug("RESOURCE \"%s\"", $url);

            # Downloading the MANIFEST.LOCK file, skipping the mirror resource if this
            # file was successfully downloaded from the resource
            {
                debug("Fetching %s", MANILOCK);
                my $fetch_lock = $self->fetch($url, MANILOCK, $self->{file_manitemp});
                if ($fetch_lock->{status}) { # Ok
                    if ($self->_check_lockfile($self->{file_manitemp})) {
                        $self->{url} = $url;
                        debug("> [SKIPPED] Current resource SHOULD NOT update itself");
                    } else {
                        debug("> [SKIPPED] Remote resource is in a state of updating. Please wait");
                    }
                    next;
                }
            }

            # Downloading the META file and analyzing the resource (checking the resource
            # status and update date). If the check fails, the resource is skipped.
            {
                debug("Fetching %s", METAFILE);
                my $fetch_meta = $self->fetch($url, METAFILE, $self->{file_manitemp});
                if ($fetch_meta->{status}) { # Ok
                    my $remote_meta = read_yaml($self->{file_manitemp}) // '';
                    if (((ref($remote_meta) eq 'ARRAY') || ref($remote_meta) eq 'YAML::Tiny')) {
                        $remote_meta = $remote_meta->[0] || {};
                    }
                    unless ($remote_meta && ref($remote_meta) eq 'HASH') {
                        debug("> [SKIPPED] Remote resource is unreadable. Please contact the administrator of this resource");
                        next;
                    }
                    if ($remote_meta->{status}) {
                        my $remote_url  = $remote_meta->{url} || $remote_meta->{uri} || '';
                        my $remote_date = $fetch_meta->{mtime} || 0;
                        my $remote_datef = $remote_date ? scalar(localtime($remote_date)) : 'UNKNOWN';
                        my $remote_ok = (time - $remote_date) > _expire(FREEZE) ? 0 : 1;
                        if ($self->verbose) {
                            debug("RESOURCE INFORMATION:");
                            debug("  Resource URL : %s", $remote_url);
                            debug("  Date         : %s", $remote_meta->{date} // 'UNKNOWN');
                            debug("  Modified     : %s", $remote_datef);
                            debug("  Hostname     : %s", $remote_meta->{hostname} // '');
                            debug("  Directory    : %s", $remote_meta->{directory} // '');
                            debug("  Project      : %s v%s",
                                $remote_meta->{project} || ref($self), $remote_meta->{version} // '0.01');
                            debug("  Script       : %s", $remote_meta->{script} // '');
                            debug("  Status       : %s", $remote_ok ? "OK" : "EXPIRED");
                            debug("  Time         : %d sec", $remote_meta->{'time'} || 0);
                        }
                        unless ($remote_ok) {
                            debug("> [SKIPPED] Remote resource is expired. Last updated: %s", $remote_datef);
                            next
                        }
                    } else {

lib/App/DistSync.pm  view on Meta::CPAN

                debug("Fetching %s", MIRRORS);
                my $fetch_mirr = $self->fetch($url, MIRRORS, $self->{file_manitemp});
                if ($fetch_mirr->{status} && ((-z $self->{file_mirrors}) || $fetch_mirr->{mtime} > $self->{mtime_mirrors})) {
                    my $remote_mirr = maniread($self->{file_manitemp}) // {};
                    my $mcnt = scalar(keys %$remote_mirr) || 0; # Resources count in remote mirror file
                    if ($mcnt && $mcnt > 1) { # 2 and more resources
                        my $ar = $sync_list{(MIRRORS)} //= [];
                        push @$ar, {
                            url     => $url,
                            mtime   => $fetch_mirr->{mtime},
                            size    => $fetch_mirr->{size},
                        };
                    } else {
                        debug("> [SKIPPED] File %s on %s contains too few mirrors", MIRRORS, $url);
                    }
                } else {
                    printf STDERR "Can't download \"%s\": %s\n", $fetch_mirr->{url}, $fetch_mirr->{message}
                        unless $fetch_mirr->{status};
                }
            }

            # Download MANIFEST.DEL and fill the list to delete the files listed in it
            {
                debug("Fetching %s", MANIDEL);
                my $fetch_dir = $self->fetch($url, MANIDEL, $self->{file_manitemp});
                if ($fetch_dir->{status}) {
                    my $remote_manidel = maniread($self->{file_manitemp}) // {};
                    foreach my $k (keys %$remote_manidel) {
                        unless (_skipcheck(\%skips, $k)) {
                            $delete_list{$k} //= 0;
                            $delete_list{$k}++;
                        }
                    }
                } else {
                    printf STDERR "Can't download \"%s\": %s\n", $fetch_dir->{url}, $fetch_dir->{message}
                }
            }
        } continue {
            fdelete($self->{file_manitemp});
        }
    } else {
        $status = 1;
        debug("Skipped. File %s is empty", MIRRORS);
    }

    # Deleting files according to the generated list of files to be deleted
    {
        debug("Deleting files");
        foreach my $k (keys %delete_list) {
            my $f = File::Spec->canonpath(File::Spec->catfile($self->dir, $k));
            if (-e $f) {
                fdelete($f);
                debug("> [DELETED] %s", $k);
            } else {
                debug("> [SKIPPED] %s (%s)", $k, $f);
            }
        }
    }
    #debug(Data::Dumper::Dumper(\%delete_list));

    # Iterate through the synchronization list and download all files that
    # are NOT present in the previously generated deletion list.
    #debug(Data::Dumper::Dumper(\%sync_list));
    {
        debug("Downloading files");
        my $total = 0; # Size
        my $cnt = 0; # File number
        my $all = scalar(keys %sync_list);
        my $af = '[%0' . length("$all") . 'd/%0' . length("$all") . 'd] %s';
        foreach my $k (sort {lc($a) cmp lc($b)} keys %sync_list) { $cnt++;
            debug($af, $cnt, $all, $k);
            my $list = $sync_list{$k} // []; # Get list of urls
            unless (scalar(@$list)) {
                debug("> [SKIPPED] Nothing to do for %s", $k) if $self->verbose;
                next;
            }

            # Try to download by list of urls
            my $mt_l = $manifest->{$k}[0] || 0; # Modify time of local file
            my $is_downloaded = 0;
            foreach my $job (sort {($b->{mtime} || 0)  <=> ($a->{mtime} || 0)} @$list) {
                last if $is_downloaded;
                my $mt_r = $job->{mtime}; # Modify time of remote file
                my $url  = $job->{url}; # URL of remote file
                my $size = $job->{size}; # Size of remote file

                # Check URL
                unless ($url) {
                    debug("> [SKIPPED] No URL") if $self->verbose;
                    next;
                }

                # Check size
                unless ($size) {
                    debug("> [SKIPPED] No file size: %s", $url) if $self->verbose;;
                    next;
                }

                # Check modify time
                unless ($mt_r || !$mt_l) {
                    debug("> [SKIPPED] The remote file have undefined modified time: %s", $url) if $self->verbose;
                    next;
                }
                if ($mt_l >= $mt_r) {
                    debug("> [SKIPPED] File is up to date: %s", $url) if $self->verbose;
                    next;
                }

                # Download
                my $fetch_file = $self->fetch($url, $k, $self->{file_temp});
                if ($fetch_file->{status}) {
                    my $size_fact = $fetch_file->{size} || 0;
                    if ($size_fact && $size_fact == $size) {
                        debug("> [  OK   ] Received %d bytes: %s", $size_fact, $url) if $self->verbose;
                        $total += $size_fact;
                        $is_downloaded = 1;
                        next;
                    }
                } else {
                    printf STDERR "Can't download \"%s\": %s\n", $fetch_file->{url}, $fetch_file->{message};
                }



( run in 0.842 second using v1.01-cache-2.11-cpan-e1769b4cff6 )