App-SimpleBackuper
view release on metacpan or search on metacpan
lib/App/SimpleBackuper/Backup.pm view on Meta::CPAN
package App::SimpleBackuper;
use strict;
use warnings;
use feature ':5.14';
use Carp;
use Try::Tiny;
use Time::HiRes qw(time);
use Const::Fast;
use App::SimpleBackuper::BackupDB;
use App::SimpleBackuper::_format;
use App::SimpleBackuper::_BlockDelete;
use App::SimpleBackuper::_BlocksInfo;
const my $SIZE_OF_TOP_FILES => 10;
const my $SAVE_DB_PERIOD => 60 * 60;
const my $PRINT_PROGRESS_PERIOD => 60;
sub _proc_uid_gid($$$) {
my($uid, $gid, $uids_gids) = @_;
my $last_uid_gid = @$uids_gids ? $uids_gids->unpack( $uids_gids->[-1] )->{id} : 0;
my $user_name = getpwuid($uid);
my($user) = grep { $_->{name} eq $user_name } map { $uids_gids->unpack($_) } @$uids_gids;
if(! $user) {
$user = {id => ++$last_uid_gid, name => $user_name};
$uids_gids->upsert({ id => $user->{id} }, $user );
#printf "new owner user added (unix uid %d, name %s, internal uid %d)\n", $uid, $user_name, $user->{id};
}
$uid = $user->{id};
my $group_name = getgrgid($gid);
my($group) = grep { $_->{name} eq $group_name } map { $uids_gids->unpack($_) } @$uids_gids;
if(! $group) {
$group = {id => ++$last_uid_gid, name => $group_name};
$uids_gids->upsert({ id => $group->{id} }, $group );
#printf "new owner group added (unix gid %d, name %s, internal gid %d)\n", $gid, $group_name, $group->{id};
}
$gid = $group->{id};
return $uid, $gid;
}
sub _get_block_to_delete {
my($state) = @_;
if(ref($state->{blocks2delete_prio2size2chunks}) eq 'HASH') {
$state->{blocks2delete_prio2size2chunks} = [
map {$state->{blocks2delete_prio2size2chunks}->{ $_ }}
sort {$a <=> $b}
keys %{ $state->{blocks2delete_prio2size2chunks} }
];
}
return if ! @{ $state->{blocks2delete_prio2size2chunks} };
if(ref($state->{blocks2delete_prio2size2chunks}->[0]) eq 'HASH') {
$state->{blocks2delete_prio2size2chunks}->[0] = [
map {$state->{blocks2delete_prio2size2chunks}->[0]->{ $_ }}
sort {$b <=> $a}
keys %{ $state->{blocks2delete_prio2size2chunks}->[0] }
];
}
my $prio_basket = $state->{blocks2delete_prio2size2chunks}->[0];
my $size_basket = $prio_basket->[0];
my $block_id = shift @$size_basket;
shift @$prio_basket if ! @$size_basket;
shift @{ $state->{blocks2delete_prio2size2chunks} } if ! @$prio_basket;
return $block_id;
}
sub Backup {
my($options, $state) = @_;
my($backups, $files, $parts, $blocks) = @{ $state->{db} }{qw(backups files parts blocks)};
die "Backup '$options->{\"backup-name\"}' already exists" if grep { $backups->unpack($_)->{name} eq $options->{'backup-name'} } @$backups;
$state->{$_} = 0 foreach qw(last_backup_id last_file_id last_block_id bytes_processed bytes_in_last_backup total_weight);
print "Preparing to backup: " if $options->{verbose};
$state->{profile}->{init_ids} = - time();
foreach (@$backups) {
my $id = $backups->unpack($_)->{id};
$state->{last_backup_id} = $id if ! $state->{last_backup_id} or $state->{last_backup_id} < $id;
}
#print "last backup id $state->{last_backup_id}, ";
foreach (@$files) {
my $file = $files->unpack($_);
$state->{last_file_id} = $file->{id} if ! $state->{last_file_id} or $state->{last_file_id} < $file->{id};
if($file->{versions} and @{ $file->{versions} } and $file->{versions}->[-1]->{backup_id_max} == $state->{last_backup_id}) {
$state->{bytes_in_last_backup} += $file->{versions}->[-1]->{size};
}
}
#print "last file id $state->{last_file_id}, ";
foreach (@$blocks) {
my $id = $blocks->unpack($_)->{id};
$state->{last_block_id} = $id if ! $state->{last_block_id} or $state->{last_block_id} < $id;
}
#print "last block id $state->{last_block_id}, ";
$state->{profile}->{init_ids} += time;
print "total weight " if $options->{verbose};
for(my $q = 0; $q <= $#$parts; $q++) {
$state->{total_weight} += $parts->unpack($parts->[ $q ])->{size};
}
print fmt_weight($state->{total_weight}).", " if $options->{verbose};
my $cur_backup = {name => $options->{'backup-name'}, id => ++$state->{last_backup_id}, files_cnt => 0, max_files_cnt => 0};
$backups->upsert({ id => $cur_backup->{id} }, $cur_backup);
{
$state->{blocks_info} = _BlocksInfo($options, $state);
$state->{blocks2delete_prio2size2chunks} = {};
foreach my $block (@$blocks) {
my $block_id = $blocks->unpack($block)->{id};
next if ! $block_id; # What's this?
my $block_info = $state->{blocks_info}->{ $block_id };
push @{ $state->{blocks2delete_prio2size2chunks}->{ $block_info->[0] }->{ $block_info->[1] } }, $block_id;
}
print " OK\n" if $options->{verbose};
}
_print_progress($state) if $options->{verbose};
my(%files_queues_by_priority, %dirs2upd);
while(my($mask, $priority) = each %{ $options->{files} }) {
next if ! $priority;
foreach my $path (glob $mask) {
if(grep { ~index($path, $_ =~ s/(?!\/)$/\//r) } map {@$_} values %files_queues_by_priority) {
next;
}
# Remove child paths
foreach my $tasks (values %files_queues_by_priority) {
my $tasks_cnt = @$tasks;
@$tasks = grep { ! ~index($_->[0], $path =~ s/(?!\/)$/\//r) } @$tasks;
}
my $file_id = 0; {
my @path = split(/\//, $path, -1);
pop @path if @path and $path[-1] eq '';
pop @path;
my @cur_path;
foreach my $path_node (@path) {
push @cur_path, $path_node;
my $file = $files->find_by_parent_id_name($file_id, $path_node);
$file //= {
parent_id => $file_id,
id => ++$state->{last_file_id},
name => $path_node,
versions => [ {
backup_id_min => $state->{last_backup_id},
backup_id_max => 0,
uid => 0,
gid => 0,
size => 0,
mode => 0,
mtime => 0,
block_id => 0,
symlink_to => undef,
parts => [],
} ],
};
$dirs2upd{join('/', @cur_path) || '/'} = {
parent_id => $file_id,
filename => $path_node,
};
$files->upsert({ id => $file->{id}, parent_id => $file->{parent_id} }, $file);
$file_id = $file->{id};
}
}
push @{ $files_queues_by_priority{$priority} }, [ $path, $priority, $file_id ];
}
}
delete $files_queues_by_priority{ $_ } foreach grep {! @{ $files_queues_by_priority{ $_ } }} keys %files_queues_by_priority;
my $last_db_save = time;
my $last_print_progress = time;
while(%files_queues_by_priority) {
my($priority) = sort {$b <=> $a} keys %files_queues_by_priority;
my $task = shift @{ $files_queues_by_priority{$priority} };
delete $files_queues_by_priority{$priority} if ! @{ $files_queues_by_priority{$priority} };
my @next = _file_proc( $task, $options, $state );
unshift @{ $files_queues_by_priority{ $_->[1] } }, $_ foreach reverse @next;
if($options->{verbose} and time - $last_print_progress > $PRINT_PROGRESS_PERIOD) {
_print_progress($state);
$last_print_progress = time;
}
if(time - $last_db_save > $SAVE_DB_PERIOD) {
App::SimpleBackuper::BackupDB($options, $state);
$last_db_save = time;
}
}
while(my($full_path, $dir2upd) = each %dirs2upd) {
print "Updating dir $full_path..." if $options->{verbose};
my $file = $files->find_by_parent_id_name($dir2upd->{parent_id}, $dir2upd->{filename});
my @stat = lstat($full_path);
if(@stat and $file->{versions}->[-1]->{backup_id_max} != $state->{last_backup_id}) {
my($uid, $gid) =_proc_uid_gid($stat[4], $stat[5], $state->{db}->{uids_gids});
if($file->{versions}->[-1]->{backup_id_max} == $state->{last_backup_id} - 1) {
$file->{versions}->[-1] = {
%{ $file->{versions}->[-1] },
backup_id_max => $state->{last_backup_id},
uid => $uid,
gid => $gid,
size => $stat[7],
mode => $stat[2],
mtime => $stat[9],
block_id => 0,
symlink_to => undef,
parts => [],
};
} else {
push @{ $file->{versions} }, {
backup_id_min => $state->{last_backup_id},
backup_id_max => $state->{last_backup_id},
uid => $uid,
gid => $gid,
size => $stat[7],
mode => $stat[2],
mtime => $stat[9],
block_id => 0,
symlink_to => undef,
parts => [],
}
}
$files->upsert({ id => $file->{id}, parent_id => $file->{parent_id} }, $file);
my $backup = $backups->find_row({ id => $state->{last_backup_id} });
$backup->{files_cnt}++;
$backup->{max_files_cnt}++;
$backups->upsert({ id => $backup->{id} }, $backup );
}
print "OK\n" if $options->{verbose};
}
my $backup = $backups->find_row({ id => $state->{last_backup_id} });
$backup->{is_done} = 1;
$backups->upsert({ id => $backup->{id} }, $backup );
App::SimpleBackuper::BackupDB($options, $state);
_print_progress($state) if ! $options->{quiet};
}
sub _print_progress {
print "Progress: ";
if($_[0]->{bytes_in_last_backup}) {
printf "processed %s of %s in last backup, ", fmt_weight($_[0]->{bytes_processed}), fmt_weight($_[0]->{bytes_in_last_backup});
}
printf "total backups weight %s.\n", fmt_weight($_[0]->{total_weight});
}
use Text::Glob qw(match_glob);
use Fcntl ':mode'; # For S_ISDIR & same
use App::SimpleBackuper::RegularFile;
sub _file_proc {
my($task, $options, $state) = @_;
confess "No task" if ! $task;
confess "No filepath" if ! $task->[0];
my @next;
my $file_time_spent = 0;
my $file_weight_spent = 0;
print "$task->[0]\n" if $options->{verbose};
print "\tparent #$task->[2], priority $task->[1]" if $options->{verbose};
my $priority = $task->[1];
while(my($mask, $p) = each %{ $options->{files} }) {
if(match_glob( $mask, $task->[0] )) {
$priority = $p;
print ", priority $priority by rule '\"$mask\": $p'" if $options->{verbose};
}
}
if(! $priority) { # Excluded by user
print " -> skip\n" if $options->{verbose};
return;
}
$state->{profile}->{fs} -= time;
$state->{profile}->{fs_lstat} -= time;
$file_time_spent -= time;
my @stat = lstat($task->[0]);
$file_time_spent += time;
$state->{profile}->{fs} += time;
$state->{profile}->{fs_lstat} += time;
if(! @stat) {
print ". Not exists\n" if $options->{verbose};
return;
}
else {
printf ", stat: %s:%s %o %s modified at %s", scalar getpwuid($stat[4]), scalar getgrgid($stat[5]), $stat[2], fmt_weight($stat[7]), fmt_datetime($stat[9]) if $options->{verbose};
}
my($backups, $blocks, $files, $parts, $uids_gids) = @{ $state->{db} }{qw(backups blocks files parts uids_gids)};
my($uid, $gid) = _proc_uid_gid($stat[4], $stat[5], $uids_gids);
my($file); {
my($filename) = $task->[0] =~ /([^\/]+)\/?$/;
$file = $files->find_by_parent_id_name($task->[2], $filename);
if($file) {
print ", is old file #$file->{id}" if $options->{verbose};
if($file->{versions}->[-1]->{backup_id_max} == $state->{last_backup_id}) {
print ", is already backuped.\n" if $options->{verbose};
return;
}
} else {
$file = {
parent_id => $task->[2],
id => ++$state->{last_file_id},
name => $filename,
versions => [],
};
print ", is new file #$file->{id}" if $options->{verbose};
}
}
$state->{bytes_processed} += $file->{versions}->[-1]->{size} if @{ $file->{versions} };
my %version = (
backup_id_min => $state->{last_backup_id},
backup_id_max => $state->{last_backup_id},
uid => $uid,
gid => $gid,
size => $stat[7],
mode => $stat[2],
mtime => $stat[9],
block_id => undef,
symlink_to => undef,
parts => [],
);
if(S_ISDIR $stat[2]) {
print ", is directory.\n" if $options->{verbose};
my $dh;
$state->{profile}->{fs} -= time;
$state->{profile}->{fs_read_dir} -= time;
$file_time_spent -= time;
if(! opendir($dh, $task->[0])) {
$state->{profile}->{fs} += time;
$state->{profile}->{fs_read_dir} += time;
push @{ $state->{fails}->{$!} }, $task->[0];
print ", can't read: $!\n" if $options->{verbose};
return;
}
my @files;
while(my $f = readdir($dh)) {
next if $f eq '.' or $f eq '..';
push @files, $f;
}
closedir($dh);
$file_time_spent += time;
$state->{profile}->{fs} += time;
$state->{profile}->{fs_read_dir} += time;
$version{block_id} = 0;
push @next, map { [$task->[0].($task->[0] =~ /\/$/ ? '' : '/').$_, $priority, $file->{id}] } sort @files;
}
elsif(S_ISLNK $stat[2]) {
$state->{profile}->{fs} -= time;
$state->{profile}->{fs_read_symlink} -= time;
$file_time_spent -= time;
$version{symlink_to} = readlink($task->[0]);
$file_time_spent += time;
$state->{profile}->{fs} += time;
$state->{profile}->{fs_read_symlink} += time;
if(defined $version{symlink_to}) {
print ", is symlink to $version{symlink_to}.\n" if $options->{verbose};
$version{block_id} = 0;
} else {
push @{ $state->{fails}->{$!} }, $task->[0];
print ", can't read: $!\n" if $options->{verbose};
return;
}
}
elsif(S_ISREG $stat[2]) {
print ", is regular file" if $options->{verbose};
$state->{profile}->{fs} -= time;
$state->{profile}->{fs_read} -= time;
$file_time_spent -= time;
lib/App/SimpleBackuper/Backup.pm view on Meta::CPAN
if(! $block_parts_cnt or $block_parts_cnt < $b->{parts_cnt}) {
$block_parts_cnt = $b->{parts_cnt};
$block = $b;
}
}
# Merge blocks to highest one
foreach my $bi ( keys %block_ids ) {
next if $bi == $block->{id};
$state->{profile}->{db_find_version_by_block} -= time;
for my $block_file_index ( 0 .. $#$files ) {
my $block_file = $files->unpack( $files->[ $block_file_index ] );
foreach my $version ( @{ $block_file->{versions} } ) {
next if $version->{block_id} != $bi;
$version->{block_id} = $block->{id};
$block->{parts_cnt} += @{ $version->{parts} };
foreach my $vpart (@{ $version->{parts} }) {
my $part = $parts->find_row({ hash => $vpart->{hash} });
next if $part->{block_id} == $block->{id};
$part->{block_id} = $block->{id};
$parts->upsert({ hash => $part->{hash} }, $part);
}
}
$files->[ $block_file_index ] = $files->pack( $block_file );
}
$state->{profile}->{db_find_version_by_block} += time;
$blocks->delete({ id => $bi });
}
} else {
$block = {
id => ++$state->{last_block_id},
parts_cnt => scalar @{ $version{parts} },
};
}
foreach my $part (@{ $version{parts} }) {
$part->{block_id} //= $block->{id};
$parts->upsert({ hash => $part->{hash} }, $part);
}
$block->{last_backup_id} = $state->{last_backup_id};
$blocks->upsert({ id => $block->{id} }, $block);
$version{block_id} = $block->{id};
}
}
else {
print ", skip not supported file type\n" if $options->{verbose};
return;
}
# If file version not changed, use old version with wider backup ids range
if( @{ $file->{versions} }
and (
$file->{versions}->[-1]->{backup_id_max} + 1 == $state->{last_backup_id}
or $file->{versions}->[-1]->{backup_id_max} == $state->{last_backup_id}
)
and $file->{versions}->[-1]->{uid} == $version{uid}
and $file->{versions}->[-1]->{gid} == $version{gid}
and $file->{versions}->[-1]->{size} == $version{size}
and $file->{versions}->[-1]->{mode} == $version{mode}
and $file->{versions}->[-1]->{mtime}== $version{mtime}
and (
defined $file->{versions}->[-1]->{symlink_to} == defined $version{symlink_to}
and (
! defined $version{symlink_to}
or $file->{versions}->[-1]->{symlink_to} eq $version{symlink_to}
)
)
and join(' ', map { $_->{hash} } @{ $file->{versions}->[-1]->{parts} }) eq join(' ', map { $_->{hash} } @{ $version{parts} })
) {
$file->{versions}->[-1]->{backup_id_max} = $state->{last_backup_id};
} else {
push @{ $file->{versions} }, \%version;
}
$files->upsert({ parent_id => $file->{parent_id}, id => $file->{id} }, $file );
my $backup = $backups->find_row({ id => $state->{last_backup_id} });
$backup->{files_cnt}++;
$backup->{max_files_cnt}++;
$backups->upsert({ id => $backup->{id} }, $backup );
$state->{longest_files} ||= [];
if( @{ $state->{longest_files} } < $SIZE_OF_TOP_FILES
or $state->{longest_files}->[-1]->{time} < $file_time_spent
) {
@{ $state->{longest_files} } = sort {$b->{time} <=> $a->{time}} (@{ $state->{longest_files} }, {time => $file_time_spent, path => $task->[0]});
splice @{ $state->{longest_files} }, $SIZE_OF_TOP_FILES;
}
if($file_weight_spent) {
$state->{heaviweightest_files} ||= [];
if( @{ $state->{heaviweightest_files} } < $SIZE_OF_TOP_FILES
or $state->{heaviweightest_files}->[-1]->{weight} < $file_weight_spent
) {
@{ $state->{heaviweightest_files} } = sort {$b->{weight} <=> $a->{weight}}
(@{ $state->{heaviweightest_files} }, {weight => $file_weight_spent, path => $task->[0]});
splice @{ $state->{heaviweightest_files} }, $SIZE_OF_TOP_FILES;
}
}
return @next;
}
sub _free_up_space {
my($options, $state, $protected_block_ids) = @_;
my($backups, $files, $blocks, $parts) = @{ $state->{db} }{qw(backups files blocks parts)};
my $deleted = 0;
while(1) {
my($block_id, @files) = _get_block_to_delete($state);
last if ! $block_id;
next if exists $protected_block_ids->{ $block_id };
my $block = $blocks->find_row({ id => $block_id });
next if ! $block;
next if $block->{last_backup_id} == $state->{last_backup_id};
( run in 2.024 seconds using v1.01-cache-2.11-cpan-ceb78f64989 )