MARC-Convert-Wikidata
view release on metacpan or search on metacpan
Wikidata/Utils.pm view on Meta::CPAN
if (! defined $isbn) {
return;
}
my $ret_isbn = $isbn;
$ret_isbn =~ s/\s+:?$//ms;
if ($ret_isbn !~ m/^[\d\-]+$/ms) {
if ($DEBUG) {
warn "ISBN '$ret_isbn' couldn't clean.";
}
$ret_isbn = undef;
}
return $ret_isbn;
}
# Clean an ISSN value.
#
# Strips trailing whitespace (optionally followed by a ';' separator) and then
# checks that the whole remaining string has the ISSN shape NNNN-NNNC, where
# C is an ASCII digit or the check character 'X'. The check digit itself is
# not verified, only the shape.
#
# Arguments:
#   $issn - raw ISSN string, may be undef.
# Returns:
#   - nothing (bare return) when $issn is undef,
#   - undef when the cleaned value does not look like an ISSN,
#   - the cleaned ISSN string otherwise.
# Emits a warning about uncleanable values only when $DEBUG is true.
sub clean_issn {
	my $issn = shift;
	if (! defined $issn) {
		return;
	}

	my $ret_issn = $issn;

	# Drop trailing whitespace, an optional ';' after it and any whitespace
	# behind that. Anchored with \z so only the real end of the string is
	# touched, never the end of an inner line.
	$ret_issn =~ s/\s+;?\s*\z//ms;

	# \A/\z instead of ^/$: under /m the latter would accept a valid-looking
	# first line followed by arbitrary text. [0-9] instead of \d keeps
	# non-ASCII digits out. The last position may be 'X' (check character).
	if ($ret_issn !~ m/\A[0-9]{4}-[0-9]{3}[0-9X]\z/ms) {
		if ($DEBUG) {
			warn "ISSN '$ret_issn' couldn't clean.";
		}
		$ret_issn = undef;
	}

	return $ret_issn;
}
# Clean a "number of pages" statement and reduce it to a single integer.
#
# Steps:
#   1. '[8]' style bracketed numbers lose their brackets.
#   2. Known unit words/abbreviations (with optional trailing dots and the
#      whitespace around them) are removed.
#   3. ':' and ';' are removed.
#   4. If what remains is a comma separated list of plain numbers, the
#      numbers are summed.
#
# Arguments:
#   $number_of_pages - raw statement as a character string, may be undef.
# Returns:
#   - nothing (bare return) when $number_of_pages is undef,
#   - undef when the value cannot be reduced to a non-negative integer
#     (e.g. roman numerals, or digit groups separated only by spaces),
#   - the integer (sum of all listed numbers) otherwise.
# Emits a warning about uncleanable values only when $DEBUG is true.
sub clean_number_of_pages {
	my $number_of_pages = shift;
	if (! defined $number_of_pages) {
		return;
	}

	my $ret_number_of_pages = $number_of_pages;

	# Remove []
	$ret_number_of_pages =~ s/\[(\d+)\]/$1/msg;

	# Remove text informations.
	# The two accented Czech words are spelled with code point escapes so
	# that they do not depend on the encoding of this source file:
	#   p + U+0159 + U+00ED + l
	#   ne + U+010D + U+00ED + slovan + U+00FD + ch
	my $pril = "p\x{159}\x{ed}l";
	my $neci = "ne\x{10d}\x{ed}slovan\x{fd}ch";
	$ret_number_of_pages =~ s/\s*(?:stran|s|$pril|$neci|l|barev|obr)\.*\s*//msg;

	# Remove other characters like ':' and ';'.
	$ret_number_of_pages =~ s/[:;]//msg;

	# Trim, so the whole-string anchors below are not defeated by a stray
	# leading blank or trailing newline.
	$ret_number_of_pages =~ s/\A\s+|\s+\z//msg;

	# TODO Support roman numbers.
	if ($ret_number_of_pages =~ m/\A[\ 0-9,]+\z/ms) {

		# Split on commas, trim every part and ignore empty ones
		# (',,' or a trailing ',').
		my @numbers;
		foreach my $part (split m/,/ms, $ret_number_of_pages) {
			$part =~ s/\A\s+|\s+\z//msg;
			if ($part ne '') {
				push @numbers, $part;
			}
		}

		# A part like '10 20' is ambiguous; feeding it to sum() would
		# silently count only the leading number. Treat it as uncleanable.
		my $bad_parts = grep { $_ !~ m/\A[0-9]+\z/ms } @numbers;
		if (@numbers && ! $bad_parts) {
			$ret_number_of_pages = sum(@numbers);
		} else {
			$ret_number_of_pages = undef;
		}
	}

	if (! defined $ret_number_of_pages
		|| $ret_number_of_pages !~ m/\A[0-9]+\z/ms) {

		if ($DEBUG) {
			warn "Number of pages '$number_of_pages' couldn't clean.";
		}
		$ret_number_of_pages = undef;
	}

	return $ret_number_of_pages;
}
# Strip the '(OCoLC)' prefix from an OCLC control number.
#
# Arguments:
#   $oclc - raw OCLC value, may be undef.
# Returns:
#   - nothing (bare return) when $oclc is undef,
#   - the value with a leading '(OCoLC)' removed otherwise; values without
#     the prefix are returned unchanged.
sub clean_oclc {
	my ($oclc) = @_;

	return if ! defined $oclc;

	# Work on a copy so the caller's value is never modified.
	(my $without_prefix = $oclc) =~ s/^\(OCoLC\)//ms;

	return $without_prefix;
}
sub clean_publication_date {
my $publication_date = shift;
my $ret_publication_date = $publication_date;
my ($start_date, $end_date, $dash);
if ($ret_publication_date =~ m/^([^-]+)(\-?)(.*)$/ms) {
$start_date = $1;
$dash = $2;
$end_date = $3;
}
# Remove [] on begin and end.
# XXX [] is circa
$start_date = _remove_square_brackets($start_date);
if (defined $end_date) {
$end_date = _remove_square_brackets($end_date);
}
# Detect circa.
my $option;
foreach my $date ($start_date, $end_date) {
if (defined $date && ($date =~ s/^c(.*)$/$1/ms
|| $date =~ s/^(.*)\?$/$1/ms)) {
# XXX Circa of start and end
$option = 'circa';
}
}
# Combine back.
$ret_publication_date = $start_date;
if ($dash) {
$ret_publication_date .= $dash;
( run in 0.685 second using v1.01-cache-2.11-cpan-71847e10f99 )