split m results from the CPAN

split m
FunctionalPerl
view release on metacpan or search on metacpan
examples/hiring-without-whiteboards view on Meta::CPAN
    }
}

# Both predicates are built by is_heading_of (defined earlier in this
# file, outside this excerpt); the callback passed in receives one
# argument.

# is_heading: the callback accepts unconditionally (always returns 1).
*is_heading = is_heading_of(sub ($title) { 1 });

# is_AlsoSee: the callback matches "also see" case-insensitively, with
# any number of spaces (including none) between the two words.
*is_AlsoSee = is_heading_of(sub ($title) { $title =~ /also *see/i });

# True when $s is a horizontal-rule line: exactly three dashes at the
# start of the string, followed by nothing but optional whitespace.
sub is_hr($s) {
    return $s =~ /^---\s*$/;
}

# True when $s contains nothing at all, or only whitespace.
sub is_empty($s) {
    return $s =~ /^\s*$/;
}

# Map a country / region string to a value: the %USfromCode entry for
# $str when one is defined, otherwise NonUSCountry($str).
sub parse_country($str) {
    my $known = $USfromCode{$str};
    defined $known ? $known : NonUSCountry($str)
}

# Parse a single location string into a location value.
#
# Fields are separated by commas; whitespace around each comma is
# ignored. Accepted shapes:
#
#   "remote" (any letter case)  -> $Remote
#   "<place>"                   -> parse_country(<place>)
#   "<city>, <place>"           -> City(<city>, parse_country(<place>))
#   "<city>, <state>, USA"      -> City(<city>, parse_country(<state>)),
#                                  provided that value answers true to
#                                  ->is_USA
#
# Dies when:
#   - there are no fields at all (empty string, or only commas),
#   - there are three fields and the last one is not exactly "USA",
#   - there are three fields ending in "USA" but the middle one does not
#     parse to something for which ->is_USA is true,
#   - there are more than three fields.
sub parse_location($str) {
    return $Remote if $str =~ /^remote$/i;

    my @s      = split /\s*,\s*/, $str;
    my $fields = @s;

    # split drops trailing empty fields, so "" and comma-only strings
    # yield zero fields. Report that directly instead of letting it fall
    # through to the (wrong) "more than two commas" message.
    $fields or die "empty location in: '$str'";

    return parse_country($s[0]) if $fields == 1;

    if ($fields == 2) {
        my ($city, $country) = @s;
        return City($city, parse_country($country));
    }

    $fields == 3 or die "more than two commas in: '$str'";

    my ($city, $state, $country) = @s;

    # Three-part locations are only understood for the USA.
    $country eq "USA"
        or die
        "don't know how to deal with presumed state '$state' in country '$country': don't know that country";

    my $region = parse_country($state);
    $region->is_USA
        or die "presumed state '$state' is not a state in the USA";

    City($city, $region)
}

# Parse one list-item line of the form
#
#   - [Name](url) | locations | process description
#
# into Company(name, url, list-of-locations, process). The process part
# is optional (it is undef when absent). Dies if the line does not start
# with "-", lacks the "[..](..)" link, or the text after the link does
# not split on "|" into 2 or 3 parts.
sub parse_line($line) {
    my $s = $line;
    $s =~ s/^-\s*// or die "line is not an item";

    $s =~ /^\[(.*?)\] *\((.*?)\)\s*(.*)$/
        or die "missing link formatting in: '$s'";
    my ($name, $url, $rest) = ($1, $2, $3);

    my @p     = split /\s*\|\s*/, $rest;
    my $parts = @p;
    die "rest does not contain 2 or 3 parts: '$rest'"
        unless $parts == 2 || $parts == 3;

    # The first part is discarded; the test line in this file has
    # nothing before its first "|".
    my (undef, $locations, $maybe_process) = @p;

    # /, ; and & used inconsistently:
    my @parsed = map { parse_location($_) } split m%\s*[/;&]\s*%, $locations;

    Company($name, $url, list(@parsed), $maybe_process)
}

# Example-based test for parse_line: one company line with three
# "/"-separated locations (a non-US city, a US city, and "Remote") plus
# a process description that itself contains a "/". The Company(...)
# value after the block is the expected result -- presumably compared by
# TEST, which is defined outside this excerpt.
TEST {
    parse_line
        "- [Accredible](https://www.accredible.com/careers) | Cambridge, UK / San Francisco, CA / Remote | Take home project, then a pair-programming and discussion onsite / Skype round."
}
Company(
    "Accredible",
    "https://www.accredible.com/careers",
    list(
        City('Cambridge',     NonUSCountry('UK')),
        City('San Francisco', USPSCode('California', 'CA', 1)),
        Remote()
    ),
    "Take home project, then a pair-programming and discussion onsite / Skype round."
);

# XX move?; name?
# Method added to FP::Abstract::Sequence: drop leading elements that do
# NOT satisfy $pred, then drop the following run of elements that DO
# satisfy it, and return what remains.
sub FP::Abstract::Sequence::drop_over ($l, $pred) {
    my $from_first_match = $l->drop_while(complement($pred));
    $from_first_match->drop_while($pred)
}

# The data lines of $file: everything after the first run of "---" rule
# lines, up to (not including) the first line for which is_AlsoSee is
# true, with blank lines removed.
sub datalines () {
    my $lines      = xfile_lines_chomp("$file", "UTF-8");
    my $after_rule = $lines->drop_over(\&is_hr);
    my $body       = $after_rule->take_while(complement(\&is_AlsoSee));
    $body->filter(complement(\&is_empty))
}

# All company entries from the data lines, each parsed with parse_line.
sub companies () {

    # Simply ignore the grouping headings.
    my $items = datalines()->filter(complement(\&is_heading));
    $items->map(\&parse_line)
}

# Parse a grouping heading such as "## A - C" into
# InclusiveRange(from, to). Both ends are single word characters; the
# upper end may instead be "#" or "\#", in which case the backslash is
# stripped. Dies if $str does not have that shape.
sub parse_heading($str) {
    $str =~ /^#+\s+(\w)\s*-\s*(\w|\\?#)\s*$/
        or die "not a heading: '$str'";
    my ($from, $to) = ($1, $2);

    # Drop the escaping backslash from "\#".
    $to =~ s/^\\//;

    InclusiveRange($from, $to)
}

sub grouped_companies_from($datalines) {

    # Capture the groupings as well, as the original file is badly
    # grouping them, so to keep a diff minimal we first have to
    # maintain the wrong grouping before re-grouping automatically.
    if ($datalines->is_null) {
        null
    } else {
        my ($heading, $r) = $datalines->first_and_rest;
        if (is_heading($heading)) {
            my ($groupitems, $r)
                = $r->take_while_and_rest(complement \&is_heading);
            cons(
                Group(parse_heading($heading), $groupitems->map(\&parse_line)),
                grouped_companies_from $r)
        } else {
            die "expecting a header, got: '$heading'";
        }
( run in 0.977 second using v1.01-cache-2.11-cpan-71847e10f99 )