App-mdee

 view release on metacpan or  search on metacpan

lib/App/Greple/md.pm  view on Meta::CPAN

sub md_color {
    my($label, $text) = @_;
    $cm->color($label, $text);
}

sub mark_color {
    my($type, $text) = @_;
    my $label = "${type}_mark";
    $label = 'emphasis_mark' unless exists $cm->{HASH}{$label}
                                 && $cm->{HASH}{$label} ne '';
    md_color($label, $text);
}

#
# Protection mechanism
#
# SGR 256 placeholders protect processed regions (inline code,
# comments, links) from being matched by later patterns.
#

my @protected;
my($PS, $PE) = ("\e[256m", "\e[m");     # protect start/end markers
my $PR = qr/\e\[256m(\d+)\e\[m/;       # protect restore pattern
my($OS, $OE) = ("\e]8;;", "\e\\");      # OSC 8 start/end markers

sub protect {
    my $text = shift;
    push @protected, $text;
    $PS . $#protected . $PE;
}

sub restore {
    my $s = shift;
    1 while $s =~ s{$PR}{$protected[$1] // die "restore failed: index $1"}ge;
    $s;
}

#
# OSC 8 hyperlink generation
#

sub osc8 {
    return $_[1] unless $config->{osc8};
    my($url, $text) = @_;
    my $escaped = uri_escape_utf8($url, "^\\x20-\\x7e");
    "${OS}${escaped}${OE}${text}${OS}${OE}";
}

#
# Link text inner pattern: backtick spans, backslash escapes, normal chars
#

my $LT = qr/(?:`[^`\n]*+`|\\.|[^`\\\n\]]++)+/;

# Code span pattern (both single and multi-backtick).
# Captures: _bt (backtick delimiter), _content (code body).
# Used directly in inline_code step and as basis for $SKIP_CODE.
my $CODE = qr{(?x)
    (?<_bt> `++ )               # opening backtick(s)
    (?<_content>
        (?: (?! \g{_bt} ) . )+? # content (not containing same-length backticks)
    )
    \g{_bt}                     # closing backtick(s) matching opener
};

# Skip code spans in link/image patterns.
# Used as the first alternative in s{$SKIP_CODE|<link pattern>}{...}ge
# so that code spans are matched and skipped, preventing link/image
# patterns from matching inside them.
my $SKIP_CODE = qr{$CODE (*SKIP)(*FAIL)}x;

#
# colorize() - the main function
#
# Receives entire file content in $_ (--begin with -G --filter).
# Processes all patterns with multiline regexes.
#

#
# Pipeline step class
#

package App::Greple::md::Step {
    sub new {
        my($class, %args) = @_;
        bless \%args, $class;
    }
    sub label  { $_[0]->{label} }
    sub active { !$_[0]->{label} || App::Greple::md::active($_[0]->{label}) }
    sub run    { $_[0]->{code}->() }
}

sub Step {
    my $code = pop;
    my $label = shift;
    App::Greple::md::Step->new(label => $label, code => $code);
}

#
# Pipeline steps: Step(sub{}) = always active, Step(label => sub{}) = controllable
#

my %colorize = (

    code_blocks => Step(sub {
        s{^( {0,3})(`{3,}|~{3,})(.*)\n((?s:.*?))^( {0,3})\2(\h*)$}{
            my($oi, $fence, $lang, $body, $ci, $trail) = ($1, $2, $3, $4, $5, $6);
            my $result = md_color('code_mark', "$oi$fence");
            $result .= md_color('code_info', $lang) if length($lang);
            $result .= "\n";
            if (length($body)) {
                $result .= join '', map { md_color('code_block', $_) }
                    split /(?<=\n)/, $body;
            }
            $result .= md_color('code_mark', "$ci$fence") . $trail;
            protect($result)
        }mge;
    }),

    comments => Step(sub {
        s/(^<!--(?![->])(?s:.*?)-->)/protect(md_color('comment', $1))/mge;
    }),

    image_links => Step(sub {
        s{$SKIP_CODE|\[!\[(?<text>$LT)\]\((?<img>[^)\n]+)\)\]\(<?(?<url>[^>)\s\n]+)>?\)}{
            protect(
                osc8($+{img}, md_color('image_link', "!"))
                . osc8($+{url}, mark_color('link', "[") . md_color('image_link', $+{text}) . mark_color('link', "]"))
            )
        }ge;
    }),

    images => Step(sub {
        s{$SKIP_CODE|!\[(?<text>$LT)\]\(<?(?<url>[^>)\s\n]+)>?\)}{
            protect(osc8($+{url}, md_color('image', "!") . mark_color('link', "[") . md_color('image', $+{text}) . mark_color('link', "]")))
        }ge;
    }),

    links => Step(sub {
        s{$SKIP_CODE|(?<![!\e])\[(?<text>$LT)\]\(<?(?<url>[^>)\s\n]+)>?\)}{
            protect(osc8($+{url}, mark_color('link', "[") . md_color('link', $+{text}) . mark_color('link', "]")))
        }ge;
    }),

    inline_code => Step(code_inline => sub {
        state $to = $config->{tick_open};
        state $tc = $config->{tick_close};
        s{$CODE}{
            my $content = $+{_content};
            # Strip optional leading/trailing space for multi-backtick (CommonMark)
            $content =~ s/^ (.+) $/$1/ if length($+{_bt}) >= 2;
            protect(md_color('code_tick', $to) . md_color('code_inline', $content) . md_color('code_tick', $tc))
        }ge;
    }),

    headings => Step(header => sub {
        my $hashed = $config->{hashed};
        for my $n (reverse 1..6) {
            next unless active("h$n");
            my $hdr = '#' x $n;
            s{^($hdr\h+.*)$}{
                my $line = $1;
                $line .= " $hdr"
                    if $hashed->{"h$n"} && $line !~ /\#$/;
                protect(md_color("h$n", restore($line)));
            }mge;
        }
    }),

    horizontal_rules => Step(horizontal_rule => sub {
        s/^([ ]{0,3}(?:[-*_][ ]*){3,})$/protect(md_color('horizontal_rule', $1))/mge;
    }),

    bold_italic => Step(bold => sub {
        s{$SKIP_CODE|(?<!\\)(?<m>\*\*\*)(?<t>.*?)(?<!\\)\g{m}}{
            protect(mark_color('bold', $+{m}) . md_color('bold', md_color('italic', $+{t})) . mark_color('bold', $+{m}))
        }gep;
        s{$SKIP_CODE|(?<![\\\w])(?<m>___)(?<t>.*?)(?<!\\)\g{m}(?!\w)}{
            protect(mark_color('bold', $+{m}) . md_color('bold', md_color('italic', $+{t})) . mark_color('bold', $+{m}))
        }gep;
    }),

    bold => Step(bold => sub {
        s{$SKIP_CODE|(?<!\\)(?<m>\*\*)(?<t>.*?)(?<!\\)\g{m}}{
            mark_color('bold', $+{m}) . md_color('bold', $+{t}) . mark_color('bold', $+{m})
        }gep;
        s{$SKIP_CODE|(?<![\\\w])(?<m>__)(?<t>.*?)(?<!\\)\g{m}(?!\w)}{
            mark_color('bold', $+{m}) . md_color('bold', $+{t}) . mark_color('bold', $+{m})
        }gep;
    }),

    italic => Step(italic => sub {
        s{$SKIP_CODE|(?<![\\\w])(?<m>_)(?<t>(?:(?!_).)+)(?<!\\)\g{m}(?!\w)}{
            mark_color('italic', $+{m}) . md_color('italic', $+{t}) . mark_color('italic', $+{m})
        }gep;
        s{$SKIP_CODE|(?<![\\*])(?<m>\*)(?<t>(?:(?!\*).)+)(?<!\\)\g{m}(?!\*)}{
            mark_color('italic', $+{m}) . md_color('italic', $+{t}) . mark_color('italic', $+{m})
        }gep;
    }),

    strike => Step(strike => sub {
        s{$SKIP_CODE|(?<!\\)(?<m>~~)(?<t>.+?)(?<!\\)\g{m}}{
            mark_color('strike', $+{m}) . md_color('strike', $+{t}) . mark_color('strike', $+{m})
        }gep;
    }),

    blockquotes => Step(blockquote => sub {
        s/^(>+\h?)(.*)$/md_color('blockquote', $1) . $2/mge;
    }),
);

#
# Pipeline configuration
#

# Always before headings (protection + links)
my @protect_steps = qw(code_blocks comments image_links images links);

# Inline steps controlled by heading_markup
my @inline_steps  = qw(inline_code horizontal_rules bold_italic bold italic strike);

# Always last
my @final_steps   = qw(blockquotes);

sub build_pipeline {
    my $hm = $config->{heading_markup};

    # heading_markup disabled: headings before all inline steps
    if (!$hm) {
        return (@protect_steps, 'headings', @inline_steps, @final_steps);
    }

    # "all" or "1": all inline steps before headings
    my %before;
    if ($hm eq '1' || $hm =~ /^all$/i) {
        %before = map { $_ => 1 } @inline_steps;
    } else {
        # "bold:italic" → collect word tokens, filter to valid inline steps
        my %valid = map { $_ => 1 } @inline_steps;
        %before = map { $_ => 1 } grep { $valid{$_} } ($hm =~ /(\w+)/g);
    }

    my @before_h = grep {  $before{$_} } @inline_steps;
    my @after_h  = grep { !$before{$_} } @inline_steps;

    return (@protect_steps, @before_h, 'headings', @after_h, @final_steps);
}

sub colorize {
    setup_colors();
    @protected = ();

    for my $name (build_pipeline()) {
        my $step = $colorize{$name};
        $step->run if $step->active;
    }

    $_ = restore($_);
    $_;
}

#

lib/App/Greple/md.pm  view on Meta::CPAN

    }mge;
}

sub parse_separator {
    my $blockref = shift;
    my $SEP = qr/^\h*+\|(\h*+:?+-++:?+\h*+\|)++\h*+$/mn;
    my ($sep_line) = $$blockref =~ /($SEP)/;
    return ([], []) unless defined $sep_line;
    my @cells = split /\|/, $sep_line, -1;
    shift @cells; pop @cells;
    s/^\h+|\h+$//g for @cells;
    my @right  = grep { $cells[$_-1] =~ /^-+:$/  } 1..@cells;
    my @center = grep { $cells[$_-1] =~ /^:-+:$/ } 1..@cells;
    # Minimize dashes so separator width doesn't inflate column widths
    $$blockref =~ s{$SEP}{ ${^MATCH} =~ s/:?-+:?/-/gr }mpe;
    (\@right, \@center);
}

sub call_ansicolumn {
    my ($text, @args) = @_;
    require Command::Run;
    require App::ansicolumn;
    Command::Run->new
        ->command(\&App::ansicolumn::ansicolumn, @args)
        ->with(stdin => $text,
               $config->{nofork} ? (nofork => 1, raw => 1) : ())
        ->update
        ->data // '';
}

sub fix_separator {
    my ($text, $sep) = @_;
    my $sep_re = $sep eq "\x{2502}" ? "\x{2502}" : '\\|';
    $text =~ s{^(\h*?)($sep_re)?((?:\h*-+\h*$sep_re)*\h*-+\h*)($sep_re)?(\h*?)$}{
        my($pre, $left, $mid, $right, $post) = ($1, $2, $3, $4, $5);
        if ($sep eq "\x{2502}") {
            ($pre  =~ tr[ ][\x{2500}]r)
            . (defined $left  ? "\x{251C}" : '')
            . ($mid =~ tr[\x{2502} -][\x{253C}\x{2500}\x{2500}]r)
            . (defined $right ? "\x{2524}" : '')
            . ($post =~ tr[ ][\x{2500}]r)
        } else {
            ($pre  =~ tr[ ][-]r)
            . (defined $left  ? '|' : '')
            . ($mid =~ tr[ ][-]r)
            . (defined $right ? '|' : '')
            . ($post =~ tr[ ][-]r)
        }
    }xmeg;
    $text;
}

1;

__DATA__

option default \
    -G --filter --filestyle=once --color=always \
    --begin &__PACKAGE__::begin

define {CODE_BLOCK}  ^ {0,3}(?<bt>`{3,}+|~{3,}+)(.*)\n((?s:.*?))^ {0,3}(\g{bt})
define {COMMENT}     ^<!--(?![->])(?s:.+?)-->
define {TABLE}       ^ {0,3}([│|├].+[│|┤]\n){3,}
define {LIST_ITEM}   ^\h*(?:[*-]|(?:\d+|#)[.)])\h+.*\n
define {DEFINITION}  (?:\A|\G\n|\n\n).+\n\n?(:\h+.*\n)

option --fold-by \
    -Mtee "&ansifold" --crmode \
        --autoindent='^\h*(?:[*-]|(?:\d+|#)[.)]|:)\h+|^\h+' \
        --smart --width=$<shift> \
    -- \
    --exclude {CODE_BLOCK} \
    --exclude {COMMENT} \
    --exclude {TABLE} \
    --cm N -E {LIST_ITEM} \
    --cm N -E {DEFINITION} \
    --crmode



( run in 1.155 second using v1.01-cache-2.11-cpan-5a3173703d6 )