ultraedit results from the CPAN

ultraedit

text-highlight

view release on metacpan or search on metacpan

lib/Text/Highlight.pm view on Meta::CPAN

	}
}

#load syntax from a separate grammar file
sub get_syntax
{
	my $self = shift;
	my %args = @_ if(@_ % 2 == 0);
	my($type,$grammar,$format,$force);
	if(exists $args{type} && exists $args{grammar})
	{
		$type = $args{type};
		$grammar = $args{grammar};
		$format = $args{format};
		$force = $args{force};
	}
	else
	{
		$type = shift;
		$grammar = shift;
		$format = shift;
		$force = shift;
	}
	
	unless($type) {
		cluck "You must specify a type.\n";
		return undef;
	}
	
	#check if syntax for this type is already loaded and reload isn't forced
	return $self->{_grammars}{$type} if(!$force && exists $self->{_grammars}{$type});
	
	unless($grammar) {
		cluck "No grammar for '$type' found.\n";
		return undef;
	}
	
	#check if a hashref was passed in instead of a filename
	if(ref $grammar eq 'HASH') {
		$self->{_grammars}{$type} = $grammar;
		return $grammar;
	}
	
	#holds the grammar structure
	#initialize and set common defaults in case of incomplete grammar
	my %syntax = (
		name => 'Unknown-type',
		escape => '\\',
		case => 1,
		continueQuote => 0,
		blockCommentOn => [],
		lineComment => [],
		quot => [],
	);
	#attempt to open grammar file
	open FH, $grammar or croak "Cannot open '$grammar' to find syntax for '$type': $!";
	
	if($format eq 'editplus') {
		_get_syntax_editplus(\%syntax, \*FH);
	}
	elsif($format eq 'ultraedit') {
		_get_syntax_ultraedit(\%syntax, \*FH);
	}
	#else return a non-function yet parsable %syntax, might be desired?
	
	close FH;
	$self->{_grammars}{$type} = \%syntax;
	
	#dump the syntax table to stderr (less screen space than Data::Dumper)
	#print STDERR "$_ : ".((ref $syntax{$_} eq 'HASH') ? join(' | ', keys %{$syntax{$_}}) : (ref $syntax{$_} eq 'ARRAY') ? join(' | ', @{$syntax{$_}}) : $syntax{$_})."\n" for(keys %syntax);

	return $self->{_grammars}{$type};
}

sub _get_syntax_editplus
{
	my $syntax = shift;
	my $FH = shift;
	
	#make sure we break on newlines
	local $/ = "\n";

	my $key = 1;

	while(<$FH>)
	{
		#comment and blank lines ignored
		next if(/^;/ || !/./);

		#search for each type
		$syntax->{name} = $1                if(/^\#TITLE=(.+?)$/i);
		$syntax->{delimiters} = $1          if(/^\#DELIMITER=(.+?)$/i);
		$syntax->{escape} = $1              if(/^\#ESCAPE=(.+?)$/i);
		$syntax->{case} = 0                 if(/^\#CASE=n$/i);
		$syntax->{case} = 1                 if(/^\#CASE=y$/i);
		$syntax->{continueQuote} = 0        if(/^\#CONTINUE_QUOTE=n$/i);
		$syntax->{continueQuote} = 1        if(/^\#CONTINUE_QUOTE=y$/i);

		$syntax->{blockCommentOn}[0] = $1   if(/^\#COMMENTON=(.+?)$/i);
		$syntax->{blockCommentOff}[0] = $1  if(/^\#COMMENTOFF=(.+?)$/i);
		$syntax->{blockCommentOn}[1] = $1   if(/^\#COMMENTON2=(.+?)$/i);
		$syntax->{blockCommentOff}[1] = $1  if(/^\#COMMENTOFF2=(.+?)$/i);

		push @{$syntax->{lineComment}}, $1  if(/^\#LINECOMMENT\d?=(.+?)$/i);
		push @{$syntax->{quot}}, $1         if(/^\#QUOTATION\d?=(.+?)$/i);

		if(/^\#KEYWORD/ && $key <= $KEYMAX)
		{
			while(defined($_ = <$FH>) && !/^\#/)
			{
				#comment and blank lines ignored
				next if(/^;/ || !/./);
				chomp;

				#the escape character is ^ and possible escape sequences are ^^ ^; ^#
				s/\^([;^#])/$1/g;

				#save the literal if case sensitive, else lc it as key
				if($syntax->{case}){
					$syntax->{"key$key"}{$_} = $_;
				} else {
					$syntax->{"key$key"}{lc($_)} = $_;
				}
			}
			$key++;             #for next potential key group
			redo unless(eof);   #back to the top of the while without hitting <FILE> again, assuming not EOF
		}
	}
}

sub _get_syntax_ultraedit
{
	my $syntax = shift;
	my $FH = shift;
	
	#make sure we break on newlines
	local $/ = "\n";
		
	while(<$FH>)
	{
		$syntax->{name} = $1                   if(/^\/L\d+"(.+?)"/i);
		$syntax->{escape} = $1                 if(/Escape Char = (\S+)/);
		$syntax->{case} = 0                    if(/Nocase/);
		push @{$syntax->{quot}}, split //, $1  if(/String Chars = (\S{1,2})/);
		
		$syntax->{blockCommentOn}[0] = $1      if(/Block Comment On = (\S{1,5})/);
		$syntax->{blockCommentOff}[0] = $1     if(/Block Comment Off = (\S{1,5})/);
		$syntax->{blockCommentOn}[1] = $1      if(/Block Comment On Alt = (\S{1,5})/);
		$syntax->{blockCommentOff}[1] = $1     if(/Block Comment Off Alt = (\S{1,5})/);
		
		push @{$syntax->{lineComment}}, $1     if(/Line Comment (?:Alt )?= (\S{1,5})/);
		$syntax->{delimiters} = $1             if(/^\/Delimiters = (.+)$/i);
		
		my($key) = /^\/C(\d+)(?:".+")?$/;
		if($key && $key <= $KEYMAX)
		{
			#any non-escape line
			while(defined($_ = <$FH>) && !/^\/(?!\/)/)
			{
				chomp;

				#escape is a line that starts with //, allows the line to contain / in keywords
				s/^\/\///;

				#keywords are whitespace delimited, and ignore the empty strings with truth test
				for(grep $_, split /\s+/)
				{
					#save the literal if case sensitive, else lc it as key
					if($syntax->{case}){
						$syntax->{"key$key"}{$_} = $_;
					} else {
						$syntax->{"key$key"}{lc($_)} = $_;
					}
				}
			}
			redo unless(eof);   #back to the top of the while without hitting <FILE> again, assuming not EOF
		}
	}
	
	# UE has both quotes enabled by default, so if none were defined, use them
	@{$syntax->{quot}} or push @{$syntax->{quot}}, qw/' "/;
}

1;

__END__

=pod 

=head1 NAME

lib/Text/Highlight.pm view on Meta::CPAN

C<< type => $type >>

=over 4

The C<type> passed in is the name of the type of code. This can either be a type loaded from C<get_syntax> or is the name of a sub-module that has a syntax or highlight method, ie C<Text::Highlight::$type>. 

=back

C<< code => $code >>

=over 4

C<code> is the unmarked-up, unescaped, plain-text code that needs to be highlighted.

=back

C<< options => $options >>

=over 4

C<options> is optional and mostly not needed. Some parsing modules can take extra configuration options, so what C<options> is can vary greatly. Could be a string, a number, or a hashref of many options. The only standard is if it is set to the strin...

=back

=back

C<< $code = $th->output >>

=over 4

Returns the highlighted code from the last time the C<highlight> method was called.

=back

C<< $th->get_syntax($type, $grammar, $format, $force) >>

C<< $th->get_syntax(type => $type, grammar => $grammar, format => $format, force => $force) >>

=over 4

In addition to the existing T::H:: sub-modules, you can specify new ones at runtime via text editor syntax files. Current support is for EditPlus and UltraEdit (both very good text/code editors). Many users make these files available on the web and s...

This method returns a hashref to the parsed syntax if successful, or undef and a clucked error message if not. You can use the returned value as a simple truth test, or you can make your own static sub-module out of it and save reparsing time if you'...

C<< type => $type >>

=over 4

The C<type> is the same that gets passed to C<highlight>, so whatever is specified here must match the call there for use. Also, if the same type is specified as one that already exits as a sub-module (visible in @INC as Text::Highlight::$type), the ...

=back

C<< grammar => $filename | \%syntax >>

=over 4

C<grammar> can be one of two things: the filename containing the syntax, or a hashref to an already parsed language syntax. If a filename, the file must contain only a single language syntax definition. Though some editors allow multiple language def...

=back

C<< format => 'editplus' | 'ultraedit' >>

=over 4

C<format> is a string specifying which format the syntax definition in the file is in. It is not used if C<grammar> is a hashref, but is required if it is a filename. Currently, it must be set to one of the following strings: 'editplus' 'ultraedit'

The syntax for a language is set to the following default hash before parsing the file. This means if any of the options are not set in the syntax file, the default specified here is used instead. If C<format> is not set to a valid string, this defau...

  { name => 'Unknown-type',
    escape => '\\',
    case => 1,
    continueQuote => 0,
    blockCommentOn => [],
    lineComment => [],
    quot => [],
  };

=back

C<< force => 1 >>

=over 4

If C<force> is set to a true value, the grammar specified will always be reparsed, reset, and reloaded. By default, if a grammar is loaded for a C<type> that has already been loaded, the existing copy is used instead and no reparsing is done. This wo...

=back

=back

=head2 Examples:

Until I come up with some better examples, here's the defaults the module uses.

=over 4

  $DEF_FORMAT   = '<span class="%s">%s</span>';
  $DEF_ESCAPE   = \&_simple_html_escape;
  $DEF_WRAPPER  = '<pre>%s</pre>';
  $DEF_COLORS   = { comment => 'comment',
                    string  => 'string',
                    number  => 'number',
                    key1    => 'key1',
                    key2    => 'key2',
                    key3    => 'key3',
                    key4    => 'key4',
                    key5    => 'key5',
                    key6    => 'key6',
                    key7    => 'key7',
                    key8    => 'key8',
  };
  				
  #sub is the same prototype as CGI.pm's escapeHTML()
  #and HTML::Entity's encode_entities()
  sub _simple_html_escape
  {
      my $code = shift;
  	
      #escape the only three characters that "really" matter for displaying html
      $code =~ s/&/&amp;/g;
      $code =~ s/</&lt;/g;
      $code =~ s/>/&gt;/g;
  
      return $code;
  }

( run in 0.524 second using v1.01-cache-2.11-cpan-71847e10f99 )