#!/usr/bin/perl
use strict;
use warnings;
my $file = "STDIN"; #$ARGV[0];
my $debug = 0;
my ($rest, $code, $ignore) = ("");
my $lineNumber = 1;
my $topLineOfBlock;
#open(my $inputFH, "<:raw:crlf", $file) or die "can't open $file for reading. $!";
#open(my $outputFH, ">:raw:crlf", "$file.out") or die "can't open $file.out for writing. $!";
my $inputFH = \*STDIN;
my $complete = 1; # NOTE: These 2 must be declared in the base scope of the package.
my $lineOffset; # See https://r...content-available-to-author-only...l.org/Ticket/Display.html?id=120554 for details.
while ($_ = <$inputFH>, !eof($inputFH) or length($rest) != 0) {
$topLineOfBlock = $lineNumber;
print "Read line $lineNumber\n" if $debug; {
$_ = "$rest$_";
}
else
{
$_ = $rest;
}
my $loopAgain;
do
{
if (/\\$/) # if line ends with '\' read and append in next line
{
$complete = 0;
}
elsif (eof($inputFH) or /;\s*+$/) # if eof or line does end in a ';', break it up. # otherwise read and append in next line.
{
print "INPUT: '$_'\n" if $debug; use re 'eval';
m%
(?{print "STRING: '${^POSTMATCH}'\n" if $debug}) (?{$lineOffset = 0})
# ROUTINES
(?!) # Ignore this section for matching
# DEBUG ROUTINES
# Call them using (?N) where N is the corrisponding number.
((?{print "1]'${^MATCH}'\n" if $debug})) ((?{print "2]'${^MATCH}'\n" if $debug})) ((?{print "3]'${^MATCH}'\n" if $debug})) ((?{print "4]'${^MATCH}'\n" if $debug})) ((?{print "5]'${^MATCH}'\n" if $debug})) ((?{print "6]'${^MATCH}'\n" if $debug})) ((?{print "7]'${^MATCH}'\n" if $debug})) ((?{print "8]'${^MATCH}'\n" if $debug})) ((?{print "9]'${^MATCH}'\n" if $debug})) ((?{print "10]'${^MATCH}'\n" if $debug})) ((?{print "11]'${^MATCH}'\n" if $debug})) ((?{print "12]'${^MATCH}'\n" if $debug})) ((?{print "13]'${^MATCH}'\n" if $debug})) ((?{print "14]'${^MATCH}'\n" if $debug})) ((?{print "15]'${^MATCH}'\n" if $debug})) ((?{print "16]'${^MATCH}'\n" if $debug})) ((?{print "17]'${^MATCH}'\n" if $debug})) ((?{print "18]'${^MATCH}'\n" if $debug})) ((?{print "19]'${^MATCH}'\n" if $debug}))
# SUBROUTINES
# States that code read in is in an incomplete state.
(?<INCOMPLETE>(?{print "INCOMPLETE: '${^MATCH}'\n" if $debug; $complete = 0;}))
# States that code read in is in a completed state.
(?<COMPLETE> (?{print "COMPLETE: '${^MATCH}'\n" if $debug; $complete = 1;}))
# Matches against one character that has been escaped including EOL.
# If a quoted EOL found, mark match as incomplete.
(?<ESCAPED_CHAR>
\\
(?:
(?&EOL) (?&INCOMPLETE)
)
)
# Matches against a single quoted string excluding EOL.
(?<SINGLE_QUOTED_STRING>
'(?&INCOMPLETE) # Adding ' to limit to syn hilight bug in ideone.com
(?:
[^'\\\\n]++ # Adding ' to limit to syn hilight bug in ideone.com
| (?&ESCAPED_CHAR)
)*+
'(?&COMPLETE) # Adding ' to limit to syn hilight bug in ideone.com
)
# Matches against a double quoted string excluding EOL.
(?<DOUBLE_QUOTED_STRING>
"(?&INCOMPLETE) # Adding " to limit to syn hilight bug in ideone.com
(?:
[^"\\\n]++ # Adding " to limit to syn hilight bug in ideone.com
| (?&ESCAPED_CHAR)
)*+
"(?&COMPLETE) # Adding " to limit to syn hilight bug in ideone.com
)
# matches strings intermingled with other chars excluding EOL.
(?<STRINGS_WITH_CHARS>
(?:
[^\\\n"']++ # Adding " to limit to syn hilight bug in ideone.com
| (?&DOUBLE_QUOTED_STRING)
| (?&SINGLE_QUOTED_STRING)
| (?&ESCAPED_CHAR)
)*+
)
# Matches against non escaped characters excluding EOL.
(?<NON_ESCAPED_CHARS> [^\\\n]++)
# Matches all non escaped chars and escaped chars.
# upto but not including the EOL unless it's escaped.
(?<CHARS> (?:(?&NON_ESCAPED_CHARS)|(?&ESCAPED_CHAR))*+)
# Matches EOL (end of line) or EOS (end of string) and states it is in a complete state.
(?<EOL_OR_EOS> (?&EOL) | $ (?&COMPLETE))
# Matches on EOL and increments $lineOffset if matched.
# When using this, make sure you don't allow backtracking over this call.
(?<EOL> \n(?{++$lineOffset}))
| # ACTUAL SEARCH
(?<ignore>
(?:
(?&EOL)? ^ [^\S\n]*+ \# (?&STRINGS_WITH_CHARS) (?&EOL_OR_EOS) # preprocessor statement
| \
s*+ // (?&CHARS) (?&EOL_OR_EOS) # line comment | \
s*+ /\
* (?&INCOMPLETE) # block comment (?:
[^*]++
| \* (?!/)
)*+
\*/ (?&COMPLETE) # block comment completed
)*+
)
(?(?{$complete}) # completed parse of all ignored stuff? Then read code stuff.
(?<code>
(?:
(?!^[^\S\n]*+\#) # do not match on a preprocessor statement
(?:
(?&DOUBLE_QUOTED_STRING) # found a string
| (?&SINGLE_QUOTED_STRING) # found a string
| (?: [^'"/\n]++ | /[^*/\n]) # found something not a string or comment or newline
# Adding ' to limit to syn hilight bug in ideone.com
| (?&EOL) # newline
)
)*+
)
(?<rest>
(?s).* # match to the end of the string )
) # if not completed, read in more stuff and do parse over again.
%xmp;
($code, $ignore, $rest) = ($+{'code'}, $+{'ignore'}, $+{'rest'});
print "**COMPLETE = $complete\n" if $debug; goto BLOCK_READ_COMPLETE
if $complete or eof($inputFH); }
# read in more data to allow for a complete parse
++$lineNumber;
print "Reading line $lineNumber\n" if $debug;
my $newStuff = <$inputFH>;
{
$_ .= $newStuff;
$loopAgain = 1;
}
else
{
$loopAgain = 0;
}
} while ($loopAgain);
BLOCK_READ_COMPLETE:
$complete or die "Something wasn't terminated at line ". ($topLineOfBlock+$lineOffset) ." of file '$file'.\n";
# do transformation on $code
print "CODE: >>$code<<\nIGNORE: >>$ignore<<\nREST: >>$rest<<\n" if $debug; # print $outputFH "$ignore$code";
print "$ignore$code" if !$debug; }