using System;
using System.Collections.Generic;
using System.Linq;
using System.IO;
using System.Text;
namespace RegexGenerator
{
/// <summary>
/// Generates random strings that match a small regular-expression dialect.
/// </summary>
/// <remarks>
/// Supported syntax: literal characters, backslash escapes, '.' (any printable
/// 7-bit ASCII character), bracketed character classes with ranges such as
/// "[a-z0-9_]", and the postfix quantifiers '*' and '+'. Every other character
/// (for example '?', '|', '(' or '^') is treated as a literal.
///
/// The pattern is compiled into a graph of states. A state is the list of
/// <see cref="Path"/>s leaving it; generation walks the graph from
/// <c>startState</c>, picking one outgoing path at random at every step, until
/// it reaches a state that has no outgoing paths.
/// </remarks>
class RegexGenerator
{
    // Value carried by a path that emits no character (a "free" move).
    // Consequence: a literal NUL character in a pattern is never emitted.
    const char Epsilon = '\0';

    // For simplicity's sake, we'll limit the options to 7-bit ASCII:
    // code points 32 (space) through 126 ('~'). Built once when the type is
    // initialised, so no lazy null check is needed.
    static readonly char[] _allPrintableChars =
        Enumerable.Range(32, 127 - 32).Select(i => (char)i).ToArray();

    /// <summary>The characters that '.' may expand to.</summary>
    static char[] AllPrintableChars
    {
        get { return _allPrintableChars; }
    }

    /// <summary>
    /// One outgoing transition of a state: the character to emit
    /// (<see cref="Epsilon"/> for none) and the state reached afterwards.
    /// </summary>
    struct Path
    {
        public readonly char Value;
        public readonly List<Path> NextState;

        public Path(char value, List<Path> nextState)
        {
            Value = value;
            NextState = nextState;
        }
    }

    static readonly Random Rand = new Random(); // random generator

    // Entry state of the compiled pattern.
    readonly List<Path> startState;

    /// <summary>Compiles <paramref name="regex"/> into a state graph.</summary>
    /// <param name="regex">Pattern in the dialect described on the class.</param>
    /// <exception cref="FormatException">The pattern is malformed.</exception>
    RegexGenerator(string regex)
    {
        startState = new List<Path>();
        List<Path> currentState = startState;

        using (StringReader sr = new StringReader(regex))
        {
            char[] pathValues;
            while (GetNextValueSet(sr, out pathValues))
            {
                // NOTE: an empty class ("[]") not followed by '*' adds no
                // transitions, so generation ends at this state and the rest
                // of the pattern is never emitted.
                List<Path> newState = new List<Path>();
                int quantifier = sr.Peek();

                if (quantifier == '*')
                {
                    sr.Read(); // consume '*'

                    // Zero or more: either move on for free, or emit one
                    // character and come straight back to this state.
                    currentState.Add(new Path(Epsilon, newState));
                    AddPaths(currentState, pathValues, currentState);
                }
                else
                {
                    // One mandatory occurrence leading to the next state.
                    AddPaths(currentState, pathValues, newState);

                    if (quantifier == '+')
                    {
                        sr.Read(); // consume '+'

                        // One or more: the state reached after the mandatory
                        // occurrence may loop on the same characters, or move
                        // on for free to a fresh state.
                        List<Path> loopState = newState;
                        AddPaths(loopState, pathValues, loopState);
                        newState = new List<Path>();
                        loopState.Add(new Path(Epsilon, newState));
                    }
                }

                currentState = newState;
            }
        }
    }

    // Adds one path per character in 'values', all leading from 'from' to 'to'.
    static void AddPaths(List<Path> from, char[] values, List<Path> to)
    {
        foreach (char value in values)
        {
            from.Add(new Path(value, to));
        }
    }

    /// <summary>
    /// Reads the next pattern element and reports the characters it can produce.
    /// </summary>
    /// <param name="sr">Reader positioned at the start of the next element.</param>
    /// <param name="pathValues">
    /// Receives the candidate characters, or <c>null</c> at end of pattern.
    /// </param>
    /// <returns><c>false</c> when the pattern is exhausted; otherwise <c>true</c>.</returns>
    /// <exception cref="FormatException">
    /// A quantifier appears where an element is expected, the pattern ends in
    /// a backslash, or a character class is malformed.
    /// </exception>
    static bool GetNextValueSet(StringReader sr, out char[] pathValues)
    {
        int c = sr.Read();
        switch (c)
        {
            case -1:
                pathValues = null;
                return false;

            case '\\':
                // The escaped character is taken literally, whatever it is.
                c = AssertRead(sr, "Pattern ends in trailing backslash.");
                pathValues = new char[] { (char)c };
                return true;

            case '*':
            case '+':
                // Quantifiers are consumed by the constructor right after an
                // element; meeting one here means it has nothing to apply to.
                throw new FormatException("Syntax error: Invalid character ('" + (char)c + "')");

            case '.':
                pathValues = AllPrintableChars;
                return true;

            case '[':
                pathValues = ReadCharacterClass(sr);
                return true;

            default:
                pathValues = new char[] { (char)c };
                return true;
        }
    }

    // Parses the remainder of a "[...]" class (the '[' is already consumed)
    // and returns its distinct member characters.
    static char[] ReadCharacterClass(StringReader sr)
    {
        List<char> members = new List<char>();
        int c;
        while ((c = AssertRead(sr, "Missing close bracket.")) != ']')
        {
            if (c == '\\')
            {
                c = AssertRead(sr, "Pattern ends in trailing backslash.");
            }

            int next = sr.Peek();
            if (next == -1)
            {
                throw new FormatException("Syntax Error: Missing close bracket.");
            }

            if (next != '-')
            {
                members.Add((char)c);
                continue;
            }

            sr.Read(); // consume '-'

            // Read the upper bound through AssertRead so that a pattern ending
            // right after the dash (e.g. "[a-") is reported as an unterminated
            // class rather than as an out-of-order range.
            int upper = AssertRead(sr, "Missing close bracket.");
            if (upper == ']')
            {
                // A dash just before ']' is a literal dash, not a range.
                members.Add((char)c);
                members.Add('-');
                break;
            }

            if (upper == '\\')
            {
                upper = AssertRead(sr, "Pattern ends in trailing backslash.");
            }

            if (upper < c)
            {
                throw new FormatException("Syntax Error: Range out of order.");
            }

            members.AddRange(Enumerable.Range(c, upper - c + 1).Select(i => (char)i));
        }

        return members.Distinct().ToArray();
    }

    /// <summary>Reads one character, failing if the input is exhausted.</summary>
    /// <param name="reader">Reader to consume from.</param>
    /// <param name="message">Detail appended to "Syntax error: " on failure.</param>
    /// <returns>The character read, as an <c>int</c>.</returns>
    /// <exception cref="FormatException">No more input is available.</exception>
    static int AssertRead(TextReader reader, string message)
    {
        int c = reader.Read();
        if (c == -1)
        {
            throw new FormatException("Syntax error: " + message);
        }
        return c;
    }

    /// <summary>
    /// Walks the state graph from the start state, choosing a random outgoing
    /// path at each step, and returns the characters emitted along the way.
    /// </summary>
    string GenerateRandomPattern()
    {
        StringBuilder sb = new StringBuilder();
        List<Path> currentState = startState;

        // A state without outgoing paths marks the end of the pattern.
        while (currentState.Count > 0)
        {
            Path pathToTake = currentState[Rand.Next(currentState.Count)];
            if (pathToTake.Value != Epsilon)
            {
                sb.Append(pathToTake.Value);
            }
            currentState = pathToTake.NextState;
        }

        return sb.ToString();
    }

    /// <summary>
    /// Demo entry point: prints one random match for each sample pattern, or
    /// the error message if a pattern fails to compile.
    /// </summary>
    static void Main(string[] args)
    {
        try
        {
            var gen = new RegexGenerator("[A-Za-z0-9$.+!*'(){},~:;=@#%_\\-]*");
            Console.WriteLine(gen.GenerateRandomPattern());
            gen = new RegexGenerator("ab[c-l]+jkm9*10+");
            Console.WriteLine(gen.GenerateRandomPattern());
            gen = new RegexGenerator("iqb[beoqob-q]872+0qbq*");
            Console.WriteLine(gen.GenerateRandomPattern());
        }
        catch (Exception e)
        {
            Console.WriteLine(e.Message);
        }
        //Console.Read();
    }
}
}