<?php
/**
 * Tiny lexer/parser for bracketed, comma-separated lists.
 *
 * The input is split into tokens using the regular expressions in
 * $_terminals, and the token stream is then folded into nested PHP arrays:
 * every "[" ... "]" pair becomes one nested array, quoted items and digit
 * runs become string elements, and a comma that is directly followed by
 * another comma contributes an empty-string element.
 */
class Lexer
{
    /**
     * Terminal patterns mapped to token names.
     *
     * Patterns are tried in declaration order against the unread remainder of
     * the input, so every pattern is anchored with "^". Capture group 1 is the
     * text that gets consumed.
     *
     * @var array regex pattern => token name
     */
    protected static $_terminals = array(
        // Opening / closing bracket of a (possibly nested) list.
        '~^(\[)~'             => "T_OPEN",
        '~^(\])~'             => "T_CLOSE",
        // Double-quoted item; ends at the first quote that is not preceded by
        // a backslash. The surrounding quotes are part of the match.
        '~^(".+?(?<!\\\\)")~' => "T_ITEM",
        // Comma NOT followed by another comma: a plain separator.
        '~^(,)(?!,)~'         => "T_SEPARATOR",
        // Run of digits (kept as a string).
        '~^(\d+)~'            => "T_NUMBER",
        // Comma followed by another comma: marks an empty element.
        '~^(,)(?=,)~'         => "T_EMPTY",
    );

    /**
     * Tokenizes $line and builds the nested array structure it describes.
     *
     * @param string $line Input text, e.g. '[["a",,1],"b"]'.
     *
     * @return array Elements found at the top level of the input; each
     *               bracketed group is represented by a nested array.
     *
     * @throws Exception When no terminal matches at some offset, or when the
     *                   brackets in the input are not balanced.
     */
    public static function run($line)
    {
        // Start from an empty list so that empty input yields an empty result
        // instead of handing an undefined variable to _generate().
        $tokens = array();
        $offset = 0;
        $length = strlen($line);
        $depth  = 0;

        while ($offset < $length) {
            $result = static::_match($line, $offset);
            if ($result === false) {
                // Report the byte offset; doing arithmetic on the input string
                // itself would raise a TypeError on PHP 8 for typical input.
                throw new Exception("Unable to parse line at offset " . $offset . ".");
            }

            // Track bracket nesting here so _generate() only ever sees a
            // balanced token stream.
            if ($result['token'] === 'T_OPEN') {
                $depth++;
            } elseif ($result['token'] === 'T_CLOSE') {
                $depth--;
                if ($depth < 0) {
                    throw new Exception("Unable to parse line: unexpected ']' at offset " . $offset . ".");
                }
            }

            $tokens[] = $result;
            $offset += strlen($result['match']);
        }

        if ($depth !== 0) {
            throw new Exception("Unable to parse line: " . $depth . " unclosed '['.");
        }

        return static::_generate($tokens);
    }

    /**
     * Finds the first terminal that matches $line starting at $offset.
     *
     * @param string $line   Full input text.
     * @param int    $offset Byte offset at which matching starts.
     *
     * @return array|false array('match' => consumed text, 'token' => token
     *                     name) for the first matching pattern, or false when
     *                     no pattern matches.
     */
    protected static function _match($line, $offset)
    {
        $string = substr($line, $offset);

        foreach (static::$_terminals as $pattern => $name) {
            if (preg_match($pattern, $string, $matches)) {
                return array(
                    'match' => $matches[1],
                    'token' => $name,
                );
            }
        }

        return false;
    }

    /**
     * Recursively builds the nested structure from a token list.
     *
     * @param array $arr Token list as produced by _match(): each entry has a
     *                   'match' and a 'token' key.
     * @param int   $idx Index of the first token to consume.
     *
     * @return array When a T_CLOSE token is reached: a pair
     *               array(elements, index of that T_CLOSE token). When the
     *               token list is exhausted: the plain list of elements.
     */
    protected static function _generate($arr = array(), $idx = 0)
    {
        // Always start from an empty list so "[]" produces an empty array.
        $output = array();
        $count  = count($arr);

        for ($i = $idx; $i < $count; $i++) {
            $type    = $arr[$i]["token"];
            $element = $arr[$i]["match"];

            switch ($type) {
                case 'T_OPEN':
                    // Parse the nested list, then resume at its closing
                    // bracket; the loop increment steps past it.
                    list($out, $index) = static::_generate($arr, $i + 1);
                    $output[] = $out;
                    $i = $index;
                    break;

                case 'T_CLOSE':
                    // Hand the finished list and our position to the caller.
                    return array($output, $i);

                case 'T_ITEM':
                case 'T_NUMBER':
                    $output[] = $element;
                    break;

                case 'T_EMPTY':
                    $output[] = "";
                    break;

                // T_SEPARATOR carries no value and is skipped.
            }
        }

        return $output;
    }
}
// Sample input: three levels of nesting, a quoted item containing
// backslash-escaped quotes, consecutive commas and a bare number.
// A nowdoc keeps the text exactly as written (no escape processing).
$input = <<<'SAMPLE'
[[["Hello, \"how\" are you?","Good!",,,123]],,"ok"]
SAMPLE;

// Tokenize the sample and build the nested array structure from it.
$items = Lexer::run($input);
?>
PD9waHAKCmNsYXNzIExleGVyIHsKCXByb3RlY3RlZCBzdGF0aWMgJF90ZXJtaW5hbHMgPSBhcnJheSgKICAgICAgICAnfl4oXFspficJCQkJPT4gIlRfT1BFTiIsCiAgICAgICAgJ35eKFxdKX4nCQkJCT0+ICJUX0NMT1NFIiwKICAgICAgICAnfl4oIi4rPyg/PCFcXFxcKSIpficJPT4gIlRfSVRFTSIsCiAgICAgICAgJ35eKCwpKD8hLCl+JwkJCT0+ICJUX1NFUEFSQVRPUiIsCiAgICAgICAgJ35eKFxkKyl+JwkJCQk9PiAiVF9OVU1CRVIiLAogICAgICAgICd+XigsKSg/PSwpficJCQk9PiAiVF9FTVBUWSIKICAgICk7CgogICAgcHVibGljIHN0YXRpYyBmdW5jdGlvbiBydW4oJGxpbmUpIHsKICAgIAkkdG9rZW5zID0gYXJyYXkoKTsKICAgIAkkb2Zmc2V0ID0gMDsKICAgIAl3aGlsZSgkb2Zmc2V0IDwgc3RybGVuKCRsaW5lKSkgewogICAgCQkkcmVzdWx0ID0gc3RhdGljOjpfbWF0Y2goJGxpbmUsICRvZmZzZXQpOwogICAgCQlpZigkcmVzdWx0ID09PSBmYWxzZSkgewogICAgCQkJdGhyb3cgbmV3IEV4Y2VwdGlvbigiVW5hYmxlIHRvIHBhcnNlIGxpbmUgIiAuICgkbGluZSsxKSAuICIuIik7CiAgICAJCX0KICAgIAkJJHRva2Vuc1tdID0gJHJlc3VsdDsKICAgIAkJJG9mZnNldCArPSBzdHJsZW4oJHJlc3VsdFsnbWF0Y2gnXSk7CiAgICAJfQogICAgCXJldHVybiBzdGF0aWM6Ol9nZW5lcmF0ZSgkdG9rZW5zKTsKICAgIH0KCiAgICBwcm90ZWN0ZWQgc3RhdGljIGZ1bmN0aW9uIF9tYXRjaCgkbGluZSwgJG9mZnNldCkgewogICAgCSRzdHJpbmcgPSBzdWJzdHIoJGxpbmUsICRvZmZzZXQpOwoKICAgIAlmb3JlYWNoKHN0YXRpYzo6JF90ZXJtaW5hbHMgYXMgJHBhdHRlcm4gPT4gJG5hbWUpIHsKICAgIAkJaWYocHJlZ19tYXRjaCgkcGF0dGVybiwgJHN0cmluZywgJG1hdGNoZXMpKSB7CiAgICAJCQlyZXR1cm4gYXJyYXkoCiAgICAJCQkJJ21hdGNoJyA9PiAkbWF0Y2hlc1sxXSwKICAgIAkJCQkndG9rZW4nID0+ICRuYW1lCiAgICAJCQkpOwogICAgCQl9CiAgICAJfQogICAgCXJldHVybiBmYWxzZTsKICAgIH0KCgkvLyBhIHJlY3Vyc2l2ZSBmdW5jdGlvbiB0byBhY3R1YWxseSBidWlsZCB0aGUgc3RydWN0dXJlCglwcm90ZWN0ZWQgc3RhdGljIGZ1bmN0aW9uIF9nZW5lcmF0ZSgkYXJyPWFycmF5KCksICRpZHg9MCkgewoJICAgICRvdXRwdXQgPSBhcnJheSgpOwoJICAgICRjdXJyZW50ID0gMDsKCSAgICBmb3IoJGk9JGlkeDskaTxjb3VudCgkYXJyKTskaSsrKSB7CgkgICAgCSR0eXBlID0gJGFyclskaV1bInRva2VuIl07CgkgICAgCSRlbGVtZW50ID0gJGFyclskaV1bIm1hdGNoIl07CgkgICAgCXN3aXRjaCAoJHR5cGUpIHsKCSAgICAJCWNhc2UgJ1RfT1BFTic6CgkgICAgCQkJbGlzdCgkb3V0LCAkaW5kZXgpID0gc3RhdGljOjpfZ2VuZXJhdGUoJGFyciwgJGkrMSk7CgkgICAgICAgIAkJJG91dHB1dFtdID0gJG91dDsKCSAgICAgICAgCQkkaSA9ICRpbmRleDsKCSAgICAgICAgCQlicmVhazsKCSAgICAgICAgCWNhc2UgJ1RfQ0xPU0Un
OgoJICAgICAgICAJCXJldHVybiBhcnJheSgkb3V0cHV0LCAkaSk7CgkgICAgICAgIAkJYnJlYWs7CgkgICAgICAgIAljYXNlICdUX0lURU0nOgoJICAgICAgICAJY2FzZSAnVF9OVU1CRVInOgoJICAgICAgICAJCSRvdXRwdXRbXSA9ICRlbGVtZW50OwoJICAgICAgICAJCWJyZWFrOwoJICAgICAgICAJY2FzZSAnVF9FTVBUWSc6CgkgICAgICAgIAkJJG91dHB1dFtdID0gIiI7CgkgICAgICAgIAkJYnJlYWs7CgkgICAgCX0KCSAgICB9CgkgICAgcmV0dXJuICRvdXRwdXQ7Cgl9ICAgIAp9CgokaW5wdXQgID0gJ1tbWyJIZWxsbywgXCJob3dcIiBhcmUgeW91PyIsIkdvb2QhIiwsLDEyM11dLCwib2siXSc7CiRpdGVtcyA9IExleGVyOjpydW4oJGlucHV0KTsKcHJpbnRfcigkaXRlbXMpOwoKPz4=