import re
import ast
# Sample input: one list literal per line.  Several items contain quote
# characters of the same kind as their delimiters without a backslash, so the
# lines are not valid Python until those inner quotes are escaped.
test = ("[ \"A\", \"\"B\"\",'C' , \" D\"]\n"
        "[ \"A\", \"'B'\",'C' , \" D\"]\n"
        "[ \"A\", ''B'','C' , \" D\"]\n"
        "[ \"A\", '\"B\"','C' , \" D\"]\n"
        "[ \"A\", '8 o'clock','C' , \" D\"]\n"
        "[ \"A\", \"Ol' 8 o'clock\",'C' , \" D\"]\n"
        "[\"Some Text\"]\n"
        "[\"Some more Text\"]\n"
        "[\"Even more text about \\\"this text\\\"\"]\n"
        "[\"Ol' 8 o'clock\"]\n"
        "['8 o'clock']\n"
        "[ '8 o'clock']\n"
        "['Ol' 8 o'clock']\n"
        "[\"\"B\"]\n"
        "[\"\\\"B\"]\n"
        "[\"\\\\\"B\"]\n"
        "[\"\\\\\\\"B\"]\n"
        "[\"\\\\\\\\\"B\"]")

# Matches one quoted list item: it must follow "[" or ",", may start with
# whitespace, opens with a quote (group 1) and closes with the same quote
# followed by optional whitespace and "," or "]".  Group 2 is set when the
# delimiter quote character also occurs inside the item.
regex1 = r"(?<=[\[,])\s*(['\"])(?:(\1)|.)*?\1(?=\s*[,\]])" #nested quotes of the same type
# Applied to a single item found by regex1.  The leading alternatives consume
# what must stay untouched (an item that is already a well-formed string, the
# opening quote, the closing quote, backslash-escaped pairs); only the last
# alternative captures a quote in group 1, i.e. a quote that still needs a
# backslash.
regex2 = r'''^"[^"\\]*(?:\\.[^\"\\]*)*"$|^'[^'\\]*(?:\\.[^'\\]*)*'$|^\s*["'][^"'\\]*|["']\s*$|\\.|(["'])[^"'\\\n]*''' # unescaped quotes in $1


def escape_nested_quotes(text, *, verbose=False):
    """Return *text* with unescaped quotes inside quoted list items escaped.

    Every quoted item found by ``regex1`` that contains its own delimiter
    character is scanned with ``regex2``; each quote captured by group 1 of
    ``regex2`` gets a backslash inserted in front of it.  All other text is
    copied through unchanged.

    Args:
        text: Source text holding bracketed, comma-separated quoted items.
        verbose: When true, print every item that contains nested quotes of
            its own delimiter type (the matched text, including any leading
            whitespace) before it is processed.

    Returns:
        The text with a backslash added before each offending quote.
    """
    pieces = []
    last_index = 0  # end (in *text*) of the last span already emitted
    for match in re.finditer(regex1, text, re.MULTILINE):
        if match.group(2) is None:
            # No delimiter-type quote inside this item: nothing to fix.
            continue
        item = match.group()
        if verbose:
            print(item)
        for m in re.finditer(regex2, item):
            if m.group(1) is None:
                continue
            # m's offsets are relative to the item; shift them into *text*.
            pieces.append(text[last_index:match.start() + m.start()])
            pieces.append('\\' + m.group())
            last_index = match.start() + m.end()
    pieces.append(text[last_index:])
    return ''.join(pieces)


result = escape_nested_quotes(test, verbose=True)
print(result)

# Every repaired line should now parse as a plain Python list literal.
for test_str in result.split("\n"):
    parsed = ast.literal_eval(test_str)
    print(parsed)
    print(type(parsed))
aW1wb3J0IHJlCmltcG9ydCBhc3QKCnRlc3QgPSAoIlsgXCJBXCIsIFwiXCJCXCJcIiwnQycgLCBcIiBEXCJdXG4iCgkiWyBcIkFcIiwgXCInQidcIiwnQycgLCBcIiBEXCJdXG4iCgkiWyBcIkFcIiwgJydCJycsJ0MnICwgXCIgRFwiXVxuIgoJIlsgXCJBXCIsICdcIkJcIicsJ0MnICwgXCIgRFwiXVxuIgoJIlsgXCJBXCIsICc4IG8nY2xvY2snLCdDJyAsIFwiIERcIl1cbiIKCSJbIFwiQVwiLCBcIk9sJyA4IG8nY2xvY2tcIiwnQycgLCBcIiBEXCJdXG4iCgkiW1wiU29tZSBUZXh0XCJdXG4iCgkiW1wiU29tZSBtb3JlIFRleHRcIl1cbiIKCSJbXCJFdmVuIG1vcmUgdGV4dCBhYm91dCBcXFwidGhpcyB0ZXh0XFxcIlwiXVxuIgoJIltcIk9sJyA4IG8nY2xvY2tcIl1cbiIKCSJbJzggbydjbG9jayddXG4iCgkiWyAnOCBvJ2Nsb2NrJ11cbiIKCSJbJ09sJyA4IG8nY2xvY2snXVxuIgoJIltcIlwiQlwiXVxuIgoJIltcIlxcXCJCXCJdXG4iCgkiW1wiXFxcXFwiQlwiXVxuIgoJIltcIlxcXFxcXFwiQlwiXVxuIgoJIltcIlxcXFxcXFxcXCJCXCJdIikKcmVzdWx0ID0gdScnCmxhc3RfaW5kZXggPSAwCnJlZ2V4MSA9IHIiKD88PVtcWyxdKVxzKihbJ1wiXSkoPzooXDEpfC4pKj9cMSg/PVxzKlssXF1dKSIgI25lc3RlZCBxdW90ZXMgb2YgdGhlIHNhbWUgdHlwZQpyZWdleDIgPSByJycnXiJbXiJcXF0qKD86XFwuW15cIlxcXSopKiIkfF4nW14nXFxdKig/OlxcLlteJ1xcXSopKickfF5ccypbIiddW14iJ1xcXSp8WyInXVxzKiR8XFwufChbIiddKVteIidcXFxuXSonJycgIyB1bmVzY2FwZWQgcXVvdGVzIGluICQxCm1hdGNoZXMgPSByZS5maW5kaXRlcihyZWdleDEsIHRlc3QsIHJlLk1VTFRJTElORSkKZm9yIG1hdGNoIGluIG1hdGNoZXM6CiAgICBpZiBtYXRjaC5ncm91cHMoKVsxXSBpcyBub3QgTm9uZTogI25lc3RlZCBxdW90ZXMgb2YgdGhlIHNhbWUgdHlwZSBwcmVzZW50CiAgICAgICAgcHJpbnQobWF0Y2guZ3JvdXAoKSkKICAgICAgICBpbm5lciA9IHJlLmZpbmRpdGVyKHJlZ2V4MiwgbWF0Y2guZ3JvdXAoKSkKICAgICAgICBmb3IgbSBpbiBpbm5lcjogICAgICAgICAgICAKICAgICAgICAgICAgaWYgbS5ncm91cHMoKVswXSBpcyBub3QgTm9uZTogIyB1bmVzY2FwZWQgcXVvdGVzIGluICQxIHByZXNlbnQKICAgICAgICAgICAgICAgIHJlc3VsdCArPSB0ZXN0W2xhc3RfaW5kZXg6bWF0Y2guc3RhcnQoKSArIG0uc3RhcnQoKV0gKyAnXFwnICsgbS5ncm91cCgpCiAgICAgICAgICAgICAgICBsYXN0X2luZGV4ID0gbWF0Y2guc3RhcnQoKSttLmVuZCgpCnJlc3VsdCArPSB0ZXN0W2xhc3RfaW5kZXg6bGVuKHRlc3QpXQpwcmludChyZXN1bHQpCgpmb3IgdGVzdF9zdHIgaW4gcmVzdWx0LnNwbGl0KCJcbiIpOgogICAgTGlzdCA9IGFzdC5saXRlcmFsX2V2YWwodGVzdF9zdHIpCiAgICBwcmludChMaXN0KQogICAgcHJpbnQodHlwZShMaXN0KSk=
""B""
''B''
'8 o'clock'
"Even more text about \"this text\""
'8 o'clock'
'8 o'clock'
'Ol' 8 o'clock'
""B"
"\"B"
"\\"B"
"\\\"B"
"\\\\"B"
[ "A", "\"B\"",'C' , " D"]
[ "A", "'B'",'C' , " D"]
[ "A", '\'B\'','C' , " D"]
[ "A", '"B"','C' , " D"]
[ "A", '8 o\'clock','C' , " D"]
[ "A", "Ol' 8 o'clock",'C' , " D"]
["Some Text"]
["Some more Text"]
["Even more text about \"this text\""]
["Ol' 8 o'clock"]
['8 o\'clock']
[ '8 o\'clock']
['Ol\' 8 o\'clock']
["\"B"]
["\"B"]
["\\\"B"]
["\\\"B"]
["\\\\\"B"]
['A', '"B"', 'C', ' D']
<class 'list'>
['A', "'B'", 'C', ' D']
<class 'list'>
['A', "'B'", 'C', ' D']
<class 'list'>
['A', '"B"', 'C', ' D']
<class 'list'>
['A', "8 o'clock", 'C', ' D']
<class 'list'>
['A', "Ol' 8 o'clock", 'C', ' D']
<class 'list'>
['Some Text']
<class 'list'>
['Some more Text']
<class 'list'>
['Even more text about "this text"']
<class 'list'>
["Ol' 8 o'clock"]
<class 'list'>
["8 o'clock"]
<class 'list'>
["8 o'clock"]
<class 'list'>
["Ol' 8 o'clock"]
<class 'list'>
['"B']
<class 'list'>
['"B']
<class 'list'>
['\\"B']
<class 'list'>
['\\"B']
<class 'list'>
['\\\\"B']
<class 'list'>