import re
import pprint
# Regex for the header of one entry, anchored at the start of a line
# (it is applied with re.M below):
#   group 1 - the primary name: upper-case letters, digits and spaces,
#             starting and ending on a letter/digit and followed by a
#             word boundary (so mixed-case words such as "North" are
#             rejected);
#   group 2 - optional: the text inside the parentheses that directly
#             follow the name, provided it starts with "a.k.a.".
#             [^()]+ also matches newlines, so an alias list that wraps
#             over several lines is captured as one string.
# Whitespace inside the pattern is significant: there must be no spaces
# between the letters of "a\.k\.a\." or the pattern will not match.
pattern = (
    r"^([A-Z0-9](?:[A-Z0-9 ]*[A-Z0-9])?\b)"
    r"(?: \((a\.k\.a\.[^()]+(?:\sa\.k\.a\.[^()]+)*)\))?"
)

# Sample input: four entries separated by blank lines. Each physical line
# must begin right after the "\n" (no leading space), otherwise the "^"
# anchor above cannot find the entry names.
textData = (
    "2ND COMPLEX OF NEURAL SCIENCES (a.k.a. ACADEMY OF NEURAL \n"
    "SCIENCES; a.k.a. CHE 2 CHAON KAHAK-WON; a.k.a. CHE 2 CHAYON KAHAK-WON;\n"
    "a.k.a. KUKPAN KAHAK-WON; a.k.a. NATIONAL DEFENSE ACADEMY; a.k.a.\n"
    "SANSRI; a.k.a. SECOND COMPLEX OF NEURAL SCIENCES; a.k.a. SECOND\n"
    "COMPLEX OF NEURAL SCIENCES RESEARCH INSTITUTE), Pyongyang, Korea,\n"
    "North; Secondary sanctions risk: North Korea Sanctions Regulations,\n"
    "sections 510.201 and 510.210; Transactions Prohibited For Persons\n"
    "Owned or Controlled By U.S. Financial Institutions: North Korea\n"
    "Sanctions Regulations section 510.214.\n\n"
    "LOSTIK VE HAVAIK HIZMETLARI LTD., No. 3/182 Antepe\n"
    "Bagdat Cad. Istasyon Yolu Sok., Istanbul 34840, Turkey; Additional\n"
    "Sanctions Information - Subject to Secondary Sanctions.\n"
    "[IFSR] (Linked To: MAHAN AIR).\n\n"
    "7 KARNES, Avenida Ciudad de Cali No. 15A-91, Local A06-07, Bogota,\n"
    "Colombia; Matricula Mercantil No 1978075 (Colombia).\n\n"
    "SWING OF TIR (a.k.a. 7TH OF TIR COMPLEX; a.k.a. 7TH OF TIR INDUSTRIAL\n"
    "COMPLEX; a.k.a. 7TH OF TIR INDUSTRIES; a.k.a. 7TH OF TIR INDUSTRIES\n"
    "OF ISFAHAN/ESFAHAN; a.k.a. MOJTAMAE SANATE HAFTOME TIR; a.k.a.\n"
    "SANAYE HAFTOME TIR; a.k.a. SEVENTH OF TIR), Mobarakeh Road Km 45,\n"
    "Isfahan, Iran; P.O. Box 81465-478, Isfahan, Iran; Additional\n"
    "Sanctions Information - Subject to Secondary Sanctions."
)

# Maps each primary name to a list holding the primary name first,
# followed by its aliases in the order they appear in the text.
d = {}

# With two capture groups, re.findall yields (name, alias_text) tuples;
# alias_text is "" for entries that have no "(a.k.a. ...)" section.
for name, alias_text in re.findall(pattern, textData, re.M):
    # Split on every "a.k.a." marker, swallowing the "; " (or ";\n") that
    # precedes all but the first one. The split leaves an empty leading
    # piece (and [""] for an empty alias_text), hence the filter.
    # Aliases that were wrapped in the input keep their embedded "\n".
    aliases = [
        alias
        for alias in re.split(r"(?:;\s)?a\.k\.a\.\s", alias_text)
        if alias
    ]
    d[name] = [name, *aliases]

pprint.pprint(d)
aW1wb3J0IHJlCmltcG9ydCBwcHJpbnQKCnBhdHRlcm4gPSByIl4oW0EtWjAtOV0oPzpbQS1aMC05IF0qW0EtWjAtOV0pP1xiKSg/OiBcKChhXC5rXC5hXC5bXigpXSsoPzpcc2FcLmtcLmFcLlteKCldKykqKVwpKT8iCnRleHREYXRhID0gKCIyTkQgQ09NUExFWCBPRiBORVVSQUwgU0NJRU5DRVMgKGEuay5hLiBBQ0FERU1ZIE9GIE5FVVJBTCBcbiIKICAgICAgICAgICAgIlNDSUVOQ0VTOyBhLmsuYS4gQ0hFIDIgQ0hBT04gS0FIQUstV09OOyBhLmsuYS4gQ0hFIDIgQ0hBWU9OIEtBSEFLLVdPTjtcbiIKICAgICAgICAgICAgImEuay5hLiBLVUtQQU4gS0FIQUstV09OOyBhLmsuYS4gTkFUSU9OQUwgREVGRU5TRSBBQ0FERU1ZOyBhLmsuYS5cbiIKICAgICAgICAgICAgIlNBTlNSSTsgYS5rLmEuIFNFQ09ORCBDT01QTEVYIE9GIE5FVVJBTCBTQ0lFTkNFUzsgYS5rLmEuIFNFQ09ORFxuIgogICAgICAgICAgICAiQ09NUExFWCBPRiBORVVSQUwgU0NJRU5DRVMgUkVTRUFSQ0ggSU5TVElUVVRFKSwgUHlvbmd5YW5nLCBLb3JlYSxcbiIKICAgICAgICAgICAgIk5vcnRoOyBTZWNvbmRhcnkgc2FuY3Rpb25zIHJpc2s6IE5vcnRoIEtvcmVhIFNhbmN0aW9ucyBSZWd1bGF0aW9ucyxcbiIKICAgICAgICAgICAgInNlY3Rpb25zIDUxMC4yMDEgYW5kIDUxMC4yMTA7IFRyYW5zYWN0aW9ucyBQcm9oaWJpdGVkIEZvciBQZXJzb25zXG4iCiAgICAgICAgICAgICJPd25lZCBvciBDb250cm9sbGVkIEJ5IFUuUy4gRmluYW5jaWFsIEluc3RpdHV0aW9uczogTm9ydGggS29yZWFcbiIKICAgICAgICAgICAgIlNhbmN0aW9ucyBSZWd1bGF0aW9ucyBzZWN0aW9uIDUxMC4yMTQuXG5cbiIKICAgICAgICAgICAgIkxPU1RJSyBWRSBIQVZBSUsgSElaTUVUTEFSSSBMVEQuLCBOby4gMy8xODIgQW50ZXBlXG4iCiAgICAgICAgICAgICJCYWdkYXQgQ2FkLiBJc3Rhc3lvbiBZb2x1IFNvay4sIElzdGFuYnVsIDM0ODQwLCBUdXJrZXk7IEFkZGl0aW9uYWxcbiIKICAgICAgICAgICAgIlNhbmN0aW9ucyBJbmZvcm1hdGlvbiAtIFN1YmplY3QgdG8gU2Vjb25kYXJ5IFNhbmN0aW9ucy5cbiIKICAgICAgICAgICAgIltJRlNSXSAoTGlua2VkIFRvOiBNQUhBTiBBSVIpLlxuXG4iCiAgICAgICAgICAgICI3IEtBUk5FUywgQXZlbmlkYSBDaXVkYWQgZGUgQ2FsaSBOby4gMTVBLTkxLCBMb2NhbCBBMDYtMDcsIEJvZ290YSxcbiIKICAgICAgICAgICAgIkNvbG9tYmlhOyBNYXRyaWN1bGEgTWVyY2FudGlsIE5vIDE5NzgwNzUgKENvbG9tYmlhKS5cblxuIgogICAgICAgICAgICAiU1dJTkcgT0YgVElSIChhLmsuYS4gN1RIIE9GIFRJUiBDT01QTEVYOyBhLmsuYS4gN1RIIE9GIFRJUiBJTkRVU1RSSUFMXG4iCiAgICAgICAgICAgICJDT01QTEVYOyBhLmsuYS4gN1RIIE9GIFRJUiBJTkRVU1RSSUVTOyBhLmsuYS4gN1RIIE9GIFRJUiBJTkRVU1RSSUVTXG4iCiAgICAgICAgICAgICJPRiBJU0ZBSEFOL0VTRkFIQU47IGEuay5hLiBNT0pUQU1BRSBTQU5BVEUgSEFGVE9NRSBUSVI7IGEuay5h
LlxuIgogICAgICAgICAgICAiU0FOQVlFIEhBRlRPTUUgVElSOyBhLmsuYS4gU0VWRU5USCBPRiBUSVIpLCBNb2JhcmFrZWggUm9hZCBLbSA0NSxcbiIKICAgICAgICAgICAgIklzZmFoYW4sIElyYW47IFAuTy4gQm94IDgxNDY1LTQ3OCwgSXNmYWhhbiwgSXJhbjsgQWRkaXRpb25hbFxuIgogICAgICAgICAgICAiU2FuY3Rpb25zIEluZm9ybWF0aW9uIC0gU3ViamVjdCB0byBTZWNvbmRhcnkgU2FuY3Rpb25zLiIpCgpkID0ge30KCmZvciB0IGluIHJlLmZpbmRhbGwocGF0dGVybiwgdGV4dERhdGEsIHJlLk0pOgogICAgcGFydHMgPSBbcCBmb3IgcCBpbiByZS5zcGxpdChyIig/Ojtccyk/YVwua1wuYVwuXHMiLCB0WzFdKSBpZiBwXQogICAgcGFydHMuaW5zZXJ0KDAsICh0WzBdKSkKICAgIGRbdFswXV0gPSBwYXJ0cwoKcHByaW50LnBwcmludChkKQo=
stdout
{'2ND COMPLEX OF NEURAL SCIENCES': ['2ND COMPLEX OF NEURAL SCIENCES',
'ACADEMY OF NEURAL \nSCIENCES',
'CHE 2 CHAON KAHAK-WON',
'CHE 2 CHAYON KAHAK-WON',
'KUKPAN KAHAK-WON',
'NATIONAL DEFENSE ACADEMY',
'SANSRI',
'SECOND COMPLEX OF NEURAL SCIENCES',
'SECOND\n'
'COMPLEX OF NEURAL SCIENCES RESEARCH '
'INSTITUTE'],
'7 KARNES': ['7 KARNES'],
'LOSTIK VE HAVAIK HIZMETLARI LTD': ['LOSTIK VE HAVAIK HIZMETLARI LTD'],
'SWING OF TIR': ['SWING OF TIR',
'7TH OF TIR COMPLEX',
'7TH OF TIR INDUSTRIAL\nCOMPLEX',
'7TH OF TIR INDUSTRIES',
'7TH OF TIR INDUSTRIES\nOF ISFAHAN/ESFAHAN',
'MOJTAMAE SANATE HAFTOME TIR',
'SANAYE HAFTOME TIR',
'SEVENTH OF TIR']}