#!/bin/bash
# Demo: mask every string listed in file1 wherever it occurs in file2 by
# replacing its letters and digits with '#'. Both sample files are created
# in a throwaway directory; the result is written to stdout.

# ideone boilerplate: run in temp dir
t=$(mktemp -d -t ideone.XXXXXXXX) || exit
# Clean up on EXIT only. An ERR trap would delete the working directory after
# the first failing command while the script keeps running inside it.
trap 'rm -rf -- "$t"' EXIT
# Stop here rather than create the sample files in the caller's directory.
cd "$t" || exit

# file1: the literal strings to mask, one per line (duplicates included).
# The quoted delimiter (\:) keeps the shell from expanding the body.
cat <<\: >file1
2001009
@vanti Finserv Co.
2001009
Fund #1
11:11 - capital
MS&CO(NY)
American Friends Org, Inc. 12X32
Domain-Name (LLC)
MS&CO(NY)
MS&CO(NY)
Ivy/Estate Rd
E*Trade wholesale
:

# file2: sample HTML input that gets filtered.
cat <<\: >file2
<html>
<body>
<hr><br><>span class="table">Records</span><table>
<tr class="column">
<td>Rec1</td>
<td>Rec2</td>
<td>Rec3</td>
<td>Rec4</td>
<td>Rec5</td>
<td>Rec6</td>
<td>Rec7</td>
<td>Rec8</td>
</tr>
<tr class="data">
<td>@vanti Finserv Co.</td>
<td>11:11 - Capital</td>
<td>MS&CO(NY)</td>
<td>New York</td>
<td>CDX98XSD</td>
<td>E*Trade wholesale</td>
<td>Domain-Name (LLC)</td>
<td>Ivy/Estate Rd</td>
<td></td>
</tr>
<tr class="data">
<td>@vanti Finserv Co.</td>
<td></td>
<td>MS&CO(NY)</td>
<td>2</td>
<td>2</td>
<td>MS&CO(NY)</td>
<td>MS&CO(NY)</td>
<td>Ivy/Estate Rd</td>
</table>
</body>
:

#######################################
# Mask, in the second file, every occurrence of each line of the first file.
# Matching is literal and case-sensitive; only letters and digits of a
# matched string are replaced by '#', punctuation and spaces are kept.
# Arguments: $1 - file with the strings to mask, one per line
#            $2 - file to filter
# Outputs:   stdout: one "## a[N] = '...'" trace line per string read from
#            $1, followed by the filtered contents of $2
#######################################
mask_records() {
  awk '
    # First file: build one (pattern, replacement) pair per line.
    NR == FNR {
      regex = $0
      # Backslash-escape regex metacharacters so the line matches literally.
      gsub(/[][(){}|\\*+?.^$]/, "\\\\&", regex)
      a[++n] = regex
      # Trace line showing the escaped pattern (\047 is a single quote).
      print "## a[" n "] = \047" regex "\047"
      # Replacement text: the same line with letters and digits masked.
      gsub(/[A-Za-z0-9]/, "#")
      # Escape & so the gsub below inserts it literally instead of the match.
      gsub(/&/, "\\\\&")
      b[n] = $0
      next
    }
    # Second file: apply every pair in input order, then print the line.
    {
      for (i = 1; i <= n; ++i)
        gsub(a[i], b[i])
    }
    1
  ' "$1" "$2"
}

mask_records file1 file2
IyEvYmluL2Jhc2gKCiMgaWRlb25lIGJvaWxlcnBsYXRlOiBydW4gaW4gdGVtcCBkaXIgCnQ9JChta3RlbXAgLWQgLXQgaWRlb25lLlhYWFhYWFhYKSB8fCBleGl0CnRyYXAgJ3JtIC1yZiAiJHQiJyBFUlIgRVhJVApjZCAiJHQiCgpjYXQgPDxcOiA+ZmlsZTEKMjAwMTAwOQpAdmFudGkgRmluc2VydiBDby4KMjAwMTAwOQpGdW5kICMxCjExOjExIC0gY2FwaXRhbApNUyZDTyhOWSkKQW1lcmljYW4gRnJpZW5kcyBPcmcsIEluYy4gMTJYMzIKRG9tYWluLU5hbWUgKExMQykKTVMmQ08oTlkpCk1TJkNPKE5ZKQpJdnkvRXN0YXRlIFJkCkUqVHJhZGUgd2hvbGVzYWxlCjoKCmNhdCA8PFw6ID5maWxlMgo8aHRtbD4KPGJvZHk+Cjxocj48YnI+PD5zcGFuIGNsYXNzPSJ0YWJsZSI+UmVjb3Jkczwvc3Bhbj48dGFibGU+Cjx0ciBjbGFzcz0iY29sdW1uIj4KIDx0ZD5SZWMxPC90ZD4KIDx0ZD5SZWMyPC90ZD4KIDx0ZD5SZWMzPC90ZD4KIDx0ZD5SZWM0PC90ZD4KIDx0ZD5SZWM1PC90ZD4KIDx0ZD5SZWM2PC90ZD4KIDx0ZD5SZWM3PC90ZD4KIDx0ZD5SZWM4PC90ZD4KPC90cj4KPHRyIGNsYXNzPSJkYXRhIj4KPHRkPkB2YW50aSBGaW5zZXJ2IENvLjwvdGQ+Cjx0ZD4xMToxMSAtIENhcGl0YWw8L3RkPgo8dGQ+TVMmQ08oTlkpPC90ZD4KPHRkPk5ldyBZb3JrPC90ZD4KPHRkPkNEWDk4WFNEPC90ZD4KPHRkPkUqVHJhZGUgd2hvbGVzYWxlPC90ZD4KPHRkPkRvbWFpbi1OYW1lIChMTEMpPC90ZD4KPHRkPkl2eS9Fc3RhdGUgUmQ8L3RkPgo8dGQ+PC90ZD4KPC90cj4KPHRyIGNsYXNzPSJkYXRhIj4KPHRkPkB2YW50aSBGaW5zZXJ2IENvLjwvdGQ+Cjx0ZD48L3RkPgo8dGQ+TVMmQ08oTlkpPC90ZD4KPHRkPjI8L3RkPgo8dGQ+MjwvdGQ+Cjx0ZD5NUyZDTyhOWSk8L3RkPgo8dGQ+TVMmQ08oTlkpPC90ZD4KPHRkPkl2eS9Fc3RhdGUgUmQ8L3RkPgo8L3RhYmxlPgo8L2JvZHk+CjoKCmF3ayAnTlI9PUZOUiB7CiAgcmVnZXggPSAkMDsKICBnc3ViKC9bXVsoKXt9fFxcKis/Ll4kXS8sICJcXFxcJiIsIHJlZ2V4KTsKICBhWysrbl0gPSByZWdleDsKcHJpbnQgIiMjIGFbIiBuICJdID0gXDA0NyIgcmVnZXggIlwwNDciCiAgZ3N1YigvW0EtWmEtejAtOV0vLCAiIyIpOwogIGdzdWIoLyYvLCAiXFxcXCYiKTsKICBiW25dID0gJDA7CiAgbmV4dAp9CnsgZm9yKGk9MTtpPD1uOysraSkKICAgIGdzdWIoYVtpXSwgYltpXSkKfSAxJyBmaWxlMSBmaWxlMgo=