#!/bin/bash
# Demo: four ways to select CSV rows whose 5th column mentions "Corona" or
# "Covid" as a plain word rather than as a #hashtag, using grep -P, grep -E,
# awk and python3. Each tool's matches are printed under its own heading.
#
# ideone boilerplate - we can't write files in the home directory,
# so create a temporary directory for our files instead.
t=$(mktemp -d -t ideone.XXXXXXXXXXXX) || exit
# Clean up on exit only. An ERR trap would delete the working directory as
# soon as any command returned non-zero (for instance a grep that finds
# nothing) while the rest of the script kept running inside it.
trap 'rm -rf -- "$t"' EXIT
cd "$t" || exit

# Sample input; the last column of each row states the expected outcome.
cat <<'EOF' >filea.csv
Foo,Bar,Baz,Quux,There is #Corona and #Covid here,False positive?
Foo,Bar,Baz,Quux,There is Corona and #Covid here,This should be matched
Foo,Bar,Baz,Quux,There is #Corona and Covid here,This should be matched
Foo,Bar,Baz,Quux,There is #Corona and #Covid here,False positive?
Foo,Bar,Baz,Quux,"Quoted comma, There is Corona and Covid here",False negative for non-Python/grep -P cases
Corona,Covid,Baz,Quux,There is #Corona and #Covid here,False positive for grep -P case
EOF

# CSV-aware filter: reads CSV on stdin and echoes the rows whose fifth field
# contains Corona/Covid as a whole word that is not preceded by '#'.
cat <<'EOF' >prog.py
#!/usr/bin/env python3

import csv
import re
import sys

reader = csv.reader(sys.stdin)
writer = csv.writer(sys.stdout)
for line in reader:
    if re.search(r'(?<!#)\b(?:Corona|Covid)\b', line[4], re.IGNORECASE):
        writer.writerow(line)
EOF

# Whole-line search: -w restricts to whole words, -i ignores case, and the
# lookbehind rejects a word directly preceded by '#'. It is not limited to
# column 5, so a keyword in any other column also selects the row.
echo '** grep -P **'
grep -Pwi '(?<!#)(Corona|Covid)' filea.csv

# Skip four comma-terminated fields, then look for the keyword inside the
# fifth. Commas inside a quoted field are counted as separators here.
echo
echo '** grep -E **'
grep -Ei '^([^,]*,){4}(#?[^,#]+)*\b(Corona|Covid)\b' filea.csv

# Lowercase field 5, drop every #hashtag token, then test what is left.
# Splitting is on bare commas, so quoted commas shift the columns.
# NOTE(review): \< and \> are GNU-style word anchors - confirm the target awk
# supports them.
echo
echo '** awk **'
awk -F, -v col=5 '{ field = tolower($col); gsub(/#[A-Za-z0-9_]+/, "", field) }
  field ~ /\<(corona|covid)\>/' filea.csv

echo
echo '** python3 **'
python3 prog.py <filea.csv
IyEvYmluL2Jhc2gKIyBpZGVvbmUgYm9pbGVycGxhdGUgLSB3ZSBjYW4ndCB3cml0ZSBmaWxlcyBpbiB0aGUgaG9tZSBkaXJlY3Rvcnk7CiMgc28gY3JlYXRlIGEgdGVtcG9yYXJ5IGRpcmVjdG9yeSBmb3Igb3VyIGZpbGVzIGluc3RlYWQKdD0kKG1rdGVtcCAtZCAtdCBpZGVvbmUuWFhYWFhYWFhYWFhYKSB8fCBleGl0CnRyYXAgJ3JtIC1yZiAiJHQiJyBFUlIgRVhJVApjZCAiJHQiCgpjYXQgPDxcOiA+ZmlsZWEuY3N2CkZvbyxCYXIsQmF6LFF1dXgsVGhlcmUgaXMgI0Nvcm9uYSBhbmQgI0NvdmlkIGhlcmUsRmFsc2UgcG9zaXRpdmU/CkZvbyxCYXIsQmF6LFF1dXgsVGhlcmUgaXMgQ29yb25hIGFuZCAjQ292aWQgaGVyZSxUaGlzIHNob3VsZCBiZSBtYXRjaGVkCkZvbyxCYXIsQmF6LFF1dXgsVGhlcmUgaXMgI0Nvcm9uYSBhbmQgQ292aWQgaGVyZSxUaGlzIHNob3VsZCBiZSBtYXRjaGVkCkZvbyxCYXIsQmF6LFF1dXgsVGhlcmUgaXMgI0Nvcm9uYSBhbmQgI0NvdmlkIGhlcmUsRmFsc2UgcG9zaXRpdmU/CkZvbyxCYXIsQmF6LFF1dXgsIlF1b3RlZCBjb21tYSwgVGhlcmUgaXMgQ29yb25hIGFuZCBDb3ZpZCBoZXJlIixGYWxzZSBuZWdhdGl2ZSBmb3Igbm9uLVB5dGhvbi9ncmVwIC1QIGNhc2VzCkNvcm9uYSxDb3ZpZCxCYXosUXV1eCxUaGVyZSBpcyAjQ29yb25hIGFuZCAjQ292aWQgaGVyZSxGYWxzZSBwb3NpdGl2ZSBmb3IgZ3JlcCAtUCBjYXNlCjoKCgpjYXQgPDxcOiA+cHJvZy5weQojIS91c3IvYmluL2VudiBweXRob24zCgppbXBvcnQgY3N2CmltcG9ydCByZQppbXBvcnQgc3lzCgpyZWFkZXIgPSBjc3YucmVhZGVyKHN5cy5zdGRpbikKd3JpdGVyID0gY3N2LndyaXRlcihzeXMuc3Rkb3V0KQpmb3IgbGluZSBpbiByZWFkZXI6CiAgICBpZiByZS5zZWFyY2gocicoPzwhIylcYig/OkNvcm9uYXxDb3ZpZClcYicsIGxpbmVbNF0sIHJlLklHTk9SRUNBU0UpOgogICAgICAgIHdyaXRlci53cml0ZXJvdyhsaW5lKQo6CgoKZWNobyAnKiogZ3JlcCAtUCAqKicKZ3JlcCAtUHdpICcoPzwhIykoQ29yb25hfENvdmlkKScgZmlsZWEuY3N2CgplY2hvCmVjaG8gJyoqIGdyZXAgLUUgKionCmdyZXAgLUVpICdeKFteLF0qLCl7NH0oIz9bXiwjXSspKlxiKENvcm9uYXxDb3ZpZClcYicgZmlsZWEuY3N2CgplY2hvCmVjaG8gJyoqIGF3ayAqKicKYXdrIC1GLCAtdiBjb2w9NSAneyBmaWVsZCA9IHRvbG93ZXIoJGNvbCk7IGdzdWIoLyNbQS1aYS16MC05X10rLywgIiIsIGZpZWxkKSB9CiAgZmllbGQgfiAvXDwoY29yb25hfGNvdmlkKVw+LycgZmlsZWEuY3N2CgoKZWNobwplY2hvICcqKiBweXRob24zICoqJwpweXRob24zIHByb2cucHkgPGZpbGVhLmNzdgo=
stdout
** grep -P **
Foo,Bar,Baz,Quux,There is Corona and #Covid here,This should be matched
Foo,Bar,Baz,Quux,There is #Corona and Covid here,This should be matched
Foo,Bar,Baz,Quux,"Quoted comma, There is Corona and Covid here",False negative for non-Python/grep -P cases
Corona,Covid,Baz,Quux,There is #Corona and #Covid here,False positive for grep -P case
** grep -E **
Foo,Bar,Baz,Quux,There is Corona and #Covid here,This should be matched
Foo,Bar,Baz,Quux,There is #Corona and Covid here,This should be matched
** awk **
Foo,Bar,Baz,Quux,There is Corona and #Covid here,This should be matched
Foo,Bar,Baz,Quux,There is #Corona and Covid here,This should be matched
** python3 **
Foo,Bar,Baz,Quux,There is Corona and #Covid here,This should be matched
Foo,Bar,Baz,Quux,There is #Corona and Covid here,This should be matched
Foo,Bar,Baz,Quux,"Quoted comma, There is Corona and Covid here",False negative for non-Python/grep -P cases