/* package whatever; // don't place package name! */
class StringSimilarity {
/**
* Calculates the similarity (a number within 0 and 1) between two strings.
*/
String longer
= s1, shorter
= s2
; if ( s1.length ( ) < s2.length ( ) ) { // longer should always have greater length
longer = s2; shorter = s1;
}
int longerLength = longer.length ( ) ;
if ( longerLength == 0 ) { return 1.0 ; /* both strings are zero length */ }
/* // If you have StringUtils, you can use it to calculate the edit distance:
return (longerLength - StringUtils.getLevenshteinDistance(longer, shorter)) /
(double) longerLength; */
return ( longerLength - editDistance( longer, shorter) ) / ( double ) longerLength;
}
// Example implementation of the Levenshtein Edit Distance
// See http://r...content-available-to-author-only...e.org/wiki/Levenshtein_distance#Java
s1 = s1.toLowerCase ( ) ;
s2 = s2.toLowerCase ( ) ;
int [ ] costs = new int [ s2.length ( ) + 1 ] ;
for ( int i = 0 ; i <= s1.length ( ) ; i++ ) {
int lastValue = i;
for ( int j = 0 ; j <= s2.length ( ) ; j++ ) {
if ( i == 0 )
costs[ j] = j;
else {
if ( j > 0 ) {
int newValue = costs[ j - 1 ] ;
if ( s1.charAt ( i - 1 ) != s2.charAt ( j - 1 ) )
newValue
= Math .
min ( Math .
min ( newValue, lastValue
) ,
costs[ j] ) + 1 ;
costs[ j - 1 ] = lastValue;
lastValue = newValue;
}
}
}
if ( i > 0 )
costs[ s2.length ( ) ] = lastValue;
}
return costs[ s2.length ( ) ] ;
}
"%.3f is the similarity between \" %s\" and \" %s\" " , similarity( s, t) , s, t) ) ;
}
public static void main
( String [ ] args
) { printSimilarity( "" , "" ) ;
printSimilarity( "1234567890" , "1" ) ;
printSimilarity( "1234567890" , "123" ) ;
printSimilarity( "1234567890" , "1234567" ) ;
printSimilarity( "1234567890" , "1234567890" ) ;
printSimilarity( "1234567890" , "1234567980" ) ;
printSimilarity( "47/2010" , "472010" ) ;
printSimilarity( "47/2010" , "472011" ) ;
printSimilarity( "47/2010" , "AB.CDEF" ) ;
printSimilarity( "47/2010" , "4B.CDEFG" ) ;
printSimilarity( "47/2010" , "AB.CDEFG" ) ;
printSimilarity( "The quick fox jumped" , "The fox jumped" ) ;
printSimilarity( "The quick fox jumped" , "The fox" ) ;
printSimilarity( "The quick fox jumped" , "The quick fox jumped off the balcany" ) ;
printSimilarity( "kitten" , "sitting" ) ;
}
}
LyogcGFja2FnZSB3aGF0ZXZlcjsgLy8gZG9uJ3QgcGxhY2UgcGFja2FnZSBuYW1lISAqLwoKY2xhc3MgU3RyaW5nU2ltaWxhcml0eSB7CgogICAgLyoqCiAgICAgKiBDYWxjdWxhdGVzIHRoZSBzaW1pbGFyaXR5IChhIG51bWJlciB3aXRoaW4gMCBhbmQgMSkgYmV0d2VlbiB0d28gc3RyaW5ncy4KICAgICAqLwogICAgcHVibGljIHN0YXRpYyBkb3VibGUgc2ltaWxhcml0eShTdHJpbmcgczEsIFN0cmluZyBzMikgewogICAgICAgIFN0cmluZyBsb25nZXIgPSBzMSwgc2hvcnRlciA9IHMyOwogICAgICAgIGlmIChzMS5sZW5ndGgoKSA8IHMyLmxlbmd0aCgpKSB7IC8vIGxvbmdlciBzaG91bGQgYWx3YXlzIGhhdmUgZ3JlYXRlciBsZW5ndGgKICAgICAgICAgICAgbG9uZ2VyID0gczI7IHNob3J0ZXIgPSBzMTsKICAgICAgICB9CiAgICAgICAgaW50IGxvbmdlckxlbmd0aCA9IGxvbmdlci5sZW5ndGgoKTsKICAgICAgICBpZiAobG9uZ2VyTGVuZ3RoID09IDApIHsgcmV0dXJuIDEuMDsgLyogYm90aCBzdHJpbmdzIGFyZSB6ZXJvIGxlbmd0aCAqLyB9CiAgICAgICAgLyogLy8gSWYgeW91IGhhdmUgU3RyaW5nVXRpbHMsIHlvdSBjYW4gdXNlIGl0IHRvIGNhbGN1bGF0ZSB0aGUgZWRpdCBkaXN0YW5jZToKICAgICAgICByZXR1cm4gKGxvbmdlckxlbmd0aCAtIFN0cmluZ1V0aWxzLmdldExldmVuc2h0ZWluRGlzdGFuY2UobG9uZ2VyLCBzaG9ydGVyKSkgLwogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgKGRvdWJsZSkgbG9uZ2VyTGVuZ3RoOyAqLwogICAgICAgIHJldHVybiAobG9uZ2VyTGVuZ3RoIC0gZWRpdERpc3RhbmNlKGxvbmdlciwgc2hvcnRlcikpIC8gKGRvdWJsZSkgbG9uZ2VyTGVuZ3RoOwoKICAgIH0KCiAgICAvLyBFeGFtcGxlIGltcGxlbWVudGF0aW9uIG9mIHRoZSBMZXZlbnNodGVpbiBFZGl0IERpc3RhbmNlCiAgICAvLyBTZWUgaHR0cDovL3IuLi5jb250ZW50LWF2YWlsYWJsZS10by1hdXRob3Itb25seS4uLmUub3JnL3dpa2kvTGV2ZW5zaHRlaW5fZGlzdGFuY2UjSmF2YQogICAgcHVibGljIHN0YXRpYyBpbnQgZWRpdERpc3RhbmNlKFN0cmluZyBzMSwgU3RyaW5nIHMyKSB7CiAgICAgICAgczEgPSBzMS50b0xvd2VyQ2FzZSgpOwogICAgICAgIHMyID0gczIudG9Mb3dlckNhc2UoKTsKCiAgICAgICAgaW50W10gY29zdHMgPSBuZXcgaW50W3MyLmxlbmd0aCgpICsgMV07CiAgICAgICAgZm9yIChpbnQgaSA9IDA7IGkgPD0gczEubGVuZ3RoKCk7IGkrKykgewogICAgICAgICAgICBpbnQgbGFzdFZhbHVlID0gaTsKICAgICAgICAgICAgZm9yIChpbnQgaiA9IDA7IGogPD0gczIubGVuZ3RoKCk7IGorKykgewogICAgICAgICAgICAgICAgaWYgKGkgPT0gMCkKICAgICAgICAgICAgICAgICAgICBjb3N0c1tqXSA9IGo7CiAgICAgICAgICAgICAgICBlbHNlIHsKICAgICAgICAgICAgICAgICAgICBpZiAoaiA+IDApIHsKICAgICAgICAgICAgICAgICAgICAgICAgaW50IG5ld1ZhbHVlID0gY29zdHNbaiAtIDFdOwogICAgICAgICAgICAgICAgICAgICAgICBpZiAoczEuY2hhckF0KGkgLSAxKSAhPSBzMi5jaGFyQXQoaiAtIDEpKQogICAgICAgICAgICAgICAgICAgICAgICAgICAgbmV3VmFsdWUgPSBNYXRoLm1pbihNYXRoLm1pbihuZXdWYWx1ZSwgbGFzdFZhbHVlKSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgY29zdHNbal0pICsgMTsKICAgICAgICAgICAgICAgICAgICAgICAgY29zdHNbaiAtIDFdID0gbGFzdFZhbHVlOwogICAgICAgICAgICAgICAgICAgICAgICBsYXN0VmFsdWUgPSBuZXdWYWx1ZTsKICAgICAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgICAgaWYgKGkgPiAwKQogICAgICAgICAgICAgICAgY29zdHNbczIubGVuZ3RoKCldID0gbGFzdFZhbHVlOwogICAgICAgIH0KICAgICAgICByZXR1cm4gY29zdHNbczIubGVuZ3RoKCldOwogICAgfQoKICAgIHB1YmxpYyBzdGF0aWMgdm9pZCBwcmludFNpbWlsYXJpdHkoU3RyaW5nIHMsIFN0cmluZyB0KSB7CiAgICAgICAgU3lzdGVtLm91dC5wcmludGxuKFN0cmluZy5mb3JtYXQoCiAgICAgICAgICAgICIlLjNmIGlzIHRoZSBzaW1pbGFyaXR5IGJldHdlZW4gXCIlc1wiIGFuZCBcIiVzXCIiLCBzaW1pbGFyaXR5KHMsIHQpLCBzLCB0KSk7CiAgICB9CgogICAgcHVibGljIHN0YXRpYyB2b2lkIG1haW4oU3RyaW5nW10gYXJncykgewogICAgICAgIHByaW50U2ltaWxhcml0eSgiIiwgIiIpOwogICAgICAgIHByaW50U2ltaWxhcml0eSgiMTIzNDU2Nzg5MCIsICIxIik7CiAgICAgICAgcHJpbnRTaW1pbGFyaXR5KCIxMjM0NTY3ODkwIiwgIjEyMyIpOwogICAgICAgIHByaW50U2ltaWxhcml0eSgiMTIzNDU2Nzg5MCIsICIxMjM0NTY3Iik7CiAgICAgICAgcHJpbnRTaW1pbGFyaXR5KCIxMjM0NTY3ODkwIiwgIjEyMzQ1Njc4OTAiKTsKICAgICAgICBwcmludFNpbWlsYXJpdHkoIjEyMzQ1Njc4OTAiLCAiMTIzNDU2Nzk4MCIpOwogICAgICAgIHByaW50U2ltaWxhcml0eSgiNDcvMjAxMCIsICI0NzIwMTAiKTsKICAgICAgICBwcmludFNpbWlsYXJpdHkoIjQ3LzIwMTAiLCAiNDcyMDExIik7CiAgICAgICAgcHJpbnRTaW1pbGFyaXR5KCI0Ny8yMDEwIiwgIkFCLkNERUYiKTsKICAgICAgICBwcmludFNpbWlsYXJpdHkoIjQ3LzIwMTAiLCAiNEIuQ0RFRkciKTsKICAgICAgICBwcmludFNpbWlsYXJpdHkoIjQ3LzIwMTAiLCAiQUIuQ0RFRkciKTsKICAgICAgICBwcmludFNpbWlsYXJpdHkoIlRoZSBxdWljayBmb3gganVtcGVkIiwgIlRoZSBmb3gganVtcGVkIik7CiAgICAgICAgcHJpbnRTaW1pbGFyaXR5KCJUaGUgcXVpY2sgZm94IGp1bXBlZCIsICJUaGUgZm94Iik7CiAgICAgICAgcHJpbnRTaW1pbGFyaXR5KCJUaGUgcXVpY2sgZm94IGp1bXBlZCIsICJUaGUgcXVpY2sgZm94IGp1bXBlZCBvZmYgdGhlIGJhbGNhbnkiKTsKICAgICAgICBwcmludFNpbWlsYXJpdHkoImtpdHRlbiIsICJzaXR0aW5nIik7CiAgICB9Cgp9