/*
 * @author Andrea Ligios
 *
 */
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Locale;

/**
 * Lower-cases a string and then capitalizes the letters that sit next to
 * configurable delimiter characters (by default: the letter following a space).
 *
 * <p>The first character of the string is always capitalized when it is a letter.
 * Characters that belong to a UTF-16 surrogate pair are never modified by the
 * capitalization step; they are only affected by the initial lower-casing.
 */
class WordsCapitalizer {

    /**
     * Capitalizes every word of {@code source} using the default delimiter
     * (capitalize after a space) and the JVM default locale for lower-casing.
     *
     * @param source the text to transform; must not be {@code null}
     * @return the transformed text
     * @throws NullPointerException if {@code source} is {@code null}
     */
    public static String capitalizeEveryWord(String source) {
        return capitalizeEveryWord(source, null, null);
    }

    /**
     * Capitalizes every word of {@code source} using the default delimiter
     * (capitalize after a space) and the given locale for lower-casing.
     *
     * @param source the text to transform; must not be {@code null}
     * @param locale locale used for lower-casing, or {@code null} for the JVM default
     * @return the transformed text
     * @throws NullPointerException if {@code source} is {@code null}
     */
    public static String capitalizeEveryWord(String source, Locale locale) {
        return capitalizeEveryWord(source, null, locale);
    }

    /**
     * Lower-cases {@code source}, capitalizes its first letter, and then applies
     * the capitalization rule of every delimiter found in the text.
     *
     * @param source     the text to transform; must not be {@code null}
     * @param delimiters delimiter rules to apply; {@code null} or empty selects the
     *                   default "capitalize after space" rule
     * @param locale     locale used for lower-casing (e.g. to handle the Turkish
     *                   dotted and dotless 'i'), or {@code null} for the JVM default
     * @return the transformed text
     * @throws NullPointerException if {@code source} is {@code null}
     */
    public static String capitalizeEveryWord(String source, List<Delimiter> delimiters, Locale locale) {
        List<Delimiter> rules =
                (delimiters == null || delimiters.isEmpty()) ? getDefaultDelimiters() : delimiters;

        // If a Locale is specified, the i18n-aware toLowerCase is used to handle
        // locale-specific behaviors (e.g. Turkish dotted and dotless 'i').
        char[] chars = (locale != null)
                ? source.toLowerCase(locale).toCharArray()
                : source.toLowerCase().toCharArray();

        // First character is ALWAYS capitalized, if it is a letter.
        if (isCapitalizable(chars, 0)) {
            chars[0] = Character.toUpperCase(chars[0]);
        }

        for (int i = 0; i < chars.length; i++) {
            if (isSurrogate(chars[i]) || Character.isLetter(chars[i])) {
                continue;
            }
            // Current char is not a letter; check whether it is a delimiter.
            for (Delimiter delimiter : rules) {
                if (delimiter.getDelimiter() != chars[i]) {
                    continue;
                }
                // Delimiter found, applying its rules.
                if (delimiter.capitalizeBefore() && isCapitalizable(chars, i - 1)) {
                    // Previous character is a letter and has to be capitalized.
                    chars[i - 1] = Character.toUpperCase(chars[i - 1]);
                }
                if (delimiter.capitalizeAfter() && isCapitalizable(chars, i + 1)) {
                    // Next character is a letter and has to be capitalized.
                    chars[i + 1] = Character.toUpperCase(chars[i + 1]);
                }
                // Only the first rule matching this character is applied.
                break;
            }
        }
        return String.valueOf(chars);
    }

    /** Returns true when {@code index} is in range and holds a non-surrogate letter. */
    private static boolean isCapitalizable(char[] chars, int index) {
        return index >= 0
                && index < chars.length
                && Character.isLetter(chars[index])
                && !isSurrogate(chars[index]);
    }

    /**
     * Checks whether the given character is part of a UTF-16 surrogate pair.
     * Note: the pair is not validated; this is only used to bypass (any found part of) it.
     */
    private static boolean isSurrogate(char chr) {
        return Character.isHighSurrogate(chr) || Character.isLowSurrogate(chr);
    }

    /** Builds the rule set used when no delimiter is specified: "capitalize after space". */
    private static List<Delimiter> getDefaultDelimiters() {
        List<Delimiter> delimiters = new ArrayList<Delimiter>();
        delimiters.add(new Delimiter(Behavior.CAPITALIZE_AFTER_MARKER, ' '));
        return delimiters;
    }

    /** A delimiter character paired with the capitalization rule it triggers. */
    static class Delimiter {
        private final Behavior behavior;
        private final char delimiter;

        /**
         * Creates a delimiter rule. Public so that callers outside this class can
         * build the list accepted by {@code capitalizeEveryWord}.
         *
         * @param behavior  which neighbour(s) of the delimiter get capitalized
         * @param delimiter the character acting as the marker
         */
        public Delimiter(Behavior behavior, char delimiter) {
            this.behavior = behavior;
            this.delimiter = delimiter;
        }

        /** Returns true when the letter immediately before the marker must be capitalized. */
        public boolean capitalizeBefore() {
            return behavior == Behavior.CAPITALIZE_BEFORE_MARKER
                    || behavior == Behavior.CAPITALIZE_BEFORE_AND_AFTER_MARKER;
        }

        /** Returns true when the letter immediately after the marker must be capitalized. */
        public boolean capitalizeAfter() {
            return behavior == Behavior.CAPITALIZE_AFTER_MARKER
                    || behavior == Behavior.CAPITALIZE_BEFORE_AND_AFTER_MARKER;
        }

        /** Returns the marker character. */
        public char getDelimiter() {
            return delimiter;
        }
    }

    /** Which side(s) of a delimiter should be capitalized. */
    static enum Behavior {
        CAPITALIZE_AFTER_MARKER(0),
        CAPITALIZE_BEFORE_MARKER(1),
        CAPITALIZE_BEFORE_AND_AFTER_MARKER(2);

        private final int value;

        private Behavior(int value) {
            this.value = value;
        }

        /** Returns the numeric code attached to this behavior. */
        public int getValue() {
            return value;
        }
    }

    /** Prints one demo section: its banner, the source text and the capitalized result. */
    private static void printDemo(String banner, String source, String output) {
        System.out.println(banner);
        System.out.println("Source: " + source);
        System.out.println("Output: " + output);
    }

    /**
     * Live demo: runs the capitalizer on a handful of sample strings and prints
     * the source and the result of each, followed by the total execution time.
     */
    public static void main(String[] args) {
        // Monotonic clock: elapsed time cannot be skewed by wall-clock adjustments.
        long startNanos = System.nanoTime();
        System.out.println("=== WordsCapitalizer Live Demo ===");

        String testString;
        List<Delimiter> delimiters;

        // ==============================================================
        // SIMPLE USAGE
        // ==============================================================
        testString = "cApItAlIzE this string after WHITE SPACES";
        printDemo("\n====================================\n SIMPLE USAGE\n====================================",
                testString, capitalizeEveryWord(testString));

        // ==============================================================
        // SINGLE CUSTOM-DELIMITER USAGE
        // ==============================================================
        testString = "capitalize this string ONLY before'and''after'''APEX";
        delimiters = new ArrayList<Delimiter>();
        delimiters.add(new Delimiter(Behavior.CAPITALIZE_BEFORE_AND_AFTER_MARKER, '\''));
        printDemo("\n====================================\n SINGLE CUSTOM-DELIMITER USAGE\n====================================",
                testString, capitalizeEveryWord(testString, delimiters, null));

        // ==============================================================
        // MULTIPLE CUSTOM-DELIMITER USAGE
        // ==============================================================
        testString = "capitalize this string AFTER SPACES, BEFORE'APEX, "
                + "and #AFTER AND BEFORE# NUMBER SIGN (#)";
        delimiters = new ArrayList<Delimiter>();
        delimiters.add(new Delimiter(Behavior.CAPITALIZE_AFTER_MARKER, ' '));
        delimiters.add(new Delimiter(Behavior.CAPITALIZE_BEFORE_MARKER, '\''));
        delimiters.add(new Delimiter(Behavior.CAPITALIZE_BEFORE_AND_AFTER_MARKER, '#'));
        printDemo("\n====================================\n MULTIPLE CUSTOM-DELIMITER USAGE\n====================================",
                testString, capitalizeEveryWord(testString, delimiters, null));

        // ==============================================================
        // SIMPLE USAGE WITH CUSTOM LOCALE
        // ==============================================================
        // Reference for this i18n problem: https://en.wikipedia.org/wiki/Dotted_and_dotless_I
        testString = "Uniforming the first and last vowels (different kind of 'i's) of the Turkish word D[\u0130]YARBAK[\u0049]R (D\u0130YARBAK\u0049R) ";
        // NOTE(review): Locale.ENGLISH is inferred from the recorded demo output, where
        // both vowels come out as a plain 'i' - confirm against the original demo.
        printDemo("\n====================================\n SIMPLE USAGE WITH CUSTOM LOCALE\n====================================",
                testString, capitalizeEveryWord(testString, Locale.ENGLISH));

        // ==============================================================
        // SIMPLE USAGE WITH A SURROGATE PAIR INSIDE THE STRING
        // ==============================================================
        byte[] data = {
            0, 0x61,        // a
            0, 0x62,        // b
            0, 0x20,        // space
            // Surrogate Pair Begins here
            (byte) 0xD8, 1, // High surrogate
            (byte) 0xDC, 2, // Low surrogate
            // Surrogate Pair Ends here
            0, 0x63,        // c
            0, 0x20,        // space
            0, 0x64,        // d
            0, 0x65,        // e
            0, 0x20,        // space
            0, (byte) 0xE0, // a with grave accent
        };
        // The bytes above are big-endian UTF-16 code units without a byte-order mark.
        testString = new String(data, StandardCharsets.UTF_16BE);
        printDemo("\n====================================\n SIMPLE USAGE WITH A SURROGATE PAIR \n====================================",
                testString, capitalizeEveryWord(testString));

        long elapsedMillis = (System.nanoTime() - startNanos) / 1000000L;
        System.out.println("\nTotal Execution time (in milliseconds): [" + elapsedMillis + "]");
    }
}
Standard input is empty
=== WordsCapitalizer Live Demo === ==================================== SIMPLE USAGE ==================================== Source: cApItAlIzE this string after WHITE SPACES Output: Capitalize This String After White Spaces ==================================== SINGLE CUSTOM-DELIMITER USAGE ==================================== Source: capitalize this string ONLY before'and''after'''APEX Output: Capitalize this string only beforE'AnD''AfteR'''Apex ==================================== MULTIPLE CUSTOM-DELIMITER USAGE ==================================== Source: capitalize this string AFTER SPACES, BEFORE'APEX, and #AFTER AND BEFORE# NUMBER SIGN (#) Output: Capitalize This String After Spaces, BeforE'apex, And #After And BeforE# Number Sign (#) ==================================== SIMPLE USAGE WITH CUSTOM LOCALE ==================================== Source: Uniforming the first and last vowels (different kind of 'i's) of the Turkish word D[İ]YARBAK[I]R (DİYARBAKIR) Output: Uniforming The First And Last Vowels (different Kind Of 'i's) Of The Turkish Word D[i]yarbak[i]r (diyarbakir) ==================================== SIMPLE USAGE WITH A SURROGATE PAIR ==================================== Source: ab 𐐂c de à Output: Ab 𐐪c De À Total Execution time (in milliseconds): [5]