fork(11) download
  1. /*
  2.  * @author Andrea Ligios
  3.  * */
  4.  
  5. import java.util.ArrayList;
  6. import java.util.Date;
  7. import java.util.List;
  8. import java.util.Locale;
  9.  
  10. class WordsCapitalizer {
  11.  
  12. public static String capitalizeEveryWord(String source, List<Delimiter> delimiters, Locale locale) {
  13. char[] chars;
  14.  
  15. if (delimiters == null || delimiters.size() == 0)
  16. delimiters = getDefaultDelimiters();
  17.  
  18. // If Locale specified, i18n toLowerCase is executed, to handle specific behaviors (eg. Turkish dotted and dotless 'i')
  19. if (locale!=null)
  20. chars = source.toLowerCase(locale).toCharArray();
  21. else
  22. chars = source.toLowerCase().toCharArray();
  23.  
  24. // First charachter ALWAYS capitalized, if it is a Letter.
  25. if (chars.length>0 && Character.isLetter(chars[0]) && !isSurrogate(chars[0])){
  26. chars[0] = Character.toUpperCase(chars[0]);
  27. }
  28.  
  29. for (int i = 0; i < chars.length; i++) {
  30. if (!isSurrogate(chars[i]) && !Character.isLetter(chars[i])) {
  31. // Current char is not a Letter; gonna check if it is a delimitrer.
  32. for (Delimiter delimiter : delimiters){
  33. if (delimiter.getDelimiter()==chars[i]){
  34. // Delimiter found, applying rules...
  35. if (delimiter.capitalizeBefore() && i>0
  36. && Character.isLetter(chars[i-1]) && !isSurrogate(chars[i-1]))
  37. { // previous character is a Letter and I have to capitalize it
  38. chars[i-1] = Character.toUpperCase(chars[i-1]);
  39. }
  40. if (delimiter.capitalizeAfter() && i<chars.length-1
  41. && Character.isLetter(chars[i+1]) && !isSurrogate(chars[i+1]))
  42. { // next character is a Letter and I have to capitalize it
  43. chars[i+1] = Character.toUpperCase(chars[i+1]);
  44. }
  45. break;
  46. }
  47. }
  48. }
  49. }
  50. return String.valueOf(chars);
  51. }
  52.  
  53. public static String capitalizeEveryWord(String source, Locale locale) {
  54. return capitalizeEveryWord(source,null,locale);
  55. }
  56.  
  57. public static String capitalizeEveryWord(String source) {
  58. return capitalizeEveryWord(source,null,null);
  59. }
  60.  
  61. private static boolean isSurrogate(char chr){
  62. // Check if the current character is part of an UTF-16 Surrogate Pair.
  63. // Note: not validating the pair, just used to bypass (any found part of) it.
  64. return (Character.isHighSurrogate(chr) || Character.isLowSurrogate(chr));
  65. }
  66.  
  67. private static List<Delimiter> getDefaultDelimiters(){
  68. // If no delimiter specified, "Capitalize after space" rule is set by default.
  69. List<Delimiter> delimiters = new ArrayList<Delimiter>();
  70. delimiters.add(new Delimiter(Behavior.CAPITALIZE_AFTER_MARKER, ' '));
  71. return delimiters;
  72. }
  73.  
  74. static class Delimiter {
  75. private Behavior behavior;
  76. private char delimiter;
  77.  
  78. private Delimiter(Behavior behavior, char delimiter) {
  79. super();
  80. this.behavior = behavior;
  81. this.delimiter = delimiter;
  82. }
  83.  
  84. public boolean capitalizeBefore(){
  85. return (behavior.equals(Behavior.CAPITALIZE_BEFORE_MARKER)
  86. || behavior.equals(Behavior.CAPITALIZE_BEFORE_AND_AFTER_MARKER));
  87. }
  88.  
  89. public boolean capitalizeAfter(){
  90. return (behavior.equals(Behavior.CAPITALIZE_AFTER_MARKER)
  91. || behavior.equals(Behavior.CAPITALIZE_BEFORE_AND_AFTER_MARKER));
  92. }
  93.  
  94. public char getDelimiter() {
  95. return delimiter;
  96. }
  97. }
  98.  
  99. static enum Behavior {
  100. CAPITALIZE_AFTER_MARKER(0),
  101. CAPITALIZE_BEFORE_MARKER(1),
  102. CAPITALIZE_BEFORE_AND_AFTER_MARKER(2);
  103.  
  104. private int value;
  105.  
  106. private Behavior(int value) {
  107. this.value = value;
  108. }
  109.  
  110. public int getValue() {
  111. return value;
  112. }
  113. }
  114.  
  115.  
  116.  
  117.  
  118. public static void main(String[] args) throws Exception {
  119. String testString;
  120. List <Delimiter> delimiters;
  121.  
  122. Long startTime = new Date().getTime();
  123. System.out.println("=== WordsCapitalizer Live Demo ===");
  124.  
  125. // ==============================================================
  126. // SIMPLE USAGE
  127. // ==============================================================
  128.  
  129. testString = "cApItAlIzE this string after WHITE SPACES";
  130.  
  131. System.out.println("\n====================================\n SIMPLE USAGE\n====================================");
  132. System.out.println("Source: " + testString);
  133. System.out.println("Output: " + WordsCapitalizer.capitalizeEveryWord(testString));
  134. // ==============================================================
  135.  
  136.  
  137.  
  138. // ==============================================================
  139. // SINGLE CUSTOM-DELIMITER USAGE :
  140. // ==============================================================
  141. testString = "capitalize this string ONLY before'and''after'''APEX";
  142. delimiters = new ArrayList<Delimiter>();
  143. delimiters.add(new Delimiter(Behavior.CAPITALIZE_BEFORE_AND_AFTER_MARKER, '\''));
  144.  
  145. System.out.println("\n====================================\n SINGLE CUSTOM-DELIMITER USAGE\n====================================");
  146. System.out.println("Source: " + testString);
  147. System.out.println("Output: " + WordsCapitalizer.capitalizeEveryWord(testString,delimiters,null));
  148. // ==============================================================
  149.  
  150.  
  151.  
  152. // ==============================================================
  153. // MULTIPLE CUSTOM-DELIMITER USAGE :
  154. // ==============================================================
  155. testString = "capitalize this string AFTER SPACES, BEFORE'APEX, " +
  156. "and #AFTER AND BEFORE# NUMBER SIGN (#)";
  157. delimiters = new ArrayList<Delimiter>();
  158. delimiters.add(new Delimiter(Behavior.CAPITALIZE_AFTER_MARKER, ' '));
  159. delimiters.add(new Delimiter(Behavior.CAPITALIZE_BEFORE_MARKER, '\''));
  160. delimiters.add(new Delimiter(Behavior.CAPITALIZE_BEFORE_AND_AFTER_MARKER, '#'));
  161.  
  162. System.out.println("\n====================================\n MULTIPLE CUSTOM-DELIMITER USAGE\n====================================");
  163. System.out.println("Source: " + testString);
  164. System.out.println("Output: " + WordsCapitalizer.capitalizeEveryWord(testString,delimiters,null));
  165. // ==============================================================
  166.  
  167.  
  168. // ==============================================================
  169. // SIMPLE USAGE WITH CUSTOM LOCALE :
  170. // ==============================================================
  171. // Reference for this i18n problem: http://en.wikipedia.org/wiki/Dotted_and_dotless_I
  172. testString = "Uniforming the first and last vowels (different kind of 'i's) of the Turkish word D[\u0130]YARBAK[\u0049]R (D\u0130YARBAK\u0049R) ";
  173.  
  174. System.out.println("\n====================================\n SIMPLE USAGE WITH CUSTOM LOCALE\n====================================");
  175. System.out.println("Source: " + testString);
  176. System.out.println("Output: " + WordsCapitalizer.capitalizeEveryWord(testString,Locale.ENGLISH));
  177.  
  178.  
  179. // ==============================================================
  180. // SIMPLE USAGE WITH A SURROGATE PAIR INSIDE THE STRING:
  181. // ==============================================================
  182.  
  183. byte[] data =
  184. {
  185. 0, 0x61, // a
  186. 0, 0x62, // b
  187. 0, 0x20, // space
  188. // Surrogate Pair Begins here
  189. (byte) 0xD8, 1, // High surrogate
  190. (byte) 0xDC, 2, // Low surrogate
  191. // Surrogate Pair Ends here
  192. 0, 0x63, // c
  193. 0, 0x20, // space
  194. 0, 0x64, // d
  195. 0, 0x65, // e
  196. 0, 0x20, // space
  197. 0,(byte) 0xE0, // à
  198. };
  199.  
  200. testString = new String(data, "UTF-16");
  201. System.out.println("\n====================================\n SIMPLE USAGE WITH A SURROGATE PAIR \n====================================");
  202. System.out.println("Source: " + testString);
  203. System.out.println("Output: " + WordsCapitalizer.capitalizeEveryWord(testString));
  204.  
  205.  
  206.  
  207. Long endTime = new Date().getTime();
  208. System.out.println("\nTotal Execution time (in milliseconds): [" + (endTime - startTime)+"]");
  209.  
  210. }
  211.  
  212. }
Success #stdin #stdout 0.03s 245632KB
stdin
Standard input is empty
stdout
=== WordsCapitalizer Live Demo ===

====================================
 SIMPLE USAGE
====================================
Source: cApItAlIzE this string after WHITE SPACES
Output: Capitalize This String After White Spaces

====================================
 SINGLE CUSTOM-DELIMITER USAGE
====================================
Source: capitalize this string ONLY before'and''after'''APEX
Output: Capitalize this string only beforE'AnD''AfteR'''Apex

====================================
 MULTIPLE CUSTOM-DELIMITER USAGE
====================================
Source: capitalize this string AFTER SPACES, BEFORE'APEX, and #AFTER AND BEFORE# NUMBER SIGN (#)
Output: Capitalize This String After Spaces, BeforE'apex, And #After And BeforE# Number Sign (#)

====================================
 SIMPLE USAGE WITH CUSTOM LOCALE
====================================
Source: Uniforming the first and last vowels (different kind of 'i's) of the Turkish word D[İ]YARBAK[I]R (DİYARBAKIR) 
Output: Uniforming The First And Last Vowels (different Kind Of 'i's) Of The Turkish Word D[i]yarbak[i]r (diyarbakir) 

====================================
 SIMPLE USAGE WITH A SURROGATE PAIR 
====================================
Source: ab 𐐂c de à
Output: Ab 𐐪c De À

Total Execution time (in milliseconds): [5]