import java.util.HashSet; import java.util.Map; import java.util.Scanner; import java.util.Set; import java.util.StringTokenizer; import java.util.TreeMap; public class Main { //========================================================================== // Interface //========================================================================== public interface FrequencyVocabulary { public class Statistic { public int count; public double frequency; public Statistic() { count = 0; frequency = 0; } } } //========================================================================== // Implementation //========================================================================== public class FrequencyVocabularyImpl implements FrequencyVocabulary { { int totalCount = 0; // first we parse text to tokens (words) and fill the map where keys are words and values - statistic for given word while (st.hasMoreTokens()) { if ( containsDigits(key) ) { continue; } Statistic value = map.get(key); if (null != value) { value.count++; map.replace(key, value); } else { value = new Statistic(); value.count = 1; map.put( key, value ); } totalCount += value.count; } // now we can calculate and set frequency field for all the entries { if ( entry.getValue().count != 0 ) { entry.getValue().frequency = ((double)entry.getValue().count) / totalCount; } } } { return map; } // select token with max frequency from the given set of tokens { Statistic maxFreqStat = null; { Statistic stat = map.get(token); if ( stat != null ) { if ( maxFreqToken == null ) { maxFreqToken = token; maxFreqStat = stat; } else { if ( stat.count > maxFreqStat.count ) { maxFreqToken = token; maxFreqStat = stat; } } } } return maxFreqToken; } { if ( token.indexOf('0') >= 0 || token.indexOf('1') >= 0 || token.indexOf('2') >= 0 || token.indexOf('3') >= 0 || token.indexOf('4') >= 0 || token.indexOf('5') >= 0 || token.indexOf('6') >= 0 || token.indexOf('7') >= 0 || token.indexOf('8') >= 0 || token.indexOf('9') >= 0 ) { return true; } return false; } } //========================================================================== // Testing //========================================================================== { for(;;) { if ( ! in.hasNextLine() ) break; if ( inputLine == null || inputLine.equals("%EOF%") ) break; text = text + inputLine + "\n"; } in.close(); if ( text.isEmpty() ) { return; } //System.out.println("*** DBG:input text is: <"+text+">"); Main obj = new Main(); FrequencyVocabulary fv = obj.new FrequencyVocabularyImpl( text, " ?.,;:-+\n\t{}[]=()<>*&%$#@\"\'`~!" ); { statEntry.getKey(), statEntry.getValue().count, statEntry.getValue().frequency ); } HashSet<String> tokens = new HashSet<String>(); tokens.add("for"); tokens.add("do"); tokens.add("to"); tokens.add("else"); tokens.add("while"); tokens.add("if"); { caption += " " + s; } if ( s == null ) s = "<null>"; } }
To be, or not to be: that is the question: Whether 'tis nobler in the mind to suffer The slings and arrows of outrageous fortune, Or to take arms against a sea of troubles, And by opposing end them? To die: to sleep; No more; and by a sleep to say we end The heart-ache and the thousand natural shocks That flesh is heir to, 'tis a consummation Devoutly to be wish'd. To die, to sleep; To sleep: perchance to dream: ay, there's the rub; For in that sleep of death what dreams may come When we have shuffled off this mortal coil, Must give us pause: there's the respect That makes calamity of so long life; For who would bear the whips and scorns of time, The oppressor's wrong, the proud man's contumely, The pangs of despised love, the law's delay, The insolence of office and the spurns That patient merit of the unworthy takes, When he himself might his quietus make With a bare bodkin? who would fardels bear, To grunt and sweat under a weary life, But that the dread of something after death, The undiscover'd country from whose bourn No traveller returns, puzzles the will And makes us rather bear those ills we have Than fly to others that we know not of? Thus conscience does make cowards of us all; And thus the native hue of resolution Is sicklied o'er with the pale cast of thought, And enterprises of great pith and moment With this regard their currents turn awry, And lose the name of action. - Soft you now! The fair Ophelia! Nymph, in thy orisons Be all my sins remember'd. %EOF%
Word frequency statistics: And , 5, 0.007321 Be , 1, 0.001464 But , 1, 0.001464 Devoutly , 1, 0.001464 For , 2, 0.002928 Is , 1, 0.001464 Must , 1, 0.001464 No , 2, 0.002928 Nymph , 1, 0.001464 Ophelia , 1, 0.001464 Or , 1, 0.001464 Soft , 1, 0.001464 Than , 1, 0.001464 That , 3, 0.004392 The , 7, 0.010249 Thus , 1, 0.001464 To , 5, 0.007321 When , 2, 0.002928 Whether , 1, 0.001464 With , 2, 0.002928 a , 5, 0.007321 ache , 1, 0.001464 action , 1, 0.001464 after , 1, 0.001464 against , 1, 0.001464 all , 2, 0.002928 and , 7, 0.010249 arms , 1, 0.001464 arrows , 1, 0.001464 awry , 1, 0.001464 ay , 1, 0.001464 bare , 1, 0.001464 be , 3, 0.004392 bear , 3, 0.004392 bodkin , 1, 0.001464 bourn , 1, 0.001464 by , 2, 0.002928 calamity , 1, 0.001464 cast , 1, 0.001464 coil , 1, 0.001464 come , 1, 0.001464 conscience , 1, 0.001464 consummation , 1, 0.001464 contumely , 1, 0.001464 country , 1, 0.001464 cowards , 1, 0.001464 currents , 1, 0.001464 d , 3, 0.004392 death , 2, 0.002928 delay , 1, 0.001464 despised , 1, 0.001464 die , 2, 0.002928 does , 1, 0.001464 dread , 1, 0.001464 dream , 1, 0.001464 dreams , 1, 0.001464 end , 2, 0.002928 enterprises , 1, 0.001464 er , 1, 0.001464 fair , 1, 0.001464 fardels , 1, 0.001464 flesh , 1, 0.001464 fly , 1, 0.001464 fortune , 1, 0.001464 from , 1, 0.001464 give , 1, 0.001464 great , 1, 0.001464 grunt , 1, 0.001464 have , 2, 0.002928 he , 1, 0.001464 heart , 1, 0.001464 heir , 1, 0.001464 himself , 1, 0.001464 his , 1, 0.001464 hue , 1, 0.001464 ills , 1, 0.001464 in , 3, 0.004392 insolence , 1, 0.001464 is , 2, 0.002928 know , 1, 0.001464 law , 1, 0.001464 life , 2, 0.002928 long , 1, 0.001464 lose , 1, 0.001464 love , 1, 0.001464 make , 2, 0.002928 makes , 2, 0.002928 man , 1, 0.001464 may , 1, 0.001464 merit , 1, 0.001464 might , 1, 0.001464 mind , 1, 0.001464 moment , 1, 0.001464 more , 1, 0.001464 mortal , 1, 0.001464 my , 1, 0.001464 name , 1, 0.001464 native , 1, 0.001464 natural , 1, 0.001464 nobler , 1, 0.001464 not , 2, 0.002928 now , 1, 0.001464 o , 1, 0.001464 of , 15, 0.021962 off , 1, 0.001464 office , 1, 0.001464 opposing , 1, 0.001464 oppressor , 1, 0.001464 or , 1, 0.001464 orisons , 1, 0.001464 others , 1, 0.001464 outrageous , 1, 0.001464 pale , 1, 0.001464 pangs , 1, 0.001464 patient , 1, 0.001464 pause , 1, 0.001464 perchance , 1, 0.001464 pith , 1, 0.001464 proud , 1, 0.001464 puzzles , 1, 0.001464 question , 1, 0.001464 quietus , 1, 0.001464 rather , 1, 0.001464 regard , 1, 0.001464 remember , 1, 0.001464 resolution , 1, 0.001464 respect , 1, 0.001464 returns , 1, 0.001464 rub , 1, 0.001464 s , 5, 0.007321 say , 1, 0.001464 scorns , 1, 0.001464 sea , 1, 0.001464 shocks , 1, 0.001464 shuffled , 1, 0.001464 sicklied , 1, 0.001464 sins , 1, 0.001464 sleep , 5, 0.007321 slings , 1, 0.001464 so , 1, 0.001464 something , 1, 0.001464 spurns , 1, 0.001464 suffer , 1, 0.001464 sweat , 1, 0.001464 take , 1, 0.001464 takes , 1, 0.001464 that , 4, 0.005857 the , 15, 0.021962 their , 1, 0.001464 them , 1, 0.001464 there , 2, 0.002928 this , 2, 0.002928 those , 1, 0.001464 thought , 1, 0.001464 thousand , 1, 0.001464 thus , 1, 0.001464 thy , 1, 0.001464 time , 1, 0.001464 tis , 2, 0.002928 to , 10, 0.014641 traveller , 1, 0.001464 troubles , 1, 0.001464 turn , 1, 0.001464 under , 1, 0.001464 undiscover , 1, 0.001464 unworthy , 1, 0.001464 us , 3, 0.004392 we , 4, 0.005857 weary , 1, 0.001464 what , 1, 0.001464 whips , 1, 0.001464 who , 2, 0.002928 whose , 1, 0.001464 will , 1, 0.001464 wish , 1, 0.001464 with , 1, 0.001464 would , 2, 0.002928 wrong , 1, 0.001464 you , 1, 0.001464 -------------------------------------- Select max frequent token from: else for do to while if => selected is: to