fork download
  1. //http://stackoverflow.com/questions/21638775/tokenizing-an-arithmetic-expression
  2.  
  #include <algorithm>
  #include <cctype>
  #include <cstddef>
  #include <cstdlib>
  #include <iostream>
  #include <iterator>
  #include <sstream>
  #include <stdexcept>
  #include <string>
  9.  
  // A position in the input text, expressed as a line and a column.
  // A default-constructed location is line 1, column 1.
  struct location {
      unsigned line;
      unsigned col;

      location()
          : line(1u)
          , col(1u)
      {
      }
  };
  15.  
  // Kinds of token used by the tokenizer. The order of the enumerators fixes
  // their numeric values, so it must not be changed.
  enum token_type {
      token_eof,     // end of input
      token_double,  // numeric literal
      token_mul,     // '*'
      token_div,     // '/'
      token_add,     // '+'
      token_sub,     // '-'
      token_pow,     // '^'
      token_oparam,  // '('
      token_cparam   // ')'
  };
  17. struct token {
  18. location loc;
  19. std::string literal;
  20. token_type type;
  21. double value;
  22.  
  23. token() {}
  24. explicit token(location l) : loc(l), type(token_eof) {}
  25. explicit token(location l, std::string lit, token_type t) : loc(l), literal(lit), type(t) {}
  26. explicit token(location l, std::string lit, double v) : loc(l), literal(lit), type(token_double), value(v) {}
  27. };
  28. std::ostream& operator<<(std::ostream& out, const token& t)
  29. {return out << t.literal;}
  30.  
  31. struct invalid_character : std::runtime_error {
  32. invalid_character(location loc, char c)
  33. : std::runtime_error("line:"+std::to_string(loc.line)+" col:"+std::to_string(c)+" - invalid character " + std::string(1, c)),
  34. loc(loc), c(c)
  35. {}
  36.  
  37. location loc;
  38. char c;
  39. };
  40.  
  41. struct tokenizer_iterator {
  42. typedef token value_type;
  43. typedef token* pointer;
  44. typedef token& reference;
  45. typedef ptrdiff_t difference_type;
  46. typedef size_t size_type;
  47. typedef std::output_iterator_tag iterator_category;
  48.  
  49. tokenizer_iterator() : source(nullptr), loc(), cur() {}
  50. tokenizer_iterator(std::istream& source) :source(&source), loc(), cur() {load();}
  51.  
  52. const token& operator*() const {return cur;}
  53. const token* operator->() const {return &cur;}
  54. tokenizer_iterator& operator++() {load(); return *this;}
  55. tokenizer_iterator operator++(int) {load(); return *this;}
  56. friend bool operator==(const tokenizer_iterator& lhs, const tokenizer_iterator& rhs) {return lhs.cur.type==rhs.cur.type;}
  57. friend bool operator!=(const tokenizer_iterator& lhs, const tokenizer_iterator& rhs) {return lhs.cur.type!=rhs.cur.type;}
  58. private:
  59. void load();
  60.  
  61. std::istream* source;
  62. location loc;
  63. token cur;
  64. };
  65.  
  66.  
  67.  
  68.  
  69.  
  70.  
  71.  
  72.  
  73.  
  74.  
  75.  
  76.  
  77.  
  78.  
  79.  
  80.  
  81.  
  82. void tokenizer_iterator::load() {
  83. if (!*source)
  84. return;
  85.  
  86. char next = source->peek();
  87. if (next==EOF) {
  88. cur = token(loc);
  89. return;
  90. }
  91. while(isspace(next)) {
  92. if (next=='\n') {
  93. ++loc.line;
  94. loc.col = 1;
  95. } else
  96. loc.col += 1;
  97. next = source->get();
  98. next = source->peek();
  99. }
  100. switch(next) {
  101. case '+':
  102. next = source->get();
  103. loc.col += 1;
  104. cur = token(loc, "+", token_add);
  105. return;
  106. case '-':
  107. next = source->get();
  108. loc.col += 1;
  109. cur = token(loc, "-", token_sub);
  110. return;
  111. case '*':
  112. next = source->get();
  113. loc.col += 1;
  114. cur = token(loc, "*", token_mul);
  115. return;
  116. case '/':
  117. next = source->get();
  118. loc.col += 1;
  119. cur = token(loc, "/", token_div);
  120. return;
  121. case '^':
  122. next = source->get();
  123. loc.col += 1;
  124. cur = token(loc, "^", token_pow);
  125. return;
  126. case '(':
  127. next = source->get();
  128. loc.col += 1;
  129. cur = token(loc, "(", token_oparam);
  130. return;
  131. case ')':
  132. next = source->get();
  133. loc.col += 1;
  134. cur = token(loc, ")", token_cparam);
  135. return;
  136. case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
  137. {
  138. std::string num;
  139. location start = loc;
  140.  
  141. do {
  142. num.push_back(next);
  143. next = source->get();
  144. loc.col += 1;
  145. next = source->peek();
  146. } while(isdigit(next));
  147. if (next=='.') {
  148. num.push_back(next);
  149. next = source->get();
  150. loc.col += 1;
  151. next = source->peek();
  152. }
  153. while(isdigit(next)) {
  154. num.push_back(next);
  155. next = source->get();
  156. loc.col += 1;
  157. next = source->peek();
  158. }
  159.  
  160. double value = atof(num.c_str());
  161. cur = token(start, num, value);
  162. return;
  163. }
  164. default:
  165. throw invalid_character(loc, next);
  166. }
  167. }
  168.  
  169. int main() {
  170. std::istringstream ss;
  171. ss.str("3.14 + 3 * (7.7/9.8^32.9 )");
  172. std::copy(tokenizer_iterator(ss), tokenizer_iterator(), std::ostream_iterator<token>(std::cout, "\n"));
  173. return 0;
  174. }
Compilation error #stdin compilation error #stdout 0s 0KB
stdin
Standard input is empty
compilation info
prog.cpp:45:13: error: ‘ptrdiff_t’ does not name a type
     typedef ptrdiff_t difference_type;
             ^
In file included from /usr/include/c++/4.8/bits/stl_algobase.h:65:0,
                 from /usr/include/c++/4.8/bits/char_traits.h:39,
                 from /usr/include/c++/4.8/ios:40,
                 from /usr/include/c++/4.8/ostream:38,
                 from /usr/include/c++/4.8/iostream:39,
                 from prog.cpp:3:
/usr/include/c++/4.8/bits/stl_iterator_base_types.h: In instantiation of ‘struct std::__iterator_traits<tokenizer_iterator, true>’:
/usr/include/c++/4.8/bits/stl_iterator_base_types.h:159:12:   required from ‘struct std::iterator_traits<tokenizer_iterator>’
/usr/include/c++/4.8/bits/stl_algobase.h:381:57:   required from ‘_OI std::__copy_move_a(_II, _II, _OI) [with bool _IsMove = false; _II = tokenizer_iterator; _OI = std::ostream_iterator<token>]’
/usr/include/c++/4.8/bits/stl_algobase.h:428:38:   required from ‘_OI std::__copy_move_a2(_II, _II, _OI) [with bool _IsMove = false; _II = tokenizer_iterator; _OI = std::ostream_iterator<token>]’
/usr/include/c++/4.8/bits/stl_algobase.h:460:17:   required from ‘_OI std::copy(_II, _II, _OI) [with _II = tokenizer_iterator; _OI = std::ostream_iterator<token>]’
prog.cpp:172:106:   required from here
/usr/include/c++/4.8/bits/stl_iterator_base_types.h:153:53: error: no type named ‘difference_type’ in ‘struct tokenizer_iterator’
       typedef typename _Iterator::difference_type   difference_type;
                                                     ^
stdout
Standard output is empty