Files Classes Functions Hierarchy
00001 #ifndef TOKENIZER_H 00002 #define TOKENIZER_H 00003 00004 #include <cassert> 00005 #include <string> 00006 #include <vector> 00007 #include <iostream> 00008 #include <list> 00009 #include <algorithm> 00010 using namespace std; 00011 00012 #include <stringspace.h> 00013 00014 typedef list<string>::iterator liststringi; 00015 typedef list<string>::const_iterator liststringic; 00016 00024 class tokenizermisc 00025 { 00026 public: 00027 00029 static boolc readfile(string & str, stringc & fname); 00030 00032 static boolc comparewithoutspace(stringc & s1, stringc & s2); 00033 00034 }; 00035 00036 00066 class tokenizer 00067 { 00069 void atomize( liststringi & i, stringc & atom, string::size_type const k0=string::npos ); 00070 public: 00071 00073 liststringi current; 00074 00076 list<string> seq; 00077 00078 // 00079 // Iterator Characteristics 00080 // 00081 00083 void reset(); 00085 boolc operator ! () const; 00087 string & operator * (); 00089 stringc & operator() () const; 00091 void operator ++ (); 00092 00093 // 00094 // Processing each element in the list. 00095 // 00096 00098 void atomize(stringc & atom); 00100 void subtract( stringc & atom ); 00102 void tokenize(); 00105 void stripcomment( stringc & comment ); 00107 void remove(stringc & token); 00109 void remove_if(); 00111 template< typename SPACER > 00112 void remove_if(SPACER spacer) 00113 { seq.remove_if(spacer); } 00114 00116 template< typename X > 00117 void apply(X x) 00118 { 00119 liststringi i = seq.begin(); 00120 liststringic imax = seq.end(); 00121 for ( ; i!=imax; ++i ) 00122 { x(*i); } 00123 }; 00124 00126 void trim(); 00128 void trim_and_prune(); 00129 00130 // 00131 // Miscellaneous 00132 // 00133 00136 void extractfromcurrent 00137 ( 00138 vector<string> & v, 00139 stringc & atom 00140 ) const; 00141 00142 // 00143 // Printing 00144 // 00145 00147 string printdelimiter; 00149 ostream& print(ostream& os) const; 00150 00151 // 00152 // Reading 00153 // 00154 00155 // Note: constructor by default calls reset(). 00156 // read* does not have this implemented. 00157 00159 void read(stringc & data); 00161 void readaslines(stringc & data); 00164 void readaslinesgeneral(stringc & data); 00165 00166 // 00167 // Construction 00168 // 00169 00171 tokenizer(); 00172 00174 tokenizer(stringc & data); 00176 // TODO? tokenizer(tokenizer const & tk0); 00177 00179 boolc operator == (tokenizer & t2); 00180 00181 // 00182 // Parsing and tags 00183 // 00184 00186 boolc find(string::size_type & k, stringc & atom ); 00187 00189 boolc find( string::size_type& k, stringc & atom, string::size_type const k0 ); 00190 00193 boolc atomize_next( stringc & atom ); 00194 00196 boolc atomize_next 00197 ( 00198 stringc & atom, 00199 liststringi & iend_ 00200 ); 00201 00203 boolc atomize_next_tag 00204 ( 00205 liststringi& i1, 00206 liststringi& i2, 00207 stringc& tag, 00208 liststringi& iend_ 00209 ); 00210 00211 boolc atomize_next_tag 00212 ( 00213 liststringi& i1, 00214 liststringi& i2, 00215 stringc& tag 00216 ); 00217 00218 /* Search for sequence bounded by two atoms. */ 00219 boolc atomize_next 00220 ( 00221 liststringi& i1, 00222 liststringi& i2, 00223 stringc& atom1, 00224 stringc& atom2 00225 ); 00226 00228 operator stringc (); 00229 00230 }; 00231 00234 ostream & operator << (ostream & os, tokenizer const & ss); 00235 00236 #endif 00237 00238
1.5.8