proj home

Files   Classes   Functions   Hierarchy  

tokenizer.h

Go to the documentation of this file.
00001 #ifndef TOKENIZER_H 
00002 #define TOKENIZER_H
00003 
00004 #include <cassert>
00005 #include <string>
00006 #include <vector>
00007 #include <iostream>
00008 #include <list>
00009 #include <algorithm>
00010 using namespace std;
00011 
00012 #include <stringspace.h>
00013 
// Shorthand iterator types for std::list<std::string>, the container type
// used for tokenizer::seq below.
// liststringi  : mutable iterator.
// liststringic : const iterator (read-only access to the element).
00014 typedef list<string>::iterator liststringi;
00015 typedef list<string>::const_iterator liststringic;
00016 
// tokenizermisc: a class with no data members in this listing; both members
// are public static functions, so they are called without an instance.
// NOTE(review): boolc and stringc are not declared in this header; presumably
// they come from <stringspace.h> as const-qualified bool / std::string
// aliases -- confirm there.
// The definitions are not part of this listing, so the per-function notes
// below are inferred from names and signatures only.
00024 class tokenizermisc
00025 {
00026 public:
00027 
        // Presumably reads the file named by fname into str (str is a
        // non-const reference, i.e. an output parameter) and reports
        // success through the result -- TODO confirm against the definition.
00029   static boolc readfile(string & str, stringc & fname);
00030 
        // Presumably compares s1 and s2 while disregarding whitespace;
        // exactly which characters count as "space" is not visible here
        // -- TODO confirm against the definition.
00032   static boolc comparewithoutspace(stringc & s1, stringc & s2);
00033 
00034 };
00035 
00036 
// tokenizer: wraps a std::list<std::string> (seq) together with a position
// in that list (current). Both data members are public.
// Only remove_if(SPACER) and apply(X) are defined inline in this listing;
// every other member is declared here and defined elsewhere, so the notes on
// those members are inferred from names/signatures and marked as such.
// NOTE(review): boolc and stringc are not declared in this header; presumably
// const-qualified bool / std::string aliases from <stringspace.h> -- confirm.
00066 class tokenizer
00067 {
        // Private (class default access, declared before "public:").
        // Overload of atomize that takes a list position i and an optional
        // string offset k0, defaulting to string::npos. How i and k0 are
        // used is not visible here -- see the definition.
00069   void atomize( liststringi & i, stringc & atom, string::size_type const k0=string::npos );
00070 public:
00071 
        // Position within seq. Presumably the element the iterator-style
        // operators below act on -- TODO confirm.
00073   liststringi current;
00074 
        // The sequence of strings this object operates on.
00076   list<string> seq;
00077 
00078   //
00079   // Iterator Characteristics
00080   // 
00081 
        // NOTE(review): the five members below are only declared here. Judging
        // by the section title they give the object iterator-like use over
        // seq via current (reset / validity test / dereference / advance);
        // confirm exact semantics against the definitions.
        // Presumably moves current back to the start of seq.
00083   void reset();
        // Presumably a validity/end test on current; which state yields
        // true is not visible here.
00085   boolc operator ! () const; 
        // Mutable access to a string, presumably the one at current.
00087   string & operator * ();
        // Read-only counterpart of operator*, callable on const objects.
00089   stringc & operator() () const; 
        // Prefix increment only (no int parameter); presumably advances current.
00091   void operator ++ ();
00092 
00093   //
00094   // Processing each element in the list. 
00095   //
00096 
        // NOTE(review): declarations only; the behaviors suggested by the
        // names below (splitting around an atom, removing tokens, stripping
        // comments, trimming) are assumptions to verify in the definitions.
        // Public single-argument atomize; presumably applied across seq.
00098   void atomize(stringc & atom);
00100   void subtract( stringc & atom );
00102   void tokenize();
        // comment is presumably the marker text that starts a comment.
00105   void stripcomment( stringc & comment );
00107   void remove(stringc & token);
        // Zero-argument overload; its removal criterion is not visible here.
00109   void remove_if();
        // Forwards directly to list<string>::remove_if: every element of seq
        // for which spacer(element) is true is erased from seq.
        // spacer is taken by value and passed on by value.
00111   template< typename SPACER >
00112   void remove_if(SPACER spacer)
00113     { seq.remove_if(spacer); }
00114 
        // Invokes x(*i) on every element of seq, in list order from
        // seq.begin() up to (not including) seq.end(). Each element is
        // passed as a non-const string lvalue, so x may modify it in place.
        // x is taken by value: the calls are made on this function's copy,
        // and nothing is returned to the caller.
00116   template< typename X >
00117   void apply(X x)
00118   { 
00119     liststringi i = seq.begin();
          // End position is computed once, before the loop starts.
00120     liststringic imax = seq.end();
00121     for ( ; i!=imax; ++i )
00122       { x(*i); }
00123   };
00124   
        // Declarations only. Presumably whitespace trimming of the elements,
        // with trim_and_prune additionally dropping some elements -- TODO confirm.
00126   void trim();
00128   void trim_and_prune();
00129 
00130   //
00131   // Miscellaneous
00132   //
00133 
        // Const member that fills the caller's vector v using atom.
        // Presumably splits the string at current around atom -- TODO confirm.
00136   void extractfromcurrent
00137   ( 
00138     vector<string> & v, 
00139     stringc & atom 
00140   ) const;
00141 
00142   //
00143   // Printing
00144   //
00145 
        // Public string member; presumably written between elements by
        // print() -- TODO confirm. Its initial value is not visible here.
00147   string printdelimiter;
        // Writes to os and returns an ostream reference (presumably os
        // itself, to allow chaining). Does not modify the tokenizer (const).
00149   ostream& print(ostream& os) const;
00150 
00151   //
00152   // Reading
00153   //
00154 
00155   // Note: the constructors call reset() by default.
00156   //   The read* functions below do not call reset().
00157 
        // Declarations only. All three take the input text as data;
        // how each one splits it into seq is not visible here.
00159   void read(stringc & data);
00161   void readaslines(stringc & data);
00164   void readaslinesgeneral(stringc & data);
00165 
00166   //
00167   // Construction
00168   //
00169 
        // Default constructor.
00171   tokenizer();
00172 
        // Constructs from a string. Not marked explicit, so a stringc
        // converts implicitly to tokenizer where one is expected.
00174   tokenizer(stringc & data);
00176 // TODO?  tokenizer(tokenizer const & tk0);
00177 
        // Comparison with another tokenizer. Neither side is const here:
        // the member is non-const and t2 is a non-const reference, so
        // const tokenizers cannot be compared with this operator.
00179   boolc operator == (tokenizer & t2);
00180   
00181   //
00182   // Parsing and tags
00183   //
00184 
        // NOTE(review): declarations only for the rest of this section.
        // k is an output position (non-const reference); atom is the text
        // searched for. Presumably the search is within the current string.
00186   boolc find(string::size_type & k, stringc & atom );
00187  
        // As above with an extra position k0, presumably the search start.
00189   boolc find( string::size_type& k, stringc & atom, string::size_type const k0 );
00190 
        // Presumably searches forward for the next atom and splits it out,
        // returning whether one was found -- TODO confirm.
00193   boolc atomize_next( stringc & atom ); 
00194 
        // Overload with a list position iend_, presumably the search limit.
00196   boolc atomize_next 
00197   (
00198     stringc & atom,
00199     liststringi & iend_
00200   );
00201 
        // Tag variants: i1 and i2 are non-const iterator references,
        // presumably set to the bounds of the located tag; iend_ is
        // presumably the search limit -- TODO confirm.
00203   boolc atomize_next_tag
00204   (
00205     liststringi& i1,
00206     liststringi& i2,
00207     stringc& tag,
00208     liststringi& iend_
00209   );
00210 
        // Same as the four-argument form, without an explicit iend_.
00211   boolc atomize_next_tag
00212   (
00213     liststringi& i1,
00214     liststringi& i2,
00215     stringc& tag
00216   );
00217 
00218   /* Search for a sequence bounded by two atoms (atom1 and atom2);
           i1 and i2 presumably receive the bounding positions. */
00219   boolc atomize_next
00220   (
00221     liststringi& i1,
00222     liststringi& i2,
00223     stringc& atom1,
00224     stringc& atom2
00225   );
00226 
        // Implicit conversion of the tokenizer to stringc. Non-const member,
        // so it is not usable on const tokenizers. How the elements are
        // combined into one string is not visible here.
00228   operator stringc ();
00229 
00230 };
00231 
// Stream insertion for tokenizer: takes the tokenizer by const reference and
// returns an ostream reference so insertions can be chained.
// Presumably forwards to ss.print(os) -- TODO confirm against the definition.
00234 ostream & operator << (ostream & os, tokenizer const & ss);
00235 
00236 #endif
00237 
00238 

Generated on Fri Mar 4 00:49:29 2011 for Chelton Evans Source by doxygen 1.5.8