#include <tokenizer.h>

#include <fstream>
#include <sstream>
using namespace std;


// Reports whether `current` still refers to a token, i.e. has not reached
// seq.end().  Mind the sense: the result is true while iteration may
// continue, which is why loops in this file read `for ( ; !tk; ++tk )`.
boolc tokenizer::operator ! () const 
{
  bool const hastoken = (current != seq.end());
  return hastoken;
}

// Mutable access to the token `current` refers to.
// Precondition (asserted): current is not seq.end().
string & tokenizer::operator * ()
{
  assert(current != seq.end()); 

  string & token = *current;
  return token;
}

// Read-only access to the token `current` refers to.
// Precondition (asserted): current is not seq.end().
stringc & tokenizer::operator() () const 
{
  assert(current != seq.end()); 

  stringc & token = *current;
  return token;
}

// Forwards to seq.remove(token), dropping the matching tokens from the
// sequence.
// NOTE(review): `current` is not adjusted here; if seq is a std::list and
// the removed element is the one `current` refers to, `current` would be
// left dangling -- confirm callers reset() afterwards.
void tokenizer::remove(stringc & token)
{ 
  seq.remove(token); 
}

// Removes from seq every token selected by the spacerdelete<>() predicate.
// spacerdelete is defined outside this file; presumably it matches empty
// or whitespace-only tokens -- TODO confirm.
void tokenizer::remove_if()
{ 
  seq.remove_if(spacerdelete<>()); 
}

// Runs the spacertrim<>() functor over the tokens via apply().
// Both apply() and spacertrim are defined outside this file; presumably
// this strips surrounding whitespace from each token -- TODO confirm.
void tokenizer::trim()
{ 
  apply(spacertrim<>()); 
}

// Convenience: trim() every token, then remove the tokens selected by
// spacerdelete<>().
// Note that this calls a remove_if overload taking a predicate, which is
// declared outside this file (the remove_if defined here takes no argument).
void tokenizer::trim_and_prune()
{ 
  trim(); 
  remove_if(spacerdelete<>()); 
}

// Appends `data` to the end of seq as one new, unsplit token.
// `current` is not modified.
void tokenizer::read(stringc & data)
{ 
  seq.push_back(data); 
}

// Appends `data` as one token and then calls subtract("\n"), which splits
// the tokens around every "\n" and removes the "\n" tokens themselves.
// subtract() works on the whole sequence, so tokens that were already
// present are split as well.
void tokenizer::readaslines(stringc & data)
{ 
  seq.push_back(data); subtract("\n"); 
}



// Compares two strings token by token after splitting them around
// whitespace.
// Each input is loaded into its own tokenizer, atomized around " ", "\n"
// and "\t" (in that order), trimmed, and filtered with spacerdelete<>();
// the two token sequences are then compared with tokenizer::operator==.
//
// s1, s2  the strings to compare.
// Returns the result of t1==t2 on the normalized tokenizers.
boolc tokenizermisc::comparewithoutspace
(
  stringc & s1, 
  stringc & s2
)
{
  // Separators, applied in this order to both inputs.
  static char const * const separators[] = { " ", "\n", "\t" };
  unsigned const nseparators = sizeof(separators)/sizeof(separators[0]);

  tokenizer t1(s1);
  for (unsigned n=0; n<nseparators; ++n)
    t1.atomize(separators[n]);
  t1.trim();
  t1.remove_if(spacerdelete<>());

  tokenizer t2(s2);
  for (unsigned n=0; n<nseparators; ++n)
    t2.atomize(separators[n]);
  t2.trim();
  t2.remove_if(spacerdelete<>());

  return t1==t2;
}

// Reads the complete contents of the file named `fname` into `str`.
//
// str    receives the file contents; left untouched when the file cannot
//        be opened.
// fname  path of the file to read.
// Returns false when the file cannot be opened, true otherwise.
//
// Deprecated: the assert below fires in builds with assertions enabled to
// steer callers towards stringserialization::serialize.
boolc tokenizermisc::readfile
(
  string & str, 
  stringc & fname
)
{
  assert(false); // Use stringserialization::serialize(str,filename); 
  ifstream file(fname.c_str());
  if (!file)
    return false;

  // Insert the stream *buffer*: inserting the ifstream object itself would
  // write its conversion-to-pointer value (and does not compile at all
  // since C++11) instead of the file contents.
  stringstream ss;
  ss << file.rdbuf();

  str = ss.str();

  return true;
}

// Truncates every token at the first occurrence of `comment`: the marker
// and everything after it within that token are erased.  Tokens that do
// not contain the marker are left as they are.
void tokenizer::stripcomment(stringc & comment)
{
  for (liststringi it = seq.begin(); it != seq.end(); ++it)
  {
    string::size_type const at = it->find(comment);
    if (at != string::npos)
      it->erase(at);   // erase from the marker to the end of the token
  }
}



// Searches from `current` to the end of seq for the first token containing
// `atom`, splits that token so the atom stands alone in a token of its
// own, and makes that token `current`.
//
// atom  the text to isolate.
// Returns true when the atom was found and isolated; false (with
// `current` unchanged) when no token from `current` onwards contains it.
boolc tokenizer::atomize_next
( 
  stringc & atom 
)
{
  string::size_type const atomlen = atom.length();

  // Advance to the first token that contains the atom.
  liststringi i = current;
  string::size_type k = string::npos;
  while (i!=seq.end())
  {
    k = i->find(atom.c_str());
    if (k!=string::npos)
      break;
    ++i;
  }

  if (i==seq.end())
    return false;

  // First split: afterwards *i is either the atom itself (match at offset
  // 0) or the text that preceded it.
  atomize(i,atom,k);

  bool const isolated = (i->length()==atomlen) && (atom==*i);
  if (!isolated)
  {
    // *i is the prefix; the following token now starts with the atom, so
    // split the remainder off behind it.
    ++i;
    atomize(i,atom,0);
    assert(i->length()==atomlen);
  }

  current=i;
  return true;
}







// Bounded variant of atomize_next: searches the tokens in the range
// [current, iend_) for the first one containing `atom`, splits it so the
// atom stands alone in its own token, and makes that token `current`.
//
// atom   the text to isolate.
// iend_  iterator marking the end of the search range (not searched).
// Returns true when the atom was found and isolated; false (with
// `current` unchanged) when no token in the range contains it.
boolc tokenizer::atomize_next
( 
  stringc & atom, 
  liststringi & iend_
)
{
  string::size_type const atomlen = atom.length();

  // Advance to the first token in range that contains the atom.
  liststringi i = current;
  string::size_type k = string::npos;
  while (i!=iend_)
  {
    k = i->find(atom.c_str());
    if (k!=string::npos)
      break;
    ++i;
  }

  if (i==iend_)
    return false;

  // First split: afterwards *i is either the atom itself (match at offset
  // 0) or the text that preceded it.
  atomize(i,atom,k);

  bool const isolated = (i->length()==atomlen) && (atom==*i);
  if (!isolated)
  {
    // *i is the prefix; the following token now starts with the atom, so
    // split the remainder off behind it.
    ++i;
    atomize(i,atom,0);
    assert(i->length()==atomlen);
  }

  current=i;
  return true;
}





// Isolates `atom1` and then, searching on from it, `atom2`, reporting the
// tokens that hold them.
//
// i1     receives the iterator to the isolated atom1 token.
// i2     receives the iterator to the isolated atom2 token.
// atom1  first text to isolate, searched from `current`.
// atom2  second text to isolate, searched from the atom1 token onwards.
// Returns true when both were found; `current` is then reset to i1.
// Returns false when either search fails; if only the second one fails,
// i1 has already been assigned and i2 is left untouched.
boolc tokenizer::atomize_next
(
  liststringi& i1,
  liststringi& i2,
  stringc& atom1,
  stringc& atom2
)
{
  if (!atomize_next(atom1))
    return false;
  i1=current;

  if (!atomize_next(atom2))
    return false;
  i2=current;

  // Leave the cursor on the first atom by default.
  current=i1;

  return true;
}

// Unbounded form of atomize_next_tag: searches up to the end of seq by
// forwarding to the four-argument overload with seq.end() as the limit.
//
// i1   receives the iterator to the opening-tag token.
// i2   receives the iterator to the closing-tag token.
// tag  tag name without the angle brackets.
// Returns whatever the bounded overload returns.
boolc tokenizer::atomize_next_tag
(
  liststringi& i1,
  liststringi& i2,
  stringc& tag
)
{
  liststringi last(seq.end());
  return atomize_next_tag(i1,i2,tag,last);
}

// Isolates the next "<tag>" ... "</tag>" pair found in [current, iend_).
//
// i1     receives the iterator to the token holding "<tag>".
// i2     receives the iterator to the token holding "</tag>".
// tag    tag name without the angle brackets.
// iend_  iterator marking the end of the search range.
// Returns true when both the opening and the closing tag were found;
// `current` is then reset to the opening tag.  Returns false otherwise;
// if only the closing tag is missing, i1 has already been assigned.
boolc tokenizer::atomize_next_tag
(
  liststringi& i1,
  liststringi& i2,
  stringc& tag,
  liststringi& iend_
)
{
  string const opening = "<"+tag+">";
  if (!atomize_next(opening,iend_))
    return false;
  i1=current;

  string const closing = "</"+tag+">";
  if (!atomize_next(closing,iend_))
    return false;
  i2=current;

  // Leave the cursor on the opening tag by default.
  current=i1;

  return true;
}

// Splits every token of seq around the occurrences of `atom`.
// Each call to the per-token overload performs one split and inserts the
// remainder right behind the token, so walking forward over the growing
// sequence handles the remainders in turn.
void tokenizer::atomize( stringc & atom )
{
  liststringi i = seq.begin();
  while (i!=seq.end())
  {
    atomize(i,atom);
    ++i;
  }
}

// Splits the current token around every occurrence of `atom` and stores
// the non-empty pieces, in order, in `v`.  The token itself is not
// modified.
//
// v     output; cleared first, then filled with the pieces.
// atom  separator; its occurrences are dropped from the output.  An empty
//       atom separates nothing, so the whole token is returned as a single
//       piece.
//
// Precondition (asserted): current is not seq.end().
void tokenizer::extractfromcurrent
( 
  vector<string> & v, 
  stringc & atom 
) const
{
  v.clear();

  // Same guard as operator* / operator(): there must be a current token.
  assert(current != seq.end());

  stringc & s = *current;

  if (s.empty())
    return;

  string::size_type const atomlen = atom.length();
  if (atomlen==0)
  {
    // find("") matches at offset 0 forever; without this guard the scan
    // below would never advance.
    v.push_back(s);
    return;
  }

  // Scan by position instead of repeatedly erasing the front of a copy.
  string::size_type pos = 0;
  while (pos<s.length())
  {
    string::size_type const k = s.find(atom.c_str(),pos);
    if (k==string::npos)
    {
      // No further separator: the rest is the last piece.
      v.push_back(s.substr(pos));
      break;
    }

    // Adjacent or leading separators produce no (empty) piece.
    if (k>pos)
      v.push_back(s.substr(pos,k-pos));

    pos = k+atomlen;
  }
}

// Performs one split of the token at `i` around the first occurrence of
// `atom`; the text split off is inserted as a new token directly behind
// `i`.
//
//  - atom at offset 0 and the token is exactly the atom: nothing to do.
//  - atom at offset 0 otherwise: the token becomes the atom, the rest
//    becomes the new token.
//  - atom at offset k>0: the token keeps the text before the atom, the new
//    token starts with the atom.
//
// i     token to split; must not be seq.end() (asserted).
// atom  text to split around; an empty atom makes this a no-op.
// k0    offset of a known occurrence of atom in *i (asserted), or
//       string::npos to have this function search for it.
void tokenizer::atomize
( 
  liststringi & i, 
  stringc & atom,
  string::size_type const k0 
)
{
  string::size_type const atomlen = atom.length();
  if (atomlen==0)
    return;

  // Valid iterator?
  assert(i!=seq.end());

  // Either trust the caller-supplied offset (checked in debug builds) or
  // look the atom up ourselves.
  string::size_type k = k0;
  if (k0==string::npos) 
    k=i->find(atom.c_str());
  else
  {
    assert(k0+atomlen-1<i->length());
    assert( i->substr(k0,atomlen)==atom );
  }

  if (k==string::npos)
    return;

  // The token already is exactly the atom: nothing to split.
  if (k==0 && i->length()==atomlen)
    return;

  // Cut behind the atom when it leads the token, in front of it otherwise.
  string::size_type const cut = (k==0) ? atomlen : k;

  liststringi behind(i);
  ++behind;

  string tail = i->substr(cut);
  i->erase(cut);
  seq.insert(behind,tail);
}

// Loads `data` and breaks it into tokens on newlines, commas and spaces.
// Steps: readaslines() appends the data and splits on "\n"; subtract()
// then splits on "," and " " and drops those separator tokens; finally the
// tokens are trimmed and the ones selected by remove_if() are discarded.
void tokenizer::readaslinesgeneral( stringc & data )
{
  readaslines(data);
  subtract(",");
  subtract(" ");
  trim();
  remove_if();
}

// Uses `atom` as a separator: first splits every token around it so each
// occurrence becomes a token of its own, then removes the tokens equal to
// `atom` from seq.
void tokenizer::subtract( stringc & atom )
{
  atomize(atom);
  seq.remove(atom);
}

// Writes all tokens to `os`, separated (not terminated) by printdelimiter.
// Nothing is written for an empty sequence.
//
// os  the stream to write to.
// Returns `os`.
ostream & tokenizer::print(ostream & os ) const
{
  liststringic i = seq.begin();
  liststringic iend2 = seq.end();

  // An empty sequence must return here: stepping the iterator past end()
  // below would be undefined behaviour.
  if (i==iend2)
    return os;

  // First token without a leading delimiter, the rest with one.
  os << *i;
  for ( ++i; i!=iend2; ++i )
  {
    os << printdelimiter << *i;
  }

  return os;
}

// Rewinds the cursor: `current` is set to seq.begin(), i.e. the first
// token.
void tokenizer::reset()
{
  current=seq.begin();
}

// Advances `current` to the next token.  Does nothing when `current` is
// already at seq.end(), so over-incrementing is harmless.
void tokenizer::operator ++ ()
{
  if (current!=seq.end())
    ++current;
} 

// Default constructor: sets printdelimiter to "\n" and places `current`
// at seq.end().
tokenizer::tokenizer()
  : printdelimiter("\n")
{
  current=seq.end();
}

// Constructs a tokenizer holding `data` as its single token, with
// `current` placed on that token.
//
// printdelimiter is initialised to "\n" exactly as in the default
// constructor; previously this constructor left it without an explicit
// value, so print() behaved differently depending on which constructor had
// been used.
tokenizer::tokenizer(stringc & data)
  : printdelimiter("\n")
{
  seq.push_back(data);
  reset();
}

// Stream insertion for tokenizer: delegates to ss.print(os) and returns
// the stream that call returns.
ostream & operator << (ostream & os, tokenizer const & ss)
{
  return ss.print(os);
}

// Token-wise equality: true when both tokenizers hold the same number of
// tokens and corresponding tokens compare equal.
//
// Side effect: both cursors are moved.  Each tokenizer is reset() first and
// then advanced during the comparison.
//
// Reminder: tokenizer::operator! yields true while a tokenizer still has a
// current token, so `!t` below reads as "t has a token".
boolc tokenizer::operator == (tokenizer & t2)
{
  tokenizer & t1(*this);

  t1.reset();
  t2.reset();

  while (!t2)
  {
    // t2 has a token here; t1 running out first means t1 is shorter.
    bool const t1hastoken = !t1;
    if (!t1hastoken)
      return false; 

    if ( t1() != t2() )
      return false;

    ++t1;
    ++t2;
  }

  // t2 is exhausted; equal only when t1 is exhausted as well.
  bool const t1hasmore = !t1;
  return !t1hasmore;
}

// Breaks the held tokens into space-separated pieces: subtract(" ") splits
// around spaces and drops the space tokens, then the tokens are trimmed
// and the ones selected by remove_if() are discarded.
void tokenizer::tokenize()
{
  subtract(" ");
  trim();
  remove_if();
}

// Conversion to a string: concatenates all tokens, first to last, with no
// separator between them.
// Side effect: the cursor is reset() and then walked to the end, so
// `current` is at seq.end() afterwards.
tokenizer::operator stringc ()
{
  string joined;

  reset();
  while (!(*this))          // operator! is true while a token is available
  {
    joined += (*this)();
    ++(*this);
  }
  
  return joined; 
}

/*
boolc tokenizer::myfind(size_t k, stringc& atom, size_t k0)
{
  return false;
}

boolc tokenizer::myfind(size_t k, stringc& atom)
{
  return false;
}
*/

// Finds the next occurrence of `atom`, starting at offset k0 inside the
// current token and continuing with the following tokens.
//
// k     receives the offset of the match within the token that `current`
//       refers to on success.
// atom  text to look for; an empty atom moves `current` to seq.end() and
//       fails.
// k0    offset in the current token at which to start searching.
// Returns true on a match.  When the current token has no match from k0
// on, `current` is advanced by one before the remaining tokens are
// searched, and stays advanced even if that search fails.
// `current` must not be seq.end(); this is checked with the project macro
// assertreturnfalse (presumably asserts and returns false -- TODO confirm).
boolc tokenizer::find
(
  string::size_type & k, 
  stringc & atom, 
  string::size_type const k0 
)
{
  if (atom.empty())
  {
    current=seq.end();
    return false;
  }

  liststringi i = current;
  assertreturnfalse(i!=seq.end());

  // Only search the current token when the atom can still fit at or
  // behind k0.
  bool const fits = (k0+atom.size()-1<i->size());
  if (fits)
  {
    k = i->find(atom.c_str(),k0);
    if (k!=string::npos)
      return true;
  }
    
  // Not in the current token: carry on with the tokens behind it.
  ++current;

  return tokenizer::find(k,atom);
}

// Finds the first token, from `current` to the end of seq, that contains
// `atom`.
//
// k     receives the offset of the match inside the matching token; it
//       holds string::npos after a failed search that examined at least one
//       token.
// atom  text to look for.
// Returns true and moves `current` to the matching token on success;
// returns false and leaves `current` unchanged otherwise.
boolc tokenizer::find
(
  string::size_type & k, 
  stringc & atom 
)
{
  for (liststringi i = current; i!=seq.end(); ++i)
  {
    k = i->find(atom.c_str());

    if (k!=string::npos)
    {
      current=i;
      return true;
    }
  }

  return false;
}





