// This file is part of PUMA.
// Copyright (C) 1999-2003  The PUMA developer team.
//                                                                
// This program is free software;  you can redistribute it and/or 
// modify it under the terms of the GNU General Public License as 
// published by the Free Software Foundation; either version 2 of 
// the License, or (at your option) any later version.            
//                                                                
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of 
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the  
// GNU General Public License for more details.                   
//                                                                
// You should have received a copy of the GNU General Public      
// License along with this program; if not, write to the Free     
// Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, 
// MA  02111-1307  USA                                            

#ifndef __Token_h__
#define __Token_h__

/** \file
 *  Token abstraction. */

#include <stdlib.h>
#include "Puma/LanguageID.h"
#include "Puma/Location.h"
#include "Puma/Printable.h"
#include "Puma/ListElement.h"
#include "Puma/Array.h"
#include <assert.h>
#include "Puma/DString.h"
#include "Puma/StrCol.h"

namespace Puma {


class Unit;


/** \class Token Token.h Puma/Token.h
 *  Abstraction of a language token. A token is created by a
 *  scanner (see Puma::CScanner) as part of a token chain (Puma::Unit). 
 *  It encapsulates a lexical unit of the scanned text. 
 *  
 *  The information provided about a lexical unit is 
 *  \li the token type
 *  \li the location in the scanned text
 *  \li the portion of the scanned text represented by this token
 *  \li the language of the token */
class Token : public ListElement {
  // _type is the value reported by type(). The role of _real_type is
  // not visible in this header; see the implementation file.
  int _type, _real_type;

  // Position of the token as reported by location().
  Location _location;
  // Language classification compared against the static *_id members
  // by the is_*() predicates.
  LanguageID _language;
  // Token text as a C string (the destructor is documented to destroy it).
  const char *_text;
  // DString form of the token text. Declared mutable so that the const
  // accessor dtext() can return a non-const reference to it.
  // NOTE(review): presumably filled lazily by dtext() -- confirm in Token.cc.
  mutable DString _dtext;

  // Array installed through cont_lines(); only the pointer is stored here.
  Array<int> *_cont_lines;
   
  // Bit masks kept in _flags.
  enum TK_Flags {
          // Set by is_comment_internal() when the language is comment_id.
          TK_COMMENT = (0x01 << 0),
          // Set by is_core_internal() for core language tokens.
          TK_CORE = (0x01 << 1),
          // Set or cleared by macro_call().
          TK_MACRO_CALL = (0x01 << 2)
  };
  // Combination of TK_Flags bits.
  unsigned short _flags;

public:
  /** Special token types. */
  enum {
    /** End-of-file token. */
    ID_END_OF_FILE = -1,
    /** Unknown token type. */
    ID_UNKNOWN = -2,
    /** Error token type. */
    ID_ERROR = -3,
    /** Warning token type. */
    ID_WARNING = -4
  };

  /** Comment tokens. */ 
  static LanguageID comment_id;
  /** Preprocessor tokens. */
  static LanguageID pre_id;
  /** Compiler directives. */
  static LanguageID dir_id;
  /** C/C++ core language tokens. */
  static LanguageID cpp_id;
  /** White-spaces. */
  static LanguageID white_id;
  /** Wildcards. */
  static LanguageID wildcard_id;
  /** Keywords. */
  static LanguageID keyword_id;
  /** Left parenthesis. */
  static LanguageID open_id;
  /** Function-like macro operands. */
  static LanguageID macro_op_id;
  /** Identifiers. */
  static LanguageID identifier_id;
  /** Comma. */
  static LanguageID comma_id;
  /** Right parenthesis. */
  static LanguageID close_id;

public:
  /** Constructor.
   *  \param type The token type (defaults to 0).
   *  \param lang The language of the token (defaults to LanguageID(0)).
   *  \param text The optional token text (defaults to the empty string). */
  Token (int type = 0, LanguageID lang = LanguageID(0), const char *text = "");
  /** Copy-constructor. 
   *  \param copy The token to copy. */
  Token (const Token &copy);
  /** Destructor. Destroys the token text. */
  virtual ~Token ();

  /** Print the token text on the given stream. 
   *  \param os The output stream. */
  void print (ostream &os) const;
  /** Duplicate this token using the copy-constructor.
   *  \return The copy of this token, allocated with new. Has to be 
   *          destroyed by the caller. */
  virtual ListElement *duplicate () { return new Token (*this); }

  /** Get the unit this token belongs to. */
  Unit* unit () const;

  /** Reset the token object. Note that the parameter order differs from
   *  the constructor and that the text defaults to a null pointer here,
   *  not to the empty string.
   *  \param type The new token type.
   *  \param text The new token text.
   *  \param lang The new token language. */
  void reset (int type = 0, const char *text = 0, LanguageID lang = LanguageID(0));
  
  /** Set the location of the token (usually the line and column in a file). 
   *  \param loc The location; it is copied into the token. */
  void location (const Location &loc)      { _location = loc; }

  /** Set the continuation lines that appeared in this token. Only the 
   *  pointer is stored; the array itself is not copied here.
   *  \param cl The continuation line array. */
  void cont_lines (Array<int> *cl)  { _cont_lines = cl; }

  /** Count the number of line breaks in the text of this token. */
  int line_breaks () const;

  /** Set or clear the macro call flag of the token. 
   *  \param is_call \e true to mark the token as a macro call (default),
   *                 \e false to remove the mark. */
  void macro_call (bool is_call = true);
  
  /** Get the token text as a C string. */
  const char *text () const;

  /** Get the token text as a DString. */
  DString &dtext () const;

  /** Get the static token text. Only for keywords, operators, and so on
   *  with a constant token text. 
   *  \return The static text or NULL for tokens with dynamic text (like identifiers). */
  char *get_static_text () const;

  /** Get the static token text for the given token type. Only for keywords,
   *  operators, and so on with a constant token text.
   *  \param token_type The token type to look up.
   *  \return The static text or NULL for tokens with dynamic text (like identifiers). */
  static char *get_static_text (int token_type);

  /** Get the type of the token (see Puma::CTokens). */
  int type () const                 { return _type; }

  /** Get the location of the token (usually the line and column in a file). */
  const Location &location () const { return _location; }
   
  /** Check if the token is macro generated. */
  bool is_macro_generated () const;
  /** Check if the token is a macro call, i.e. whether the flag set by
   *  macro_call() is present. */
  bool is_macro_call () const       { return (_flags & TK_MACRO_CALL) != 0; }
  /** Check if this is a core language token. */
  bool is_core () const;
  /** Check if the token is an identifier (keywords count as identifiers). */
  bool is_identifier () const;    // subset of core
  /** Check if the token is a keyword. */
  bool is_keyword () const;       // subset of identifier
  /** Check if this is a wildcard token. */
  bool is_wildcard () const;      // subset of core
  /** Check if this is a preprocessor token. */
  bool is_preprocessor () const;  // a preprocessor directive (include, etc.)
  /** Check if this is a compiler directive. */
  bool is_directive () const;     // a compiler directive (pragma, line, etc.)
  /** Check if this is a white-space token. */
  bool is_whitespace () const;
  /** Check if this is a comment. */
  bool is_comment () const;
  /** Check if this is a function-like macro operand. */
  bool is_macro_op () const;
  /** Check if this is a left parenthesis. */
  bool is_open () const;          // subset of core
  /** Check if this is a comma. */
  bool is_comma () const;         // subset of core
  /** Check if this is a right parenthesis. */
  bool is_close () const;         // subset of core

  /** Own operator new reusing memory. */
  void *operator new (size_t);
  /** Own delete operator. */
  void  operator delete (void *);

private:
  // Not defined in this header; semantics live in the implementation file.
  int get_general_type () const;
  // Get the language of the token.
  LanguageID language () const { return _language; }
  // Despite the is_ prefix these two are setters: they add TK_COMMENT
  // resp. TK_CORE to _flags when the token language qualifies. They never
  // clear a bit that is already set.
  void is_comment_internal ();
  void is_core_internal ();
};

/** Mark the token as a macro call, or remove that mark.
 *  \param is_call \e true sets the TK_MACRO_CALL flag, \e false clears it. */
inline void Token::macro_call (bool is_call) {
  // Both branches leave every other flag bit untouched.
  _flags = static_cast<unsigned short> (is_call ? (_flags | TK_MACRO_CALL)
                                                : (_flags & ~TK_MACRO_CALL));
}

inline void Token::is_core_internal () {
  if (_language == cpp_id || _language == wildcard_id || 
      _language == open_id || _language == comma_id || 
      _language == close_id || is_identifier ())
    _flags |= TK_CORE;
}
// Add TK_COMMENT to the flags when the token language is comment_id.
// An already set TK_COMMENT bit is never cleared.
inline void Token::is_comment_internal () {
  if (! (_language == comment_id))
    return;
  _flags |= TK_COMMENT;
}

// True if the TK_CORE bit is present in the token flags.
inline bool Token::is_core () const {
  const bool core_bit_set = (_flags & TK_CORE) != 0;
  return core_bit_set;
}
// True for plain identifiers and for keywords.
inline bool Token::is_identifier () const {
  if (_language == identifier_id)
    return true;
  return is_keyword ();
}
inline bool Token::is_keyword () const 
 { return _language == keyword_id; }
inline bool Token::is_wildcard () const 
 { return _language == wildcard_id; }
inline bool Token::is_preprocessor () const 
 { return _language == pre_id; }
inline bool Token::is_directive () const 
 { return _language == dir_id; }
inline bool Token::is_whitespace () const 
 { return _language == white_id; }
// True if the TK_COMMENT bit is present in the token flags.
inline bool Token::is_comment () const {
  const bool comment_bit_set = (_flags & TK_COMMENT) != 0;
  return comment_bit_set;
}
inline bool Token::is_macro_op () const 
 { return _language == macro_op_id; }
inline bool Token::is_open () const 
 { return _language == open_id; }
inline bool Token::is_comma () const 
 { return _language == comma_id; }
inline bool Token::is_close () const 
 { return _language == close_id; }

// Stream insertion for tokens: delegates to Token::print() and returns
// the stream so that insertions can be chained.
inline ostream &operator << (ostream &os, const Token &object) {
  ostream &stream = os;
  object.print (stream);
  return stream;
}

} // namespace Puma

#endif /* __Token_h__ */
