/*
 * Kai Programming Language Compiler
 * by John Ryland
 * (c) Copyright 2019
 */
#include <fstream>
#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <cctype>
#include <string>
#include <vector>
#include <map>

enum class TokenType
{
  // [a-z|A-Z] [a-z|A-Z|0-9|_] *
  Identifier,

  // literals
  IntegerLiteral,
  FloatLiteral,
  StringLiteral,

  // Specific parsed identifiers
  Type,     // ?
  Constant, // ?
  Function, // ?
  Module,   // ?

  // Language specific tokens
  Keyword,
  Symbol,

  // end of file
  EndOfFile
};

enum class SymbolType
{
  // Unary
  Not,
  Tilda,

  // Binary
  Add,
  Subtract,
  Multiply,
  Divide,
  Modulus,

  LogicalAnd,
  LogicalOr,
  BinaryAnd,
  BinaryOr,
  BinaryXor,

  Equality,
  InEquality,
  LessThan,
  LessThanOrEqual,
  GreaterThan,
  GreaterThanOrEqual,

  Assignment,
  Produces,

  // Other
  OpenSquareBracket,
  ClosSquareeBracket,

  OpenBracket,
  CloseBracket,

  OpenBrace,
  CloseBrace,

  QuestionMark,
  Colon,
  SemiColon,
  Period,
  Comma,
  At,

  EnumSize
};

enum class ExpressionType
{
  Constant,
  Variable,
  Unary,
  Binary,
  Ternary
};

enum class KeywordType
{
  Module,
  Information,
  Imports,
  Interface,
  Implementation,
  Tests,
  Type,
  Const,
  Enum,
  Struct,
  Function,
  Var,
  If,
  Else,
  For,
  While,

  EnumSize
};

enum class TypeType
{
  Basic,
  Enum,
  Struct,
  Function,
  Alias
};

enum class BasicType
{
  Int8,
  Int16,
  Int32,
  Int64,

  UInt8,
  UInt16,
  UInt32,
  UInt64,

  Flt32,
  Flt64,

  String,
  
  Meta,     // template type

  EnumSize
};

enum class ControlFlowType
{
  If,
  For,
  While,
  DoWhile,
  Continue,
  Break,
  Switch
};

enum class StatementType
{
  Block,
  ControlFlow,
  Expression
};

enum class ConstExpressionType
{
  IntegerLiteral,
  FloatLiteral,
  StringLiteral,
  CompileTimeConstant,  // expression which can be evaluated at compile-time
  TypeName
};

enum class UnaryExpressionType
{
  /* C unary operators
  PreIncrement,
  PreDecrement,
  PostIncrement,
  PostDecrement,
  AddressOf,
  Indirection,
  Sizeof,
  Cast,
  */
  Positive,
  Negative,
  OnesComplement,
  Not
};

enum class BinaryExpressionType
{
  Add,
  Subtract,
  Multiply,
  Divide,
  Modulus,

  LogicalAnd,
  LogicalOr,
  BinaryAnd,
  BinaryOr,
  BinaryXor,

  Equality,
  InEquality,
  LessThan,
  LessThanOrEqual,
  GreaterThan,
  GreaterThanOrEqual,
};

enum class TernaryExpressionType
{
  Conditional
};

struct TypeDeclaration;
struct Expression;
struct Statement;
struct FunctionBlock;
struct Module;

struct Identifier
{
  std::string                           m_name;
};

struct StringLiteral
{
  std::string                           m_string;
};

struct IntegerLiteral
{
  bool                                  m_negative;
  uint64_t                              m_value;
};

struct FloatLiteral
{
  double                                m_value;
};

struct EnumValues
{
  uint64_t                              m_nextValue;
  std::map<std::string,uint64_t>        m_values;
};

struct Variable
{
  std::string                           m_name;
  TypeDeclaration*                      m_type;
};

struct StructMembers
{
  std::vector<Variable>                 m_members;
};

struct FunctionDeclaration
{
  std::string                           m_name;
  TypeDeclaration*                      m_returnType;
  std::vector<Variable>                 m_parameters;
};

struct TypeDeclaration
{
  std::string                           m_name;

  // TODO: C++ lacks a typesafe union
  //       C++17 adds variant which tries to address this, but the syntax is very clunky, bolted on using boost style library code.
  //       Ideally language should have built-in a proper typesafe union which kind of fuses together what is in C++ an enum and a union.
  //       Below is the pattern that is being used here, an enum and union. The type needs to identify which member of the union is valid.
  //       In the Kai language, perhaps this can be done right. Need to figure out the correct syntax for it.
  //       These are called 'tagged unions' as opposed to an anonymous union.
  //       Seems rust has something similar to what would be expected.
  TypeType                              m_type;
  union
  {
    BasicType                           m_basic;
    EnumValues*                         m_enum;
    StructMembers*                      m_struct;
    TypeDeclaration*                    m_alias;  // perhaps strong and weak options?
    FunctionDeclaration*                m_function;
  };
};


/*

Safer pointer concepts

  void foo(char* a_pointer);
  char* normal_pointer = (char*)malloc(1024);
  foo(normal_pointer);      //  the passed in pointer contains no information about the size


Perhaps better:

  template <typename T>
  struct Pointer {
    Pointer(T* p, size_t s) { m_p = p; m_s = s; }
    T*     m_p;
    size_t m_s;
  }

  void foo(Pointer<char> a_pointer);
  Pointer<char> normal_pointer = Pointer<char>((char*)malloc(1024), 1024);
  foo(normal_pointer);      //  the passed in pointer now contains information about the size

  It is possible to build in pointer arithmatic bounds checking.
  It is also possible to create spans/views which also follow a similar style.

  A span/view however needs to understand object lifetime.

  Lifetime management is another concern. Possibly the Qt way with a hierarchy of ownership
  from a root object is one way.

  In terms of VM and functional style programming and multi-threading, was thinking about
  the idea that in the VM it could enforce that when no-synchronization is being performed,
  that a thread of execution just has read-only access to heap and constant data, but can
  read/write to it's stack. Multiple threads can run concurrently just working off their
  stack. Then there can be some synchronization primitives that can join the work of the
  threads giving them controlled access to the head where work can be transferred from the
  stacks to the heap and the heap generally modified. The heap might be generational.

*/


/*

Tagged union concept

*/

class tagged_union
{
  enum
  {
    m_basic_tag,
    m_enum_tag,
    m_struct_tag,
    m_alias_tag,
    m_function_tag,
  } m_type;
  union
  {
    BasicType             m_basic;
    EnumValues*           m_enum;
    StructMembers*        m_struct;
    TypeDeclaration*      m_alias;
    FunctionDeclaration*  m_function;
  };
public:
  tagged_union& operator=(const BasicType& b) { m_type = m_basic_tag; m_basic = b; return *this; }
  tagged_union& operator=(EnumValues* e)      { m_type = m_enum_tag;  m_enum  = e; return *this; }
  // ...

  // How to access? :
  //  runtime-checking way:
  BasicType& get_basic()  { assert(m_type == m_basic_tag);  return m_basic; } 

  // Better would be to statically analyse and not even need the tag!!

  // other way is access is only via a switch/, but 

  /*
     heavy approach is using inheritance:

        class tagged_union { virtual void f() {} };  // base class with a v-table
        class basicC  : base { BasicType             m_basic;  }
        class enumC   : base { EnumValues*           m_enum;   }
        class structC : base { StructMembers*        m_struct; }
        // ...

        tagged_union* thing;   // needs to be pointer

        // setting:
        delete thing; thing = new basicC{ b };

     then access using dynamic_cast
        if (dynamic_cast<basicC*>(thing))
           ... // it is of BasicType
  */

  /*
  
  // syntax in other languages:

  tree match
  {
    case Node(x, _, _) => println("top level node value: " + x)
    case Leaf          => println("top level node is a leaf")
  }
  
  */

  /*
 
     // desired syntax:

     // declared same as a C style union
     union Shape
     {
         struct { int size; }              Square;
         struct { int width; int height; } Rectangle;
         struct { int radius; }            Circle;  
     };

     // alternative idea, same as C style enum
     // perhaps use the name 'variant' to make it clear it is different
     variant Shape
     {
         Square    = struct { int size; },
         Rectangle = struct { int width; int height; },
         Circle    = struct { int radius; },
     }

     void func(Shape shape)
     {
        // access similar to an enum, this is the only provided way to access the members
        switch (value in shape)
        {
           case Square:    printf("square : %i\n", value.size);
           case Rectangle: printf("rectangle: %i\n", value.width);
           case Circle:    printf("circle: %i\n", value.radius);
        }

        shape = 
     }


     // C++ has optional, also rust, basically something like this is possible with tagged unions

     template <typename T>
     variant Optional
     {
         Some = T,
         None = void,
     }

     template <typename T>
     variant Result
     {
         Okay = T,
         Error = struct { int reasonCode; string errorMessage; },
     }
  */

private:
};


/*

 // Bad code: returns stack memory
 char *itoa(int i)
 {
    char buf[20], *z;
    sprintf(buf,"%d",i);
    z = buf;
    return z;
 }

 However if the language was designed to, it might be possible
 for the caller to take ownership of stack from the callee, as if
 the caller had just declared some local variables within its scope.
 This perhaps could apply all the way up the call stack. It would
 allow things like stings and small allocations to happen from the
 stack instead of needing the heap for everything.
 
 Probably this could be implemented using alloca and some classes.

 Ideally something like this would work and the buffer for string
 would be on the stack

  string itoa(int i)
  {
     string buf(20);
     sprintf(buf, "%d", i);
     return buf;
  }

  void caller()
  {
     string s;
     s = itoa(100);   // have compiler do some combination of RVO and stack transfer
  }


  Extending on this idea, there will be areas of the stack which will be used
  for the lifetime of a block, and others which will have their lifetimes extended
  by being allocated from a parent block or a parent's parent's block etc.
  This is part of the concept of RVO, a returned value could be in the parent's
  stack frame and it was as if that memory was past by reference to the function
  and it allocated in to it, however potentially without the constructor being
  run in the parent etc.

  This is fine for fixed sized values. Anything of a varying size would otherwise
  need to be in the heap, which is pointed out above, but is not a general limitation
  of the stack. alloca is an example how varying sized objects can be allocated from
  the stack. So the challenge is to do something like RVO with varying sized objects.

  When the stack is used to store short lived objects like loop variables and temporary
  variables, and is also being used to store preserved values between blocks/stack frames
  for larger variable sized allocations, then something has to give. Either the short
  lived objects can't be reclaimed, or the compiler has to move memory around to keep
  these short lived objects in memory after the preserved ones. This rearranging could
  happen at the end of a block, but anytime memory is moved, it means it is not
  possible to have pointers to things, or need another level of indirection. Essentially
  we create something like an incremental GC which is incrementally working within
  the thread and at an increment of each block. Generally it will slow down the program.

  So instead of moving memory around, perhaps the concept could be to have two stacks.
  One a preserved stack, and another a temporary or scratch stack.

  The preserved stack is for RVO type objects, ones with varying size. The size can only
  be varied in the frame in which the object is constructed, not in subsequent frames.
  That is so that future objects won't need to be moved in memory to accommodate the change
  in size. This is achieved by virtue of the rule that parameters to functions are passed
  by const reference, so deeper function calls shouldn't be modifying it anyway.
  Problem is if multiple values in a current frame are varying in size. This is a tricky
  case, but conceptually that might need to be either one thing or in different frames/
  blocks. 
  
  Also another concern is guaranteeing safety. The preserved stack could run out of space.
  For a high safety critical system, it would be nice to make guarantees about the
  maximum amount of memory which could be used and to be able to say that the preserved
  stack size is large enough for any case. This might mean making a program specify the
  upper bound on the size of varying sized objects. It might also mean not allowing
  recursion or perhaps some way to bound the recursion as part of the language.


*/



struct ConstExpression
{
  ConstExpressionType                   m_type;
  union
  {
    // TODO: enum value - has a value and a type
    // TODO: constant variable
    IntegerLiteral*                     m_integer;
    FloatLiteral*                       m_float;
    StringLiteral*                      m_string;
    // CompileTimeConstant                 m_expression;
    TypeDeclaration*                    m_typeDeclaration;
  };
};

struct ConstantVariable
{
  std::string                           m_name;
  TypeDeclaration*                      m_type;
  // TODO: const structs, where value is inside { }
  //    eg:   const vec3 origin := { 0, 0, 0 };
  ConstExpression                       m_value;
};

struct IfStatement
{
  Expression*                           m_condition;
  Statement*                            m_statement;
  Statement*                            m_else;
};

struct ForStatement
{
  Statement*                            m_initializer;
  Expression*                           m_condition;
  Statement*                            m_statement;
  Statement*                            m_increment;
};

struct WhileStatement
{
  Expression*                           m_condition;
  Statement*                            m_statement;
};

struct SwitchStatement
{
  Expression*                           m_condition;
  std::map<ConstExpression,Expression*> m_cases;
};

struct ControlFlow
{
  ControlFlowType                       m_type;
  union
  {
    IfStatement*                        m_if;
    ForStatement*                       m_for;
    WhileStatement*                     m_while;
    WhileStatement*                     m_doWhile;
    SwitchStatement*                    m_switch;
  };
};

struct UnaryExpression
{
  UnaryExpressionType                   m_operator;
  Expression*                           m_first;
};

struct BinaryExpression
{
  BinaryExpressionType                  m_operator;
  Expression*                           m_first;
  Expression*                           m_second;
};

struct TernaryExpression
{
  TernaryExpressionType                 m_operator;
  Expression*                           m_first;
  Expression*                           m_second;
  Expression*                           m_third;
};

// These are RHS expressions
struct Expression
{
  ExpressionType                        m_type;
  union
  {
    ConstExpression                     m_constant;
    Variable*                           m_variable;
    Identifier*                         m_variableId; // TODO: place-holder member, until can do contexted lookup of variable by name
    UnaryExpression                     m_unary;
    BinaryExpression                    m_binary;
    TernaryExpression                   m_ternary;
  };
};

struct Statement
{
  StatementType                         m_type;
  union
  {
    FunctionBlock*                      m_block;
    ControlFlow*                        m_controlFlow;
    Expression*                         m_expression;
  };
};

struct FunctionBlock
{
  std::vector<Variable>                 m_localVariables;
  std::vector<Statement>                m_statements;
  std::vector<Expression>               m_expressions;
};

struct Function
{
  FunctionDeclaration*                  m_interface;
  FunctionBlock                         m_implementation;
};

struct ModuleInformation
{
  std::string                           m_description;
  std::string                           m_version;
  std::string                           m_copyright;
  std::string                           m_date;
  std::string                           m_author;
  std::string                           m_authorURL;
  std::string                           m_authorURLToPublicKey;
  std::string                           m_authorDigitalSignature;
  std::string                           m_checksum;
  std::string                           m_interfaceHash;
  std::string                           m_implementationHash;
  std::string                           m_digitalSignature; // online digital timestamp notary service?
  std::string                           m_history;
  std::string                           m_profile;  // 8-bit,16-bit,32-bit,no-float,no-mmu,no-IO,low-ram,...
  // scheme where generate checksum/hash of a file
  // author has publish public key inside DNS or somewhere public
  // author signs file/hash with private key and appends
  // anyone can verify that only the author was the creator of this file
  // however anyone can strip this digital signature and repeat the process with their own key pair
  // what is needed is to also timestamp it in a verifiable way that can see who published first
  // https://github.com/trbs/rfc3161ng/
  // actually perhaps author doesn't need to have a key-pair. It may be enough that they put their
  // name on it, and the time-stamping server is signing the hash of the file which include details
  // of authorship.
  // I guess that doesn't prevent someone from attributing something to someone else.
  // If the author wants to sign it, say to prove it came from their reputable company, then they
  // can sign it too. This might be authorship signature.
};

struct ModuleImports
{
  std::map<std::string,Module*>         m_imports;
};

struct ModuleInterface
{
  // TODO: should they be pointers to the decls in CompilerState?
  std::vector<TypeDeclaration*>         m_types;
  std::vector<ConstantVariable*>        m_constants;
  std::vector<FunctionDeclaration*>     m_functions;
};

struct ModuleImplementation
{
  ModuleInterface                       m_details;
  // TODO: should be pointers to the decls in CompilerState?
  std::map<std::string,Function>        m_functions;
};

struct ModuleTests
{
  std::map<std::string,Function>        m_tests;
};

struct Module
{
  std::string                           m_name;
  std::string                           m_fileName;
  ModuleInformation                     m_information;
  ModuleImports                         m_imports;
  ModuleInterface                       m_interface;
  ModuleImplementation                  m_implementation;
  ModuleTests                           m_tests;
};

struct Program
{
  std::string                           m_name;
  std::string                           m_fileName;
  ModuleInformation                     m_information;
  ModuleImports                         m_imports;
  Function                              m_mainFunction;
};

struct Token
{
  uint32_t                              m_lineNumber;
  uint32_t                              m_columnNumber;
  TokenType                             m_tokenType;
  union
  {
    Identifier*                         m_identifier;
    IntegerLiteral*                     m_integerLiteral;
    FloatLiteral*                       m_floatLiteral;
    StringLiteral*                      m_stringLiteral;

    ConstantVariable*                   m_constant; // ?
    TypeDeclaration*                    m_type;     // ?
    FunctionDeclaration*                m_function; // ?
    Module*                             m_module;   // ?

    KeywordType                         m_keyword;
    SymbolType                          m_symbol;
  };
};

struct CompilerState
{
  // TODO: nothing is namespaced here. perhaps multi-maps, but would be better to avoid
  // depends if everything needs to be explicitly scoped when used, or can have a 'using' keyword.
  std::map<std::string,TokenType>       m_tokens;
  std::map<std::string,Identifier>      m_identifiers;
  std::map<std::string,IntegerLiteral>  m_integerLiteralTable;
  std::map<std::string,FloatLiteral>    m_floatLiteralTable;
  std::map<std::string,StringLiteral>   m_stringLiteralTable;

  // TODO: perhaps these belong in the modules
  std::map<std::string,ConstantVariable>    m_constants;
  std::map<std::string,EnumValues>          m_enums;
  std::map<std::string,StructMembers>       m_structs;
  std::map<std::string,TypeDeclaration>     m_types;
  std::map<std::string,FunctionDeclaration> m_functions;

  std::map<std::string,Module>          m_modules;
  Program                               m_program;

  // language grammar
  std::map<std::string,KeywordType>     m_keywordTable;
  std::map<std::string,SymbolType>      m_symbolTable;

  // Lexical parsing state
  bool                                  m_insideMultiLineComment;
  bool                                  m_insideMultiLineString;

  // Function pointers
  std::function<std::string(void)>      m_getInput;
  std::function<bool(void)>             m_getEof;
  std::function<Token(void)>            m_getToken;
  std::function<void(void)>             m_parser;
};

std::string basicTypeToString(BasicType a_basicType)
{
  switch (a_basicType)
  {
    case BasicType::Int8:                  return "int8";
    case BasicType::Int16:                 return "int16";
    case BasicType::Int32:                 return "int32";
    case BasicType::Int64:                 return "int64";
    case BasicType::UInt8:                 return "uint8";
    case BasicType::UInt16:                return "uint16";
    case BasicType::UInt32:                return "uint32";
    case BasicType::UInt64:                return "uint64";
    case BasicType::Flt32:                 return "flt32";
    case BasicType::Flt64:                 return "flt64";
    case BasicType::String:                return "string";
    case BasicType::Meta:                  return "type";
    case BasicType::EnumSize:              return "";
  }
}

std::string symbolToString(SymbolType a_operator)
{
  switch (a_operator)
  {
    case SymbolType::Not:                  return "!";
    case SymbolType::Tilda:                return "~";
    case SymbolType::Add:                  return "+";
    case SymbolType::Subtract:             return "-";
    case SymbolType::Multiply:             return "*";
    case SymbolType::Divide:               return "/";
    case SymbolType::Modulus:              return "%";
    case SymbolType::LogicalAnd:           return "&&";
    case SymbolType::LogicalOr:            return "||";
    case SymbolType::BinaryAnd:            return "&";
    case SymbolType::BinaryOr:             return "|";
    case SymbolType::BinaryXor:            return "^";
    case SymbolType::Equality:             return "==";
    case SymbolType::InEquality:           return "!=";
    case SymbolType::LessThan:             return "<";
    case SymbolType::LessThanOrEqual:      return "<=";
    case SymbolType::GreaterThan:          return ">";
    case SymbolType::GreaterThanOrEqual:   return ">=";
    case SymbolType::Assignment:           return ":=";
    case SymbolType::Produces:             return "=>";
    case SymbolType::OpenSquareBracket:    return "[";
    case SymbolType::ClosSquareeBracket:   return "]";
    case SymbolType::OpenBracket:          return "(";
    case SymbolType::CloseBracket:         return ")";
    case SymbolType::OpenBrace:            return "{";
    case SymbolType::CloseBrace:           return "}";
    case SymbolType::QuestionMark:         return "?";
    case SymbolType::Colon:                return ":";
    case SymbolType::SemiColon:            return ";";
    case SymbolType::Period:               return ".";
    case SymbolType::Comma:                return ",";
    case SymbolType::At:                   return "@";
    case SymbolType::EnumSize:             return "";
  }
}

std::string keywordToString(KeywordType a_keyword)
{
  switch (a_keyword)
  {
    case KeywordType::Module:              return "module";
    case KeywordType::Information:         return "information";
    case KeywordType::Imports:             return "imports";
    case KeywordType::Interface:           return "interface";
    case KeywordType::Implementation:      return "implementation";
    case KeywordType::Tests:               return "tests";
    case KeywordType::Type:                return "type";
    case KeywordType::Const:               return "const";
    case KeywordType::Enum:                return "enum";
    case KeywordType::Struct:              return "struct";
    case KeywordType::Function:            return "function";
    case KeywordType::Var:                 return "var";
    case KeywordType::If:                  return "if";
    case KeywordType::Else:                return "else";
    case KeywordType::For:                 return "for";
    case KeywordType::While:               return "while";
    case KeywordType::EnumSize:            return "";
  }
}

void AddBasicTypeToCompilerState(CompilerState& a_state, BasicType a_type)
{
  std::string typeString = basicTypeToString(a_type);
  a_state.m_tokens[typeString] = TokenType::Type;
  a_state.m_types[typeString] = TypeDeclaration{ typeString, TypeType::Basic, a_type };
}

void AddSymbolToCompilerState(CompilerState& a_state, SymbolType a_symbol)
{
  std::string symbolString = symbolToString(a_symbol);
  a_state.m_tokens[symbolString] = TokenType::Symbol;
  a_state.m_symbolTable[symbolString] = a_symbol;
}

void AddKeywordToCompilerState(CompilerState& a_state, KeywordType a_keyword)
{
  std::string keywordString = keywordToString(a_keyword);
  a_state.m_tokens[keywordString] = TokenType::Keyword;
  a_state.m_keywordTable[keywordString] = a_keyword;
}

template <typename T>
class EnumIterator
{
	public:
		EnumIterator()
			: m_value(T::EnumSize)
	  {
    }

    EnumIterator(T a_value)
			: m_value(a_value)
	  {
    }

		T operator*(void) const
		{
			return m_value;
		}

		void operator++()
		{
			m_value = static_cast<T>(static_cast<int>(m_value) + 1);
		}

		bool operator!=(EnumIterator rhs)
		{
			return m_value != rhs.m_value;
		}
	private:
		T m_value;
};

template <typename T>
EnumIterator<T> begin(EnumIterator<T>)
{
	return EnumIterator<T>(static_cast<T>(0));
}

template <typename T>
EnumIterator<T> end(EnumIterator<T>)
{
	return EnumIterator<T>(T::EnumSize);
}

CompilerState InitializeCompilerState()
{
  CompilerState state;

  state.m_insideMultiLineComment = false;

  // Currently not-supported
  state.m_insideMultiLineString = false;

/*
  constexpr int startLine1 = __LINE__;
  AddBasicTypeToCompilerState(state, BasicType::Int8);
  static_assert((__LINE__ - startLine1 - 1) == static_cast<int>(BasicType::EnumSize), "Not all BasicType enum cases handled");
*/

  // Register basic types
	for (BasicType basicType: EnumIterator<BasicType>())
	{
		AddBasicTypeToCompilerState(state, basicType);
	}

  // Register symbols
	for (SymbolType symbol: EnumIterator<SymbolType>())
	{
		AddSymbolToCompilerState(state, symbol);
	}

  // Register keywords
	for (KeywordType keyword: EnumIterator<KeywordType>())
	{
    AddKeywordToCompilerState(state, keyword);
	}

  return state;
}

void parser(CompilerState& a_state, Token& a_token)
{
  // TODO: from this stream of tokens, we need to build a syntax tree / parse expressions etc.
  switch (a_token.m_tokenType)
  {
    case TokenType::Identifier:     printf("i: %s\n",     a_token.m_identifier->m_name.c_str());         break;
    case TokenType::Type:           printf("t: %s\n",     a_token.m_type->m_name.c_str());               break;
    case TokenType::Function:       printf("f: %s\n",     a_token.m_function->m_name.c_str());           break;
    case TokenType::Constant:       printf("c: %s\n",     a_token.m_constant->m_name.c_str());           break;
    case TokenType::IntegerLiteral: printf("n: %c%llu\n", a_token.m_integerLiteral->m_negative ? '-' : '+',
                                                          a_token.m_integerLiteral->m_value);            break;
    case TokenType::FloatLiteral:   printf("f: %lf\n",    a_token.m_floatLiteral->m_value);              break;
    case TokenType::StringLiteral:  printf("s: %s\n",     a_token.m_stringLiteral->m_string.c_str());    break;
    case TokenType::Module:         printf("m: %s\n",     a_token.m_module->m_name.c_str());             break;
    case TokenType::Keyword:        printf("k: %s\n",     keywordToString(a_token.m_keyword).c_str());   break;
    case TokenType::Symbol:         printf("o: %s\n",     symbolToString(a_token.m_symbol).c_str());     break;
    case TokenType::EndOfFile:      printf("EOF\n");                                                     break;
  }
}

void dumpToken(const Token& a_token)
{
  switch (a_token.m_tokenType)
  {
    case TokenType::Identifier:     printf("i: %s\n",     a_token.m_identifier->m_name.c_str());         break;
    case TokenType::Type:           printf("t: %s\n",     a_token.m_type->m_name.c_str());               break;
    case TokenType::Function:       printf("f: %s\n",     a_token.m_function->m_name.c_str());           break;
    case TokenType::Constant:       printf("c: %s\n",     a_token.m_constant->m_name.c_str());           break;
    case TokenType::IntegerLiteral: printf("n: %c%llu\n", a_token.m_integerLiteral->m_negative ? '-' : '+',
                                                          a_token.m_integerLiteral->m_value);            break;
    case TokenType::FloatLiteral:   printf("f: %lf\n",    a_token.m_floatLiteral->m_value);              break;
    case TokenType::StringLiteral:  printf("s: %s\n",     a_token.m_stringLiteral->m_string.c_str());    break;
    case TokenType::Module:         printf("m: %s\n",     a_token.m_module->m_name.c_str());             break;
    case TokenType::Keyword:        printf("k: %s\n",     keywordToString(a_token.m_keyword).c_str());   break;
    case TokenType::Symbol:         printf("o: %s\n",     symbolToString(a_token.m_symbol).c_str());     break;
    case TokenType::EndOfFile:      printf("EOF\n");                                                     break;
  }
}

bool scanForEndOfComment(size_t& a_idx, const char* a_lineBuffer)
{
  while (a_lineBuffer[a_idx] != '\n' && a_lineBuffer[a_idx] != '\0')
  {
    if (a_lineBuffer[a_idx] == '*')
    {
      a_idx++;
      if (a_lineBuffer[a_idx] == '/')
      {
        a_idx++;
        return false;
      }
      a_idx--;
    }
    a_idx++;
  }
  return true;
}

// <letter>          ::= [a-ZA-Z]
// <digit>           ::= [0-9]
// <id>              ::= <letter> { <letter> | <digit> | "_" }
// <digits>          ::= <digit> { <digit> }
// <integer literal> ::= <digits>
// <string literal>  ::= "\"" { < a char or escape char > } "\""
// <real literal>    ::= <digits> "." <digits> [ "e" [ <sign> ] <digits>] <string literal> ::= "\"" { < a char or escape char > } "\""
// <literal>         ::= <integer literal> | <real literal> | <string literal>
// <operators>       ::= "+" | "-" | "*" | "%" | "=" | "!=" | "<" | ">" | "<=" | ">=" | "(" | ")" | "[" | "]" | ":=" | "." | "," | ";" | ":" |
//                       "!" | "||" | "|" | "&" | "&&" | "^" | "{" | "}"
// <keywords>        ::= "if" | "then" | "else" | "of" | "while" | "do" | "begin" | "end" | "var" | "array" | "procedure" | "function" | "program" | "assert"
// <predefined id>   ::= "Boolean" | "false" | "integer" | "read" | "real" | "size" | "string" | "true" | "writeln"

bool tokenFromString(CompilerState& a_state, std::string& a_string, Token& newToken, uint32_t& column)
{
  // TODO: replace a_string[x] with str[x]
  const char* str = a_string.c_str();

  // ignore new-lines
  if (str[0] == '\n' && str[1] == '\0')
  {
    a_string = "";
    return false;
  }

  if (a_state.m_insideMultiLineComment)
  {
    size_t idx = 0;
    a_state.m_insideMultiLineComment = scanForEndOfComment(idx, str);
    if (a_state.m_insideMultiLineComment)
    {
      // ignore lines that are inside of a multi-line comment
      a_string = "";
      return false;
    }
    a_string = (a_string.length() == idx) ? "" : a_string.substr(idx);
    column += static_cast<uint32_t>(idx);
    return false;
  }

  // currently multi-line strings are not supported
  a_state.m_insideMultiLineString = false;

  char* uint_end;
  uint64_t asUInt = std::strtoull(str, &uint_end, 0);
  char* flt_end;
  double asDouble = std::strtod(str, &flt_end);

  std::string token;
  size_t end = 0;

  // TODO: all these parsings need validating of the token (string, id, int, float)
  if (isalpha(a_string[0]))
  {
    while (isalnum(a_string[end]) || a_string[end] == '_')
    {
      end++;
    }
    // might be a type or a module, so will look-up against a_state
    newToken.m_tokenType = TokenType::Identifier;
  }
  else if (isdigit(a_string[0]))
  {
    if (uint_end >= flt_end)
    {
      newToken.m_tokenType = TokenType::IntegerLiteral;
    }
    else
    {
      newToken.m_tokenType = TokenType::FloatLiteral;
    }
    end = std::max(uint_end, flt_end) - str;
  }
  else if (a_string[0] == '"')
  {
    newToken.m_tokenType = TokenType::StringLiteral;
    end = a_string.find('"', 1);
    if (end == std::string::npos)
    {
      printf("line: %i:%i bad string literal, couldn't find closing quotes: %s\n", newToken.m_lineNumber, newToken.m_columnNumber, a_string.c_str());
      exit(0);
    }
    end++;
  }
  else
  {
    if (a_string[0] == '/')
    {
      if (a_string[1] == '/')
      {
        a_string = "";
        return false;
      }
      else if (a_string[1] == '*')
      {
        size_t idx = 2;
        a_state.m_insideMultiLineComment = scanForEndOfComment(idx, str);
        if (a_state.m_insideMultiLineComment)
        {
          a_string = "";
          return false;
        }
        a_string = (a_string.length() == idx) ? "" : a_string.substr(idx);
        column += static_cast<uint32_t>(idx);
        return false;
      }
    }

    // symbol
    while (!isalnum(a_string[end]) && a_string[end] != 0)
    {
      end++;
    }
    while (end && !a_state.m_tokens.count(a_string.substr(0, end)))
    {
      end--;
    }
    if (end == 0)
    {
      printf("line %i:%i unrecognized character sequence / symbol: %s\n", newToken.m_lineNumber, newToken.m_columnNumber, a_string.c_str());
      exit(0);
    }
    assert(a_state.m_tokens.count(a_string.substr(0, end)));
  }

  token = a_string.substr(0, end);
  a_string = a_string.substr(end);
  column += static_cast<uint32_t>(end);

  // printf("got token : %s\n", token.c_str());

  // Handle existing/same tokens as previously parsed
  if (!a_state.m_tokens.count(token))
  {
    a_state.m_tokens[token] = newToken.m_tokenType;
    switch (newToken.m_tokenType)
    {
      case TokenType::Identifier:      a_state.m_identifiers[token]         = Identifier{ token };        break;
      case TokenType::IntegerLiteral:  a_state.m_integerLiteralTable[token] = { false, asUInt };          break;
      case TokenType::FloatLiteral:    a_state.m_floatLiteralTable[token]   = FloatLiteral{ asDouble };   break;
      case TokenType::StringLiteral:   a_state.m_stringLiteralTable[token]  =  // strips quotes from string
                                                 StringLiteral{ token.substr(1, token.length()-2) };      break;
      default:                                                                                            break;
    }
  }
  else
  {
    newToken.m_tokenType = a_state.m_tokens[token];
  }

  switch (newToken.m_tokenType)
  {
    case TokenType::Identifier:     newToken.m_identifier     = &a_state.m_identifiers[token];            break;
    case TokenType::Type:           newToken.m_type           = &a_state.m_types[token];                  break;
    case TokenType::Function:       newToken.m_function       = &a_state.m_functions[token];              break; 
    case TokenType::Constant:       newToken.m_constant       = &a_state.m_constants[token];              break;
    case TokenType::IntegerLiteral: newToken.m_integerLiteral = &a_state.m_integerLiteralTable[token];    break;
    case TokenType::FloatLiteral:   newToken.m_floatLiteral   = &a_state.m_floatLiteralTable[token];      break;
    case TokenType::StringLiteral:  newToken.m_stringLiteral  = &a_state.m_stringLiteralTable[token];     break;
    case TokenType::Module:         newToken.m_module         = &a_state.m_modules[token];                break;
    case TokenType::Keyword:        newToken.m_keyword        =  a_state.m_keywordTable[token];           break;
    case TokenType::Symbol:         newToken.m_symbol         =  a_state.m_symbolTable[token];            break;
    case TokenType::EndOfFile:                                                                            break;
  }
  return true;
}

// Returns the number of characters trimmed
uint32_t trimLeadingWhiteSpace(std::string& str)
{
  const std::string whitespace = " \t\n\v\f\r";
  size_t first = str.find_first_not_of(whitespace);
  first = first == std::string::npos ? str.length() : first;
  str = str.substr(first);
  return static_cast<uint32_t>(first);
}

/*
void push_tokenizer(CompilerState& a_state, std::ifstream& a_input)
{
  uint32_t lineNumber = 1;
  while (!a_input.eof())
  {
    std::string lineBuffer;
    std::getline(a_input, lineBuffer);
    uint32_t column = 1 + trimLeadingWhiteSpace(lineBuffer);
    while (!lineBuffer.empty())
    {
      Token token{ lineNumber, column };
      // token generator
      if (tokenFromString(a_state, lineBuffer, token, column))
      {
        // token consumer
        a_state.m_parser(a_state, token);
      }
      column += trimLeadingWhiteSpace(lineBuffer);
    }
    lineNumber++;
  }
}
*/

struct TokenizerState
{
  uint32_t         m_lineNumber = 0;
  uint32_t         m_column = 1;
  std::string      m_lineBuffer;
};

Token getNextToken(CompilerState& a_state, TokenizerState& a_tokenizer)
{
  Token token{ a_tokenizer.m_lineNumber, a_tokenizer.m_column };
  while (!a_tokenizer.m_lineBuffer.empty())
  {
    if (tokenFromString(a_state, a_tokenizer.m_lineBuffer, token, a_tokenizer.m_column))
    {
      a_tokenizer.m_column += trimLeadingWhiteSpace(a_tokenizer.m_lineBuffer);
      return token;
    }
    a_tokenizer.m_column += trimLeadingWhiteSpace(a_tokenizer.m_lineBuffer);
  }

  if (a_state.m_getEof())
  {
    token.m_tokenType = TokenType::EndOfFile;
    return token;
  }

  a_tokenizer.m_lineNumber++;
  a_tokenizer.m_lineBuffer = a_state.m_getInput();
  a_tokenizer.m_column = 1 + trimLeadingWhiteSpace(a_tokenizer.m_lineBuffer);
  return getNextToken(a_state, a_tokenizer);
}

/*
void tokenizer(CompilerState& a_state)
{
  while (true)
  {
    Token token = a_state.m_getToken();
    if (token.m_tokenType == TokenType::EndOfFile)
    {
      return;
    }
    // token consumer
    a_state.m_parser(a_state, token);
  }
}
*/

void dumpTokens(CompilerState& a_state)
{
  while (true)
  {
    Token token = a_state.m_getToken();
    if (token.m_tokenType == TokenType::EndOfFile)
    {
      return;
    }
    dumpToken(token);
  }
}

bool tokenIs(Token& a_token, TokenType a_type)
{
  return a_token.m_tokenType == a_type;
}

bool tokenIs(Token& a_token, KeywordType a_keyword)
{
  return a_token.m_tokenType == TokenType::Keyword && a_token.m_keyword == a_keyword;
}

bool tokenIs(Token& a_token, SymbolType a_symbol)
{
  return a_token.m_tokenType == TokenType::Symbol && a_token.m_symbol == a_symbol;
}

bool tokenIs(Token& a_token, const std::string& a_string)
{
  return a_token.m_tokenType == TokenType::Identifier && a_token.m_identifier->m_name == a_string;
}

#define tokenAssert(cond, error) \
  if (!(cond)) \
  { \
    printf("input line %i:%i, error parsing token of type %i, %s.\n", token.m_lineNumber, token.m_columnNumber, token.m_tokenType, error); \
    return false; \
  }

template <typename T>
bool valid(const T& a_arg)
{
  return false;
}

template <>
bool valid(const std::string& a_string)
{
  return !a_string.empty();
}

struct GenericProcessElement
{
  // Because can not have virtual templated functions
  // we use this mode for polymorphism
  enum class Mode
  {
    Parse,
    Validate
  };

  // working state
  bool done = false;
  Mode mode;

  // captured state
  bool& okay;
  Token& token;
  CompilerState& a_state;
  GenericProcessElement(bool& b, Token& t, CompilerState& s, Mode m)
    : okay(b), token(t), a_state(s), mode(m) {}

  // generic lambda emulation
  template <typename EL, typename T1, typename T2>
  void operator()(EL elem, T1 func, T2& var, bool validate = false)
  {
    if (mode == Mode::Parse)
    {
      if (!done && tokenIs(token, elem))
      {
        done = true;
        okay = func(a_state, token, var);
      }
    }
    else if (mode == Mode::Validate)
    {
      if (okay && validate)
      {
        okay = valid(var);
      }
    }
  }
};

template <typename T, typename T2>
bool processSection(CompilerState& a_state, T a_blockProcessor, T2 a_terminal)
{
  Token token;
  bool okay = true;
  GenericProcessElement processElement(okay, token, a_state, GenericProcessElement::Mode::Parse);
  while (okay)
  {
    token = a_state.m_getToken();
    processElement.done = false;
    a_blockProcessor(processElement);
    if (tokenIs(token, a_terminal))
    {
      break;
    }
    tokenAssert(processElement.done, "invalid token");
  }
  return okay;
}

template <typename T>
bool processBlock(CompilerState& a_state,
                  Token&         a_token,
                  T              a_blockProcessor,
                  SymbolType     a_open = SymbolType::OpenBrace,
                  SymbolType     a_close = SymbolType::CloseBrace)
{
  bool okay = false;
  a_token = a_state.m_getToken();
  if (tokenIs(a_token, a_open))
  {
    okay = processSection(a_state, a_blockProcessor, a_close);
  }
  if (okay)
  {
    GenericProcessElement validateElement(okay, a_token, a_state, GenericProcessElement::Mode::Validate);
    a_blockProcessor(validateElement);
  }
  return okay;
}

bool processInformationParameter(CompilerState& a_state, Token& a_token, std::string& a_parameter)
{
  Token token = a_state.m_getToken();
  tokenAssert(tokenIs(token, SymbolType::Assignment), "expected ':='");
  token = a_state.m_getToken();
  tokenAssert(token.m_tokenType == TokenType::StringLiteral, "expected a string");
  a_parameter = token.m_stringLiteral->m_string;
  printf("processing %s\n", a_parameter.c_str());
  token = a_state.m_getToken();
  tokenAssert(tokenIs(token, SymbolType::SemiColon), "expected a semi-colon");
  return true;
}

bool processInformation(CompilerState& a_state, Token& a_token, ModuleInformation& a_information)
{
  const bool required = true;
  bool okay = processBlock(a_state, a_token, [&a_information](GenericProcessElement& processElement) {
      processElement("description", processInformationParameter, a_information.m_description, required);
      processElement("version",     processInformationParameter, a_information.m_version,     required);
      processElement("copyright",   processInformationParameter, a_information.m_copyright,   required);
      processElement("author",      processInformationParameter, a_information.m_author,      required);
      processElement("checksum",    processInformationParameter, a_information.m_checksum,    required);
  });
  return okay;
}

bool processImport(CompilerState& a_state, Token& a_token, ModuleImports& a_imports)
{
  std::string moduleName = a_token.m_identifier->m_name;
  if (!a_state.m_modules.count(moduleName))
  {
    Module& module = a_state.m_modules[moduleName];
    // TODO: populate
    printf("need to parse module %s\n", moduleName.c_str());
  }
  a_imports.m_imports[a_token.m_identifier->m_name] = &a_state.m_modules[moduleName];
  a_token = a_state.m_getToken();
  return tokenIs(a_token, SymbolType::Comma) || tokenIs(a_token, SymbolType::CloseBrace);
}

bool processImports(CompilerState& a_state, Token& a_token, ModuleImports& a_imports)
{
  bool okay = processBlock(a_state, a_token, [&a_imports](GenericProcessElement& processElement) {
      processElement(TokenType::Identifier, processImport, a_imports);
  });
  return okay;
}

bool processTypedef(CompilerState& a_state, Token& a_token, ModuleInterface& a_interface)
{
  Token token = a_state.m_getToken();
  tokenAssert(token.m_tokenType != TokenType::Type, "redefinition of type");
  tokenAssert(token.m_tokenType == TokenType::Identifier, "expected an identifier");
  std::string typeName = token.m_identifier->m_name;
  tokenAssert(a_state.m_types.count(typeName) == 0, "redefinition of type (2)");
  token = a_state.m_getToken();
  tokenAssert(token.m_tokenType == TokenType::Symbol, "expected a ':=' symbol");
  tokenAssert(token.m_symbol == SymbolType::Assignment, "expected a ':=' symbol");
  token = a_state.m_getToken();
  tokenAssert(token.m_tokenType == TokenType::Type, "expected a type");

  // register the type
  TypeDeclaration* typeDef = &a_state.m_types[typeName];
  a_state.m_tokens[typeName] = TokenType::Type;

  // construct it
  a_interface.m_types.emplace_back(typeDef);
  typeDef->m_name = typeName;
  typeDef->m_type = TypeType::Alias;
  typeDef->m_alias = token.m_type;

  token = a_state.m_getToken();
  tokenAssert(token.m_tokenType == TokenType::Symbol, "expected a ';' symbol");
  tokenAssert(token.m_symbol == SymbolType::SemiColon, "expected a ';' symbol");
  return true;
}

bool processConst(CompilerState& a_state, Token& a_token, ModuleInterface& a_interface)
{
  Token token = a_state.m_getToken();
  tokenAssert(token.m_tokenType == TokenType::Type, "expected a type");
  TypeDeclaration* typeDef = token.m_type;

  token = a_state.m_getToken();
  tokenAssert(token.m_tokenType == TokenType::Identifier, "expected an identifier");
  std::string varName = token.m_identifier->m_name;

  token = a_state.m_getToken();
  tokenAssert(token.m_tokenType == TokenType::Symbol, "expected a ':=' symbol");
  tokenAssert(token.m_symbol == SymbolType::Assignment, "expected a ':=' symbol");

  // processConstExpression
  token = a_state.m_getToken();
  bool tokenTypeOkay = token.m_tokenType == TokenType::IntegerLiteral
    || token.m_tokenType == TokenType::FloatLiteral || token.m_tokenType == TokenType::StringLiteral;
  tokenAssert(tokenTypeOkay, "expected a constant expression");

  // register the constant variable
  ConstantVariable* constVar = &a_state.m_constants[varName];
  a_state.m_tokens[varName] = TokenType::Constant;

  // construct it
  a_interface.m_constants.emplace_back(constVar);
  constVar->m_name = varName;
  constVar->m_type = typeDef;
  switch (token.m_tokenType)
  {
    case TokenType::IntegerLiteral:
      constVar->m_value.m_type = ConstExpressionType::IntegerLiteral;
      constVar->m_value.m_integer = token.m_integerLiteral;
      break;
    case TokenType::FloatLiteral:
      constVar->m_value.m_type = ConstExpressionType::FloatLiteral;
      constVar->m_value.m_float = token.m_floatLiteral;
      break;
    case TokenType::StringLiteral:
      constVar->m_value.m_type = ConstExpressionType::StringLiteral;
      constVar->m_value.m_string = token.m_stringLiteral;
      break;
    default:
      break;
  }

  token = a_state.m_getToken();
  tokenAssert(token.m_tokenType == TokenType::Symbol, "expected a ';' symbol");
  tokenAssert(token.m_symbol == SymbolType::SemiColon, "expected a ';' symbol");
  return true;
}

bool processEnumValue(CompilerState& a_state, Token& a_token, EnumValues& a_values)
{
  Token& token = a_token;
  std::string enumValueName = a_token.m_identifier->m_name;
  tokenAssert(a_values.m_values.count(enumValueName) == 0, "duplicate enum value\n");
  a_token = a_state.m_getToken();
  if (tokenIs(a_token, SymbolType::Assignment))
  {
    a_token = a_state.m_getToken();
    tokenAssert(token.m_tokenType == TokenType::IntegerLiteral, "expected an integer");
    a_values.m_nextValue = a_token.m_integerLiteral->m_value;
    a_token = a_state.m_getToken();
  }
  a_values.m_values[enumValueName] = a_values.m_nextValue;
  a_values.m_nextValue++;
  return tokenIs(a_token, SymbolType::Comma) || tokenIs(a_token, SymbolType::CloseBrace);
}

bool processEnum(CompilerState& a_state, Token& a_token, ModuleInterface& a_interface)
{
  Token token = a_state.m_getToken();
  tokenAssert(token.m_tokenType == TokenType::Identifier, "expected an identifier");
  std::string enumName = token.m_identifier->m_name;
  TypeDeclaration* enumDef = &a_state.m_types[enumName];
  EnumValues* enumValues = &a_state.m_enums[enumName];
  a_state.m_tokens[enumName] = TokenType::Type;
  enumDef->m_name = enumName;
  enumDef->m_type = TypeType::Enum;
  enumDef->m_enum = enumValues;
  enumValues->m_nextValue = 0;

  bool okay = processBlock(a_state, a_token, [&enumValues](GenericProcessElement& processElement) {
      processElement(TokenType::Identifier, processEnumValue, *enumValues);
  });
  return okay;
}

// TODO: move to utils
template <typename Container, typename Functor>
typename Container::const_iterator find(const Container& a_container, Functor&& a_functor)
{
  return std::find_if(a_container.begin(), a_container.end(), a_functor);
}

  template <typename Container>
bool processVariableNameHelper(CompilerState& a_state, Token& a_token, Variable& a_variable, Container& a_collection, const char* a_errorMessage)
{
  Token& token = a_token;
  token = a_state.m_getToken();
  tokenAssert(token.m_tokenType == TokenType::Identifier, "expected an identifier");
  a_variable.m_name = token.m_identifier->m_name;
  std::string name = a_variable.m_name;
  auto sameName = [&name](const Variable& a_var) { return a_var.m_name == name; };
  tokenAssert(find(a_collection, sameName) == a_collection.end(), a_errorMessage);
  return true;
}

template <typename Container>
bool processVariableHelper(CompilerState& a_state, Token& a_token, Variable& a_variable, Container& a_collection, const char* a_errorMessage)
{
  Token& token = a_token;
  tokenAssert(token.m_tokenType == TokenType::Type, "expected a type");
  a_variable.m_type = token.m_type;
  return processVariableNameHelper(a_state, a_token, a_variable, a_collection, a_errorMessage);
}

bool processStructMember(CompilerState& a_state, Token& a_token, StructMembers& a_members)
{
  Variable member;
  if (!processVariableHelper(a_state, a_token, member, a_members.m_members, "existing member with same name exists"))
    return false;
  a_members.m_members.push_back(member);

  Token& token = a_token;
  token = a_state.m_getToken();
  tokenAssert(tokenIs(token, SymbolType::SemiColon), "expected a ';' symbol");
  return true;
}

bool processStruct(CompilerState& a_state, Token& a_token, ModuleInterface& a_interface)
{
  Token& token = a_token;
  token = a_state.m_getToken();
  tokenAssert(token.m_tokenType == TokenType::Identifier, "expected an identifier");
  std::string structName = token.m_identifier->m_name;
  TypeDeclaration* structDef = &a_state.m_types[structName];
  StructMembers* structMembers = &a_state.m_structs[structName];
  a_state.m_tokens[structName] = TokenType::Type;
  structDef->m_name = structName;
  structDef->m_type = TypeType::Struct;
  structDef->m_struct = structMembers;

  if (!processBlock(a_state, a_token, [&structMembers](GenericProcessElement& processElement) {
          processElement(TokenType::Type, processStructMember, *structMembers); }))
  {
    // Forward declaration
    structDef->m_struct = nullptr; // if the members are nullptr, it is a forward declaration
    tokenAssert(tokenIs(token, SymbolType::SemiColon), "expected a ';' symbol");
  }
  return true;
}

bool processFunctionParameter(CompilerState& a_state, Token& a_token, FunctionDeclaration& a_function)
{
  Variable parameter;
  if (!processVariableHelper(a_state, a_token, parameter, a_function.m_parameters, "existing parameter with same name exists"))
    return false;
  a_function.m_parameters.push_back(parameter);

  Token& token = a_token;
  token = a_state.m_getToken();
  return tokenIs(a_token, SymbolType::Comma) || tokenIs(a_token, SymbolType::CloseBracket);
}

bool processFunctionCommon(CompilerState& a_state, Token& a_token, ModuleInterface& a_interface)
{
  FunctionDeclaration* functionDef = nullptr;
  Token& token = a_token;
  token = a_state.m_getToken();

  if (token.m_tokenType == TokenType::Identifier)
  {
    // tokenAssert(token.m_tokenType == TokenType::Identifier, "expected an identifier");
    std::string functionName = token.m_identifier->m_name;
    functionDef = &a_state.m_functions[functionName];
    a_state.m_tokens[functionName] = TokenType::Function;
    functionDef->m_name = functionName;
    functionDef->m_returnType = nullptr;
  }
  else if (token.m_tokenType == TokenType::Function)
  {
    // Existing function declaration - either it is redeclaring/overriding existing function declaration
    // or it is defining the body of a forward/interface of a function - TODO: validate the arguments are not changed
    // TODO: perhaps need to think about if function is declared and it is a separate definition of the function
    //       then there is no need to repeat what the arguments etc are, might be easier if just assign to it like this:
    //  function MyFunc(int8 arg1) => int8;    // This is the declaration
    //  MyFunc := { return arg1 + 1; }         // This is the definition - note without the repetition but hard to see the
    //                                         // args and return types in the scope it is defined, so good and bad
    functionDef = token.m_function;
  }

  if (!functionDef)
  {
    return false;
  }

  bool okay = processBlock(a_state, a_token, [&functionDef](GenericProcessElement& processElement) {
      processElement(TokenType::Type, processFunctionParameter, *functionDef);
  }, SymbolType::OpenBracket, SymbolType::CloseBracket);

  if (okay)
  {
    token = a_state.m_getToken();
    tokenAssert(tokenIs(token, SymbolType::Produces), "expected '=>' and return type");
    token = a_state.m_getToken();
    tokenAssert(tokenIs(token, TokenType::Type), "expected a type");
    functionDef->m_returnType = token.m_type;
  }
  return true;
}

bool processFunctionDeclaration(CompilerState& a_state, Token& a_token, ModuleInterface& a_interface)
{
  bool okay = processFunctionCommon(a_state, a_token, a_interface);
  if (okay)
  {
    Token token = a_state.m_getToken();
    tokenAssert(tokenIs(token, SymbolType::SemiColon), "expected ';'");
  }
  return true;
}

//bool processLeftExpression(CompilerState& a_state, Token& a_token, Function& a_function)

struct RightHandSideExpression
{
  TypeDeclaration* m_deducedType;
  Expression       m_expressionTree;
};

struct LeftHandSideExpression
{
  // var declaration and assignment
  // var
  // struct var member -> recursive
};

/*
bool processLeftExpression(CompilerState& a_state, Token& a_token, LeftHandSideExpression& a_lhsExpression)
{
}
*/

/*

struct Expression
{
  ExpressionType                        m_type;
  union
  {
    ConstExpression                     m_constant;
    Variable*                           m_variable;
    UnaryExpression                     m_unary;
    BinaryExpression                    m_binary;
    TernaryExpression                   m_ternary;
  };
};

*/


Expression& newExpression(FunctionBlock& a_functionBlock)
{
  Expression newExpr;
  a_functionBlock.m_expressions.push_back(newExpr);
  return a_functionBlock.m_expressions.back();
}

bool processRightLeft(CompilerState& a_state, Token& a_token, FunctionBlock& a_functionBlock, Expression& a_left)
{
  if (tokenIs(a_token, TokenType::IntegerLiteral))
  {
    a_left.m_type = ExpressionType::Constant;
    a_left.m_constant.m_type = ConstExpressionType::IntegerLiteral;
    a_left.m_constant.m_integer = a_token.m_integerLiteral;
  }
  else if (tokenIs(a_token, TokenType::FloatLiteral))
  {
    a_left.m_type = ExpressionType::Constant;
    a_left.m_constant.m_type = ConstExpressionType::FloatLiteral;
    a_left.m_constant.m_float = a_token.m_floatLiteral;
  }
  else if (tokenIs(a_token, TokenType::StringLiteral))
  {
    a_left.m_type = ExpressionType::Constant;
    a_left.m_constant.m_type = ConstExpressionType::StringLiteral;
    a_left.m_constant.m_string = a_token.m_stringLiteral;
  }
  else if (tokenIs(a_token, TokenType::Function))
  {
    // TODO: need to add support for making function calls - this is another expression type
  }
  else if (tokenIs(a_token, TokenType::Identifier))
  {
    // TODO: assuming here that all identifiers are local variables - could be non-local, could be in outer block, could be parameter
    // TODO: perhaps variable lookup could be built in to the tokenizing. as enter/leave blocks, could update the CompilerState with the
    //       table of variable for the given context? Perhaps variables can have a type, local,parameter,const global.
    a_left.m_type = ExpressionType::Variable;
    a_left.m_variableId = a_token.m_identifier;
  }
  else if (tokenIs(a_token, TokenType::Constant))
  {
    // TODO:

  }
  else if (tokenIs(a_token, TokenType::Type))
  {
    // TODO:  could perhaps assign a type to a variable if the variable is of meta type
  }
  else if (tokenIs(a_token, TokenType::Module))
  {
    // TODO:  perhaps namespace - in general need to consider this/namespaces

    // TODO:  build test case with Math module, and reference a constant like Math::pi.
  }
  else if (tokenIs(a_token, TokenType::Keyword))
  {
    // TODO:  keyword could be control-flow, like  if/while/do/done/for etc - need to process somewhere
    //        although this is not valid mid-way through an expression, only for the first token on the lhs.
  }
  else if (tokenIs(a_token, TokenType::Symbol))
  {
    // ++ and -- are not supported, perhaps should consider if need or not
    a_left.m_type = ExpressionType::Unary;
  
    if (tokenIs(a_token, SymbolType::Add))
    {
      a_left.m_unary.m_operator = UnaryExpressionType::Positive;
    }
    else if (tokenIs(a_token, SymbolType::Subtract))
    {
      a_left.m_unary.m_operator = UnaryExpressionType::Negative;
    }
    else if (tokenIs(a_token, SymbolType::Tilda))
    {
      a_left.m_unary.m_operator = UnaryExpressionType::OnesComplement;
    }
    else if (tokenIs(a_token, SymbolType::Not))
    {
      a_left.m_unary.m_operator = UnaryExpressionType::Not;
    }

    a_left.m_unary.m_first = &newExpression(a_functionBlock);
  
    a_token = a_state.m_getToken();
    processRightLeft(a_state, a_token, a_functionBlock, *a_left.m_unary.m_first);
  }
  else
  {
    return false;
  }
  return true;
}

bool processTerminal(CompilerState& a_state, Token& a_token, FunctionBlock& a_functionBlock, Expression& a_expression)
{
  return processRightLeft(a_state, a_token, a_functionBlock, a_expression);
}

bool processRightExpressionHelper(CompilerState& a_state, Token& a_token, FunctionBlock& a_functionBlock, Expression& a_expression);

bool processRightRight(CompilerState& a_state, Token& a_token, FunctionBlock& a_functionBlock, Expression& a_left, Expression& a_expression)
{
  Token& token = a_token;
  a_token = a_state.m_getToken();

  if (tokenIs(a_token, SymbolType::SemiColon))
  {
    Expression& left = newExpression(a_functionBlock);
    left = a_left;
    a_expression = left;
    //a_expression = a_left;
    return true;
  }
  else if (tokenIs(a_token, SymbolType::Add))
  {
    Expression& left = newExpression(a_functionBlock);
    Expression& right = newExpression(a_functionBlock);
    left = a_left;
    if (!processRightExpressionHelper(a_state, a_token, a_functionBlock, right))
    {
      return false;
    }
    a_expression.m_type = ExpressionType::Binary;
    a_expression.m_binary.m_first = &left;
    a_expression.m_binary.m_second = &right;
    a_expression.m_binary.m_operator = BinaryExpressionType::Add;
    return true;
  }
  else if (tokenIs(a_token, SymbolType::Multiply))
  {
    a_expression.m_type = ExpressionType::Binary;

    Expression& left = newExpression(a_functionBlock);
    Expression& right = newExpression(a_functionBlock);
    left = a_left;
    a_token = a_state.m_getToken();
    if (!processRightLeft(a_state, a_token, a_functionBlock, right))
    {
      return false;
    }
    
    //Expression& product = newExpression(a_functionBlock);
    Expression product;
    product.m_type = ExpressionType::Binary;
    product.m_binary.m_first = &left;
    product.m_binary.m_second = &right;
    product.m_binary.m_operator = BinaryExpressionType::Multiply;
    return processRightRight(a_state, a_token, a_functionBlock, product, a_expression);
  }

  return false;
}

bool processTerm(CompilerState& a_state, Token& a_token, FunctionBlock& a_functionBlock, Expression& a_expression)
{
  Token& token = a_token;
  a_token = a_state.m_getToken();

  //Expression left;
  Expression& left = newExpression(a_functionBlock);
  if (!processTerminal(a_state, a_token, a_functionBlock, left))
  {
    return false;
  }

  Expression& exp = newExpression(a_functionBlock);
  if (!processRightRight(a_state, a_token, a_functionBlock, left, exp))
  // if (!processRightRight(a_state, a_token, a_functionBlock, left, a_expression))
  {
    return false;
  }
  a_expression = exp;

  return true;
}

bool processRightExpressionHelper(CompilerState& a_state, Token& a_token, FunctionBlock& a_functionBlock, Expression& a_expression)
{
  Token& token = a_token;
  a_token = a_state.m_getToken();

#if 0
  while (!tokenIs(token, SymbolType::SemiColon))
  {
    a_token = a_state.m_getToken();
  }
#else

  Expression& left = newExpression(a_functionBlock);
  if (!processRightLeft(a_state, a_token, a_functionBlock, left))
  {
    return false;
  }

  if (!processRightRight(a_state, a_token, a_functionBlock, left, a_expression))
  {
    return false;
  }

#endif

  return true;
}

void printSpaces(int a_spaces)
{
  for (int i = 0; i < a_spaces; ++i)
    printf(" ");
}

void dumpExpressionTree(Expression& a_expression, int a_depth = 0)
{
  printf("\n");
  printSpaces(a_depth);
  switch (a_expression.m_type)
  {
    case ExpressionType::Constant:
      printf("const -%llu-", a_expression.m_constant.m_integer->m_value);
      break;
    case ExpressionType::Variable:
      printf("var -%s-", a_expression.m_variableId->m_name.c_str());
      break;
    case ExpressionType::Unary:
      printf("unary ");
      break;
    case ExpressionType::Binary:
      printf("binary L:");
      dumpExpressionTree(*a_expression.m_binary.m_first, a_depth + 1);
      printf("\n");
      printSpaces(a_depth);
      printf("binary R:");
      dumpExpressionTree(*a_expression.m_binary.m_second, a_depth + 1);
      break;
    case ExpressionType::Ternary:
      printf("ternary ");
      break;
  }
}
  
std::string expressionTreeString(const Expression& a_expression)
{
  std::string str;
  switch (a_expression.m_type)
  {
    case ExpressionType::Constant:
      str += std::to_string(a_expression.m_constant.m_integer->m_value);
      break;
    case ExpressionType::Variable:
      str += a_expression.m_variableId->m_name;
      break;
    case ExpressionType::Unary:
      str += "(";
      switch (a_expression.m_unary.m_operator)
      {
        case UnaryExpressionType::Not:            str += "!"; break;
        case UnaryExpressionType::OnesComplement: str += "~"; break;
        case UnaryExpressionType::Positive:       str += "+"; break;
        case UnaryExpressionType::Negative:       str += "-"; break;
      }
      str += expressionTreeString(*a_expression.m_unary.m_first) + ")";
      break;
    case ExpressionType::Binary:
      str += "(" + expressionTreeString(*a_expression.m_binary.m_first);
      if (a_expression.m_binary.m_operator == BinaryExpressionType::Multiply)
        str += " x ";
      else if (a_expression.m_binary.m_operator == BinaryExpressionType::Add)
        str += " + ";
      else
        str += " ? ";
      str += expressionTreeString(*a_expression.m_binary.m_second) + ")";
      break;
    case ExpressionType::Ternary:
      str += " !!!!!!! ";
      break;
  }
  return str;
}


std::string expressionTreeStack(const Expression& a_expression)
{
  std::string str;
  switch (a_expression.m_type)
  {
    case ExpressionType::Constant:
      if (a_expression.m_constant.m_integer)
        str += "push " + std::to_string(a_expression.m_constant.m_integer->m_value) + "\n";
      break;
    case ExpressionType::Variable:
      if (a_expression.m_variableId)
        str += "push " + a_expression.m_variableId->m_name + "\n";
      break;
    case ExpressionType::Unary:
      if (a_expression.m_unary.m_first)
        str += expressionTreeStack(*a_expression.m_unary.m_first);
      str += "op unary ";
      switch (a_expression.m_unary.m_operator)
      {
        case UnaryExpressionType::Not:            str += "!"; break;
        case UnaryExpressionType::OnesComplement: str += "~"; break;
        case UnaryExpressionType::Positive:       str += "+"; break;
        case UnaryExpressionType::Negative:       str += "-"; break;
      }
      str += "\n";
      break;
    case ExpressionType::Binary:
      if (a_expression.m_binary.m_first)
        str += expressionTreeStack(*a_expression.m_binary.m_first);
      if (a_expression.m_binary.m_second)
        str += expressionTreeStack(*a_expression.m_binary.m_second);
      str += "op binary ";
      if (a_expression.m_binary.m_operator == BinaryExpressionType::Multiply)
        str += " x ";
      else if (a_expression.m_binary.m_operator == BinaryExpressionType::Add)
        str += " + ";
      else
        str += " ? ";
      str += "\n";
      break;
    case ExpressionType::Ternary:
      str += " !!!!!!! ";
      break;
  }
  return str;
}


std::string expressionTreeRegisters(const Expression& a_expression, int a_currentRegister = 0)
{
  std::string str;
  std::string reg1 = "%r" + std::to_string(a_currentRegister);
  std::string reg2 = "%r" + std::to_string(a_currentRegister + 1);

  switch (a_expression.m_type)
  {
    case ExpressionType::Constant:
      if (a_expression.m_constant.m_integer)
        str += "mov   " + reg1 + ", " + std::to_string(a_expression.m_constant.m_integer->m_value) + "\n";
      break;
    case ExpressionType::Variable:
      if (a_expression.m_variableId)
        str += "mov   " + reg1 + ", " + a_expression.m_variableId->m_name + "\n";
      break;
    case ExpressionType::Unary:
      if (a_expression.m_unary.m_first)
        str += expressionTreeRegisters(*a_expression.m_unary.m_first, a_currentRegister);
      switch (a_expression.m_unary.m_operator)
      {
        case UnaryExpressionType::Not:            str += "not"; break;
        case UnaryExpressionType::OnesComplement: str += "not"; break;
        case UnaryExpressionType::Positive:       str += "pos"; break;
        case UnaryExpressionType::Negative:       str += "neg"; break;
      }
      str += "   " + reg1 + "\n";
      break;
    case ExpressionType::Binary:
      if (a_expression.m_binary.m_first)
        str += expressionTreeRegisters(*a_expression.m_binary.m_first, a_currentRegister);
      if (a_expression.m_binary.m_second)
        str += expressionTreeRegisters(*a_expression.m_binary.m_second, a_currentRegister + 1);
      if (a_expression.m_binary.m_operator == BinaryExpressionType::Multiply)
        str += "mul";
      else if (a_expression.m_binary.m_operator == BinaryExpressionType::Add)
        str += "add";
      else
        str += "???";
      str += "   " + reg1 + ", " + reg2 + "\n";
      break;
    case ExpressionType::Ternary:
      str += " !!!!!!! ";
      break;
  }
  return str;
}


bool processRightExpression(CompilerState& a_state, Token& a_token, FunctionBlock& a_functionBlock, RightHandSideExpression& a_rhsExpression)
{
//  if (!processRightExpressionHelper(a_state, a_token, a_functionBlock, a_rhsExpression.m_expressionTree))
  if (!processTerm(a_state, a_token, a_functionBlock, a_rhsExpression.m_expressionTree))
  {
    return false;
  }

  //dumpExpressionTree(a_rhsExpression.m_expressionTree);
  
  std::string expTreeStr = expressionTreeString(a_rhsExpression.m_expressionTree);
  std::string expTreeReg = expressionTreeRegisters(a_rhsExpression.m_expressionTree);
  std::string expTreeStack = expressionTreeStack(a_rhsExpression.m_expressionTree);
  printf("expression tree as string:  %s\n", expTreeStr.c_str());
  printf("expression tree as registers:\n%s\n", expTreeReg.c_str());
  printf("expression tree as stack:\n%s\n", expTreeStack.c_str());

  // TODO: process the expression tree to determine the type
  a_rhsExpression.m_deducedType = &a_state.m_types["int8"];
  return true;
}

bool processExpression(CompilerState& a_state, Token& a_token, FunctionBlock& a_functionBlock)
{
  Token& token = a_token;
  if (tokenIs(a_token, KeywordType::Var))
  {
    Variable variable;
    if (!processVariableNameHelper(a_state, a_token, variable, a_functionBlock.m_localVariables, "existing variable with same name exists"))
    {
      return false;
    }
    token = a_state.m_getToken();
    tokenAssert(tokenIs(a_token, SymbolType::Assignment), "expected an assignment to deduce the type of var");
    
    // TODO: save the assigned value/variable/expression
    RightHandSideExpression rhsExpression;
    if (!processRightExpression(a_state, a_token, a_functionBlock, rhsExpression))
    {
      return false;
    }

    variable.m_type = rhsExpression.m_deducedType;
    a_functionBlock.m_localVariables.push_back(variable);

    tokenAssert(tokenIs(token, SymbolType::SemiColon), "expected a ';' symbol");
    return true;
  }
  else
  {
    // TODO: probably block or control flow statements
  }

  return true;
}

bool processStatement(CompilerState& a_state, Token& a_token, FunctionBlock& a_functionBlock)
{
  return true;
}

bool processVariable(CompilerState& a_state, Token& a_token, FunctionBlock& a_functionBlock)
{
  Variable variable;
  if (!processVariableHelper(a_state, a_token, variable, a_functionBlock.m_localVariables, "existing variable with same name exists"))
  {
    return false;
  }
  a_functionBlock.m_localVariables.push_back(variable);

  Token& token = a_token;
  token = a_state.m_getToken();

  if (tokenIs(a_token, SymbolType::Assignment))
  {
    // TODO: save the assigned value/variable/expression
    RightHandSideExpression rhsExpression;
    if (!processRightExpression(a_state, a_token, a_functionBlock, rhsExpression))
    {
      return false;
    }
  }
  
  tokenAssert(tokenIs(token, SymbolType::SemiColon), "expected a ';' symbol");
  return true;
}

  /*
struct Statement
{
  StatementType                         m_type;
  union
  {
    FunctionBlock*                      m_block;
    ControlFlow*                        m_controlFlow;
    Expression*                         m_expression;
  };
};

struct FunctionBlock
{
  std::vector<Variable>                 m_localVariables;
  std::vector<Statement>                m_statements;
  std::vector<Expression>               m_expressions;
};

struct Function
{
  FunctionDeclaration*                  m_interface;
  FunctionBlock                         m_implementation;
};

*/


bool processFunctionBody(CompilerState& a_state, Token& a_token, ModuleImplementation& a_implementation)
{
  bool okay = processFunctionCommon(a_state, a_token, a_implementation.m_details);
  if (okay)
  {
    Function function;
    okay = processBlock(a_state, a_token, [&function](GenericProcessElement& processElement) {
        processElement(TokenType::Type,        processVariable,     function.m_implementation);  // variable declaration
        processElement(TokenType::Identifier,  processStatement,    function.m_implementation);  // variable assignment
        processElement(TokenType::Keyword,     processExpression,   function.m_implementation);  // different
    });

    if (!okay && tokenIs(a_token, SymbolType::SemiColon))
    {
      return true;
    }
  }
  return true;
}

bool processInterface(CompilerState& a_state, Token& a_token, ModuleInterface& a_interface)
{
  bool okay = processBlock(a_state, a_token, [&a_interface](GenericProcessElement& processElement) {
      processElement(KeywordType::Type,     processTypedef,             a_interface);
      processElement(KeywordType::Const,    processConst,               a_interface);
      processElement(KeywordType::Enum,     processEnum,                a_interface);
      processElement(KeywordType::Struct,   processStruct,              a_interface);
      processElement(KeywordType::Function, processFunctionDeclaration, a_interface);
  });
  Token& token = a_token;
  tokenAssert(okay, "bad interface");
  return okay;
}

bool processImplementation(CompilerState& a_state, Token& a_token, ModuleImplementation& a_implementation)
{
  bool okay = processBlock(a_state, a_token, [&a_implementation](GenericProcessElement& processElement) {
      processElement(KeywordType::Type,     processTypedef,      a_implementation.m_details);
      processElement(KeywordType::Const,    processConst,        a_implementation.m_details);
      processElement(KeywordType::Enum,     processEnum,         a_implementation.m_details);
      processElement(KeywordType::Struct,   processStruct,       a_implementation.m_details);
      processElement(KeywordType::Function, processFunctionBody, a_implementation);
  });
  Token& token = a_token;
  tokenAssert(okay, "bad interface");
  return okay;
}

bool processTests(CompilerState& a_state, Token& a_token, ModuleTests& a_tests)
{
  bool okay = true;
  return okay;
}

bool processModule(CompilerState& a_state, Token& a_token, Module& a_module)
{
  const bool required = true;
  bool okay = processBlock(a_state, a_token, [&a_module](GenericProcessElement& processElement) {
      processElement(KeywordType::Information,    processInformation,    a_module.m_information);
      processElement(KeywordType::Imports,        processImports,        a_module.m_imports);
      processElement(KeywordType::Interface,      processInterface,      a_module.m_interface);
      processElement(KeywordType::Implementation, processImplementation, a_module.m_implementation);
      processElement(KeywordType::Tests,          processTests,          a_module.m_tests);
  });
  return okay;
}

bool processProgram(CompilerState& a_state, const char* a_fileName)
{
  bool okay = true;
  Token token = a_state.m_getToken();
  if (tokenIs(token, KeywordType::Module))
  {
    token = a_state.m_getToken();
    if (token.m_tokenType != TokenType::Identifier)
    {
      tokenAssert(false, "expected an identifier");
      return false;
    }
    const std::string& moduleName = token.m_identifier->m_name;
    Module& module = a_state.m_modules[moduleName];
    module.m_name = moduleName;
    module.m_fileName = a_fileName;
    okay = processModule(a_state, token, module);
  }
  else
  {
    tokenAssert(false, "invalid token");
    okay = false;
  }
  return okay;
}

int main(int argc, char* argv[])
{
  if (argc < 2)
  {
    printf("bad number of arguments\n");
    return -1;
  }

  std::ifstream input(argv[1]);
  if (!input.is_open())
  {
    printf("couldn't open input file\n");
    return -1;
  }

  TokenizerState tokenizerState;
  CompilerState state = InitializeCompilerState();

  // Setup handlers
  //   i/o input
  state.m_getInput = [&input](){ std::string line; std::getline(input, line); return line; };
  //   eof
  state.m_getEof   = [&input](){ return input.eof(); };
  //   tokenizer
  state.m_getToken = [&state, &tokenizerState](){ return getNextToken(state, tokenizerState); };
  //   parser
  state.m_parser   = [&state](){ dumpTokens(state); };

  //state.m_parser = &parser;
  //state.m_parser();
  bool okay = processProgram(state, argv[1]);
  printf("%sokay\n", !okay ? "not " : "");

  return okay ? 0 : -1;
}

