#include "XmlParser.h"
#include "MemoryMapping.h"
#include "Containers.h"
#include "Utils.h"
#include <string>
#include <vector>
#include <map>
#include <utility>
#include <cstdio>


BEGIN_NAMESPACE


// Default constructor; performs no initialisation of its own.
XmlParser::XmlParser() {}


// Destructor; nothing is released here.
XmlParser::~XmlParser() {}


void XmlParser::parseXmlFile(const char* a_fileName, XmlConsumer& a_consumer)
{
	MemoryMapping memMap(a_fileName);
	char* d = (char*)memMap.address();
	if (!d)
		return;
	size_t s = (size_t)memMap.size(); // If this is a 32bit build, don't expect xml files more than 4GiB to work!
	parseXmlData(d, s, a_consumer);
}


// Fixed slices sent along with the structural tokens: EMIT_END_TAG() passes
// endSlice ("</>", length 3) with XT_TagEnd and EMIT_START_TAG() passes
// startSlice ("<>", length 2) with XT_TagStart, so those tokens never carry
// an empty slice.
static const XmlStringSlice endSlice   = { "</>", 3 };
static const XmlStringSlice startSlice = { "<>", 2 };


// Helper macros for XmlParser::parseXmlData(). They expand textually and rely
// on names that are in scope there: a_consumer, slice, state, start and i.

// Send the current slice to the consumer as a token of type XT_<type>.
#define EMIT_ATOM(type) \
	a_consumer.consumeToken(XT_##type, slice)

// Send an XT_TagStart token carrying the fixed startSlice.
#define EMIT_START_TAG() \
	a_consumer.consumeToken(XT_TagStart, startSlice)

// Send an XT_TagEnd token carrying the fixed endSlice.
#define EMIT_END_TAG() \
	a_consumer.consumeToken(XT_TagEnd, endSlice)

// Go back to state 0 and start the next slice right after the current
// character. Expands to TWO statements, so it must only be used inside a
// braced block (never as the sole body of an unbraced if/else).
#define RESET_STATE \
	state = 0; start = i + 1


// Tokenises the buffer a_data (a_size bytes) with a character-driven state
// machine and reports every token to a_consumer.consumeToken().
//
// Scanning starts at offset 2; the first two bytes are never inspected
// (presumably to step over a byte-order mark -- see the disabled BOM printf
// below; TODO confirm). Indices are plain int, so buffers whose size does not
// fit in an int are not supported.
//
// States:
//    0  outside markup; '<' flushes the pending slice as XT_Text
//    1  just after '<' ('?' and whitespace are skipped, '/' starts a close tag)
//    2  inside an element name
//    3  inside a tag, between attributes
//    4  inside an attribute name
//    5  inside an attribute value (outside of double quotes)
//    6  '/' seen while in an attribute value
//    7  '/' seen between attributes
//    8  '/' seen directly after the element name
//    9  inside the double-quoted part of an attribute value
//   10  just after "</"
//   11  inside the name of a closing tag
//
// A character that matches no rule prints "unexpected token" and leaves the
// state unchanged.
void XmlParser::parseXmlData(const char* a_data, size_t a_size, XmlConsumer& a_consumer)
{
	// Nothing to scan without a buffer.
	if (!a_data)
		return;

	const char* d = a_data;
	int state = 0;
	int start = 2;
	//printf("BOM: %i %i %i %i\n", d[0], d[1], d[2], d[3]);
	for (int i = 2; i < int(a_size); i++)
	{
		// The pending slice covers d[start] .. d[i-1]; the current character
		// is never part of it.
		int len = i - start;
		XmlTokenType typ = XT_Unknown;
		XmlStringSlice slice = { &d[start], len };

		// The <cctype> classifiers have undefined behaviour for negative
		// arguments other than EOF, which is what a plain (signed) char
		// yields for bytes >= 0x80. Classify the byte as unsigned char.
		const unsigned char ch = static_cast<unsigned char>(d[i]);

		// NOTE: inside do { } while(0) a 'continue' jumps to the (false) loop
		// condition, i.e. it leaves the block and falls through to the
		// deferred emit below -- it does NOT skip to the next character.
		do {
			if (state == 0 && ch == '<')    { typ = XT_Text; state = 1; continue; }
			if (state == 0)                 { continue; }
			if (state == 1 && ch == '?')    { continue; }
			if (state == 1 && ch == '/')    { state = 10; continue; }
			if (state == 1 && isspace(ch))  { continue; }
			if (state == 1 && isalnum(ch))  { start = i; state = 2; continue; }
			if (state == 2 && isalnum(ch))  { continue; }
			if (state == 2 && isspace(ch))  { typ = XT_TagOpen; state = 3; continue; }
			if (state == 2 && ch == '/')    { state = 8; continue; }
			if (state == 2 && ch == '>')    { EMIT_ATOM(TagOpen); EMIT_START_TAG(); RESET_STATE; continue; }
			
			if (state == 3 && isspace(ch))  { continue; }
			if (state == 3 && isalnum(ch))  { start = i; state = 4; continue; }
			if (state == 3 && ch == '/')    { state = 7; continue; }
			if (state == 3 && ch == '>')	{ EMIT_START_TAG(); RESET_STATE; continue; }

			if (state == 4 && isalnum(ch))  { continue; }
			//if (state == 4 && isspace(ch))  { EMIT_ATOM(Attribute); state = 3; continue; }
			if (state == 4 && ch == '=')    { typ = XT_AttributeName; state = 5; start = i+1; continue; }

			// Quotes are not stripped: the value slice begins right after '='
			// and therefore includes any surrounding double quotes.
			if (state == 5 && ch == '\"')   { state = 9; continue; }

			if (state == 5 && isalnum(ch))  { continue; }
			if (state == 5 && isspace(ch))  { typ = XT_AttributeValue; state = 3; continue; }
			if (state == 5 && ch == '/')    { state = 6; continue; }
			if (state == 5 && ch == '>')	{ EMIT_ATOM(AttributeValue); EMIT_START_TAG(); RESET_STATE; continue; }
			if (state == 5 && ispunct(ch))  { continue; }
			// In states 6 and 8 the slice ends with the '/' of "/>"; drop it.
			if (state == 6 && ch == '>')    { slice.m_length--; EMIT_ATOM(AttributeValue); EMIT_START_TAG(); EMIT_END_TAG(); RESET_STATE; continue; }

			if (state == 7 && ch == '>')    { EMIT_START_TAG(); EMIT_END_TAG(); RESET_STATE; continue; }
			if (state == 8 && ch == '>')    { slice.m_length--; EMIT_ATOM(TagOpen); EMIT_START_TAG(); EMIT_END_TAG(); RESET_STATE; continue; }
			if (state == 9 && ch == '\"')   { state = 5; continue; }
			if (state == 9)                 { continue; }

			if (state == 10 && isspace(ch)) { continue; }
			if (state == 10 && isalnum(ch)) { state = 11; continue; }
			if (state == 11 && isalnum(ch)) { continue; }
			if (state == 11 && isspace(ch)) { continue; }
			if (state == 11 && ch == '>')   { EMIT_END_TAG(); RESET_STATE; continue; }
			printf("unexpected token\n"); break;
		} while(0);

		// Deferred emit for rules that only recorded a token type.
		if (typ != XT_Unknown)
			a_consumer.consumeToken(typ, slice);
	}
}


// One element of the DOM tree: its tag name, its attributes and its child
// elements. A node owns its children and deletes them (recursively) in its
// destructor.
class XmlNode
{
public:
	// Declared explicitly because the private copy constructor below
	// suppresses the implicit default constructor.
	XmlNode() {}
	~XmlNode() {
		for (unsigned i = 0; i < m_children.size(); i++)
			delete m_children[i];
	}
	String						m_tag;         // element name
	HashMap<String,String>		m_attributes;  // attribute name -> value
	Vector<XmlNode*>			m_children;    // owned child nodes, in insertion order
private:
	// Not copyable: a member-wise copy would share the child pointers and
	// both copies would delete them. Declared but intentionally not defined.
	XmlNode(const XmlNode&);
	XmlNode& operator=(const XmlNode&);
};


class XmlDomTreeBuilderData
{
public:
	void printTreeRecurse(int depth, XmlNode* a_node);
	void consumeToken(XmlTokenType a_type, const XmlStringSlice& a_text);

	XmlNode* m_domTreeRootNode;
	Vector<XmlNode*> m_parentNode;
	XmlNode* m_currentNode;
	String m_currentAttributeName;
};


// Allocates the builder's private data and marks the tree as empty
// (no root node, no current node).
XmlDomTreeBuilder::XmlDomTreeBuilder()
{
	XmlDomTreeBuilderData* data = new XmlDomTreeBuilderData;
	data->m_currentNode = 0;
	data->m_domTreeRootNode = 0;
	m_data = data;
}


// Deletes the root node (if any) and then the private data block itself.
XmlDomTreeBuilder::~XmlDomTreeBuilder()
{
	XmlDomTreeBuilderData* data = m_data;
	delete data->m_domTreeRootNode;   // deleting a null root is a no-op
	delete data;
}


// Forwards the token unchanged to the private implementation object.
void XmlDomTreeBuilder::consumeToken(XmlTokenType a_type, const XmlStringSlice& a_text)
{
	XmlDomTreeBuilderData& impl = *m_data;
	impl.consumeToken(a_type, a_text);
}


void XmlDomTreeBuilder::printDomTree()
{
	printf("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
	m_data->printTreeRecurse(0, m_data->m_domTreeRootNode);
}


void XmlDomTreeBuilderData::printTreeRecurse(int depth, XmlNode* a_node)
{
	for (unsigned i = 0; i < a_node->m_children.size(); i++)
	{
		for (int s = 0; s < depth; s++)
			printf("  ");
		printf("<%s", a_node->m_children[i]->m_tag.c_str());
		for (HashMap<String,String>::iterator it = 
			a_node->m_children[i]->m_attributes.begin();
			it != a_node->m_children[i]->m_attributes.end(); ++it)
		{
			printf(" %s=%s", it->first.c_str(), it->second.c_str());
		}
		if (a_node->m_children[i]->m_children.size())
		{
			printf(">\n");
			printTreeRecurse(depth+1, a_node->m_children[i]);
			for (int s = 0; s < depth; s++)
				printf("  ");
			printf("</%s>\n", a_node->m_children[i]->m_tag.c_str());
		} else {
			printf(" />\n");
		}
	}
}


// Applies one token to the tree under construction.
//
//   XT_Text            ignored (text content is not stored)
//   XT_TagOpen         creates a node named after the slice; it becomes a
//                      child of the current node, or the root if there is no
//                      current node yet, and then becomes the current node
//   XT_TagStart        ignored
//   XT_TagEnd          returns to the parent of the current node, if any
//   XT_AttributeName   remembers the name and stores it with value "true"
//   XT_AttributeValue  overwrites the value of the remembered attribute
//
// Any other token type is printed to stdout. Non-text tokens with an empty
// slice, and attribute tokens that arrive before any tag was opened, are
// rejected with "bad token".
void XmlDomTreeBuilderData::consumeToken(XmlTokenType a_type, const XmlStringSlice& a_text)
{
	// Text is dropped before any other work: an empty text slice is not an
	// error, and skipping here also avoids copying a text run that would be
	// thrown away.
	if (a_type == XT_Text)
		return;

	if (a_text.m_length <= 0)
	{
		printf("bad token\n");
		return;
	}

	std::string str(a_text.m_data, a_text.m_length);

	switch (a_type)
	{
		case XT_TagOpen:
			{
				XmlNode* newNode = new XmlNode;
				if (m_currentNode)
				{
					m_parentNode.push_back(m_currentNode);
					m_currentNode->m_children.push_back(newNode);
				}
				m_currentNode = newNode;
				m_currentNode->m_tag = str;
				if ( !m_domTreeRootNode )
					m_domTreeRootNode = m_currentNode;
			}
			break;
		case XT_TagStart:
			/* ignore */
			break;
		case XT_TagEnd:
			// With an empty parent stack the current node stays where it is.
			if (m_parentNode.size())
			{
				m_currentNode = m_parentNode.back();
				m_parentNode.pop_back();
			}
			break;
		case XT_AttributeName:
			if (!m_currentNode)
			{
				// No tag has been opened, so there is nothing to attach to.
				printf("bad token\n");
				break;
			}
			m_currentAttributeName = str;
			// Default value, replaced if an XT_AttributeValue follows.
			m_currentNode->m_attributes[m_currentAttributeName] = "true";
			break;
		case XT_AttributeValue:
			if (!m_currentNode)
			{
				printf("bad token\n");
				break;
			}
			m_currentNode->m_attributes[m_currentAttributeName] = str;
			break;
		default:
			printf("Got token type=%i  data=---%s---\n", a_type, str.c_str());
			break;
	}
}


END_NAMESPACE
