#include "XmlParser.h"
#include "MemoryMapping.h"
#include "Common.h"
BEGIN_NAMESPACE
// Diagnostic output helper: forwards its arguments unchanged to printf.
#define DEBUG_LOG(...) printf(__VA_ARGS__)
// Default constructor; the body performs no work.
XmlParser::XmlParser()
{
    // Intentionally empty.
}
// Destructor; the body performs no work.
XmlParser::~XmlParser()
{
    // Intentionally empty.
}
void XmlParser::parseXmlFile(const char* a_fileName, XmlConsumerInterface& a_consumer)
{
MemoryMapping memMap(a_fileName);
char* d = (char*)memMap.address();
if (!d)
return;
size_t s = (size_t)memMap.size(); // If this is a 32bit build, don't expect xml files more than 4GiB to work!
parseXmlData(d, s, a_consumer);
}
// Fixed placeholder slices handed to the consumer together with the
// structural XT_TagEnd / XT_TagStart tokens (those tokens carry no text of
// their own).
static const XmlStringSlice endSlice = { "</>", 3 };
static const XmlStringSlice startSlice = { "<>", 2 };
// Shorthands used inside XmlParser::parseXmlData(). They expand in place and
// therefore rely on that function's local names: a_consumer, slice, state,
// start and i.
// Emit the current slice as a token of kind XT_<type>.
#define EMIT_ATOM(type) a_consumer.consumeToken(XT_##type, slice)
// Signal that the opening part of a tag (name plus attributes) is complete.
#define EMIT_START_TAG() a_consumer.consumeToken(XT_TagStart, startSlice)
// Signal that an element has been closed.
#define EMIT_END_TAG() a_consumer.consumeToken(XT_TagEnd, endSlice)
// Return to the "outside any tag" state; the next slice begins after the
// current character. Expands to two statements, so use it only inside braces.
#define RESET_STATE state = 0; start = i + 1
// Tokenises the a_size bytes at a_data with a small hand-written state machine
// and reports every token to a_consumer.consumeToken().
//
// a_data     - buffer to scan; only read, never modified.
// a_size     - number of bytes in a_data. Buffers shorter than three bytes
//              produce no tokens.
// a_consumer - receives XT_Text, XT_TagOpen, XT_AttributeName,
//              XT_AttributeValue, XT_TagStart and XT_TagEnd tokens.
//
// Scanning starts at byte index 2, i.e. the first two bytes are never looked at
// (NOTE(review): presumably to skip a byte-order mark - confirm).
//
// States:
//    0  outside any tag; '<' emits the preceding bytes as XT_Text
//    1  just after '<' ('?' and whitespace are skipped)
//    2  inside a tag name
//    3  inside a tag, between attributes
//    4  inside an attribute name
//    5  inside an attribute value (outside double quotes)
//    6  saw '/' inside an attribute value, expecting '>'
//    7  saw '/' between attributes, expecting '>'
//    8  saw '/' directly after the tag name, expecting '>'
//    9  inside a double-quoted part of an attribute value
//   10  just after "</"
//   11  inside a closing tag, waiting for '>'
//   12  just after "<!"
//   13  after "<!-"
//   14  inside a comment body
//   15  saw one '-' inside a comment
//   16  saw "--" inside a comment, '>' ends the comment
//
// A character that matches no rule for the current state logs
// "unexpected token" and leaves the state unchanged.
void XmlParser::parseXmlData(const char* a_data, size_t a_size, XmlConsumerInterface& a_consumer)
{
    const char* d = a_data;
    int state = 0;
    // Index of the first byte of the slice currently being collected. It is only
    // ever set to i or i + 1, so it never exceeds the index of the next byte read.
    size_t start = 2;
    //DEBUG_LOG("BOM: %i %i %i %i\n", d[0], d[1], d[2], d[3]);
    // size_t indices: converting a_size to int would truncate (or go negative)
    // for buffers of 2 GiB and more, silently skipping the input.
    for (size_t i = 2; i < a_size; i++)
    {
        // Bytes collected so far, excluding the current one: [start, i).
        const size_t len = i - start;
        XmlTokenType typ = XT_Unknown;
        XmlStringSlice slice = { &d[start], len };
        const char ch = d[i];
        // The <ctype> classifiers require a value representable as unsigned char;
        // passing a negative char (any byte >= 0x80 where char is signed) is
        // undefined behaviour.
        const unsigned char uch = static_cast<unsigned char>(ch);
        // Inside this do/while(0) a 'continue' jumps to the (false) loop condition,
        // so it acts as "rule matched, stop looking". Rules are tried in order.
        do {
            if (state == 0 && ch == '<') { typ = XT_Text; state = 1; continue; }
            if (state == 0) { continue; }
            if (state == 1 && ch == '!') { state = 12; continue; }
            if (state == 1 && ch == '?') { continue; }
            if (state == 1 && ch == '/') { state = 10; continue; }
            if (state == 1 && isspace(uch)) { continue; }
            if (state == 1 && isalnum(uch)) { start = i; state = 2; continue; }
            if (state == 2 && isalnum(uch)) { continue; }
            if (state == 2 && isspace(uch)) { typ = XT_TagOpen; state = 3; continue; }
            if (state == 2 && ch == '/') { state = 8; continue; }
            if (state == 2 && ch == '>') { EMIT_ATOM(TagOpen); EMIT_START_TAG(); RESET_STATE; continue; }
            if (state == 3 && isspace(uch)) { continue; }
            if (state == 3 && isalnum(uch)) { start = i; state = 4; continue; }
            if (state == 3 && ch == '/') { state = 7; continue; }
            if (state == 3 && ch == '>') { EMIT_START_TAG(); RESET_STATE; continue; }
            if (state == 4 && isalnum(uch)) { continue; }
            //if (state == 4 && isspace(ch)) { EMIT_ATOM(Attribute); state = 3; continue; }
            if (state == 4 && ch == '=') { typ = XT_AttributeName; state = 5; start = i+1; continue; }
            if (state == 5 && ch == '\"') { state = 9; continue; }
            if (state == 5 && isalnum(uch)) { continue; }
            if (state == 5 && isspace(uch)) { typ = XT_AttributeValue; state = 3; continue; }
            if (state == 5 && ch == '/') { state = 6; continue; }
            if (state == 5 && ch == '>') { EMIT_ATOM(AttributeValue); EMIT_START_TAG(); RESET_STATE; continue; }
            if (state == 5 && ispunct(uch)) { continue; }
            // In states 6 and 8 the slice still contains the '/' of "/>"; drop it.
            if (state == 6 && ch == '>') { slice.m_length--; EMIT_ATOM(AttributeValue); EMIT_START_TAG(); EMIT_END_TAG(); RESET_STATE; continue; }
            if (state == 7 && ch == '>') { EMIT_START_TAG(); EMIT_END_TAG(); RESET_STATE; continue; }
            if (state == 8 && ch == '>') { slice.m_length--; EMIT_ATOM(TagOpen); EMIT_START_TAG(); EMIT_END_TAG(); RESET_STATE; continue; }
            if (state == 9 && ch == '\"') { state = 5; continue; }
            if (state == 9) { continue; }
            if (state == 10 && isspace(uch)) { continue; }
            if (state == 10 && isalnum(uch)) { state = 11; continue; }
            if (state == 11 && isalnum(uch)) { continue; }
            if (state == 11 && isspace(uch)) { continue; }
            if (state == 11 && ch == '>') { EMIT_END_TAG(); RESET_STATE; continue; }
            // Handle comments
            if (state == 12 && ch == '-') { state = 13; continue; }
            if (state == 13 && ch == '-') { state = 14; continue; }
            if (state == 14 && ch == '-') { state = 15; continue; }
            if (state == 14) { state = 14; continue; }
            if (state == 15 && ch == '-') { state = 16; continue; }
            if (state == 15) { state = 14; continue; }
            if (state == 16 && ch == '>') { RESET_STATE; continue; }
            if (state == 16) { state = 14; continue; }
            DEBUG_LOG("unexpected token\n"); break;
        } while(0);
        // Rules that only set 'typ' emit the slice as it was on entry to this
        // iteration, i.e. before any change to 'start' made by the rule.
        if (typ != XT_Unknown)
            a_consumer.consumeToken(typ, slice);
    }
}
// One element of the DOM tree: its tag name, its attributes and its child
// elements. A node owns its children and deletes them in its destructor.
class XmlNode
{
public:
    // Explicit default constructor: required because declaring the copy
    // constructor below suppresses the implicit one.
    XmlNode() {}
    // Recursively destroys the whole subtree below this node.
    ~XmlNode() {
        for (unsigned i = 0; i < m_children.size(); i++)
            delete m_children[i];
    }
    String m_tag;                         // element name
    HashMap<String,String> m_attributes;  // attribute name -> attribute value
    Vector<XmlNode*> m_children;          // owned child nodes, in insertion order
private:
    // Not copyable: a copy would share the raw child pointers and both
    // destructors would delete them. Declared but intentionally not defined.
    XmlNode(const XmlNode&);
    XmlNode& operator=(const XmlNode&);
};
class XmlDomTreeBuilderData
{
public:
void printTreeRecurse(int depth, XmlNode* a_node);
void consumeToken(XmlTokenType a_type, const XmlStringSlice& a_text);
XmlNode* m_domTreeRootNode;
Vector<XmlNode*> m_parentNode;
XmlNode* m_currentNode;
String m_currentAttributeName;
};
// Allocates the builder's private data and marks the tree as empty
// (no root node, no current node).
XmlDomTreeBuilder::XmlDomTreeBuilder()
{
    XmlDomTreeBuilderData* data = new XmlDomTreeBuilderData;
    data->m_domTreeRootNode = 0;
    data->m_currentNode = 0;
    m_data = data;
}
// Destroys the DOM tree (deleting the root deletes all descendants through
// XmlNode's destructor) and then the private data object itself.
XmlDomTreeBuilder::~XmlDomTreeBuilder()
{
    XmlNode* root = m_data->m_domTreeRootNode;
    delete root;
    delete m_data;
}
// Forwards the token unchanged to the private implementation object.
void XmlDomTreeBuilder::consumeToken(XmlTokenType a_type, const XmlStringSlice& a_text)
{
    XmlDomTreeBuilderData& impl = *m_data;
    impl.consumeToken(a_type, a_text);
}
void XmlDomTreeBuilder::printDomTree()
{
if (!m_data->m_domTreeRootNode)
{
DEBUG_LOG("XML not loaded!\n");
return;
}
DEBUG_LOG("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
m_data->printTreeRecurse(0, m_data->m_domTreeRootNode);
}
void XmlDomTreeBuilderData::printTreeRecurse(int depth, XmlNode* a_node)
{
for (unsigned i = 0; i < a_node->m_children.size(); i++)
{
for (int s = 0; s < depth; s++)
DEBUG_LOG(" ");
DEBUG_LOG("<%s", a_node->m_children[i]->m_tag.c_str());
for (HashMap<String,String>::iterator it =
a_node->m_children[i]->m_attributes.begin();
it != a_node->m_children[i]->m_attributes.end(); ++it)
{
DEBUG_LOG(" %s=%s", it->first.c_str(), it->second.c_str());
}
if (a_node->m_children[i]->m_children.size())
{
DEBUG_LOG(">\n");
printTreeRecurse(depth+1, a_node->m_children[i]);
for (int s = 0; s < depth; s++)
DEBUG_LOG(" ");
DEBUG_LOG("</%s>\n", a_node->m_children[i]->m_tag.c_str());
} else {
DEBUG_LOG(" />\n");
}
}
}
// Applies one parser token to the DOM tree under construction.
//
// a_type - kind of token.
// a_text - text of the token; tokens with an empty slice are rejected with a
//          "bad token" log line and otherwise ignored.
//
// XT_TagOpen        creates a node named a_text. If a current node exists the
//                   new node becomes its child and the current node is pushed
//                   on the parent stack. The first node ever created becomes
//                   the tree root.
// XT_TagEnd         pops the parent stack (if non-empty) into the current node.
// XT_AttributeName  remembers the name and stores it with the value "true"
//                   until a value token arrives.
// XT_AttributeValue stores a_text under the remembered attribute name.
// XT_Text, XT_TagStart are ignored; any other type is logged.
void XmlDomTreeBuilderData::consumeToken(XmlTokenType a_type, const XmlStringSlice& a_text)
{
    if (a_text.m_length <= 0)
    {
        DEBUG_LOG("bad token\n");
        return;
    }
    std::string str(a_text.m_data, a_text.m_length);
    switch (a_type)
    {
    case XT_Text:
        /* ignore */
        break;
    case XT_TagOpen:
        {
            XmlNode* newNode = new XmlNode;
            if (m_currentNode)
            {
                // Descend: remember where to return to on the matching XT_TagEnd.
                m_parentNode.push_back(m_currentNode);
                m_currentNode->m_children.push_back(newNode);
            }
            m_currentNode = newNode;
            m_currentNode->m_tag = str;
            if ( !m_domTreeRootNode )
                m_domTreeRootNode = m_currentNode;
        }
        break;
    case XT_TagStart:
        /* ignore */
        break;
    case XT_TagEnd:
        if (m_parentNode.size())
        {
            m_currentNode = m_parentNode.back();
            m_parentNode.pop_back();
        }
        break;
    case XT_AttributeName:
        // An attribute can only be attached to an open element; without this
        // guard a stray attribute token would dereference a null pointer.
        if (!m_currentNode)
        {
            DEBUG_LOG("attribute name outside of a tag\n");
            break;
        }
        m_currentAttributeName = str;
        m_currentNode->m_attributes[m_currentAttributeName] = "true";
        break;
    case XT_AttributeValue:
        if (!m_currentNode)
        {
            DEBUG_LOG("attribute value outside of a tag\n");
            break;
        }
        m_currentNode->m_attributes[m_currentAttributeName] = str;
        break;
    default:
        DEBUG_LOG("Got token type=%i data=---%s---\n", a_type, str.c_str());
        break;
    }
}
END_NAMESPACE