mirror of
https://github.com/huderlem/porymap.git
synced 2026-04-14 13:46:28 -05:00
485 lines
14 KiB
C++
485 lines
14 KiB
C++
#include "lib/fex/lexer.h"
|
|
|
|
#include <fstream>
|
|
#include <iostream>
|
|
#include <sstream>
|
|
#include <QFile>
|
|
|
|
namespace fex
|
|
{
|
|
|
|
bool Lexer::IsNumber()
|
|
{
|
|
char c = Peek();
|
|
return (c >= '0' && c <= '9');
|
|
}
|
|
|
|
bool Lexer::IsWhitespace()
|
|
{
|
|
char c = Peek();
|
|
return (c == ' ' || c == '\t' || c == '\r' || c == '\n');
|
|
}
|
|
|
|
bool Lexer::IsHexAlpha()
|
|
{
|
|
char c = Peek();
|
|
return ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
|
|
}
|
|
|
|
bool Lexer::IsAlpha()
|
|
{
|
|
char c = Peek();
|
|
return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'));
|
|
}
|
|
|
|
bool Lexer::IsAlphaNumber()
|
|
{
|
|
return IsAlpha() || IsNumber();
|
|
};
|
|
|
|
char Lexer::Peek()
|
|
{
|
|
return data_[index_];
|
|
}
|
|
|
|
char Lexer::Next()
|
|
{
|
|
char c = Peek();
|
|
index_++;
|
|
return c;
|
|
}
|
|
|
|
// Promotes an identifier token to a keyword token when its text is one of
// the recognised keywords ("extern", "const", "struct").
//
// The keyword token is built from the identifier's filename and line number
// and carries no string value. Any other identifier is returned unchanged.
Token Lexer::ConsumeKeyword(Token identifier)
{
    struct KeywordEntry
    {
        const char *spelling;
        Token::Type type;
    };
    static const KeywordEntry kKeywords[] = {
        {"extern", Token::Type::kExtern},
        {"const", Token::Type::kConst},
        {"struct", Token::Type::kStruct},
    };

    const std::string &spelling = identifier.string_value();
    for (const KeywordEntry &entry : kKeywords)
    {
        if (spelling == entry.spelling)
        {
            return Token(entry.type, identifier.filename(), identifier.line_number());
        }
    }

    return identifier;
}
|
|
|
|
// Consumes a run of letters, digits and underscores starting at the cursor
// and returns it as an identifier token, or as a keyword token when
// ConsumeKeyword() recognises the text.
Token Lexer::ConsumeIdentifier()
{
    std::string name;

    for (;;)
    {
        const bool is_identifier_char = IsAlphaNumber() || Peek() == '_';
        if (!is_identifier_char)
        {
            break;
        }
        name += Next();
    }

    return ConsumeKeyword(Token(Token::Type::kIdentifier, filename_, line_number_, name));
}
|
|
|
|
// Consumes an integer literal starting at the cursor and returns it as a
// kNumber token. The caller is expected to have checked IsNumber() first.
//
// Supported forms, following C integer-literal rules:
//   - "0x..." / "0X..."  hexadecimal
//   - "0..."             octal (a lone "0" is simply zero)
//   - anything else      decimal
//
// Integer suffixes (u, l, ...) are not consumed. The conversion uses
// std::stoi, which throws std::out_of_range for values that do not fit in
// an int.
Token Lexer::ConsumeNumber()
{
    std::string literal;

    if (Peek() == '0')
    {
        literal += Next();

        if (Peek() == 'x' || Peek() == 'X')
        {
            literal += Next();
            while (IsNumber() || IsHexAlpha())
            {
                literal += Next();
            }
            // With base 16 the "0x"/"0X" prefix is accepted by std::stoi.
            return Token(Token::Type::kNumber, filename_, line_number_, std::stoi(literal, nullptr, 16));
        }

        // Leading zero without a hex prefix: an octal literal in C. Only
        // decimal digits are consumed; parsing stops at the first digit
        // that is not valid octal (8 or 9).
        while (IsNumber())
        {
            literal += Next();
        }
        return Token(Token::Type::kNumber, filename_, line_number_, std::stoi(literal, nullptr, 8));
    }

    while (IsNumber())
    {
        literal += Next();
    }

    return Token(Token::Type::kNumber, filename_, line_number_, std::stoi(literal));
}
|
|
|
|
// TODO: Doesn't currently support escape characters
|
|
Token Lexer::ConsumeString()
|
|
{
|
|
std::string value = "";
|
|
if (Next() != '\"')
|
|
{
|
|
// Error
|
|
}
|
|
|
|
// TODO: error if we never see a quote
|
|
while (Peek() != '\"')
|
|
{
|
|
value += Next();
|
|
}
|
|
Next(); // Consume final quote
|
|
return Token(Token::Type::kString, filename_, line_number_, value);
|
|
}
|
|
|
|
// Consumes the directive name that follows a '#' (the caller has already
// consumed the '#') and returns the matching preprocessor token:
// ifdef, ifndef, define, endif or include.
//
// NOTE(review): any other directive name falls back to a kDefine token.
// That is what the existing code does; confirm whether it is intentional.
Token Lexer::ConsumeMacro()
{
    struct DirectiveEntry
    {
        const char *name;
        Token::Type type;
    };
    static const DirectiveEntry kDirectives[] = {
        {"ifdef", Token::Type::kIfDef},
        {"ifndef", Token::Type::kIfNDef},
        {"define", Token::Type::kDefine},
        {"endif", Token::Type::kEndIf},
        {"include", Token::Type::kInclude},
    };

    Token id = ConsumeIdentifier();

    Token::Type resolved = Token::Type::kDefine;
    for (const DirectiveEntry &entry : kDirectives)
    {
        if (id.string_value() == entry.name)
        {
            resolved = entry.type;
            break;
        }
    }

    return Token(resolved, filename_, line_number_);
}
|
|
|
|
std::vector<Token> Lexer::LexFile(const QString &path)
|
|
{
|
|
filename_ = path.toStdString();
|
|
line_number_ = 1;
|
|
|
|
// Note: Using QFile instead of ifstream to handle encoding differences between platforms
|
|
// (specifically to handle accented characters on Windows)
|
|
QFile file(path);
|
|
file.open(QIODevice::ReadOnly);
|
|
|
|
const QByteArray data = file.readAll();
|
|
|
|
index_ = 0;
|
|
data_ = data.toStdString();
|
|
|
|
file.close();
|
|
|
|
return Lex();
|
|
}
|
|
|
|
std::vector<Token> Lexer::Lex()
|
|
{
|
|
std::vector<Token> tokens;
|
|
|
|
while (index_ < data_.length())
|
|
{
|
|
while (IsWhitespace())
|
|
{
|
|
if (Peek() == '\n')
|
|
{
|
|
line_number_++;
|
|
}
|
|
Next();
|
|
}
|
|
|
|
if (IsAlpha())
|
|
{
|
|
tokens.push_back(ConsumeIdentifier());
|
|
continue;
|
|
}
|
|
|
|
if (IsNumber())
|
|
{
|
|
tokens.push_back(ConsumeNumber());
|
|
continue;
|
|
}
|
|
|
|
switch (Peek())
|
|
{
|
|
case '*':
|
|
Next();
|
|
tokens.push_back(Token(Token::Type::kTimes, filename_, line_number_));
|
|
break;
|
|
case '-':
|
|
Next();
|
|
tokens.push_back(Token(Token::Type::kMinus, filename_, line_number_));
|
|
break;
|
|
case '+':
|
|
Next();
|
|
tokens.push_back(Token(Token::Type::kPlus, filename_, line_number_));
|
|
break;
|
|
case '(':
|
|
Next();
|
|
tokens.push_back(Token(Token::Type::kOpenParen, filename_, line_number_));
|
|
break;
|
|
case ')':
|
|
Next();
|
|
tokens.push_back(Token(Token::Type::kCloseParen, filename_, line_number_));
|
|
break;
|
|
case '&':
|
|
Next();
|
|
if (Peek() == '&')
|
|
{
|
|
Next();
|
|
tokens.push_back(Token(Token::Type::kLogicalAnd, filename_, line_number_));
|
|
break;
|
|
}
|
|
tokens.push_back(Token(Token::Type::kBitAnd, filename_, line_number_));
|
|
break;
|
|
case '|':
|
|
Next();
|
|
if (Peek() == '|')
|
|
{
|
|
Next();
|
|
tokens.push_back(Token(Token::Type::kLogicalOr, filename_, line_number_));
|
|
break;
|
|
}
|
|
tokens.push_back(Token(Token::Type::kBitOr, filename_, line_number_));
|
|
break;
|
|
case '^':
|
|
Next();
|
|
tokens.push_back(Token(Token::Type::kBitXor, filename_, line_number_));
|
|
break;
|
|
case ',':
|
|
Next();
|
|
tokens.push_back(Token(Token::Type::kComma, filename_, line_number_));
|
|
break;
|
|
case '=':
|
|
Next();
|
|
tokens.push_back(Token(Token::Type::kEqual, filename_, line_number_));
|
|
break;
|
|
case ';':
|
|
Next();
|
|
tokens.push_back(Token(Token::Type::kSemicolon, filename_, line_number_));
|
|
break;
|
|
case '[':
|
|
Next();
|
|
tokens.push_back(Token(Token::Type::kOpenSquare, filename_, line_number_));
|
|
break;
|
|
case ']':
|
|
Next();
|
|
tokens.push_back(Token(Token::Type::kCloseSquare, filename_, line_number_));
|
|
break;
|
|
case '{':
|
|
Next();
|
|
tokens.push_back(Token(Token::Type::kOpenCurly, filename_, line_number_));
|
|
break;
|
|
case '}':
|
|
Next();
|
|
tokens.push_back(Token(Token::Type::kCloseCurly, filename_, line_number_));
|
|
break;
|
|
case '.':
|
|
Next();
|
|
tokens.push_back(Token(Token::Type::kPeriod, filename_, line_number_));
|
|
break;
|
|
case '_':
|
|
Next();
|
|
tokens.push_back(Token(Token::Type::kUnderscore, filename_, line_number_));
|
|
break;
|
|
case '#':
|
|
Next();
|
|
tokens.push_back(ConsumeMacro());
|
|
break;
|
|
case '\"':
|
|
tokens.push_back(ConsumeString());
|
|
break;
|
|
case '<':
|
|
Next();
|
|
if (Peek() == '<')
|
|
{
|
|
Next();
|
|
tokens.push_back(Token(Token::Type::kLeftShift, filename_, line_number_));
|
|
break;
|
|
}
|
|
if (Peek() == '=')
|
|
{
|
|
Next();
|
|
tokens.push_back(Token(Token::Type::kLessThanEqual, filename_, line_number_));
|
|
break;
|
|
}
|
|
tokens.push_back(Token(Token::Type::kLessThan, filename_, line_number_));
|
|
break;
|
|
case '>':
|
|
Next();
|
|
if (Peek() == '>')
|
|
{
|
|
Next();
|
|
tokens.push_back(Token(Token::Type::kRightShift, filename_, line_number_));
|
|
break;
|
|
}
|
|
if (Peek() == '=')
|
|
{
|
|
Next();
|
|
tokens.push_back(Token(Token::Type::kGreaterThanEqual, filename_, line_number_));
|
|
break;
|
|
}
|
|
tokens.push_back(Token(Token::Type::kGreaterThan, filename_, line_number_));
|
|
break;
|
|
|
|
case '/':
|
|
Next();
|
|
switch (Peek())
|
|
{
|
|
case '/':
|
|
while (Next() != '\n')
|
|
;
|
|
continue;
|
|
case '*':
|
|
while (Next() != '*')
|
|
;
|
|
Next(); // last /
|
|
continue;
|
|
default:
|
|
tokens.push_back(Token(Token::Type::kDivide, filename_, line_number_));
|
|
continue;
|
|
}
|
|
|
|
case '\0':
|
|
Next();
|
|
break;
|
|
|
|
default:
|
|
char c = Next();
|
|
std::cout << "[WARNING] Unable to lex unknown char: '" << c << "' (0x" << std::hex << (int)c << ")" << std::endl;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return tokens;
|
|
}
|
|
|
|
std::string Token::ToString() const
|
|
{
|
|
std::string out = filename() + ":" + std::to_string(line_number()) + " - ";
|
|
switch (type())
|
|
{
|
|
case Token::Type::kIfDef:
|
|
out += "Macro: IfDef";
|
|
break;
|
|
case Token::Type::kIfNDef:
|
|
out += "Macro: IfNDef";
|
|
break;
|
|
case Token::Type::kDefine:
|
|
out += "Macro: Define";
|
|
break;
|
|
case Token::Type::kEndIf:
|
|
out += "Macro: EndIf";
|
|
break;
|
|
case Token::Type::kInclude:
|
|
out += "Macro: Include";
|
|
break;
|
|
case Token::Type::kNumber:
|
|
out += "Number: " + std::to_string(int_value());
|
|
break;
|
|
case Token::Type::kString:
|
|
out += "String: " + string_value();
|
|
break;
|
|
case Token::Type::kIdentifier:
|
|
out += "Identifier: " + string_value();
|
|
break;
|
|
case Token::Type::kOpenParen:
|
|
out += "Symbol: (";
|
|
break;
|
|
case Token::Type::kCloseParen:
|
|
out += "Symbol: )";
|
|
break;
|
|
case Token::Type::kLessThan:
|
|
out += "Symbol: <";
|
|
break;
|
|
case Token::Type::kGreaterThan:
|
|
out += "Symbol: >";
|
|
break;
|
|
case Token::Type::kLeftShift:
|
|
out += "Symbol: <<";
|
|
break;
|
|
case Token::Type::kRightShift:
|
|
out += "Symbol: >>";
|
|
break;
|
|
case Token::Type::kPlus:
|
|
out += "Symbol: +";
|
|
break;
|
|
case Token::Type::kMinus:
|
|
out += "Symbol: -";
|
|
break;
|
|
case Token::Type::kTimes:
|
|
out += "Symbol: *";
|
|
break;
|
|
case Token::Type::kDivide:
|
|
out += "Symbol: /";
|
|
break;
|
|
case Token::Type::kBitXor:
|
|
out += "Symbol: ^";
|
|
break;
|
|
case Token::Type::kBitAnd:
|
|
out += "Symbol: &";
|
|
break;
|
|
case Token::Type::kBitOr:
|
|
out += "Symbol: |";
|
|
break;
|
|
case Token::Type::kQuote:
|
|
out += "Symbol: \"";
|
|
break;
|
|
case Token::Type::kComma:
|
|
out += "Symbol: ,";
|
|
break;
|
|
case Token::Type::kLessThanEqual:
|
|
out += "Symbol: <=";
|
|
break;
|
|
case Token::Type::kGreaterThanEqual:
|
|
out += "Symbol: >=";
|
|
break;
|
|
case Token::Type::kEqual:
|
|
out += "Symbol: =";
|
|
break;
|
|
case Token::Type::kLogicalAnd:
|
|
out += "Symbol: &&";
|
|
break;
|
|
case Token::Type::kLogicalOr:
|
|
out += "Symbol: ||";
|
|
break;
|
|
case Token::Type::kSemicolon:
|
|
out += "Symbol: ;";
|
|
break;
|
|
case Token::Type::kExtern:
|
|
out += "Keyword: extern";
|
|
break;
|
|
case Token::Type::kConst:
|
|
out += "Keyword: const";
|
|
break;
|
|
case Token::Type::kStruct:
|
|
out += "Keyword: struct";
|
|
break;
|
|
case Token::Type::kOpenSquare:
|
|
out += "Symbol: [";
|
|
break;
|
|
case Token::Type::kCloseSquare:
|
|
out += "Symbol: ]";
|
|
break;
|
|
case Token::Type::kOpenCurly:
|
|
out += "Symbol: {";
|
|
break;
|
|
case Token::Type::kCloseCurly:
|
|
out += "Symbol: }";
|
|
break;
|
|
case Token::Type::kPeriod:
|
|
out += "Symbol: .";
|
|
break;
|
|
case Token::Type::kUnderscore:
|
|
out += "Symbol: _";
|
|
break;
|
|
}
|
|
|
|
return out;
|
|
}
|
|
|
|
} // namespace fex
|