Browse Source

Support for `.section` directive

- Move common string classification code from `Tokenizer` and `Parser`
  into `char_utils.hpp`.
- Modify `Assembler` constructor so that AST and ELF objects are not
  attached to object state. Still trying to decide if `Assembler` can just
  be static--not sure that the object is stateful enough to justify
  storing any state at all.
- Remove `ELF::setSection` altogether. For now, let's just use a single
  symbol table, even if the infrastructure is there to support multiple
  symbol tables. No need to be too clever.
- For now, just have one relocation section. This should change, since
  there should be a one-to-one mapping between non-symbol table sections
  and relocation sections. TODO.
- Add support to `Parser` for directives (`Directive` AST node).
- Add support to `Tokenizer` for directives.
- Modify tests accordingly.
assembler
Zack Marvel 2 years ago
parent
commit
0392994171
  1. 5
      Makefile
  2. 185
      include/assembler.hpp
  3. 51
      include/char_utils.hpp
  4. 60
      include/elf.hpp
  5. 72
      include/parser.hpp
  6. 1
      include/tokenizer.hpp
  7. 422
      src/assembler.cpp
  8. 188
      src/elf.cpp
  9. 101
      src/parser.cpp
  10. 19
      src/tokenizer.cpp
  11. 148
      test/assembler_test.cpp
  12. 36
      test/char_utils_test.cpp
  13. 140
      test/elf_test.cpp
  14. 41
      test/elf_wrapper.hpp
  15. 47
      test/parser_test.cpp

5
Makefile

@ -17,7 +17,8 @@ COVS = $(SRCS:.cpp=.gcda) \
EXE_OBJS = $(OBJS) $(EXE_SRC:.cpp=.o)
INC = -Iinclude
TEST_SRCS = test/tokenizer_test.cpp \
TEST_SRCS = test/char_utils_test.cpp \
test/tokenizer_test.cpp \
test/parser_test.cpp \
test/assembler_test.cpp \
test/elf_test.cpp \
@ -35,12 +36,14 @@ $(EXE): $(EXE_OBJS)
.PHONY: test
test: $(TEST_EXE)
test: CXXFLAGS += -Itest
$(TEST_EXE): $(OBJS) $(TEST_OBJS)
$(CXX) $(LDFLAGS) -o [email protected] $^ -lboost_unit_test_framework
.PHONY: check
check: CXXFLAGS += -Itest
check: $(TEST_EXE)
ifdef CHECK_LOG
-./$(TEST_EXE) $(CHECK_OPTIONS) > $(CHECK_LOG)

185
include/assembler.hpp

@ -12,24 +12,183 @@ namespace GBAS {
BSS,
TEXT,
INIT,
INVALID,
};
/**
 * Translate a section name into its SectionType.
 *
 * The comparison is exact: the name must match one of ".data", ".rodata",
 * ".bss", ".text" or ".init", including the leading period.
 *
 * @param str: Section name to look up.
 *
 * @returns The matching SectionType, or SectionType::INVALID when str is not
 * one of the names listed above.
 */
static inline SectionType stringToSectionType(const std::string& str) {
  if (str == ".data") {
    return SectionType::DATA;
  }
  if (str == ".rodata") {
    return SectionType::RODATA;
  }
  if (str == ".bss") {
    return SectionType::BSS;
  }
  if (str == ".text") {
    return SectionType::TEXT;
  }
  if (str == ".init") {
    return SectionType::INIT;
  }
  return SectionType::INVALID;
}
}
/**
 * Exception type thrown by the assembler; carries a human-readable message.
 *
 * The base class is inherited publicly. With the `class` keyword an
 * unqualified base is private, and a `catch (const std::exception&)` handler
 * would then never match this type.
 */
class AssemblerException : public std::exception {
 public:
  /** @param msg: Message returned by what(); copied into the exception. */
  AssemblerException(const char* msg) : mMsg{msg} {}
  /** @param msg: Message returned by what(); copied into the exception. */
  AssemblerException(const std::string& msg) : mMsg{msg} {}

  /** @returns The message supplied at construction. */
  const char* what() const noexcept override { return mMsg.c_str(); }

 private:
  std::string mMsg;
};
struct InstructionNone {
InstructionNone(AST::InstructionType typ) : typ{typ} { }
constexpr uint8_t encode() {
using namespace AST;
uint8_t opcode = 0;
switch (typ) {
case InstructionType::NOP:
opcode = 0x00;
break;
case InstructionType::STOP:
opcode = 0x10;
break;
case InstructionType::RLCA:
opcode = 0x07;
break;
case InstructionType::RLA:
opcode = 0x17;
break;
case InstructionType::DAA:
opcode = 0x27;
break;
case InstructionType::SCF:
opcode = 0x37;
break;
case InstructionType::RRCA:
opcode = 0x0f;
break;
case InstructionType::RRA:
opcode = 0x1f;
break;
case InstructionType::CPL:
opcode = 0x2f;
break;
case InstructionType::CCF:
opcode = 0x3f;
break;
case InstructionType::HALT:
opcode = 0x76;
break;
case InstructionType::DI:
opcode = 0xf3;
break;
case InstructionType::RET:
opcode = 0xc9;
break;
case InstructionType::RETI:
opcode = 0xd9;
break;
case InstructionType::EI:
opcode = 0xfb;
break;
default:
throw AssemblerException{"Unrecognized instruction"};
break;
}
return opcode;
}
AST::InstructionType typ;
};
/**
* The assembler takes an AST as input and outputs an object file. It should
* evaluate any constant expressions in the AST (trivially optimize).
*/
class Assembler {
public:
explicit Assembler(std::shared_ptr<AST::Root> ast, std::ofstream out);
explicit Assembler();
/**
* Parse the AST and put the generated code and symbols into the provided ELF.
* This function does not write the ELF object to a file.
*
* @param ast: AST root node that will be walked.
* @param elf: ELF object that will be modified to store generated code and
* symbols.
*
* @throws AssemblerException upon invalid input.
*/
void assemble(std::shared_ptr<AST::Root> ast, GBAS::ELF& elf);
void assemble();
GBAS::SectionType currentSectionType() { return mCurrSectionType; }
/**
* Helper function to dispatch and generate code for Instruction nodes.
*
* @param elf: ELF object containing the "text" section where generated code
* will be added.
* @param instr: BaseInstruction that this function will dispatch and
* generate code for.
*
* @throws AssemblerException upon invalid instruction input.
*/
void assembleInstruction(GBAS::ELF& elf, AST::BaseInstruction& instr);
std::vector<uint8_t> instructionNone(AST::Instruction0& instr0);
std::vector<uint8_t> instructionR(AST::Instruction1& instr1, const AST::BaseRegister& reg);
std::vector<uint8_t> instructionRA(AST::Instruction1& instr1, const AST::BaseRegister& reg);
std::vector<uint8_t> instructionD(AST::Instruction1& instr1, const AST::BaseDRegister& reg);
/**
* Helper function for assembling instructions with no arguments.
*
* @param instr0: Instruction0 node.
*
* @returns Vector of bytes representing encoded instruction.
*
* @throws AssemblerException upon invalid instruction parameter.
*/
static std::vector<uint8_t> instructionNone(AST::Instruction0& instr0);
/**
* Helper function for assembling instructions with one argument, a register.
*
* @param instr1: Instruction1 node.
* @param reg: Register node--the operand.
*
* @returns Vector of bytes representing encoded instruction.
*
* @throws AssemblerException upon invalid instruction parameter.
*/
static std::vector<uint8_t> instructionR(AST::Instruction1& instr1, const AST::BaseRegister& reg);
/**
* Helper function for assembling instructions with one argument, a register.
* Instructions dispatched to this function operate on the provided register
* as well as the accumulator register.
*
* @param instr1: Instruction1 node.
* @param reg: Register node--the operand.
*
* @returns Vector of bytes representing encoded instruction.
*
* @throws AssemblerException upon invalid instruction parameter.
*/
static std::vector<uint8_t> instructionRA(AST::Instruction1& instr1, const AST::BaseRegister& reg);
/**
* Helper function for assembling instructions with one argument, a
* double-register.
*
* @param instr1: Instruction1 node.
* @param reg: DRegister node--the operand.
*
* @returns Vector of bytes representing encoded instruction.
*
* @throws AssemblerException upon invalid instruction parameter.
*/
static std::vector<uint8_t> instructionD(AST::Instruction1& instr1, const AST::BaseDRegister& reg);
/**
* Given any type of node, evaluate it and its descendents, recursively.
@ -68,21 +227,7 @@ class Assembler {
std::shared_ptr<AST::BaseUnaryOp> node);
private:
std::shared_ptr<AST::Root> mAst;
std::ofstream mOut;
GBAS::SectionType mCurrSectionType;
//std::vector<std::pair<std::string, size_t>> mLabels;
};
/**
 * Exception type thrown by the assembler; carries a human-readable message.
 *
 * The base class is inherited publicly. With the `class` keyword an
 * unqualified base is private, and a `catch (const std::exception&)` handler
 * would then never match this type.
 */
class AssemblerException : public std::exception {
 public:
  /** @param msg: Message returned by what(); copied into the exception. */
  AssemblerException(const char* msg) : mMsg{msg} {}
  /** @param msg: Message returned by what(); copied into the exception. */
  AssemblerException(const std::string& msg) : mMsg{msg} {}

  /** @returns The message supplied at construction. */
  const char* what() const noexcept override { return mMsg.c_str(); }

 private:
  std::string mMsg;
};

51
include/char_utils.hpp

@ -0,0 +1,51 @@
#ifndef CHAR_UTILS_HPP
#define CHAR_UTILS_HPP
#include <algorithm>
#include <string>
namespace GBAS {
/**
 * True only if c is one of the letters 'A'-'Z' or 'a'-'z'.
 */
constexpr bool isAlpha(char c) {
  const bool isUpper = ('A' <= c) && (c <= 'Z');
  const bool isLower = ('a' <= c) && (c <= 'z');
  return isUpper || isLower;
}
/**
 * True only if c is one of the decimal digits '0'-'9'.
 */
constexpr bool isDigit(char c) {
  return ('0' <= c) && (c <= '9');
}
/**
 * True only if c is a letter (per isAlpha) or a decimal digit (per isDigit).
 */
constexpr bool isAlphaNumeric(char c) {
  if (isAlpha(c)) {
    return true;
  }
  return isDigit(c);
}
/**
 * True only if tok is non-empty and every character in it is a decimal digit.
 *
 * @param tok: Token text to classify. Taken by const reference so that the
 * string is not copied on every call.
 *
 * @returns false for the empty string or if any character fails isDigit.
 */
static inline bool isNumber(const std::string& tok) {
  if (tok.empty()) {
    // all_of is vacuously true on an empty range, so reject it explicitly.
    return false;
  }
  return std::all_of(tok.begin(), tok.end(),
                     [](char c) { return isDigit(c); });
}
/**
 * True only if c is one of +, -, *, or /.
 */
constexpr bool isNumericOp(char c) {
  switch (c) {
    case '+':
    case '-':
    case '*':
    case '/':
      return true;
    default:
      return false;
  }
}
/**
 * True only if tok starts with a letter and, following that, contains only
 * letters, digits, underscores, and periods.
 *
 * @param tok: Token text to classify.
 *
 * @returns false for the empty string, for a non-letter first character, or
 * if any later character is outside the allowed set.
 */
static inline bool isMaybeSection(const std::string& tok) {
  if (tok.empty() || !isAlpha(tok.front())) {
    return false;
  }
  // The first character was checked above; validate the remainder.
  for (std::string::size_type i = 1; i < tok.size(); ++i) {
    const char c = tok[i];
    const bool allowed = isAlphaNumeric(c) || c == '_' || c == '.';
    if (!allowed) {
      return false;
    }
  }
  return true;
}
}; // namespace GBAS
#endif // CHAR_UTILS_HPP

60
include/elf.hpp

@ -1,4 +1,7 @@
#ifndef GBAS_ELF_H
#define GBAS_ELF_H
#include <string.h>
#include <string>
#include <unordered_set>
@ -174,6 +177,9 @@ class ELF {
*/
std::string& getSectionName(size_t sidx);
/**
* ELF file header.
*/
Elf32_Ehdr mHeader;
/**
@ -181,17 +187,35 @@ class ELF {
* mSectionHeaders.
*/
uint32_t mSectionNamesIdx;
/**
* List of section names.
*/
StringTable mSectionNames;
/**
* Index of the string table's section header in mSectionHeaders.
*/
uint32_t mStringTableIdx;
/**
* Vector of strings that will turn into the string table.
*/
StringTable mStringTable;
/**
* Vector of symbol tables.
*/
std::vector<SymbolTable> mSymbolTables;
/**
* Vector of relocation tables.
*/
std::vector<RelocationSection> mRelocationSections;
/**
* Vector of section headers.
*/
SectionHeaderTable mSectionHeaders;
/**
@ -199,6 +223,24 @@ class ELF {
*/
uint16_t mCurrSymTab;
/**
* Index of the relocation section's header in mSectionHeaders.
*
* TODO: There should be a relocation table for every section (.text, .rodata,
* etc.)
*/
uint16_t mCurrRelocSec;
/**
* Index of the current table in mSymbolTables.
*/
uint32_t mSymbolTableIdx;
/**
* Index of the current relocation table in mRelocationSections.
*/
uint32_t mRelocationSectionIdx;
/**
* Initialized data in program memory.
*/
@ -209,24 +251,40 @@ class ELF {
* Read-only (const) data.
*/
std::vector<uint8_t> mRodata;
/**
* Index of the .rodata section header in mSectionHeaders.
*/
uint32_t mRodataIdx;
/**
* Uninitialized data in program memory.
*/
std::vector<uint8_t> mBss;
/**
* Index of the .bss section header in mSectionHeaders.
*/
uint32_t mBssIdx;
/**
* Executable instructions.
*/
std::vector<uint8_t> mText;
/**
* Index of the .text section header in mSectionHeaders.
*/
uint32_t mTextIdx;
/**
* Executable instructions to be run during initialization.
*/
std::vector<uint8_t> mInit;
/**
* Index of the .init section header in mSectionHeaders.
*/
uint32_t mInitIdx;
/**
@ -250,3 +308,5 @@ class ELFException : std::exception {
};
}; // namespace GBAS
#endif // GBAS_ELF_H

72
include/parser.hpp

@ -63,6 +63,12 @@ enum class InstructionType {
INVALID,
};
/**
 * Kinds of directive that a Directive AST node can represent.
 */
enum class DirectiveType {
// Section directive.
SECTION,
// NOTE(review): presumably the value used when a directive is not
// recognized -- confirm against the parser.
INVALID,
};
/* Types of Node:
* - Node<Instruction<args>>
* - Node<Register<reg>>
@ -78,6 +84,7 @@ enum class NodeType {
REGISTER,
DREGISTER,
LABEL,
DIRECTIVE,
NUMBER,
BINARY_OP,
UNARY_OP,
@ -98,6 +105,7 @@ class BaseInstruction;
class BaseRegister;
class BaseDRegister;
class Label;
class Directive;
struct Number;
class BaseBinaryOp;
class BaseUnaryOp;
@ -108,6 +116,7 @@ struct AbstractNodeVisitor {
virtual void visit(BaseRegister& node) = 0;
virtual void visit(BaseDRegister& node) = 0;
virtual void visit(Label& node) = 0;
virtual void visit(Directive& node) = 0;
virtual void visit(Number& node) = 0;
virtual void visit(BaseBinaryOp& node) = 0;
virtual void visit(BaseUnaryOp& node) = 0;
@ -220,6 +229,38 @@ class Label : public Node<NodeType::LABEL> {
std::string mName;
};
/**
 * AST node for a directive: a DirectiveType together with a list of operand
 * tokens. The operands are stored as tokens and are not parsed.
 */
class Directive : public Node<NodeType::DIRECTIVE> {
 public:
  // Operands are kept as unparsed tokens.
  using OperandList = std::vector<Token>;

  /** Construct a directive of the given type with no operands. */
  Directive(DirectiveType type) : mType{type} {}

  /** Construct a directive of the given type holding a copy of operands. */
  Directive(DirectiveType type, OperandList operands)
      : mType{type}, mOperands{operands} {}

  ~Directive() override = default;

  /** @returns The type given at construction. */
  DirectiveType type() const { return mType; }

  /** @returns A copy of the stored operand tokens. */
  OperandList operands() const { return mOperands; }

  /** Passes this node to visitor.visit(). */
  void accept(AbstractNodeVisitor& visitor) override { visitor.visit(*this); }

 private:
  DirectiveType mType;
  OperandList mOperands;
};
struct Number : public Node<NodeType::NUMBER> {
explicit Number(uint8_t value) : mValue{value} {}
@ -387,6 +428,15 @@ struct InstructionProps {
using InstructionPropsList =
const std::array<const InstructionProps, 22 + 5 + 5 + 5 + 7>;
/**
 * Properties of one directive: its lexeme, its AST::DirectiveType, and an
 * argument count.
 */
struct DirectiveProps {
// NOTE(review): presumably the directive's text as it appears in source,
// used to match tokens -- confirm against findDirective.
const std::string lexeme;
AST::DirectiveType type;
// Only support 1 form for now
// NOTE(review): presumably the number of operands the directive takes --
// confirm.
int args;
};
using DirectivePropsList = const std::array<const DirectiveProps, 1>;
/*
* program line* EOF ;
*
@ -455,8 +505,14 @@ class Parser {
bool isMultiplication(const Token& tok);
std::shared_ptr<AST::BaseNode> unary();
std::shared_ptr<AST::BaseNode> primary();
/**
* Convert the next token into a Label node. Assumes that the parser has
* already determined that the token *is* a valid label.
*/
std::shared_ptr<AST::BaseNode> label();
std::shared_ptr<AST::BaseNode> number();
std::shared_ptr<AST::BaseNode> directive();
/**
* Read from tokens, starting at pos, until EOF is encountered.
@ -523,11 +579,6 @@ class Parser {
*/
std::shared_ptr<AST::BaseNode> parseOperand(const Token& tok);
/**
* True only if tok is an integer number.
*/
static bool isNumber(const Token& tok);
/**
* True only if tok is one of +, -, *, or /.
*/
@ -544,6 +595,17 @@ class Parser {
*/
static bool isInstruction(const Token& tok);
/**
* Search the DirectivePropsList for properties matching the given token.
*/
static const DirectiveProps& findDirective(const Token& tok);
/**
* True only if tok starts with a period and a letter. Following that, the
* directive must contain only letters, numbers, and underscores.
*/
static bool isDirective(const Token& tok);
/**
* True only if tok starts with a letter and, following that, contains only
* letters, numbers, and underscores.

1
include/tokenizer.hpp

@ -32,7 +32,6 @@ class Tokenizer {
TokenList tokenize(std::basic_istream<char>& lines);
static bool isReserved(Token tok);
static bool isOperator(char c);
static bool isAlphaNumeric(char c);
private:
static const std::array<Token, 2> reserved;

422
src/assembler.cpp

@ -4,67 +4,87 @@
#include <memory>
#include "assembler.hpp"
#include "char_utils.hpp"
using namespace AST;
using namespace GBAS;
Assembler::Assembler(std::shared_ptr<AST::Root> ast, std::ofstream out)
: mAst{ast}, mOut{std::move(out)} {}
Assembler::Assembler() : mCurrSectionType{SectionType::INVALID} { }
void Assembler::assemble() {
void Assembler::assemble(std::shared_ptr<AST::Root> ast, ELF& elf) {
// This could live on the stack
ELF elf{};
for (auto it = mAst->begin(); it != mAst->end(); it++) {
for (auto it = ast->begin(); it != ast->end(); it++) {
auto node = *it;
switch (node->id()) {
case NodeType::DIRECTIVE:
{
auto directive = std::dynamic_pointer_cast<Directive>(node);
switch (directive->type()) {
case DirectiveType::SECTION:
{
auto sectionType = stringToSectionType(directive->operands().at(0));
if (sectionType == SectionType::INVALID) {
throw AssemblerException{"Invalid section name"};
} else {
mCurrSectionType = sectionType;
}
}
break;
default:
throw AssemblerException{"Invalid directive type"};
}
}
break;
case NodeType::INSTRUCTION:
mCurrSectionType = SectionType::TEXT;
assembleInstruction(elf,
*std::dynamic_pointer_cast<BaseInstruction>(node));
break;
case NodeType::LABEL: {
auto label = std::dynamic_pointer_cast<Label>(node);
size_t value;
uint8_t info = ELF32_ST_BIND(STB_GLOBAL);
uint16_t other;
// TODO support bindings other than GLOBAL
switch (mCurrSectionType) {
case SectionType::DATA:
value = elf.dataSize();
info |= ELF32_ST_TYPE(STT_OBJECT);
other = elf.dataIdx();
break;
case SectionType::RODATA:
value = elf.rodataSize();
info |= ELF32_ST_TYPE(STT_OBJECT);
other = elf.rodataIdx();
break;
case SectionType::BSS:
value = elf.bssSize();
info |= ELF32_ST_TYPE(STT_OBJECT);
other = elf.bssIdx();
break;
case SectionType::TEXT:
value = elf.textSize();
info |= ELF32_ST_TYPE(STT_FUNC);
other = elf.textIdx();
break;
case SectionType::INIT:
value = elf.initSize();
info |= ELF32_ST_TYPE(STT_FUNC);
other = elf.initIdx();
break;
default:
throw AssemblerException("Invalid section type");
case NodeType::LABEL:
{
auto label = std::dynamic_pointer_cast<Label>(node);
size_t value;
uint8_t info = ELF32_ST_BIND(STB_GLOBAL);
uint16_t other;
// TODO support bindings other than GLOBAL
switch (mCurrSectionType) {
case SectionType::DATA:
value = elf.dataSize();
info |= ELF32_ST_TYPE(STT_OBJECT);
other = elf.dataIdx();
break;
case SectionType::RODATA:
value = elf.rodataSize();
info |= ELF32_ST_TYPE(STT_OBJECT);
other = elf.rodataIdx();
break;
case SectionType::BSS:
value = elf.bssSize();
info |= ELF32_ST_TYPE(STT_OBJECT);
other = elf.bssIdx();
break;
case SectionType::TEXT:
value = elf.textSize();
info |= ELF32_ST_TYPE(STT_FUNC);
other = elf.textIdx();
break;
case SectionType::INIT:
value = elf.initSize();
info |= ELF32_ST_TYPE(STT_FUNC);
other = elf.initIdx();
break;
default:
throw AssemblerException("Invalid section type");
}
// TODO relocatable
//elf.addSymbol(label->name(), value, 0, info, STV_DEFAULT, other, true);
elf.addSymbol(label->name(), value, 0, info, STV_DEFAULT, other, false);
}
elf.addSymbol(label->name(), value, 0, info, STV_DEFAULT, other, true);
} break;
break;
default:
throw AssemblerException("Invalid node");
}
}
elf.write(mOut);
}
/**
@ -273,67 +293,6 @@ struct InstructionRR {
}
};
/**
 * Instruction with no operands.
 *
 * The instruction type is supplied as the template parameter; encode()
 * yields the opcode byte for that type.
 */
template <InstructionType typ>
struct InstructionNone {
  /**
   * @returns Opcode byte for the instruction type `typ`.
   *
   * @throws AssemblerException if `typ` is not one of the cases handled
   * below.
   */
  constexpr uint8_t encode() {
    switch (typ) {
      case InstructionType::NOP:
        return 0x00;
      case InstructionType::STOP:
        return 0x10;
      case InstructionType::RLCA:
        return 0x07;
      case InstructionType::RLA:
        return 0x17;
      case InstructionType::DAA:
        return 0x27;
      case InstructionType::SCF:
        return 0x37;
      case InstructionType::RRCA:
        return 0x0f;
      case InstructionType::RRA:
        return 0x1f;
      case InstructionType::CPL:
        return 0x2f;
      case InstructionType::CCF:
        return 0x3f;
      case InstructionType::HALT:
        return 0x76;
      case InstructionType::DI:
        return 0xf3;
      case InstructionType::RET:
        return 0xc9;
      case InstructionType::RETI:
        return 0xd9;
      case InstructionType::EI:
        return 0xfb;
      default:
        throw AssemblerException("Unrecognized instruction");
    }
  }
};
/**
* Instruction with a flag and 8-bit immediate.
*/
@ -401,89 +360,101 @@ enum class OperandType {
INVALID,
};
/**
 * Operand formats listed for each instruction type. Each entry maps an
 * InstructionType to a list of (first, second) OperandType pairs. A pair with
 * OperandType::INVALID in both slots is what assembleInstruction checks for
 * before encoding an instruction that has no operands.
 */
const std::map<const InstructionType,
const std::vector<std::pair<OperandType, OperandType>>>
formats{
// prefix 0x0
{InstructionType::INC,
{
{OperandType::REGISTER, OperandType::INVALID},
{OperandType::DREGISTER, OperandType::INVALID},
}},
{InstructionType::DEC,
{
{OperandType::REGISTER, OperandType::INVALID},
{OperandType::DREGISTER, OperandType::INVALID},
}},
{InstructionType::LD,
{
{OperandType::REGISTER, OperandType::IMM8},
{OperandType::DREGISTER, OperandType::IMM16},
}},
{InstructionType::NOP, {{OperandType::INVALID, OperandType::INVALID}}},
{InstructionType::STOP, {{OperandType::INVALID, OperandType::INVALID}}},
{InstructionType::RLCA, {{OperandType::INVALID, OperandType::INVALID}}},
{InstructionType::RLA, {{OperandType::INVALID, OperandType::INVALID}}},
{InstructionType::DAA, {{OperandType::INVALID, OperandType::INVALID}}},
{InstructionType::SCF, {{OperandType::INVALID, OperandType::INVALID}}},
{InstructionType::RRCA, {{OperandType::INVALID, OperandType::INVALID}}},
{InstructionType::RRA, {{OperandType::INVALID, OperandType::INVALID}}},
{InstructionType::CPL, {{OperandType::INVALID, OperandType::INVALID}}},
{InstructionType::CCF, {{OperandType::INVALID, OperandType::INVALID}}},
{InstructionType::JR,
{
{OperandType::FLAG, OperandType::IMM8},
{OperandType::IMM8, OperandType::INVALID},
}},
{InstructionType::HALT, {{OperandType::INVALID, OperandType::INVALID}}},
{InstructionType::SUB,
{
{OperandType::REGISTER, OperandType::INVALID},
{OperandType::REGISTER, OperandType::IMM8},
}},
{InstructionType::AND,
{
{OperandType::REGISTER, OperandType::INVALID},
{OperandType::REGISTER, OperandType::IMM8},
}},
{InstructionType::XOR,
{
{OperandType::REGISTER, OperandType::INVALID},
{OperandType::REGISTER, OperandType::IMM8},
}},
{InstructionType::OR,
{
{OperandType::REGISTER, OperandType::INVALID},
{OperandType::REGISTER, OperandType::IMM8},
}},
{InstructionType::CP,
{
{OperandType::REGISTER, OperandType::INVALID},
{OperandType::REGISTER, OperandType::IMM8},
}},
{InstructionType::POP,
{{OperandType::DREGISTER, OperandType::INVALID}}},
{InstructionType::PUSH,
{{OperandType::DREGISTER, OperandType::INVALID}}},
{InstructionType::DI, {{OperandType::INVALID, OperandType::INVALID}}},
{InstructionType::EI, {{OperandType::INVALID, OperandType::INVALID}}},
{InstructionType::RET,
{
{OperandType::INVALID, OperandType::INVALID},
{OperandType::FLAG, OperandType::IMM8},
}},
{InstructionType::RETI, {{OperandType::INVALID, OperandType::INVALID}}},
{InstructionType::JP,
{
{OperandType::FLAG, OperandType::IMM8},
{OperandType::IMM16, OperandType::INVALID},
}},
{InstructionType::CALL,
{
{OperandType::FLAG, OperandType::IMM8},
{OperandType::IMM16, OperandType::INVALID},
}},
};
/**
 * Operand formats listed for each instruction type. Each entry maps an
 * InstructionType to a list of (first, second) OperandType pairs. A pair with
 * OperandType::INVALID in both slots is what assembleInstruction checks for
 * before encoding an instruction that has no operands.
 */
static const std::map<const InstructionType,
const std::vector<std::pair<OperandType, OperandType>>>
formats{
// prefix 0x0
{ InstructionType::INC,
{
{ OperandType::REGISTER, OperandType::INVALID },
{ OperandType::DREGISTER, OperandType::INVALID },
} },
{ InstructionType::DEC,
{
{ OperandType::REGISTER, OperandType::INVALID },
{ OperandType::DREGISTER, OperandType::INVALID },
} },
{ InstructionType::LD,
{
{ OperandType::REGISTER, OperandType::IMM8 },
{ OperandType::DREGISTER, OperandType::IMM16 },
} },
{ InstructionType::NOP,
{ { OperandType::INVALID, OperandType::INVALID } } },
{ InstructionType::STOP,
{ { OperandType::INVALID, OperandType::INVALID } } },
{ InstructionType::RLCA,
{ { OperandType::INVALID, OperandType::INVALID } } },
{ InstructionType::RLA,
{ { OperandType::INVALID, OperandType::INVALID } } },
{ InstructionType::DAA,
{ { OperandType::INVALID, OperandType::INVALID } } },
{ InstructionType::SCF,
{ { OperandType::INVALID, OperandType::INVALID } } },
{ InstructionType::RRCA,
{ { OperandType::INVALID, OperandType::INVALID } } },
{ InstructionType::RRA,
{ { OperandType::INVALID, OperandType::INVALID } } },
{ InstructionType::CPL,
{ { OperandType::INVALID, OperandType::INVALID } } },
{ InstructionType::CCF,
{ { OperandType::INVALID, OperandType::INVALID } } },
{ InstructionType::JR,
{
{ OperandType::FLAG, OperandType::IMM8 },
{ OperandType::IMM8, OperandType::INVALID },
} },
{ InstructionType::HALT,
{ { OperandType::INVALID, OperandType::INVALID } } },
{ InstructionType::SUB,
{
{ OperandType::REGISTER, OperandType::INVALID },
{ OperandType::REGISTER, OperandType::IMM8 },
} },
{ InstructionType::AND,
{
{ OperandType::REGISTER, OperandType::INVALID },
{ OperandType::REGISTER, OperandType::IMM8 },
} },
{ InstructionType::XOR,
{
{ OperandType::REGISTER, OperandType::INVALID },
{ OperandType::REGISTER, OperandType::IMM8 },
} },
{ InstructionType::OR,
{
{ OperandType::REGISTER, OperandType::INVALID },
{ OperandType::REGISTER, OperandType::IMM8 },
} },
{ InstructionType::CP,
{
{ OperandType::REGISTER, OperandType::INVALID },
{ OperandType::REGISTER, OperandType::IMM8 },
} },
{ InstructionType::POP,
{ { OperandType::DREGISTER, OperandType::INVALID } } },
{ InstructionType::PUSH,
{ { OperandType::DREGISTER, OperandType::INVALID } } },
{ InstructionType::DI, { { OperandType::INVALID, OperandType::INVALID } } },
{ InstructionType::EI, { { OperandType::INVALID, OperandType::INVALID } } },
{ InstructionType::RET,
{
{ OperandType::INVALID, OperandType::INVALID },
{ OperandType::FLAG, OperandType::IMM8 },
} },
{ InstructionType::RETI,
{ { OperandType::INVALID, OperandType::INVALID } } },
{ InstructionType::JP,
{
{ OperandType::FLAG, OperandType::IMM8 },
{ OperandType::IMM16, OperandType::INVALID },
} },
{ InstructionType::CALL,
{
{ OperandType::FLAG, OperandType::IMM8 },
{ OperandType::IMM16, OperandType::INVALID },
} },
};
// TODO
// jp (hl)
@ -507,7 +478,7 @@ void Assembler::assembleInstruction(ELF& elf, BaseInstruction& instr) {
return (fmt.first == OperandType::INVALID) &&
(fmt.second == OperandType::INVALID);
})) {
elf.addText(instructionNone(instr0));
elf.addText(std::vector<uint8_t>{InstructionNone{instr0.type()}.encode()});
} else {
throw AssemblerException("Invalid instruction0 usage");
}
@ -641,74 +612,6 @@ void Assembler::assembleInstruction(ELF& elf, BaseInstruction& instr) {
}
}
/**
 * Helper function for assembling instructions with no arguments.
 *
 * Dispatches on the runtime instruction type to the matching
 * InstructionNone<> specialization and wraps the resulting opcode in a
 * one-element vector.
 *
 * @param instr0: Instruction0 node.
 *
 * @returns Vector of bytes representing encoded instruction.
 *
 * @throws AssemblerException upon invalid instruction parameter.
 */
std::vector<uint8_t> Assembler::instructionNone(Instruction0& instr0) {
  uint8_t opcode = 0;
  switch (instr0.type()) {
    case InstructionType::NOP:
      opcode = InstructionNone<InstructionType::NOP>{}.encode();
      break;
    case InstructionType::STOP:
      opcode = InstructionNone<InstructionType::STOP>{}.encode();
      break;
    case InstructionType::RLCA:
      opcode = InstructionNone<InstructionType::RLCA>{}.encode();
      break;
    case InstructionType::RLA:
      opcode = InstructionNone<InstructionType::RLA>{}.encode();
      break;
    case InstructionType::DAA:
      opcode = InstructionNone<InstructionType::DAA>{}.encode();
      break;
    case InstructionType::SCF:
      opcode = InstructionNone<InstructionType::SCF>{}.encode();
      break;
    case InstructionType::RRCA:
      opcode = InstructionNone<InstructionType::RRCA>{}.encode();
      break;
    case InstructionType::RRA:
      opcode = InstructionNone<InstructionType::RRA>{}.encode();
      break;
    case InstructionType::CPL:
      opcode = InstructionNone<InstructionType::CPL>{}.encode();
      break;
    case InstructionType::CCF:
      opcode = InstructionNone<InstructionType::CCF>{}.encode();
      break;
    case InstructionType::HALT:
      opcode = InstructionNone<InstructionType::HALT>{}.encode();
      break;
    case InstructionType::DI:
      opcode = InstructionNone<InstructionType::DI>{}.encode();
      break;
    case InstructionType::RET:
      opcode = InstructionNone<InstructionType::RET>{}.encode();
      break;
    case InstructionType::RETI:
      opcode = InstructionNone<InstructionType::RETI>{}.encode();
      break;
    case InstructionType::EI:
      opcode = InstructionNone<InstructionType::EI>{}.encode();
      break;
    default:
      throw AssemblerException("Unrecognized instruction");
  }
  return std::vector<uint8_t>{opcode};
}
std::vector<uint8_t> Assembler::instructionR(Instruction1& instr1,
const BaseRegister& reg) {
switch (instr1.type()) {
@ -839,8 +742,9 @@ std::shared_ptr<BaseInstruction> Assembler::evaluateInstruction(
}
}
std::shared_ptr<BaseNode> Assembler::evaluateBinaryOp(
std::shared_ptr<BaseBinaryOp> node) {
std::shared_ptr<BaseNode>
Assembler::evaluateBinaryOp(std::shared_ptr<BaseBinaryOp> node)
{
switch (node->opType()) {
case BinaryOpType::ADD: {
auto op = std::dynamic_pointer_cast<AddOp>(node);

188
src/elf.cpp

@ -7,6 +7,26 @@
using namespace GBAS;
// Debug switch for ELF error reporting.
// NOTE(review): both branches of the conditional below define DEBUG_ELF, so
// it ends up defined whether or not DEBUG_GBAS is set -- confirm whether one
// branch was meant to leave it undefined.
#undef DEBUG_ELF
#ifndef DEBUG_GBAS
#define DEBUG_ELF
#else
#define DEBUG_ELF
#endif
// ELF_EXCEPTION(msg): throw an ELFException constructed from msg. When
// DEBUG_ELF is defined, msg is first written to std::cerr. In that variant
// msg appears twice in the expansion, so an argument expression such as
// builder.str() is evaluated twice.
#ifdef DEBUG_ELF
#define ELF_EXCEPTION(msg) \
do {\
std::cerr << msg << std::endl; \
throw ELFException{msg}; \
} while(0)
#else
#define ELF_EXCEPTION(msg) \
do {\
throw ELFException{msg}; \
} while(0)
#endif
static const int ABIVERSION = 0;
static const unsigned char ELF_IDENT[] = {
EI_MAG0,
@ -35,6 +55,7 @@ ELF::ELF()
mRelocationSections{},
mSectionHeaders{},
mCurrSymTab{0},
mSymbolTableIdx{0},
mData{},
mDataIdx{0},
mRodata{},
@ -64,7 +85,7 @@ ELF::ELF()
mSectionNamesIdx = mSectionHeaders.size();
addSectionHeader(Elf32_Shdr{
// This section's name will be first in the section names table
.sh_name = 0,
.sh_name = static_cast<uint32_t>(mSectionNames.size()),
.sh_type = SHT_STRTAB,
.sh_flags = 0,
.sh_addr = 0,
@ -81,7 +102,7 @@ ELF::ELF()
mStringTableIdx = mSectionHeaders.size();
addSectionHeader(Elf32_Shdr{
// This section's name will be first in the section names table
.sh_name = 1,
.sh_name = static_cast<uint32_t>(mSectionNames.size()),
.sh_type = SHT_STRTAB,
.sh_flags = 0,
.sh_addr = 0,
@ -97,7 +118,7 @@ ELF::ELF()
// data section
mDataIdx = mSectionHeaders.size();
addSectionHeader(
Elf32_Shdr{.sh_name = 2,
Elf32_Shdr{.sh_name = static_cast<uint32_t>(mSectionNames.size()),
.sh_type = SHT_PROGBITS,
.sh_flags = SHF_ALLOC | SHF_WRITE,
.sh_addr = 0,
@ -113,7 +134,7 @@ ELF::ELF()
// rodata section
mRodataIdx = mSectionHeaders.size();
addSectionHeader(
Elf32_Shdr{.sh_name = 3,
Elf32_Shdr{.sh_name = static_cast<uint32_t>(mSectionNames.size()),
.sh_type = SHT_PROGBITS,
.sh_flags = SHF_ALLOC,
.sh_addr = 0,
@ -129,7 +150,7 @@ ELF::ELF()
// bss section
mBssIdx = mSectionHeaders.size();
addSectionHeader(
Elf32_Shdr{.sh_name = 4,
Elf32_Shdr{.sh_name = static_cast<uint32_t>(mSectionNames.size()),
.sh_type = SHT_NOBITS,
.sh_flags = SHF_ALLOC | SHF_WRITE,
.sh_addr = 0,
@ -145,7 +166,7 @@ ELF::ELF()
// text section
mTextIdx = mSectionHeaders.size();
addSectionHeader(
Elf32_Shdr{.sh_name = 5,
Elf32_Shdr{.sh_name = static_cast<uint32_t>(mSectionNames.size()),
.sh_type = SHT_PROGBITS,
.sh_flags = SHF_ALLOC | SHF_EXECINSTR,
.sh_addr = 0,
@ -161,7 +182,7 @@ ELF::ELF()
// init section
mInitIdx = mSectionHeaders.size();
addSectionHeader(
Elf32_Shdr{.sh_name = 6,
Elf32_Shdr{.sh_name = static_cast<uint32_t>(mSectionNames.size()),
.sh_type = SHT_PROGBITS,
.sh_flags = SHF_ALLOC | SHF_EXECINSTR,
.sh_addr = 0,
@ -173,6 +194,49 @@ ELF::ELF()
.sh_addralign = 0,
.sh_entsize = 0});
addSectionName("init");
// symbol table
mCurrSymTab = mSectionHeaders.size();
mSymbolTableIdx = mSymbolTables.size();
addSectionHeader(
Elf32_Shdr{.sh_name = static_cast<uint32_t>(mSectionNames.size()),
.sh_type = SHT_SYMTAB,
.sh_flags = SHF_ALLOC,
.sh_addr = 0,
.sh_offset = 0,
.sh_size = 0,
.sh_link = 0,
.sh_info = 0,
.sh_addralign = 0,
.sh_entsize = sizeof(Elf32_Sym)});
addSectionName("symtab");
mSymbolTables.push_back(SymbolTable{});
mSymbolTables.at(0).emplace_back(
Elf32_Sym{.st_name = 0,
.st_value = 0,
.st_size = 0,
.st_info = 0,
.st_other = 0,
.st_shndx = 0});
// relocation table
mCurrRelocSec = mSectionHeaders.size();
mRelocationSectionIdx = mSymbolTables.size();
addSectionHeader(
Elf32_Shdr{.sh_name = static_cast<uint32_t>(mSectionNames.size()),
.sh_type = SHT_REL,
.sh_flags = 0,
.sh_addr = 0,
.sh_offset = 0,
.sh_size = 0,
.sh_link = 0,
.sh_info = 0,
.sh_addralign = 0,
.sh_entsize = sizeof(Elf32_Rel)});
// TODO there are relocations for all regular sections, not just one
addSectionName("relsymtab");
// TODO Is the first relocation entry null?
}
void ELF::computeSectionOffsets() {
@ -226,7 +290,7 @@ Elf32_Sym& ELF::addSymbol(const std::string name, uint32_t value, uint32_t size,
bool relocatable) {
Elf32_Shdr& hdr = mSectionHeaders.at(mCurrSymTab);
if (hdr.sh_type != SHT_SYMTAB) {
throw ELFException("Attempting to define symbol in non-symbol table");
ELF_EXCEPTION("Attempting to define symbol in non-symbol table");
}
// No other symbols in this file should have the same name
@ -234,22 +298,22 @@ Elf32_Sym& ELF::addSymbol(const std::string name, uint32_t value, uint32_t size,
std::ostringstream builder{"Symbol "};
builder << name;
builder << " cannot be defined twice";
throw ELFException(builder.str());
ELF_EXCEPTION(builder.str());
}
auto& tbl = mSymbolTables.at(hdr.sh_offset);
auto& tbl = mSymbolTables.at(mSymbolTableIdx);
uint32_t nameIdx = mStringTable.size();
addString(name);
mSymbolNames.insert(name);
uint32_t idx = tbl.size();
tbl.emplace_back(Elf32_Sym{.st_name = nameIdx,
.st_value = value,
.st_size = size,
.st_info = info,
.st_other = visibility,
.st_shndx = other});
.st_value = value,
.st_size = size,
.st_info = info,
.st_other = visibility,
.st_shndx = other});
hdr.sh_size += sizeof(Elf32_Sym);
// If the symbol should be relocatable, find the corresponding section of
@ -258,13 +322,13 @@ Elf32_Sym& ELF::addSymbol(const std::string name, uint32_t value, uint32_t size,
// Search the section header table for a header of a relocation section
// that points back to mCurrSymTab.
auto relocHdr =
std::find_if(mSectionHeaders.begin(), mSectionHeaders.end(),
[&](auto h) { return h.sh_link == mCurrSymTab; });
std::find_if(mSectionHeaders.begin(), mSectionHeaders.end(),
[&](auto h) { return h.sh_link == mCurrSymTab; });
if (relocHdr == mSectionHeaders.end()) {
std::ostringstream builder{"Relocatable symbol "};
builder << name << " requested but no corresponding relocation"
<< " section found";
throw ELFException(builder.str());
std::ostringstream builder{};
builder << "Relocatable symbol " << name
<< " requested but no corresponding relocation section found";
ELF_EXCEPTION(builder.str());
}
auto relocEntry = mRelocationSections.at(relocHdr->sh_offset);
@ -273,7 +337,7 @@ Elf32_Sym& ELF::addSymbol(const std::string name, uint32_t value, uint32_t size,
// relocation that should occur.
relocEntry.emplace_back(
Elf32_Rel{.r_offset = static_cast<Elf32_Addr>(idx * sizeof(Elf32_Sym)),
.r_info = ELF32_R_SYM(idx) | ELF32_R_TYPE(R_386_32)});
.r_info = ELF32_R_SYM(idx) | ELF32_R_TYPE(R_386_32)});
relocHdr->sh_size += sizeof(Elf32_Rel);
}
@ -378,84 +442,6 @@ size_t ELF::initSize() {
return mInit.size();
}
/**
 * Make the symbol table named `name` the current symbol table, creating it
 * (together with a companion relocation section named "rel" + name) when no
 * section with that name exists yet.
 *
 * @param name Name of the section to switch to or create.
 * @param addr Stored in sh_addr of a newly created symbol table header; not
 *             used when a section called `name` already exists.
 * @return Reference to the section header of the now-current symbol table.
 * @throws ELFException if `name` refers to an existing section whose type is
 *         not SHT_SYMTAB.
 */
Elf32_Shdr& ELF::setSection(std::string name, uint32_t addr) {
  // TODO only allow setting section to symbol tables. I think we can handle
  // relocation and string tables under the hood.
  for (size_t i = 0; i < mSectionHeaders.size(); i++) {
    if (name == getSectionName(i)) {
      if (mSectionHeaders.at(i).sh_type != SHT_SYMTAB) {
        // Stream the prefix instead of passing it to the constructor: an
        // ostringstream constructed from a string starts writing at position
        // 0, so a following `<<` would overwrite the prefix.
        std::ostringstream builder{};
        builder << "Attempted to change to invalid section " << name;
        throw ELFException(builder.str());
      } else {
        mCurrSymTab = i;
        return mSectionHeaders.at(i);
      }
    }
  }

  // If we get here, we didn't find an existing section, so we'll create one.
  uint32_t nameidx = static_cast<uint32_t>(mSectionNames.size());
  mSectionNames.push_back(name);
  // TODO while we're building the ELF file, we'll cheat a little bit and use
  // this as an index into mSymbolTables.
  uint32_t offset = mSymbolTables.size();
  mSymbolTables.emplace_back(SymbolTable{});
  // The first entry in each symbol table is a null entry
  mSymbolTables.at(offset).emplace_back(Elf32_Sym{.st_name = 0,
                                                  .st_value = 0,
                                                  .st_size = 0,
                                                  .st_info = 0,
                                                  .st_other = 0,
                                                  .st_shndx = 0});
  uint32_t relOffset = mRelocationSections.size();
  mRelocationSections.emplace_back(RelocationSection{});
  // Index the new symbol table header will occupy in the section header table.
  uint32_t symtabLink = mSectionHeaders.size();
  addSectionHeader(
      Elf32_Shdr{.sh_name = nameidx,
                 .sh_type = SHT_SYMTAB,
                 // These are just symbols. .text, .rodata, and .data have
                 // a non-zero sh_flags field.
                 .sh_flags = 0,
                 .sh_addr = addr,
                 .sh_offset = offset,
                 // Size includes the null entry at the beginning of the table.
                 // Make sure to update this field when adding a symbol!
                 .sh_size = sizeof(Elf32_Sym),
                 // For a symbol table, link is the index in the section header
                 // table of the corresponding string table's section header.
                 .sh_link = mStringTableIdx,
                 .sh_info = 0,
                 // This indicates that the section has alignment constraints.
                 // 0 or 1 mean the same thing. TODO is there a problem with
                 // aligning everything to 2 bytes?
                 .sh_addralign = 0,
                 .sh_entsize = sizeof(Elf32_Sym)});
  uint32_t relnameidx = mSectionNames.size();
  addSectionName("rel" + name);
  addSectionHeader(Elf32_Shdr{
      .sh_name = relnameidx,
      .sh_type = SHT_REL,
      .sh_flags = 0,
      .sh_addr = 0,
      .sh_offset = relOffset,
      // TODO is there a null entry at the beginning?
      .sh_size = 0,
      // For a relocation section, link is the index in the section header table
      // of the corresponding symbol table's section header.
      .sh_link = symtabLink,
      // Confusingly, for a relocation section, this is the same as sh_link
      // according to https://wiki.osdev.org/ELF_Tutorial#Relocation_Sections.
      // TODO does any other section type use this field??
      .sh_info = symtabLink,
      .sh_addralign = 0,
      .sh_entsize = sizeof(Elf32_Rel)});
  mCurrSymTab = symtabLink;
  return mSectionHeaders.at(symtabLink);
}
std::string& ELF::getSectionName(size_t sidx) {
auto& sh = mSectionHeaders.at(sidx);
return mSectionNames.at(sh.sh_name);

101
src/parser.cpp

@ -2,12 +2,14 @@
#include <algorithm>
#include "parser.hpp"
#include "char_utils.hpp"
Parser::Parser(TokenList& tokens) : mTokens{tokens}, mRoot{}, mPos{0} {}
Parser::~Parser() {}
using namespace AST;
using namespace GBAS;
static const std::array<const Token, 8> registers = {
"a", "f", "b", "c", "d", "e", "h", "l",
@ -17,6 +19,10 @@ static const std::array<const Token, 6> doubleRegisters = {
"af", "bc", "de", "hl", "sp", "pc",
};
// Table of the directives the parser recognizes; Parser::findDirective
// searches it by lexeme. Each entry looks like {lexeme, directive type,
// operand count} -- field order assumed from the initializer; confirm against
// DirectiveProps in parser.hpp.
static DirectivePropsList directives{{
{".section", DirectiveType::SECTION, 1},
}};
static InstructionPropsList instructions{{
{"add", InstructionType::ADD, 2, 2},
{"adc", InstructionType::ADC, 2, 2},
@ -94,10 +100,10 @@ std::shared_ptr<BaseNode> Parser::program() {
std::shared_ptr<BaseNode> Parser::line() {
auto tok = peek();
if (isLabel(tok) && (peekNext() == ":")) {
if (isDirective(tok)) {
return directive();
} else if (isLabel(tok) && (peekNext() == ":")) {
return label();
// TODO
// } else if (isDirective(tok)) {
} else if (isInstruction(tok)) {
return instruction();
} else {
@ -106,14 +112,38 @@ std::shared_ptr<BaseNode> Parser::line() {
}
std::shared_ptr<BaseNode> Parser::label() {
auto tok = next();
if (isLabel(tok)) {
return std::make_shared<Label>(tok);
return parseLabel(next());
}
// Builds a Label AST node from `tok` and hands it back as a BaseNode pointer.
// No validation of the token happens here.
std::shared_ptr<BaseNode> Parser::parseLabel(const Token& tok) {
  std::shared_ptr<BaseNode> node = std::make_shared<Label>(tok);
  return node;
}
/**
 * Look up the properties of the directive whose lexeme equals `tok`.
 *
 * @param tok Token to match against the `lexeme` of each entry in the
 *            file-level `directives` table.
 * @return Reference to the matching entry inside `directives`.
 * @throws ParserException if no entry matches.
 */
const DirectiveProps& Parser::findDirective(const Token& tok) {
  // Take each entry by const reference so table entries are not copied, and
  // use a distinct name so the lambda parameter does not shadow the result.
  const auto match = std::find_if(
      directives.begin(), directives.end(),
      [&tok](const auto& candidate) { return candidate.lexeme == tok; });
  if (match == directives.end()) {
    throw ParserException{"Invalid directive in program"};
  }
  return *match;
}
/**
 * Parse a directive: consume the directive token, then consume as many
 * operand tokens as the directive's `args` count requires.
 *
 * @return A Directive node carrying the directive type and its operand tokens.
 * @throws ParserException if the directive is unknown (raised by
 *         findDirective) or a newline token is seen before all operands have
 *         been read.
 */
std::shared_ptr<BaseNode> Parser::directive() {
  // findDirective returns a reference into the static directive table; bind
  // it by reference rather than copying the entry.
  const auto& props = findDirective(next());
  Directive::OperandList operands{};
  for (int i = 0; i < props.args; i++) {
    // Operands must all appear before the line ends.
    if (isNewline(peek())) {
      throw ParserException{"Expected more arguments in directive"};
    }
    operands.push_back(next());
  }
  return std::make_shared<Directive>(props.type, operands);
}
std::shared_ptr<BaseNode> Parser::instruction() {
auto inst = next();
std::vector<std::shared_ptr<BaseNode>> operands;
@ -264,8 +294,11 @@ std::shared_ptr<BaseNode> Parser::primary() {
}
std::shared_ptr<BaseNode> Parser::number() {
return std::make_shared<Number>(
static_cast<Number>(std::atoi(next().c_str()))