Browse Source

Helpers for swapping; broken support for writing ELF

Changes probably look a little noisy because I partially converted
variable and function names from camel to snake case.

Using readelf, I see the right number of sections, but the section
header table is wrong somehow. Using hexdump, the strings that should be
in the file are missing, so it seems like they aren't being written.
assembler
Zack Marvel 1 year ago
parent
commit
d37c77e5b7
  1. 194
      include/elf.hpp
  2. 19
      include/elf_writer.hpp
  3. 9
      include/parser.hpp
  4. 19
      include/swap_helpers.hpp
  5. 16
      src/assembler.cpp
  6. 430
      src/elf.cpp
  7. 57
      src/elf_writer.cpp
  8. 82
      src/main.cpp
  9. 33
      src/parser.cpp

194
include/elf.hpp

@ -42,18 +42,20 @@ class StrTabSection;
class ISection {
public:
ISection() : mName{}, mHeader{} { }
ISection() : mName{}, header_{} { }
ISection(std::string name, Elf32_Shdr hdr) : mName{name}, mHeader{hdr} { }
ISection(std::string name, Elf32_Shdr hdr) : mName{name}, header_{hdr} { }
virtual SectionType type() { return SectionType::INVALID; }
std::string& name() { return mName; }
Elf32_Shdr& header() { return mHeader; }
Elf32_Shdr& header() { return header_; }
virtual size_t size() const = 0;
virtual void write(std::ostream& os) const = 0;
class Type {
public:
// no type by default
@ -113,7 +115,7 @@ class ISection {
private:
std::string mName;
Elf32_Shdr mHeader;
Elf32_Shdr header_;
};
template <SectionType stype>
@ -146,6 +148,10 @@ class ProgramSection : public Section<SectionType::PROGBITS> {
return mData;
}
/**
 * Emit this section's bytes to the output stream.
 *
 * @param os: Stream that receives the contents of mData unchanged.
 */
virtual void write(std::ostream& os) const override {
  const auto* bytes = reinterpret_cast<const char*>(mData.data());
  os.write(bytes, mData.size());
}
void append(std::vector<uint8_t>& buf) {
mData.insert(mData.end(), buf.begin(), buf.end());
header().sh_size += buf.size();
@ -186,59 +192,61 @@ class StrTabSection : public Section<SectionType::STRTAB> {
return mStrings;
}
/**
 * Write every string in the table to os, in table order, each one followed
 * by a single NUL terminator byte.
 *
 * @param os: Stream that receives the string data.
 */
virtual void write(std::ostream& os) const override {
  for (const auto& str : strings()) {
    os.write(str.data(), str.size());
    os.put('\0');
  }
}
private:
StringTable mStrings;
};
class SymTabSection : public Section<SectionType::SYMTAB> {
public:
using Symbol = Elf32_Sym;
using SymbolTable = std::vector<Symbol>;
SymTabSection()
: Section<SectionType::SYMTAB>()
, mSymbols{}
{
// First entry is always full of zeros.
mSymbols.emplace_back(
Elf32_Sym{.st_name = 0,
.st_value = 0,
.st_size = 0,
.st_info = 0,
.st_other = 0,
.st_shndx = 0});
}
public:
using Symbol = Elf32_Sym;
using SymbolTable = std::vector<Symbol>;
SymTabSection() : Section<SectionType::SYMTAB>(), mSymbols{} {
// First entry is always full of zeros.
mSymbols.emplace_back(Elf32_Sym{.st_name = 0,
.st_value = 0,
.st_size = 0,
.st_info = 0,
.st_other = 0,
.st_shndx = 0});
}
SymTabSection(std::string name, Elf32_Shdr hdr)
: Section<SectionType::SYMTAB>(name, hdr)
, mSymbols{}
{
mSymbols.emplace_back(
Elf32_Sym{.st_name = 0,
.st_value = 0,
.st_size = 0,
.st_info = 0,
.st_other = 0,
.st_shndx = 0});
}
SymTabSection(std::string name, Elf32_Shdr hdr)
: Section<SectionType::SYMTAB>(name, hdr), mSymbols{} {
mSymbols.emplace_back(Elf32_Sym{.st_name = 0,
.st_value = 0,
.st_size = 0,
.st_info = 0,
.st_other = 0,
.st_shndx = 0});
}
virtual size_t size() const override {
// TODO maybe we should add 1 to the length of every string to account for
// the terminating byte
return sizeof(Symbol) * symbols().size();
}
virtual size_t size() const override {
// TODO maybe we should add 1 to the length of every string to account for
// the terminating byte
return sizeof(Symbol) * symbols().size();
}
const SymbolTable& symbols() const {
return mSymbols;
}
const SymbolTable& symbols() const { return mSymbols; }
SymbolTable& symbols() { return mSymbols; }
SymbolTable& symbols() {
return mSymbols;
virtual void write(std::ostream& os) const override {
for (auto it = symbols().begin(); it != symbols().end(); it++) {
auto hdr = *it;
os.write(reinterpret_cast<char*>(&hdr), sizeof(hdr));
}
}
private:
SymbolTable mSymbols;
private:
SymbolTable mSymbols;
};
class RelSection : public Section<SectionType::REL> {
@ -267,6 +275,17 @@ class RelSection : public Section<SectionType::REL> {
return mRelocations;
}
/**
 * Read-only access to this section's relocation entries.
 */
const RelocationTable& relocations() const {
return mRelocations;
}
/**
 * Write each relocation entry to os as raw bytes, in table order.
 *
 * @param os: Stream that receives the relocation entries.
 */
virtual void write(std::ostream& os) const override {
  // TODO is the first entry supposed to be null?
  for (const auto& rel : relocations()) {
    os.write(reinterpret_cast<const char*>(&rel), sizeof(rel));
  }
}
private:
std::string mOther;
RelocationTable mRelocations;
@ -274,6 +293,21 @@ class RelSection : public Section<SectionType::REL> {
using SectionList = std::vector<std::unique_ptr<ISection>>;
/**
 * Produce a byte-swapped copy of an ELF file header.
 *
 * Only the Elf32_Ehdr specialization is declared here.
 *
 * @param hdr: Header to convert.
 * @returns the converted header.
 */
template <typename headerT>
headerT swap_elf_header(headerT& hdr);
template <>
Elf32_Ehdr GBAS::swap_elf_header(Elf32_Ehdr& hdr);
/**
 * Produce a byte-swapped copy of an ELF section header.
 *
 * Only the Elf32_Shdr specialization is declared here.
 *
 * @param hdr: Section header to convert.
 * @returns the converted section header.
 */
template <typename sheaderT>
sheaderT swap_section_header(sheaderT& hdr);
template <>
Elf32_Shdr GBAS::swap_section_header(Elf32_Shdr& hdr);
/**
* Models an ELF file, for the purposes of Game Boy programs. This means there
* is currently no support for e.g. dynamic linking or executable files.
@ -301,25 +335,21 @@ public:
* @returns a reference to the symbol in its symbol table.
* @throws ELFException if a symbol with that name already exists.
*/
Elf32_Sym& addSymbol(const std::string name,
uint32_t value,
uint32_t size,
ISection::Type type,
ISection::Binding bind,
ISection::Visibility visibility,
bool relocatable);
Elf32_Sym& add_symbol(const std::string name, uint32_t value, uint32_t size,
ISection::Type type, ISection::Binding bind,
ISection::Visibility visibility, bool relocatable);
/**
* Add str to the string table.
*
* @returns a reference to the inserted string.
*/
std::string& addString(const std::string& str);
std::string& add_string(const std::string& str);
/**
* Add some data to a PROGBITS section.
*/
void addProgbits(std::vector<uint8_t> data);
void add_progbits(std::vector<uint8_t> data);
/**
* Add some data to a PROGBITS section.
@ -327,25 +357,25 @@ public:
* @param pData: Pointer to bytes that will be copied.
* @param n: How many bytes will be copied.
*/
void addProgbits(uint8_t* pData, size_t n);
void add_progbits(uint8_t* pData, size_t n);
/**
* Change the current section.
*
* @param name: Name of an already-created section.
*/
ISection& setSection(const std::string& name);
ISection& set_section(const std::string& name);
/**
* Go through each section header and compute each section's offset. If any
* section is modified after this function has been called, the function
* should be called again.
*/
void computeSectionOffsets();
void comput_section_offsets();
Elf32_Ehdr& header() { return mHeader; }
Elf32_Ehdr& header() { return header_; }
const SectionList& sections() { return mSections; }
const SectionList& sections() { return sections_; }
protected:
/**
@ -354,7 +384,7 @@ protected:
* @param section: rvalue reference to ISection unique_ptr.
* @param relocatable: Should a corresponding RelocationSection be created?
*/
void addSection(std::unique_ptr<ISection>&& section, bool relocatable = true);
void add_section(std::unique_ptr<ISection>&& section, bool relocatable = true);
/*
typedef struct {
@ -378,61 +408,61 @@ protected:
/**
* ELF file header.
*/
Elf32_Ehdr mHeader;
Elf32_Ehdr header_;
/**
* Vector of sections.
*/
SectionList mSections;
SectionList sections_;
/**
* Index of the section name string table in mSections.
* Index of the section name string table in sections_.
*/
uint32_t mShStrTabIdx;
uint32_t shstrtab_idx_;
public:
StrTabSection& shStringTable()
{
return dynamic_cast<StrTabSection&>(*mSections.at(mShStrTabIdx));
return dynamic_cast<StrTabSection&>(*sections_.at(shstrtab_idx_));
}
protected:
/**
* Index of the current string table in mSections.
* Index of the current string table in sections_.
*/
uint32_t mStrTabIdx;
uint32_t strtab_idx_;
StrTabSection& stringTable()
StrTabSection& string_table()
{
return dynamic_cast<StrTabSection&>(*mSections.at(mStrTabIdx));
return dynamic_cast<StrTabSection&>(*sections_.at(strtab_idx_));
}
/**
* Index of the current section in mSections.
* Index of the current section in sections_.
*/
uint32_t mCurrSection;
uint32_t curr_section_;
ISection& currentSection() { return *mSections.at(mCurrSection); }
ISection& current_section() { return *sections_.at(curr_section_); }
/**
* Index of the current relocation section (corresponding to the current
* progbits section) in mSections.
* progbits section) in sections_.
*/
uint32_t mCurrRelIdx;
uint32_t curr_rel_idx_;
RelSection& currentRelocationSection()
RelSection& current_relocation_section()
{
return dynamic_cast<RelSection&>(*mSections.at(mCurrRelIdx));
return dynamic_cast<RelSection&>(*sections_.at(curr_rel_idx_));
}
/**
* Index of current symbol table in mSections.
* Index of current symbol table in sections_.
*/
uint32_t mCurrSymTabIdx;
uint32_t curr_symtab_idx_;
SymTabSection& currentSymbolTable()
SymTabSection& current_symbol_table()
{
return dynamic_cast<SymTabSection&>(*mSections.at(mCurrSymTabIdx));
return dynamic_cast<SymTabSection&>(*sections_.at(curr_symtab_idx_));
}
/**
@ -440,7 +470,7 @@ protected:
* faster to check for duplicate symbol names. The alternative is linearly
* searching through every symbol table when adding a new symbol.
*/
std::unordered_set<std::string> mSymbolNames;
std::unordered_set<std::string> symbol_names_;
};
class ELFException : std::exception {

19
include/elf_writer.hpp

@ -9,17 +9,18 @@
namespace GBAS {
class ELFWriter {
public:
ELFWriter(ELF& elf) : elf_{elf} { }
ELFWriter() = delete;
public:
ELFWriter(ELF& elf) : elf_{elf} {}
ELFWriter() = delete;
void write(std::ostream& os);
size_t populate_section_headers(size_t starting_offs);
void write(std::string path);
void write(std::ostream& os);
size_t populate_section_headers(size_t starting_offs);
private:
ELF& elf_;
private:
ELF& elf_;
};
}
} // namespace GBAS
#endif // ELF_WRITER_HPP
#endif // ELF_WRITER_HPP

9
include/parser.hpp

@ -472,7 +472,7 @@ class Parser {
* @param tokens: Input list of tokens.
* @param i: Current position in the list (allowing recursive calls).
*/
std::shared_ptr<AST::BaseNode> parse();
std::shared_ptr<AST::Root> parse();
/**
* Return the next Token in the list and increment the position counter.
@ -492,7 +492,7 @@ class Parser {
*/
Token peekNext();
std::shared_ptr<AST::BaseNode> program();
std::shared_ptr<AST::Root> program();
std::shared_ptr<AST::BaseNode> line();
std::shared_ptr<AST::BaseNode> instruction();
@ -627,6 +627,11 @@ class Parser {
*/
static bool isNewline(const Token& tok);
/**
* True only if tok is a comma.
*/
static bool isComma(const Token& tok);
/**
* True only if tok is EOF.
*/

19
include/swap_helpers.hpp

@ -0,0 +1,19 @@
#ifndef SWAP_HELPERS_HPP
#define SWAP_HELPERS_HPP
#include <elf.h>
/**
 * Reverse the byte order of an integer value.
 *
 * The primary template is declared but not defined, so only the
 * specializations below (uint16_t and uint32_t) are usable.
 *
 * @param value: Value whose bytes should be reversed.
 * @returns value with its bytes in the opposite order.
 */
template <typename T>
T swap(T value);

// The specializations are defined in a header, so they must be `inline`:
// an explicit specialization of a function template is an ordinary function
// and would otherwise be defined once per including translation unit,
// producing multiple-definition errors at link time.
template <>
inline uint16_t swap(uint16_t value) {
  return __builtin_bswap16(value);
}

template <>
inline uint32_t swap(uint32_t value) {
  return __builtin_bswap32(value);
}
#endif // SWAP_HELPERS_HPP

16
src/assembler.cpp

@ -22,7 +22,7 @@ void Assembler::assemble(std::shared_ptr<AST::Root> ast, ELF& elf) {
switch (directive->type()) {
case DirectiveType::SECTION:
{
elf.setSection(directive->operands().at(0));
elf.set_section(directive->operands().at(0));
}
break;
default:
@ -72,8 +72,8 @@ void Assembler::assemble(std::shared_ptr<AST::Root> ast, ELF& elf) {
// throw AssemblerException("Invalid section type");
//}
// TODO relocatable
//elf.addSymbol(label->name(), value, 0, info, STV_DEFAULT, other, true);
elf.addSymbol(label->name(), value, 0, ISection::Type{},
//elf.add_symbol(label->name(), value, 0, info, STV_DEFAULT, other, true);
elf.add_symbol(label->name(), value, 0, ISection::Type{},
ISection::Binding{}.global(), ISection::Visibility{}, false);
}
break;
@ -474,7 +474,7 @@ void Assembler::assembleInstruction(ELF& elf, BaseInstruction& instr) {
return (fmt.first == OperandType::INVALID) &&
(fmt.second == OperandType::INVALID);
})) {
elf.addProgbits(std::vector<uint8_t>{InstructionNone{instr0.type()}.encode()});
elf.add_progbits(std::vector<uint8_t>{InstructionNone{instr0.type()}.encode()});
} else {
throw AssemblerException("Invalid instruction0 usage");
}
@ -500,7 +500,7 @@ void Assembler::assembleInstruction(ELF& elf, BaseInstruction& instr) {
switch (instr1.type()) {
case InstructionType::INC:
case InstructionType::DEC:
elf.addProgbits(instructionR(instr1, *reg));
elf.add_progbits(instructionR(instr1, *reg));
return;
break;
case InstructionType::SUB:
@ -509,7 +509,7 @@ void Assembler::assembleInstruction(ELF& elf, BaseInstruction& instr) {
case InstructionType::XOR:
case InstructionType::OR:
case InstructionType::CP:
elf.addProgbits(instructionRA(instr1, *reg));
elf.add_progbits(instructionRA(instr1, *reg));
return;
break;
default:
@ -524,7 +524,7 @@ void Assembler::assembleInstruction(ELF& elf, BaseInstruction& instr) {
case InstructionType::DEC:
case InstructionType::POP:
case InstructionType::PUSH:
elf.addProgbits(instructionD(instr1, *reg));
elf.add_progbits(instructionD(instr1, *reg));
return;
break;
default:
@ -541,7 +541,7 @@ void Assembler::assembleInstruction(ELF& elf, BaseInstruction& instr) {
std::vector<uint8_t> encoded{
InstructionA{instr1.type(), reg->reg1(), reg->reg2()}
.encode()};
elf.addProgbits(encoded);
elf.add_progbits(encoded);
}
return;
break;

430
src/elf.cpp

@ -1,10 +1,9 @@
#include <algorithm>
#include <sstream>
#include <string_view>
#include "elf.hpp"
#include "swap_helpers.hpp"
using namespace GBAS;
@ -16,24 +15,24 @@ using namespace GBAS;
#endif
#ifdef DEBUG_ELF
#define ELF_EXCEPTION(msg) \
do {\
#define ELF_EXCEPTION(msg) \
do { \
std::cerr << msg << std::endl; \
throw ELFException{msg}; \
} while(0)
throw ELFException{msg}; \
} while (0)
#else
#define ELF_EXCEPTION(msg) \
do {\
#define ELF_EXCEPTION(msg) \
do { \
throw ELFException{msg}; \
} while(0)
} while (0)
#endif
static const int ABIVERSION = 0;
static const unsigned char ELF_IDENT[] = {
EI_MAG0,
EI_MAG1,
EI_MAG2,
EI_MAG3,
ELFMAG0,
ELFMAG1,
ELFMAG2,
ELFMAG3,
ELFCLASS32,
ELFDATA2MSB,
EV_CURRENT,
@ -49,176 +48,231 @@ static const unsigned char ELF_IDENT[] = {
0,
};
ELF::ELF()
: mCurrSection{0}
, mCurrRelIdx{0}
{
/**
 * Build a byte-swapped copy of an ELF file header.
 *
 * The e_ident bytes are carried over untouched; every other field is passed
 * through swap().
 *
 * @param hdr: Header to convert; it is only read.
 * @returns the converted header.
 */
template <>
Elf32_Ehdr GBAS::swap_elf_header(Elf32_Ehdr& hdr) {
  // Start from a full copy so e_ident comes along as-is, then overwrite each
  // multi-byte field with its swapped value.
  Elf32_Ehdr result = hdr;
  result.e_type = swap(hdr.e_type);
  result.e_machine = swap(hdr.e_machine);
  result.e_version = swap(hdr.e_version);
  result.e_entry = swap(hdr.e_entry);
  result.e_phoff = swap(hdr.e_phoff);
  result.e_shoff = swap(hdr.e_shoff);
  result.e_flags = swap(hdr.e_flags);
  result.e_ehsize = swap(hdr.e_ehsize);
  result.e_phentsize = swap(hdr.e_phentsize);
  result.e_phnum = swap(hdr.e_phnum);
  result.e_shentsize = swap(hdr.e_shentsize);
  result.e_shnum = swap(hdr.e_shnum);
  result.e_shstrndx = swap(hdr.e_shstrndx);
  return result;
}
/**
 * Build a byte-swapped copy of an ELF section header. Every field is passed
 * through swap().
 *
 * @param hdr: Section header to convert; it is only read.
 * @returns the converted section header.
 */
template <>
Elf32_Shdr GBAS::swap_section_header(Elf32_Shdr& hdr) {
  return Elf32_Shdr{.sh_name = swap(hdr.sh_name),
                    .sh_type = swap(hdr.sh_type),
                    .sh_flags = swap(hdr.sh_flags),
                    .sh_addr = swap(hdr.sh_addr),
                    .sh_offset = swap(hdr.sh_offset),
                    .sh_size = swap(hdr.sh_size),
                    .sh_link = swap(hdr.sh_link),
                    .sh_info = swap(hdr.sh_info),
                    .sh_addralign = swap(hdr.sh_addralign),
                    .sh_entsize = swap(hdr.sh_entsize)};
}
ELF::ELF() : curr_section_{0}, curr_rel_idx_{0} {
// File header
memcpy(&mHeader.e_ident, ELF_IDENT, EI_NIDENT);
mHeader.e_type = ET_REL;
mHeader.e_machine = EM_NONE;
mHeader.e_version = EV_CURRENT;
mHeader.e_entry = 0;
mHeader.e_phoff = 0;
mHeader.e_shoff = 0; // set this later
mHeader.e_flags = 0;
mHeader.e_ehsize = sizeof(Elf32_Ehdr);
mHeader.e_phentsize = sizeof(Elf32_Phdr);
mHeader.e_phnum = 0;
mHeader.e_shentsize = sizeof(Elf32_Shdr);
mHeader.e_shnum = 0; // set this later
memcpy(&header_.e_ident, ELF_IDENT, EI_NIDENT);
header_.e_type = ET_REL;
header_.e_machine = EM_NONE;
header_.e_version = EV_CURRENT;
header_.e_entry = 0;
header_.e_phoff = 0;
header_.e_shoff = 0; // set this later
header_.e_flags = 0;
header_.e_ehsize = sizeof(Elf32_Ehdr);
header_.e_phentsize = sizeof(Elf32_Phdr);
header_.e_phnum = 0;
header_.e_shentsize = sizeof(Elf32_Shdr);
header_.e_shnum = 0; // set this later
// Section header for section names string table
mShStrTabIdx = mSections.size();
addSection(std::make_unique<StrTabSection>("shstrtab", Elf32_Shdr{
// This section's name will be first in the section names table
.sh_name = 0,
.sh_type = SHT_STRTAB,
.sh_flags = 0,
.sh_addr = 0,
// TODO we have to fill this in just before writing the file
.sh_offset = 0,
.sh_size = 0,
.sh_link = 0,
.sh_info = 0,
.sh_addralign = 0,
.sh_entsize = 0}), false);
shstrtab_idx_ = sections_.size();
add_section(
std::make_unique<StrTabSection>(
"shstrtab",
Elf32_Shdr{
// This section's name will be first in the section names table
.sh_name = 0,
.sh_type = SHT_STRTAB,
.sh_flags = 0,
.sh_addr = 0,
// TODO we have to fill this in just before writing the file
.sh_offset = 0,
.sh_size = 0,
.sh_link = 0,
.sh_info = 0,
.sh_addralign = 0,
.sh_entsize = 0}),
false);
// Section header for string table
mStrTabIdx = mSections.size();
addSection(std::make_unique<StrTabSection>("strtab", Elf32_Shdr{
// This section's name will be first in the section names table
.sh_name = 0,
.sh_type = SHT_STRTAB,
.sh_flags = 0,
.sh_addr = 0,
// TODO we have to fill this in just before writing the file
.sh_offset = 0,
.sh_size = 0,
.sh_link = 0,
.sh_info = 0,
.sh_addralign = 0,
.sh_entsize = 0}), false);
strtab_idx_ = sections_.size();
add_section(
std::make_unique<StrTabSection>(
"strtab",
Elf32_Shdr{
// This section's name will be first in the section names table
.sh_name = 0,
.sh_type = SHT_STRTAB,
.sh_flags = 0,
.sh_addr = 0,
// TODO we have to fill this in just before writing the file
.sh_offset = 0,
.sh_size = 0,
.sh_link = 0,
.sh_info = 0,
.sh_addralign = 0,
.sh_entsize = 0}),
false);
// data section
addSection(std::make_unique<ProgramSection>("data",
add_section(std::make_unique<ProgramSection>(
"data",
Elf32_Shdr{.sh_name = 0,
.sh_type = SHT_PROGBITS,
.sh_flags = SHF_ALLOC | SHF_WRITE,
.sh_addr = 0,
// TODO we have to fill this in just before writing the file
.sh_offset = 0,
.sh_size = 0,
.sh_link = 0,
.sh_info = 0,
.sh_addralign = 0,
.sh_entsize = 0}));
.sh_type = SHT_PROGBITS,
.sh_flags = SHF_ALLOC | SHF_WRITE,
.sh_addr = 0,
// TODO we have to fill this in just before writing the file
.sh_offset = 0,
.sh_size = 0,
.sh_link = 0,
.sh_info = 0,
.sh_addralign = 0,
.sh_entsize = 0}));
// rodata section
addSection(std::make_unique<ProgramSection>("rodata",
add_section(std::make_unique<ProgramSection>(
"rodata",
Elf32_Shdr{.sh_name = 0,
.sh_type = SHT_PROGBITS,
.sh_flags = SHF_ALLOC,
.sh_addr = 0,
// TODO we have to fill this in just before writing the file
.sh_offset = 0,
.sh_size = 0,
.sh_link = 0,
.sh_info = 0,
.sh_addralign = 0,
.sh_entsize = 0}));
.sh_type = SHT_PROGBITS,
.sh_flags = SHF_ALLOC,
.sh_addr = 0,
// TODO we have to fill this in just before writing the file
.sh_offset = 0,
.sh_size = 0,
.sh_link = 0,
.sh_info = 0,
.sh_addralign = 0,
.sh_entsize = 0}));
// bss section
addSection(std::make_unique<ProgramSection>("bss",
add_section(std::make_unique<ProgramSection>(
"bss",
Elf32_Shdr{.sh_name = 0,
.sh_type = SHT_NOBITS,
.sh_flags = SHF_ALLOC | SHF_WRITE,
.sh_addr = 0,
// TODO we have to fill this in just before writing the file
.sh_offset = 0,
.sh_size = 0,
.sh_link = 0,
.sh_info = 0,
.sh_addralign = 0,
.sh_entsize = 0}));
.sh_type = SHT_NOBITS,
.sh_flags = SHF_ALLOC | SHF_WRITE,
.sh_addr = 0,
// TODO we have to fill this in just before writing the file
.sh_offset = 0,
.sh_size = 0,
.sh_link = 0,
.sh_info = 0,
.sh_addralign = 0,
.sh_entsize = 0}));
// text section
addSection(std::make_unique<ProgramSection>("text",
add_section(std::make_unique<ProgramSection>(
"text",
Elf32_Shdr{.sh_name = 0,
.sh_type = SHT_PROGBITS,
.sh_flags = SHF_ALLOC | SHF_EXECINSTR,
.sh_addr = 0,
// TODO we have to fill this in just before writing the file
.sh_offset = 0,
.sh_size = 0,
.sh_link = 0,
.sh_info = 0,
.sh_addralign = 0,
.sh_entsize = 0}));
.sh_type = SHT_PROGBITS,
.sh_flags = SHF_ALLOC | SHF_EXECINSTR,
.sh_addr = 0,
// TODO we have to fill this in just before writing the file
.sh_offset = 0,
.sh_size = 0,
.sh_link = 0,
.sh_info = 0,
.sh_addralign = 0,
.sh_entsize = 0}));
// init section
addSection(std::make_unique<ProgramSection>("init",
add_section(std::make_unique<ProgramSection>(
"init",
Elf32_Shdr{.sh_name = 0,
.sh_type = SHT_PROGBITS,
.sh_flags = SHF_ALLOC | SHF_EXECINSTR,
.sh_addr = 0,
// TODO we have to fill this in just before writing the file
.sh_offset = 0,
.sh_size = 0,
.sh_link = 0,
.sh_info = 0,
.sh_addralign = 0,
.sh_entsize = 0}));
.sh_type = SHT_PROGBITS,
.sh_flags = SHF_ALLOC | SHF_EXECINSTR,
.sh_addr = 0,
// TODO we have to fill this in just before writing the file
.sh_offset = 0,
.sh_size = 0,
.sh_link = 0,
.sh_info = 0,
.sh_addralign = 0,
.sh_entsize = 0}));
// symbol table
mCurrSymTabIdx = mSections.size();
addSection(std::make_unique<SymTabSection>("symtab",
Elf32_Shdr{.sh_name = 0,
.sh_type = SHT_SYMTAB,
.sh_flags = SHF_ALLOC,
.sh_addr = 0,
.sh_offset = 0,
.sh_size = 0,
.sh_link = 0,
.sh_info = 0,
.sh_addralign = 0,
.sh_entsize = sizeof(Elf32_Sym)}), false);
curr_symtab_idx_ = sections_.size();
add_section(std::make_unique<SymTabSection>(
"symtab", Elf32_Shdr{.sh_name = 0,
.sh_type = SHT_SYMTAB,
.sh_flags = SHF_ALLOC,
.sh_addr = 0,
.sh_offset = 0,
.sh_size = 0,
.sh_link = 0,
.sh_info = 0,
.sh_addralign = 0,
.sh_entsize = sizeof(Elf32_Sym)}),
false);
}
void ELF::addSection(std::unique_ptr<ISection>&& section, bool relocatable) {
void ELF::add_section(std::unique_ptr<ISection>&& section, bool relocatable) {
// Only one section may have a given name
auto it = std::find_if(mSections.begin(), mSections.end(),
[&](auto& sec) { return sec->name() == section->name(); });
if (it != mSections.end()) {
auto it = std::find_if(sections_.begin(), sections_.end(), [&](auto& sec) {
return sec->name() == section->name();
});
if (it != sections_.end()) {
std::ostringstream builder{};
builder << "Cannot add section with duplicate name: " << section->name();
ELF_EXCEPTION(builder.str());
}
mSections.emplace_back(std::move(section));
const auto name = section->name();
// The section header string table is a special case--since it doesn't exist
// yet in sections_, we can't get a reference to it with shStringTable().
// In that case it must be the provided section itself.
//
// Bind by reference (auto&): a plain `auto` would copy the StrTabSection, so
// the name would be pushed onto a temporary and never reach the real table.
auto& shstrtab = (name == "shstrtab")
                     ? dynamic_cast<StrTabSection&>(*section)
                     : shStringTable();
shstrtab.strings().push_back(name);
sections_.emplace_back(std::move(section));
// After moving the object, `*section` is no longer valid.
if (relocatable) {
std::ostringstream builder{};
builder << "rel" << mSections.back()->name();
mSections.emplace_back(std::make_unique<RelSection>(
builder.str(),
Elf32_Shdr{
.sh_name = 0,
.sh_type = SHT_REL,
.sh_flags = 0,
.sh_addr = 0,
.sh_offset = 0,
.sh_size = 0,
.sh_link = 0,
.sh_info = 0,
.sh_addralign = 0,
.sh_entsize = sizeof(Elf32_Rel)},
mSections.back()->name()));
builder << "rel" << sections_.back()->name();
sections_.emplace_back(std::make_unique<RelSection>(
builder.str(),
Elf32_Shdr{.sh_name = 0,
.sh_type = SHT_REL,
.sh_flags = 0,
.sh_addr = 0,
.sh_offset = 0,
.sh_size = 0,
.sh_link = 0,
.sh_info = 0,
.sh_addralign = 0,
.sh_entsize = sizeof(Elf32_Rel)},
sections_.back()->name()));
}
}
//void ELF::computeSectionOffsets() {
// size_t pos = sizeof(mHeader) + mSectionHeaders.size() * sizeof(Elf32_Shdr);
// void ELF::computeSectionOffsets() {
// size_t pos = sizeof(header_) + mSectionHeaders.size() * sizeof(Elf32_Shdr);
// for (auto pHdr = mSectionHeaders.begin(); pHdr != mSectionHeaders.end();
// pHdr++) {
// pHdr->sh_offset = pos;
@ -226,7 +280,7 @@ void ELF::addSection(std::unique_ptr<ISection>&& section, bool relocatable) {
// }
//}
//void ELF::write(std::ostream& out) {
// void ELF::write(std::ostream& out) {
// computeSectionOffsets();
// // Write section headers
// for (auto itHdr = mSectionHeaders.begin(); itHdr != mSectionHeaders.end();
@ -263,49 +317,52 @@ void ELF::addSection(std::unique_ptr<ISection>&& section, bool relocatable) {
// }
//}
ISection& ELF::setSection(const std::string& name) {
auto it = std::find_if(mSections.begin(), mSections.end(),
ISection& ELF::set_section(const std::string& name) {
auto it = std::find_if(sections_.begin(), sections_.end(),
[&](auto& sec) { return sec->name() == name; });
if (it == mSections.end()) {
if (it == sections_.end()) {
std::ostringstream builder{};
builder << "Invalid section: " << name;
ELF_EXCEPTION(builder.str());
} else {
mCurrSection = std::distance(mSections.begin(), it);
// If there's a corresponding relocation section, update mCurrRelIdx also
curr_section_ = std::distance(sections_.begin(), it);
// If there's a corresponding relocation section, update curr_rel_idx_ also
std::ostringstream builder{};
builder << "rel" << name;
auto relName = builder.str();
auto relIt = std::find_if(mSections.begin(), mSections.end(),
[&](auto& sec) { return sec->name() == relName; });
mCurrRelIdx = std::distance(mSections.begin(), relIt);
return *mSections.at(mCurrSection);
auto relname = builder.str();
auto relit =
std::find_if(sections_.begin(), sections_.end(),
[&](auto& sec) { return sec->name() == relname; });
curr_rel_idx_ = std::distance(sections_.begin(), relit);
return *sections_.at(curr_section_);
}
}
Elf32_Sym& ELF::addSymbol(const std::string name, uint32_t value, uint32_t size,
ISection::Type type, ISection::Binding bind,
ISection::Visibility visibility, bool relocatable) {
Elf32_Sym& ELF::add_symbol(const std::string name, uint32_t value,
uint32_t size, ISection::Type type,
ISection::Binding bind,
ISection::Visibility visibility, bool relocatable) {
// TODO figure out info based on current section type
// No other symbols in this file should have the same name
if (mSymbolNames.find(name) != mSymbolNames.end()) {
if (symbol_names_.find(name) != symbol_names_.end()) {
std::ostringstream builder{"Symbol "};
builder << name;
builder << " cannot be defined twice";
ELF_EXCEPTION(builder.str());
}
mSymbolNames.insert(name);
symbol_names_.insert(name);
uint32_t idx = currentSymbolTable().symbols().size();
uint32_t nameIdx = stringTable().strings().size();
addString(name);
currentSymbolTable().symbols().emplace_back(Elf32_Sym{.st_name = nameIdx,
.st_value = value,
.st_size = size,
.st_info = ELF32_ST_INFO(bind.binding(), type.type()),
.st_other = ELF32_ST_VISIBILITY(visibility.visibility()),
.st_shndx = static_cast<uint8_t>(mCurrSection)});
uint32_t idx = current_symbol_table().symbols().size();
uint32_t nameidx = string_table().strings().size();
add_string(name);
current_symbol_table().symbols().emplace_back(
Elf32_Sym{.st_name = nameidx,
.st_value = value,
.st_size = size,
.st_info = ELF32_ST_INFO(bind.binding(), type.type()),
.st_other = ELF32_ST_VISIBILITY(visibility.visibility()),
.st_shndx = static_cast<uint8_t>(curr_section_)});
// If the symbol should be relocatable, find the corresponding section of
// relocation information.
@ -316,35 +373,34 @@ Elf32_Sym& ELF::addSymbol(const std::string name, uint32_t value, uint32_t size,
// r_offset is an offset into the section where the symbol lives.
// r_info is both the symbol's index in the symbol table and the type of
// relocation that should occur.
currentRelocationSection().relocations().emplace_back(Elf32_Rel{
.r_offset = value,
.r_info = ELF32_R_INFO(idx, R_386_32)});
current_relocation_section().relocations().emplace_back(
Elf32_Rel{.r_offset = value, .r_info = ELF32_R_INFO(idx, R_386_32)});
}
return currentSymbolTable().symbols().back();
return current_symbol_table().symbols().back();
}
std::string& ELF::addString(const std::string& str) {
std::string& ELF::add_string(const std::string& str) {
// size + null byte
stringTable().strings().push_back(str);
stringTable().header().sh_size += str.size() + 1;
return stringTable().strings().back();
string_table().strings().push_back(str);
string_table().header().sh_size += str.size() + 1;
return string_table().strings().back();
}
void ELF::addProgbits(std::vector<uint8_t> data) {
if (currentSection().type() != SectionType::PROGBITS) {
void ELF::add_progbits(std::vector<uint8_t> data) {
if (current_section().type() != SectionType::PROGBITS) {
ELF_EXCEPTION("Attempted to add PROGBITS to non-PROGBITS section");
}
auto& section = dynamic_cast<ProgramSection&>(currentSection());
auto& section = dynamic_cast<ProgramSection&>(current_section());
section.data().insert(section.data().end(), data.begin(), data.end());
}
void ELF::addProgbits(uint8_t* pData, size_t n) {
if (currentSection().type() != SectionType::PROGBITS) {
void ELF::add_progbits(uint8_t* pData, size_t n) {
if (current_section().type() != SectionType::PROGBITS) {
ELF_EXCEPTION("Attempted to add PROGBITS to non-PROGBITS section");
}
auto& section = dynamic_cast<ProgramSection&>(currentSection());
auto& section = dynamic_cast<ProgramSection&>(current_section());
section.data().insert(section.data().end(), pData, pData + n);
}

57
src/elf_writer.cpp

@ -1,8 +1,22 @@
#include <fstream>
#include <algorithm>
#include "elf_writer.hpp"
using namespace GBAS;
/**
 * Write the ELF image to a file.
 *
 * @param path: Destination file path; an existing file is truncated.
 * @throws ELFException if the file cannot be opened.
 */
void ELFWriter::write(std::string path) {
  // ELF is a binary format: open in binary mode so that no platform newline
  // translation can alter the bytes being written.
  std::ofstream os{path, std::ios::out | std::ios::binary | std::ios::trunc};
  if (!os) {
    throw ELFException{"Failed to open " + path};
  }
  write(os);
}
void ELFWriter::write(std::ostream& os) {
// There are some sections of headers that we need to fill out or verify as
// we go. In the ELF header:
@ -23,15 +37,54 @@ void ELFWriter::write(std::ostream& os) {
// Let the section header table immediate follow the ELF header.
auto& elf_hdr = elf_.header();
elf_hdr.e_shoff = sizeof(elf_hdr);
printf("e_shoff=%u\n", elf_hdr.e_shoff);
elf_hdr.e_shnum = elf_.sections().size();
auto shstrpos = std::find_if(
elf_.sections().begin(), elf_.sections().end(),
[](auto& section) { return section->header().sh_type == SHT_STRTAB; });
elf_hdr.e_shstrndx = shstrpos - elf_.sections().begin();
elf_hdr = swap_elf_header(elf_hdr);
os.write(reinterpret_cast<char*>(&elf_hdr), sizeof(elf_hdr));
off_t section_offs = elf_.sections().size() * sizeof(Elf32_Shdr);
// The first entry in the ELF section header table is NULL
{
Elf32_Shdr null_hdr;
memset(&null_hdr, 0, sizeof(null_hdr));
os.write(reinterpret_cast<char*>(&null_hdr), sizeof(null_hdr));
section_offs += sizeof(null_hdr);
}
std::cout << "shstrtab size " << elf_.shStringTable().size() << std::endl;
std::string section_names{};
// Now write out the rest of the section headers and build up the section name
// string table
for (auto it = elf_.sections().begin(); it != elf_.sections().end(); it++) {
const auto& section = *it;
auto& hdr = section->header();
section_names.append(section->name() + '\0');
hdr.sh_name = it - elf_.sections().begin();
hdr.sh_size = section->size();
hdr.sh_offset = section_offs;
section_offs += section->size();
hdr = swap_section_header(hdr);
os.write(reinterpret_cast<char*>(&hdr), sizeof(hdr));
}
// Iterate over the sections a second time and write them out
for (auto it = elf_.sections().begin(); it != elf_.sections().end(); it++) {
const auto& section = *it;
section->write(os);
}
}
size_t ELFWriter::populate_section_headers(size_t starting_offs) {
size_t offs = starting_offs;
auto& shstrtab = elf_.shStringTable();
for (auto section_it = elf_.sections().begin();
section_it != elf_.sections().end();
section_it++) {
section_it != elf_.sections().end(); section_it++) {
auto& section = *section_it;
auto hdr = section->header();

82
src/main.cpp

@ -1,9 +1,12 @@
#include <fstream>
#include <getopt.h>
#include "tokenizer.hpp"
#include "elf_reader.hpp"
#include "elf_writer.hpp"
#include "assembler.hpp"
#include "parser.hpp"
class InputFile {
public:
@ -23,22 +26,91 @@ class InputFile {
using namespace GBAS;
static const std::string USAGE = " <input file>";
// Command-line entry point.
//
// Parses the -t/--tokenize and -p/--parse options, then tokenizes, parses and
// assembles the input file and writes the result to "a.out".
//
// Returns 0 on success (including the early exits for --tokenize and
// --parse), -1 when too few arguments are given or the input file does not
// exist, and 1 when both --tokenize and --parse are requested.
int main(int argc, char* argv[]) {
// At least one argument besides the program name is required.
if (argc < 2) {
std::cerr << argv[0] << USAGE << std::endl;
return -1;
}
// NOTE(review): `infile` is declared a second time further down (from
// argv[optind]). This listing appears to show removed and added diff lines
// side by side -- confirm which declaration is the live one.
InputFile infile{std::string{argv[1]}};
// Long options. Both entries use flag == nullptr and val == 0, so
// getopt_long() returns 0 for either one and `case 0` below tells them apart
// by name.
const struct option long_options[] = {
{"tokenize", no_argument, nullptr, 0},
{"parse", no_argument, nullptr, 0},
{nullptr, 0, nullptr, 0},
};
bool tokenize_only = false;
bool parse_only = false;
int c = 0;
int option_index = 0;
// "tp" declares the short options -t and -p, neither taking an argument.
while ((c = getopt_long(argc, argv, "tp", long_options, &option_index)) != -1) {
switch (c) {
case 0: {
// A long option matched; option_index identifies which entry it was.
using namespace std::literals::string_view_literals;
const char* option_name = long_options[option_index].name;
if ("tokenize"sv == option_name) {
tokenize_only = true;
} else if ("parse"sv == option_name) {
parse_only = true;
}
}
break;
case 't':
tokenize_only = true;
break;
case 'p':
parse_only = true;
break;
case '?':
// Unrecognized option: ignored here, parsing simply continues.
break;
default:
break;
}
// Checked after every option so the conflict is reported as soon as both
// flags have been set.
if (tokenize_only && parse_only) {
std::cerr << "Only one of --tokenize and --parse may be specified" << std::endl;
return 1;
}
}
// The first non-option argument is taken as the input file.
// NOTE(review): optind is not compared with argc before indexing. When only
// options are given (e.g. `prog -t`), argv[optind] is the terminating null
// pointer -- confirm whether a guard is wanted here.
InputFile infile{std::string{argv[optind]}};
if (!infile.exists()) {
std::cerr << "Input file does not exist" << std::endl;
return -1;
}
// NOTE(review): the same ELFReader::read call appears commented out near the
// end of this function; presumably this active copy is the removed side of
// the diff -- confirm.
ELFReader::read(infile.stream()).or_else([](std::string msg) {
std::cerr << msg << std::endl;
});
// infile.stream()
Tokenizer tokenizer{};
auto token_list = tokenizer.tokenize(infile.stream());
// --tokenize / -t: print each token on its own line and stop.
if (tokenize_only) {
for (auto& token : token_list) {
std::cout << token << std::endl;
}
return 0;
}
Parser parser{token_list};
auto root_node = parser.parse();
// --parse / -p: stop after parsing; nothing is printed yet.
if (parse_only) {
// TODO print tree
return 0;
}
// Assemble the parsed program into an ELF object and write it to "a.out".
Assembler assembler{};
ELF elf{};
assembler.assemble(root_node, elf);
ELFWriter writer{elf};
writer.write("a.out");
// ELFReader::read(infile.stream()).or_else([](std::string msg) {
// std::cerr << msg << std::endl;
// });
return 0;
}

33
src/parser.cpp

@ -20,7 +20,7 @@ static const std::array<const Token, 6> doubleRegisters = {
};
static DirectivePropsList directives{{
{".section", DirectiveType::SECTION, 1},
{".section", DirectiveType::SECTION, 1},
}};
static InstructionPropsList instructions{{
@ -88,12 +88,16 @@ Token Parser::peek() {
// Returns a copy of the token stored one index past the current position
// (mPos + 1), fetched through mTokens.at(); mPos itself is not modified.
Token Parser::peekNext() {
  const auto lookahead_index = mPos + 1;
  return mTokens.at(lookahead_index);
}
std::shared_ptr<BaseNode> Parser::parse() { return program(); };
std::shared_ptr<Root> Parser::parse() { return program(); };
std::shared_ptr<BaseNode> Parser::program() {
// Parses the token stream into a Root node.
//
// Loops until peek() reports an EOF token, collecting the nodes produced by
// line() into `lines`, then returns a Root constructed from that vector.
std::shared_ptr<Root> Parser::program() {
std::vector<std::shared_ptr<BaseNode>> lines;
while (!isEof(peek())) {
// NOTE(review): this unconditional push_back sits directly above the
// newline check that also calls line(); it looks like the removed diff line
// shown next to its replacement -- confirm against the real file.
lines.push_back(line());
// Newline tokens are consumed with next(); anything else is parsed as a line.
if (isNewline(peek())) {
next();
} else {
lines.push_back(line());
}
}
return std::make_shared<Root>(lines);
}
@ -111,18 +115,15 @@ std::shared_ptr<BaseNode> Parser::line() {
}
}
std::shared_ptr<BaseNode> Parser::label() {
return parseLabel(next());
}
std::shared_ptr<BaseNode> Parser::label() { return parseLabel(next()); }
// Builds a Label node from the given token and hands it back as a
// std::shared_ptr<BaseNode>.
std::shared_ptr<BaseNode> Parser::parseLabel(const Token& tok) {
  std::shared_ptr<BaseNode> label_node = std::make_shared<Label>(tok);
  return label_node;
}
const DirectiveProps& Parser::findDirective(const Token& tok) {
auto props = std::find_if(directives.begin(), directives.end(),
[&](auto props) { return props.lexeme == tok; });
[&](auto props) { return props.lexeme == tok; });
if (props == directives.end()) {
throw ParserException{"Invalid directive in program"};
} else {
@ -148,7 +149,11 @@ std::shared_ptr<BaseNode> Parser::instruction() {
auto inst = next();
std::vector<std::shared_ptr<BaseNode>> operands;
while (!isNewline(peek()) && operands.size() < 3) {
operands.push_back(operand());
if (isComma(peek())) {
next();
} else {
operands.push_back(operand());
}
}
auto props = findInstruction(inst);
if (props == instructions.end()) {
@ -293,9 +298,7 @@ std::shared_ptr<BaseNode> Parser::primary() {
}
}
std::shared_ptr<BaseNode> Parser::number() {
return parseNumber(next());
}
std::shared_ptr<BaseNode> Parser::number() { return parseNumber(next()); }
std::shared_ptr<BaseNode> Parser::parseNumber(const Token& tok) {
return std::make_shared<Number>(static_cast<Number>(std::atoi(tok.c_str())));
@ -303,6 +306,8 @@ std::shared_ptr<BaseNode> Parser::parseNumber(const Token& tok) {
// Reports whether tok compares equal to the "EOL" token.
bool Parser::isNewline(const Token& tok) {
  const bool is_eol = (tok == "EOL");
  return is_eol;
}
// Reports whether tok compares equal to the "," token.
bool Parser::isComma(const Token& tok) {
  const bool is_comma = (tok == ",");
  return is_comma;
}
int Parser::expectNewline(const TokenList& list, int start, int max) {
for (int i = start; i < max; i++) {
if (isNewline(list.at(i))) {
@ -375,7 +380,7 @@ bool Parser::isDirective(const Token& tok) {
}
return std::all_of(tok.begin() + 1, tok.end(),
[](auto c) { return isAlphaNumeric(c); });
[](auto c) { return isAlphaNumeric(c); });
}
bool Parser::isRegister(const Token& tok) {

Loading…
Cancel
Save