Browse Source

Initial commit

assembler
Zack Marvel 3 years ago
commit
7d8865ce30
  1. 32
      .gitignore
  2. 27
      Makefile
  3. 9
      README.md
  4. 52
      include/tokenizer.h
  5. 18
      src/main.cpp
  6. 62
      src/tokenizer.cpp
  7. 92
      test/TokenizerTest.cpp
  8. 1
      test/data/arith0.asm
  9. 5
      test/data/arith0.tokens
  10. 3
      test/data/arith1.asm
  11. 13
      test/data/arith1.tokens
  12. 1
      test/data/comment0.asm
  13. 2
      test/data/comment0.tokens
  14. 1
      test/data/comment1.asm
  15. 2
      test/data/comment1.tokens
  16. 1
      test/data/comment2.asm
  17. 5
      test/data/comment2.tokens
  18. 1
      test/data/directive.asm
  19. 1
      test/data/directive0_align_invalid.asm
  20. 1
      test/data/directive1_align_3_args.asm
  21. 1
      test/data/directive2_ascii0.asm
  22. 1
      test/data/directive2_ascii1.asm
  23. 1
      test/data/directive2_ascii2.asm
  24. 1
      test/data/directive2_ascii3.asm
  25. 1
      test/data/directive3_asciz0.asm
  26. 1
      test/data/directive3_asciz1.asm
  27. 2
      test/data/directive4_balign0.asm
  28. 2
      test/data/directive4_balign1.asm
  29. 2
      test/data/directive4_balign2.asm
  30. 2
      test/data/directive4_balign3.asm
  31. 2
      test/data/directive5_byte0.asm
  32. 2
      test/data/directive5_byte1.asm
  33. 2
      test/data/directive5_byte2.asm
  34. 0
      test/data/directive6_comm.asm

32
.gitignore

@ -0,0 +1,32 @@
# Prerequisites
*.d
# Compiled Object files
*.slo
*.lo
*.o
*.obj
# Precompiled Headers
*.gch
*.pch
# Compiled Dynamic libraries
*.so
*.dylib
*.dll
# Fortran module files
*.mod
*.smod
# Compiled Static libraries
*.lai
*.la
*.a
*.lib
# Executables
*.exe
*.out
*.app

27
Makefile

@ -0,0 +1,27 @@
# Build flags: debug info, all warnings, C++11.
CXXFLAGS = -g -Wall -std=c++11
LDFLAGS = -g -Wall
EXE_SRC = src/main.cpp
EXE = gbas
TEST_EXE = gbas_test
SRCS = src/tokenizer.cpp
OBJS = $(patsubst %.cpp,%.o,$(SRCS))
EXE_OBJS = $(OBJS) $(patsubst %.cpp,%.o,$(EXE_SRC))
INC = -Iinclude
TEST_SRCS = test/TokenizerTest.cpp
TEST_OBJS = $(OBJS) $(patsubst %.cpp,%.o,$(TEST_SRCS))

# Main assembler binary. ($@ = target, $^ = all prerequisites)
$(EXE): $(EXE_OBJS)
	$(CXX) $(CXXFLAGS) -o $@ $^

# Unit-test binary; links against Boost.Test.
$(TEST_EXE): $(TEST_OBJS)
	$(CXX) $(CXXFLAGS) -o $@ $^ -lboost_unit_test_framework

# Objects depend on the Makefile so flag changes trigger rebuilds.
%.o: %.cpp Makefile
	$(CXX) -c $(CXXFLAGS) $(INC) -o $@ $<

# clean is not a file; mark it phony so a file named "clean" can't mask it.
.PHONY: clean
clean:
	rm -f $(OBJS) $(EXE) $(TEST_OBJS) $(TEST_EXE)

9
README.md

@ -0,0 +1,9 @@
# gbas
`gbas` is an assembler for the Game Boy, whose CPU is a Z80 derivative. The
goal of this project is to provide an assembler syntax for the Z80 that is as
close as possible to that of the GNU assembler. In particular, other Z80
assemblers use a different syntax for assembler directives such as `.align`;
`gbas` aims to feel familiar to Game Boy programmers who have used the GNU
assembler before.

52
include/tokenizer.h

@ -0,0 +1,52 @@
#include <array>
#include <vector>
#include <string>
#include <iostream>
#include <sstream>
typedef std::string Token;
// Exception thrown by the tokenizer when input cannot be tokenized (e.g. a
// reserved token appears in the source). Publicly derives from
// std::exception so callers can catch it as a standard exception and call
// what(); the original `: std::exception` defaulted to PRIVATE inheritance
// for a class, which made `catch (const std::exception&)` miss it.
class TokenizerException : public std::exception {
public:
  // Construct from a C-string message (copied into the exception).
  TokenizerException(const char *msg) : msg(msg) {}
  // Construct from a std::string message (copied into the exception).
  TokenizerException(const std::string &msg) : msg(msg) {}
  // Human-readable description; pointer valid for the exception's lifetime.
  virtual const char *what() const throw() {
    return msg.c_str();
  }
private:
  std::string msg;  // owned copy of the error message
};
// Sequence of tokens produced by the Tokenizer, in source order.
// NOTE(review): publicly inherits std::vector<Token>, whose destructor is
// non-virtual -- never delete a TokenList through a std::vector<Token>*.
class TokenList : public std::vector<Token> {
public:
TokenList();
// NOTE(review): declared here but no definition is visible in
// src/tokenizer.cpp -- confirm it is defined somewhere, or this won't link.
~TokenList();
// Append an ordinary token; rejects reserved markers such as "EOL"/"EOF"
// (see the implementation in src/tokenizer.cpp, which throws).
void add(Token tok);
// Append a reserved marker token ("EOL"/"EOF") without validation.
void add_reserved(Token tok);
};
// Maximum characters read per input line, including the terminating NUL.
const int MAX_LINE_LEN = 128;
// Splits an assembly source stream into tokens.
class Tokenizer {
public:
// Tokenize the whole stream. Returns a heap-allocated TokenList that the
// CALLER OWNS and must delete. An "EOL" marker follows each line and a
// final "EOF" marker terminates the list.
TokenList *tokenize(std::basic_istream<char> &lines);
// True iff tok is one of the reserved marker tokens ("EOL", "EOF").
static bool isReserved(Token tok);
private:
// Marker tokens that may not appear as ordinary input tokens.
static const std::array<Token, 2> reserved;
};

18
src/main.cpp

@ -0,0 +1,18 @@
#include "tokenizer.h"
int main(int argc, char *argv[]) {
auto tokenizer = Tokenizer{};
auto input = "add a, 32\n";
std::stringstream stream;
stream << input;
auto tokens = tokenizer.tokenize(stream);
std::cout << tokens->size() << std::endl;
std::cout << tokens->at(0) << std::endl;
std::cout << tokens->at(1) << std::endl;
std::cout << tokens->at(2) << std::endl;
std::cout << tokens->at(3) << std::endl;
return 0;
}

62
src/tokenizer.cpp

@ -0,0 +1,62 @@
#include <algorithm>
#include <string.h>
#include "tokenizer.h"
// Reserved marker tokens: "EOL" terminates each source line and "EOF"
// terminates the whole token stream. Ordinary input may not use these.
const std::array<Token, 2> Tokenizer::reserved = {
"EOL",
"EOF",
};
// A new TokenList is simply an empty vector of tokens.
TokenList::TokenList() = default;
// Append an ordinary token. Reserved markers ("EOL"/"EOF") are rejected
// with a TokenizerException; use add_reserved() for those.
void TokenList::add(Token tok) {
  if (!Tokenizer::isReserved(tok)) {
    push_back(tok);
    return;
  }
  throw TokenizerException(tok + " is reserved");
}
// Append a reserved marker token ("EOL"/"EOF"), skipping the validation
// that add() performs.
void TokenList::add_reserved(Token tok) {
  insert(end(), tok);
}
// A token is reserved iff it appears in the static `reserved` table.
bool Tokenizer::isReserved(Token tok) {
  for (const Token &marker : reserved) {
    if (marker == tok) {
      return true;
    }
  }
  return false;
}
// Tokenize the whole stream, line by line.
//
// Tokens are separated by spaces, tabs, and commas; a ';' starts a comment
// running to end of line. After each line an "EOL" marker is appended, and a
// final "EOF" marker terminates the list. The original treated only ' ' and
// ',' as separators, so tab-indented assembly (the conventional style, cf.
// GNU as) produced fused tokens like "add\ta"; '\t' is now a separator too.
//
// NOTE(review): lines of MAX_LINE_LEN - 1 characters or more set failbit on
// getline(), silently ending the scan -- TODO confirm whether long lines
// should be an error instead.
//
// Returns a heap-allocated TokenList that the CALLER OWNS and must delete.
TokenList *Tokenizer::tokenize(std::basic_istream<char> &lines) {
  auto tokens = new TokenList();
  int lineno = 0;
  for (std::array<char, MAX_LINE_LEN> line; lines.getline(&line[0], sizeof(line));) {
    size_t begin = 0;  // start index of the token currently being scanned
    for (size_t pos = 0; pos < MAX_LINE_LEN; pos++) {
      char curr = line[pos];
      if ((curr == '\n') || (curr == '\0') || (curr == ';')) {
        // End of line (or start of a comment): flush any pending token,
        // emit the EOL marker, and move on to the next line.
        if (begin < pos) {
          auto tok = Token{&line[begin], pos - begin};
          tokens->add(tok);
        }
        tokens->add_reserved("EOL");
        lineno++;
        break;
      } else if ((curr == ' ') || (curr == '\t') || (curr == ',')) {
        // Separator: flush the pending token, if any, and restart the
        // token after the separator.
        if (begin < pos) {
          auto tok = Token{&line[begin], pos - begin};
          tokens->add(tok);
        }
        begin = pos + 1;
      }
    }
  }
  tokens->add_reserved("EOF");
  return tokens;
}

92
test/TokenizerTest.cpp

@ -0,0 +1,92 @@
#include <fstream>
#define BOOST_TEST_MAIN
#define BOOST_TEST_DYN_LINK
#include <boost/test/unit_test.hpp>
#include "tokenizer.h"
// Only the marker tokens ("EOL"/"EOF") are reserved; mnemonics, numbers,
// and directives are not.
BOOST_AUTO_TEST_CASE(test_Tokenizer_isReserved) {
BOOST_TEST(Tokenizer::isReserved("add") == false);
BOOST_TEST(Tokenizer::isReserved("73") == false);
BOOST_TEST(Tokenizer::isReserved(".section") == false);
BOOST_TEST(Tokenizer::isReserved("EOL") == true);
}
// One instruction tokenizes to its three fields plus the EOL and EOF markers.
// NOTE(review): the TokenList returned by tokenize() is never deleted here.
BOOST_AUTO_TEST_CASE(test_Tokenizer_tokenize0) {
auto tokenizer = Tokenizer{};
auto input = "add a, 32\n";
std::stringstream stream;
stream << input;
auto tokens = tokenizer.tokenize(stream);
BOOST_REQUIRE_EQUAL(tokens->size(), 5);
BOOST_REQUIRE_EQUAL(tokens->at(0), Token("add"));
BOOST_REQUIRE_EQUAL(tokens->at(1), Token("a"));
BOOST_REQUIRE_EQUAL(tokens->at(2), Token("32"));
BOOST_REQUIRE_EQUAL(tokens->at(3), Token("EOL"));
BOOST_REQUIRE_EQUAL(tokens->at(4), Token("EOF"));
}
class TokenizerTestFile {
public:
explicit TokenizerTestFile(std::string filename, std::string expectedFilename) :
filename(filename), expectedFilename(expectedFilename) {
stream = std::ifstream{std::string{"test/data/"} + filename};
expectedStream = std::ifstream{std::string{"test/data/"} + expectedFilename};
}
~TokenizerTestFile() {
stream.close();
expectedStream.close();
}
void doTest() {
std::array<char, MAX_LINELEN> line;
auto tokenizer = Tokenizer{};
auto tokens = tokenizer.tokenize(stream);
expectedStream.getline(line.data(), MAX_LINELEN);
auto token = tokens->begin();
while ((!expectedStream.eof()) && (token != tokens->end())) {
BOOST_REQUIRE_EQUAL(std::string{line.data()}, *token);
expectedStream.getline(line.data(), MAX_LINELEN);
token++;
}
BOOST_TEST(expectedStream.eof());
if (token != tokens->end()) {
BOOST_FAIL("token != tokens->end()");
}
}
static const int MAX_LINELEN = 256;
private:
std::string filename;
std::string expectedFilename;
std::ifstream stream;
std::ifstream expectedStream;
};
// File-driven tokenizer cases: each pairs an input .asm with its expected
// .tokens listing under test/data/ (arithmetic lines and comment handling).
BOOST_AUTO_TEST_CASE(test_Tokenizer_arith0) {
TokenizerTestFile testFile{"arith0.asm", "arith0.tokens"};
testFile.doTest();
}
BOOST_AUTO_TEST_CASE(test_Tokenizer_arith1) {
TokenizerTestFile testFile{"arith1.asm", "arith1.tokens"};
testFile.doTest();
}
BOOST_AUTO_TEST_CASE(test_Tokenizer_comment0) {
TokenizerTestFile testFile{"comment0.asm", "comment0.tokens"};
testFile.doTest();
}
BOOST_AUTO_TEST_CASE(test_Tokenizer_comment1) {
TokenizerTestFile testFile{"comment1.asm", "comment1.tokens"};
testFile.doTest();
}
BOOST_AUTO_TEST_CASE(test_Tokenizer_comment2) {
TokenizerTestFile testFile{"comment2.asm", "comment2.tokens"};
testFile.doTest();
}

1
test/data/arith0.asm

@ -0,0 +1 @@
add a, 32

5
test/data/arith0.tokens

@ -0,0 +1,5 @@
add
a
32
EOL
EOF

3
test/data/arith1.asm

@ -0,0 +1,3 @@
add a,32
add a, 32
add a, 32

13
test/data/arith1.tokens

@ -0,0 +1,13 @@
add
a
32
EOL
add
a
32
EOL
add
a
32
EOL
EOF

1
test/data/comment0.asm

@ -0,0 +1 @@
; File containing only a comment, starting at the beginning of a line

2
test/data/comment0.tokens

@ -0,0 +1,2 @@
EOL
EOF

1
test/data/comment1.asm

@ -0,0 +1 @@
; Comment with a couple of spaces before it begins.

2
test/data/comment1.tokens

@ -0,0 +1,2 @@
EOL
EOF

1
test/data/comment2.asm

@ -0,0 +1 @@
add a, 32 ; End of line comment

5
test/data/comment2.tokens

@ -0,0 +1,5 @@
add
a
32
EOL
EOF

1
test/data/directive.asm

@ -0,0 +1 @@
.align

1
test/data/directive0_align_invalid.asm

@ -0,0 +1 @@
.align

1
test/data/directive1_align_3_args.asm

@ -0,0 +1 @@
.align 16, 0, 256

1
test/data/directive2_ascii0.asm

@ -0,0 +1 @@
.ascii "a string"

1
test/data/directive2_ascii1.asm

@ -0,0 +1 @@
.ascii " a string starting with a space"

1
test/data/directive2_ascii2.asm

@ -0,0 +1 @@
.ascii "a string ending with a null byte\0"

1
test/data/directive2_ascii3.asm

@ -0,0 +1 @@
.ascii "a string", "another string", "a third string"

1
test/data/directive3_asciz0.asm

@ -0,0 +1 @@
.asciz "a string"

1
test/data/directive3_asciz1.asm

@ -0,0 +1 @@
.asciz "a string", "another string"

2
test/data/directive4_balign0.asm

@ -0,0 +1,2 @@
; Invalid balign directive (no arguments)
.balign

2
test/data/directive4_balign1.asm

@ -0,0 +1,2 @@
; balign with one argument.
.balign 4

2
test/data/directive4_balign2.asm

@ -0,0 +1,2 @@
; balign with two arguments.
.balign 4, 0xff

2
test/data/directive4_balign3.asm

@ -0,0 +1,2 @@
; balign with two arguments.
.balign 4, 0xff, 256

2
test/data/directive5_byte0.asm

@ -0,0 +1,2 @@
; byte with no arguments
.byte

2
test/data/directive5_byte1.asm

@ -0,0 +1,2 @@
; byte with one argument.
.byte 1

2
test/data/directive5_byte2.asm

@ -0,0 +1,2 @@
; byte with five arguments.
.byte 1, 2, 3, 4, 5

0
test/data/directive6_comm.asm

Loading…
Cancel
Save