import from github

This commit is contained in:
2022-05-19 17:14:13 +00:00
parent 5247c34f50
commit ab32b30591
12612 changed files with 1905035 additions and 83 deletions

1
tools/preproc/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
preproc

19
tools/preproc/LICENSE Normal file
View File

@ -0,0 +1,19 @@
Copyright (c) 2016 YamaArashi
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

26
tools/preproc/Makefile Normal file
View File

@ -0,0 +1,26 @@
# Build rules for the preproc tool.

# The compiler can be overridden from the environment or the command line.
CXX ?= g++
# C++11, optimized; -Wall with the switch warnings turned off, and every
# remaining warning promoted to an error.
CXXFLAGS := -std=c++11 -O2 -Wall -Wno-switch -Werror
# All sources are compiled and linked by one compiler invocation (no object files).
SRCS := asm_file.cpp c_file.cpp charmap.cpp preproc.cpp string_parser.cpp \
utf8.cpp
# Headers are prerequisites of the executable, so editing any of them rebuilds it.
HEADERS := asm_file.h c_file.h char_util.h charmap.h preproc.h string_parser.h \
utf8.h
# Executable suffix: ".exe" when OS is Windows_NT, empty otherwise.
ifeq ($(OS),Windows_NT)
EXE := .exe
else
EXE :=
endif
.PHONY: all clean
# Default target; the "@:" recipe is a silent no-op.
all: preproc$(EXE)
@:
preproc$(EXE): $(SRCS) $(HEADERS)
$(CXX) $(CXXFLAGS) $(SRCS) -o $@ $(LDFLAGS)
# Removes the executable under both possible names.
clean:
$(RM) preproc preproc.exe

599
tools/preproc/asm_file.cpp Normal file
View File

@ -0,0 +1,599 @@
// Copyright(c) 2016 YamaArashi
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include <cstdio>
#include <cstdarg>
#include <stdexcept>
#include "preproc.h"
#include "asm_file.h"
#include "char_util.h"
#include "utf8.h"
#include "string_parser.h"
#include "../../gflib/characters.h"
// Loads the whole file into a null-terminated buffer, resets the parse
// position to the start of line 1 and blanks out all comments.
// Any I/O failure is fatal.
AsmFile::AsmFile(std::string filename) : m_filename(filename)
{
    FILE *fp = std::fopen(filename.c_str(), "rb");

    if (fp == NULL)
        FATAL_ERROR("Failed to open \"%s\" for reading.\n", filename.c_str());

    // Determine the file size by seeking to the end.
    std::fseek(fp, 0, SEEK_END);

    m_size = std::ftell(fp);

    if (m_size < 0)
        FATAL_ERROR("File size of \"%s\" is less than zero.\n", filename.c_str());

    // One extra byte for the terminating null character.
    m_buffer = new char[m_size + 1];

    std::rewind(fp);

    // fread() with an element size of zero always returns 0, so an empty file
    // must not be treated as a failed read.
    if (m_size > 0 && std::fread(m_buffer, m_size, 1, fp) != 1)
        FATAL_ERROR("Failed to read \"%s\".\n", filename.c_str());

    m_buffer[m_size] = 0;

    std::fclose(fp);

    m_pos = 0;
    m_lineNum = 1;
    m_lineStart = 0;

    RemoveComments();
}
// Move constructor: adopts the other file's buffer together with its parse
// state. Initializers follow the member declaration order.
AsmFile::AsmFile(AsmFile&& other)
    : m_buffer(other.m_buffer),
      m_pos(other.m_pos),
      m_size(other.m_size),
      m_lineNum(other.m_lineNum),
      m_lineStart(other.m_lineStart),
      m_filename(std::move(other.m_filename))
{
    // The moved-from object must not free the buffer it no longer owns.
    other.m_buffer = nullptr;
}
// Releases the file contents buffer.
// A moved-from AsmFile has m_buffer set to nullptr, for which delete[] does nothing.
AsmFile::~AsmFile()
{
delete[] m_buffer;
}
// Removes comments to simplify further processing.
// Comment text is overwritten with spaces in place, so the buffer keeps its
// length and all other characters keep their offsets. Newlines inside block
// comments are preserved. Text inside single- or double-quoted literals is
// left untouched.
// It stops upon encountering a null character,
// which may or may not be the end of file marker.
// If it's not, the error will be caught later.
void AsmFile::RemoveComments()
{
long pos = 0;
// Quote character of the literal currently being scanned, or 0 when outside any literal.
char stringChar = 0;
for (;;)
{
if (m_buffer[pos] == 0)
return;
if (stringChar != 0)
{
// A backslash directly followed by the quote character is an escaped quote: skip both.
// NOTE(review): a literal ending in an escaped backslash (two backslashes
// followed by the closing quote) is also matched here on its second
// backslash - confirm whether such input needs to be supported.
if (m_buffer[pos] == '\\' && m_buffer[pos + 1] == stringChar)
{
pos += 2;
}
else
{
// An unescaped quote character closes the literal.
if (m_buffer[pos] == stringChar)
stringChar = 0;
pos++;
}
}
// An '@' that is not preceded by a backslash starts a comment that runs to the end of the line.
else if (m_buffer[pos] == '@' && (pos == 0 || m_buffer[pos - 1] != '\\'))
{
// Blank the comment but keep the terminating newline.
while (m_buffer[pos] != '\n' && m_buffer[pos] != 0)
m_buffer[pos++] = ' ';
}
// Block comment.
else if (m_buffer[pos] == '/' && m_buffer[pos + 1] == '*')
{
// Blank the opening delimiter.
m_buffer[pos++] = ' ';
m_buffer[pos++] = ' ';
for (;;)
{
// Unterminated comment (or embedded null): stop processing here.
if (m_buffer[pos] == 0)
return;
if (m_buffer[pos] == '*' && m_buffer[pos + 1] == '/')
{
// Blank the closing delimiter and resume normal scanning.
m_buffer[pos++] = ' ';
m_buffer[pos++] = ' ';
break;
}
else
{
// Newlines are kept so that the number of lines does not change.
if (m_buffer[pos] != '\n')
m_buffer[pos] = ' ';
pos++;
}
}
}
else
{
// Entering a quoted literal: remember which quote character closes it.
if (m_buffer[pos] == '"' || m_buffer[pos] == '\'')
stringChar = m_buffer[pos];
pos++;
}
}
}
// Tests whether the text at the current position begins with `name`.
// On a match the position is advanced past it and true is returned;
// otherwise the position is left alone and false is returned.
bool AsmFile::CheckForDirective(std::string name)
{
    const long nameLength = static_cast<long>(name.length());
    long matched = 0;

    // Count matching characters, never reading past the end of the file.
    while (matched < nameLength
        && m_pos + matched < m_size
        && name[matched] == m_buffer[m_pos + matched])
    {
        matched++;
    }

    if (matched != nameLength)
        return false;

    m_pos += nameLength;
    return true;
}
// Skips leading whitespace, then tries each known directive in turn.
// The first one that matches is consumed and returned; if none matches,
// Directive::Unknown is returned.
Directive AsmFile::GetDirective()
{
    struct KnownDirective
    {
        const char* text;
        Directive directive;
    };

    // Checked in this order.
    static const KnownDirective knownDirectives[] =
    {
        { ".include", Directive::Include },
        { ".string",  Directive::String },
        { ".braille", Directive::Braille },
    };

    SkipWhitespace();

    for (const KnownDirective& candidate : knownDirectives)
    {
        if (CheckForDirective(candidate.text))
            return candidate.directive;
    }

    return Directive::Unknown;
}
// Checks whether the current position holds a label terminated by '::'.
// If so, the label and the rest of its line are consumed and the label name
// is returned. Otherwise nothing is consumed and an empty string is returned.
std::string AsmFile::GetGlobalLabel()
{
    const long labelStart = m_pos;
    long cursor = labelStart;

    // Scan over an identifier, if there is one.
    if (IsIdentifierStartingChar(m_buffer[cursor]))
    {
        do
        {
            cursor++;
        } while (IsIdentifierChar(m_buffer[cursor]));
    }

    const bool hasDoubleColon = (m_buffer[cursor] == ':' && m_buffer[cursor + 1] == ':');

    if (!hasDoubleColon)
        return std::string();

    m_pos = cursor + 2;
    ExpectEmptyRestOfLine();

    return std::string(m_buffer + labelStart, cursor - labelStart);
}
// Advances the position past any run of spaces and tabs.
void AsmFile::SkipWhitespace()
{
    for (;;)
    {
        const char c = m_buffer[m_pos];

        if (c != ' ' && c != '\t')
            break;

        m_pos++;
    }
}
// Reads a double-quoted include path and consumes the rest of the line.
// Only printable ASCII is accepted, escape sequences are rejected, and the
// path may be at most kMaxPath characters long. Every violation is reported
// through RaiseError.
std::string AsmFile::ReadPath()
{
    SkipWhitespace();

    if (m_buffer[m_pos] != '"')
        RaiseError("expected file path");

    m_pos++; // Go past the left quote.

    const long pathStart = m_pos;
    int length = 0;

    for (;;)
    {
        unsigned char c = m_buffer[m_pos];

        if (c == '"')
            break;

        m_pos++;

        if (c == 0)
        {
            // A null at or past the end of the buffer is the terminator we added ourselves.
            if (m_pos >= m_size)
                RaiseError("unexpected EOF in include string");
            else
                RaiseError("unexpected null character in include string");
        }

        if (!IsAsciiPrintable(c))
            RaiseError("unexpected character '\\x%02X' in include string", c);

        // Don't bother allowing any escape sequences.
        if (c == '\\')
        {
            // Report the character that follows the backslash.
            c = m_buffer[m_pos];
            RaiseError("unexpected escape '\\%c' in include string", c);
        }

        length++;

        if (length > kMaxPath)
            RaiseError("path is too long");
    }

    m_pos++; // Go past the right quote.

    ExpectEmptyRestOfLine();

    return std::string(m_buffer + pathStart, length);
}
// Reads a charmap string.
int AsmFile::ReadString(unsigned char* s)
{
SkipWhitespace();
int length;
StringParser stringParser(m_buffer, m_size);
try
{
m_pos += stringParser.ParseString(m_pos, s, length);
}
catch (std::runtime_error& e)
{
RaiseError(e.what());
}
SkipWhitespace();
if (ConsumeComma())
{
SkipWhitespace();
int padLength = ReadPadLength();
while (length < padLength)
{
s[length++] = CHAR_SPACE;
}
}
ExpectEmptyRestOfLine();
return length;
}
// Raises an error when `length` has reached kMaxStringLength.
// ReadBraille calls this before each byte it appends.
void AsmFile::VerifyStringLength(int length)
{
    const bool limitReached = (kMaxStringLength == length);

    if (!limitReached)
        return;

    RaiseError("mapped string longer than %d bytes", kMaxStringLength);
}
int AsmFile::ReadBraille(unsigned char* s)
{
static std::map<char, unsigned char> encoding =
{
{ 'A', BRAILLE_CHAR_A },
{ 'B', BRAILLE_CHAR_B },
{ 'C', BRAILLE_CHAR_C },
{ 'D', BRAILLE_CHAR_D },
{ 'E', BRAILLE_CHAR_E },
{ 'F', BRAILLE_CHAR_F },
{ 'G', BRAILLE_CHAR_G },
{ 'H', BRAILLE_CHAR_H },
{ 'I', BRAILLE_CHAR_I },
{ 'J', BRAILLE_CHAR_J },
{ 'K', BRAILLE_CHAR_K },
{ 'L', BRAILLE_CHAR_L },
{ 'M', BRAILLE_CHAR_M },
{ 'N', BRAILLE_CHAR_N },
{ 'O', BRAILLE_CHAR_O },
{ 'P', BRAILLE_CHAR_P },
{ 'Q', BRAILLE_CHAR_Q },
{ 'R', BRAILLE_CHAR_R },
{ 'S', BRAILLE_CHAR_S },
{ 'T', BRAILLE_CHAR_T },
{ 'U', BRAILLE_CHAR_U },
{ 'V', BRAILLE_CHAR_V },
{ 'W', BRAILLE_CHAR_W },
{ 'X', BRAILLE_CHAR_X },
{ 'Y', BRAILLE_CHAR_Y },
{ 'Z', BRAILLE_CHAR_Z },
{ 'a', BRAILLE_CHAR_A },
{ 'b', BRAILLE_CHAR_B },
{ 'c', BRAILLE_CHAR_C },
{ 'd', BRAILLE_CHAR_D },
{ 'e', BRAILLE_CHAR_E },
{ 'f', BRAILLE_CHAR_F },
{ 'g', BRAILLE_CHAR_G },
{ 'h', BRAILLE_CHAR_H },
{ 'i', BRAILLE_CHAR_I },
{ 'j', BRAILLE_CHAR_J },
{ 'k', BRAILLE_CHAR_K },
{ 'l', BRAILLE_CHAR_L },
{ 'm', BRAILLE_CHAR_M },
{ 'n', BRAILLE_CHAR_N },
{ 'o', BRAILLE_CHAR_O },
{ 'p', BRAILLE_CHAR_P },
{ 'q', BRAILLE_CHAR_Q },
{ 'r', BRAILLE_CHAR_R },
{ 's', BRAILLE_CHAR_S },
{ 't', BRAILLE_CHAR_T },
{ 'u', BRAILLE_CHAR_U },
{ 'v', BRAILLE_CHAR_V },
{ 'w', BRAILLE_CHAR_W },
{ 'x', BRAILLE_CHAR_X },
{ 'y', BRAILLE_CHAR_Y },
{ 'z', BRAILLE_CHAR_Z },
{ '0', BRAILLE_CHAR_0 },
{ '1', BRAILLE_CHAR_1 },
{ '2', BRAILLE_CHAR_2 },
{ '3', BRAILLE_CHAR_3 },
{ '4', BRAILLE_CHAR_4 },
{ '5', BRAILLE_CHAR_5 },
{ '6', BRAILLE_CHAR_6 },
{ '7', BRAILLE_CHAR_7 },
{ '8', BRAILLE_CHAR_8 },
{ '9', BRAILLE_CHAR_9 },
{ ' ', BRAILLE_CHAR_SPACE },
{ ',', BRAILLE_CHAR_COMMA },
{ '.', BRAILLE_CHAR_PERIOD },
{ '?', BRAILLE_CHAR_QUESTION_MARK },
{ '!', BRAILLE_CHAR_EXCL_MARK },
{ ':', BRAILLE_CHAR_COLON },
{ ';', BRAILLE_CHAR_SEMICOLON },
{ '-', BRAILLE_CHAR_HYPHEN },
{ '/', BRAILLE_CHAR_SLASH },
{ '(', BRAILLE_CHAR_PAREN },
{ ')', BRAILLE_CHAR_PAREN },
{ '\'', BRAILLE_CHAR_APOSTROPHE },
{ '#', BRAILLE_CHAR_NUMBER },
{ '$', EOS },
};
SkipWhitespace();
int length = 0;
if (m_buffer[m_pos] != '"')
RaiseError("expected braille string literal");
m_pos++;
bool inNumber = false;
while (m_buffer[m_pos] != '"')
{
if (m_buffer[m_pos] == '\\' && m_buffer[m_pos + 1] == 'n')
{
VerifyStringLength(length);
s[length++] = CHAR_NEWLINE;
m_pos += 2;
}
else
{
char c = m_buffer[m_pos];
if (encoding.count(c) == 0)
{
if (IsAsciiPrintable(c))
RaiseError("character '%c' not valid in braille string", m_buffer[m_pos]);
else
RaiseError("character '\\x%02X' not valid in braille string", m_buffer[m_pos]);
}
if (!inNumber && c >= '0' && c <= '9' )
{
// Output number indicator at start of a number
inNumber = true;
VerifyStringLength(length);
s[length++] = BRAILLE_CHAR_NUMBER;
}
else if (inNumber && encoding[c] == BRAILLE_CHAR_SPACE)
{
// Number ends at a space.
// Non-number characters encountered before a space will simply be output as is.
inNumber = false;
}
VerifyStringLength(length);
s[length++] = encoding[c];
m_pos++;
}
}
m_pos++; // Go past the right quote.
ExpectEmptyRestOfLine();
return length;
}
// Steps over a comma at the current position, if there is one.
// Returns true when a comma was consumed.
bool AsmFile::ConsumeComma()
{
    const bool atComma = (m_buffer[m_pos] == ',');

    if (atComma)
        m_pos++;

    return atComma;
}
// Returns the numeric value of digit character `c` in the given radix,
// or -1 if `c` is not a digit (0-9, A-F, a-f) or its value is not below `radix`.
static int ConvertDigit(char c, int radix)
{
    int value = -1;

    if ('0' <= c && c <= '9')
        value = c - '0';
    else if ('A' <= c && c <= 'F')
        value = c - 'A' + 10;
    else if ('a' <= c && c <= 'f')
        value = c - 'a' + 10;

    if (value < 0 || value >= radix)
        return -1;

    return value;
}
// Reads a non-negative pad length written in decimal, or in hexadecimal with
// a "0x" prefix, and returns its value.
// Raises an error if no integer is present or if the value exceeds
// kMaxStringLength.
int AsmFile::ReadPadLength()
{
    if (!IsAsciiDigit(m_buffer[m_pos]))
        RaiseError("expected integer");

    int radix = 10;

    if (m_buffer[m_pos] == '0' && m_buffer[m_pos + 1] == 'x')
    {
        radix = 16;
        m_pos += 2;

        // A bare "0x" prefix is not a number; without this check it would be
        // silently accepted as zero.
        if (ConvertDigit(m_buffer[m_pos], radix) == -1)
            RaiseError("expected integer");
    }

    unsigned n = 0;
    int digit;

    while ((digit = ConvertDigit(m_buffer[m_pos], radix)) != -1)
    {
        n = n * radix + digit;

        // Checking after every digit also keeps n from overflowing.
        if (n > kMaxStringLength)
            RaiseError("pad length greater than maximum length (%d)", kMaxStringLength);

        m_pos++;
    }

    return n;
}
// Prints the current line, from its first character, to stdout and moves to
// the start of the next line. A final line without a trailing newline is
// printed with a warning; a null character inside the file is an error.
void AsmFile::OutputLine()
{
    // Find the end of the current line.
    for (;;)
    {
        const char c = m_buffer[m_pos];

        if (c == '\n' || c == 0)
            break;

        m_pos++;
    }

    if (m_buffer[m_pos] != 0)
    {
        // Temporarily terminate the line so that puts() prints only this line.
        m_buffer[m_pos] = 0;
        puts(&m_buffer[m_lineStart]);
        m_buffer[m_pos] = '\n';

        m_pos++;
        m_lineStart = m_pos;
        m_lineNum++;
    }
    else if (m_pos < m_size)
    {
        RaiseError("unexpected null character");
    }
    else
    {
        RaiseWarning("file doesn't end with newline");
        puts(&m_buffer[m_lineStart]);
    }
}
// Requires that only whitespace remains on the current line and moves to the
// next line. Accepts "\n" and "\r\n" line endings. Reaching the end of the
// file here only produces a warning; anything else is an error.
void AsmFile::ExpectEmptyRestOfLine()
{
    SkipWhitespace();

    const char c = m_buffer[m_pos];
    long newlineLength;

    if (c == 0)
    {
        if (m_pos >= m_size)
            RaiseWarning("file doesn't end with newline");
        else
            RaiseError("unexpected null character");
        return;
    }
    else if (c == '\n')
    {
        newlineLength = 1;
    }
    else if (c == '\r' && m_buffer[m_pos + 1] == '\n')
    {
        newlineLength = 2;
    }
    else
    {
        RaiseError("junk at end of line");
        return;
    }

    m_pos += newlineLength;
    m_lineStart = m_pos;
    m_lineNum++;
}
// Returns true once the position has reached (or passed) the end of the file.
bool AsmFile::IsAtEnd()
{
    const bool inputRemains = (m_pos < m_size);
    return !inputRemains;
}
// Output the current location to set gas's logical file and line numbers.
void AsmFile::OutputLocation()
{
std::printf("# %ld \"%s\"\n", m_lineNum, m_filename.c_str());
}
// Reports a diagnostic message.
void AsmFile::ReportDiagnostic(const char* type, const char* format, std::va_list args)
{
const int bufferSize = 1024;
char buffer[bufferSize];
std::vsnprintf(buffer, bufferSize, format, args);
std::fprintf(stderr, "%s:%ld: %s: %s\n", m_filename.c_str(), m_lineNum, type, buffer);
}
// Shared body of the variadic Raise* methods: gathers the arguments that
// follow the enclosing function's `format` parameter and passes them to
// ReportDiagnostic under the given diagnostic type.
// It can only be expanded inside a variadic function whose last named
// parameter is called `format`.
#define DO_REPORT(type) \
do \
{ \
std::va_list args; \
va_start(args, format); \
ReportDiagnostic(type, format, args); \
va_end(args); \
} while (0)
// Reports an error diagnostic and terminates the program.
void AsmFile::RaiseError(const char* format, ...)
{
DO_REPORT("error");
std::exit(1);
}
// Reports a warning diagnostic.
void AsmFile::RaiseWarning(const char* format, ...)
{
DO_REPORT("warning");
}

73
tools/preproc/asm_file.h Normal file
View File

@ -0,0 +1,73 @@
// Copyright(c) 2016 YamaArashi
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#ifndef ASM_FILE_H
#define ASM_FILE_H
#include <cstdarg>
#include <cstdint>
#include <string>
#include "preproc.h"
// Result of AsmFile::GetDirective().
enum class Directive
{
Include, // ".include"
String, // ".string"
Braille, // ".braille"
Unknown // none of the above
};
// An assembly source file loaded into memory, together with the current
// parse position. Movable but not copyable; owns its buffer.
class AsmFile
{
public:
// Loads the file and blanks out its comments. Failure to open or read it is fatal.
AsmFile(std::string filename);
// Takes over the buffer and parse state of `other`.
AsmFile(AsmFile&& other);
AsmFile(const AsmFile&) = delete;
~AsmFile();
// Skips whitespace and consumes a known directive if one is present.
Directive GetDirective();
// Consumes a label ending in "::" and returns its name, or returns an empty string.
std::string GetGlobalLabel();
// Reads a double-quoted include path.
std::string ReadPath();
// Parses a charmap string into `s`; returns the number of bytes written.
int ReadString(unsigned char* s);
// Encodes a braille string literal into `s`; returns the number of bytes written.
int ReadBraille(unsigned char* s);
// True once the whole file has been consumed.
bool IsAtEnd();
// Prints the current line to stdout and advances to the next one.
void OutputLine();
// Prints a '# <line> "<file>"' location marker to stdout.
void OutputLocation();
private:
char* m_buffer; // file contents followed by a null terminator
long m_pos; // current offset into m_buffer
long m_size; // size of the file in bytes (excluding the terminator)
long m_lineNum; // current line number, starting at 1
long m_lineStart; // offset of the first character of the current line
std::string m_filename; // name used in diagnostics and location markers
bool ConsumeComma();
int ReadPadLength();
void RemoveComments();
bool CheckForDirective(std::string name);
void SkipWhitespace();
void ExpectEmptyRestOfLine();
void ReportDiagnostic(const char* type, const char* format, std::va_list args);
// Reports an error and terminates the program.
void RaiseError(const char* format, ...);
void RaiseWarning(const char* format, ...);
// Raises an error when `length` equals kMaxStringLength.
void VerifyStringLength(int length);
};
#endif // ASM_FILE_H

459
tools/preproc/c_file.cpp Normal file
View File

@ -0,0 +1,459 @@
// Copyright(c) 2016 YamaArashi
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include <cstdio>
#include <cstdarg>
#include <stdexcept>
#include <string>
#include <memory>
#include <cstring>
#include <cerrno>
#include "preproc.h"
#include "c_file.h"
#include "char_util.h"
#include "utf8.h"
#include "string_parser.h"
// Loads a C source file, or standard input when `isStdin` is set, into a
// null-terminated buffer. The input is read in CHUNK_SIZE pieces because the
// size of a stream such as stdin cannot be known up front.
// For stdin, `filenameCStr` is only used to build the name shown in
// diagnostics ("<stdin>/<name>").
// Any I/O or allocation failure is fatal.
CFile::CFile(const char * filenameCStr, bool isStdin)
{
    FILE *fp;

    if (isStdin) {
        fp = stdin;
        m_filename = std::string{"<stdin>/"}.append(filenameCStr);
    } else {
        fp = std::fopen(filenameCStr, "rb");
        m_filename = std::string(filenameCStr);
    }

    std::string& filename = m_filename;

    if (fp == NULL)
        FATAL_ERROR("Failed to open \"%s\" for reading.\n", filename.c_str());

    m_size = 0;

    // Always keep room for one more chunk plus the terminating null.
    std::size_t numAllocatedBytes = CHUNK_SIZE + 1;
    std::size_t numBytesRead = 0;

    m_buffer = static_cast<char *>(malloc(numAllocatedBytes));

    if (m_buffer == NULL) {
        FATAL_ERROR("Failed to allocate memory to process file \"%s\"!", filename.c_str());
    }

    for (;;) {
        // Append directly after the bytes actually read so far.
        const std::size_t count = std::fread(m_buffer + numBytesRead, 1, CHUNK_SIZE, fp);

        numBytesRead += count;

        // Check for errors even when nothing was read: a failing read that
        // returns 0 bytes must not be mistaken for end of file.
        if (std::ferror(fp)) {
            FATAL_ERROR("Failed to read \"%s\". (error: %s)", filename.c_str(), std::strerror(errno));
        }

        if (count == 0 || std::feof(fp)) {
            break;
        }

        if (numAllocatedBytes - numBytesRead < CHUNK_SIZE + 1) {
            numAllocatedBytes = numBytesRead + CHUNK_SIZE + 1;

            char *grownBuffer = static_cast<char *>(realloc(m_buffer, numAllocatedBytes));

            if (grownBuffer == NULL) {
                FATAL_ERROR("Failed to allocate memory to process file \"%s\"!", filename.c_str());
            }

            m_buffer = grownBuffer;
        }
    }

    m_size = static_cast<long>(numBytesRead);
    m_buffer[m_size] = 0;

    std::fclose(fp);

    m_pos = 0;
    m_lineNum = 1;
    m_isStdin = isStdin;
}
// Move constructor: adopts the other file's buffer together with its parse
// state. Initializers follow the member declaration order.
CFile::CFile(CFile&& other)
    : m_buffer(other.m_buffer),
      m_pos(other.m_pos),
      m_size(other.m_size),
      m_lineNum(other.m_lineNum),
      m_filename(std::move(other.m_filename)),
      m_isStdin(other.m_isStdin)
{
    // The moved-from object must not free the buffer it no longer owns.
    other.m_buffer = NULL;
}
// Releases the file contents buffer, which is allocated with malloc/realloc.
// A moved-from CFile has m_buffer set to NULL, for which free() does nothing.
CFile::~CFile()
{
free(m_buffer);
}
// Copies the file to stdout, expanding string and INCBIN constructs along the
// way. The contents of quoted literals are copied verbatim and are never
// searched for those constructs. Keeps the line counter up to date.
void CFile::Preproc()
{
    // Quote character of the literal being copied, or 0 when outside any literal.
    char openQuote = 0;

    while (m_pos < m_size)
    {
        if (openQuote == 0)
        {
            TryConvertString();
            TryConvertIncbin();

            // The conversions may have consumed the rest of the file.
            if (m_pos >= m_size)
                break;

            const char c = m_buffer[m_pos++];

            std::putchar(c);

            switch (c)
            {
            case '\n':
                m_lineNum++;
                break;
            case '"':
            case '\'':
                openQuote = c;
                break;
            }

            continue;
        }

        const char c = m_buffer[m_pos];

        if (c == openQuote)
        {
            // Closing quote.
            std::putchar(openQuote);
            m_pos++;
            openQuote = 0;
        }
        else if (c == '\\' && m_buffer[m_pos + 1] == openQuote)
        {
            // Escaped quote: copy both characters and stay inside the literal.
            std::putchar('\\');
            std::putchar(openQuote);
            m_pos += 2;
        }
        else
        {
            if (c == '\n')
                m_lineNum++;

            std::putchar(c);
            m_pos++;
        }
    }
}
// Steps over a single space or tab at the current position.
// Returns true when one was consumed.
bool CFile::ConsumeHorizontalWhitespace()
{
    const char c = m_buffer[m_pos];

    if (c != ' ' && c != '\t')
        return false;

    m_pos++;
    return true;
}
// Steps over one line ending ("\r\n" or "\n") at the current position,
// bumps the line counter and writes a single '\n' to stdout.
// Returns true when a line ending was consumed.
bool CFile::ConsumeNewline()
{
    long newlineLength = 0;

    if (m_buffer[m_pos] == '\r' && m_buffer[m_pos + 1] == '\n')
        newlineLength = 2;
    else if (m_buffer[m_pos] == '\n')
        newlineLength = 1;

    if (newlineLength == 0)
        return false;

    m_pos += newlineLength;
    m_lineNum++;
    std::putchar('\n');
    return true;
}
// Consumes any mix of spaces, tabs and line endings.
// Line endings are still echoed to stdout by ConsumeNewline.
void CFile::SkipWhitespace()
{
    for (;;)
    {
        if (ConsumeHorizontalWhitespace())
            continue;

        if (ConsumeNewline())
            continue;

        break;
    }
}
// If the current position holds a `_("...")` or `__("...")` construct,
// consumes it and prints the mapped string as a brace-enclosed list of byte
// values. `_` appends a 0xFF terminator, `__` does not. Several adjacent
// string literals inside the parentheses are concatenated.
// If no such construct starts here, nothing is consumed and nothing is printed.
void CFile::TryConvertString()
{
    const long oldPos = m_pos;
    bool noTerminator = false;

    // The underscore must not be the tail of a longer identifier.
    if (m_buffer[m_pos] != '_' || (m_pos > 0 && IsIdentifierChar(m_buffer[m_pos - 1])))
        return;

    m_pos++;

    if (m_buffer[m_pos] == '_')
    {
        noTerminator = true;
        m_pos++;
    }

    // Look ahead for the opening parenthesis without consuming anything.
    // SkipWhitespace() echoes newlines to stdout, so calling it before we
    // know that this really is a string construct would print those newlines
    // a second time once the position is rolled back.
    long lookahead = m_pos;

    for (;;)
    {
        const char c = m_buffer[lookahead];

        if (c == ' ' || c == '\t' || c == '\n')
            lookahead++;
        else if (c == '\r' && m_buffer[lookahead + 1] == '\n')
            lookahead += 2;
        else
            break;
    }

    if (m_buffer[lookahead] != '(')
    {
        m_pos = oldPos;
        return;
    }

    // Now it is safe to consume the whitespace for real (echoing newlines).
    SkipWhitespace();

    m_pos++; // Go past the left parenthesis.

    SkipWhitespace();

    std::printf("{ ");

    while (1)
    {
        SkipWhitespace();

        if (m_buffer[m_pos] == '"')
        {
            unsigned char s[kMaxStringLength];
            int length;
            StringParser stringParser(m_buffer, m_size);

            try
            {
                m_pos += stringParser.ParseString(m_pos, s, length);
            }
            catch (std::runtime_error& e)
            {
                RaiseError(e.what());
            }

            for (int i = 0; i < length; i++)
                printf("0x%02X, ", s[i]);
        }
        else if (m_buffer[m_pos] == ')')
        {
            m_pos++;
            break;
        }
        else
        {
            if (m_pos >= m_size)
                RaiseError("unexpected EOF");
            if (IsAsciiPrintable(m_buffer[m_pos]))
                RaiseError("unexpected character '%c'", m_buffer[m_pos]);
            else
                // Convert to unsigned first: a plain char >= 0x80 would be
                // sign-extended and printed as FFFFFFxx.
                RaiseError("unexpected character '\\x%02X'", static_cast<unsigned char>(m_buffer[m_pos]));
        }
    }

    if (noTerminator)
        std::printf(" }");
    else
        std::printf("0xFF }");
}
// Returns true when the text at the current position begins with `ident`.
// Does not consume anything.
bool CFile::CheckIdentifier(const std::string& ident)
{
    unsigned int matched = 0;

    // Count matching characters, never reading past the end of the file.
    while (matched < ident.length()
        && m_pos + matched < (unsigned)m_size
        && ident[matched] == m_buffer[m_pos + matched])
    {
        matched++;
    }

    return (matched == ident.length());
}
// Reads the entire file at `path` into a newly allocated buffer.
// On return `size` holds the number of bytes read.
// Any failure (open, seek, size query, read) is reported through RaiseError.
std::unique_ptr<unsigned char[]> CFile::ReadWholeFile(const std::string& path, int& size)
{
    FILE* fp = std::fopen(path.c_str(), "rb");

    if (fp == nullptr)
        RaiseError("Failed to open \"%s\" for reading.\n", path.c_str());

    // Determine the file size by seeking to the end.
    if (std::fseek(fp, 0, SEEK_END) != 0)
        RaiseError("Failed to seek in \"%s\".\n", path.c_str());

    const long fileSize = std::ftell(fp);

    // ftell() returns -1 on failure; that must never reach the allocation below.
    if (fileSize < 0)
        RaiseError("File size of \"%s\" is less than zero.\n", path.c_str());

    size = static_cast<int>(fileSize);

    std::unique_ptr<unsigned char[]> buffer = std::unique_ptr<unsigned char[]>(new unsigned char[size]);

    std::rewind(fp);

    if (std::fread(buffer.get(), size, 1, fp) != 1)
        RaiseError("Failed to read \"%s\".\n", path.c_str());

    std::fclose(fp);

    return buffer;
}
// Assembles a little-endian integer of `size` bytes (1, 2 or 4) starting at
// `offset` in `buffer`. Any other size is a fatal error.
int ExtractData(const std::unique_ptr<unsigned char[]>& buffer, int offset, int size)
{
    if (size != 1 && size != 2 && size != 4)
        FATAL_ERROR("Invalid size passed to ExtractData.\n");

    int value = 0;

    // Start with the most significant (last) byte and shift it up as the
    // less significant bytes are merged in.
    for (int i = size - 1; i >= 0; i--)
        value = (value << 8) | buffer[offset + i];

    return value;
}
// If the current position holds an INCBIN_{S,U}{8,16,32}("path", ...)
// construct, consumes it and prints the contents of the named files as a
// brace-enclosed list of integers of the given width. Unsigned values get a
// 'u' suffix. Each file size must be a multiple of the element size.
// If no such construct starts here, nothing is consumed and nothing is printed.
void CFile::TryConvertIncbin()
{
    std::string idents[6] = { "INCBIN_S8", "INCBIN_U8", "INCBIN_S16", "INCBIN_U16", "INCBIN_S32", "INCBIN_U32" };
    int incbinType = -1;

    for (int i = 0; i < 6; i++)
    {
        if (CheckIdentifier(idents[i]))
        {
            incbinType = i;
            break;
        }
    }

    if (incbinType == -1)
        return;

    // The identifiers come in signed/unsigned pairs of 1, 2 and 4 bytes.
    int size = 1 << (incbinType / 2);
    bool isSigned = ((incbinType % 2) == 0);

    const long oldPos = m_pos;

    m_pos += idents[incbinType].length();

    // Look ahead for the opening parenthesis without consuming anything.
    // SkipWhitespace() echoes newlines to stdout, so calling it before we
    // know that this really is an INCBIN construct would print those newlines
    // a second time once the position is rolled back.
    long lookahead = m_pos;

    for (;;)
    {
        const char c = m_buffer[lookahead];

        if (c == ' ' || c == '\t' || c == '\n')
            lookahead++;
        else if (c == '\r' && m_buffer[lookahead + 1] == '\n')
            lookahead += 2;
        else
            break;
    }

    if (m_buffer[lookahead] != '(')
    {
        m_pos = oldPos;
        return;
    }

    // Now it is safe to consume the whitespace for real (echoing newlines).
    SkipWhitespace();

    m_pos++; // Go past the left parenthesis.

    std::printf("{");

    while (true)
    {
        SkipWhitespace();

        if (m_buffer[m_pos] != '"')
            RaiseError("expected double quote");

        m_pos++;

        const long startPos = m_pos;

        while (m_buffer[m_pos] != '"')
        {
            if (m_buffer[m_pos] == 0)
            {
                if (m_pos >= m_size)
                    RaiseError("unexpected EOF in path string");
                else
                    RaiseError("unexpected null character in path string");
            }

            if (m_buffer[m_pos] == '\r' || m_buffer[m_pos] == '\n')
                RaiseError("unexpected end of line character in path string");

            if (m_buffer[m_pos] == '\\')
                RaiseError("unexpected escape in path string");

            m_pos++;
        }

        std::string path(&m_buffer[startPos], m_pos - startPos);

        m_pos++; // Go past the right quote.

        int fileSize;
        std::unique_ptr<unsigned char[]> buffer = ReadWholeFile(path, fileSize);

        if ((fileSize % size) != 0)
            RaiseError("Size %d doesn't evenly divide file size %d.\n", size, fileSize);

        int count = fileSize / size;
        int offset = 0;

        for (int i = 0; i < count; i++)
        {
            int data = ExtractData(buffer, offset, size);
            offset += size;

            if (isSigned)
                std::printf("%d,", data);
            else
                // %u requires an unsigned argument.
                std::printf("%uu,", static_cast<unsigned int>(data));
        }

        SkipWhitespace();

        // A comma introduces another file whose contents are appended.
        if (m_buffer[m_pos] != ',')
            break;

        m_pos++;
    }

    if (m_buffer[m_pos] != ')')
        RaiseError("expected ')'");

    m_pos++;

    std::printf("}");
}
// Reports a diagnostic message.
void CFile::ReportDiagnostic(const char* type, const char* format, std::va_list args)
{
const int bufferSize = 1024;
char buffer[bufferSize];
std::vsnprintf(buffer, bufferSize, format, args);
std::fprintf(stderr, "%s:%ld: %s: %s\n", m_filename.c_str(), m_lineNum, type, buffer);
}
// Shared body of the variadic Raise* methods: gathers the arguments that
// follow the enclosing function's `format` parameter and passes them to
// ReportDiagnostic under the given diagnostic type.
// It can only be expanded inside a variadic function whose last named
// parameter is called `format`.
#define DO_REPORT(type) \
do \
{ \
std::va_list args; \
va_start(args, format); \
ReportDiagnostic(type, format, args); \
va_end(args); \
} while (0)
// Reports an error diagnostic and terminates the program.
void CFile::RaiseError(const char* format, ...)
{
DO_REPORT("error");
std::exit(1);
}
// Reports a warning diagnostic.
void CFile::RaiseWarning(const char* format, ...)
{
DO_REPORT("warning");
}

61
tools/preproc/c_file.h Normal file
View File

@ -0,0 +1,61 @@
// Copyright(c) 2016 YamaArashi
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#ifndef C_FILE_H
#define C_FILE_H
#include <cstdarg>
#include <cstdint>
#include <string>
#include <memory>
#include "preproc.h"
// A C source file (or standard input) loaded into memory, together with the
// current parse position. Movable but not copyable; owns its buffer.
class CFile
{
public:
// Loads `filenameCStr`, or standard input when `isStdin` is true.
CFile(const char * filenameCStr, bool isStdin);
// Takes over the buffer and parse state of `other`.
CFile(CFile&& other);
CFile(const CFile&) = delete;
~CFile();
// Writes the processed file to stdout.
void Preproc();
private:
char* m_buffer; // file contents followed by a null terminator
long m_pos; // current offset into m_buffer
long m_size; // number of bytes read (excluding the terminator)
long m_lineNum; // current line number, starting at 1
std::string m_filename; // name used in diagnostics
bool m_isStdin; // whether the contents came from standard input
bool ConsumeHorizontalWhitespace();
bool ConsumeNewline();
void SkipWhitespace();
void TryConvertString();
std::unique_ptr<unsigned char[]> ReadWholeFile(const std::string& path, int& size);
bool CheckIdentifier(const std::string& ident);
void TryConvertIncbin();
void ReportDiagnostic(const char* type, const char* format, std::va_list args);
// Reports an error and terminates the program.
void RaiseError(const char* format, ...);
void RaiseWarning(const char* format, ...);
};
// Number of bytes requested per read while loading a file.
#define CHUNK_SIZE 4096
#endif // C_FILE_H

71
tools/preproc/char_util.h Normal file
View File

@ -0,0 +1,71 @@
// Copyright(c) 2016 YamaArashi
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#ifndef CHAR_UTIL_H
#define CHAR_UTIL_H
#include <cstdint>
#include <cassert>
// Returns whether the byte is in the 7-bit ASCII range (0-127).
inline bool IsAscii(unsigned char c)
{
    // ASCII bytes never have the top bit set.
    return (c & 0x80) == 0;
}
// Returns whether the byte is an ASCII letter (A-Z or a-z).
inline bool IsAsciiAlpha(unsigned char c)
{
    if (c >= 'a')
        return c <= 'z';

    return 'A' <= c && c <= 'Z';
}
// Returns whether the byte is an ASCII decimal digit (0-9).
inline bool IsAsciiDigit(unsigned char c)
{
    if (c < '0')
        return false;

    return c <= '9';
}
// Returns whether the byte is an ASCII hexadecimal digit (0-9, a-f or A-F).
inline bool IsAsciiHexDigit(unsigned char c)
{
    if ('0' <= c && c <= '9')
        return true;

    if ('a' <= c && c <= 'f')
        return true;

    return 'A' <= c && c <= 'F';
}
// Returns whether the byte is an ASCII letter or an ASCII decimal digit.
inline bool IsAsciiAlphanum(unsigned char c)
{
    const bool isUpper = ('A' <= c && c <= 'Z');
    const bool isLower = ('a' <= c && c <= 'z');
    const bool isDigit = ('0' <= c && c <= '9');

    return isUpper || isLower || isDigit;
}
// Returns whether the byte is a printable ASCII character (space through '~').
inline bool IsAsciiPrintable(unsigned char c)
{
    if (c < ' ')
        return false;

    return c <= '~';
}
// Returns whether the character may begin a C identifier, or the identifier
// of a "{FOO}" constant in strings: an ASCII letter or an underscore.
inline bool IsIdentifierStartingChar(unsigned char c)
{
    if (c == '_')
        return true;

    const bool isUpper = ('A' <= c && c <= 'Z');
    const bool isLower = ('a' <= c && c <= 'z');

    return isUpper || isLower;
}
// Returns whether the character may appear in a C identifier, or in the
// identifier of a "{FOO}" constant in strings: an ASCII letter, an ASCII
// decimal digit or an underscore.
inline bool IsIdentifierChar(unsigned char c)
{
    if (c == '_')
        return true;

    const bool isUpper = ('A' <= c && c <= 'Z');
    const bool isLower = ('a' <= c && c <= 'z');
    const bool isDigit = ('0' <= c && c <= '9');

    return isUpper || isLower || isDigit;
}
#endif // CHAR_UTIL_H

408
tools/preproc/charmap.cpp Normal file
View File

@ -0,0 +1,408 @@
// Copyright(c) 2016 YamaArashi
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include <cstdio>
#include <cstdint>
#include <cstdarg>
#include "preproc.h"
#include "charmap.h"
#include "char_util.h"
#include "utf8.h"
// Kind of left-hand side found by CharmapReader::ReadLhs().
enum LhsType
{
Char, // quoted character literal
Escape, // quoted character literal written with a backslash escape
Constant, // identifier
None // end of file: there is no further entry
};
// Left-hand side of a charmap entry, as returned by CharmapReader::ReadLhs().
struct Lhs
{
LhsType type;
std::string name; // set when the left-hand side is a constant
std::int32_t code; // decoded character code, set when it is a character literal
};
// Reader for a charmap file that has been loaded into memory.
// Not copyable; owns its buffer.
class CharmapReader
{
public:
// Loads the file and removes its comments. Failure to open or read it is fatal.
CharmapReader(std::string filename);
CharmapReader(const CharmapReader&) = delete;
~CharmapReader();
// Skips blank lines and reads the left-hand side of the next entry.
Lhs ReadLhs();
// Skips whitespace and consumes an '='; raises an error if there is none.
void ExpectEqualsSign();
std::string ReadSequence();
void ExpectEmptyRestOfLine();
void RaiseError(const char* format, ...);
private:
char* m_buffer; // file contents followed by a null terminator
long m_pos; // current offset into m_buffer
long m_size; // size of the file in bytes (excluding the terminator)
long m_lineNum; // current line number, starting at 1
std::string m_filename;
void RemoveComments();
std::string ReadConstant();
void SkipWhitespace();
};
// Loads the whole charmap file into a null-terminated buffer, resets the
// parse position to the start of line 1 and removes all comments.
// Any I/O failure is fatal.
CharmapReader::CharmapReader(std::string filename) : m_filename(filename)
{
    FILE *fp = std::fopen(filename.c_str(), "rb");

    if (fp == NULL)
        FATAL_ERROR("Failed to open \"%s\" for reading.\n", filename.c_str());

    // Determine the file size by seeking to the end.
    std::fseek(fp, 0, SEEK_END);

    m_size = std::ftell(fp);

    if (m_size < 0)
        FATAL_ERROR("File size of \"%s\" is less than zero.\n", filename.c_str());

    // One extra byte for the terminating null character.
    m_buffer = new char[m_size + 1];

    std::rewind(fp);

    // fread() with an element size of zero always returns 0, so an empty file
    // must not be treated as a failed read.
    if (m_size > 0 && std::fread(m_buffer, m_size, 1, fp) != 1)
        FATAL_ERROR("Failed to read \"%s\".\n", filename.c_str());

    m_buffer[m_size] = 0;

    std::fclose(fp);

    m_pos = 0;
    m_lineNum = 1;

    RemoveComments();
}
// Releases the file contents buffer allocated by the constructor.
CharmapReader::~CharmapReader()
{
delete[] m_buffer;
}
// Skips blank lines and reads the left-hand side of the next charmap entry.
//
// The left-hand side is one of:
// - a quoted UTF-8 character literal, optionally written with a backslash
//   escape. '\'' and '\\' denote the literal quote and backslash characters
//   (type Char); any other escape gets type Escape; escaping a double quote
//   is an error;
// - an identifier naming a constant (type Constant);
// - nothing, at the end of the file (type None).
//
// Malformed input is reported through RaiseError.
Lhs CharmapReader::ReadLhs()
{
    Lhs lhs;

    // Skip blank lines.
    for (;;)
    {
        SkipWhitespace();

        if (m_buffer[m_pos] == '\n')
        {
            m_pos++;
            m_lineNum++;
        }
        else
        {
            break;
        }
    }

    if (m_buffer[m_pos] == '\'')
    {
        m_pos++;

        bool isEscape = (m_buffer[m_pos] == '\\');

        if (isEscape)
        {
            m_pos++;
        }

        unsigned char c = m_buffer[m_pos];

        if (c == 0)
        {
            if (m_pos >= m_size)
                RaiseError("unexpected EOF in UTF-8 character literal");
            else
                RaiseError("unexpected null character in UTF-8 character literal");
        }

        if (IsAscii(c) && !IsAsciiPrintable(c))
            RaiseError("unexpected character U+%X in UTF-8 character literal", c);

        UnicodeChar unicodeChar = DecodeUtf8(&m_buffer[m_pos]);
        std::int32_t code = unicodeChar.code;

        if (code == -1)
            RaiseError("invalid encoding in UTF-8 character literal");

        m_pos += unicodeChar.encodingLength;

        if (m_buffer[m_pos] != '\'')
            RaiseError("unterminated character literal");

        m_pos++;

        lhs.code = code;

        if (isEscape)
        {
            if (code >= 128)
                RaiseError("escapes using non-ASCII characters are invalid");

            switch (code)
            {
            case '\'':
                lhs.type = LhsType::Char;
                break;
            case '\\':
                lhs.type = LhsType::Char;
                // Without this break an escaped backslash fell through to the
                // double-quote error below and could never be mapped.
                break;
            case '"':
                RaiseError("cannot escape double quote");
                break;
            default:
                lhs.type = LhsType::Escape;
            }
        }
        else
        {
            // An unescaped quote right after the opening quote means '' was written.
            if (code == '\'')
                RaiseError("empty character literal");

            lhs.type = LhsType::Char;
        }
    }
    else if (IsIdentifierStartingChar(m_buffer[m_pos]))
    {
        lhs.type = LhsType::Constant;
        lhs.name = ReadConstant();
    }
    else if (m_buffer[m_pos] == '\r')
    {
        RaiseError("only Unix-style LF newlines are supported");
    }
    else if (m_buffer[m_pos] == 0)
    {
        // A null before the end of the buffer is part of the file contents.
        if (m_pos < m_size)
            RaiseError("unexpected null character");

        lhs.type = LhsType::None;
    }
    else
    {
        RaiseError("junk at start of line");
    }

    return lhs;
}
// Skips blanks and consumes the '=' between the two sides of an entry;
// raises an error if the next character is anything else.
void CharmapReader::ExpectEqualsSign()
{
    SkipWhitespace();

    const bool atEqualsSign = (m_buffer[m_pos] == '=');

    if (!atEqualsSign)
        RaiseError("expected equals sign");

    m_pos++;
}
// Returns the numeric value of a hexadecimal digit character (either case).
// Any character that is not a hex digit yields 0.
static unsigned int ConvertHexDigit(char c)
{
    if ('0' <= c && c <= '9')
        return c - '0';

    if ('A' <= c && c <= 'F')
        return 10 + c - 'A';

    if ('a' <= c && c <= 'f')
        return 10 + c - 'a';

    return 0;
}
// Reads the right-hand side of an entry: a run of two-digit hexadecimal
// bytes, optionally separated by blanks. Returns the decoded bytes.
// Raises an error for an empty sequence, a dangling single hex digit, or more
// than kMaxCharmapSequenceLength bytes.
std::string CharmapReader::ReadSequence()
{
    SkipWhitespace();

    std::string bytes;
    unsigned int count = 0;

    // Decode each byte as soon as both of its digits have been seen.
    for (;;)
    {
        if (!IsAsciiHexDigit(m_buffer[m_pos]))
            break;

        if (!IsAsciiHexDigit(m_buffer[m_pos + 1]))
            break;

        const unsigned int high = ConvertHexDigit(m_buffer[m_pos]);
        const unsigned int low = ConvertHexDigit(m_buffer[m_pos + 1]);
        const unsigned char value = high * 16 + low;

        bytes += value;
        m_pos += 2;
        count++;

        if (count > kMaxCharmapSequenceLength)
            RaiseError("byte sequence too long (max is %lu bytes)", kMaxCharmapSequenceLength);

        SkipWhitespace();
    }

    // A lone hex digit left over means a byte was written with one digit.
    if (IsAsciiHexDigit(m_buffer[m_pos]))
        RaiseError("each byte must have 2 hex digits");

    if (count == 0)
        RaiseError("expected byte sequence");

    return bytes;
}
// Requires that nothing but blanks follows on the current line.
// Consumes the newline (and counts the line) when there is one; end of file
// is accepted without consuming anything.
void CharmapReader::ExpectEmptyRestOfLine()
{
    SkipWhitespace();

    switch (m_buffer[m_pos])
    {
    case 0:
        // The NUL at m_size is the buffer terminator; an earlier one is data.
        if (m_pos < m_size)
            RaiseError("unexpected null character");
        break;
    case '\n':
        m_pos++;
        m_lineNum++;
        break;
    case '\r':
        RaiseError("only Unix-style LF newlines are supported");
        break;
    default:
        RaiseError("junk at end of line");
        break;
    }
}
void CharmapReader::RaiseError(const char* format, ...)
{
const int bufferSize = 1024;
char buffer[bufferSize];
std::va_list args;
va_start(args, format);
std::vsnprintf(buffer, bufferSize, format, args);
va_end(args);
std::fprintf(stderr, "%s:%ld: error: %s\n", m_filename.c_str(), m_lineNum, buffer);
std::exit(1);
}
// Overwrites every '@' comment with spaces, up to but not including the
// newline, so that later parsing never sees comment text. An '@' inside a
// single-quoted character literal is left alone.
void CharmapReader::RemoveComments()
{
    long pos = 0;
    bool inLiteral = false;

    for (;;)
    {
        if (m_buffer[pos] == 0)
            return;

        if (inLiteral)
        {
            if (m_buffer[pos] == '\\' && m_buffer[pos + 1] != 0)
            {
                // Skip the backslash together with the character it escapes.
                // Treating only \' as an escape made '\\' look unterminated:
                // its second backslash swallowed the closing quote and
                // comments after it were no longer removed.
                pos += 2;
            }
            else
            {
                if (m_buffer[pos] == '\'')
                    inLiteral = false;

                pos++;
            }
        }
        else if (m_buffer[pos] == '@')
        {
            while (m_buffer[pos] != '\n' && m_buffer[pos] != 0)
                m_buffer[pos++] = ' ';
        }
        else
        {
            if (m_buffer[pos] == '\'')
                inLiteral = true;

            pos++;
        }
    }
}
// Consumes the run of identifier characters at the current position and
// returns it as a string.
std::string CharmapReader::ReadConstant()
{
    const long first = m_pos;

    for (; IsIdentifierChar(m_buffer[m_pos]); m_pos++)
    {
    }

    return std::string(m_buffer + first, m_pos - first);
}
// Advances past tabs and spaces; newlines are not skipped.
void CharmapReader::SkipWhitespace()
{
    for (;;)
    {
        const char current = m_buffer[m_pos];

        if (current != '\t' && current != ' ')
            return;

        m_pos++;
    }
}
// Builds the lookup tables from a charmap file. Each entry has the form
// "<lhs> = <hex bytes>"; defining the same char, escape or constant twice is
// reported as an error through the reader.
Charmap::Charmap(std::string filename)
{
    CharmapReader reader(filename);

    for (Lhs lhs = reader.ReadLhs(); lhs.type != LhsType::None; lhs = reader.ReadLhs())
    {
        reader.ExpectEqualsSign();

        const std::string bytes = reader.ReadSequence();

        if (lhs.type == LhsType::Char)
        {
            if (m_chars.find(lhs.code) != m_chars.end())
                reader.RaiseError("redefining char");

            m_chars[lhs.code] = bytes;
        }
        else if (lhs.type == LhsType::Escape)
        {
            // An empty slot means "not defined yet".
            if (m_escapes[lhs.code].length() != 0)
                reader.RaiseError("redefining escape");

            m_escapes[lhs.code] = bytes;
        }
        else if (lhs.type == LhsType::Constant)
        {
            if (m_constants.find(lhs.name) != m_constants.end())
                reader.RaiseError("redefining constant");

            m_constants[lhs.name] = bytes;
        }

        reader.ExpectEmptyRestOfLine();
    }
}

64
tools/preproc/charmap.h Normal file
View File

@ -0,0 +1,64 @@
// Copyright(c) 2016 YamaArashi
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#ifndef CHARMAP_H
#define CHARMAP_H
#include <cstdint>
#include <string>
#include <map>
#include <vector>
// Lookup tables loaded from a charmap file. Characters, backslash escapes
// and named constants each map to a byte sequence; every lookup returns an
// empty string when no mapping exists.
class Charmap
{
public:
    // Loads the mappings from the charmap file `filename`.
    Charmap(std::string filename);

    // Byte sequence mapped to the Unicode code point `code`.
    std::string Char(std::int32_t code) const
    {
        auto it = m_chars.find(code);

        if (it == m_chars.end())
            return std::string();

        return it->second;
    }

    // Byte sequence mapped to the escape character `code`.
    // Only ASCII escapes can be defined, so anything outside the table is
    // reported as unmapped instead of indexing past the end of m_escapes.
    std::string Escape(unsigned char code) const
    {
        if (code >= kNumEscapes)
            return std::string();

        return m_escapes[code];
    }

    // Byte sequence mapped to the constant named `identifier`.
    std::string Constant(const std::string& identifier) const
    {
        auto it = m_constants.find(identifier);

        if (it == m_constants.end())
            return std::string();

        return it->second;
    }

private:
    static const int kNumEscapes = 128; // one slot per ASCII character

    std::map<std::int32_t, std::string> m_chars;
    std::string m_escapes[kNumEscapes];
    std::map<std::string, std::string> m_constants;
};
#endif // CHARMAP_H

164
tools/preproc/preproc.cpp Normal file
View File

@ -0,0 +1,164 @@
// Copyright(c) 2016 YamaArashi
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include <string>
#include <stack>
#include "preproc.h"
#include "asm_file.h"
#include "c_file.h"
#include "charmap.h"
// Global charmap; main() assigns it from the CHARMAP_FILE argument.
Charmap* g_charmap;
// Writes `length` bytes from `s` to stdout as a single assembler line of the
// form "\t.byte 0xNN, 0xNN, ...". Nothing is printed when length <= 0.
void PrintAsmBytes(unsigned char *s, int length)
{
    if (length <= 0)
        return;

    std::printf("\t.byte ");

    for (int index = 0; index < length; index++)
    {
        // Separator goes before every byte except the first.
        if (index != 0)
            std::printf(", ");

        std::printf("0x%02X", s[index]);
    }

    std::putchar('\n');
}
void PreprocAsmFile(std::string filename)
{
std::stack<AsmFile> stack;
stack.push(AsmFile(filename));
for (;;)
{
while (stack.top().IsAtEnd())
{
stack.pop();
if (stack.empty())
return;
else
stack.top().OutputLocation();
}
Directive directive = stack.top().GetDirective();
switch (directive)
{
case Directive::Include:
stack.push(AsmFile(stack.top().ReadPath()));
stack.top().OutputLocation();
break;
case Directive::String:
{
unsigned char s[kMaxStringLength];
int length = stack.top().ReadString(s);
PrintAsmBytes(s, length);
break;
}
case Directive::Braille:
{
unsigned char s[kMaxStringLength];
int length = stack.top().ReadBraille(s);
PrintAsmBytes(s, length);
break;
}
case Directive::Unknown:
{
std::string globalLabel = stack.top().GetGlobalLabel();
if (globalLabel.length() != 0)
{
const char *s = globalLabel.c_str();
std::printf("%s: ; .global %s\n", s, s);
}
else
{
stack.top().OutputLine();
}
break;
}
}
}
}
void PreprocCFile(const char * filename, bool isStdin)
{
CFile cFile(filename, isStdin);
cFile.Preproc();
}
// Returns a pointer to the extension of `filename` (the text after the last
// '.'), or nullptr when there is none.
//
// nullptr is returned when the name has no dot, when the only dot is the very
// first character of `filename`, or when nothing follows the last dot.
// Dots in directory components are ignored: the search restarts after every
// '/' or '\\', so "dir.d/file" and "../file" have no extension instead of
// yielding "d/file" and "/file".
char* GetFileExtension(char* filename)
{
    char* lastDot = nullptr;

    for (char* p = filename; *p != 0; p++)
    {
        if (*p == '/' || *p == '\\')
            lastDot = nullptr; // a new path component begins
        else if (*p == '.')
            lastDot = p;
    }

    if (lastDot == nullptr || lastDot == filename)
        return nullptr;

    if (lastDot[1] == 0)
        return nullptr;

    return lastDot + 1;
}
// Entry point: preproc SRC_FILE CHARMAP_FILE [-i]
// Loads the charmap, then dispatches on the source file's extension:
// ".s" is handled as assembly, ".c" and ".i" as C. The optional -i flag is
// only examined for C sources.
int main(int argc, char **argv)
{
    if (argc < 3 || argc > 4)
    {
        std::fprintf(stderr, "Usage: %s SRC_FILE CHARMAP_FILE [-i]\nwhere -i denotes if input is from stdin\n", argv[0]);
        return 1;
    }

    g_charmap = new Charmap(argv[2]);

    char* extension = GetFileExtension(argv[1]);

    if (!extension)
        FATAL_ERROR("\"%s\" has no file extension.\n", argv[1]);

    // Only single-character extensions are recognised.
    const bool isSingleChar = (extension[1] == 0);
    const bool isAsmSource = isSingleChar && (extension[0] == 's');
    const bool isCSource = isSingleChar && (extension[0] == 'c' || extension[0] == 'i');

    if (isAsmSource)
    {
        PreprocAsmFile(argv[1]);
        return 0;
    }

    if (!isCSource)
        FATAL_ERROR("\"%s\" has an unknown file extension of \"%s\".\n", argv[1], extension);

    bool readFromStdin = false;

    if (argc == 4)
    {
        const char* flag = argv[3];
        const bool isStdinFlag = (flag[0] == '-' && flag[1] == 'i' && flag[2] == '\0');

        if (!isStdinFlag)
            FATAL_ERROR("unknown argument flag \"%s\".\n", argv[3]);

        readFromStdin = true;
    }

    PreprocCFile(argv[1], readFromStdin);

    return 0;
}

54
tools/preproc/preproc.h Normal file
View File

@ -0,0 +1,54 @@
// Copyright(c) 2016 YamaArashi
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#ifndef PREPROC_H
#define PREPROC_H
#include <cstdio>
#include <cstdlib>
#include "charmap.h"
// FATAL_ERROR(format, ...): prints a printf-style message to stderr and
// terminates the process with exit status 1.
// The two variants differ only in how the variadic arguments are pasted. The
// non-MSVC one uses the GNU `##__VA_ARGS__` extension, which removes the
// preceding comma when the macro is invoked with a format string alone.
// NOTE(review): the MSVC variant presumably relies on that compiler dropping
// the trailing comma on its own - confirm.
#ifdef _MSC_VER
#define FATAL_ERROR(format, ...) \
do \
{ \
std::fprintf(stderr, format, __VA_ARGS__); \
std::exit(1); \
} while (0)
#else
#define FATAL_ERROR(format, ...) \
do \
{ \
std::fprintf(stderr, format, ##__VA_ARGS__); \
std::exit(1); \
} while (0)
#endif // _MSC_VER
// NOTE(review): presumably the maximum length of an include path - its use is
// not visible here; confirm against asm_file.cpp.
const int kMaxPath = 256;
// Capacity in bytes of the buffers that receive a mapped string.
const int kMaxStringLength = 1024;
// Maximum number of bytes on the right-hand side of one charmap entry.
const unsigned long kMaxCharmapSequenceLength = 16;
// Global charmap, defined in preproc.cpp.
extern Charmap* g_charmap;
#endif // PREPROC_H

View File

@ -0,0 +1,355 @@
// Copyright(c) 2016 YamaArashi
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include <cstdio>
#include <cstdarg>
#include <stdexcept>
#include "preproc.h"
#include "string_parser.h"
#include "char_util.h"
#include "utf8.h"
// Reads a charmap char or escape sequence.
// Consumes one UTF-8 character, or a backslash plus the character after it,
// and returns the byte sequence the charmap maps it to. \" and \\ are looked
// up as the plain characters '"' and '\'. Raises an error when the input is
// malformed or no mapping exists.
std::string StringParser::ReadCharOrEscape()
{
    std::string sequence;

    bool isEscape = (m_buffer[m_pos] == '\\');

    if (isEscape)
    {
        m_pos++;

        if (m_buffer[m_pos] == '"')
        {
            sequence = g_charmap->Char('"');

            if (sequence.length() == 0)
                RaiseError("no mapping exists for double quote");

            // Consume the escaped quote; otherwise the caller would see it
            // next and take it for the end of the string.
            m_pos++;

            return sequence;
        }
        else if (m_buffer[m_pos] == '\\')
        {
            sequence = g_charmap->Char('\\');

            if (sequence.length() == 0)
                RaiseError("no mapping exists for backslash");

            // Consume the escaped backslash; otherwise it would be read
            // again as the start of another escape.
            m_pos++;

            return sequence;
        }
    }

    unsigned char c = m_buffer[m_pos];

    if (c == 0)
    {
        if (m_pos >= m_size)
            RaiseError("unexpected EOF in UTF-8 string");
        else
            RaiseError("unexpected null character in UTF-8 string");
    }

    if (IsAscii(c) && !IsAsciiPrintable(c))
        RaiseError("unexpected character U+%X in UTF-8 string", c);

    UnicodeChar unicodeChar = DecodeUtf8(&m_buffer[m_pos]);
    std::int32_t code = unicodeChar.code;

    // Check for a decoding failure before advancing: encodingLength is only
    // meaningful when decoding succeeded.
    if (code == -1)
        RaiseError("invalid encoding in UTF-8 string");

    m_pos += unicodeChar.encodingLength;

    if (isEscape && code >= 128)
        RaiseError("escapes using non-ASCII characters are invalid");

    sequence = isEscape ? g_charmap->Escape(code) : g_charmap->Char(code);

    if (sequence.length() == 0)
    {
        if (isEscape)
            RaiseError("unknown escape '\\%c'", code);
        else
            RaiseError("unknown character U+%X", code);
    }

    return sequence;
}
// Reads a charmap constant, i.e. "{FOO}".
// The brackets may hold several items separated by blanks. Each item is
// either a constant name, replaced by its charmap byte sequence, or an
// integer literal, emitted little-endian in 1, 2 or 4 bytes. Returns the
// concatenated bytes and leaves the position just past the '}'.
std::string StringParser::ReadBracketedConstants()
{
    std::string totalSequence;

    m_pos++; // Assume we're on the left curly bracket.

    for (;;)
    {
        // Skip blanks before testing for the closing bracket, so that
        // whitespace in front of '}' (as in "{FOO }") is accepted.
        SkipWhitespace();

        if (m_buffer[m_pos] == '}')
            break;

        if (IsIdentifierStartingChar(m_buffer[m_pos]))
        {
            long startPos = m_pos;

            m_pos++;

            while (IsIdentifierChar(m_buffer[m_pos]))
                m_pos++;

            // Keep the name in its own string; the source buffer is not
            // modified to build the error message.
            const std::string name(&m_buffer[startPos], m_pos - startPos);

            std::string sequence = g_charmap->Constant(name);

            if (sequence.length() == 0)
                RaiseError("unknown constant '%s'", name.c_str());

            totalSequence += sequence;
        }
        else if (IsAsciiDigit(m_buffer[m_pos]))
        {
            Integer integer = ReadInteger();

            // Emit the value least significant byte first.
            switch (integer.size)
            {
            case 1:
                totalSequence += static_cast<unsigned char>(integer.value);
                break;
            case 2:
                totalSequence += static_cast<unsigned char>(integer.value);
                totalSequence += static_cast<unsigned char>(integer.value >> 8);
                break;
            case 4:
                totalSequence += static_cast<unsigned char>(integer.value);
                totalSequence += static_cast<unsigned char>(integer.value >> 8);
                totalSequence += static_cast<unsigned char>(integer.value >> 16);
                totalSequence += static_cast<unsigned char>(integer.value >> 24);
                break;
            }
        }
        else if (m_buffer[m_pos] == 0)
        {
            if (m_pos >= m_size)
                RaiseError("unexpected EOF after left curly bracket");
            else
                RaiseError("unexpected null character within curly brackets");
        }
        else
        {
            if (IsAsciiPrintable(m_buffer[m_pos]))
                RaiseError("unexpected character '%c' within curly brackets", m_buffer[m_pos]);
            else
                // Go through unsigned char so bytes >= 0x80 print as two hex
                // digits instead of a sign-extended value.
                RaiseError("unexpected character '\\x%02X' within curly brackets",
                           static_cast<unsigned char>(m_buffer[m_pos]));
        }
    }

    m_pos++; // Go past the right curly bracket.

    return totalSequence;
}
// Reads a charmap string.
// Parses the double-quoted literal starting at `srcPos`, writes the mapped
// bytes to `dest` and their count to `destLength`. Returns the number of
// source characters consumed, both quotes included. Raises an error if the
// mapped output would exceed kMaxStringLength bytes.
int StringParser::ParseString(long srcPos, unsigned char* dest, int& destLength)
{
    m_pos = srcPos;

    if (m_buffer[m_pos] != '"')
        RaiseError("expected UTF-8 string literal");

    const long openingQuote = m_pos;

    m_pos++;
    destLength = 0;

    while (m_buffer[m_pos] != '"')
    {
        std::string mapped;

        if (m_buffer[m_pos] == '{')
            mapped = ReadBracketedConstants();
        else
            mapped = ReadCharOrEscape();

        for (std::string::size_type i = 0; i < mapped.length(); i++)
        {
            if (destLength == kMaxStringLength)
                RaiseError("mapped string longer than %d bytes", kMaxStringLength);

            dest[destLength] = mapped[i];
            destLength++;
        }
    }

    m_pos++; // Go past the right quote.

    return m_pos - openingQuote;
}
// Formats a printf-style message and throws it as std::runtime_error.
void StringParser::RaiseError(const char* format, ...)
{
    char message[1024];

    std::va_list arguments;
    va_start(arguments, format);
    std::vsnprintf(message, sizeof(message), format, arguments);
    va_end(arguments);

    throw std::runtime_error(message);
}
// Converts digit character to numerical value.
// Returns -1 when `c` is not a digit 0-9 / A-F / a-f, or when its value is
// not below `radix`.
static int ConvertDigit(char c, int radix)
{
    int value;

    if ('0' <= c && c <= '9')
        value = c - '0';
    else if ('A' <= c && c <= 'F')
        value = c - 'A' + 10;
    else if ('a' <= c && c <= 'f')
        value = c - 'a' + 10;
    else
        return -1;

    if (value >= radix)
        return -1;

    return value;
}
// Advances past all remaining characters that are digits in `radix`.
void StringParser::SkipRestOfInteger(int radix)
{
    for (; ConvertDigit(m_buffer[m_pos], radix) != -1; m_pos++)
    {
    }
}
// Reads a decimal integer literal and determines its size in bytes.
// A trailing 'H' forces a halfword (2 bytes; the value must be below
// 0x10000) and a trailing 'W' forces a word (4 bytes). Without a suffix the
// smallest of 1, 2 or 4 bytes that holds the value is chosen.
// Raises an error when the value does not fit in 32 bits.
StringParser::Integer StringParser::ReadDecimal()
{
    const int radix = 10;
    std::uint64_t n = 0;
    int digit;
    std::uint64_t max = UINT32_MAX;
    long startPos = m_pos;

    while ((digit = ConvertDigit(m_buffer[m_pos], radix)) != -1)
    {
        n = n * radix + digit;

        // UINT32_MAX itself still fits; only larger values are rejected
        // (the check used to be `>=`, an off-by-one).
        if (n > max)
        {
            SkipRestOfInteger(radix);

            std::string intLiteral(m_buffer + startPos, m_pos - startPos);
            RaiseError("integer literal \"%s\" is too large", intLiteral.c_str());
        }

        m_pos++;
    }

    int size;

    if (m_buffer[m_pos] == 'H')
    {
        if (n >= 0x10000)
        {
            RaiseError("%lu is too large to be a halfword", (unsigned long)n);
        }

        size = 2;
        m_pos++;
    }
    else if (m_buffer[m_pos] == 'W')
    {
        size = 4;
        m_pos++;
    }
    else
    {
        if (n >= 0x10000)
            size = 4;
        else if (n >= 0x100)
            size = 2;
        else
            size = 1;
    }

    return{ static_cast<std::uint32_t>(n), size };
}
// Reads the digits of a hexadecimal literal (the "0x" prefix has already been
// consumed by the caller). The size in bytes follows from the number of
// digits: 2 -> 1 byte, 4 -> 2 bytes, 8 -> 4 bytes. Any other digit count is
// an error, as is a value that does not fit in 32 bits.
StringParser::Integer StringParser::ReadHex()
{
    const int radix = 16;
    std::uint64_t n = 0;
    int digit;
    std::uint64_t max = UINT32_MAX;
    long startPos = m_pos;

    while ((digit = ConvertDigit(m_buffer[m_pos], radix)) != -1)
    {
        n = n * radix + digit;

        // 0xFFFFFFFF is a valid 8-digit literal; only larger values are
        // rejected (the check used to be `>=`, an off-by-one).
        if (n > max)
        {
            SkipRestOfInteger(radix);

            std::string intLiteral(m_buffer + startPos, m_pos - startPos);
            RaiseError("integer literal \"%s\" is too large", intLiteral.c_str());
        }

        m_pos++;
    }

    int length = m_pos - startPos;
    int size = 0;

    switch (length)
    {
    case 2:
        size = 1;
        break;
    case 4:
        size = 2;
        break;
    case 8:
        size = 4;
        break;
    default:
        {
            std::string intLiteral(m_buffer + startPos, m_pos - startPos);
            RaiseError("hex integer literal \"0x%s\" doesn't have length of 2, 4, or 8 digits", intLiteral.c_str());
        }
    }

    return{ static_cast<std::uint32_t>(n), size };
}
// Reads an integer literal at the current position. A "0x" prefix selects
// hexadecimal; anything else is read as decimal.
StringParser::Integer StringParser::ReadInteger()
{
    if (!IsAsciiDigit(m_buffer[m_pos]))
        RaiseError("expected integer");

    const bool hasHexPrefix = (m_buffer[m_pos] == '0' && m_buffer[m_pos + 1] == 'x');

    if (!hasHexPrefix)
        return ReadDecimal();

    m_pos += 2;

    return ReadHex();
}
// Skips tabs and spaces.
void StringParser::SkipWhitespace()
{
    for (;;)
    {
        const char current = m_buffer[m_pos];

        if (current != '\t' && current != ' ')
            return;

        m_pos++;
    }
}

View File

@ -0,0 +1,55 @@
// Copyright(c) 2016 YamaArashi
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#ifndef STRING_PARSER_H
#define STRING_PARSER_H
#include <cstdint>
#include <string>
#include "preproc.h"
// Parses double-quoted string literals out of a source buffer and maps them
// to byte sequences through the global charmap.
class StringParser
{
public:
// Wraps an existing buffer of `size` bytes; the buffer is not copied.
StringParser(char* buffer, long size) : m_buffer(buffer), m_size(size), m_pos(0) {}
// Parses the literal at `srcPos` into `dest`/`destLength` and returns the
// number of source characters consumed.
int ParseString(long srcPos, unsigned char* dest, int &destLength);
private:
// An integer literal together with its encoded size in bytes (1, 2 or 4).
struct Integer
{
std::uint32_t value;
int size;
};
char* m_buffer; // source text being parsed (not owned)
long m_size; // size of the source text, used to detect end of input
long m_pos; // current read offset into m_buffer
Integer ReadInteger();
Integer ReadDecimal();
Integer ReadHex();
std::string ReadCharOrEscape();
std::string ReadBracketedConstants();
void SkipWhitespace();
void SkipRestOfInteger(int radix);
// Throws std::runtime_error carrying the formatted message.
void RaiseError(const char* format, ...);
};
#endif // STRING_PARSER_H

92
tools/preproc/utf8.cpp Normal file
View File

@ -0,0 +1,92 @@
// Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
//
// Copyright(c) 2016 YamaArashi
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include <cstdint>
#include "utf8.h"
// Maps each possible byte value (0x00..0xFF) to a byte type in 0..11.
// DecodeUtf8 uses the type as the column index into s_transitionTable and,
// for the first byte of a sequence, as the shift that masks off the length
// prefix bits.
static const unsigned char s_byteTypeTable[] =
{
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
};
// Decoder states. Each value is the offset of that state's 12-entry row in
// s_transitionTable. In DecodeUtf8, s0 is the start state and also ends a
// complete character; reaching s1 means the input is invalid.
const unsigned char s0 = 0 * 12;
const unsigned char s1 = 1 * 12;
const unsigned char s2 = 2 * 12;
const unsigned char s3 = 3 * 12;
const unsigned char s4 = 4 * 12;
const unsigned char s5 = 5 * 12;
const unsigned char s6 = 6 * 12;
const unsigned char s7 = 7 * 12;
const unsigned char s8 = 8 * 12;
// State transition table, indexed as [state + byteType]: one row per state
// (named in the trailing comment), one column per byte type 0..11.
static const unsigned char s_transitionTable[] =
{
s0,s1,s2,s3,s5,s8,s7,s1,s1,s1,s4,s6, // s0
s1,s1,s1,s1,s1,s1,s1,s1,s1,s1,s1,s1, // s1
s1,s0,s1,s1,s1,s1,s1,s0,s1,s0,s1,s1, // s2
s1,s2,s1,s1,s1,s1,s1,s2,s1,s2,s1,s1, // s3
s1,s1,s1,s1,s1,s1,s1,s2,s1,s1,s1,s1, // s4
s1,s2,s1,s1,s1,s1,s1,s1,s1,s2,s1,s1, // s5
s1,s1,s1,s1,s1,s1,s1,s3,s1,s3,s1,s1, // s6
s1,s3,s1,s1,s1,s1,s1,s3,s1,s3,s1,s1, // s7
s1,s3,s1,s1,s1,s1,s1,s1,s1,s1,s1,s1, // s8
};
// Decodes UTF-8 encoded Unicode code point at "s".
// On success, `code` holds the code point and `encodingLength` the number of
// bytes consumed. On invalid input, `code` is -1 and `encodingLength` is 0,
// so the result never carries an indeterminate value.
UnicodeChar DecodeUtf8(const char* s)
{
    UnicodeChar unicodeChar;
    unicodeChar.code = 0;
    unicodeChar.encodingLength = 0;

    int state = s0;
    auto start = s;

    do
    {
        unsigned char byte = *s++;
        int type = s_byteTypeTable[byte];

        if (state == s0)
            // First byte: the type is also the shift that strips the
            // length prefix bits.
            unicodeChar.code = (0xFF >> type) & byte;
        else
            // Continuation byte: append its low six bits.
            unicodeChar.code = (unicodeChar.code << 6) | (byte & 0x3F);

        state = s_transitionTable[state + type];

        if (state == s1)
        {
            unicodeChar.code = -1;
            return unicodeChar;
        }
    } while (state != s0);

    unicodeChar.encodingLength = static_cast<int>(s - start);

    return unicodeChar;
}

34
tools/preproc/utf8.h Normal file
View File

@ -0,0 +1,34 @@
// Copyright(c) 2016 YamaArashi
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#ifndef UTF8_H
#define UTF8_H
#include <cstdint>
// Result of decoding one UTF-8 encoded character.
struct UnicodeChar
{
std::int32_t code; // decoded code point, or -1 if the encoding is invalid
int encodingLength; // number of bytes the encoded character occupies
};
// Decodes the UTF-8 encoded code point starting at `s`.
UnicodeChar DecodeUtf8(const char* s);
#endif // UTF8_H