import from github
This commit is contained in:
1
tools/preproc/.gitignore
vendored
Normal file
1
tools/preproc/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
preproc
|
19
tools/preproc/LICENSE
Normal file
19
tools/preproc/LICENSE
Normal file
@ -0,0 +1,19 @@
|
||||
Copyright (c) 2016 YamaArashi
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
26
tools/preproc/Makefile
Normal file
26
tools/preproc/Makefile
Normal file
@ -0,0 +1,26 @@
|
||||
# Build rules for the preproc tool.

# Compiler; may be overridden from the environment or the command line.
CXX ?= g++

# C++11, optimised, all warnings except -Wswitch, warnings are errors.
CXXFLAGS := -std=c++11 -O2 -Wall -Wno-switch -Werror

SRCS := asm_file.cpp c_file.cpp charmap.cpp preproc.cpp string_parser.cpp \
	utf8.cpp

HEADERS := asm_file.h c_file.h char_util.h charmap.h preproc.h string_parser.h \
	utf8.h

# Executables carry an .exe suffix when OS is Windows_NT.
ifeq ($(OS),Windows_NT)
EXE := .exe
else
EXE :=
endif

.PHONY: all clean

all: preproc$(EXE)
	@:

# Everything is compiled in one step, so touching any source or header
# rebuilds the whole tool.
preproc$(EXE): $(SRCS) $(HEADERS)
	$(CXX) $(CXXFLAGS) $(SRCS) -o $@ $(LDFLAGS)

# Removes both spellings of the executable regardless of the host OS.
clean:
	$(RM) preproc preproc.exe
|
599
tools/preproc/asm_file.cpp
Normal file
599
tools/preproc/asm_file.cpp
Normal file
@ -0,0 +1,599 @@
|
||||
// Copyright(c) 2016 YamaArashi
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdarg>
|
||||
#include <stdexcept>
|
||||
#include "preproc.h"
|
||||
#include "asm_file.h"
|
||||
#include "char_util.h"
|
||||
#include "utf8.h"
|
||||
#include "string_parser.h"
|
||||
#include "../../gflib/characters.h"
|
||||
|
||||
// Loads the whole of `filename` into a NUL-terminated heap buffer, places the
// cursor on line 1 and blanks out all comments.
// Every I/O failure is reported through FATAL_ERROR.
AsmFile::AsmFile(std::string filename) : m_filename(filename)
{
    FILE *fp = std::fopen(filename.c_str(), "rb");

    if (fp == NULL)
        FATAL_ERROR("Failed to open \"%s\" for reading.\n", filename.c_str());

    // If the seek fails, the position reported by ftell() is not the file size.
    if (std::fseek(fp, 0, SEEK_END) != 0)
        FATAL_ERROR("Failed to read \"%s\".\n", filename.c_str());

    m_size = std::ftell(fp);

    if (m_size < 0)
        FATAL_ERROR("File size of \"%s\" is less than zero.\n", filename.c_str());

    // One extra byte for the terminating NUL written below.
    m_buffer = new char[m_size + 1];

    std::rewind(fp);

    // fread() returns 0 when the element size is 0, so an empty file must not
    // be treated as a read failure.
    if (m_size > 0 && std::fread(m_buffer, m_size, 1, fp) != 1)
        FATAL_ERROR("Failed to read \"%s\".\n", filename.c_str());

    m_buffer[m_size] = 0;

    std::fclose(fp);

    m_pos = 0;
    m_lineNum = 1;
    m_lineStart = 0;

    RemoveComments();
}
|
||||
|
||||
// Move constructor: takes over the other object's buffer, cursor state and
// filename. The source keeps a null buffer so its destructor frees nothing.
AsmFile::AsmFile(AsmFile&& other)
    : m_buffer(other.m_buffer),
      m_pos(other.m_pos),
      m_size(other.m_size),
      m_lineNum(other.m_lineNum),
      m_lineStart(other.m_lineStart),
      m_filename(std::move(other.m_filename))
{
    other.m_buffer = nullptr;
}
|
||||
|
||||
// Releases the file buffer. A moved-from object holds a null pointer, for
// which delete[] does nothing.
AsmFile::~AsmFile()
{
    delete[] m_buffer;
}
|
||||
|
||||
// Removes comments to simplify further processing.
|
||||
// It stops upon encountering a null character,
|
||||
// which may or may not be the end of file marker.
|
||||
// If it's not, the error will be caught later.
|
||||
void AsmFile::RemoveComments()
|
||||
{
|
||||
long pos = 0;
|
||||
char stringChar = 0;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (m_buffer[pos] == 0)
|
||||
return;
|
||||
|
||||
if (stringChar != 0)
|
||||
{
|
||||
if (m_buffer[pos] == '\\' && m_buffer[pos + 1] == stringChar)
|
||||
{
|
||||
pos += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_buffer[pos] == stringChar)
|
||||
stringChar = 0;
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
else if (m_buffer[pos] == '@' && (pos == 0 || m_buffer[pos - 1] != '\\'))
|
||||
{
|
||||
while (m_buffer[pos] != '\n' && m_buffer[pos] != 0)
|
||||
m_buffer[pos++] = ' ';
|
||||
}
|
||||
else if (m_buffer[pos] == '/' && m_buffer[pos + 1] == '*')
|
||||
{
|
||||
m_buffer[pos++] = ' ';
|
||||
m_buffer[pos++] = ' ';
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (m_buffer[pos] == 0)
|
||||
return;
|
||||
|
||||
if (m_buffer[pos] == '*' && m_buffer[pos + 1] == '/')
|
||||
{
|
||||
m_buffer[pos++] = ' ';
|
||||
m_buffer[pos++] = ' ';
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_buffer[pos] != '\n')
|
||||
m_buffer[pos] = ' ';
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_buffer[pos] == '"' || m_buffer[pos] == '\'')
|
||||
stringChar = m_buffer[pos];
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Checks if we're at a particular directive and if so, consumes it.
|
||||
// Returns whether the directive was found.
|
||||
bool AsmFile::CheckForDirective(std::string name)
|
||||
{
|
||||
long i;
|
||||
long length = static_cast<long>(name.length());
|
||||
|
||||
for (i = 0; i < length && m_pos + i < m_size; i++)
|
||||
if (name[i] != m_buffer[m_pos + i])
|
||||
return false;
|
||||
|
||||
if (i < length)
|
||||
return false;
|
||||
|
||||
m_pos += length;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Checks if we're at a known directive and if so, consumes it.
|
||||
// Returns which directive was found.
|
||||
Directive AsmFile::GetDirective()
|
||||
{
|
||||
SkipWhitespace();
|
||||
|
||||
if (CheckForDirective(".include"))
|
||||
return Directive::Include;
|
||||
else if (CheckForDirective(".string"))
|
||||
return Directive::String;
|
||||
else if (CheckForDirective(".braille"))
|
||||
return Directive::Braille;
|
||||
else
|
||||
return Directive::Unknown;
|
||||
}
|
||||
|
||||
// Checks if we're at label that ends with '::'.
|
||||
// Returns the name if so and an empty string if not.
|
||||
std::string AsmFile::GetGlobalLabel()
|
||||
{
|
||||
long start = m_pos;
|
||||
long pos = m_pos;
|
||||
|
||||
if (IsIdentifierStartingChar(m_buffer[pos]))
|
||||
{
|
||||
pos++;
|
||||
|
||||
while (IsIdentifierChar(m_buffer[pos]))
|
||||
pos++;
|
||||
}
|
||||
|
||||
if (m_buffer[pos] == ':' && m_buffer[pos + 1] == ':')
|
||||
{
|
||||
m_pos = pos + 2;
|
||||
ExpectEmptyRestOfLine();
|
||||
return std::string(&m_buffer[start], pos - start);
|
||||
}
|
||||
|
||||
return std::string();
|
||||
}
|
||||
|
||||
// Skips tabs and spaces.
|
||||
void AsmFile::SkipWhitespace()
|
||||
{
|
||||
while (m_buffer[m_pos] == '\t' || m_buffer[m_pos] == ' ')
|
||||
m_pos++;
|
||||
}
|
||||
|
||||
// Reads include path.
|
||||
std::string AsmFile::ReadPath()
|
||||
{
|
||||
SkipWhitespace();
|
||||
|
||||
if (m_buffer[m_pos] != '"')
|
||||
RaiseError("expected file path");
|
||||
|
||||
m_pos++;
|
||||
|
||||
int length = 0;
|
||||
long startPos = m_pos;
|
||||
|
||||
while (m_buffer[m_pos] != '"')
|
||||
{
|
||||
unsigned char c = m_buffer[m_pos++];
|
||||
|
||||
if (c == 0)
|
||||
{
|
||||
if (m_pos >= m_size)
|
||||
RaiseError("unexpected EOF in include string");
|
||||
else
|
||||
RaiseError("unexpected null character in include string");
|
||||
}
|
||||
|
||||
if (!IsAsciiPrintable(c))
|
||||
RaiseError("unexpected character '\\x%02X' in include string", c);
|
||||
|
||||
// Don't bother allowing any escape sequences.
|
||||
if (c == '\\')
|
||||
{
|
||||
c = m_buffer[m_pos];
|
||||
RaiseError("unexpected escape '\\%c' in include string", c);
|
||||
}
|
||||
|
||||
length++;
|
||||
|
||||
if (length > kMaxPath)
|
||||
RaiseError("path is too long");
|
||||
}
|
||||
|
||||
m_pos++; // Go past the right quote.
|
||||
|
||||
ExpectEmptyRestOfLine();
|
||||
|
||||
return std::string(&m_buffer[startPos], length);
|
||||
}
|
||||
|
||||
// Reads a charmap string.
|
||||
int AsmFile::ReadString(unsigned char* s)
|
||||
{
|
||||
SkipWhitespace();
|
||||
|
||||
int length;
|
||||
StringParser stringParser(m_buffer, m_size);
|
||||
|
||||
try
|
||||
{
|
||||
m_pos += stringParser.ParseString(m_pos, s, length);
|
||||
}
|
||||
catch (std::runtime_error& e)
|
||||
{
|
||||
RaiseError(e.what());
|
||||
}
|
||||
|
||||
SkipWhitespace();
|
||||
|
||||
if (ConsumeComma())
|
||||
{
|
||||
SkipWhitespace();
|
||||
int padLength = ReadPadLength();
|
||||
|
||||
while (length < padLength)
|
||||
{
|
||||
s[length++] = CHAR_SPACE;
|
||||
}
|
||||
}
|
||||
|
||||
ExpectEmptyRestOfLine();
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
void AsmFile::VerifyStringLength(int length)
|
||||
{
|
||||
if (length == kMaxStringLength)
|
||||
RaiseError("mapped string longer than %d bytes", kMaxStringLength);
|
||||
}
|
||||
|
||||
int AsmFile::ReadBraille(unsigned char* s)
|
||||
{
|
||||
static std::map<char, unsigned char> encoding =
|
||||
{
|
||||
{ 'A', BRAILLE_CHAR_A },
|
||||
{ 'B', BRAILLE_CHAR_B },
|
||||
{ 'C', BRAILLE_CHAR_C },
|
||||
{ 'D', BRAILLE_CHAR_D },
|
||||
{ 'E', BRAILLE_CHAR_E },
|
||||
{ 'F', BRAILLE_CHAR_F },
|
||||
{ 'G', BRAILLE_CHAR_G },
|
||||
{ 'H', BRAILLE_CHAR_H },
|
||||
{ 'I', BRAILLE_CHAR_I },
|
||||
{ 'J', BRAILLE_CHAR_J },
|
||||
{ 'K', BRAILLE_CHAR_K },
|
||||
{ 'L', BRAILLE_CHAR_L },
|
||||
{ 'M', BRAILLE_CHAR_M },
|
||||
{ 'N', BRAILLE_CHAR_N },
|
||||
{ 'O', BRAILLE_CHAR_O },
|
||||
{ 'P', BRAILLE_CHAR_P },
|
||||
{ 'Q', BRAILLE_CHAR_Q },
|
||||
{ 'R', BRAILLE_CHAR_R },
|
||||
{ 'S', BRAILLE_CHAR_S },
|
||||
{ 'T', BRAILLE_CHAR_T },
|
||||
{ 'U', BRAILLE_CHAR_U },
|
||||
{ 'V', BRAILLE_CHAR_V },
|
||||
{ 'W', BRAILLE_CHAR_W },
|
||||
{ 'X', BRAILLE_CHAR_X },
|
||||
{ 'Y', BRAILLE_CHAR_Y },
|
||||
{ 'Z', BRAILLE_CHAR_Z },
|
||||
{ 'a', BRAILLE_CHAR_A },
|
||||
{ 'b', BRAILLE_CHAR_B },
|
||||
{ 'c', BRAILLE_CHAR_C },
|
||||
{ 'd', BRAILLE_CHAR_D },
|
||||
{ 'e', BRAILLE_CHAR_E },
|
||||
{ 'f', BRAILLE_CHAR_F },
|
||||
{ 'g', BRAILLE_CHAR_G },
|
||||
{ 'h', BRAILLE_CHAR_H },
|
||||
{ 'i', BRAILLE_CHAR_I },
|
||||
{ 'j', BRAILLE_CHAR_J },
|
||||
{ 'k', BRAILLE_CHAR_K },
|
||||
{ 'l', BRAILLE_CHAR_L },
|
||||
{ 'm', BRAILLE_CHAR_M },
|
||||
{ 'n', BRAILLE_CHAR_N },
|
||||
{ 'o', BRAILLE_CHAR_O },
|
||||
{ 'p', BRAILLE_CHAR_P },
|
||||
{ 'q', BRAILLE_CHAR_Q },
|
||||
{ 'r', BRAILLE_CHAR_R },
|
||||
{ 's', BRAILLE_CHAR_S },
|
||||
{ 't', BRAILLE_CHAR_T },
|
||||
{ 'u', BRAILLE_CHAR_U },
|
||||
{ 'v', BRAILLE_CHAR_V },
|
||||
{ 'w', BRAILLE_CHAR_W },
|
||||
{ 'x', BRAILLE_CHAR_X },
|
||||
{ 'y', BRAILLE_CHAR_Y },
|
||||
{ 'z', BRAILLE_CHAR_Z },
|
||||
{ '0', BRAILLE_CHAR_0 },
|
||||
{ '1', BRAILLE_CHAR_1 },
|
||||
{ '2', BRAILLE_CHAR_2 },
|
||||
{ '3', BRAILLE_CHAR_3 },
|
||||
{ '4', BRAILLE_CHAR_4 },
|
||||
{ '5', BRAILLE_CHAR_5 },
|
||||
{ '6', BRAILLE_CHAR_6 },
|
||||
{ '7', BRAILLE_CHAR_7 },
|
||||
{ '8', BRAILLE_CHAR_8 },
|
||||
{ '9', BRAILLE_CHAR_9 },
|
||||
{ ' ', BRAILLE_CHAR_SPACE },
|
||||
{ ',', BRAILLE_CHAR_COMMA },
|
||||
{ '.', BRAILLE_CHAR_PERIOD },
|
||||
{ '?', BRAILLE_CHAR_QUESTION_MARK },
|
||||
{ '!', BRAILLE_CHAR_EXCL_MARK },
|
||||
{ ':', BRAILLE_CHAR_COLON },
|
||||
{ ';', BRAILLE_CHAR_SEMICOLON },
|
||||
{ '-', BRAILLE_CHAR_HYPHEN },
|
||||
{ '/', BRAILLE_CHAR_SLASH },
|
||||
{ '(', BRAILLE_CHAR_PAREN },
|
||||
{ ')', BRAILLE_CHAR_PAREN },
|
||||
{ '\'', BRAILLE_CHAR_APOSTROPHE },
|
||||
{ '#', BRAILLE_CHAR_NUMBER },
|
||||
{ '$', EOS },
|
||||
};
|
||||
|
||||
SkipWhitespace();
|
||||
|
||||
int length = 0;
|
||||
|
||||
if (m_buffer[m_pos] != '"')
|
||||
RaiseError("expected braille string literal");
|
||||
|
||||
m_pos++;
|
||||
|
||||
bool inNumber = false;
|
||||
while (m_buffer[m_pos] != '"')
|
||||
{
|
||||
if (m_buffer[m_pos] == '\\' && m_buffer[m_pos + 1] == 'n')
|
||||
{
|
||||
VerifyStringLength(length);
|
||||
s[length++] = CHAR_NEWLINE;
|
||||
m_pos += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
char c = m_buffer[m_pos];
|
||||
|
||||
if (encoding.count(c) == 0)
|
||||
{
|
||||
if (IsAsciiPrintable(c))
|
||||
RaiseError("character '%c' not valid in braille string", m_buffer[m_pos]);
|
||||
else
|
||||
RaiseError("character '\\x%02X' not valid in braille string", m_buffer[m_pos]);
|
||||
}
|
||||
|
||||
if (!inNumber && c >= '0' && c <= '9' )
|
||||
{
|
||||
// Output number indicator at start of a number
|
||||
inNumber = true;
|
||||
VerifyStringLength(length);
|
||||
s[length++] = BRAILLE_CHAR_NUMBER;
|
||||
}
|
||||
else if (inNumber && encoding[c] == BRAILLE_CHAR_SPACE)
|
||||
{
|
||||
// Number ends at a space.
|
||||
// Non-number characters encountered before a space will simply be output as is.
|
||||
inNumber = false;
|
||||
}
|
||||
|
||||
VerifyStringLength(length);
|
||||
s[length++] = encoding[c];
|
||||
m_pos++;
|
||||
}
|
||||
}
|
||||
|
||||
m_pos++; // Go past the right quote.
|
||||
|
||||
ExpectEmptyRestOfLine();
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
// If we're at a comma, consumes it.
|
||||
// Returns whether a comma was found.
|
||||
bool AsmFile::ConsumeComma()
|
||||
{
|
||||
if (m_buffer[m_pos] == ',')
|
||||
{
|
||||
m_pos++;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns the numeric value of digit character `c` (0-9, A-F or a-f),
// or -1 when `c` is not such a character or its value is not below `radix`.
static int ConvertDigit(char c, int radix)
{
    int value = -1;

    if ('0' <= c && c <= '9')
        value = c - '0';
    else if ('A' <= c && c <= 'F')
        value = c - 'A' + 10;
    else if ('a' <= c && c <= 'f')
        value = c - 'a' + 10;

    if (value < 0 || value >= radix)
        return -1;

    return value;
}
|
||||
|
||||
// Reads an integer. If the integer is greater than maxValue, it returns -1.
|
||||
int AsmFile::ReadPadLength()
|
||||
{
|
||||
if (!IsAsciiDigit(m_buffer[m_pos]))
|
||||
RaiseError("expected integer");
|
||||
|
||||
int radix = 10;
|
||||
|
||||
if (m_buffer[m_pos] == '0' && m_buffer[m_pos + 1] == 'x')
|
||||
{
|
||||
radix = 16;
|
||||
m_pos += 2;
|
||||
}
|
||||
|
||||
unsigned n = 0;
|
||||
int digit;
|
||||
|
||||
while ((digit = ConvertDigit(m_buffer[m_pos], radix)) != -1)
|
||||
{
|
||||
n = n * radix + digit;
|
||||
|
||||
if (n > kMaxStringLength)
|
||||
RaiseError("pad length greater than maximum length (%d)", kMaxStringLength);
|
||||
|
||||
m_pos++;
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
// Outputs the current line and moves to the next one.
|
||||
void AsmFile::OutputLine()
|
||||
{
|
||||
while (m_buffer[m_pos] != '\n' && m_buffer[m_pos] != 0)
|
||||
m_pos++;
|
||||
|
||||
if (m_buffer[m_pos] == 0)
|
||||
{
|
||||
if (m_pos >= m_size)
|
||||
{
|
||||
RaiseWarning("file doesn't end with newline");
|
||||
puts(&m_buffer[m_lineStart]);
|
||||
}
|
||||
else
|
||||
{
|
||||
RaiseError("unexpected null character");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
m_buffer[m_pos] = 0;
|
||||
puts(&m_buffer[m_lineStart]);
|
||||
m_buffer[m_pos] = '\n';
|
||||
m_pos++;
|
||||
m_lineStart = m_pos;
|
||||
m_lineNum++;
|
||||
}
|
||||
}
|
||||
|
||||
// Asserts that the rest of the line is empty and moves to the next one.
|
||||
void AsmFile::ExpectEmptyRestOfLine()
|
||||
{
|
||||
SkipWhitespace();
|
||||
|
||||
if (m_buffer[m_pos] == 0)
|
||||
{
|
||||
if (m_pos >= m_size)
|
||||
RaiseWarning("file doesn't end with newline");
|
||||
else
|
||||
RaiseError("unexpected null character");
|
||||
}
|
||||
else if (m_buffer[m_pos] == '\n')
|
||||
{
|
||||
m_pos++;
|
||||
m_lineStart = m_pos;
|
||||
m_lineNum++;
|
||||
}
|
||||
else if (m_buffer[m_pos] == '\r' && m_buffer[m_pos + 1] == '\n')
|
||||
{
|
||||
m_pos += 2;
|
||||
m_lineStart = m_pos;
|
||||
m_lineNum++;
|
||||
}
|
||||
else
|
||||
{
|
||||
RaiseError("junk at end of line");
|
||||
}
|
||||
}
|
||||
|
||||
// Checks if we're at the end of the file.
|
||||
bool AsmFile::IsAtEnd()
|
||||
{
|
||||
return (m_pos >= m_size);
|
||||
}
|
||||
|
||||
// Output the current location to set gas's logical file and line numbers.
|
||||
void AsmFile::OutputLocation()
|
||||
{
|
||||
std::printf("# %ld \"%s\"\n", m_lineNum, m_filename.c_str());
|
||||
}
|
||||
|
||||
// Reports a diagnostic message.
|
||||
void AsmFile::ReportDiagnostic(const char* type, const char* format, std::va_list args)
|
||||
{
|
||||
const int bufferSize = 1024;
|
||||
char buffer[bufferSize];
|
||||
std::vsnprintf(buffer, bufferSize, format, args);
|
||||
std::fprintf(stderr, "%s:%ld: %s: %s\n", m_filename.c_str(), m_lineNum, type, buffer);
|
||||
}
|
||||
|
||||
// For use inside a variadic member function whose last named parameter is
// called `format`: forwards the variadic arguments to ReportDiagnostic()
// with the given diagnostic type.
#define DO_REPORT(type)                              \
    do                                               \
    {                                                \
        std::va_list reportArgs;                     \
        va_start(reportArgs, format);                \
        ReportDiagnostic(type, format, reportArgs);  \
        va_end(reportArgs);                          \
    } while (0)
|
||||
|
||||
// Reports an error diagnostic and terminates the program.
|
||||
void AsmFile::RaiseError(const char* format, ...)
|
||||
{
|
||||
DO_REPORT("error");
|
||||
std::exit(1);
|
||||
}
|
||||
|
||||
// Reports a warning diagnostic.
|
||||
void AsmFile::RaiseWarning(const char* format, ...)
|
||||
{
|
||||
DO_REPORT("warning");
|
||||
}
|
73
tools/preproc/asm_file.h
Normal file
73
tools/preproc/asm_file.h
Normal file
@ -0,0 +1,73 @@
|
||||
// Copyright(c) 2016 YamaArashi
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef ASM_FILE_H
|
||||
#define ASM_FILE_H
|
||||
|
||||
#include <cstdarg>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include "preproc.h"
|
||||
|
||||
// Directives recognised by AsmFile::GetDirective().
enum class Directive
{
    Include, // ".include"
    String,  // ".string"
    Braille, // ".braille"
    Unknown  // none of the above was found at the cursor
};
|
||||
|
||||
class AsmFile
|
||||
{
|
||||
public:
|
||||
AsmFile(std::string filename);
|
||||
AsmFile(AsmFile&& other);
|
||||
AsmFile(const AsmFile&) = delete;
|
||||
~AsmFile();
|
||||
Directive GetDirective();
|
||||
std::string GetGlobalLabel();
|
||||
std::string ReadPath();
|
||||
int ReadString(unsigned char* s);
|
||||
int ReadBraille(unsigned char* s);
|
||||
bool IsAtEnd();
|
||||
void OutputLine();
|
||||
void OutputLocation();
|
||||
|
||||
private:
|
||||
char* m_buffer;
|
||||
long m_pos;
|
||||
long m_size;
|
||||
long m_lineNum;
|
||||
long m_lineStart;
|
||||
std::string m_filename;
|
||||
|
||||
bool ConsumeComma();
|
||||
int ReadPadLength();
|
||||
void RemoveComments();
|
||||
bool CheckForDirective(std::string name);
|
||||
void SkipWhitespace();
|
||||
void ExpectEmptyRestOfLine();
|
||||
void ReportDiagnostic(const char* type, const char* format, std::va_list args);
|
||||
void RaiseError(const char* format, ...);
|
||||
void RaiseWarning(const char* format, ...);
|
||||
void VerifyStringLength(int length);
|
||||
};
|
||||
|
||||
#endif // ASM_FILE_H
|
459
tools/preproc/c_file.cpp
Normal file
459
tools/preproc/c_file.cpp
Normal file
@ -0,0 +1,459 @@
|
||||
// Copyright(c) 2016 YamaArashi
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdarg>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <cstring>
|
||||
#include <cerrno>
|
||||
#include "preproc.h"
|
||||
#include "c_file.h"
|
||||
#include "char_util.h"
|
||||
#include "utf8.h"
|
||||
#include "string_parser.h"
|
||||
|
||||
// Reads a C source file completely into a NUL-terminated malloc'd buffer.
// With `isStdin` set, the data comes from stdin and `filenameCStr` is only
// used, prefixed with "<stdin>/", as the name shown in diagnostics.
// Every failure is reported through FATAL_ERROR.
CFile::CFile(const char * filenameCStr, bool isStdin)
{
    FILE *fp;

    if (isStdin) {
        fp = stdin;
        m_filename = std::string{"<stdin>/"}.append(filenameCStr);
    } else {
        fp = std::fopen(filenameCStr, "rb");
        m_filename = std::string(filenameCStr);
    }

    std::string& filename = m_filename;

    if (fp == NULL)
        FATAL_ERROR("Failed to open \"%s\" for reading.\n", filename.c_str());

    m_size = 0;

    // Room for one chunk plus the terminating NUL.
    std::size_t numAllocatedBytes = CHUNK_SIZE + 1;
    m_buffer = static_cast<char *>(malloc(numAllocatedBytes));
    if (m_buffer == NULL) {
        FATAL_ERROR("Failed to allocate memory to process file \"%s\"!", filename.c_str());
    }

    for (;;) {
        // Append directly after the bytes already stored, so a short read
        // cannot leave a gap of uninitialised bytes in the buffer.
        const std::size_t count = std::fread(m_buffer + m_size, 1, CHUNK_SIZE, fp);

        // Checked on every pass, including one that returned no data at all.
        if (std::ferror(fp)) {
            FATAL_ERROR("Failed to read \"%s\". (error: %s)", filename.c_str(), std::strerror(errno));
        }

        m_size += count;

        if (count == 0 || std::feof(fp)) {
            break;
        }

        // Make room for another full chunk and the terminator.
        numAllocatedBytes = static_cast<std::size_t>(m_size) + CHUNK_SIZE + 1;
        char *grown = static_cast<char *>(realloc(m_buffer, numAllocatedBytes));
        if (grown == NULL) {
            FATAL_ERROR("Failed to allocate memory to process file \"%s\"!", filename.c_str());
        }
        m_buffer = grown;
    }

    m_buffer[m_size] = 0;

    std::fclose(fp);

    m_pos = 0;
    m_lineNum = 1;
    m_isStdin = isStdin;
}
|
||||
|
||||
// Move constructor: takes over the other object's buffer, cursor state and
// filename. The source keeps a null buffer so its destructor frees nothing.
CFile::CFile(CFile&& other) : m_filename(std::move(other.m_filename))
{
    // Plain bookkeeping fields are simply copied.
    m_isStdin = other.m_isStdin;
    m_lineNum = other.m_lineNum;
    m_size = other.m_size;
    m_pos = other.m_pos;

    // Ownership of the buffer changes hands.
    m_buffer = other.m_buffer;
    other.m_buffer = NULL;
}
|
||||
|
||||
// Releases the file buffer. A moved-from object holds a null pointer, for
// which free() does nothing.
CFile::~CFile()
{
    free(m_buffer);
}
|
||||
|
||||
void CFile::Preproc()
|
||||
{
|
||||
char stringChar = 0;
|
||||
|
||||
while (m_pos < m_size)
|
||||
{
|
||||
if (stringChar)
|
||||
{
|
||||
if (m_buffer[m_pos] == stringChar)
|
||||
{
|
||||
std::putchar(stringChar);
|
||||
m_pos++;
|
||||
stringChar = 0;
|
||||
}
|
||||
else if (m_buffer[m_pos] == '\\' && m_buffer[m_pos + 1] == stringChar)
|
||||
{
|
||||
std::putchar('\\');
|
||||
std::putchar(stringChar);
|
||||
m_pos += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_buffer[m_pos] == '\n')
|
||||
m_lineNum++;
|
||||
std::putchar(m_buffer[m_pos]);
|
||||
m_pos++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
TryConvertString();
|
||||
TryConvertIncbin();
|
||||
|
||||
if (m_pos >= m_size)
|
||||
break;
|
||||
|
||||
char c = m_buffer[m_pos++];
|
||||
|
||||
std::putchar(c);
|
||||
|
||||
if (c == '\n')
|
||||
m_lineNum++;
|
||||
else if (c == '"')
|
||||
stringChar = '"';
|
||||
else if (c == '\'')
|
||||
stringChar = '\'';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool CFile::ConsumeHorizontalWhitespace()
|
||||
{
|
||||
if (m_buffer[m_pos] == '\t' || m_buffer[m_pos] == ' ')
|
||||
{
|
||||
m_pos++;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool CFile::ConsumeNewline()
|
||||
{
|
||||
if (m_buffer[m_pos] == '\r' && m_buffer[m_pos + 1] == '\n')
|
||||
{
|
||||
m_pos += 2;
|
||||
m_lineNum++;
|
||||
std::putchar('\n');
|
||||
return true;
|
||||
}
|
||||
|
||||
if (m_buffer[m_pos] == '\n')
|
||||
{
|
||||
m_pos++;
|
||||
m_lineNum++;
|
||||
std::putchar('\n');
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void CFile::SkipWhitespace()
|
||||
{
|
||||
while (ConsumeHorizontalWhitespace() || ConsumeNewline())
|
||||
;
|
||||
}
|
||||
|
||||
void CFile::TryConvertString()
|
||||
{
|
||||
long oldPos = m_pos;
|
||||
long oldLineNum = m_lineNum;
|
||||
bool noTerminator = false;
|
||||
|
||||
if (m_buffer[m_pos] != '_' || (m_pos > 0 && IsIdentifierChar(m_buffer[m_pos - 1])))
|
||||
return;
|
||||
|
||||
m_pos++;
|
||||
|
||||
if (m_buffer[m_pos] == '_')
|
||||
{
|
||||
noTerminator = true;
|
||||
m_pos++;
|
||||
}
|
||||
|
||||
SkipWhitespace();
|
||||
|
||||
if (m_buffer[m_pos] != '(')
|
||||
{
|
||||
m_pos = oldPos;
|
||||
m_lineNum = oldLineNum;
|
||||
return;
|
||||
}
|
||||
|
||||
m_pos++;
|
||||
|
||||
SkipWhitespace();
|
||||
|
||||
std::printf("{ ");
|
||||
|
||||
while (1)
|
||||
{
|
||||
SkipWhitespace();
|
||||
|
||||
if (m_buffer[m_pos] == '"')
|
||||
{
|
||||
unsigned char s[kMaxStringLength];
|
||||
int length;
|
||||
StringParser stringParser(m_buffer, m_size);
|
||||
|
||||
try
|
||||
{
|
||||
m_pos += stringParser.ParseString(m_pos, s, length);
|
||||
}
|
||||
catch (std::runtime_error& e)
|
||||
{
|
||||
RaiseError(e.what());
|
||||
}
|
||||
|
||||
for (int i = 0; i < length; i++)
|
||||
printf("0x%02X, ", s[i]);
|
||||
}
|
||||
else if (m_buffer[m_pos] == ')')
|
||||
{
|
||||
m_pos++;
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_pos >= m_size)
|
||||
RaiseError("unexpected EOF");
|
||||
if (IsAsciiPrintable(m_buffer[m_pos]))
|
||||
RaiseError("unexpected character '%c'", m_buffer[m_pos]);
|
||||
else
|
||||
RaiseError("unexpected character '\\x%02X'", m_buffer[m_pos]);
|
||||
}
|
||||
}
|
||||
|
||||
if (noTerminator)
|
||||
std::printf(" }");
|
||||
else
|
||||
std::printf("0xFF }");
|
||||
}
|
||||
|
||||
bool CFile::CheckIdentifier(const std::string& ident)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < ident.length() && m_pos + i < (unsigned)m_size; i++)
|
||||
if (ident[i] != m_buffer[m_pos + i])
|
||||
return false;
|
||||
|
||||
return (i == ident.length());
|
||||
}
|
||||
|
||||
std::unique_ptr<unsigned char[]> CFile::ReadWholeFile(const std::string& path, int& size)
|
||||
{
|
||||
FILE* fp = std::fopen(path.c_str(), "rb");
|
||||
|
||||
if (fp == nullptr)
|
||||
RaiseError("Failed to open \"%s\" for reading.\n", path.c_str());
|
||||
|
||||
std::fseek(fp, 0, SEEK_END);
|
||||
|
||||
size = std::ftell(fp);
|
||||
|
||||
std::unique_ptr<unsigned char[]> buffer = std::unique_ptr<unsigned char[]>(new unsigned char[size]);
|
||||
|
||||
std::rewind(fp);
|
||||
|
||||
if (std::fread(buffer.get(), size, 1, fp) != 1)
|
||||
RaiseError("Failed to read \"%s\".\n", path.c_str());
|
||||
|
||||
std::fclose(fp);
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
||||
int ExtractData(const std::unique_ptr<unsigned char[]>& buffer, int offset, int size)
|
||||
{
|
||||
switch (size)
|
||||
{
|
||||
case 1:
|
||||
return buffer[offset];
|
||||
case 2:
|
||||
return (buffer[offset + 1] << 8)
|
||||
| buffer[offset];
|
||||
case 4:
|
||||
return (buffer[offset + 3] << 24)
|
||||
| (buffer[offset + 2] << 16)
|
||||
| (buffer[offset + 1] << 8)
|
||||
| buffer[offset];
|
||||
default:
|
||||
FATAL_ERROR("Invalid size passed to ExtractData.\n");
|
||||
}
|
||||
}
|
||||
|
||||
void CFile::TryConvertIncbin()
|
||||
{
|
||||
std::string idents[6] = { "INCBIN_S8", "INCBIN_U8", "INCBIN_S16", "INCBIN_U16", "INCBIN_S32", "INCBIN_U32" };
|
||||
int incbinType = -1;
|
||||
|
||||
for (int i = 0; i < 6; i++)
|
||||
{
|
||||
if (CheckIdentifier(idents[i]))
|
||||
{
|
||||
incbinType = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (incbinType == -1)
|
||||
return;
|
||||
|
||||
int size = 1 << (incbinType / 2);
|
||||
bool isSigned = ((incbinType % 2) == 0);
|
||||
|
||||
long oldPos = m_pos;
|
||||
long oldLineNum = m_lineNum;
|
||||
|
||||
m_pos += idents[incbinType].length();
|
||||
|
||||
SkipWhitespace();
|
||||
|
||||
if (m_buffer[m_pos] != '(')
|
||||
{
|
||||
m_pos = oldPos;
|
||||
m_lineNum = oldLineNum;
|
||||
return;
|
||||
}
|
||||
|
||||
m_pos++;
|
||||
|
||||
std::printf("{");
|
||||
|
||||
while (true)
|
||||
{
|
||||
SkipWhitespace();
|
||||
|
||||
if (m_buffer[m_pos] != '"')
|
||||
RaiseError("expected double quote");
|
||||
|
||||
m_pos++;
|
||||
|
||||
int startPos = m_pos;
|
||||
|
||||
while (m_buffer[m_pos] != '"')
|
||||
{
|
||||
if (m_buffer[m_pos] == 0)
|
||||
{
|
||||
if (m_pos >= m_size)
|
||||
RaiseError("unexpected EOF in path string");
|
||||
else
|
||||
RaiseError("unexpected null character in path string");
|
||||
}
|
||||
|
||||
if (m_buffer[m_pos] == '\r' || m_buffer[m_pos] == '\n')
|
||||
RaiseError("unexpected end of line character in path string");
|
||||
|
||||
if (m_buffer[m_pos] == '\\')
|
||||
RaiseError("unexpected escape in path string");
|
||||
|
||||
m_pos++;
|
||||
}
|
||||
|
||||
std::string path(&m_buffer[startPos], m_pos - startPos);
|
||||
|
||||
m_pos++;
|
||||
|
||||
int fileSize;
|
||||
std::unique_ptr<unsigned char[]> buffer = ReadWholeFile(path, fileSize);
|
||||
|
||||
if ((fileSize % size) != 0)
|
||||
RaiseError("Size %d doesn't evenly divide file size %d.\n", size, fileSize);
|
||||
|
||||
int count = fileSize / size;
|
||||
int offset = 0;
|
||||
|
||||
for (int i = 0; i < count; i++)
|
||||
{
|
||||
int data = ExtractData(buffer, offset, size);
|
||||
offset += size;
|
||||
|
||||
if (isSigned)
|
||||
std::printf("%d,", data);
|
||||
else
|
||||
std::printf("%uu,", data);
|
||||
}
|
||||
|
||||
SkipWhitespace();
|
||||
|
||||
if (m_buffer[m_pos] != ',')
|
||||
break;
|
||||
|
||||
m_pos++;
|
||||
}
|
||||
|
||||
if (m_buffer[m_pos] != ')')
|
||||
RaiseError("expected ')'");
|
||||
|
||||
m_pos++;
|
||||
|
||||
std::printf("}");
|
||||
}
|
||||
|
||||
// Reports a diagnostic message.
|
||||
void CFile::ReportDiagnostic(const char* type, const char* format, std::va_list args)
|
||||
{
|
||||
const int bufferSize = 1024;
|
||||
char buffer[bufferSize];
|
||||
std::vsnprintf(buffer, bufferSize, format, args);
|
||||
std::fprintf(stderr, "%s:%ld: %s: %s\n", m_filename.c_str(), m_lineNum, type, buffer);
|
||||
}
|
||||
|
||||
// Body shared by the variadic Raise* members. Must be expanded inside a
// function whose last named parameter is `format`: it gathers the variadic
// arguments that follow and hands them to ReportDiagnostic() under `type`.
#define DO_REPORT(type)                       \
    do                                        \
    {                                         \
        std::va_list args;                    \
        va_start(args, format);               \
        ReportDiagnostic(type, format, args); \
        va_end(args);                         \
    } while (0)
|
||||
|
||||
// Reports an error diagnostic and terminates the program.
|
||||
void CFile::RaiseError(const char* format, ...)
|
||||
{
|
||||
DO_REPORT("error");
|
||||
std::exit(1);
|
||||
}
|
||||
|
||||
// Reports a warning diagnostic.
|
||||
void CFile::RaiseWarning(const char* format, ...)
|
||||
{
|
||||
DO_REPORT("warning");
|
||||
}
|
61
tools/preproc/c_file.h
Normal file
61
tools/preproc/c_file.h
Normal file
@ -0,0 +1,61 @@
|
||||
// Copyright(c) 2016 YamaArashi
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef C_FILE_H
|
||||
#define C_FILE_H
|
||||
|
||||
#include <cstdarg>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include "preproc.h"
|
||||
|
||||
// Preprocessing state for one C source input. Movable but not copyable.
class CFile
{
public:
    CFile(const char * filenameCStr, bool isStdin);
    CFile(CFile&& other);
    CFile(const CFile&) = delete;
    ~CFile();

    // Entry point; runs the preprocessing pass over the loaded input.
    void Preproc();

private:
    char* m_buffer;         // input text being scanned
    long m_pos;             // current read index into m_buffer
    long m_size;            // used to tell a real end of input from an embedded NUL
    long m_lineNum;         // line number reported in diagnostics
    std::string m_filename; // file name reported in diagnostics
    bool m_isStdin;         // NOTE(review): presumably mirrors the constructor's isStdin flag - confirm

    bool ConsumeHorizontalWhitespace();
    bool ConsumeNewline();
    void SkipWhitespace();
    void TryConvertString();

    // Loads the file at `path`; `size` is an output parameter for its length.
    std::unique_ptr<unsigned char[]> ReadWholeFile(const std::string& path, int& size);

    bool CheckIdentifier(const std::string& ident);

    // Expands an INCBIN_* macro invocation at the current position, if any.
    void TryConvertIncbin();

    // Diagnostics, printed to stderr as "<file>:<line>: <type>: <message>".
    void ReportDiagnostic(const char* type, const char* format, std::va_list args);
    void RaiseError(const char* format, ...);   // also exits with status 1
    void RaiseWarning(const char* format, ...);
};
|
||||
|
||||
// NOTE(review): not referenced anywhere in this header; presumably the block
// size the CFile implementation uses when reading its input - confirm.
#define CHUNK_SIZE 4096
|
||||
|
||||
#endif // C_FILE_H
|
71
tools/preproc/char_util.h
Normal file
71
tools/preproc/char_util.h
Normal file
@ -0,0 +1,71 @@
|
||||
// Copyright(c) 2016 YamaArashi
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef CHAR_UTIL_H
|
||||
#define CHAR_UTIL_H
|
||||
|
||||
#include <cstdint>
|
||||
#include <cassert>
|
||||
|
||||
// True for the 7-bit range 0..127.
inline bool IsAscii(unsigned char c)
{
    return c <= 127;
}
|
||||
|
||||
// True for 'A'..'Z' and 'a'..'z'.
inline bool IsAsciiAlpha(unsigned char c)
{
    if (c >= 'a')
        return c <= 'z';

    return c >= 'A' && c <= 'Z';
}
|
||||
|
||||
// True for '0'..'9'.
inline bool IsAsciiDigit(unsigned char c)
{
    return !(c < '0' || c > '9');
}
|
||||
|
||||
// True for '0'..'9', 'a'..'f' and 'A'..'F'.
inline bool IsAsciiHexDigit(unsigned char c)
{
    if (c >= '0' && c <= '9')
        return true;

    if (c >= 'a' && c <= 'f')
        return true;

    return c >= 'A' && c <= 'F';
}
|
||||
|
||||
inline bool IsAsciiAlphanum(unsigned char c)
|
||||
{
|
||||
return (IsAsciiAlpha(c) || IsAsciiDigit(c));
|
||||
}
|
||||
|
||||
// True for the range from space through tilde, inclusive.
inline bool IsAsciiPrintable(unsigned char c)
{
    if (c < ' ')
        return false;

    return c <= '~';
}
|
||||
|
||||
// Returns whether the character can start a C identifier or the identifier of a "{FOO}" constant in strings.
|
||||
inline bool IsIdentifierStartingChar(unsigned char c)
|
||||
{
|
||||
return IsAsciiAlpha(c) || c == '_';
|
||||
}
|
||||
|
||||
// Returns whether the character can be used in a C identifier or the identifier of a "{FOO}" constant in strings.
|
||||
inline bool IsIdentifierChar(unsigned char c)
|
||||
{
|
||||
return IsAsciiAlphanum(c) || c == '_';
|
||||
}
|
||||
|
||||
#endif // CHAR_UTIL_H
|
408
tools/preproc/charmap.cpp
Normal file
408
tools/preproc/charmap.cpp
Normal file
@ -0,0 +1,408 @@
|
||||
// Copyright(c) 2016 YamaArashi
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdint>
|
||||
#include <cstdarg>
|
||||
#include "preproc.h"
|
||||
#include "charmap.h"
|
||||
#include "char_util.h"
|
||||
#include "utf8.h"
|
||||
|
||||
// Kind of left-hand side found at the start of a charmap entry.
enum LhsType
{
    Char,     // quoted character, e.g. 'A'
    Escape,   // quoted backslash escape, e.g. '\n'
    Constant, // bare identifier
    None      // end of input: no further entries
};
|
||||
|
||||
struct Lhs
|
||||
{
|
||||
LhsType type;
|
||||
std::string name;
|
||||
std::int32_t code;
|
||||
};
|
||||
|
||||
class CharmapReader
|
||||
{
|
||||
public:
|
||||
CharmapReader(std::string filename);
|
||||
CharmapReader(const CharmapReader&) = delete;
|
||||
~CharmapReader();
|
||||
Lhs ReadLhs();
|
||||
void ExpectEqualsSign();
|
||||
std::string ReadSequence();
|
||||
void ExpectEmptyRestOfLine();
|
||||
void RaiseError(const char* format, ...);
|
||||
|
||||
private:
|
||||
char* m_buffer;
|
||||
long m_pos;
|
||||
long m_size;
|
||||
long m_lineNum;
|
||||
std::string m_filename;
|
||||
|
||||
void RemoveComments();
|
||||
std::string ReadConstant();
|
||||
void SkipWhitespace();
|
||||
};
|
||||
|
||||
// Loads the whole charmap file into a NUL-terminated buffer, resets the read
// position to the start (line 1) and blanks out '@' comments. A file that
// cannot be opened, measured or read is a fatal error.
CharmapReader::CharmapReader(std::string filename) : m_filename(filename)
{
    FILE *fp = std::fopen(filename.c_str(), "rb");

    if (fp == NULL)
        FATAL_ERROR("Failed to open \"%s\" for reading.\n", filename.c_str());

    std::fseek(fp, 0, SEEK_END);

    m_size = std::ftell(fp);

    if (m_size < 0)
        FATAL_ERROR("File size of \"%s\" is less than zero.\n", filename.c_str());

    m_buffer = new char[m_size + 1];

    std::rewind(fp);

    // fread() returns 0 when the item size is 0, so an empty file must skip
    // the "exactly one item" check instead of being reported as a read error.
    if (m_size > 0 && std::fread(m_buffer, m_size, 1, fp) != 1)
        FATAL_ERROR("Failed to read \"%s\".\n", filename.c_str());

    m_buffer[m_size] = 0;

    std::fclose(fp);

    m_pos = 0;
    m_lineNum = 1;

    RemoveComments();
}
|
||||
|
||||
// Frees the buffer allocated by the constructor.
CharmapReader::~CharmapReader()
{
    delete[] m_buffer;
}
|
||||
|
||||
// Reads the left-hand side of the next charmap entry, first skipping blank
// lines (each one advances the line counter).
//
// Recognised forms and the result they produce:
//   'x'     a single UTF-8 character  -> LhsType::Char,     code set
//   '\x'    a backslash escape        -> LhsType::Escape,   code set
//           ('\'' and '\\' stand for the quote and backslash characters
//           themselves and yield LhsType::Char; '\"' is rejected)
//   NAME    an identifier             -> LhsType::Constant, name set
//   end of input                      -> LhsType::None
//
// Anything else is reported through RaiseError(), which exits the program.
Lhs CharmapReader::ReadLhs()
{
    Lhs lhs;

    for (;;)
    {
        SkipWhitespace();

        if (m_buffer[m_pos] != '\n')
            break;

        m_pos++;
        m_lineNum++;
    }

    if (m_buffer[m_pos] == '\'')
    {
        m_pos++;

        bool isEscape = (m_buffer[m_pos] == '\\');

        if (isEscape)
            m_pos++;

        unsigned char c = m_buffer[m_pos];

        if (c == 0)
        {
            // The buffer is NUL-terminated, so a zero byte is either the real
            // end of the file or a stray NUL inside it.
            if (m_pos >= m_size)
                RaiseError("unexpected EOF in UTF-8 character literal");
            else
                RaiseError("unexpected null character in UTF-8 character literal");
        }

        if (IsAscii(c) && !IsAsciiPrintable(c))
            RaiseError("unexpected character U+%X in UTF-8 character literal", c);

        UnicodeChar unicodeChar = DecodeUtf8(&m_buffer[m_pos]);
        std::int32_t code = unicodeChar.code;

        if (code == -1)
            RaiseError("invalid encoding in UTF-8 character literal");

        m_pos += unicodeChar.encodingLength;

        if (m_buffer[m_pos] != '\'')
            RaiseError("unterminated character literal");

        m_pos++;

        lhs.code = code;

        if (isEscape)
        {
            if (code >= 128)
                RaiseError("escapes using non-ASCII characters are invalid");

            switch (code)
            {
            case '\'':
            case '\\':
                // Escaped quote / backslash denote the plain characters.
                // The backslash case used to fall through into the
                // double-quote error below.
                lhs.type = LhsType::Char;
                break;
            case '"':
                RaiseError("cannot escape double quote");
                break;
            default:
                lhs.type = LhsType::Escape;
                break;
            }
        }
        else
        {
            if (code == '\'')
                RaiseError("empty character literal");

            lhs.type = LhsType::Char;
        }
    }
    else if (IsIdentifierStartingChar(m_buffer[m_pos]))
    {
        lhs.type = LhsType::Constant;
        lhs.name = ReadConstant();
    }
    else if (m_buffer[m_pos] == '\r')
    {
        RaiseError("only Unix-style LF newlines are supported");
    }
    else if (m_buffer[m_pos] == 0)
    {
        if (m_pos < m_size)
            RaiseError("unexpected null character");

        lhs.type = LhsType::None;
    }
    else
    {
        RaiseError("junk at start of line");
    }

    return lhs;
}
|
||||
|
||||
void CharmapReader::ExpectEqualsSign()
|
||||
{
|
||||
SkipWhitespace();
|
||||
|
||||
if (m_buffer[m_pos] != '=')
|
||||
RaiseError("expected equals sign");
|
||||
|
||||
m_pos++;
|
||||
}
|
||||
|
||||
// Maps one hexadecimal digit character to its value (0..15). Characters that
// are not hex digits map to 0.
static unsigned int ConvertHexDigit(char c)
{
    if ('0' <= c && c <= '9')
        return c - '0';

    if ('A' <= c && c <= 'F')
        return c - 'A' + 10;

    if ('a' <= c && c <= 'f')
        return c - 'a' + 10;

    return 0;
}
|
||||
|
||||
std::string CharmapReader::ReadSequence()
|
||||
{
|
||||
SkipWhitespace();
|
||||
|
||||
long startPos = m_pos;
|
||||
|
||||
unsigned int length = 0;
|
||||
|
||||
while (IsAsciiHexDigit(m_buffer[m_pos]) && IsAsciiHexDigit(m_buffer[m_pos + 1]))
|
||||
{
|
||||
m_pos += 2;
|
||||
length++;
|
||||
|
||||
if (length > kMaxCharmapSequenceLength)
|
||||
RaiseError("byte sequence too long (max is %lu bytes)", kMaxCharmapSequenceLength);
|
||||
|
||||
SkipWhitespace();
|
||||
}
|
||||
|
||||
if (IsAsciiHexDigit(m_buffer[m_pos]))
|
||||
RaiseError("each byte must have 2 hex digits");
|
||||
|
||||
if (length == 0)
|
||||
RaiseError("expected byte sequence");
|
||||
|
||||
std::string sequence;
|
||||
sequence.reserve(length);
|
||||
|
||||
m_pos = startPos;
|
||||
|
||||
for (unsigned int i = 0; i < length; i++)
|
||||
{
|
||||
unsigned int digit1 = ConvertHexDigit(m_buffer[m_pos]);
|
||||
unsigned int digit2 = ConvertHexDigit(m_buffer[m_pos + 1]);
|
||||
unsigned char byte = digit1 * 16 + digit2;
|
||||
sequence += byte;
|
||||
|
||||
m_pos += 2;
|
||||
SkipWhitespace();
|
||||
}
|
||||
|
||||
return sequence;
|
||||
}
|
||||
|
||||
void CharmapReader::ExpectEmptyRestOfLine()
|
||||
{
|
||||
SkipWhitespace();
|
||||
|
||||
if (m_buffer[m_pos] == 0)
|
||||
{
|
||||
if (m_pos < m_size)
|
||||
RaiseError("unexpected null character");
|
||||
}
|
||||
else if (m_buffer[m_pos] == '\n')
|
||||
{
|
||||
m_pos++;
|
||||
m_lineNum++;
|
||||
}
|
||||
else if (m_buffer[m_pos] == '\r')
|
||||
{
|
||||
RaiseError("only Unix-style LF newlines are supported");
|
||||
}
|
||||
else
|
||||
{
|
||||
RaiseError("junk at end of line");
|
||||
}
|
||||
}
|
||||
|
||||
void CharmapReader::RaiseError(const char* format, ...)
|
||||
{
|
||||
const int bufferSize = 1024;
|
||||
char buffer[bufferSize];
|
||||
|
||||
std::va_list args;
|
||||
va_start(args, format);
|
||||
std::vsnprintf(buffer, bufferSize, format, args);
|
||||
va_end(args);
|
||||
|
||||
std::fprintf(stderr, "%s:%ld: error: %s\n", m_filename.c_str(), m_lineNum, buffer);
|
||||
|
||||
std::exit(1);
|
||||
}
|
||||
|
||||
void CharmapReader::RemoveComments()
|
||||
{
|
||||
long pos = 0;
|
||||
bool inString = false;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (m_buffer[pos] == 0)
|
||||
return;
|
||||
|
||||
if (inString)
|
||||
{
|
||||
if (m_buffer[pos] == '\\' && m_buffer[pos + 1] == '\'')
|
||||
{
|
||||
pos += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_buffer[pos] == '\'')
|
||||
inString = false;
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
else if (m_buffer[pos] == '@')
|
||||
{
|
||||
while (m_buffer[pos] != '\n' && m_buffer[pos] != 0)
|
||||
m_buffer[pos++] = ' ';
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_buffer[pos] == '\'')
|
||||
inString = true;
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::string CharmapReader::ReadConstant()
|
||||
{
|
||||
long startPos = m_pos;
|
||||
|
||||
while (IsIdentifierChar(m_buffer[m_pos]))
|
||||
m_pos++;
|
||||
|
||||
return std::string(&m_buffer[startPos], m_pos - startPos);
|
||||
}
|
||||
|
||||
void CharmapReader::SkipWhitespace()
|
||||
{
|
||||
while (m_buffer[m_pos] == '\t' || m_buffer[m_pos] == ' ')
|
||||
m_pos++;
|
||||
}
|
||||
|
||||
// Builds the lookup tables from the charmap file `filename`. Each entry has
// the form "<lhs> = <hex bytes>"; defining the same character, escape or
// constant twice is an error.
Charmap::Charmap(std::string filename)
{
    CharmapReader reader(filename);

    for (Lhs lhs = reader.ReadLhs(); lhs.type != LhsType::None; lhs = reader.ReadLhs())
    {
        reader.ExpectEqualsSign();

        std::string sequence = reader.ReadSequence();

        if (lhs.type == LhsType::Char)
        {
            if (m_chars.count(lhs.code) != 0)
                reader.RaiseError("redefining char");

            m_chars[lhs.code] = sequence;
        }
        else if (lhs.type == LhsType::Escape)
        {
            // An empty slot means "not defined yet".
            if (!m_escapes[lhs.code].empty())
                reader.RaiseError("redefining escape");

            m_escapes[lhs.code] = sequence;
        }
        else if (lhs.type == LhsType::Constant)
        {
            if (m_constants.count(lhs.name) != 0)
                reader.RaiseError("redefining constant");

            m_constants[lhs.name] = sequence;
        }

        reader.ExpectEmptyRestOfLine();
    }
}
|
64
tools/preproc/charmap.h
Normal file
64
tools/preproc/charmap.h
Normal file
@ -0,0 +1,64 @@
|
||||
// Copyright(c) 2016 YamaArashi
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef CHARMAP_H
|
||||
#define CHARMAP_H
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
// Lookup tables loaded from a charmap file. Each lookup returns the byte
// sequence mapped to its key, or an empty string when there is no mapping.
class Charmap
{
public:
    Charmap(std::string filename);

    // Byte sequence for the character with code point `code`.
    std::string Char(std::int32_t code)
    {
        auto it = m_chars.find(code);

        if (it == m_chars.end())
            return std::string();

        return it->second;
    }

    // Byte sequence for the escape '\<code>'. Only the ASCII range has
    // slots; anything outside it is reported as unmapped rather than
    // indexing past the end of the table.
    std::string Escape(unsigned char code)
    {
        if (code >= sizeof(m_escapes) / sizeof(m_escapes[0]))
            return std::string();

        return m_escapes[code];
    }

    // Byte sequence for the named constant `identifier`.
    std::string Constant(std::string identifier)
    {
        auto it = m_constants.find(identifier);

        if (it == m_constants.end())
            return std::string();

        return it->second;
    }

private:
    std::map<std::int32_t, std::string> m_chars;
    std::string m_escapes[128]; // indexed by ASCII code; empty means undefined
    std::map<std::string, std::string> m_constants;
};
|
||||
|
||||
#endif // CHARMAP_H
|
164
tools/preproc/preproc.cpp
Normal file
164
tools/preproc/preproc.cpp
Normal file
@ -0,0 +1,164 @@
|
||||
// Copyright(c) 2016 YamaArashi
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include <string>
|
||||
#include <stack>
|
||||
#include "preproc.h"
|
||||
#include "asm_file.h"
|
||||
#include "c_file.h"
|
||||
#include "charmap.h"
|
||||
|
||||
// The character map in use for this run; created in main() from the charmap
// file named on the command line.
Charmap* g_charmap = nullptr;
|
||||
|
||||
// Writes the first `length` bytes of `s` to stdout as one assembler line of
// the form "\t.byte 0xNN, 0xNN, ...". Prints nothing when length <= 0.
void PrintAsmBytes(unsigned char *s, int length)
{
    if (length <= 0)
        return;

    std::printf("\t.byte ");

    for (int i = 0; i < length; i++)
    {
        if (i > 0)
            std::printf(", ");

        std::printf("0x%02X", s[i]);
    }

    std::putchar('\n');
}
|
||||
|
||||
void PreprocAsmFile(std::string filename)
|
||||
{
|
||||
std::stack<AsmFile> stack;
|
||||
|
||||
stack.push(AsmFile(filename));
|
||||
|
||||
for (;;)
|
||||
{
|
||||
while (stack.top().IsAtEnd())
|
||||
{
|
||||
stack.pop();
|
||||
|
||||
if (stack.empty())
|
||||
return;
|
||||
else
|
||||
stack.top().OutputLocation();
|
||||
}
|
||||
|
||||
Directive directive = stack.top().GetDirective();
|
||||
|
||||
switch (directive)
|
||||
{
|
||||
case Directive::Include:
|
||||
stack.push(AsmFile(stack.top().ReadPath()));
|
||||
stack.top().OutputLocation();
|
||||
break;
|
||||
case Directive::String:
|
||||
{
|
||||
unsigned char s[kMaxStringLength];
|
||||
int length = stack.top().ReadString(s);
|
||||
PrintAsmBytes(s, length);
|
||||
break;
|
||||
}
|
||||
case Directive::Braille:
|
||||
{
|
||||
unsigned char s[kMaxStringLength];
|
||||
int length = stack.top().ReadBraille(s);
|
||||
PrintAsmBytes(s, length);
|
||||
break;
|
||||
}
|
||||
case Directive::Unknown:
|
||||
{
|
||||
std::string globalLabel = stack.top().GetGlobalLabel();
|
||||
|
||||
if (globalLabel.length() != 0)
|
||||
{
|
||||
const char *s = globalLabel.c_str();
|
||||
std::printf("%s: ; .global %s\n", s, s);
|
||||
}
|
||||
else
|
||||
{
|
||||
stack.top().OutputLine();
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void PreprocCFile(const char * filename, bool isStdin)
|
||||
{
|
||||
CFile cFile(filename, isStdin);
|
||||
cFile.Preproc();
|
||||
}
|
||||
|
||||
// Returns a pointer to the text after the last '.' in `filename`, or nullptr
// when there is no '.' past the first character or nothing follows it.
// The result points into `filename`; nothing is copied.
char* GetFileExtension(char* filename)
{
    char* cursor = filename;

    // Move to the terminating NUL.
    while (*cursor != 0)
        cursor++;

    // Walk back to the last '.', stopping at the first character at the latest.
    for (; cursor > filename; cursor--)
    {
        if (*cursor == '.')
            break;
    }

    if (cursor == filename)
        return nullptr;

    cursor++;

    return (*cursor != 0) ? cursor : nullptr;
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
if (argc < 3 || argc > 4)
|
||||
{
|
||||
std::fprintf(stderr, "Usage: %s SRC_FILE CHARMAP_FILE [-i]\nwhere -i denotes if input is from stdin\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
g_charmap = new Charmap(argv[2]);
|
||||
|
||||
char* extension = GetFileExtension(argv[1]);
|
||||
|
||||
if (!extension)
|
||||
FATAL_ERROR("\"%s\" has no file extension.\n", argv[1]);
|
||||
|
||||
if ((extension[0] == 's') && extension[1] == 0)
|
||||
PreprocAsmFile(argv[1]);
|
||||
else if ((extension[0] == 'c' || extension[0] == 'i') && extension[1] == 0) {
|
||||
if (argc == 4) {
|
||||
if (argv[3][0] == '-' && argv[3][1] == 'i' && argv[3][2] == '\0') {
|
||||
PreprocCFile(argv[1], true);
|
||||
} else {
|
||||
FATAL_ERROR("unknown argument flag \"%s\".\n", argv[3]);
|
||||
}
|
||||
} else {
|
||||
PreprocCFile(argv[1], false);
|
||||
}
|
||||
} else
|
||||
FATAL_ERROR("\"%s\" has an unknown file extension of \"%s\".\n", argv[1], extension);
|
||||
|
||||
return 0;
|
||||
}
|
54
tools/preproc/preproc.h
Normal file
54
tools/preproc/preproc.h
Normal file
@ -0,0 +1,54 @@
|
||||
// Copyright(c) 2016 YamaArashi
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef PREPROC_H
|
||||
#define PREPROC_H
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include "charmap.h"
|
||||
|
||||
// FATAL_ERROR(format, ...): prints a printf-style message to stderr and exits
// with status 1.
//
// The two variants differ only in how the variadic arguments are pasted: the
// non-MSVC one uses the GNU `##__VA_ARGS__` form so the macro can also be
// invoked with a format string and no further arguments.
#ifdef _MSC_VER

#define FATAL_ERROR(format, ...)                   \
    do                                             \
    {                                              \
        std::fprintf(stderr, format, __VA_ARGS__); \
        std::exit(1);                              \
    } while (0)

#else

#define FATAL_ERROR(format, ...)                     \
    do                                               \
    {                                                \
        std::fprintf(stderr, format, ##__VA_ARGS__); \
        std::exit(1);                                \
    } while (0)

#endif // _MSC_VER
|
||||
|
||||
const int kMaxPath = 256;
|
||||
const int kMaxStringLength = 1024;
|
||||
const unsigned long kMaxCharmapSequenceLength = 16;
|
||||
|
||||
extern Charmap* g_charmap;
|
||||
|
||||
#endif // PREPROC_H
|
355
tools/preproc/string_parser.cpp
Normal file
355
tools/preproc/string_parser.cpp
Normal file
@ -0,0 +1,355 @@
|
||||
// Copyright(c) 2016 YamaArashi
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdarg>
|
||||
#include <stdexcept>
|
||||
#include "preproc.h"
|
||||
#include "string_parser.h"
|
||||
#include "char_util.h"
|
||||
#include "utf8.h"
|
||||
|
||||
// Reads a charmap char or escape sequence.
|
||||
std::string StringParser::ReadCharOrEscape()
|
||||
{
|
||||
std::string sequence;
|
||||
|
||||
bool isEscape = (m_buffer[m_pos] == '\\');
|
||||
|
||||
if (isEscape)
|
||||
{
|
||||
m_pos++;
|
||||
|
||||
if (m_buffer[m_pos] == '"')
|
||||
{
|
||||
sequence = g_charmap->Char('"');
|
||||
|
||||
if (sequence.length() == 0)
|
||||
RaiseError("no mapping exists for double quote");
|
||||
|
||||
return sequence;
|
||||
}
|
||||
else if (m_buffer[m_pos] == '\\')
|
||||
{
|
||||
sequence = g_charmap->Char('\\');
|
||||
|
||||
if (sequence.length() == 0)
|
||||
RaiseError("no mapping exists for backslash");
|
||||
|
||||
return sequence;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned char c = m_buffer[m_pos];
|
||||
|
||||
if (c == 0)
|
||||
{
|
||||
if (m_pos >= m_size)
|
||||
RaiseError("unexpected EOF in UTF-8 string");
|
||||
else
|
||||
RaiseError("unexpected null character in UTF-8 string");
|
||||
}
|
||||
|
||||
if (IsAscii(c) && !IsAsciiPrintable(c))
|
||||
RaiseError("unexpected character U+%X in UTF-8 string", c);
|
||||
|
||||
UnicodeChar unicodeChar = DecodeUtf8(&m_buffer[m_pos]);
|
||||
m_pos += unicodeChar.encodingLength;
|
||||
std::int32_t code = unicodeChar.code;
|
||||
|
||||
if (code == -1)
|
||||
RaiseError("invalid encoding in UTF-8 string");
|
||||
|
||||
if (isEscape && code >= 128)
|
||||
RaiseError("escapes using non-ASCII characters are invalid");
|
||||
|
||||
sequence = isEscape ? g_charmap->Escape(code) : g_charmap->Char(code);
|
||||
|
||||
if (sequence.length() == 0)
|
||||
{
|
||||
if (isEscape)
|
||||
RaiseError("unknown escape '\\%c'", code);
|
||||
else
|
||||
RaiseError("unknown character U+%X", code);
|
||||
}
|
||||
|
||||
return sequence;
|
||||
}
|
||||
|
||||
// Reads a charmap constant, i.e. "{FOO}".
|
||||
std::string StringParser::ReadBracketedConstants()
|
||||
{
|
||||
std::string totalSequence;
|
||||
|
||||
m_pos++; // Assume we're on the left curly bracket.
|
||||
|
||||
while (m_buffer[m_pos] != '}')
|
||||
{
|
||||
SkipWhitespace();
|
||||
|
||||
if (IsIdentifierStartingChar(m_buffer[m_pos]))
|
||||
{
|
||||
long startPos = m_pos;
|
||||
|
||||
m_pos++;
|
||||
|
||||
while (IsIdentifierChar(m_buffer[m_pos]))
|
||||
m_pos++;
|
||||
|
||||
std::string sequence = g_charmap->Constant(std::string(&m_buffer[startPos], m_pos - startPos));
|
||||
|
||||
if (sequence.length() == 0)
|
||||
{
|
||||
m_buffer[m_pos] = 0;
|
||||
RaiseError("unknown constant '%s'", &m_buffer[startPos]);
|
||||
}
|
||||
|
||||
totalSequence += sequence;
|
||||
}
|
||||
else if (IsAsciiDigit(m_buffer[m_pos]))
|
||||
{
|
||||
Integer integer = ReadInteger();
|
||||
|
||||
switch (integer.size)
|
||||
{
|
||||
case 1:
|
||||
totalSequence += (unsigned char)integer.value;
|
||||
break;
|
||||
case 2:
|
||||
totalSequence += (unsigned char)integer.value;
|
||||
totalSequence += (unsigned char)(integer.value >> 8);
|
||||
break;
|
||||
case 4:
|
||||
totalSequence += (unsigned char)integer.value;
|
||||
totalSequence += (unsigned char)(integer.value >> 8);
|
||||
totalSequence += (unsigned char)(integer.value >> 16);
|
||||
totalSequence += (unsigned char)(integer.value >> 24);
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (m_buffer[m_pos] == 0)
|
||||
{
|
||||
if (m_pos >= m_size)
|
||||
RaiseError("unexpected EOF after left curly bracket");
|
||||
else
|
||||
RaiseError("unexpected null character within curly brackets");
|
||||
}
|
||||
else
|
||||
{
|
||||
if (IsAsciiPrintable(m_buffer[m_pos]))
|
||||
RaiseError("unexpected character '%c' within curly brackets", m_buffer[m_pos]);
|
||||
else
|
||||
RaiseError("unexpected character '\\x%02X' within curly brackets", m_buffer[m_pos]);
|
||||
}
|
||||
}
|
||||
|
||||
m_pos++; // Go past the right curly bracket.
|
||||
|
||||
return totalSequence;
|
||||
}
|
||||
|
||||
// Reads a charmap string.
|
||||
int StringParser::ParseString(long srcPos, unsigned char* dest, int& destLength)
|
||||
{
|
||||
m_pos = srcPos;
|
||||
|
||||
if (m_buffer[m_pos] != '"')
|
||||
RaiseError("expected UTF-8 string literal");
|
||||
|
||||
long start = m_pos;
|
||||
|
||||
m_pos++;
|
||||
|
||||
destLength = 0;
|
||||
|
||||
while (m_buffer[m_pos] != '"')
|
||||
{
|
||||
std::string sequence = (m_buffer[m_pos] == '{') ? ReadBracketedConstants() : ReadCharOrEscape();
|
||||
|
||||
for (const char& c : sequence)
|
||||
{
|
||||
if (destLength == kMaxStringLength)
|
||||
RaiseError("mapped string longer than %d bytes", kMaxStringLength);
|
||||
|
||||
dest[destLength++] = c;
|
||||
}
|
||||
}
|
||||
|
||||
m_pos++; // Go past the right quote.
|
||||
|
||||
return m_pos - start;
|
||||
}
|
||||
|
||||
// Formats a printf-style message and throws it as a std::runtime_error.
// The formatted text is truncated to fit a 1024-byte buffer (including the
// terminating null). This function never returns normally.
void StringParser::RaiseError(const char* format, ...)
{
    char message[1024];

    std::va_list varArgs;
    va_start(varArgs, format);
    std::vsnprintf(message, sizeof(message), format, varArgs);
    va_end(varArgs);

    throw std::runtime_error(message);
}
|
||||
|
||||
// Converts a digit character to its numerical value in the given radix.
// Accepts '0'-'9', 'A'-'F' and 'a'-'f'. Returns -1 when c is not one of
// those characters or when its value is not below radix.
static int ConvertDigit(char c, int radix)
{
    int value = -1;

    if ('0' <= c && c <= '9')
        value = c - '0';
    else if ('A' <= c && c <= 'F')
        value = c - 'A' + 10;
    else if ('a' <= c && c <= 'f')
        value = c - 'a' + 10;

    if (value < 0 || value >= radix)
        return -1;

    return value;
}
|
||||
|
||||
void StringParser::SkipRestOfInteger(int radix)
|
||||
{
|
||||
while (ConvertDigit(m_buffer[m_pos], radix) != -1)
|
||||
m_pos++;
|
||||
}
|
||||
|
||||
// Reads an unsigned decimal integer literal starting at m_pos.
//
// An optional suffix selects the encoded width:
//   'H' - halfword (2 bytes); the value must be below 0x10000
//   'W' - word (4 bytes)
// Without a suffix the smallest of 1, 2 or 4 bytes that holds the value is
// chosen.
//
// Returns the value together with its width in bytes. Raises an error if
// the literal does not fit in 32 bits or if an 'H' literal is too large.
StringParser::Integer StringParser::ReadDecimal()
{
    const int radix = 10;
    const std::uint64_t max = UINT32_MAX;
    const long startPos = m_pos;
    std::uint64_t n = 0;
    int digit;

    while ((digit = ConvertDigit(m_buffer[m_pos], radix)) != -1)
    {
        n = n * radix + digit;

        // UINT32_MAX itself is representable in 32 bits, so only values
        // strictly greater than it are out of range.
        if (n > max)
        {
            // m_pos still points at the digit just consumed, so this skips
            // it and everything after it to capture the full literal.
            SkipRestOfInteger(radix);

            std::string intLiteral(m_buffer + startPos, m_pos - startPos);
            RaiseError("integer literal \"%s\" is too large", intLiteral.c_str());
        }

        m_pos++;
    }

    int size;

    if (m_buffer[m_pos] == 'H')
    {
        if (n >= 0x10000)
        {
            RaiseError("%lu is too large to be a halfword", (unsigned long)n);
        }

        size = 2;
        m_pos++;
    }
    else if (m_buffer[m_pos] == 'W')
    {
        size = 4;
        m_pos++;
    }
    else
    {
        // No suffix: pick the narrowest width that can hold the value.
        if (n >= 0x10000)
            size = 4;
        else if (n >= 0x100)
            size = 2;
        else
            size = 1;
    }

    return{ static_cast<std::uint32_t>(n), size };
}
|
||||
|
||||
// Reads the digits of a hexadecimal integer literal starting at m_pos.
// ReadInteger calls this after it has already stepped over the "0x" prefix.
//
// The number of digits determines the encoded width: 2 digits = 1 byte,
// 4 digits = 2 bytes, 8 digits = 4 bytes. Any other digit count is an error.
//
// Returns the value together with its width in bytes. Raises an error if
// the literal does not fit in 32 bits or has an unsupported digit count.
StringParser::Integer StringParser::ReadHex()
{
    const int radix = 16;
    const std::uint64_t max = UINT32_MAX;
    const long startPos = m_pos;
    std::uint64_t n = 0;
    int digit;

    while ((digit = ConvertDigit(m_buffer[m_pos], radix)) != -1)
    {
        n = n * radix + digit;

        // UINT32_MAX (0xFFFFFFFF) is a valid 8-digit literal, so only values
        // strictly greater than it are out of range.
        if (n > max)
        {
            // m_pos still points at the digit just consumed, so this skips
            // it and everything after it to capture the full literal.
            SkipRestOfInteger(radix);

            std::string intLiteral(m_buffer + startPos, m_pos - startPos);
            RaiseError("integer literal \"%s\" is too large", intLiteral.c_str());
        }

        m_pos++;
    }

    int length = m_pos - startPos;
    int size = 0;

    switch (length)
    {
    case 2:
        size = 1;
        break;
    case 4:
        size = 2;
        break;
    case 8:
        size = 4;
        break;
    default:
        {
            std::string intLiteral(m_buffer + startPos, m_pos - startPos);
            RaiseError("hex integer literal \"0x%s\" doesn't have length of 2, 4, or 8 digits", intLiteral.c_str());
        }
    }

    return{ static_cast<std::uint32_t>(n), size };
}
|
||||
|
||||
// Reads an integer literal at m_pos. A literal that starts with "0x" is
// parsed as hexadecimal (the prefix is skipped before calling ReadHex);
// anything else is parsed as decimal. Raises an error if the current
// character is not an ASCII digit.
StringParser::Integer StringParser::ReadInteger()
{
    const char first = m_buffer[m_pos];

    if (!IsAsciiDigit(first))
        RaiseError("expected integer");

    const bool hasHexPrefix = (first == '0') && (m_buffer[m_pos + 1] == 'x');

    if (!hasHexPrefix)
        return ReadDecimal();

    m_pos += 2; // Step over "0x".
    return ReadHex();
}
|
||||
|
||||
// Skips tabs and spaces.
|
||||
void StringParser::SkipWhitespace()
|
||||
{
|
||||
while (m_buffer[m_pos] == '\t' || m_buffer[m_pos] == ' ')
|
||||
m_pos++;
|
||||
}
|
55
tools/preproc/string_parser.h
Normal file
55
tools/preproc/string_parser.h
Normal file
@ -0,0 +1,55 @@
|
||||
// Copyright(c) 2016 YamaArashi
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef STRING_PARSER_H
|
||||
#define STRING_PARSER_H
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include "preproc.h"
|
||||
|
||||
// Parses charmap string literals out of a character buffer and converts
// them to their mapped byte sequences.
class StringParser
{
public:
    // buffer: the text to parse. It is held by pointer, not copied, and is
    //         non-const because error reporting may write a null terminator
    //         into it.
    // size:   length of the buffer, used to tell a real end-of-file from an
    //         embedded null character.
    StringParser(char* buffer, long size) : m_buffer(buffer), m_size(size), m_pos(0) {}

    // Parses the string literal at srcPos into dest, stores the number of
    // bytes written in destLength, and returns the number of source
    // characters consumed.
    int ParseString(long srcPos, unsigned char* dest, int& destLength);

private:
    // An integer literal together with its encoded width.
    struct Integer
    {
        std::uint32_t value; // numeric value of the literal
        int size;            // width in bytes: 1, 2 or 4
    };

    char* m_buffer; // text being parsed
    long m_size;    // length of m_buffer
    long m_pos;     // current read position within m_buffer

    // Error reporting.
    void RaiseError(const char* format, ...);

    // Cursor helpers.
    void SkipWhitespace();
    void SkipRestOfInteger(int radix);

    // String element readers.
    std::string ReadCharOrEscape();
    std::string ReadBracketedConstants();

    // Integer literal readers.
    Integer ReadInteger();
    Integer ReadHex();
    Integer ReadDecimal();
};
|
||||
|
||||
#endif // STRING_PARSER_H
|
92
tools/preproc/utf8.cpp
Normal file
92
tools/preproc/utf8.cpp
Normal file
@ -0,0 +1,92 @@
|
||||
// Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
|
||||
// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
|
||||
//
|
||||
// Copyright(c) 2016 YamaArashi
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include <cstdint>
|
||||
#include "utf8.h"
|
||||
|
||||
// Maps every possible byte value (0x00-0xFF) to a byte class in the range
// 0-11. DecodeUtf8 uses the class both as the column offset into
// s_transitionTable and as the shift count that masks the payload bits out
// of a leading byte.
static const unsigned char s_byteTypeTable[] =
|
||||
{
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
|
||||
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
|
||||
8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
|
||||
0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
|
||||
0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
|
||||
};
|
||||
|
||||
// Decoder states, each pre-multiplied by 12 (the number of entries per row
// of s_transitionTable) so that "state + byte class" indexes the table
// directly. In DecodeUtf8, s0 is both the initial state and the state that
// ends decoding successfully; s1 is the state that signals invalid input.
const unsigned char s0 = 0 * 12;
|
||||
const unsigned char s1 = 1 * 12;
|
||||
const unsigned char s2 = 2 * 12;
|
||||
const unsigned char s3 = 3 * 12;
|
||||
const unsigned char s4 = 4 * 12;
|
||||
const unsigned char s5 = 5 * 12;
|
||||
const unsigned char s6 = 6 * 12;
|
||||
const unsigned char s7 = 7 * 12;
|
||||
const unsigned char s8 = 8 * 12;
|
||||
|
||||
// State transition table for the UTF-8 decoder. Each row holds the next
// state for one current state (named in the trailing comment); the column
// is the byte class from s_byteTypeTable. DecodeUtf8 looks up the next state
// as s_transitionTable[state + type].
static const unsigned char s_transitionTable[] =
|
||||
{
|
||||
s0,s1,s2,s3,s5,s8,s7,s1,s1,s1,s4,s6, // s0
|
||||
s1,s1,s1,s1,s1,s1,s1,s1,s1,s1,s1,s1, // s1
|
||||
s1,s0,s1,s1,s1,s1,s1,s0,s1,s0,s1,s1, // s2
|
||||
s1,s2,s1,s1,s1,s1,s1,s2,s1,s2,s1,s1, // s3
|
||||
s1,s1,s1,s1,s1,s1,s1,s2,s1,s1,s1,s1, // s4
|
||||
s1,s2,s1,s1,s1,s1,s1,s1,s1,s2,s1,s1, // s5
|
||||
s1,s1,s1,s1,s1,s1,s1,s3,s1,s3,s1,s1, // s6
|
||||
s1,s3,s1,s1,s1,s1,s1,s3,s1,s3,s1,s1, // s7
|
||||
s1,s3,s1,s1,s1,s1,s1,s1,s1,s1,s1,s1, // s8
|
||||
};
|
||||
|
||||
// Decodes UTF-8 encoded Unicode code point at "s".
|
||||
UnicodeChar DecodeUtf8(const char* s)
|
||||
{
|
||||
UnicodeChar unicodeChar;
|
||||
int state = s0;
|
||||
auto start = s;
|
||||
|
||||
do
|
||||
{
|
||||
unsigned char byte = *s++;
|
||||
int type = s_byteTypeTable[byte];
|
||||
|
||||
if (state == s0)
|
||||
unicodeChar.code = (0xFF >> type) & byte;
|
||||
else
|
||||
unicodeChar.code = (unicodeChar.code << 6) | (byte & 0x3F);
|
||||
|
||||
state = s_transitionTable[state + type];
|
||||
|
||||
if (state == s1)
|
||||
{
|
||||
unicodeChar.code = -1;
|
||||
return unicodeChar;
|
||||
}
|
||||
} while (state != s0);
|
||||
|
||||
unicodeChar.encodingLength = s - start;
|
||||
|
||||
return unicodeChar;
|
||||
}
|
34
tools/preproc/utf8.h
Normal file
34
tools/preproc/utf8.h
Normal file
@ -0,0 +1,34 @@
|
||||
// Copyright(c) 2016 YamaArashi
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef UTF8_H
|
||||
#define UTF8_H
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
// Result of decoding one UTF-8 sequence with DecodeUtf8.
struct UnicodeChar
|
||||
{
|
||||
// Decoded Unicode code point; DecodeUtf8 sets this to -1 for invalid input.
std::int32_t code;
|
||||
// Number of bytes the encoded sequence occupied; DecodeUtf8 sets this
// only when decoding succeeds.
int encodingLength;
|
||||
};
|
||||
|
||||
// Decodes the UTF-8 encoded code point starting at "s". The returned
// UnicodeChar has code == -1 if the bytes are not a valid sequence.
UnicodeChar DecodeUtf8(const char* s);
|
||||
|
||||
#endif // UTF8_H
|
Reference in New Issue
Block a user