409 lines
9.2 KiB
C++
409 lines
9.2 KiB
C++
|
// Copyright(c) 2016 YamaArashi
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
|
||
|
|
||
|
#include <cstdio>
|
||
|
#include <cstdint>
|
||
|
#include <cstdarg>
|
||
|
#include "preproc.h"
|
||
|
#include "charmap.h"
|
||
|
#include "char_util.h"
|
||
|
#include "utf8.h"
|
||
|
|
||
|
// Kind of left-hand side found at the start of a charmap entry.
enum LhsType
{
    Char,     // quoted character literal (including the escaped quote form)
    Escape,   // quoted backslash escape that names an escape code
    Constant, // bare identifier
    None      // no further entry: the end of the input was reached
};
|
||
|
|
||
|
struct Lhs
|
||
|
{
|
||
|
LhsType type;
|
||
|
std::string name;
|
||
|
std::int32_t code;
|
||
|
};
|
||
|
|
||
|
class CharmapReader
|
||
|
{
|
||
|
public:
|
||
|
CharmapReader(std::string filename);
|
||
|
CharmapReader(const CharmapReader&) = delete;
|
||
|
~CharmapReader();
|
||
|
Lhs ReadLhs();
|
||
|
void ExpectEqualsSign();
|
||
|
std::string ReadSequence();
|
||
|
void ExpectEmptyRestOfLine();
|
||
|
void RaiseError(const char* format, ...);
|
||
|
|
||
|
private:
|
||
|
char* m_buffer;
|
||
|
long m_pos;
|
||
|
long m_size;
|
||
|
long m_lineNum;
|
||
|
std::string m_filename;
|
||
|
|
||
|
void RemoveComments();
|
||
|
std::string ReadConstant();
|
||
|
void SkipWhitespace();
|
||
|
};
|
||
|
|
||
|
// Loads the entire charmap file into a NUL-terminated buffer, resets the
// read position and line counter, and blanks out comments.
//
// filename: path of the charmap file; also kept for error messages.
//
// Failures are reported with FATAL_ERROR (project macro; the code below
// relies on it not returning - presumably it terminates the process).
CharmapReader::CharmapReader(std::string filename) : m_filename(filename)
{
    FILE *fp = std::fopen(filename.c_str(), "rb");

    if (fp == nullptr)
        FATAL_ERROR("Failed to open \"%s\" for reading.\n", filename.c_str());

    // Determine the file size by seeking to the end. A failed seek would
    // otherwise make ftell() report a bogus size.
    if (std::fseek(fp, 0, SEEK_END) != 0)
        FATAL_ERROR("Failed to read \"%s\".\n", filename.c_str());

    m_size = std::ftell(fp);

    if (m_size < 0)
        FATAL_ERROR("File size of \"%s\" is less than zero.\n", filename.c_str());

    // One extra byte for the terminating NUL that the parser relies on.
    m_buffer = new char[m_size + 1];

    std::rewind(fp);

    // fread() returns 0 when the element size is 0, so an empty file must
    // not be treated as a read failure; it simply yields an empty buffer.
    if (m_size > 0 && std::fread(m_buffer, m_size, 1, fp) != 1)
        FATAL_ERROR("Failed to read \"%s\".\n", filename.c_str());

    m_buffer[m_size] = 0;

    std::fclose(fp);

    m_pos = 0;
    m_lineNum = 1;

    RemoveComments();
}
|
||
|
|
||
|
// Frees the array-allocated text buffer held by this reader.
CharmapReader::~CharmapReader()
{
    delete[] this->m_buffer;
}
|
||
|
|
||
|
// Reads the left-hand side of the next charmap entry.
//
// Leading blank lines are skipped (advancing the line counter). The result is
// one of:
//   - LhsType::Char:     a quoted literal 'x', or the escaped forms '\'' and
//                        '\\'; `code` holds the decoded code point.
//   - LhsType::Escape:   any other quoted backslash escape '\x' with an ASCII
//                        character; `code` holds that character.
//   - LhsType::Constant: a bare identifier; `name` holds its text.
//   - LhsType::None:     the end of the buffer was reached.
//
// Any malformed input is reported through RaiseError(), which exits the
// process, so code following a RaiseError() call is never reached.
Lhs CharmapReader::ReadLhs()
{
    Lhs lhs;

    // Skip blank lines, keeping the line counter in sync.
    for (;;)
    {
        SkipWhitespace();

        if (m_buffer[m_pos] == '\n')
        {
            m_pos++;
            m_lineNum++;
        }
        else
        {
            break;
        }
    }

    if (m_buffer[m_pos] == '\'')
    {
        m_pos++;

        bool isEscape = (m_buffer[m_pos] == '\\');

        if (isEscape)
        {
            m_pos++;
        }

        unsigned char c = m_buffer[m_pos];

        if (c == 0)
        {
            // A NUL at m_size is the buffer terminator; anywhere earlier it
            // is a literal NUL byte from the file.
            if (m_pos >= m_size)
                RaiseError("unexpected EOF in UTF-8 character literal");
            else
                RaiseError("unexpected null character in UTF-8 character literal");
        }

        if (IsAscii(c) && !IsAsciiPrintable(c))
            RaiseError("unexpected character U+%X in UTF-8 character literal", c);

        UnicodeChar unicodeChar = DecodeUtf8(&m_buffer[m_pos]);
        std::int32_t code = unicodeChar.code;

        // A code of -1 is treated as the decoder's failure marker.
        if (code == -1)
            RaiseError("invalid encoding in UTF-8 character literal");

        m_pos += unicodeChar.encodingLength;

        if (m_buffer[m_pos] != '\'')
            RaiseError("unterminated character literal");

        m_pos++;

        lhs.code = code;

        if (isEscape)
        {
            if (code >= 128)
                RaiseError("escapes using non-ASCII characters are invalid");

            switch (code)
            {
            case '\'':
            case '\\':
                // An escaped quote or backslash denotes the plain character.
                // The backslash case previously lacked a `break` and fell
                // through into the double-quote error below.
                lhs.type = LhsType::Char;
                break;
            case '"':
                RaiseError("cannot escape double quote");
                break;
            default:
                lhs.type = LhsType::Escape;
                break;
            }
        }
        else
        {
            if (code == '\'')
                RaiseError("empty character literal");

            lhs.type = LhsType::Char;
        }
    }
    else if (IsIdentifierStartingChar(m_buffer[m_pos]))
    {
        lhs.type = LhsType::Constant;
        lhs.name = ReadConstant();
    }
    else if (m_buffer[m_pos] == '\r')
    {
        RaiseError("only Unix-style LF newlines are supported");
    }
    else if (m_buffer[m_pos] == 0)
    {
        // Only the terminator at m_size marks the real end of input.
        if (m_pos < m_size)
            RaiseError("unexpected null character");
        lhs.type = LhsType::None;
    }
    else
    {
        RaiseError("junk at start of line");
    }

    return lhs;
}
|
||
|
|
||
|
void CharmapReader::ExpectEqualsSign()
|
||
|
{
|
||
|
SkipWhitespace();
|
||
|
|
||
|
if (m_buffer[m_pos] != '=')
|
||
|
RaiseError("expected equals sign");
|
||
|
|
||
|
m_pos++;
|
||
|
}
|
||
|
|
||
|
// Returns the numeric value (0-15) of an ASCII hexadecimal digit in either
// case. Any other character yields 0, so callers must validate the input
// beforehand.
static unsigned int ConvertHexDigit(char c)
{
    if ('0' <= c && c <= '9')
        return c - '0';

    if ('A' <= c && c <= 'F')
        return 10 + c - 'A';

    if ('a' <= c && c <= 'f')
        return 10 + c - 'a';

    return 0;
}
|
||
|
|
||
|
std::string CharmapReader::ReadSequence()
|
||
|
{
|
||
|
SkipWhitespace();
|
||
|
|
||
|
long startPos = m_pos;
|
||
|
|
||
|
unsigned int length = 0;
|
||
|
|
||
|
while (IsAsciiHexDigit(m_buffer[m_pos]) && IsAsciiHexDigit(m_buffer[m_pos + 1]))
|
||
|
{
|
||
|
m_pos += 2;
|
||
|
length++;
|
||
|
|
||
|
if (length > kMaxCharmapSequenceLength)
|
||
|
RaiseError("byte sequence too long (max is %lu bytes)", kMaxCharmapSequenceLength);
|
||
|
|
||
|
SkipWhitespace();
|
||
|
}
|
||
|
|
||
|
if (IsAsciiHexDigit(m_buffer[m_pos]))
|
||
|
RaiseError("each byte must have 2 hex digits");
|
||
|
|
||
|
if (length == 0)
|
||
|
RaiseError("expected byte sequence");
|
||
|
|
||
|
std::string sequence;
|
||
|
sequence.reserve(length);
|
||
|
|
||
|
m_pos = startPos;
|
||
|
|
||
|
for (unsigned int i = 0; i < length; i++)
|
||
|
{
|
||
|
unsigned int digit1 = ConvertHexDigit(m_buffer[m_pos]);
|
||
|
unsigned int digit2 = ConvertHexDigit(m_buffer[m_pos + 1]);
|
||
|
unsigned char byte = digit1 * 16 + digit2;
|
||
|
sequence += byte;
|
||
|
|
||
|
m_pos += 2;
|
||
|
SkipWhitespace();
|
||
|
}
|
||
|
|
||
|
return sequence;
|
||
|
}
|
||
|
|
||
|
void CharmapReader::ExpectEmptyRestOfLine()
|
||
|
{
|
||
|
SkipWhitespace();
|
||
|
|
||
|
if (m_buffer[m_pos] == 0)
|
||
|
{
|
||
|
if (m_pos < m_size)
|
||
|
RaiseError("unexpected null character");
|
||
|
}
|
||
|
else if (m_buffer[m_pos] == '\n')
|
||
|
{
|
||
|
m_pos++;
|
||
|
m_lineNum++;
|
||
|
}
|
||
|
else if (m_buffer[m_pos] == '\r')
|
||
|
{
|
||
|
RaiseError("only Unix-style LF newlines are supported");
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
RaiseError("junk at end of line");
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void CharmapReader::RaiseError(const char* format, ...)
|
||
|
{
|
||
|
const int bufferSize = 1024;
|
||
|
char buffer[bufferSize];
|
||
|
|
||
|
std::va_list args;
|
||
|
va_start(args, format);
|
||
|
std::vsnprintf(buffer, bufferSize, format, args);
|
||
|
va_end(args);
|
||
|
|
||
|
std::fprintf(stderr, "%s:%ld: error: %s\n", m_filename.c_str(), m_lineNum, buffer);
|
||
|
|
||
|
std::exit(1);
|
||
|
}
|
||
|
|
||
|
void CharmapReader::RemoveComments()
|
||
|
{
|
||
|
long pos = 0;
|
||
|
bool inString = false;
|
||
|
|
||
|
for (;;)
|
||
|
{
|
||
|
if (m_buffer[pos] == 0)
|
||
|
return;
|
||
|
|
||
|
if (inString)
|
||
|
{
|
||
|
if (m_buffer[pos] == '\\' && m_buffer[pos + 1] == '\'')
|
||
|
{
|
||
|
pos += 2;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
if (m_buffer[pos] == '\'')
|
||
|
inString = false;
|
||
|
pos++;
|
||
|
}
|
||
|
}
|
||
|
else if (m_buffer[pos] == '@')
|
||
|
{
|
||
|
while (m_buffer[pos] != '\n' && m_buffer[pos] != 0)
|
||
|
m_buffer[pos++] = ' ';
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
if (m_buffer[pos] == '\'')
|
||
|
inString = true;
|
||
|
pos++;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
std::string CharmapReader::ReadConstant()
|
||
|
{
|
||
|
long startPos = m_pos;
|
||
|
|
||
|
while (IsIdentifierChar(m_buffer[m_pos]))
|
||
|
m_pos++;
|
||
|
|
||
|
return std::string(&m_buffer[startPos], m_pos - startPos);
|
||
|
}
|
||
|
|
||
|
void CharmapReader::SkipWhitespace()
|
||
|
{
|
||
|
while (m_buffer[m_pos] == '\t' || m_buffer[m_pos] == ' ')
|
||
|
m_pos++;
|
||
|
}
|
||
|
|
||
|
// Builds the charmap by parsing every "<lhs> = <hex bytes>" entry of the
// given file. Each entry is stored in m_chars, m_escapes or m_constants
// according to the kind of its left-hand side (these members are declared
// outside this file). Defining the same key twice is reported through the
// reader's RaiseError().
Charmap::Charmap(std::string filename)
{
    CharmapReader reader(filename);

    // ReadLhs() yields LhsType::None once the input is exhausted.
    for (Lhs entry = reader.ReadLhs(); entry.type != LhsType::None; entry = reader.ReadLhs())
    {
        reader.ExpectEqualsSign();

        const std::string bytes = reader.ReadSequence();

        if (entry.type == LhsType::Char)
        {
            if (m_chars.find(entry.code) != m_chars.end())
                reader.RaiseError("redefining char");

            m_chars[entry.code] = bytes;
        }
        else if (entry.type == LhsType::Escape)
        {
            // A non-empty slot means this escape was already defined.
            if (m_escapes[entry.code].length() != 0)
                reader.RaiseError("redefining escape");

            m_escapes[entry.code] = bytes;
        }
        else if (entry.type == LhsType::Constant)
        {
            if (m_constants.find(entry.name) != m_constants.end())
                reader.RaiseError("redefining constant");

            m_constants[entry.name] = bytes;
        }

        reader.ExpectEmptyRestOfLine();
    }
}
|