Implement \x escape sequences

This commit is contained in:
0x4261756D 2024-02-21 16:36:13 +01:00
parent ad3bb57dcc
commit 637638d889
2 changed files with 81 additions and 5 deletions

View File

@ -1,5 +1,6 @@
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.Json.Serialization;
namespace luaaaaah;
@ -13,6 +14,7 @@ class Tokenizer
int closingLongBracketLevel;
Token? currentToken;
CodeLocation currentLocation = new(line: 0, col: 0);
int escapeSequenceNumber;
public Token[] Tokenize(string content)
{
@ -385,7 +387,7 @@ class Tokenizer
}
else
{
throw new NotImplementedException(ch.ToString());
throw new NotImplementedException($"{ch} at {currentLocation}");
}
}
break;
@ -488,7 +490,12 @@ class Tokenizer
state = State.QuoteBackslashZ;
}
break;
default: throw new Exception($"Unknown escape sequence: \\{ch}");
// `\x` inside a double-quoted string: hand over to the state that reads
// the first hexadecimal digit of the escape.
case 'x':
    state = State.QuoteBackslashX;
    break;
// Any other character after the backslash is not handled by this switch.
default:
    throw new Exception($"Unknown escape sequence: \\{ch} at {currentLocation}");
}
}
break;
@ -622,6 +629,11 @@ class Tokenizer
state = State.SingleQuoteBackslashZ;
}
break;
// `\x` inside a single-quoted string: hand over to the state that reads
// the first hexadecimal digit of the escape.
case 'x':
{
    state = State.SingleQuoteBackslashX;
}
break;
// Any other character after the backslash is rejected. The message carries
// the current source location so the offending escape can be found, the
// same way the double-quoted string handler reports it.
default: throw new Exception($"Unknown escape sequence: \\{ch} at {currentLocation}");
}
}
@ -658,6 +670,68 @@ class Tokenizer
}
}
break;
// First digit of a `\xXX` escape inside a single-quoted string.
// Stores the digit's value in escapeSequenceNumber and moves on to the
// state that expects the second digit; anything but an ASCII hex digit
// is an error.
case State.SingleQuoteBackslashX:
{
    if(!char.IsAsciiHexDigit(ch))
    {
        throw new Exception($"{currentLocation}: Expected hex digit in \\x escape sequence, got {ch}");
    }
    // ch is a known ASCII hex digit here: '0'-'9' sort at or below '9',
    // and OR-ing 0x20 folds 'A'-'F' onto 'a'-'f'.
    int highNibble = ch <= '9' ? ch - '0' : (ch | 0x20) - 'a' + 10;
    escapeSequenceNumber = highNibble;
    state = State.SingleQuoteBackslashXHex;
}
break;
// Second digit of a `\xXX` escape inside a single-quoted string.
// Combines it with the first digit held in escapeSequenceNumber, appends
// the resulting character to the token data, and returns to the
// single-quoted string state. Anything but an ASCII hex digit is an error.
case State.SingleQuoteBackslashXHex:
{
    if(char.IsAsciiHexDigit(ch))
    {
        state = State.SingleQuote;
        escapeSequenceNumber = (escapeSequenceNumber * 16) + (char.IsAsciiDigit(ch) ? ch - '0' : 10 + char.ToLowerInvariant(ch) - 'a');
        // Two hex digits give exactly one value in 0x00..0xFF, so exactly
        // one char must be appended. The previous code decoded
        // BitConverter.GetBytes(int) -- four bytes -- as UTF-8, which
        // appended three extra NUL chars after every escape and produced
        // U+FFFD instead of the value for anything >= 0x80.
        // NOTE(review): this stores the escape as the char with the same
        // code unit value (byte 0xF7 -> U+00F7); confirm that matches how
        // token data is later turned back into Lua string bytes.
        AppendDataChar((char)escapeSequenceNumber);
        escapeSequenceNumber = 0;
    }
    else
    {
        throw new Exception($"{currentLocation}: Expected second hex digit in \\x escape sequence, got {ch}");
    }
}
break;
// First digit of a `\xXX` escape inside a double-quoted string.
// Stores the digit's value in escapeSequenceNumber and moves on to the
// state that expects the second digit; anything but an ASCII hex digit
// is an error.
case State.QuoteBackslashX:
{
    if(!char.IsAsciiHexDigit(ch))
    {
        throw new Exception($"{currentLocation}: Expected hex digit in \\x escape sequence, got {ch}");
    }
    // ch is a known ASCII hex digit here: '0'-'9' sort at or below '9',
    // and OR-ing 0x20 folds 'A'-'F' onto 'a'-'f'.
    int highNibble = ch <= '9' ? ch - '0' : (ch | 0x20) - 'a' + 10;
    escapeSequenceNumber = highNibble;
    state = State.QuoteBackslashXHex;
}
break;
// Second digit of a `\xXX` escape inside a double-quoted string.
// Combines it with the first digit held in escapeSequenceNumber, appends
// the resulting character to the token data, and returns to the
// double-quoted string state. Anything but an ASCII hex digit is an error.
case State.QuoteBackslashXHex:
{
    if(char.IsAsciiHexDigit(ch))
    {
        state = State.Quote;
        escapeSequenceNumber = (escapeSequenceNumber * 16) + (char.IsAsciiDigit(ch) ? ch - '0' : 10 + char.ToLowerInvariant(ch) - 'a');
        // Two hex digits give exactly one value in 0x00..0xFF, so exactly
        // one char must be appended. The previous code decoded
        // BitConverter.GetBytes(int) -- four bytes -- as UTF-8, which
        // appended three extra NUL chars after every escape and produced
        // U+FFFD instead of the value for anything >= 0x80.
        // NOTE(review): this stores the escape as the char with the same
        // code unit value (byte 0xF7 -> U+00F7); confirm that matches how
        // token data is later turned back into Lua string bytes.
        AppendDataChar((char)escapeSequenceNumber);
        escapeSequenceNumber = 0;
    }
    else
    {
        throw new Exception($"{currentLocation}: Expected second hex digit in \\x escape sequence, got {ch}");
    }
}
break;
case State.String:
{
if(currentToken == null || currentToken.type == null)
@ -3660,7 +3734,7 @@ class Tokenizer
SmallCommentStart, QuoteBackslash, SingleQuoteBackslash, String, HexNumberX, ExpNumber,
And, Bre, Els, End, Fal, For, Fun, Got, Loc, Nil, Not, Rep, Ret, The, Tru, Unt, Whi,
DotDotDot, HexNumber, QuoteBackslashZ, SingleQuoteBackslashZ,
DotDotDot, HexNumber, QuoteBackslashZ, SingleQuoteBackslashZ, QuoteBackslashX, SingleQuoteBackslashX, QuoteBackslashXHex, SingleQuoteBackslashXHex,
SmallComment, BigComment, BigCommentStartLongBracket, BigCommentEndLongBracket,
Brea, Else, Fals, Func, Goto, Loca, Repe, Retu, Then, True, Unti, Whil, HexExpNumber,

View File

@ -1,4 +1,4 @@
"test" "\z
"test" "\z
@ -7,4 +7,6 @@
abc" "123" "sdlfkgj<3" "asldkfj" zzz "" "" "" "" "" "fasd!" "afd" "" "as" zzzz
abc" "123" "sdlfkgj<3" "asldkfj" zzz "" "" "" "" "" "fasd!" "afd" "" "as" zzzz
"\xf7\xAff\x43"