ПОЛНЫЙ РЕФАКТОРИНГ

This commit is contained in:
2024-08-29 21:57:53 +03:00
commit 88d85865a0
299 changed files with 52566 additions and 0 deletions

View File

@ -0,0 +1,350 @@
<?php
/**
* Author: Nil Portugués Calderó <contact@nilportugues.com>
* Date: 6/26/14
* Time: 12:10 AM.
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace NilPortugues\Sql\QueryFormatter;
use NilPortugues\Sql\QueryFormatter\Helper\Comment;
use NilPortugues\Sql\QueryFormatter\Helper\Indent;
use NilPortugues\Sql\QueryFormatter\Helper\NewLine;
use NilPortugues\Sql\QueryFormatter\Helper\Parentheses;
use NilPortugues\Sql\QueryFormatter\Helper\Token;
use NilPortugues\Sql\QueryFormatter\Helper\WhiteSpace;
use NilPortugues\Sql\QueryFormatter\Tokenizer\Tokenizer;
/**
 * Lightweight SQL formatter heavily based on https://github.com/jdorn/sql-formatter.
 *
 * Walks the token list produced by the Tokenizer and rebuilds the query in
 * $formattedSql. Indentation, line-break, parentheses and comment decisions
 * are delegated to the helper objects that reset() creates.
 *
 * Class Formatter
 */
class Formatter
{
    /**
     * @var Tokenizer
     */
    protected $tokenizer;

    /**
     * @var NewLine
     */
    protected $newLine;

    /**
     * @var Parentheses
     */
    protected $parentheses;

    /**
     * Text substituted for every "\t" placeholder in the final output of format().
     *
     * @var string
     */
    protected $tab = ' ';

    /**
     * Accumulated byte length of the token values seen while inside inline parentheses.
     *
     * @var int
     */
    protected $inlineCount = 0;

    /**
     * True from a LIMIT keyword until its clause is considered finished.
     *
     * @var bool
     */
    protected $clauseLimit = false;

    /**
     * Output buffer the formatter and its helpers append to.
     *
     * @var string
     */
    protected $formattedSql = '';

    /**
     * @var Indent
     */
    protected $indentation;

    /**
     * @var Comment
     */
    protected $comment;

    /**
     * Returns a SQL string in a readable human-friendly format.
     *
     * All state from a previous call is discarded first, so one instance can
     * format any number of queries.
     *
     * @param string $sql
     *
     * @return string
     */
    public function format($sql)
    {
        $this->reset();

        // Indentation is written as "\t" placeholders and swapped for $this->tab at the end.
        $tab = "\t";
        $originalTokens = $this->tokenizer->tokenize((string) $sql);
        $tokens = WhiteSpace::removeTokenWhitespace($originalTokens);

        foreach ($tokens as $i => $token) {
            $queryValue = $token[Tokenizer::TOKEN_VALUE];

            // Apply indent increases and line breaks requested while handling the previous token.
            $this->indentation->increaseSpecialIndent()->increaseBlockIndent();
            $addedNewline = $this->newLine->addNewLineBreak($tab);

            if ($this->comment->stringHasCommentToken($token)) {
                $this->formattedSql = $this->comment->writeCommentBlock($token, $tab, $queryValue);
                continue;
            }

            if ($this->parentheses->getInlineParentheses()) {
                if ($this->parentheses->stringIsClosingParentheses($token)) {
                    $this->parentheses->writeInlineParenthesesBlock($tab, $queryValue);
                    continue;
                }
                $this->newLine->writeNewLineForLongCommaInlineValues($token);
                $this->inlineCount += \strlen($token[Tokenizer::TOKEN_VALUE]);
            }

            // Every case below is a boolean expression, so the subject must be `true`:
            // the first case that evaluates to true wins. Switching on the token array
            // only worked because a non-empty array loosely equals true.
            switch (true) {
                case $this->parentheses->stringIsOpeningParentheses($token):
                    $tokens = $this->formatOpeningParenthesis($token, $i, $tokens, $originalTokens);
                    break;
                case $this->parentheses->stringIsClosingParentheses($token):
                    $this->indentation->decreaseIndentLevelUntilIndentTypeIsSpecial($this);
                    $this->newLine->addNewLineBeforeToken($addedNewline, $tab);
                    break;
                case $this->stringIsEndOfLimitClause($token):
                    $this->clauseLimit = false;
                    break;
                case $token[Tokenizer::TOKEN_VALUE] === ',' && false === $this->parentheses->getInlineParentheses():
                    $this->newLine->writeNewLineBecauseOfComma();
                    break;
                case Token::isTokenTypeReservedTopLevel($token):
                    $queryValue = $this->formatTokenTypeReservedTopLevel($addedNewline, $tab, $token, $queryValue);
                    break;
                case $this->newLine->isTokenTypeReservedNewLine($token):
                    $this->newLine->addNewLineBeforeToken($addedNewline, $tab);
                    if (WhiteSpace::tokenHasExtraWhiteSpaces($token)) {
                        // Collapse any run of whitespace inside a multi-word keyword to one space.
                        $queryValue = \preg_replace('/\s+/', ' ', $queryValue);
                    }
                    break;
            }

            $this->formatBoundaryCharacterToken($token, $i, $tokens, $originalTokens);
            $this->formatWhiteSpaceToken($token, $queryValue);
            $this->formatDashToken($token, $i, $tokens);
        }

        // Expand tab placeholders, drop a space left directly before a line break, trim the ends.
        return \trim(\str_replace(["\t", " \n"], [$this->tab, "\n"], $this->formattedSql))."\n";
    }

    /**
     * Restores the formatter to a pristine state: fresh helper objects, an empty
     * output buffer and cleared LIMIT / inline-length tracking.
     *
     * Clearing $clauseLimit matters when the instance is reused: a query ending in
     * "LIMIT n" leaves the flag set, and the first keyword of the next query would
     * otherwise be taken for the end of that LIMIT clause and skip its top-level
     * formatting.
     */
    public function reset()
    {
        $this->tokenizer = new Tokenizer();
        $this->indentation = new Indent();
        $this->parentheses = new Parentheses($this, $this->indentation);
        $this->newLine = new NewLine($this, $this->indentation, $this->parentheses);
        $this->comment = new Comment($this, $this->indentation, $this->newLine);
        $this->formattedSql = '';
        $this->inlineCount = 0;
        $this->clauseLimit = false;
    }

    /**
     * Handles an opening parenthesis: decides whether the group can stay inline
     * and schedules the matching line break / indentation.
     *
     * @param array $token          The "(" token.
     * @param int   $i              Index of $token inside $tokens.
     * @param array $tokens         Token list without whitespace tokens.
     * @param array $originalTokens Token list including whitespace tokens.
     *
     * @return array The (unchanged) $tokens list.
     */
    protected function formatOpeningParenthesis($token, $i, array &$tokens, array &$originalTokens)
    {
        $length = 0;

        // Look ahead at most 250 tokens. Reaching ")" first marks the group as inline;
        // a ";", a nested "(", a top-level/newline keyword or a comment ends the scan
        // without marking it.
        for ($j = 1; $j <= 250; ++$j) {
            if (!isset($tokens[$i + $j])) {
                continue;
            }

            $next = $tokens[$i + $j];

            if ($this->parentheses->stringIsClosingParentheses($next)) {
                $this->parentheses->writeNewInlineParentheses();
                break;
            }

            if ($this->parentheses->invalidParenthesesTokenValue($next)
                || $this->parentheses->invalidParenthesesTokenType($next)
            ) {
                break;
            }

            $length += \strlen($next[Tokenizer::TOKEN_VALUE]);
        }

        $this->newLine->writeNewLineForLongInlineValues($length);

        // Remove the space emitted before "(" when the original query had no whitespace there.
        if (WhiteSpace::isPrecedingCurrentTokenOfTokenTypeWhiteSpace($originalTokens, $token)) {
            $this->formattedSql = \rtrim($this->formattedSql, ' ');
        }

        $this->newLine->addNewLineAfterOpeningParentheses();

        return $tokens;
    }

    /**
     * Tells whether $token ends the LIMIT clause currently being tracked, i.e. the
     * LIMIT flag is set and the token is neither a comma, a number nor whitespace.
     *
     * @param array $token
     *
     * @return bool
     */
    protected function stringIsEndOfLimitClause($token)
    {
        return $this->clauseLimit
            && $token[Tokenizer::TOKEN_VALUE] !== ','
            && $token[Tokenizer::TOKEN_TYPE] !== Tokenizer::TOKEN_TYPE_NUMBER
            && $token[Tokenizer::TOKEN_TYPE] !== Tokenizer::TOKEN_TYPE_WHITESPACE;
    }

    /**
     * Formats a top-level reserved word: adjusts the special indent, writes the
     * line break before it, normalises its inner whitespace and flags LIMIT.
     *
     * @param bool   $addedNewline Whether a line break was already written for this token.
     * @param string $tab
     * @param array  $token
     * @param string $queryValue
     *
     * @return mixed The token text to append, with inner whitespace collapsed when needed.
     */
    protected function formatTokenTypeReservedTopLevel($addedNewline, $tab, $token, $queryValue)
    {
        $this->indentation
            ->setIncreaseSpecialIndent(true)
            ->decreaseSpecialIndentIfCurrentIndentTypeIsSpecial();

        $this->newLine->writeNewLineBecauseOfTopLevelReservedWord($addedNewline, $tab);

        if (WhiteSpace::tokenHasExtraWhiteSpaces($token)) {
            $queryValue = \preg_replace('/\s+/', ' ', $queryValue);
        }

        Token::tokenHasLimitClause($token, $this->parentheses, $this);

        return $queryValue;
    }

    /**
     * Removes the trailing space from the output when two boundary characters
     * were written directly next to each other in the original query.
     *
     * @param array $token
     * @param int   $i
     * @param array $tokens
     * @param array $originalTokens
     */
    protected function formatBoundaryCharacterToken($token, $i, array &$tokens, array &$originalTokens)
    {
        if (Token::tokenHasMultipleBoundaryCharactersTogether($token, $tokens, $i, $originalTokens)) {
            $this->formattedSql = \rtrim($this->formattedSql, ' ');
        }
    }

    /**
     * Appends the token text followed by one space, trimming the space before
     * ".", "," and ";" and the space after "(" and ".".
     *
     * @param array  $token
     * @param string $queryValue
     */
    protected function formatWhiteSpaceToken($token, $queryValue)
    {
        if (WhiteSpace::tokenHasExtraWhiteSpaceLeft($token)) {
            $this->formattedSql = \rtrim($this->formattedSql, ' ');
        }

        $this->formattedSql .= $queryValue.' ';

        if (WhiteSpace::tokenHasExtraWhiteSpaceRight($token)) {
            $this->formattedSql = \rtrim($this->formattedSql, ' ');
        }
    }

    /**
     * Removes the space after a "-" that is followed by a number when the token
     * before it is not a quote, back-tick quote, word or number.
     *
     * @param array $token
     * @param int   $i
     * @param array $tokens
     */
    protected function formatDashToken($token, $i, array &$tokens)
    {
        if (Token::tokenIsMinusSign($token, $tokens, $i)) {
            $previousTokenType = $tokens[$i - 1][Tokenizer::TOKEN_TYPE];

            if (WhiteSpace::tokenIsNumberAndHasExtraWhiteSpaceRight($previousTokenType)) {
                $this->formattedSql = \rtrim($this->formattedSql, ' ');
            }
        }
    }

    /**
     * @return string
     */
    public function getFormattedSql()
    {
        return $this->formattedSql;
    }

    /**
     * @param string $formattedSql
     *
     * @return $this
     */
    public function setFormattedSql($formattedSql)
    {
        $this->formattedSql = $formattedSql;

        return $this;
    }

    /**
     * Appends text to the output buffer.
     *
     * @param string $string
     *
     * @return $this
     */
    public function appendToFormattedSql($string)
    {
        $this->formattedSql .= $string;

        return $this;
    }

    /**
     * @return int
     */
    public function getInlineCount()
    {
        return $this->inlineCount;
    }

    /**
     * @param int $inlineCount
     *
     * @return $this
     */
    public function setInlineCount($inlineCount)
    {
        $this->inlineCount = $inlineCount;

        return $this;
    }

    /**
     * @return bool
     */
    public function getClauseLimit()
    {
        return $this->clauseLimit;
    }

    /**
     * @param bool $clauseLimit
     *
     * @return $this
     */
    public function setClauseLimit($clauseLimit)
    {
        $this->clauseLimit = $clauseLimit;

        return $this;
    }
}

View File

@ -0,0 +1,80 @@
<?php
/**
* Author: Nil Portugués Calderó <contact@nilportugues.com>
* Date: 12/22/14
* Time: 10:09 PM.
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace NilPortugues\Sql\QueryFormatter\Helper;
use NilPortugues\Sql\QueryFormatter\Formatter;
use NilPortugues\Sql\QueryFormatter\Tokenizer\Tokenizer;
/**
 * Class Comment.
 *
 * Recognises comment tokens and writes them to the formatter output.
 */
class Comment
{
    /**
     * @var \NilPortugues\Sql\QueryFormatter\Formatter
     */
    protected $formatter;

    /**
     * @var Indent
     */
    protected $indentation;

    /**
     * @var NewLine
     */
    protected $newLine;

    /**
     * @param Formatter $formatter
     * @param Indent    $indentation
     * @param NewLine   $newLine
     */
    public function __construct(Formatter $formatter, Indent $indentation, NewLine $newLine)
    {
        $this->newLine = $newLine;
        $this->indentation = $indentation;
        $this->formatter = $formatter;
    }

    /**
     * Tells whether the token is a line comment or a block comment.
     *
     * @param array $token
     *
     * @return bool
     */
    public function stringHasCommentToken($token)
    {
        $commentTypes = [
            Tokenizer::TOKEN_TYPE_COMMENT,
            Tokenizer::TOKEN_TYPE_BLOCK_COMMENT,
        ];

        return \in_array($token[Tokenizer::TOKEN_TYPE], $commentTypes, true);
    }

    /**
     * Appends a comment to the output and requests a line break after it.
     *
     * A block comment is started on its own line at the current indent level and
     * every line break inside it is re-indented to that same level.
     *
     * @param array  $token
     * @param string $tab
     * @param string $queryValue
     *
     * @return string The formatter output after the comment was appended.
     */
    public function writeCommentBlock($token, $tab, $queryValue)
    {
        $isBlockComment = Tokenizer::TOKEN_TYPE_BLOCK_COMMENT === $token[Tokenizer::TOKEN_TYPE];

        if ($isBlockComment) {
            $indentedBreak = "\n".\str_repeat($tab, $this->indentation->getIndentLvl());
            $this->formatter->appendToFormattedSql($indentedBreak);
            $queryValue = \str_replace("\n", $indentedBreak, $queryValue);
        }

        $this->formatter->appendToFormattedSql($queryValue);
        $this->newLine->setNewline(true);

        return $this->formatter->getFormattedSql();
    }
}

View File

@ -0,0 +1,213 @@
<?php
/**
* Author: Nil Portugués Calderó <contact@nilportugues.com>
* Date: 12/22/14
* Time: 11:37 AM.
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace NilPortugues\Sql\QueryFormatter\Helper;
use NilPortugues\Sql\QueryFormatter\Formatter;
/**
 * Class Indent.
 *
 * Tracks the current indent level together with a stack of indent types
 * ('special' or 'block', most recent first) that explains how the level was reached.
 */
class Indent
{
    /**
     * Flag stored on behalf of callers; this class only exposes it through
     * setInlineIndented() / getInlineIndented().
     *
     * @var bool
     */
    protected $inlineIndented = false;

    /**
     * When true, the next call to increaseSpecialIndent() raises the level.
     *
     * @var bool
     */
    protected $increaseSpecialIndent = false;

    /**
     * @var int
     */
    protected $indentLvl = 0;

    /**
     * When true, the next call to increaseBlockIndent() raises the level.
     *
     * @var bool
     */
    protected $increaseBlockIndent = false;

    /**
     * Stack of 'special' / 'block' markers, most recent first.
     *
     * @var array
     */
    protected $indentTypes = [];

    /**
     * Increase the Special Indent if increaseSpecialIndent is true after the current iteration.
     *
     * @return $this
     */
    public function increaseSpecialIndent()
    {
        if ($this->increaseSpecialIndent) {
            ++$this->indentLvl;
            $this->increaseSpecialIndent = false;
            \array_unshift($this->indentTypes, 'special');
        }

        return $this;
    }

    /**
     * Increase the Block Indent if increaseBlockIndent is true after the current iteration.
     *
     * @return $this
     */
    public function increaseBlockIndent()
    {
        if ($this->increaseBlockIndent) {
            ++$this->indentLvl;
            $this->increaseBlockIndent = false;
            \array_unshift($this->indentTypes, 'block');
        }

        return $this;
    }

    /**
     * Closing parentheses decrease the block indent level.
     *
     * Trailing spaces are trimmed from the formatter output, the level drops by
     * one, and indent types are then popped: each 'special' entry lowers the level
     * once more, and popping stops after the first non-special entry or when the
     * stack is empty.
     *
     * The level never ends below zero. An unmatched ")" would otherwise leave a
     * negative level, which makes the next str_repeat() indentation call fail.
     *
     * @param Formatter $formatter
     *
     * @return $this
     */
    public function decreaseIndentLevelUntilIndentTypeIsSpecial(Formatter $formatter)
    {
        $formatter->setFormattedSql(\rtrim($formatter->getFormattedSql(), ' '));

        --$this->indentLvl;

        while ($j = \array_shift($this->indentTypes)) {
            if ('special' !== $j) {
                break;
            }
            --$this->indentLvl;
        }

        // Unbalanced parentheses: recover by restarting at the left margin.
        if ($this->indentLvl < 0) {
            $this->indentLvl = 0;
        }

        return $this;
    }

    /**
     * Drops one level and pops the stack when the most recent indent type is 'special'.
     *
     * @return $this
     */
    public function decreaseSpecialIndentIfCurrentIndentTypeIsSpecial()
    {
        \reset($this->indentTypes);

        if (\current($this->indentTypes) === 'special') {
            --$this->indentLvl;
            \array_shift($this->indentTypes);
        }

        return $this;
    }

    /**
     * @return bool
     */
    public function getIncreaseBlockIndent()
    {
        return $this->increaseBlockIndent;
    }

    /**
     * @return bool
     */
    public function getIncreaseSpecialIndent()
    {
        return $this->increaseSpecialIndent;
    }

    /**
     * @return int
     */
    public function getIndentLvl()
    {
        return $this->indentLvl;
    }

    /**
     * @return mixed
     */
    public function getIndentTypes()
    {
        return $this->indentTypes;
    }

    /**
     * @param bool $increaseBlockIndent
     *
     * @return $this
     */
    public function setIncreaseBlockIndent($increaseBlockIndent)
    {
        $this->increaseBlockIndent = $increaseBlockIndent;

        return $this;
    }

    /**
     * @param bool $increaseSpecialIndent
     *
     * @return $this
     */
    public function setIncreaseSpecialIndent($increaseSpecialIndent)
    {
        $this->increaseSpecialIndent = $increaseSpecialIndent;

        return $this;
    }

    /**
     * @param int $indentLvl
     *
     * @return $this
     */
    public function setIndentLvl($indentLvl)
    {
        $this->indentLvl = $indentLvl;

        return $this;
    }

    /**
     * @param array $indentTypes
     *
     * @return $this
     */
    public function setIndentTypes($indentTypes)
    {
        $this->indentTypes = $indentTypes;

        return $this;
    }

    /**
     * @param bool $inlineIndented
     *
     * @return $this
     */
    public function setInlineIndented($inlineIndented)
    {
        $this->inlineIndented = $inlineIndented;

        return $this;
    }

    /**
     * @return bool
     */
    public function getInlineIndented()
    {
        return $this->inlineIndented;
    }
}

View File

@ -0,0 +1,183 @@
<?php
/**
* Author: Nil Portugués Calderó <contact@nilportugues.com>
* Date: 12/22/14
* Time: 11:37 AM.
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace NilPortugues\Sql\QueryFormatter\Helper;
use NilPortugues\Sql\QueryFormatter\Formatter;
use NilPortugues\Sql\QueryFormatter\Tokenizer\Tokenizer;
/**
 * Class NewLine.
 *
 * Holds the "a line break is pending" flag and writes line breaks, followed by
 * the current indentation, into the formatter output.
 */
class NewLine
{
    /**
     * True when a line break must be written before the next token.
     *
     * @var bool
     */
    protected $newline = false;

    /**
     * @var \NilPortugues\Sql\QueryFormatter\Formatter
     */
    protected $formatter;

    /**
     * @var Indent
     */
    protected $indentation;

    /**
     * @var Parentheses
     */
    protected $parentheses;

    /**
     * @param Formatter   $formatter
     * @param Indent      $indentation
     * @param Parentheses $parentheses
     */
    public function __construct(Formatter $formatter, Indent $indentation, Parentheses $parentheses)
    {
        $this->parentheses = $parentheses;
        $this->indentation = $indentation;
        $this->formatter = $formatter;
    }

    /**
     * Writes the pending line break, if any, and clears the pending flag.
     *
     * @param string $tab
     *
     * @return bool True when a line break was written.
     */
    public function addNewLineBreak($tab)
    {
        if (true !== $this->newline) {
            return false;
        }

        $indent = \str_repeat($tab, $this->indentation->getIndentLvl());
        $this->formatter->appendToFormattedSql("\n".$indent);
        $this->newline = false;

        return true;
    }

    /**
     * Requests a line break after a comma once the inline length counter has
     * reached 30, and restarts that counter.
     *
     * @param array $token
     */
    public function writeNewLineForLongCommaInlineValues($token)
    {
        if (',' !== $token[Tokenizer::TOKEN_VALUE]) {
            return;
        }

        if ($this->formatter->getInlineCount() < 30) {
            return;
        }

        $this->formatter->setInlineCount(0);
        $this->newline = true;
    }

    /**
     * Requests a line break plus a block indent when inline parentheses hold
     * more than 30 characters of content.
     *
     * @param int $length
     */
    public function writeNewLineForLongInlineValues($length)
    {
        if (!$this->parentheses->getInlineParentheses() || $length <= 30) {
            return;
        }

        $this->indentation->setIncreaseBlockIndent(true);
        $this->indentation->setInlineIndented(true);
        $this->newline = true;
    }

    /**
     * Adds a new line break for an opening parentheses for a non-inline expression.
     */
    public function addNewLineAfterOpeningParentheses()
    {
        if (false !== $this->parentheses->getInlineParentheses()) {
            return;
        }

        $this->indentation->setIncreaseBlockIndent(true);
        $this->newline = true;
    }

    /**
     * Writes a line break and the current indentation unless one was already written.
     *
     * @param bool   $addedNewline
     * @param string $tab
     */
    public function addNewLineBeforeToken($addedNewline, $tab)
    {
        if (false !== $addedNewline) {
            return;
        }

        $indent = \str_repeat($tab, $this->indentation->getIndentLvl());
        $this->formatter->appendToFormattedSql("\n".$indent);
    }

    /**
     * Add a newline before the top level reserved word if necessary and indent.
     *
     * When the line break was already written, the indentation that came with it
     * is stripped first so the keyword is re-indented at the current level. A
     * line break is always requested after the keyword.
     *
     * @param bool   $addedNewline
     * @param string $tab
     */
    public function writeNewLineBecauseOfTopLevelReservedWord($addedNewline, $tab)
    {
        if (false !== $addedNewline) {
            $withoutIndent = \rtrim($this->formatter->getFormattedSql(), $tab);
            $this->formatter->setFormattedSql($withoutIndent);
        } else {
            $this->formatter->appendToFormattedSql("\n");
        }

        $indent = \str_repeat($tab, $this->indentation->getIndentLvl());
        $this->formatter->appendToFormattedSql($indent);
        $this->newline = true;
    }

    /**
     * Commas start a new line unless they are found within inline parentheses or SQL 'LIMIT' clause.
     * Inside a LIMIT clause no line break is requested and the LIMIT flag is cleared.
     */
    public function writeNewLineBecauseOfComma()
    {
        $insideLimit = true === $this->formatter->getClauseLimit();
        $this->newline = !$insideLimit;

        if ($insideLimit) {
            $this->formatter->setClauseLimit(false);
        }
    }

    /**
     * @param array $token
     *
     * @return bool
     */
    public function isTokenTypeReservedNewLine($token)
    {
        return Tokenizer::TOKEN_TYPE_RESERVED_NEWLINE === $token[Tokenizer::TOKEN_TYPE];
    }

    /**
     * @return bool
     */
    public function getNewline()
    {
        return $this->newline;
    }

    /**
     * @param bool $newline
     *
     * @return $this
     */
    public function setNewline($newline)
    {
        $this->newline = $newline;

        return $this;
    }
}

View File

@ -0,0 +1,139 @@
<?php
/**
* Author: Nil Portugués Calderó <contact@nilportugues.com>
* Date: 12/22/14
* Time: 11:37 AM.
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace NilPortugues\Sql\QueryFormatter\Helper;
use NilPortugues\Sql\QueryFormatter\Formatter;
use NilPortugues\Sql\QueryFormatter\Tokenizer\Tokenizer;
/**
 * Class Parentheses.
 *
 * Classifies parenthesis tokens and tracks whether the formatter is currently
 * inside a group that is being kept inline.
 */
class Parentheses
{
    /**
     * True while inside parentheses that are being written inline.
     *
     * @var bool
     */
    protected $inlineParentheses = false;

    /**
     * @var \NilPortugues\Sql\QueryFormatter\Formatter
     */
    protected $formatter;

    /**
     * @var Indent
     */
    protected $indentation;

    /**
     * @param Formatter $formatter
     * @param Indent    $indentation
     */
    public function __construct(Formatter $formatter, Indent $indentation)
    {
        $this->indentation = $indentation;
        $this->formatter = $formatter;
    }

    /**
     * @return bool
     */
    public function getInlineParentheses()
    {
        return $this->inlineParentheses;
    }

    /**
     * @param bool $inlineParentheses
     *
     * @return $this
     */
    public function setInlineParentheses($inlineParentheses)
    {
        $this->inlineParentheses = $inlineParentheses;

        return $this;
    }

    /**
     * @param array $token
     *
     * @return bool
     */
    public function stringIsOpeningParentheses($token)
    {
        return '(' === $token[Tokenizer::TOKEN_VALUE];
    }

    /**
     * Starts an inline parentheses group: sets the inline flag, restarts the
     * formatter's inline length counter and clears the inline-indented flag.
     */
    public function writeNewInlineParentheses()
    {
        $this->inlineParentheses = true;
        $this->formatter->setInlineCount(0);
        $this->indentation->setInlineIndented(false);
    }

    /**
     * Tells whether the token value is ";" or "(".
     *
     * @param array $token
     *
     * @return bool
     */
    public function invalidParenthesesTokenValue($token)
    {
        return \in_array($token[Tokenizer::TOKEN_VALUE], [';', '('], true);
    }

    /**
     * Tells whether the token is a top-level reserved word, a new-line reserved
     * word, a comment or a block comment.
     *
     * @param array $token
     *
     * @return bool
     */
    public function invalidParenthesesTokenType($token)
    {
        $blockingTypes = [
            Tokenizer::TOKEN_TYPE_RESERVED_TOP_LEVEL,
            Tokenizer::TOKEN_TYPE_RESERVED_NEWLINE,
            Tokenizer::TOKEN_TYPE_COMMENT,
            Tokenizer::TOKEN_TYPE_BLOCK_COMMENT,
        ];

        return \in_array($token[Tokenizer::TOKEN_TYPE], $blockingTypes, true);
    }

    /**
     * @param array $token
     *
     * @return bool
     */
    public function stringIsClosingParentheses($token)
    {
        return ')' === $token[Tokenizer::TOKEN_VALUE];
    }

    /**
     * Closes an inline parentheses group.
     *
     * Trailing spaces are trimmed from the output. When the group had been
     * indented, the most recent indent type is dropped, the level is lowered by
     * one and the closing token is placed on its own line at that level. The
     * inline flag is cleared and the token is appended followed by a space.
     *
     * @param string $tab
     * @param string $queryValue
     */
    public function writeInlineParenthesesBlock($tab, $queryValue)
    {
        $trimmed = \rtrim($this->formatter->getFormattedSql(), ' ');
        $this->formatter->setFormattedSql($trimmed);

        if ($this->indentation->getInlineIndented()) {
            $remainingTypes = $this->indentation->getIndentTypes();
            \array_shift($remainingTypes);
            $this->indentation->setIndentTypes($remainingTypes);

            $outerLevel = $this->indentation->getIndentLvl() - 1;
            $this->indentation->setIndentLvl($outerLevel);
            $this->formatter->appendToFormattedSql("\n".\str_repeat($tab, $outerLevel));
        }

        $this->inlineParentheses = false;
        $this->formatter->appendToFormattedSql($queryValue.' ');
    }
}

View File

@ -0,0 +1,749 @@
<?php
/**
* Author: Nil Portugués Calderó <contact@nilportugues.com>
* Date: 12/22/14
* Time: 11:38 AM.
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace NilPortugues\Sql\QueryFormatter\Helper;
use NilPortugues\Sql\QueryFormatter\Formatter;
use NilPortugues\Sql\QueryFormatter\Tokenizer\Tokenizer;
/**
* Class Token.
*/
final class Token
{
/**
* @var array
*/
public static $reserved = [
'ACCESSIBLE',
'ACTION',
'AGAINST',
'AGGREGATE',
'ALGORITHM',
'ALL',
'ALTER',
'ANALYSE',
'ANALYZE',
'AS',
'ASC',
'AUTOCOMMIT',
'AUTO_INCREMENT',
'BACKUP',
'BEGIN',
'BETWEEN',
'BINLOG',
'BOTH',
'CASCADE',
'CASE',
'CHANGE',
'CHANGED',
'CHARACTER SET',
'CHARSET',
'CHECK',
'CHECKSUM',
'COLLATE',
'COLLATION',
'COLUMN',
'COLUMNS',
'COMMENT',
'COMMIT',
'COMMITTED',
'COMPRESSED',
'CONCURRENT',
'CONSTRAINT',
'CONTAINS',
'CONVERT',
'CREATE',
'CROSS',
'CURRENT_TIMESTAMP',
'DATABASE',
'DATABASES',
'DAY',
'DAY_HOUR',
'DAY_MINUTE',
'DAY_SECOND',
'DEFAULT',
'DEFINER',
'DELAYED',
'DELETE',
'DESC',
'DESCRIBE',
'DETERMINISTIC',
'DISTINCT',
'DISTINCTROW',
'DIV',
'DO',
'DUMPFILE',
'DUPLICATE',
'DYNAMIC',
'ELSE',
'ENCLOSED',
'END',
'ENGINE',
'ENGINE_TYPE',
'ENGINES',
'ESCAPE',
'ESCAPED',
'EVENTS',
'EXEC',
'EXECUTE',
'EXISTS',
'EXPLAIN',
'EXTENDED',
'FAST',
'FIELDS',
'FILE',
'FIRST',
'FIXED',
'FLUSH',
'FOR',
'FORCE',
'FOREIGN',
'FULL',
'FULLTEXT',
'FUNCTION',
'GLOBAL',
'GRANT',
'GRANTS',
'GROUP_CONCAT',
'HEAP',
'HIGH_PRIORITY',
'HOSTS',
'HOUR',
'HOUR_MINUTE',
'HOUR_SECOND',
'IDENTIFIED',
'IF',
'IFNULL',
'IGNORE',
'IN',
'INDEX',
'INDEXES',
'INFILE',
'INSERT',
'INSERT_ID',
'INSERT_METHOD',
'INTERVAL',
'INTO',
'INVOKER',
'IS',
'ISOLATION',
'KEY',
'KEYS',
'KILL',
'LAST_INSERT_ID',
'LEADING',
'LEVEL',
'LIKE',
'LINEAR',
'LINES',
'LOAD',
'LOCAL',
'LOCK',
'LOCKS',
'LOGS',
'LOW_PRIORITY',
'MARIA',
'MASTER',
'MASTER_CONNECT_RETRY',
'MASTER_HOST',
'MASTER_LOG_FILE',
'MATCH',
'MAX_CONNECTIONS_PER_HOUR',
'MAX_QUERIES_PER_HOUR',
'MAX_ROWS',
'MAX_UPDATES_PER_HOUR',
'MAX_USER_CONNECTIONS',
'MEDIUM',
'MERGE',
'MINUTE',
'MINUTE_SECOND',
'MIN_ROWS',
'MODE',
'MODIFY',
'MONTH',
'MRG_MYISAM',
'MYISAM',
'NAMES',
'NATURAL',
'NOT',
'NOW()',
'NULL',
'OFFSET',
'ON',
'OPEN',
'OPTIMIZE',
'OPTION',
'OPTIONALLY',
'ON UPDATE',
'ON DELETE',
'OUTFILE',
'PACK_KEYS',
'PAGE',
'PARTIAL',
'PARTITION',
'PARTITIONS',
'PASSWORD',
'PRIMARY',
'PRIVILEGES',
'PROCEDURE',
'PROCESS',
'PROCESSLIST',
'PURGE',
'QUICK',
'RANGE',
'RAID0',
'RAID_CHUNKS',
'RAID_CHUNKSIZE',
'RAID_TYPE',
'READ',
'READ_ONLY',
'READ_WRITE',
'REFERENCES',
'REGEXP',
'RELOAD',
'RENAME',
'REPAIR',
'REPEATABLE',
'REPLACE',
'REPLICATION',
'RESET',
'RESTORE',
'RESTRICT',
'RETURN',
'RETURNS',
'REVOKE',
'RLIKE',
'ROLLBACK',
'ROW',
'ROWS',
'ROW_FORMAT',
'SECOND',
'SECURITY',
'SEPARATOR',
'SERIALIZABLE',
'SESSION',
'SHARE',
'SHOW',
'SHUTDOWN',
'SLAVE',
'SONAME',
'SOUNDS',
'SQL',
'SQL_AUTO_IS_NULL',
'SQL_BIG_RESULT',
'SQL_BIG_SELECTS',
'SQL_BIG_TABLES',
'SQL_BUFFER_RESULT',
'SQL_CALC_FOUND_ROWS',
'SQL_LOG_BIN',
'SQL_LOG_OFF',
'SQL_LOG_UPDATE',
'SQL_LOW_PRIORITY_UPDATES',
'SQL_MAX_JOIN_SIZE',
'SQL_QUOTE_SHOW_CREATE',
'SQL_SAFE_UPDATES',
'SQL_SELECT_LIMIT',
'SQL_SLAVE_SKIP_COUNTER',
'SQL_SMALL_RESULT',
'SQL_WARNINGS',
'SQL_CACHE',
'SQL_NO_CACHE',
'START',
'STARTING',
'STATUS',
'STOP',
'STORAGE',
'STRAIGHT_JOIN',
'STRING',
'STRIPED',
'SUPER',
'TABLE',
'TABLES',
'TEMPORARY',
'TERMINATED',
'THEN',
'TO',
'TRAILING',
'TRANSACTIONAL',
'TRUE',
'TRUNCATE',
'TYPE',
'TYPES',
'UNCOMMITTED',
'UNIQUE',
'UNLOCK',
'UNSIGNED',
'USAGE',
'USE',
'USING',
'VARIABLES',
'VIEW',
'WHEN',
'WITH',
'WORK',
'WRITE',
'YEAR_MONTH',
];
/**
* @var array
*/
public static $reservedTopLevel = [
'SELECT',
'FROM',
'WHERE',
'SET',
'ORDER BY',
'GROUP BY',
'LIMIT',
'DROP',
'VALUES',
'UPDATE',
'HAVING',
'ADD',
'AFTER',
'ALTER TABLE',
'DELETE FROM',
'UNION ALL',
'UNION',
'EXCEPT',
'INTERSECT',
];
/**
* @var array
*/
public static $reservedNewLine = [
'LEFT OUTER JOIN',
'RIGHT OUTER JOIN',
'LEFT JOIN',
'RIGHT JOIN',
'OUTER JOIN',
'INNER JOIN',
'JOIN',
'XOR',
'OR',
'AND',
];
/**
* @var array
*/
public static $functions = [
'ABS',
'ACOS',
'ADDDATE',
'ADDTIME',
'AES_DECRYPT',
'AES_ENCRYPT',
'AREA',
'ASBINARY',
'ASCII',
'ASIN',
'ASTEXT',
'ATAN',
'ATAN2',
'AVG',
'BDMPOLYFROMTEXT',
'BDMPOLYFROMWKB',
'BDPOLYFROMTEXT',
'BDPOLYFROMWKB',
'BENCHMARK',
'BIN',
'BIT_AND',
'BIT_COUNT',
'BIT_LENGTH',
'BIT_OR',
'BIT_XOR',
'BOUNDARY',
'BUFFER',
'CAST',
'CEIL',
'CEILING',
'CENTROID',
'CHAR',
'CHARACTER_LENGTH',
'CHARSET',
'CHAR_LENGTH',
'COALESCE',
'COERCIBILITY',
'COLLATION',
'COMPRESS',
'CONCAT',
'CONCAT_WS',
'CONNECTION_ID',
'CONTAINS',
'CONV',
'CONVERT',
'CONVERT_TZ',
'CONVEXHULL',
'COS',
'COT',
'COUNT',
'CRC32',
'CROSSES',
'CURDATE',
'CURRENT_DATE',
'CURRENT_TIME',
'CURRENT_TIMESTAMP',
'CURRENT_USER',
'CURTIME',
'DATABASE',
'DATE',
'DATEDIFF',
'DATE_ADD',
'DATE_DIFF',
'DATE_FORMAT',
'DATE_SUB',
'DAY',
'DAYNAME',
'DAYOFMONTH',
'DAYOFWEEK',
'DAYOFYEAR',
'DECODE',
'DEFAULT',
'DEGREES',
'DES_DECRYPT',
'DES_ENCRYPT',
'DIFFERENCE',
'DIMENSION',
'DISJOINT',
'DISTANCE',
'ELT',
'ENCODE',
'ENCRYPT',
'ENDPOINT',
'ENVELOPE',
'EQUALS',
'EXP',
'EXPORT_SET',
'EXTERIORRING',
'EXTRACT',
'EXTRACTVALUE',
'FIELD',
'FIND_IN_SET',
'FLOOR',
'FORMAT',
'FOUND_ROWS',
'FROM_DAYS',
'FROM_UNIXTIME',
'GEOMCOLLFROMTEXT',
'GEOMCOLLFROMWKB',
'GEOMETRYCOLLECTION',
'GEOMETRYCOLLECTIONFROMTEXT',
'GEOMETRYCOLLECTIONFROMWKB',
'GEOMETRYFROMTEXT',
'GEOMETRYFROMWKB',
'GEOMETRYN',
'GEOMETRYTYPE',
'GEOMFROMTEXT',
'GEOMFROMWKB',
'GET_FORMAT',
'GET_LOCK',
'GLENGTH',
'GREATEST',
'GROUP_CONCAT',
'GROUP_UNIQUE_USERS',
'HEX',
'HOUR',
'IF',
'IFNULL',
'INET_ATON',
'INET_NTOA',
'INSERT',
'INSTR',
'INTERIORRINGN',
'INTERSECTION',
'INTERSECTS',
'INTERVAL',
'ISCLOSED',
'ISEMPTY',
'ISNULL',
'ISRING',
'ISSIMPLE',
'IS_FREE_LOCK',
'IS_USED_LOCK',
'LAST_DAY',
'LAST_INSERT_ID',
'LCASE',
'LEAST',
'LEFT',
'LENGTH',
'LINEFROMTEXT',
'LINEFROMWKB',
'LINESTRING',
'LINESTRINGFROMTEXT',
'LINESTRINGFROMWKB',
'LN',
'LOAD_FILE',
'LOCALTIME',
'LOCALTIMESTAMP',
'LOCATE',
'LOG',
'LOG10',
'LOG2',
'LOWER',
'LPAD',
'LTRIM',
'MAKEDATE',
'MAKETIME',
'MAKE_SET',
'MASTER_POS_WAIT',
'MAX',
'MBRCONTAINS',
'MBRDISJOINT',
'MBREQUAL',
'MBRINTERSECTS',
'MBROVERLAPS',
'MBRTOUCHES',
'MBRWITHIN',
'MD5',
'MICROSECOND',
'MID',
'MIN',
'MINUTE',
'MLINEFROMTEXT',
'MLINEFROMWKB',
'MOD',
'MONTH',
'MONTHNAME',
'MPOINTFROMTEXT',
'MPOINTFROMWKB',
'MPOLYFROMTEXT',
'MPOLYFROMWKB',
'MULTILINESTRING',
'MULTILINESTRINGFROMTEXT',
'MULTILINESTRINGFROMWKB',
'MULTIPOINT',
'MULTIPOINTFROMTEXT',
'MULTIPOINTFROMWKB',
'MULTIPOLYGON',
'MULTIPOLYGONFROMTEXT',
'MULTIPOLYGONFROMWKB',
'NAME_CONST',
'NULLIF',
'NUMGEOMETRIES',
'NUMINTERIORRINGS',
'NUMPOINTS',
'OCT',
'OCTET_LENGTH',
'OLD_PASSWORD',
'ORD',
'OVERLAPS',
'PASSWORD',
'PERIOD_ADD',
'PERIOD_DIFF',
'PI',
'POINT',
'POINTFROMTEXT',
'POINTFROMWKB',
'POINTN',
'POINTONSURFACE',
'POLYFROMTEXT',
'POLYFROMWKB',
'POLYGON',
'POLYGONFROMTEXT',
'POLYGONFROMWKB',
'POSITION',
'POW',
'POWER',
'QUARTER',
'QUOTE',
'RADIANS',
'RAND',
'RELATED',
'RELEASE_LOCK',
'REPEAT',
'REPLACE',
'REVERSE',
'RIGHT',
'ROUND',
'ROW_COUNT',
'RPAD',
'RTRIM',
'SCHEMA',
'SECOND',
'SEC_TO_TIME',
'SESSION_USER',
'SHA',
'SHA1',
'SIGN',
'SIN',
'SLEEP',
'SOUNDEX',
'SPACE',
'SQRT',
'SRID',
'STARTPOINT',
'STD',
'STDDEV',
'STDDEV_POP',
'STDDEV_SAMP',
'STRCMP',
'STR_TO_DATE',
'SUBDATE',
'SUBSTR',
'SUBSTRING',
'SUBSTRING_INDEX',
'SUBTIME',
'SUM',
'SYMDIFFERENCE',
'SYSDATE',
'SYSTEM_USER',
'TAN',
'TIME',
'TIMEDIFF',
'TIMESTAMP',
'TIMESTAMPADD',
'TIMESTAMPDIFF',
'TIME_FORMAT',
'TIME_TO_SEC',
'TOUCHES',
'TO_DAYS',
'TRIM',
'TRUNCATE',
'UCASE',
'UNCOMPRESS',
'UNCOMPRESSED_LENGTH',
'UNHEX',
'UNIQUE_USERS',
'UNIX_TIMESTAMP',
'UPDATEXML',
'UPPER',
'USER',
'UTC_DATE',
'UTC_TIME',
'UTC_TIMESTAMP',
'UUID',
'VARIANCE',
'VAR_POP',
'VAR_SAMP',
'VERSION',
'WEEK',
'WEEKDAY',
'WEEKOFYEAR',
'WITHIN',
'X',
'Y',
'YEAR',
'YEARWEEK',
];
/**
* @var array
*/
public static $boundaries = [
',',
';',
')',
'(',
'.',
'=',
'<',
'>',
'+',
'-',
'*',
'/',
'!',
'^',
'%',
'|',
'&',
'#',
];
/**
 * Tells whether the token is a top-level reserved word.
 *
 * @param array $token
 *
 * @return bool
 */
public static function isTokenTypeReservedTopLevel($token)
{
    $type = $token[Tokenizer::TOKEN_TYPE];

    return Tokenizer::TOKEN_TYPE_RESERVED_TOP_LEVEL === $type;
}
/**
 * Sets the formatter's LIMIT flag when the token value is exactly 'LIMIT' and
 * the formatter is not inside inline parentheses.
 *
 * @param array       $token
 * @param Parentheses $parentheses
 * @param Formatter   $formatter
 */
public static function tokenHasLimitClause($token, Parentheses $parentheses, Formatter $formatter)
{
    if ('LIMIT' !== $token[Tokenizer::TOKEN_VALUE]) {
        return;
    }

    if (false !== $parentheses->getInlineParentheses()) {
        return;
    }

    $formatter->setClauseLimit(true);
}
/**
 * Tells whether a boundary token directly follows another boundary token with
 * no whitespace between them in the original token list.
 *
 * @param array $token
 * @param array $tokens         Token list without whitespace tokens.
 * @param int   $i              Index of $token inside $tokens.
 * @param array $originalTokens Token list including whitespace tokens.
 *
 * @return bool
 */
public static function tokenHasMultipleBoundaryCharactersTogether($token, &$tokens, $i, &$originalTokens)
{
    if (Tokenizer::TOKEN_TYPE_BOUNDARY !== $token[Tokenizer::TOKEN_TYPE]) {
        return false;
    }

    if (!self::tokenPreviousCharacterIsBoundary($tokens, $i)) {
        return false;
    }

    return self::tokenPreviousCharacterIsWhiteSpace($token, $originalTokens);
}
/**
 * Tells whether the token before index $i exists and is a boundary token.
 *
 * @param array $tokens
 * @param int   $i
 *
 * @return bool
 */
public static function tokenPreviousCharacterIsBoundary(&$tokens, $i)
{
    $previousIndex = $i - 1;

    if (!isset($tokens[$previousIndex])) {
        return false;
    }

    return Tokenizer::TOKEN_TYPE_BOUNDARY === $tokens[$previousIndex][Tokenizer::TOKEN_TYPE];
}
/**
 * Despite its name, returns true when the token that precedes $token in the
 * original (whitespace-preserving) list exists and is NOT a whitespace token.
 *
 * The position is taken from $token['i'], the token's index in $originalTokens.
 *
 * @param array $token
 * @param array $originalTokens
 *
 * @return bool
 */
public static function tokenPreviousCharacterIsWhiteSpace($token, &$originalTokens)
{
    $precedingIndex = $token['i'] - 1;

    if (!isset($originalTokens[$precedingIndex])) {
        return false;
    }

    return Tokenizer::TOKEN_TYPE_WHITESPACE !== $originalTokens[$precedingIndex][Tokenizer::TOKEN_TYPE];
}
/**
 * Tells whether the token is a "-" that is followed by a number token and
 * preceded by some token.
 *
 * @param array $token
 * @param array $tokens
 * @param int   $i
 *
 * @return bool
 */
public static function tokenIsMinusSign($token, &$tokens, $i)
{
    if ('-' !== $token[Tokenizer::TOKEN_VALUE]) {
        return false;
    }

    if (!self::tokenNextCharacterIsNumber($tokens, $i)) {
        return false;
    }

    return isset($tokens[$i - 1]);
}
/**
 * Tells whether the token after index $i exists and is a number token.
 *
 * @param array $tokens
 * @param int   $i
 *
 * @return bool
 */
public static function tokenNextCharacterIsNumber(&$tokens, $i)
{
    $nextIndex = $i + 1;

    if (!isset($tokens[$nextIndex])) {
        return false;
    }

    return Tokenizer::TOKEN_TYPE_NUMBER === $tokens[$nextIndex][Tokenizer::TOKEN_TYPE];
}
}

View File

@ -0,0 +1,100 @@
<?php
/**
* Author: Nil Portugués Calderó <contact@nilportugues.com>
* Date: 12/22/14
* Time: 1:19 PM.
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace NilPortugues\Sql\QueryFormatter\Helper;
use NilPortugues\Sql\QueryFormatter\Tokenizer\Tokenizer;
/**
 * Class WhiteSpace.
 *
 * Stateless predicates about spacing around tokens, plus the helper that
 * strips whitespace tokens from a token list.
 */
class WhiteSpace
{
    /**
     * Tells whether the token is ".", "," or ";".
     *
     * @param array $token
     *
     * @return bool
     */
    public static function tokenHasExtraWhiteSpaceLeft($token)
    {
        return \in_array($token[Tokenizer::TOKEN_VALUE], ['.', ',', ';'], true);
    }

    /**
     * Tells whether the token is "(" or ".".
     *
     * @param array $token
     *
     * @return bool
     */
    public static function tokenHasExtraWhiteSpaceRight($token)
    {
        return \in_array($token[Tokenizer::TOKEN_VALUE], ['(', '.'], true);
    }

    /**
     * Despite its name, returns true when the given token type is none of:
     * quote, back-tick quote, word or number.
     *
     * @param mixed $tokenType
     *
     * @return bool
     */
    public static function tokenIsNumberAndHasExtraWhiteSpaceRight($tokenType)
    {
        $valueLikeTypes = [
            Tokenizer::TOKEN_TYPE_QUOTE,
            Tokenizer::TOKEN_TYPE_BACK_TICK_QUOTE,
            Tokenizer::TOKEN_TYPE_WORD,
            Tokenizer::TOKEN_TYPE_NUMBER,
        ];

        return !\in_array($tokenType, $valueLikeTypes, true);
    }

    /**
     * Tells whether the token value contains a space, a line feed or a tab.
     *
     * @param array $token
     *
     * @return bool
     */
    public static function tokenHasExtraWhiteSpaces($token)
    {
        foreach ([' ', "\n", "\t"] as $blank) {
            if (false !== \strpos($token[Tokenizer::TOKEN_VALUE], $blank)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Despite its name, returns true when the token that precedes $token in the
     * original (whitespace-preserving) list exists and is NOT a whitespace token.
     *
     * The position is taken from $token['i'].
     *
     * @param array $originalTokens
     * @param array $token
     *
     * @return bool
     */
    public static function isPrecedingCurrentTokenOfTokenTypeWhiteSpace($originalTokens, $token)
    {
        $precedingIndex = $token['i'] - 1;

        if (!isset($originalTokens[$precedingIndex])) {
            return false;
        }

        return Tokenizer::TOKEN_TYPE_WHITESPACE !== $originalTokens[$precedingIndex][Tokenizer::TOKEN_TYPE];
    }

    /**
     * Returns the non-whitespace tokens as a re-indexed list.
     *
     * Each kept token gets an 'i' entry holding its index in $originalTokens.
     * Because $originalTokens is taken by reference, that 'i' entry is written
     * into the caller's array as well.
     *
     * @param array $originalTokens
     *
     * @return array
     */
    public static function removeTokenWhitespace(array &$originalTokens)
    {
        $kept = [];

        foreach (\array_keys($originalTokens) as $position) {
            if (Tokenizer::TOKEN_TYPE_WHITESPACE === $originalTokens[$position][Tokenizer::TOKEN_TYPE]) {
                continue;
            }

            $originalTokens[$position]['i'] = $position;
            $kept[] = $originalTokens[$position];
        }

        return $kept;
    }
}

View File

@ -0,0 +1,58 @@
<?php
/**
* Author: Nil Portugués Calderó <contact@nilportugues.com>
* Date: 12/23/14
* Time: 1:34 PM.
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace NilPortugues\Sql\QueryFormatter\Tokenizer\Parser;
use NilPortugues\Sql\QueryFormatter\Tokenizer\Tokenizer;
/**
 * Class Boundary.
 *
 * Tokenizer step that recognises a boundary character at the start of the input.
 */
final class Boundary
{
    /**
     * Sets the tokenizer's next token to a boundary token when no next token
     * has been set yet and $string starts with a boundary character.
     *
     * @param Tokenizer $tokenizer
     * @param string    $string
     * @param array     $matches   Receives the regular expression matches.
     */
    public static function isBoundary(Tokenizer $tokenizer, $string, array &$matches)
    {
        if ($tokenizer->getNextToken()) {
            return;
        }

        if (!self::isBoundaryCharacter($string, $matches, $tokenizer->getRegexBoundaries())) {
            return;
        }

        $tokenizer->setNextToken(self::getBoundaryCharacter($matches));
    }

    /**
     * Tells whether $string starts with one of the alternatives in $regexBoundaries.
     *
     * @param string $string
     * @param array  $matches         Receives the preg_match() matches.
     * @param string $regexBoundaries Regular expression fragment, inserted unescaped.
     *
     * @return bool
     */
    protected static function isBoundaryCharacter($string, array &$matches, $regexBoundaries)
    {
        $pattern = '/^('.$regexBoundaries.')/';

        return 1 === \preg_match($pattern, $string, $matches);
    }

    /**
     * Builds a boundary token from the first capture group of $matches.
     *
     * @param array $matches
     *
     * @return array
     */
    protected static function getBoundaryCharacter(array &$matches)
    {
        $boundaryToken = [];
        $boundaryToken[Tokenizer::TOKEN_VALUE] = $matches[1];
        $boundaryToken[Tokenizer::TOKEN_TYPE] = Tokenizer::TOKEN_TYPE_BOUNDARY;

        return $boundaryToken;
    }
}

View File

@ -0,0 +1,93 @@
<?php
/**
* Author: Nil Portugués Calderó <contact@nilportugues.com>
* Date: 12/23/14
* Time: 1:22 PM.
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace NilPortugues\Sql\QueryFormatter\Tokenizer\Parser;
use NilPortugues\Sql\QueryFormatter\Tokenizer\Tokenizer;
/**
* Class Comment.
*/
final class Comment
{
    /**
     * Registers a comment token on the tokenizer when no token has been
     * chosen yet and $string starts with a comment opener.
     *
     * @param Tokenizer $tokenizer
     * @param string    $string
     */
    public static function isComment(Tokenizer $tokenizer, $string)
    {
        if (!$tokenizer->getNextToken() && self::isCommentString($string)) {
            $tokenizer->setNextToken(self::getCommentString($string));
        }
    }

    /**
     * Tells whether $string starts with "#", "--" or "/*".
     *
     * @param string $string
     *
     * @return bool
     */
    protected static function isCommentString($string)
    {
        return isset($string[0]) && ($string[0] === '#' || self::isTwoCharacterComment($string));
    }

    /**
     * Tells whether $string starts with a two-character opener ("--" or "/*").
     *
     * @param string $string
     *
     * @return bool
     */
    protected static function isTwoCharacterComment($string)
    {
        return isset($string[1]) && (self::startsWithDoubleDash($string) || self::startsAsBlock($string));
    }

    /**
     * @param string $string
     *
     * @return bool
     */
    protected static function startsWithDoubleDash($string)
    {
        return isset($string[1]) && $string[0] === '-' && $string[1] === '-';
    }

    /**
     * @param string $string
     *
     * @return bool
     */
    protected static function startsAsBlock($string)
    {
        return isset($string[1]) && $string[0] === '/' && $string[1] === '*';
    }

    /**
     * Extracts the comment at the start of $string.
     *
     * A block comment runs up to and including its closing marker; a "#" or
     * "--" comment runs up to (not including) the next line feed. When the
     * terminator is missing, the comment extends to the end of $string.
     *
     * @param string $string Text already known to start with a comment opener.
     *
     * @return array Token with its value and its comment type.
     */
    protected static function getCommentString($string)
    {
        if (self::startsAsBlock($string)) {
            $type = Tokenizer::TOKEN_TYPE_BLOCK_COMMENT;
            // Only search for the closing marker on real block comments: with
            // a lone "#" the offset 2 lies outside the string, which raises a
            // ValueError on PHP 8.
            $closing = \strpos($string, '*/', 2);
            // Keep "not found" as false; adding 2 to it would turn it into 2
            // and cut an unterminated comment down to its opener.
            $last = ($closing === false) ? false : $closing + 2;
        } else {
            $type = Tokenizer::TOKEN_TYPE_COMMENT;
            $last = \strpos($string, "\n");
        }

        if ($last === false) {
            $last = \strlen($string);
        }

        return [
            Tokenizer::TOKEN_VALUE => \substr($string, 0, $last),
            Tokenizer::TOKEN_TYPE => $type,
        ];
    }
}

View File

@ -0,0 +1,81 @@
<?php
/**
* Author: Nil Portugués Calderó <contact@nilportugues.com>
* Date: 12/23/14
* Time: 1:36 PM.
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace NilPortugues\Sql\QueryFormatter\Tokenizer\Parser;
use NilPortugues\Sql\QueryFormatter\Tokenizer\Tokenizer;
/**
* Class LiteralString.
*/
final class LiteralString
{
    /**
     * Registers a reserved token for a function name on the tokenizer when
     * no token has been chosen yet and $string matches the function pattern.
     *
     * @param Tokenizer $tokenizer
     * @param string    $string
     * @param array     $matches   Receives the preg_match() captures.
     */
    public static function isFunction(Tokenizer $tokenizer, $string, array &$matches)
    {
        if ($tokenizer->getNextToken()) {
            return;
        }

        if (self::isFunctionString($string, $matches, $tokenizer->getRegexFunction())) {
            $tokenizer->setNextToken(self::getFunctionString($string, $matches));
        }
    }

    /**
     * A function name only counts when it is directly followed by "(":
     * "COUNT(" is a function, a bare "COUNT" is not. The comparison is made
     * against the upper-cased input.
     *
     * @param string $string
     * @param array  $matches       Receives the preg_match() captures.
     * @param string $regexFunction Alternation of known function names.
     *
     * @return bool
     */
    protected static function isFunctionString($string, array &$matches, $regexFunction)
    {
        $pattern = '/^('.$regexFunction.'[(]|\s|[)])/';
        $subject = \strtoupper($string);

        return \preg_match($pattern, $subject, $matches) === 1;
    }

    /**
     * Builds the token from the original (not upper-cased) text, leaving out
     * the last matched character.
     *
     * @param string $string
     * @param array  $matches
     *
     * @return array
     */
    protected static function getFunctionString($string, array &$matches)
    {
        $nameLength = \strlen($matches[1]) - 1;

        return [
            Tokenizer::TOKEN_TYPE => Tokenizer::TOKEN_TYPE_RESERVED,
            Tokenizer::TOKEN_VALUE => \substr($string, 0, $nameLength),
        ];
    }

    /**
     * Fallback parser: when no token has been chosen yet, takes everything up
     * to the end of input, whitespace, a quote character or a boundary as a
     * plain word. If the pattern does not match, an empty array is registered.
     *
     * @param Tokenizer $tokenizer
     * @param string    $string
     * @param array     $matches   Receives the preg_match() captures.
     */
    public static function getNonReservedString(Tokenizer $tokenizer, $string, array &$matches)
    {
        if ($tokenizer->getNextToken()) {
            return;
        }

        $pattern = '/^(.*?)($|\s|["\'`]|'.$tokenizer->getRegexBoundaries().')/';
        $found = \preg_match($pattern, $string, $matches) === 1;

        $tokenizer->setNextToken(
            $found
                ? [
                    Tokenizer::TOKEN_VALUE => $matches[1],
                    Tokenizer::TOKEN_TYPE => Tokenizer::TOKEN_TYPE_WORD,
                ]
                : []
        );
    }
}

View File

@ -0,0 +1,59 @@
<?php
/**
* Author: Nil Portugués Calderó <contact@nilportugues.com>
* Date: 12/23/14
* Time: 1:32 PM.
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace NilPortugues\Sql\QueryFormatter\Tokenizer\Parser;
use NilPortugues\Sql\QueryFormatter\Tokenizer\Tokenizer;
/**
* Class Numeral.
*/
final class Numeral
{
    /**
     * Registers a number token on the tokenizer when no token has been chosen
     * yet and $string starts with a numeric literal.
     *
     * @param Tokenizer $tokenizer
     * @param string    $string
     * @param array     $matches   Receives the preg_match() captures.
     */
    public static function isNumeral(Tokenizer $tokenizer, $string, array &$matches)
    {
        if (!$tokenizer->getNextToken() && self::isNumeralString($string, $matches, $tokenizer->getRegexBoundaries())) {
            $tokenizer->setNextToken(self::getNumeralString($matches));
        }
    }

    /**
     * Matches a decimal (optionally with a fractional part), hexadecimal
     * ("0x…") or binary ("0b…") literal that is followed by the end of input,
     * whitespace, a quote character or a boundary.
     *
     * @param string $string
     * @param array  $matches         Receives the preg_match() captures.
     * @param string $regexBoundaries Alternation of boundary patterns.
     *
     * @return bool
     */
    protected static function isNumeralString($string, array &$matches, $regexBoundaries)
    {
        // The quote terminators are a character class, as in
        // LiteralString::getNonReservedString(). Written without brackets
        // they only matched the literal three-character sequence "'`.
        return (1 === \preg_match(
            '/^([0-9]+(\.[0-9]+)?|0x[0-9a-fA-F]+|0b[01]+)($|\s|["\'`]|'.$regexBoundaries.')/',
            $string,
            $matches
        ));
    }

    /**
     * Builds the number token from the first capture group.
     *
     * @param array $matches
     *
     * @return array
     */
    protected static function getNumeralString(array &$matches)
    {
        return [Tokenizer::TOKEN_VALUE => $matches[1], Tokenizer::TOKEN_TYPE => Tokenizer::TOKEN_TYPE_NUMBER];
    }
}

View File

@ -0,0 +1,84 @@
<?php
/**
* Author: Nil Portugués Calderó <contact@nilportugues.com>
* Date: 12/23/14
* Time: 1:23 PM.
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace NilPortugues\Sql\QueryFormatter\Tokenizer\Parser;
use NilPortugues\Sql\QueryFormatter\Tokenizer\Tokenizer;
/**
* Class Quoted.
*/
final class Quoted
{
    /**
     * Registers a quoted token on the tokenizer when no token has been chosen
     * yet and $string starts with a quote opener.
     *
     * @param Tokenizer $tokenizer
     * @param string    $string
     */
    public static function isQuoted(Tokenizer $tokenizer, $string)
    {
        if ($tokenizer->getNextToken()) {
            return;
        }

        if (self::isQuotedString($string)) {
            $tokenizer->setNextToken(self::getQuotedString($string));
        }
    }

    /**
     * Tells whether $string opens with a double quote, single quote,
     * backtick or square bracket.
     *
     * @param string $string
     *
     * @return bool
     */
    protected static function isQuotedString($string)
    {
        if (empty($string[0])) {
            return false;
        }

        return \in_array($string[0], ['"', '\'', '`', '['], true);
    }

    /**
     * Builds the quoted token. Backtick and square-bracket openers get the
     * back-tick quote type; anything else gets the plain quote type.
     *
     * @param string $string
     *
     * @return array
     */
    protected static function getQuotedString($string)
    {
        $usesBackTickType = !empty($string[0]) && \in_array($string[0], ['`', '['], true);

        return [
            Tokenizer::TOKEN_TYPE => $usesBackTickType
                ? Tokenizer::TOKEN_TYPE_BACK_TICK_QUOTE
                : Tokenizer::TOKEN_TYPE_QUOTE,
            Tokenizer::TOKEN_VALUE => self::wrapStringWithQuotes($string),
        ];
    }

    /**
     * Extracts the quoted section at the start of $string. Recognised forms:
     * 1. backtick quoted string using `` to escape
     * 2. square bracket quoted string (SQL Server) using ]] to escape
     * 3. double quoted string using "" or \" to escape
     * 4. single quoted string using '' or \' to escape.
     *
     * A missing closing quote is tolerated: the match then runs to the end
     * of the input.
     *
     * @param string $string
     *
     * @return string|null The quoted section, or null when $string does not start with one.
     */
    public static function wrapStringWithQuotes($string)
    {
        $regex = '/^(((`[^`]*($|`))+)|((\[[^\]]*($|\]))(\][^\]]*($|\]))*)|'.
            '(("[^"\\\\]*(?:\\\\.[^"\\\\]*)*("|$))+)|((\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*(\'|$))+))/s';

        $matches = [];

        return (\preg_match($regex, $string, $matches) === 1) ? $matches[1] : null;
    }
}

View File

@ -0,0 +1,116 @@
<?php
/**
* Author: Nil Portugués Calderó <contact@nilportugues.com>
* Date: 12/23/14
* Time: 1:18 PM.
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace NilPortugues\Sql\QueryFormatter\Tokenizer\Parser;
use NilPortugues\Sql\QueryFormatter\Tokenizer\Tokenizer;
/**
* Class Reserved.
*/
final class Reserved
{
    /**
     * Maps each reserved token type to the Tokenizer getter that returns its
     * pattern.
     *
     * @var array
     */
    protected static $regex = [
        Tokenizer::TOKEN_TYPE_RESERVED_TOP_LEVEL => 'getRegexReservedTopLevel',
        Tokenizer::TOKEN_TYPE_RESERVED_NEWLINE => 'getRegexReservedNewLine',
        Tokenizer::TOKEN_TYPE_RESERVED => 'getRegexReserved',
    ];

    /**
     * When no token has been chosen yet and the previous token is not ".",
     * tries the top-level, new-line and plain reserved patterns in that order
     * and registers the first hit (or an empty array when none matches).
     *
     * The new-line attempt runs on the upper-cased input, so its token value
     * is upper-cased; the other two keep the original casing.
     *
     * @param Tokenizer  $tokenizer
     * @param string     $string
     * @param array|null $previous
     */
    public static function isReserved(Tokenizer $tokenizer, $string, $previous)
    {
        if ($tokenizer->getNextToken() || !self::isReservedPrecededByDotCharacter($previous)) {
            return;
        }

        $found = [];
        $attempts = [
            Tokenizer::TOKEN_TYPE_RESERVED_TOP_LEVEL => $string,
            Tokenizer::TOKEN_TYPE_RESERVED_NEWLINE => \strtoupper($string),
            Tokenizer::TOKEN_TYPE_RESERVED => $string,
        ];

        foreach ($attempts as $type => $subject) {
            self::getReservedString($found, $type, $subject, $tokenizer);
        }

        $tokenizer->setNextToken($found);
    }

    /**
     * A reserved word cannot be preceded by a "." in order to differentiate
     * "mytable.from" from the token "from". Returns true when the previous
     * token is absent, has no value, or is anything other than ".".
     *
     * @param $previous
     *
     * @return bool
     */
    protected static function isReservedPrecededByDotCharacter($previous)
    {
        if (!$previous || !isset($previous[Tokenizer::TOKEN_VALUE])) {
            return true;
        }

        return $previous[Tokenizer::TOKEN_VALUE] !== '.';
    }

    /**
     * Fills $tokenData with a token of the given type when it is still empty
     * and $string starts with a reserved word of that type.
     *
     * @param array     $tokenData
     * @param           $type
     * @param string    $string
     * @param Tokenizer $tokenizer
     */
    protected static function getReservedString(array &$tokenData, $type, $string, Tokenizer $tokenizer)
    {
        // An earlier attempt already produced a token: nothing to do.
        if (!empty($tokenData)) {
            return;
        }

        $captures = [];
        $getter = self::$regex[$type];
        $matched = self::isReservedString(
            $string,
            $captures,
            $tokenizer->$getter(),
            $tokenizer->getRegexBoundaries()
        );

        if ($matched) {
            $tokenData = self::getStringTypeArray($type, $string, $captures);
        }
    }

    /**
     * Matches a reserved word at the start of the upper-cased input, followed
     * by the end of input, whitespace or a boundary.
     *
     * @param string $upper
     * @param array  $matches         Receives the preg_match() captures.
     * @param string $regexReserved
     * @param string $regexBoundaries
     *
     * @return bool
     */
    protected static function isReservedString($upper, array &$matches, $regexReserved, $regexBoundaries)
    {
        $pattern = '/^('.$regexReserved.')($|\s|'.$regexBoundaries.')/';

        return \preg_match($pattern, \strtoupper($upper), $matches) === 1;
    }

    /**
     * Builds the token, taking as many leading characters of $string as the
     * first capture group is long.
     *
     * @param string $type
     * @param string $string
     * @param array  $matches
     *
     * @return array
     */
    protected static function getStringTypeArray($type, $string, array &$matches)
    {
        $length = \strlen($matches[1]);

        return [
            Tokenizer::TOKEN_TYPE => $type,
            Tokenizer::TOKEN_VALUE => \substr($string, 0, $length),
        ];
    }
}

View File

@ -0,0 +1,88 @@
<?php
/**
* Author: Nil Portugués Calderó <contact@nilportugues.com>
* Date: 12/23/14
* Time: 1:26 PM.
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace NilPortugues\Sql\QueryFormatter\Tokenizer\Parser;
use NilPortugues\Sql\QueryFormatter\Tokenizer\Tokenizer;
/**
* Class UserDefined.
*/
final class UserDefined
{
    /**
     * Registers a variable token on the tokenizer when no token has been
     * chosen yet and $string starts with a user-defined variable ("@name" or
     * "@" followed by a quoted name).
     *
     * @param Tokenizer $tokenizer
     * @param string    $string
     */
    public static function isUserDefinedVariable(Tokenizer $tokenizer, $string)
    {
        if ($tokenizer->getNextToken() || !self::isUserDefinedVariableString($string)) {
            return;
        }

        $token = self::getUserDefinedVariableString($string);

        // No name could be extracted (e.g. "@ " or "@@x"). Registering a
        // token with a null value would give it zero length and stall the
        // tokenizer loop, so leave the text to the remaining parsers.
        if (null !== $token[Tokenizer::TOKEN_VALUE]) {
            $tokenizer->setNextToken($token);
        }
    }

    /**
     * Tells whether $string starts with "@" followed by at least one more
     * character.
     *
     * @param string $string
     *
     * @return bool
     */
    protected static function isUserDefinedVariableString(&$string)
    {
        // isset() rather than !empty(): "0" counts as empty, which used to
        // reject names such as "@0abc".
        return isset($string[0], $string[1]) && $string[0] === '@';
    }

    /**
     * Gets the user defined variables for in quoted or non-quoted fashion.
     * The token value stays null when neither form matches.
     *
     * @param string $string
     *
     * @return array
     */
    protected static function getUserDefinedVariableString(&$string)
    {
        $returnData = [
            Tokenizer::TOKEN_VALUE => null,
            Tokenizer::TOKEN_TYPE => Tokenizer::TOKEN_TYPE_VARIABLE,
        ];

        self::setTokenValueStartingWithAtSymbolAndWrapped($returnData, $string);
        self::setTokenValueStartingWithAtSymbol($returnData, $string);

        return $returnData;
    }

    /**
     * Handles "@" followed by a double-quoted, single-quoted or backtick
     * quoted name.
     *
     * @param array  $returnData
     * @param string $string
     */
    protected static function setTokenValueStartingWithAtSymbolAndWrapped(array &$returnData, $string)
    {
        if (isset($string[1]) && ($string[1] === '"' || $string[1] === '\'' || $string[1] === '`')) {
            $returnData[Tokenizer::TOKEN_VALUE] = '@'.Quoted::wrapStringWithQuotes(\substr($string, 1));
        }
    }

    /**
     * Handles the unquoted form when the quoted form did not apply: "@"
     * followed by letters, digits, ".", "_" or "$".
     *
     * @param array  $returnData
     * @param string $string
     */
    protected static function setTokenValueStartingWithAtSymbol(array &$returnData, $string)
    {
        if (null !== $returnData[Tokenizer::TOKEN_VALUE]) {
            return;
        }

        $matches = [];
        if (1 === \preg_match('/^(@[a-zA-Z0-9\._\$]+)/', $string, $matches)) {
            $returnData[Tokenizer::TOKEN_VALUE] = $matches[1];
        }
    }
}

View File

@ -0,0 +1,55 @@
<?php
/**
* Author: Nil Portugués Calderó <contact@nilportugues.com>
* Date: 12/23/14
* Time: 1:19 PM.
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace NilPortugues\Sql\QueryFormatter\Tokenizer\Parser;
use NilPortugues\Sql\QueryFormatter\Tokenizer\Tokenizer;
/**
* Class WhiteSpace.
*/
final class WhiteSpace
{
    /**
     * Registers a whitespace token on the tokenizer when $string starts with
     * whitespace. Unlike the other parsers, this one does not look at
     * whether a token has already been chosen.
     *
     * @param Tokenizer $tokenizer
     * @param string    $string
     * @param array     $matches   Receives the preg_match() captures.
     */
    public static function isWhiteSpace(Tokenizer $tokenizer, $string, array &$matches)
    {
        if (!self::isWhiteSpaceString($string, $matches)) {
            return;
        }

        $tokenizer->setNextToken(self::getWhiteSpaceString($matches));
    }

    /**
     * Tells whether $string starts with one or more whitespace characters.
     *
     * @param string $string
     * @param array  $matches Receives the leading whitespace run at index 0.
     *
     * @return bool
     */
    public static function isWhiteSpaceString($string, array &$matches)
    {
        $hits = \preg_match('/^\s+/', $string, $matches);

        return $hits === 1;
    }

    /**
     * Builds the whitespace token from the whole match.
     *
     * @param array $matches
     *
     * @return array
     */
    public static function getWhiteSpaceString(array &$matches)
    {
        $blanks = $matches[0];

        return [
            Tokenizer::TOKEN_VALUE => $blanks,
            Tokenizer::TOKEN_TYPE => Tokenizer::TOKEN_TYPE_WHITESPACE,
        ];
    }
}

View File

@ -0,0 +1,350 @@
<?php
/**
* Author: Nil Portugués Calderó <contact@nilportugues.com>
* Date: 6/26/14
* Time: 12:10 AM.
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace NilPortugues\Sql\QueryFormatter\Tokenizer;
use NilPortugues\Sql\QueryFormatter\Helper\Token;
use NilPortugues\Sql\QueryFormatter\Tokenizer\Parser\Boundary;
use NilPortugues\Sql\QueryFormatter\Tokenizer\Parser\Comment;
use NilPortugues\Sql\QueryFormatter\Tokenizer\Parser\Numeral;
use NilPortugues\Sql\QueryFormatter\Tokenizer\Parser\Quoted;
use NilPortugues\Sql\QueryFormatter\Tokenizer\Parser\Reserved;
use NilPortugues\Sql\QueryFormatter\Tokenizer\Parser\LiteralString;
use NilPortugues\Sql\QueryFormatter\Tokenizer\Parser\UserDefined;
use NilPortugues\Sql\QueryFormatter\Tokenizer\Parser\WhiteSpace;
/**
* Class Tokenizer.
*/
class Tokenizer
{
    // Token types.
    const TOKEN_TYPE_WHITESPACE = 0;
    const TOKEN_TYPE_WORD = 1;
    const TOKEN_TYPE_QUOTE = 2;
    const TOKEN_TYPE_BACK_TICK_QUOTE = 3;
    const TOKEN_TYPE_RESERVED = 4;
    const TOKEN_TYPE_RESERVED_TOP_LEVEL = 5;
    const TOKEN_TYPE_RESERVED_NEWLINE = 6;
    const TOKEN_TYPE_BOUNDARY = 7;
    const TOKEN_TYPE_COMMENT = 8;
    const TOKEN_TYPE_BLOCK_COMMENT = 9;
    const TOKEN_TYPE_NUMBER = 10;
    const TOKEN_TYPE_ERROR = 11;
    const TOKEN_TYPE_VARIABLE = 12;

    // Keys of a token array: its type and its text.
    const TOKEN_TYPE = 0;
    const TOKEN_VALUE = 1;

    /**
     * @var string
     */
    protected $regexBoundaries;

    /**
     * @var string
     */
    protected $regexReserved;

    /**
     * @var string
     */
    protected $regexReservedNewLine;

    /**
     * @var string
     */
    protected $regexReservedTopLevel;

    /**
     * @var string
     */
    protected $regexFunction;

    /**
     * Length of the input prefix used as cache key. Inputs shorter than this
     * are not looked up, and tokens at least this long are not stored.
     *
     * @var int
     */
    protected $maxCacheKeySize = 15;

    /**
     * Parsed tokens indexed by the input prefix they were parsed from.
     *
     * @var array
     */
    protected $tokenCache = [];

    /**
     * Token chosen by the parser chain for the current position.
     *
     * @var array
     */
    protected $nextToken = [];

    /**
     * @var int
     */
    protected $currentStringLength = 0;

    /**
     * @var int
     */
    protected $oldStringLength = 0;

    /**
     * Last token produced; an empty string before the first token.
     *
     * @var array|string
     */
    protected $previousToken = '';

    /**
     * @var int
     */
    protected $tokenLength = 0;

    /**
     * @var array
     */
    protected $tokens = [];

    /**
     * Builds all the regular expressions needed to Tokenize the input.
     *
     * Token::$reserved is re-ordered longest word first and written back to
     * the static property before the patterns are built.
     */
    public function __construct()
    {
        $reservedMap = \array_combine(Token::$reserved, \array_map('strlen', Token::$reserved));
        \arsort($reservedMap);
        Token::$reserved = \array_keys($reservedMap);

        $this->regexFunction = $this->initRegex(Token::$functions);
        $this->regexBoundaries = $this->initRegex(Token::$boundaries);
        $this->regexReserved = $this->initRegex(Token::$reserved);
        // Multi-word entries accept any run of whitespace between the words.
        $this->regexReservedTopLevel = \str_replace(' ', '\\s+', $this->initRegex(Token::$reservedTopLevel));
        $this->regexReservedNewLine = \str_replace(' ', '\\s+', $this->initRegex(Token::$reservedNewLine));
    }

    /**
     * Turns a list of literal strings into a parenthesised regex alternation.
     *
     * @param array $variable
     *
     * @return string
     */
    protected function initRegex($variable)
    {
        return '('.implode('|', \array_map(array($this, 'quoteRegex'), $variable)).')';
    }

    /**
     * Takes a SQL string and breaks it into tokens.
     * Each token is an array holding its type and its value.
     *
     * @param string $string
     *
     * @return array Empty when $string is empty.
     */
    public function tokenize($string)
    {
        return (\strlen($string) > 0) ? $this->processTokens($string) : [];
    }

    /**
     * Consumes $string token by token and stops as soon as a pass does not
     * shorten the remaining input.
     *
     * NOTE(review): the loop condition is ">= 0", so one more pass runs after
     * the input has been fully consumed; that pass looks like it appends a
     * zero-length token. Confirm whether callers rely on it before changing.
     *
     * @param string $string
     *
     * @return array
     */
    protected function processTokens($string)
    {
        $this->tokens = [];
        $this->previousToken = '';
        $this->currentStringLength = \strlen($string);
        // Start one above the real length so the first pass is not mistaken
        // for a stall.
        $this->oldStringLength = \strlen($string) + 1;

        while ($this->currentStringLength >= 0) {
            if ($this->oldStringLength <= $this->currentStringLength) {
                break;
            }
            $string = $this->processOneToken($string);
        }

        return $this->tokens;
    }

    /**
     * Reads one token from the start of $string, records it and returns the
     * rest of the input.
     *
     * @param string $string
     *
     * @return string
     */
    protected function processOneToken($string)
    {
        $token = $this->getToken($string, $this->currentStringLength, $this->previousToken);
        $this->tokens[] = $token;
        $this->tokenLength = \strlen($token[self::TOKEN_VALUE]);
        $this->previousToken = $token;

        $this->oldStringLength = $this->currentStringLength;
        $this->currentStringLength -= $this->tokenLength;

        return \substr($string, $this->tokenLength);
    }

    /**
     * Returns the next token, from the cache when possible.
     *
     * @param string       $string
     * @param int          $currentStringLength
     * @param array|string $previousToken
     *
     * @return array|mixed
     */
    protected function getToken($string, $currentStringLength, $previousToken)
    {
        // The cache is keyed on the upcoming text only, but reserved-word
        // detection also depends on whether the previous token is ".". A
        // token that follows a dot is therefore neither read from nor
        // written to the cache, so "t.from" and "from" cannot contaminate
        // each other.
        if ($this->isDotToken($previousToken)) {
            return $this->getNextTokenFromString($string, $previousToken, '');
        }

        $cacheKey = $this->useTokenCache($string, $currentStringLength);
        if (!empty($cacheKey) && isset($this->tokenCache[$cacheKey])) {
            return $this->getNextTokenFromCache($cacheKey);
        }

        return $this->getNextTokenFromString($string, $previousToken, $cacheKey);
    }

    /**
     * Tells whether $token is a token whose value is exactly ".".
     *
     * @param array|string $token
     *
     * @return bool
     */
    private function isDotToken($token)
    {
        return \is_array($token)
            && isset($token[self::TOKEN_VALUE])
            && $token[self::TOKEN_VALUE] === '.';
    }

    /**
     * Returns the cache key for $string, or an empty string when the
     * remaining input is shorter than the key size.
     *
     * @param string $string
     * @param int    $currentStringLength
     *
     * @return string
     */
    protected function useTokenCache($string, $currentStringLength)
    {
        $cacheKey = '';

        if ($currentStringLength >= $this->maxCacheKeySize) {
            $cacheKey = \substr($string, 0, $this->maxCacheKeySize);
        }

        return $cacheKey;
    }

    /**
     * @param string $cacheKey
     *
     * @return mixed
     */
    protected function getNextTokenFromCache($cacheKey)
    {
        return $this->tokenCache[$cacheKey];
    }

    /**
     * Get the next token and the token type and store it in cache.
     * Nothing is stored when $cacheKey is empty or when the token value is
     * not shorter than the cache key size.
     *
     * @param string       $string
     * @param array|string $token    Previous token, forwarded to the parsers.
     * @param string       $cacheKey
     *
     * @return array
     */
    protected function getNextTokenFromString($string, $token, $cacheKey)
    {
        $token = $this->parseNextToken($string, $token);

        if ($cacheKey && \strlen($token[self::TOKEN_VALUE]) < $this->maxCacheKeySize) {
            $this->tokenCache[$cacheKey] = $token;
        }

        return $token;
    }

    /**
     * Return the next token and token type in a SQL string.
     * Quoted strings, comments, reserved words, whitespace, and punctuation are all their own tokens.
     *
     * The parsers run in a fixed order; each one after the whitespace parser
     * only acts while no token has been chosen.
     *
     * @param string $string   The SQL string
     * @param array  $previous The result of the previous parseNextToken() call
     *
     * @return array An array containing the type and value of the token.
     */
    protected function parseNextToken($string, $previous = null)
    {
        $matches = [];
        $this->nextToken = [];

        WhiteSpace::isWhiteSpace($this, $string, $matches);
        Comment::isComment($this, $string);
        Quoted::isQuoted($this, $string);
        UserDefined::isUserDefinedVariable($this, $string);
        Numeral::isNumeral($this, $string, $matches);
        Boundary::isBoundary($this, $string, $matches);
        Reserved::isReserved($this, $string, $previous);
        LiteralString::isFunction($this, $string, $matches);
        LiteralString::getNonReservedString($this, $string, $matches);

        return $this->nextToken;
    }

    /**
     * @return array
     */
    public function getNextToken()
    {
        return $this->nextToken;
    }

    /**
     * @param array $nextToken
     *
     * @return $this
     */
    public function setNextToken($nextToken)
    {
        $this->nextToken = $nextToken;

        return $this;
    }

    /**
     * @return string
     */
    public function getRegexBoundaries()
    {
        return $this->regexBoundaries;
    }

    /**
     * @return string
     */
    public function getRegexFunction()
    {
        return $this->regexFunction;
    }

    /**
     * @return string
     */
    public function getRegexReserved()
    {
        return $this->regexReserved;
    }

    /**
     * @return string
     */
    public function getRegexReservedNewLine()
    {
        return $this->regexReservedNewLine;
    }

    /**
     * @return string
     */
    public function getRegexReservedTopLevel()
    {
        return $this->regexReservedTopLevel;
    }

    /**
     * Helper function for building regular expressions for reserved words and boundary characters.
     *
     * @param string $string
     *
     * @return string
     */
    protected function quoteRegex($string)
    {
        return \preg_quote($string, '/');
    }
}