454 lines
11 KiB
PHP
454 lines
11 KiB
PHP
<?php
|
|
/**
|
|
* Author: Nil Portugués Calderó <contact@nilportugues.com>
|
|
* Date: 6/26/14
|
|
* Time: 2:19 AM.
|
|
*
|
|
* For the full copyright and license information, please view the LICENSE
|
|
* file that was distributed with this source code.
|
|
*/
|
|
|
|
namespace NilPortugues\Tests\Sql\QueryFormatter\Tokenizer;
|
|
|
|
use NilPortugues\Sql\QueryFormatter\Tokenizer\Tokenizer;
|
|
|
|
/**
|
|
* Class TokenizerTest.
|
|
*/
|
|
class TokenizerTest extends \PHPUnit_Framework_TestCase
|
|
{
|
|
/**
|
|
* @var Tokenizer
|
|
*/
|
|
private $tokenizer;
|
|
|
|
/**
|
|
*
|
|
*/
|
|
protected function setUp()
|
|
{
|
|
$this->tokenizer = new Tokenizer();
|
|
}
|
|
|
|
/**
|
|
*
|
|
*/
|
|
protected function tearDown()
|
|
{
|
|
$this->tokenizer = null;
|
|
}
|
|
|
|
/**
|
|
* @test
|
|
*/
|
|
public function itShouldTokenizeWhiteSpace()
|
|
{
|
|
$sql = <<<SQL
|
|
SELECT * FROM users;
|
|
SQL;
|
|
$result = $this->tokenizer->tokenize($sql);
|
|
|
|
$whiteSpacesFound = false;
|
|
foreach ($result as $token) {
|
|
if (' ' === $token[Tokenizer::TOKEN_VALUE]) {
|
|
$whiteSpacesFound = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
$this->assertTrue($whiteSpacesFound);
|
|
}
|
|
|
|
/**
|
|
* Comment starting with # will be treated as tokens representing the whole comment.
|
|
*
|
|
* @test
|
|
*/
|
|
public function itShouldTokenizeCommentStartingWithSymbolNumber()
|
|
{
|
|
$sql = <<<SQL
|
|
SELECT * FROM users # A comment
|
|
WHERE user_id = 2;
|
|
SQL;
|
|
$result = $this->tokenizer->tokenize($sql);
|
|
|
|
$commentFound = false;
|
|
foreach ($result as $token) {
|
|
if ('# A comment' === $token[Tokenizer::TOKEN_VALUE]) {
|
|
$commentFound = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
$this->assertTrue($commentFound);
|
|
}
|
|
|
|
/**
|
|
* Comment starting with -- will be treated as tokens representing the whole comment.
|
|
*
|
|
* @test
|
|
*/
|
|
public function itShouldTokenizeCommentStartingWithDash()
|
|
{
|
|
$sql = <<<SQL
|
|
SELECT * FROM
|
|
-- This is another comment
|
|
users
|
|
SQL;
|
|
$result = $this->tokenizer->tokenize($sql);
|
|
|
|
$commentFound = false;
|
|
foreach ($result as $token) {
|
|
if ('-- This is another comment' === $token[Tokenizer::TOKEN_VALUE]) {
|
|
$commentFound = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
$this->assertTrue($commentFound);
|
|
}
|
|
|
|
/**
|
|
* Comment blocks in SQL Server fashion will be treated as tokens representing the whole comment.
|
|
*
|
|
* @test
|
|
*/
|
|
public function itShouldTokenizeCommentWithBlockComment()
|
|
{
|
|
$sql = <<<SQL
|
|
SELECT * FROM /* This is a block comment */ WHERE 1 = 2;
|
|
SQL;
|
|
$result = $this->tokenizer->tokenize($sql);
|
|
|
|
$commentFound = false;
|
|
foreach ($result as $token) {
|
|
if ('/* This is a block comment */' === $token[Tokenizer::TOKEN_VALUE]) {
|
|
$commentFound = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
$this->assertTrue($commentFound);
|
|
}
|
|
|
|
/**
|
|
* Unterminated comment block will be processed incorrectly by the tokenizer,
|
|
* yet we should be able to detect this error in order to handle the resulting situation.
|
|
*
|
|
* @test
|
|
*/
|
|
public function itShouldTokenizeCommentWithUnterminatedBlockComment()
|
|
{
|
|
$sql = <<<SQL
|
|
SELECT * FROM /* This is a block comment WHERE 1 = 2;
|
|
SQL;
|
|
$result = $this->tokenizer->tokenize($sql);
|
|
|
|
$commentStartFound = false;
|
|
foreach ($result as $token) {
|
|
if ('/*' === $token[Tokenizer::TOKEN_VALUE]) {
|
|
$commentStartFound = true;
|
|
break;
|
|
}
|
|
}
|
|
$this->assertTrue($commentStartFound);
|
|
}
|
|
|
|
/**
|
|
* @test
|
|
*/
|
|
public function itShouldTokenizeQuoted()
|
|
{
|
|
$sql = <<<SQL
|
|
UPDATE `PREFIX_cms_category` SET `position` = 0
|
|
SQL;
|
|
$result = $this->tokenizer->tokenize($sql);
|
|
|
|
$quotedFound = false;
|
|
foreach ($result as $token) {
|
|
if ('`position`' === $token[Tokenizer::TOKEN_VALUE]) {
|
|
$quotedFound = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
$this->assertTrue($quotedFound);
|
|
}
|
|
|
|
/**
|
|
* SQL Server syntax for defining custom variables.
|
|
*
|
|
* @test
|
|
*/
|
|
public function itShouldTokenizeUserDefinedVariableWithQuotations()
|
|
{
|
|
$sql = <<<SQL
|
|
SELECT au_lname, au_fname, phone FROM authors WHERE au_lname LIKE @find;
|
|
SQL;
|
|
$result = $this->tokenizer->tokenize($sql);
|
|
|
|
$quotedFound = false;
|
|
foreach ($result as $token) {
|
|
if ('@find' === $token[Tokenizer::TOKEN_VALUE]) {
|
|
$quotedFound = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
$this->assertTrue($quotedFound);
|
|
}
|
|
|
|
/**
|
|
* This validates Microsoft SQL Server syntax for user variables.
|
|
*
|
|
* @test
|
|
*/
|
|
public function itShouldTokenizeUserDefinedVariableWithAtSymbol()
|
|
{
|
|
$sql = <<<SQL
|
|
SELECT @"weird variable name";
|
|
SQL;
|
|
$result = $this->tokenizer->tokenize($sql);
|
|
|
|
$this->assertNotEmpty($result);
|
|
}
|
|
|
|
/**
|
|
* This test is an edge case.
|
|
*
|
|
* Given the provided statement, will loop forever if no condition is checking total amount
|
|
* of the input string processed. This will happen because the tokenizer has no matching case
|
|
* and string processing will not progress.
|
|
*
|
|
* @test
|
|
*/
|
|
public function itShouldTokenizeUserDefinedVariableNoProgressTokenizer()
|
|
{
|
|
$sql = <<<SQL
|
|
SELECT @ and b; /* Edge case */
|
|
SQL;
|
|
$result = $this->tokenizer->tokenize($sql);
|
|
|
|
$userVariableNotFound = true;
|
|
foreach ($result as $token) {
|
|
if ('@' === $token[Tokenizer::TOKEN_VALUE]) {
|
|
$userVariableNotFound = false;
|
|
break;
|
|
}
|
|
}
|
|
|
|
$this->assertTrue($userVariableNotFound);
|
|
}
|
|
|
|
/**
|
|
* @test
|
|
*/
|
|
public function itShouldTokenizeNumeralInteger()
|
|
{
|
|
$sql = <<<SQL
|
|
SELECT user_id FROM user WHERE user_id = 1;
|
|
SQL;
|
|
$result = $this->tokenizer->tokenize($sql);
|
|
|
|
$numeralIntegerFound = false;
|
|
foreach ($result as $token) {
|
|
if ('1' == $token[Tokenizer::TOKEN_VALUE]) {
|
|
$numeralIntegerFound = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
$this->assertTrue($numeralIntegerFound);
|
|
}
|
|
|
|
/**
|
|
* @test
|
|
*/
|
|
public function itShouldTokenizeNumeralNegativeIntegerAsPositiveInteger()
|
|
{
|
|
$sql = <<<SQL
|
|
SELECT user_id FROM user WHERE user_id = -1;
|
|
SQL;
|
|
$result = $this->tokenizer->tokenize($sql);
|
|
|
|
$numeralIntegerFound = false;
|
|
foreach ($result as $token) {
|
|
if ('1' == $token[Tokenizer::TOKEN_VALUE]) {
|
|
$numeralIntegerFound = true;
|
|
break;
|
|
}
|
|
}
|
|
$this->assertTrue($numeralIntegerFound);
|
|
}
|
|
|
|
/**
|
|
* @test
|
|
*/
|
|
public function itShouldTokenizeNumeralFloat()
|
|
{
|
|
$sql = <<<SQL
|
|
SELECT user_id FROM user WHERE user_id = 3.14;
|
|
SQL;
|
|
$result = $this->tokenizer->tokenize($sql);
|
|
|
|
$numeralIntegerFound = false;
|
|
foreach ($result as $token) {
|
|
if ('3.14' == $token[Tokenizer::TOKEN_VALUE]) {
|
|
$numeralIntegerFound = true;
|
|
break;
|
|
}
|
|
}
|
|
$this->assertTrue($numeralIntegerFound);
|
|
}
|
|
|
|
/**
|
|
* @test
|
|
*/
|
|
public function itShouldTokenizeNumeralNegativeFloatAsPositiveFloat()
|
|
{
|
|
$sql = <<<SQL
|
|
SELECT user_id FROM user WHERE user_id = -3.14;
|
|
SQL;
|
|
$result = $this->tokenizer->tokenize($sql);
|
|
|
|
$numeralIntegerFound = false;
|
|
foreach ($result as $token) {
|
|
if ('3.14' == $token[Tokenizer::TOKEN_VALUE]) {
|
|
$numeralIntegerFound = true;
|
|
break;
|
|
}
|
|
}
|
|
$this->assertTrue($numeralIntegerFound);
|
|
}
|
|
|
|
/**
|
|
* Check if boundary characters are in array. Boundary characters are: ;:)(.=<>+-\/!^%|&#.
|
|
*
|
|
* @test
|
|
*/
|
|
public function itShouldTokenizeBoundaryCharacter()
|
|
{
|
|
$sql = 'SELECT id_user, name FROM users';
|
|
|
|
$result = $this->tokenizer->tokenize($sql);
|
|
|
|
$boundaryFound = false;
|
|
foreach ($result as $token) {
|
|
if (',' === $token[Tokenizer::TOKEN_VALUE]) {
|
|
$boundaryFound = true;
|
|
break;
|
|
}
|
|
}
|
|
$this->assertTrue($boundaryFound);
|
|
}
|
|
|
|
/**
|
|
* Tokenize columns should not be a problem, even if using a reserved word as a column name.
|
|
* Example: users.user_id
|
|
* Example of edge cases: users.desc, user.from.
|
|
*
|
|
* @test
|
|
*/
|
|
public function itShouldTokenizeReservedWordPrecededByDotCharacter()
|
|
{
|
|
$sql = <<<SQL
|
|
SELECT users.desc as userId FROM users;
|
|
SQL;
|
|
$result = $this->tokenizer->tokenize($sql);
|
|
|
|
$reservedTokenFound = false;
|
|
foreach ($result as $token) {
|
|
if ('desc' == $token[Tokenizer::TOKEN_VALUE]) {
|
|
$reservedTokenFound = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
$this->assertTrue($reservedTokenFound);
|
|
}
|
|
|
|
/**
|
|
* @test
|
|
*/
|
|
public function itShouldTokenizeReservedTopLevel()
|
|
{
|
|
$sql = 'SELECT id_user, name FROM users';
|
|
$result = $this->tokenizer->tokenize($sql);
|
|
|
|
$reservedTopLevelTokenFound = false;
|
|
|
|
foreach ($result as $token) {
|
|
if ('FROM' === $token[Tokenizer::TOKEN_VALUE]) {
|
|
$reservedTopLevelTokenFound = true;
|
|
break;
|
|
}
|
|
}
|
|
$this->assertTrue($reservedTopLevelTokenFound);
|
|
}
|
|
|
|
/**
|
|
* @test
|
|
*/
|
|
public function itShouldTokenizeReservedNewLine()
|
|
{
|
|
$sql = <<<SQL
|
|
UPDATE users SET registration_date = "0000-00-00" WHERE id_user = 1 OR id_user = 2;
|
|
SQL;
|
|
$result = $this->tokenizer->tokenize($sql);
|
|
|
|
$reservedNewLineTokenFound = false;
|
|
foreach ($result as $token) {
|
|
if ('OR' === $token[Tokenizer::TOKEN_VALUE]) {
|
|
$reservedNewLineTokenFound = true;
|
|
break;
|
|
}
|
|
}
|
|
$this->assertTrue($reservedNewLineTokenFound);
|
|
}
|
|
|
|
/**
|
|
* @test
|
|
*/
|
|
public function itShouldTokenizeReserved()
|
|
{
|
|
$sql = <<<SQL
|
|
SELECT customer_id, customer_name, COUNT(order_id) as total
|
|
FROM customers INNER JOIN orders ON customers.customer_id = orders.customer_id
|
|
GROUP BY customer_id, customer_name
|
|
HAVING COUNT(order_id) > 5
|
|
ORDER BY COUNT(order_id) DESC;
|
|
SQL;
|
|
$result = $this->tokenizer->tokenize($sql);
|
|
|
|
$reservedTokenFound = false;
|
|
foreach ($result as $token) {
|
|
if ('INNER JOIN' === $token[Tokenizer::TOKEN_VALUE]) {
|
|
$reservedTokenFound = true;
|
|
break;
|
|
}
|
|
}
|
|
$this->assertTrue($reservedTokenFound);
|
|
}
|
|
|
|
/**
|
|
* @test
|
|
*/
|
|
public function itShouldTokenizeFunction()
|
|
{
|
|
$sql = <<<SQL
|
|
SELECT customer_id, customer_name, COUNT(order_id) as total FROM customers GROUP BY customer_id, customer_name
|
|
HAVING COUNT(order_id) > 5 ORDER BY COUNT(order_id) DESC;
|
|
SQL;
|
|
$result = $this->tokenizer->tokenize($sql);
|
|
|
|
$functionFound = false;
|
|
foreach ($result as $token) {
|
|
if ('COUNT' === $token[Tokenizer::TOKEN_VALUE]) {
|
|
$functionFound = true;
|
|
break;
|
|
}
|
|
}
|
|
$this->assertTrue($functionFound);
|
|
}
|
|
}
|