Tokenizer.php 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. <?php declare(strict_types = 1);
  2. namespace TheSeer\Tokenizer;
  /**
   * Converts PHP source code into a TokenCollection.
   *
   * The raw output of token_get_all() is normalised so that every entry,
   * including the single-character "non-tokens", becomes a Token built from
   * a line number, a token name and the token text.
   */
  class Tokenizer {

      /**
       * Token Map for "non-tokens"
       *
       * Maps each single-character string that token_get_all() can return
       * in place of a token array to the name used for the resulting Token.
       *
       * @var array<string, string>
       */
      private $map = [
          '(' => 'T_OPEN_BRACKET',
          ')' => 'T_CLOSE_BRACKET',
          '[' => 'T_OPEN_SQUARE',
          ']' => 'T_CLOSE_SQUARE',
          '{' => 'T_OPEN_CURLY',
          '}' => 'T_CLOSE_CURLY',
          ';' => 'T_SEMICOLON',
          '.' => 'T_DOT',
          ',' => 'T_COMMA',
          '=' => 'T_EQUAL',
          '<' => 'T_LT',
          '>' => 'T_GT',
          '+' => 'T_PLUS',
          '-' => 'T_MINUS',
          '*' => 'T_MULT',
          '/' => 'T_DIV',
          '?' => 'T_QUESTION_MARK',
          '!' => 'T_EXCLAMATION_MARK',
          ':' => 'T_COLON',
          '"' => 'T_DOUBLE_QUOTES',
          '@' => 'T_AT',
          '&' => 'T_AMPERSAND',
          '%' => 'T_PERCENT',
          '|' => 'T_PIPE',
          '$' => 'T_DOLLAR',
          '^' => 'T_CARET',
          '~' => 'T_TILDE',
          '`' => 'T_BACKTICK'
      ];

      /**
       * Tokenizes the given PHP source.
       *
       * Single-character tokens are named via the token map and placed on the
       * line of the token that precedes them. Every other token is split at
       * line breaks and one Token is added per resulting piece, with the line
       * number increasing by one for each piece.
       *
       * @param string $source PHP source code to tokenize
       *
       * @return TokenCollection the collected tokens; empty when $source
       *                         yields no tokens (e.g. an empty string)
       */
      public function parse(string $source): TokenCollection {
          $result = new TokenCollection();
          $tokens = token_get_all($source);

          // Nothing to tokenize: bail out before $tokens[0] is dereferenced.
          if ($tokens === []) {
              return $result;
          }

          // Seed the "previous token" so that a single-character token at the
          // very start still has a line number to inherit.
          $first     = $tokens[0];
          $lastToken = new Token(
              is_array($first) ? $first[2] : 1,
              'Placeholder',
              ''
          );

          foreach ($tokens as $tok) {
              if (is_string($tok)) {
                  // Single-character tokens carry no line information of their
                  // own, so they reuse the line of the preceding token.
                  $token = new Token(
                      $lastToken->getLine(),
                      $this->map[$tok],
                      $tok
                  );
                  $result->addToken($token);
                  $lastToken = $token;

                  continue;
              }

              // Token arrays are [id, text, line].
              $line = $tok[2];

              foreach ($this->splitLines($tok[1]) as $v) {
                  $token = new Token(
                      $line,
                      token_name($tok[0]),
                      $v
                  );
                  $result->addToken($token);
                  $line++;
                  $lastToken = $token;
              }
          }

          return $result;
      }

      /**
       * Splits a token's text into one piece per line.
       *
       * @param string $text raw token text as reported by token_get_all()
       *
       * @return string[] the text split at each line break; consecutive line
       *                  breaks produce empty strings in between
       */
      private function splitLines(string $text): array {
          $parts = preg_split('/\R+/Uu', $text);

          if ($parts === false) {
              // The "u" modifier makes preg_split() fail on text that is not
              // valid UTF-8. Retry with a byte-oriented pattern so the token
              // is not silently dropped.
              $parts = preg_split('/\r\n|\r|\n/', $text);
          }

          // Last resort: keep the token intact rather than losing it.
          return $parts === false ? [$text] : $parts;
      }

  }