Files
portmaster/desktop/angular/src/app/shared/netquery/textql/lexer.ts
2024-03-29 09:35:18 +01:00

255 lines
5.9 KiB
TypeScript

import { isDigit, isIdentChar, isLetter, isWhitespace } from "./helper";
import { InputStream } from "./input";
import { Token, TokenType } from "./token";
/**
 * Lexer turns a query string into a sequence of tokens.
 *
 * Tokens are produced on demand: peek() reads one token and caches it as
 * look-ahead, next() hands out the cached token (or reads a fresh one).
 * Every token carries its `type`, the `literal` text, the parsed `value`
 * and the `start` position reported by the input stream.
 */
export class Lexer {
  /**
   * Terminator used for unquoted strings. The expression intentionally has
   * no `g`/`y` flag so that repeated test() calls do not depend on lastIndex.
   */
  private static readonly UNQUOTED_END = /\s/;

  /** Look-ahead token cached by peek(); null when nothing is buffered. */
  private _current: Token<any> | null = null;

  /** Character stream the tokens are read from. */
  private _input: InputStream;

  constructor(input: string) {
    this._input = new InputStream(input);
  }

  /** peek returns the token at the current position in input without consuming it. */
  public peek(): Token<any> | null {
    if (this._current === null) {
      this._current = this.readNextToken();
    }
    return this._current;
  }

  /** next returns the token buffered by peek() or, if there is none, reads the next one. */
  public next(): Token<any> | null {
    const buffered = this._current;
    this._current = null;
    return buffered !== null ? buffered : this.readNextToken();
  }

  /** eof returns true if the lexer reached the end of the input stream. */
  public eof(): boolean {
    return this.peek() === null;
  }

  /**
   * croak throws an error message at the current position in the input stream.
   * The literal of the current token (or null at the end of input) is appended
   * to the message.
   */
  public croak(msg: string): never {
    const tok = this.peek();
    return this._input.croak(`${msg}. Current token is "${tok !== null ? tok.literal : null}"`);
  }

  /** Consumes the input stream as long as predicate returns true and returns the consumed text. */
  private readWhile(predicate: (ch: string) => boolean): string {
    let str = '';
    while (!this._input.eof() && predicate(this._input.peek())) {
      str += this._input.next();
    }
    return str;
  }

  /**
   * Reads a number token made of digits and at most one dot.
   *
   * Returns null, with the consumed characters given back to the input
   * stream, when the digits are directly followed by something other than
   * whitespace or the end of input; the caller then treats the text as a
   * different kind of token.
   */
  private readNumber(): Token<TokenType.NUMBER> | null {
    const start = this._input.pos;
    let hasDot = false;

    const number = this.readWhile((ch: string) => {
      if (ch === '.') {
        // Only the first dot belongs to the number.
        if (hasDot) {
          return false;
        }
        hasDot = true;
        return true;
      }
      return isDigit(ch);
    });

    if (!this._input.eof() && !isWhitespace(this._input.peek())) {
      this._input.revert(number.length);
      return null;
    }

    return {
      type: TokenType.NUMBER,
      literal: number,
      // Explicit radix: the literal only ever contains decimal digits.
      value: hasDot ? parseFloat(number) : parseInt(number, 10),
      start
    };
  }

  /**
   * Reads a run of identifier characters and classifies it:
   * `true`/`yes` and `false`/`no` become BOOL tokens, `groupby` and `orderby`
   * become their keyword tokens, everything else is an IDENT.
   */
  private readIdent(): Token<TokenType.IDENT | TokenType.BOOL | TokenType.GROUPBY | TokenType.ORDERBY> {
    const start = this._input.pos;
    const id = this.readWhile(ch => isIdentChar(ch));

    switch (id) {
      case 'true':
      case 'yes':
        return { type: TokenType.BOOL, literal: id, value: true, start };

      case 'false':
      case 'no':
        return { type: TokenType.BOOL, literal: id, value: false, start };

      case 'groupby':
        return { type: TokenType.GROUPBY, literal: id, value: id, start };

      case 'orderby':
        return { type: TokenType.ORDERBY, literal: id, value: id, start };

      default:
        return { type: TokenType.IDENT, literal: id, value: id, start };
    }
  }

  /**
   * Reads characters until an unescaped terminator or the end of input.
   *
   * A backslash escapes the following character, which is then taken
   * verbatim (including a terminator). The terminator itself is consumed but
   * not added to the text.
   *
   * @param end       terminator: either an exact character or an expression tested against each character
   * @param skipStart when true the first character (an opening quote) is consumed and discarded
   * @returns the unescaped text and the terminator that stopped the scan, or
   *          `terminator: null` if the input ended first
   */
  private readEscaped(end: string | RegExp, skipStart: boolean): { text: string; terminator: string | null } {
    let escaped = false;
    let text = '';

    if (skipStart) {
      this._input.next();
    }

    while (!this._input.eof()) {
      const ch = this._input.next()!;

      if (escaped) {
        text += ch;
        escaped = false;
      } else if (ch === '\\') {
        escaped = true;
      } else if (typeof end === 'string' ? ch === end : end.test(ch)) {
        return { text, terminator: ch };
      } else {
        text += ch;
      }
    }

    return { text, terminator: null };
  }

  /** Reads a string token that is terminated by `quote`; see readEscaped for the parameters. */
  private readString(quote: string | RegExp, skipStart: boolean): Token<TokenType.STRING> {
    const start = this._input.pos;
    const value = this.readEscaped(quote, skipStart).text;

    return {
      type: TokenType.STRING,
      literal: value,
      value: value,
      start
    };
  }

  /**
   * Reads a string without quotes; it ends at the next unescaped whitespace
   * or at the end of input.
   *
   * The terminating whitespace is handed back to the input stream so that it
   * is emitted as a WHITESPACE token of its own. This decision is based on
   * the terminator reported by the scan rather than on the last consumed
   * character: an escaped whitespace at the very end of the input is part of
   * the string and must not be emitted a second time.
   */
  private readUnquotedString(): Token<TokenType.STRING> {
    const start = this._input.pos;
    const scanned = this.readEscaped(Lexer.UNQUOTED_END, false);

    // A terminator that isWhitespace() does not recognise stays consumed.
    if (scanned.terminator !== null && isWhitespace(scanned.terminator)) {
      this._input.revert(1);
    }

    return {
      type: TokenType.STRING,
      literal: scanned.text,
      value: scanned.text,
      start
    };
  }

  /** Reads a run of whitespace characters as a single token. */
  private readWhitespace(): Token<TokenType.WHITESPACE> {
    const start = this._input.pos;
    const value = this.readWhile(ch => isWhitespace(ch));

    return {
      type: TokenType.WHITESPACE,
      literal: value,
      value: value,
      start
    };
  }

  /**
   * Reads the next token from the input stream, dispatching on the first
   * character. Returns null at the end of input and croaks on a character
   * that no rule accepts.
   */
  private readNextToken(): Token<any> | null {
    const start = this._input.pos;
    const ch = this._input.peek();

    // The stream yields an empty string once the input is exhausted.
    if (ch === '') {
      return null;
    }

    if (isWhitespace(ch)) {
      return this.readWhitespace();
    }

    if (ch === '"') {
      return this.readString('"', true);
    }

    if (ch === '\'') {
      return this.readString('\'', true);
    }

    if (isDigit(ch)) {
      const number = this.readNumber();
      if (number !== null) {
        return number;
      }
      // Not a plain number: the input was reverted, fall through to the
      // remaining rules.
    }

    if (ch === ':') {
      this._input.next();
      return { type: TokenType.COLON, value: ':', literal: ':', start };
    }

    if (ch === '!') {
      this._input.next();
      return { type: TokenType.NOT, value: '!', literal: '!', start };
    }

    if (isIdentChar(ch)) {
      const ident = this.readIdent();
      const following = this._input.peek();

      if (!this._input.eof() && !isWhitespace(following) && following !== ':') {
        // Identifiers should always end in a colon or with a whitespace.
        // If neither is the case we are in the middle of a token and are
        // likely parsing a string without quotes: start over and read it
        // as such.
        this._input.revert(ident.literal.length);
        return this.readUnquotedString();
      }

      return ident;
    }

    if (isLetter(ch)) {
      return this.readUnquotedString();
    }

    // Failed to handle the input character
    return this._input.croak(`Can't handle character: ${ch}`);
  }
}