package command

import (
	"fmt"
	"regexp"
)

// TokenType identifies the kind of token produced by the Tokenizer.
type TokenType byte

const (
	TokenEOF TokenType = iota

	TokenUnknown

	TokenNumber
	TokenDecimal
	TokenIdentifier
	TokenBracketedIdentifier
	TokenText

	TokenDirection
	TokenCommand
	TokenSelf

	TokenWhitespace
)

// String returns a human-readable name for the token type.
func (tt TokenType) String() string {
	switch tt {
	case TokenEOF:
		return "EOF"
	case TokenUnknown:
		return "Unknown"
	case TokenNumber:
		return "Number"
	case TokenDecimal:
		return "Decimal"
	case TokenIdentifier:
		return "Identifier"
	case TokenBracketedIdentifier:
		return "BracketedIdentifier"
	case TokenText:
		return "Text"
	case TokenDirection:
		return "Direction"
	case TokenCommand:
		return "Command"
	case TokenSelf:
		return "Self"
	case TokenWhitespace:
		return "Whitespace"
	default:
		return fmt.Sprintf("TokenType(%d)", byte(tt))
	}
}

// Token is a single lexed element of a command string.
type Token struct {
	token  TokenType
	lexeme string
	index  int
}

// CreateToken builds a Token from its type, lexeme, and index.
func CreateToken(token TokenType, lexeme string, index int) Token {
	return Token{
		token:  token,
		lexeme: lexeme,
		index:  index,
	}
}

// Token returns the token's type.
func (t Token) Token() TokenType {
	return t.token
}

// Lexeme returns the matched text for this token.
func (t Token) Lexeme() string {
	return t.lexeme
}

// Index returns the position recorded for this token within the input.
func (t Token) Index() int {
	return t.index
}

// String renders the token as its index, type, and quoted lexeme, which is
// convenient for debugging output.
func (t Token) String() string {
	return fmt.Sprintf("%3d %16v: %q", t.index, t.token, t.lexeme)
}

// tokenPattern pairs a token type with the regular expression used to match it.
type tokenPattern struct {
	tokenType TokenType
	pattern   string
}

// Tokenizer is used to tokenize a string input.
// It is the starting point for parsing a command string.
// Create one with [CreateTokenizer].
type Tokenizer struct {
	tokenPatterns []tokenPattern
}

// CreateTokenizer returns a Tokenizer configured with the default token
// patterns. Patterns are tried in order during tokenization, so more specific
// patterns (such as TokenDecimal) are listed before more general ones (such
// as TokenNumber).
func CreateTokenizer() *Tokenizer {
	return &Tokenizer{
		tokenPatterns: []tokenPattern{
			{tokenType: TokenDecimal, pattern: `\b\d+\.\d+\b`},
			{tokenType: TokenNumber, pattern: `\b\d+\b`},
			{tokenType: TokenDirection, pattern: `\b(north|south|east|west|up|down)\b`},
			{tokenType: TokenBracketedIdentifier, pattern: `\[[ a-zA-Z0-9'-][ a-zA-Z0-9'-]*\]`},
			{tokenType: TokenSelf, pattern: `\bself\b`},
			{tokenType: TokenIdentifier, pattern: `\b[a-zA-Z'-][a-zA-Z0-9'-]*\b`},
			{tokenType: TokenWhitespace, pattern: `\s+`},
			{tokenType: TokenUnknown, pattern: `[^ \t\n\r\f\v]+`},
		},
	}
}

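// As a rough illustration of how these patterns combine (the sample input
// below is an arbitrary example, not a fixture from this package): tokenizing
//
//	give [rusty sword] to guard
//
// yields an Identifier ("give"), a BracketedIdentifier ("[rusty sword]"),
// two further Identifiers ("to" and "guard"), and a closing EOF token;
// the whitespace between words is matched but discarded.
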
// Tokenize splits a command string into a slice of tokens, ending with a
// TokenEOF marker.
func (t *Tokenizer) Tokenize(commandString string) (tokens []Token, err error) {
	tokens = []Token{}
	pos := 0
	inputLen := len(commandString)

	// Continue iterating until we reach the end of the input.
	for pos < inputLen {
		matched := false
		remaining := commandString[pos:]

		// Try each token pattern in order; the first one that matches wins.
		for _, pattern := range t.tokenPatterns {
			// All patterns are case-insensitive and must match at the beginning of the remaining input (^).
			re, regexError := regexp.Compile(`(?i)^` + pattern.pattern)

			// If a pattern fails to compile (wrongly defined pattern?), stop tokenization and report the error.
			if regexError != nil {
				tokens = nil
				err = regexError
				return
			}

			// A non-nil location means the pattern matched.
			if loc := re.FindStringIndex(remaining); loc != nil {
				lexeme := remaining[loc[0]:loc[1]]

				// Advance past the lexeme; the advanced position is recorded as the token's index.
				pos += loc[1]
				matched = true

				// Whitespace is consumed but never emitted as a token.
				if pattern.tokenType == TokenWhitespace {
					break
				}

				tokens = append(tokens, CreateToken(pattern.tokenType, lexeme, pos))
				break
			}
		}

		// If nothing matched, emit the offending character as an Unknown token and move on.
		if !matched {
			tokens = append(tokens, CreateToken(TokenUnknown, commandString[pos:pos+1], pos))
			pos++
		}
	}

	// Mark the end of the tokens.
	tokens = append(tokens, CreateToken(TokenEOF, "", pos))

	return
}

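// exampleTokenizeUsage is an illustrative sketch of how a caller might drive
// the tokenizer; the sample command string and the decision to print each
// token are assumptions made purely for demonstration.
func exampleTokenizeUsage() {
	tokenizer := CreateTokenizer()

	tokens, err := tokenizer.Tokenize("give [rusty sword] to guard")
	if err != nil {
		// Only reachable if one of the configured patterns fails to compile.
		fmt.Println("tokenize error:", err)
		return
	}

	// Each token prints as its index, type, and quoted lexeme (see Token.String).
	for _, token := range tokens {
		fmt.Println(token)
	}
}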