Start tokenizer

Miroslav Vasilev 2025-06-17 08:37:04 +03:00
parent 308b2f1268
commit adbeabc5d4


@@ -1,30 +1,83 @@
 package commandlib
 
-import (
-    "strconv"
-    "strings"
-)
-
-func Tokenize(commandMsg string) []any {
-    split := strings.Split(commandMsg, " ")
-
-    tokens := []any{}
-
-    for _, v := range split {
-        valInt, err := strconv.ParseInt(v, 10, 32)
-
-        if err == nil {
-            tokens = append(tokens, valInt)
-        }
-
-        valFloat, err := strconv.ParseFloat(v, 32)
-
-        if err == nil {
-            tokens = append(tokens, valFloat)
-        }
-
-        tokens = append(tokens, v)
-    }
-
-    return tokens
-}
+import "strings"
+
+type TokenType = byte
+
+const (
+    TokenEOF = iota
+    TokenUnknown
+    TokenNumber
+    TokenDecimal
+    TokenIdentifier
+    TokenBracketedIdentifier
+    TokenDirection
+    TokenCommand
+    TokenSelf
+    TokenPunctuation
+)
+
+var tokenPatterns = map[TokenType]string{
+    TokenNumber:              `\b\d+\b`,
+    TokenDecimal:             `\b\d+\.\d+\b`,
+    TokenIdentifier:          `\b[a-zA-Z][a-zA-Z0-9]*\b`,
+    TokenBracketedIdentifier: `\[[a-zA-Z][a-zA-Z0-9]*\]`,
+    TokenDirection:           `\b(north|south|east|west|up|down)\b`,
+    TokenSelf:                `\bself\b`,
+    TokenPunctuation:         `[,.!?'/":;\-\[\]\(\)]`,
+    TokenUnknown:             `.`,
+}
+
+type Token struct {
+    token  TokenType
+    lexeme string
+    index  int
+}
+
+func CreateToken(token TokenType, lexeme string, index int) Token {
+    return Token{
+        token:  token,
+        lexeme: lexeme,
+        index:  index,
+    }
+}
+
+func (t Token) Token() TokenType {
+    return t.token
+}
+
+func (t Token) Lexeme() string {
+    return t.lexeme
+}
+
+func (t Token) Index() int {
+    return t.index
+}
+
+type tokenizer struct {
+    commandNameTokenRegex string
+}
+
+func CreateTokenizer(commandNames []string) *tokenizer {
+    return &tokenizer{
+        commandNameTokenRegex: `\b(` + strings.Join(commandNames, "|") + `)\b`,
+    }
+}
+
+func (t *tokenizer) Tokenize(commandMsg string) (tokens []Token) {
+    tokens = []Token{}
+    pos := 0
+    inputLen := len(commandMsg)
+
+    for pos < inputLen {
+        matched := false
+        for tokenType, pattern := range tokenPatterns {
+        }
+    }
+}
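
Note that the new Tokenize method is left unfinished here, as the commit title "Start tokenizer" suggests: matched is assigned but never used and the inner loop over tokenPatterns has an empty body, so this revision does not compile yet. Below is a minimal sketch of one way the loop could be completed. The matchOrder slice, the whitespace skipping, the \A anchoring, and the command-name handling are all assumptions of this sketch, not part of the commit; a fixed ordering matters because Go maps iterate in random order, so TokenDecimal must be tried before TokenNumber and the catch-all TokenUnknown last.

package commandlib

import (
    "regexp"
    "unicode"
)

// matchOrder fixes a priority over tokenPatterns: more specific patterns
// (decimal before number, direction/self before identifier) come first,
// and the catch-all TokenUnknown comes last. This ordering is an
// assumption, not part of the commit.
var matchOrder = []TokenType{
    TokenDecimal,
    TokenNumber,
    TokenDirection,
    TokenSelf,
    TokenBracketedIdentifier,
    TokenIdentifier,
    TokenPunctuation,
    TokenUnknown,
}

func (t *tokenizer) Tokenize(commandMsg string) (tokens []Token) {
    tokens = []Token{}
    pos := 0
    inputLen := len(commandMsg)

    // Assumption: command names are matched via the regex built in
    // CreateTokenizer, anchored with \A so it only matches at the
    // current position of the input.
    commandRe := regexp.MustCompile(`\A(?:` + t.commandNameTokenRegex + `)`)

    for pos < inputLen {
        // Whitespace separates tokens but produces none (assumption;
        // tokenPatterns has no whitespace entry).
        if unicode.IsSpace(rune(commandMsg[pos])) {
            pos++
            continue
        }

        rest := commandMsg[pos:]

        // Try command names first, then the fixed-priority patterns.
        if loc := commandRe.FindStringIndex(rest); loc != nil {
            tokens = append(tokens, CreateToken(TokenCommand, rest[:loc[1]], pos))
            pos += loc[1]
            continue
        }

        for _, tokenType := range matchOrder {
            // Compiled per iteration for brevity; a real implementation
            // would precompile these once, e.g. in CreateTokenizer.
            re := regexp.MustCompile(`\A(?:` + tokenPatterns[tokenType] + `)`)
            if loc := re.FindStringIndex(rest); loc != nil {
                tokens = append(tokens, CreateToken(tokenType, rest[:loc[1]], pos))
                pos += loc[1]
                break
            }
        }
        // TokenUnknown's pattern `.` always matches one character, so pos
        // always advances and the outer loop cannot spin forever.
    }

    // Terminate the stream with an EOF marker at the end of input.
    return append(tokens, CreateToken(TokenEOF, "", pos))
}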
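
Assuming a completion along those lines, the API introduced by this commit would be exercised roughly like so (the command names and input are made up):

tok := CreateTokenizer([]string{"go", "look", "take"})
for _, token := range tok.Tokenize("take [sword] then go north") {
    fmt.Println(token.Token(), token.Lexeme(), token.Index())
}

This would yield a TokenCommand for "take", a TokenBracketedIdentifier for "[sword]", a TokenIdentifier for "then", another TokenCommand, a TokenDirection, and a closing TokenEOF, each carrying its lexeme and byte offset in the input.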