LastMUD/src/CommandLib/tokenizer.go

package commandlib

import (
	"regexp"
	"strings"
)

type TokenType = byte

const (
	TokenEOF TokenType = iota
	TokenUnknown
	TokenNumber
	TokenDecimal
	TokenIdentifier
	TokenBracketedIdentifier
	TokenDirection
	TokenCommand
	TokenSelf
	TokenPunctuation
)

var tokenPatterns = map[TokenType]string{
	TokenNumber:              `\b\d+\b`,
	TokenDecimal:             `\b\d+\.\d+\b`,
	TokenIdentifier:          `\b[a-zA-Z][a-zA-Z0-9]*\b`,
	TokenBracketedIdentifier: `\[[a-zA-Z][a-zA-Z0-9]*\]`,
	TokenDirection:           `\b(north|south|east|west|up|down)\b`,
	TokenSelf:                `\bself\b`,
	TokenPunctuation:         `[,.!?'/":;\-\[\]\(\)]`,
	TokenUnknown:             `.`,
}
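
// tokenPriority is an assumed match order: Go maps iterate in random order,
// so Tokenize below walks this slice instead of ranging over tokenPatterns
// directly. More specific patterns must come first (TokenDecimal before
// TokenNumber, otherwise "3.14" would lex as the number "3"); the exact
// ordering here is a sketch and may need adjusting.
var tokenPriority = []TokenType{
	TokenCommand,
	TokenDecimal,
	TokenNumber,
	TokenDirection,
	TokenSelf,
	TokenBracketedIdentifier,
	TokenIdentifier,
	TokenPunctuation,
	TokenUnknown,
}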

type Token struct {
	token  TokenType
	lexeme string
	index  int
}

func CreateToken(token TokenType, lexeme string, index int) Token {
	return Token{
		token:  token,
		lexeme: lexeme,
		index:  index,
	}
}

func (t Token) Token() TokenType {
	return t.token
}

func (t Token) Lexeme() string {
	return t.lexeme
}

func (t Token) Index() int {
	return t.index
}

type tokenizer struct {
	commandNameTokenRegex string
}

func CreateTokenizer(commandNames []string) *tokenizer {
	// Quote each command name so a name containing regex metacharacters
	// cannot corrupt the alternation pattern.
	quoted := make([]string, len(commandNames))
	for i, name := range commandNames {
		quoted[i] = regexp.QuoteMeta(name)
	}

	return &tokenizer{
		commandNameTokenRegex: `\b(` + strings.Join(quoted, "|") + `)\b`,
	}
}

// Tokenize splits commandMsg into tokens, emitting the first pattern in
// tokenPriority that matches at the current position and appending a
// trailing TokenEOF.
func (t *tokenizer) Tokenize(commandMsg string) (tokens []Token) {
	tokens = []Token{}
	pos := 0
	inputLen := len(commandMsg)

	for pos < inputLen {
		// Skip whitespace between tokens.
		if commandMsg[pos] == ' ' || commandMsg[pos] == '\t' {
			pos++
			continue
		}

		matched := false
		for _, tokenType := range tokenPriority {
			pattern := tokenPatterns[tokenType]
			if tokenType == TokenCommand {
				pattern = t.commandNameTokenRegex
			}

			// Anchor the pattern so it can only match at the current position.
			if lexeme := regexp.MustCompile(`^(?:` + pattern + `)`).FindString(commandMsg[pos:]); lexeme != "" {
				tokens = append(tokens, CreateToken(tokenType, lexeme, pos))
				pos += len(lexeme)
				matched = true
				break
			}
		}

		if !matched {
			pos++ // Defensive: TokenUnknown's `.` already matches any non-newline byte.
		}
	}

	return append(tokens, CreateToken(TokenEOF, "", pos))
}
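
// Usage sketch (hypothetical command names, illustrative only):
//
//	t := CreateTokenizer([]string{"go", "look", "say"})
//	for _, tok := range t.Tokenize("go north") {
//		_ = tok // TokenCommand "go", then TokenDirection "north", then TokenEOF
//	}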