lf/scan.go

322 lines
5.4 KiB
Go
Raw Normal View History

2016-08-13 12:49:04 +00:00
package main
import (
"io"
"io/ioutil"
"log"
"strconv"
2016-08-13 12:49:04 +00:00
"unicode"
)
2016-12-18 15:01:45 +00:00
// tokenType identifies the kind of token most recently produced by the
// scanner.
type tokenType byte

const (
	tokenEOF tokenType = iota // end of input
	// no explicit keyword type
	tokenIdent     // e.g. set, ratios, 1:2:3
	tokenColon     // :
	tokenPrefix    // $, ! or & (/ and ? are not recognized as prefixes yet)
	tokenLBraces   // {{
	tokenRBraces   // }}
	tokenCommand   // in between a prefix to \n or between {{ and }}
	tokenSemicolon // ; or an implicit statement end inserted at a newline or EOF
	// comments are stripped
)
2016-12-17 21:47:37 +00:00
type scanner struct {
2016-08-13 12:49:04 +00:00
buf []byte // input buffer
off int // current offset in buf
chr byte // current character in buf
sem bool // insert semicolon
nln bool // insert newline
eof bool // buffer ended
key bool // scanning keys
2016-08-13 12:49:04 +00:00
blk bool // scanning block
cmd bool // scanning command
2016-12-17 21:47:37 +00:00
typ tokenType // scanned token type
2016-08-13 12:49:04 +00:00
tok string // scanned token value
// TODO: pos
}
2016-12-17 21:47:37 +00:00
func newScanner(r io.Reader) *scanner {
2016-08-13 12:49:04 +00:00
buf, err := ioutil.ReadAll(r)
if err != nil {
2016-09-02 20:13:37 +00:00
log.Printf("scanning: %s", err)
2016-08-13 12:49:04 +00:00
}
var eof bool
var chr byte
if len(buf) == 0 {
eof = true
} else {
eof = false
chr = buf[0]
}
2016-12-17 21:47:37 +00:00
return &scanner{
2016-08-13 12:49:04 +00:00
buf: buf,
eof: eof,
chr: chr,
}
}
2016-12-17 21:47:37 +00:00
func (s *scanner) next() {
2016-08-13 12:49:04 +00:00
if s.off+1 < len(s.buf) {
s.off++
s.chr = s.buf[s.off]
return
}
s.off = len(s.buf)
s.chr = 0
s.eof = true
}
2016-12-17 21:47:37 +00:00
func (s *scanner) peek() byte {
2016-08-13 12:49:04 +00:00
if s.off+1 < len(s.buf) {
return s.buf[s.off+1]
}
return 0
}
// isSpace reports whether b is an ASCII whitespace byte: tab, newline,
// vertical tab, form feed, carriage return or space.
//
// The input is scanned byte by byte, so the test must not classify the byte
// as if it were a full code point: 0x85 (NEL) and 0xA0 (NBSP) are whitespace
// as Latin-1 runes but are also ordinary UTF-8 continuation bytes (for
// example "à" is C3 A0), and treating them as whitespace would split
// multi-byte characters in the middle of a token.
func isSpace(b byte) bool {
	switch b {
	case '\t', '\n', '\v', '\f', '\r', ' ':
		return true
	}
	return false
}
// isDigit reports whether b, widened to a rune, is a decimal digit according
// to unicode.IsDigit.
func isDigit(b byte) bool {
	r := rune(b)
	return unicode.IsDigit(r)
}
2016-08-13 12:49:04 +00:00
// isPrefix reports whether b is one of the command prefix characters
// '$', '!' or '&'.
func isPrefix(b byte) bool {
	// TODO: how to differentiate slash in path vs search?
	switch b {
	case '$', '!', '&': // '/' and '?' are not treated as prefixes yet
		return true
	default:
		return false
	}
}
2016-12-17 21:47:37 +00:00
func (s *scanner) scan() bool {
2016-08-13 12:49:04 +00:00
scan:
switch {
case s.eof:
s.next()
if s.sem {
2016-12-17 21:47:37 +00:00
s.typ = tokenSemicolon
2016-08-13 12:49:04 +00:00
s.tok = "\n"
s.sem = false
return true
}
if s.nln {
2016-12-17 21:47:37 +00:00
s.typ = tokenSemicolon
2016-08-13 12:49:04 +00:00
s.tok = "\n"
s.nln = false
return true
}
2016-12-17 21:47:37 +00:00
s.typ = tokenEOF
2016-10-15 19:53:07 +00:00
s.tok = "EOF"
2016-08-13 12:49:04 +00:00
return false
case s.key:
beg := s.off
for !s.eof && !isSpace(s.chr) {
s.next()
}
2016-12-17 21:47:37 +00:00
s.typ = tokenIdent
s.tok = string(s.buf[beg:s.off])
s.key = false
2016-08-13 12:49:04 +00:00
case s.blk:
// return here by setting s.cmd to false
// after scanning the command in the loop below
if !s.cmd {
s.next()
s.next()
2016-12-17 21:47:37 +00:00
s.typ = tokenRBraces
2016-08-13 12:49:04 +00:00
s.tok = "}}"
s.blk = false
s.sem = true
return true
}
beg := s.off
for !s.eof {
s.next()
if s.chr == '}' {
if !s.eof && s.peek() == '}' {
2016-12-17 21:47:37 +00:00
s.typ = tokenCommand
2016-08-13 12:49:04 +00:00
s.tok = string(s.buf[beg:s.off])
s.cmd = false
return true
}
}
}
2016-10-15 19:53:07 +00:00
2016-12-17 21:47:37 +00:00
s.typ = tokenEOF
2016-10-15 19:53:07 +00:00
s.tok = "EOF"
2016-08-13 12:49:04 +00:00
return false
case s.cmd:
for !s.eof && isSpace(s.chr) {
s.next()
}
if !s.eof && s.chr == '{' {
if s.peek() == '{' {
s.next()
s.next()
2016-12-17 21:47:37 +00:00
s.typ = tokenLBraces
2016-08-13 12:49:04 +00:00
s.tok = "{{"
s.blk = true
return true
}
}
beg := s.off
for !s.eof && s.chr != '\n' {
s.next()
}
2016-12-17 21:47:37 +00:00
s.typ = tokenCommand
2016-08-13 12:49:04 +00:00
s.tok = string(s.buf[beg:s.off])
s.cmd = false
s.sem = true
case s.chr == '\r':
s.next()
goto scan
2016-08-13 12:49:04 +00:00
case s.chr == '\n':
if s.sem {
2016-12-17 21:47:37 +00:00
s.typ = tokenSemicolon
2016-08-13 12:49:04 +00:00
s.tok = "\n"
s.sem = false
return true
}
s.next()
2016-08-13 12:49:04 +00:00
if s.nln {
2016-12-17 21:47:37 +00:00
s.typ = tokenSemicolon
2016-08-13 12:49:04 +00:00
s.tok = "\n"
s.nln = false
return true
}
goto scan
case isSpace(s.chr):
for !s.eof && isSpace(s.chr) {
s.next()
}
goto scan
case s.chr == ';':
2016-12-17 21:47:37 +00:00
s.typ = tokenSemicolon
2016-08-13 12:49:04 +00:00
s.tok = ";"
s.sem = false
s.next()
case s.chr == '#':
for !s.eof && s.chr != '\n' {
s.next()
}
goto scan
case s.chr == '"':
s.next()
var buf []byte
for !s.eof && s.chr != '"' {
if s.chr == '\\' {
s.next()
switch s.chr {
case '"', '\\':
buf = append(buf, s.chr)
case 'a':
buf = append(buf, '\a')
case 'b':
buf = append(buf, '\b')
case 'f':
buf = append(buf, '\f')
case 'n':
buf = append(buf, '\n')
case 'r':
buf = append(buf, '\r')
case 't':
buf = append(buf, '\t')
case 'v':
buf = append(buf, '\v')
}
if isDigit(s.chr) {
var oct []byte
for isDigit(s.chr) {
oct = append(oct, s.chr)
s.next()
}
n, err := strconv.ParseInt(string(oct), 8, 0)
if err != nil {
log.Printf("scanning: %s", err)
}
buf = append(buf, byte(n))
} else {
s.next()
}
} else {
buf = append(buf, s.chr)
s.next()
}
}
s.typ = tokenIdent
s.tok = string(buf)
s.next()
2016-11-22 18:31:20 +00:00
case s.chr == '\'':
s.next()
beg := s.off
for !s.eof && s.chr != '\'' {
s.next()
}
2016-12-17 21:47:37 +00:00
s.typ = tokenIdent
2016-11-22 18:31:20 +00:00
s.tok = string(s.buf[beg:s.off])
s.next()
2016-08-13 12:49:04 +00:00
case s.chr == ':':
2016-12-17 21:47:37 +00:00
s.typ = tokenColon
2016-08-13 12:49:04 +00:00
s.tok = ":"
s.nln = true
s.next()
2016-10-15 19:53:07 +00:00
case s.chr == '{' && s.peek() == '{':
s.next()
s.next()
2016-12-17 21:47:37 +00:00
s.typ = tokenLBraces
2016-10-15 19:53:07 +00:00
s.tok = "{{"
s.sem = false
s.nln = false
case s.chr == '}' && s.peek() == '}':
s.next()
s.next()
2016-12-17 21:47:37 +00:00
s.typ = tokenRBraces
2016-10-15 19:53:07 +00:00
s.tok = "}}"
s.sem = true
2016-08-13 12:49:04 +00:00
case isPrefix(s.chr):
2016-12-17 21:47:37 +00:00
s.typ = tokenPrefix
2016-08-13 12:49:04 +00:00
s.tok = string(s.chr)
s.cmd = true
s.next()
default:
2016-11-22 19:12:47 +00:00
var buf []byte
2016-08-13 12:49:04 +00:00
for !s.eof && !isSpace(s.chr) && s.chr != ';' && s.chr != '#' {
2016-11-22 19:12:47 +00:00
if s.chr == '\\' {
s.next()
if isSpace(s.chr) {
buf = append(buf, s.chr)
s.next()
} else {
s.next()
}
}
buf = append(buf, s.chr)
2016-08-13 12:49:04 +00:00
s.next()
}
2016-10-15 19:53:07 +00:00
2016-12-17 21:47:37 +00:00
s.typ = tokenIdent
2016-11-22 19:12:47 +00:00
s.tok = string(buf)
2016-10-15 19:53:07 +00:00
s.sem = true
if s.tok == "push" {
s.key = true
2016-10-15 19:53:07 +00:00
for !s.eof && isSpace(s.chr) && s.chr != '\n' {
s.next()
}
}
2016-08-13 12:49:04 +00:00
}
return true
}