Stanislav N. aka pztrn
48d43ca097
Pagination now works. Temporarily hardcoded to 10 pastes per page; this will be moved into configuration later. Maybe. From now on the user will receive a readable error message if an error occurred. Started work on syntax highlighting; tried to make lexer detection work, but apparently to no avail.
275 lines
8.2 KiB
Go
275 lines
8.2 KiB
Go
package syntax
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"math"
|
|
)
|
|
|
|
// Similar to prog.go in the Go regexp package... which also carries the
// comment 'may not belong in this package'.

// This file provides operator constants for use by the Builder and the Machine.

// Implementation notes:
//
// Regexps are built into RegexCodes, which contain an operation array,
// a string table, and some constants.
//
// Each operation is one of the codes below, followed by the integer
// operands specified for each op.
//
// Strings and sets are indices into a string table.
|
|
|
|
// InstOp is the integer code of a single regex instruction. The low bits
// (see Mask) select the operation; the higher bits (Rtl, Back, Back2, Ci)
// are modifier flags OR-ed onto it.
type InstOp int

// Operation codes and modifier bits.
//
// Only Onerep carries the InstOp type explicitly; every other constant in
// this block is an untyped integer constant, so it can be used both as an
// InstOp and as a plain int. That split is kept as-is for compatibility.
//
// The trailing comment on each opcode keeps the original three columns:
// which directions apply (lef/back), the operands, and a description.
const (
	//                         lef/back  operands      description

	Onerep    InstOp = 0 // lef,back  char,min,max  a {n}
	Notonerep        = 1 // lef,back  char,min,max  .{n}
	Setrep           = 2 // lef,back  set,min,max   [\d]{n}

	Oneloop    = 3 // lef,back  char,min,max  a {,n}
	Notoneloop = 4 // lef,back  char,min,max  .{,n}
	Setloop    = 5 // lef,back  set,min,max   [\d]{,n}

	Onelazy    = 6 // lef,back  char,min,max  a {,n}?
	Notonelazy = 7 // lef,back  char,min,max  .{,n}?
	Setlazy    = 8 // lef,back  set,min,max   [\d]{,n}?

	One    = 9  // lef  char  a
	Notone = 10 // lef  char  [^a]
	Set    = 11 // lef  set   [a-z\s]  \w \s \d

	Multi = 12 // lef  string  abcd
	Ref   = 13 // lef  group   \#

	Bol         = 14 // ^
	Eol         = 15 // $
	Boundary    = 16 // \b
	Nonboundary = 17 // \B
	Beginning   = 18 // \A
	Start       = 19 // \G
	EndZ        = 20 // \Z
	End         = 21 // \z

	Nothing = 22 // Reject!

	// Primitive control structures

	Lazybranch      = 23 // back  jump        straight first
	Branchmark      = 24 // back  jump        branch first for loop
	Lazybranchmark  = 25 // back  jump        straight first for loop
	Nullcount       = 26 // back  val         set counter, null mark
	Setcount        = 27 // back  val         set counter, make mark
	Branchcount     = 28 // back  jump,limit  branch++ if zero<=c<limit
	Lazybranchcount = 29 // back  jump,limit  same, but straight first
	Nullmark        = 30 // back              save position
	Setmark         = 31 // back              save position
	Capturemark     = 32 // back  group       define group
	Getmark         = 33 // back              recall position
	Setjump         = 34 // back              save backtrack state
	Backjump        = 35 //                   zap back to saved state
	Forejump        = 36 //                   zap backtracking state
	Testref         = 37 //                   backtrack if ref undefined
	Goto            = 38 //       jump        just go

	Prune = 39 // prune it baby
	Stop  = 40 // done!

	ECMABoundary    = 41 // \b
	NonECMABoundary = 42 // \B

	// Modifiers for alternate modes

	Mask  = 63  // Mask to get unmodified ordinary operator
	Rtl   = 64  // bit to indicate that we're reverse scanning.
	Back  = 128 // bit to indicate that we're backtracking.
	Back2 = 256 // bit to indicate that we're backtracking on a second branch.
	Ci    = 512 // bit to indicate that we're case-insensitive.
)
|
|
|
|
type Code struct {
|
|
Codes []int // the code
|
|
Strings [][]rune // string table
|
|
Sets []*CharSet //character set table
|
|
TrackCount int // how many instructions use backtracking
|
|
Caps map[int]int // mapping of user group numbers -> impl group slots
|
|
Capsize int // number of impl group slots
|
|
FcPrefix *Prefix // the set of candidate first characters (may be null)
|
|
BmPrefix *BmPrefix // the fixed prefix string as a Boyer-Moore machine (may be null)
|
|
Anchors AnchorLoc // the set of zero-length start anchors (RegexFCD.Bol, etc)
|
|
RightToLeft bool // true if right to left
|
|
}
|
|
|
|
func opcodeBacktracks(op InstOp) bool {
|
|
op &= Mask
|
|
|
|
switch op {
|
|
case Oneloop, Notoneloop, Setloop, Onelazy, Notonelazy, Setlazy, Lazybranch, Branchmark, Lazybranchmark,
|
|
Nullcount, Setcount, Branchcount, Lazybranchcount, Setmark, Capturemark, Getmark, Setjump, Backjump,
|
|
Forejump, Goto:
|
|
return true
|
|
|
|
default:
|
|
return false
|
|
}
|
|
}
|
|
|
|
func opcodeSize(op InstOp) int {
|
|
op &= Mask
|
|
|
|
switch op {
|
|
case Nothing, Bol, Eol, Boundary, Nonboundary, ECMABoundary, NonECMABoundary, Beginning, Start, EndZ,
|
|
End, Nullmark, Setmark, Getmark, Setjump, Backjump, Forejump, Stop:
|
|
return 1
|
|
|
|
case One, Notone, Multi, Ref, Testref, Goto, Nullcount, Setcount, Lazybranch, Branchmark, Lazybranchmark,
|
|
Prune, Set:
|
|
return 2
|
|
|
|
case Capturemark, Branchcount, Lazybranchcount, Onerep, Notonerep, Oneloop, Notoneloop, Onelazy, Notonelazy,
|
|
Setlazy, Setrep, Setloop:
|
|
return 3
|
|
|
|
default:
|
|
panic(fmt.Errorf("Unexpected op code: %v", op))
|
|
}
|
|
}
|
|
|
|
// codeStr maps an unmodified opcode value to its display name. It is indexed
// directly by the opcode, so the entries must stay in numeric opcode order.
var codeStr = []string{
	// 0-2
	"Onerep", "Notonerep", "Setrep",
	// 3-5
	"Oneloop", "Notoneloop", "Setloop",
	// 6-8
	"Onelazy", "Notonelazy", "Setlazy",
	// 9-11
	"One", "Notone", "Set",
	// 12-13
	"Multi", "Ref",
	// 14-21
	"Bol", "Eol", "Boundary", "Nonboundary", "Beginning", "Start", "EndZ", "End",
	// 22
	"Nothing",
	// 23-25
	"Lazybranch", "Branchmark", "Lazybranchmark",
	// 26-29
	"Nullcount", "Setcount", "Branchcount", "Lazybranchcount",
	// 30-33
	"Nullmark", "Setmark", "Capturemark", "Getmark",
	// 34-38
	"Setjump", "Backjump", "Forejump", "Testref", "Goto",
	// 39-40
	"Prune", "Stop",
	// 41-42
	"ECMABoundary", "NonECMABoundary",
}
|
|
|
|
func operatorDescription(op InstOp) string {
|
|
desc := codeStr[op&Mask]
|
|
if (op & Ci) != 0 {
|
|
desc += "-Ci"
|
|
}
|
|
if (op & Rtl) != 0 {
|
|
desc += "-Rtl"
|
|
}
|
|
if (op & Back) != 0 {
|
|
desc += "-Back"
|
|
}
|
|
if (op & Back2) != 0 {
|
|
desc += "-Back2"
|
|
}
|
|
|
|
return desc
|
|
}
|
|
|
|
// OpcodeDescription is a humman readable string of the specific offset
|
|
func (c *Code) OpcodeDescription(offset int) string {
|
|
buf := &bytes.Buffer{}
|
|
|
|
op := InstOp(c.Codes[offset])
|
|
fmt.Fprintf(buf, "%06d ", offset)
|
|
|
|
if opcodeBacktracks(op & Mask) {
|
|
buf.WriteString("*")
|
|
} else {
|
|
buf.WriteString(" ")
|
|
}
|
|
buf.WriteString(operatorDescription(op))
|
|
buf.WriteString("(")
|
|
op &= Mask
|
|
|
|
switch op {
|
|
case One, Notone, Onerep, Notonerep, Oneloop, Notoneloop, Onelazy, Notonelazy:
|
|
buf.WriteString("Ch = ")
|
|
buf.WriteString(CharDescription(rune(c.Codes[offset+1])))
|
|
|
|
case Set, Setrep, Setloop, Setlazy:
|
|
buf.WriteString("Set = ")
|
|
buf.WriteString(c.Sets[c.Codes[offset+1]].String())
|
|
|
|
case Multi:
|
|
fmt.Fprintf(buf, "String = %s", string(c.Strings[c.Codes[offset+1]]))
|
|
|
|
case Ref, Testref:
|
|
fmt.Fprintf(buf, "Index = %d", c.Codes[offset+1])
|
|
|
|
case Capturemark:
|
|
fmt.Fprintf(buf, "Index = %d", c.Codes[offset+1])
|
|
if c.Codes[offset+2] != -1 {
|
|
fmt.Fprintf(buf, ", Unindex = %d", c.Codes[offset+2])
|
|
}
|
|
|
|
case Nullcount, Setcount:
|
|
fmt.Fprintf(buf, "Value = %d", c.Codes[offset+1])
|
|
|
|
case Goto, Lazybranch, Branchmark, Lazybranchmark, Branchcount, Lazybranchcount:
|
|
fmt.Fprintf(buf, "Addr = %d", c.Codes[offset+1])
|
|
}
|
|
|
|
switch op {
|
|
case Onerep, Notonerep, Oneloop, Notoneloop, Onelazy, Notonelazy, Setrep, Setloop, Setlazy:
|
|
buf.WriteString(", Rep = ")
|
|
if c.Codes[offset+2] == math.MaxInt32 {
|
|
buf.WriteString("inf")
|
|
} else {
|
|
fmt.Fprintf(buf, "%d", c.Codes[offset+2])
|
|
}
|
|
|
|
case Branchcount, Lazybranchcount:
|
|
buf.WriteString(", Limit = ")
|
|
if c.Codes[offset+2] == math.MaxInt32 {
|
|
buf.WriteString("inf")
|
|
} else {
|
|
fmt.Fprintf(buf, "%d", c.Codes[offset+2])
|
|
}
|
|
|
|
}
|
|
|
|
buf.WriteString(")")
|
|
|
|
return buf.String()
|
|
}
|
|
|
|
func (c *Code) Dump() string {
|
|
buf := &bytes.Buffer{}
|
|
|
|
if c.RightToLeft {
|
|
fmt.Fprintln(buf, "Direction: right-to-left")
|
|
} else {
|
|
fmt.Fprintln(buf, "Direction: left-to-right")
|
|
}
|
|
if c.FcPrefix == nil {
|
|
fmt.Fprintln(buf, "Firstchars: n/a")
|
|
} else {
|
|
fmt.Fprintf(buf, "Firstchars: %v\n", c.FcPrefix.PrefixSet.String())
|
|
}
|
|
|
|
if c.BmPrefix == nil {
|
|
fmt.Fprintln(buf, "Prefix: n/a")
|
|
} else {
|
|
fmt.Fprintf(buf, "Prefix: %v\n", Escape(c.BmPrefix.String()))
|
|
}
|
|
|
|
fmt.Fprintf(buf, "Anchors: %v\n", c.Anchors)
|
|
fmt.Fprintln(buf)
|
|
|
|
if c.BmPrefix != nil {
|
|
fmt.Fprintln(buf, "BoyerMoore:")
|
|
fmt.Fprintln(buf, c.BmPrefix.Dump(" "))
|
|
}
|
|
for i := 0; i < len(c.Codes); i += opcodeSize(InstOp(c.Codes[i])) {
|
|
fmt.Fprintln(buf, c.OpcodeDescription(i))
|
|
}
|
|
|
|
return buf.String()
|
|
}
|