Stanislav N. aka pztrn 48d43ca097 Pagination, readable error messages to user, syntax highlighting started.
Pagination now works. The page size is temporarily hardcoded to 10 pastes
per page; it will be moved into the configuration later. Maybe.

From now on, the user will receive a readable error message if an error occurs.

Started to work on syntax highlighting; tried to make lexer detection
work, but so far to no avail.
2018-05-01 02:37:51 +05:00

275 lines
8.2 KiB

package syntax
import (
	"bytes"
	"fmt"
	"math"
)

// similar to prog.go in the go regex package...also with comment 'may not belong in this package'
// File provides operator constants for use by the Builder and the Machine.
// Implementation notes:
// Regexps are built into RegexCodes, which contain an operation array,
// a string table, and some constants.
// Each operation is one of the codes below, followed by the integer
// operands specified for each op.
// Strings and sets are indices into a string table.
// InstOp is one opcode of a compiled program. The bits selected by Mask hold
// the operation itself; the higher bits are the modifier flags Rtl, Back,
// Back2 and Ci.
type InstOp int

// Opcodes and modifier bits.
//
// Only Onerep is declared with the InstOp type. Every other name has its own
// initializer without a type and is therefore an untyped integer constant,
// which converts implicitly wherever an InstOp (or a plain int) is expected.
//
// Comment columns: lef/back | operands | description.
const (
	Onerep      InstOp = 0  // lef,back  char,min,max  a{n}
	Notonerep          = 1  // lef,back  char,min,max  .{n}
	Setrep             = 2  // lef,back  set,min,max   [\d]{n}
	Oneloop            = 3  // lef,back  char,min,max  a{,n}
	Notoneloop         = 4  // lef,back  char,min,max  .{,n}
	Setloop            = 5  // lef,back  set,min,max   [\d]{,n}
	Onelazy            = 6  // lef,back  char,min,max  a{,n}?
	Notonelazy         = 7  // lef,back  char,min,max  .{,n}?
	Setlazy            = 8  // lef,back  set,min,max   [\d]{,n}?
	One                = 9  // lef       char          a
	Notone             = 10 // lef       char          [^a]
	Set                = 11 // lef       set           [a-z\s] \w \s \d
	Multi              = 12 // lef       string        abcd
	Ref                = 13 // lef       group         \#
	Bol                = 14 //                         ^
	Eol                = 15 //                         $
	Boundary           = 16 //                         \b
	Nonboundary        = 17 //                         \B
	Beginning          = 18 //                         \A
	Start              = 19 //                         \G
	EndZ               = 20 //                         \Z
	End                = 21 //                         \z
	Nothing            = 22 //                         Reject!

	// Primitive control structures

	Lazybranch      = 23 // back  jump        straight first
	Branchmark      = 24 // back  jump        branch first for loop
	Lazybranchmark  = 25 // back  jump        straight first for loop
	Nullcount       = 26 // back  val         set counter, null mark
	Setcount        = 27 // back  val         set counter, make mark
	Branchcount     = 28 // back  jump,limit  branch++ if zero<=c<limit
	Lazybranchcount = 29 // back  jump,limit  same, but straight first
	Nullmark        = 30 // back              save position
	Setmark         = 31 // back              save position
	Capturemark     = 32 // back  group       define group
	Getmark         = 33 // back              recall position
	Setjump         = 34 // back              save backtrack state
	Backjump        = 35 //                   zap back to saved state
	Forejump        = 36 //                   zap backtracking state
	Testref         = 37 //                   backtrack if ref undefined
	Goto            = 38 //       jump        just go
	Prune           = 39 //                   prune it baby
	Stop            = 40 //                   done!
	ECMABoundary    = 41 //                   \b
	NonECMABoundary = 42 //                   \B

	// Modifiers for alternate modes

	Mask  = 63  // mask to get the unmodified ordinary operator
	Rtl   = 64  // bit to indicate that we're reverse scanning
	Back  = 128 // bit to indicate that we're backtracking
	Back2 = 256 // bit to indicate that we're backtracking on a second branch
	Ci    = 512 // bit to indicate that we're case-insensitive
)
type Code struct {
Codes []int // the code
Strings [][]rune // string table
Sets []*CharSet //character set table
TrackCount int // how many instructions use backtracking
Caps map[int]int // mapping of user group numbers -> impl group slots
Capsize int // number of impl group slots
FcPrefix *Prefix // the set of candidate first characters (may be null)
BmPrefix *BmPrefix // the fixed prefix string as a Boyer-Moore machine (may be null)
Anchors AnchorLoc // the set of zero-length start anchors (RegexFCD.Bol, etc)
RightToLeft bool // true if right to left
func opcodeBacktracks(op InstOp) bool {
op &= Mask
switch op {
case Oneloop, Notoneloop, Setloop, Onelazy, Notonelazy, Setlazy, Lazybranch, Branchmark, Lazybranchmark,
Nullcount, Setcount, Branchcount, Lazybranchcount, Setmark, Capturemark, Getmark, Setjump, Backjump,
Forejump, Goto:
return true
return false
func opcodeSize(op InstOp) int {
op &= Mask
switch op {
case Nothing, Bol, Eol, Boundary, Nonboundary, ECMABoundary, NonECMABoundary, Beginning, Start, EndZ,
End, Nullmark, Setmark, Getmark, Setjump, Backjump, Forejump, Stop:
return 1
case One, Notone, Multi, Ref, Testref, Goto, Nullcount, Setcount, Lazybranch, Branchmark, Lazybranchmark,
Prune, Set:
return 2
case Capturemark, Branchcount, Lazybranchcount, Onerep, Notonerep, Oneloop, Notoneloop, Onelazy, Notonelazy,
Setlazy, Setrep, Setloop:
return 3
panic(fmt.Errorf("Unexpected op code: %v", op))
// codeStr holds the display name of every opcode, indexed by the opcode's
// numeric value (0 = Onerep ... 42 = NonECMABoundary). The order must match
// the opcode constants exactly: a missing entry shifts every later name by
// one and leaves the highest opcode without a slot.
var codeStr = []string{
	"Onerep", "Notonerep", "Setrep",
	"Oneloop", "Notoneloop", "Setloop",
	"Onelazy", "Notonelazy", "Setlazy",
	"One", "Notone", "Set",
	"Multi", "Ref",
	"Bol", "Eol", "Boundary", "Nonboundary", "Beginning", "Start", "EndZ", "End",
	// Opcode 22; without it "Lazybranch" would be reported for Nothing.
	"Nothing",
	"Lazybranch", "Branchmark", "Lazybranchmark",
	"Nullcount", "Setcount", "Branchcount", "Lazybranchcount",
	"Nullmark", "Setmark", "Capturemark", "Getmark",
	"Setjump", "Backjump", "Forejump", "Testref", "Goto",
	"Prune", "Stop",
	"ECMABoundary", "NonECMABoundary",
}
func operatorDescription(op InstOp) string {
desc := codeStr[op&Mask]
if (op & Ci) != 0 {
desc += "-Ci"
if (op & Rtl) != 0 {
desc += "-Rtl"
if (op & Back) != 0 {
desc += "-Back"
if (op & Back2) != 0 {
desc += "-Back2"
return desc
// OpcodeDescription is a humman readable string of the specific offset
func (c *Code) OpcodeDescription(offset int) string {
buf := &bytes.Buffer{}
op := InstOp(c.Codes[offset])
fmt.Fprintf(buf, "%06d ", offset)
if opcodeBacktracks(op & Mask) {
} else {
buf.WriteString(" ")
op &= Mask
switch op {
case One, Notone, Onerep, Notonerep, Oneloop, Notoneloop, Onelazy, Notonelazy:
buf.WriteString("Ch = ")
case Set, Setrep, Setloop, Setlazy:
buf.WriteString("Set = ")
case Multi:
fmt.Fprintf(buf, "String = %s", string(c.Strings[c.Codes[offset+1]]))
case Ref, Testref:
fmt.Fprintf(buf, "Index = %d", c.Codes[offset+1])
case Capturemark:
fmt.Fprintf(buf, "Index = %d", c.Codes[offset+1])
if c.Codes[offset+2] != -1 {
fmt.Fprintf(buf, ", Unindex = %d", c.Codes[offset+2])
case Nullcount, Setcount:
fmt.Fprintf(buf, "Value = %d", c.Codes[offset+1])
case Goto, Lazybranch, Branchmark, Lazybranchmark, Branchcount, Lazybranchcount:
fmt.Fprintf(buf, "Addr = %d", c.Codes[offset+1])
switch op {
case Onerep, Notonerep, Oneloop, Notoneloop, Onelazy, Notonelazy, Setrep, Setloop, Setlazy:
buf.WriteString(", Rep = ")
if c.Codes[offset+2] == math.MaxInt32 {
} else {
fmt.Fprintf(buf, "%d", c.Codes[offset+2])
case Branchcount, Lazybranchcount:
buf.WriteString(", Limit = ")
if c.Codes[offset+2] == math.MaxInt32 {
} else {
fmt.Fprintf(buf, "%d", c.Codes[offset+2])
return buf.String()
func (c *Code) Dump() string {
buf := &bytes.Buffer{}
if c.RightToLeft {
fmt.Fprintln(buf, "Direction: right-to-left")
} else {
fmt.Fprintln(buf, "Direction: left-to-right")
if c.FcPrefix == nil {
fmt.Fprintln(buf, "Firstchars: n/a")
} else {
fmt.Fprintf(buf, "Firstchars: %v\n", c.FcPrefix.PrefixSet.String())
if c.BmPrefix == nil {
fmt.Fprintln(buf, "Prefix: n/a")
} else {
fmt.Fprintf(buf, "Prefix: %v\n", Escape(c.BmPrefix.String()))
fmt.Fprintf(buf, "Anchors: %v\n", c.Anchors)
if c.BmPrefix != nil {
fmt.Fprintln(buf, "BoyerMoore:")
fmt.Fprintln(buf, c.BmPrefix.Dump(" "))
for i := 0; i < len(c.Codes); i += opcodeSize(InstOp(c.Codes[i])) {
fmt.Fprintln(buf, c.OpcodeDescription(i))
return buf.String()